Compare commits
17 Commits
169e128ca1
...
422b4727c0
Author | SHA1 | Date |
---|---|---|
Kenneth Bruen | 422b4727c0 | 3 months ago |
Kenneth Bruen | 9e680e3bb2 | 1 year ago |
Kenneth Bruen | 8421b49834 | 2 years ago |
Kenneth Bruen | d4ad04b0f3 | 2 years ago |
Kenneth Bruen | 1abb5155e3 | 2 years ago |
Kenneth Bruen | 5785e4a98b | 2 years ago |
Kenneth Bruen | dd273558e4 | 2 years ago |
Kenneth Bruen | 2465a90305 | 2 years ago |
Kenneth Bruen | b66b8f79b1 | 2 years ago |
Kenneth Bruen | 1d9db5b491 | 2 years ago |
Kenneth Bruen | de5d85cff4 | 2 years ago |
Kenneth Bruen | 44a1be5659 | 2 years ago |
Kenneth Bruen | e4de0347ad | 2 years ago |
Kenneth Bruen | 0b5a8bf487 | 2 years ago |
Kenneth Bruen | 0bcaf40ea3 | 2 years ago |
Kenneth Bruen | 8ef53a64ad | 2 years ago |
Kenneth Bruen | 145f7b0ee1 | 2 years ago |
34 changed files with 1486 additions and 761 deletions
@ -0,0 +1,62 @@
|
||||
using System; |
||||
using System.Collections.Generic; |
||||
|
||||
namespace scraper.Models.Itinerary; |
||||
|
||||
#region Interfaces |
||||
|
||||
/// <summary>
/// An itinerary, described by the list of trains that make it up.
/// </summary>
public interface IItinerary {
    /// <summary>Trains that are part of this itinerary.</summary>
    IReadOnlyList<IItineraryTrain> Trains { get; }
}
||||
|
||||
/// <summary>
/// One train taken as part of an itinerary.
/// </summary>
public interface IItineraryTrain {
    /// <summary>Name of the station where this train is boarded.</summary>
    string From { get; }

    /// <summary>Name of the station where this train is left.</summary>
    string To { get; }

    /// <summary>Names of the stops between <see cref="From"/> and <see cref="To"/>.</summary>
    IReadOnlyList<string> IntermediateStops { get; }

    /// <summary>Departure moment from <see cref="From"/>.</summary>
    DateTimeOffset DepartureDate { get; }

    /// <summary>Arrival moment at <see cref="To"/>.</summary>
    DateTimeOffset ArrivalDate { get; }

    /// <summary>Kilometre figure reported for this train.</summary>
    int Km { get; }

    /// <summary>Name of the operator of the train.</summary>
    string Operator { get; }

    /// <summary>Rank of the train.</summary>
    string TrainRank { get; }

    /// <summary>Number of the train, kept as text.</summary>
    string TrainNumber { get; }
}
||||
|
||||
#endregion |
||||
|
||||
#region Implementations |
||||
|
||||
/// <summary>
/// Implementation of <see cref="IItinerary"/> that can be filled in from inside the assembly.
/// </summary>
internal record Itinerary : IItinerary {
    private List<IItineraryTrain> ModifyableTrains { get; set; } = new();

    /// <summary>
    /// Read-only view over the trains added so far.
    /// A wrapper is returned instead of the backing list so that consumers
    /// cannot cast the value back to <see cref="List{T}"/> and mutate it.
    /// </summary>
    public IReadOnlyList<IItineraryTrain> Trains => ModifyableTrains.AsReadOnly();

    /// <summary>Appends an already built train to the itinerary.</summary>
    /// <param name="train">Train to append.</param>
    internal void AddTrain(IItineraryTrain train) {
        ModifyableTrains.Add(train);
    }

    /// <summary>
    /// Creates a new <see cref="ItineraryTrain"/>, lets <paramref name="configurator"/>
    /// fill it in and then appends it to the itinerary.
    /// </summary>
    /// <param name="configurator">Callback that receives the new, empty train.</param>
    internal void AddTrain(Action<ItineraryTrain> configurator) {
        ItineraryTrain newTrain = new();
        configurator(newTrain);
        AddTrain(newTrain);
    }
}
||||
|
||||
/// <summary>
/// Implementation of <see cref="IItineraryTrain"/> whose values can be set from inside the assembly.
/// </summary>
internal record ItineraryTrain : IItineraryTrain {
    private List<string> ModifyableIntermediateStops { get; set; } = new();

    public string From { get; internal set; } = "";
    public string To { get; internal set; } = "";

    /// <summary>
    /// Read-only view over the intermediate stops added so far.
    /// A wrapper is returned instead of the backing list so that consumers
    /// cannot cast the value back to <see cref="List{T}"/> and mutate it.
    /// </summary>
    public IReadOnlyList<string> IntermediateStops => ModifyableIntermediateStops.AsReadOnly();

    public DateTimeOffset DepartureDate { get; internal set; } = new();
    public DateTimeOffset ArrivalDate { get; internal set; } = new();
    public int Km { get; internal set; } = 0;
    public string Operator { get; internal set; } = "";
    public string TrainRank { get; internal set; } = "";
    public string TrainNumber { get; internal set; } = "";

    /// <summary>Appends a stop to <see cref="IntermediateStops"/>.</summary>
    /// <param name="stop">Name of the stop.</param>
    internal void AddIntermediateStop(string stop) {
        ModifyableIntermediateStops.Add(stop);
    }
}
||||
|
||||
#endregion |
@ -1,316 +1,246 @@
|
||||
using System; |
||||
using System.Collections.Generic; |
||||
using System.Text.Json; |
||||
using System.Text.Json.Serialization; |
||||
using InfoferScraper.Models.Status; |
||||
using InfoferScraper.Models.Train.JsonConverters; |
||||
|
||||
namespace InfoferScraper.Models.Train { |
||||
#region Interfaces |
||||
|
||||
/// <summary>
/// Top-level result produced when scraping a train.
/// </summary>
public interface ITrainScrapeResult {
    /// <summary>Rank of the train.</summary>
    string Rank { get; }

    /// <summary>Number of the train, kept as text.</summary>
    string Number { get; }

    /// <summary>
    /// Date in the DD.MM.YYYY format.
    /// This date is taken as-is from the result.
    /// </summary>
    string Date { get; }

    /// <summary>Name of the operator of the train.</summary>
    string Operator { get; }

    /// <summary>Groups the train is made of.</summary>
    IReadOnlyList<ITrainGroup> Groups { get; }
}
||||
|
||||
/// <summary>
/// A group of a train: its route, optional status and the list of stations.
/// </summary>
public interface ITrainGroup {
    /// <summary>End points of the group's route.</summary>
    ITrainRoute Route { get; }

    /// <summary>Status of the group; <c>null</c> when none was set.</summary>
    ITrainStatus? Status { get; }

    /// <summary>Stops of the group.</summary>
    IReadOnlyList<ITrainStopDescription> Stations { get; }
}
||||
|
||||
/// <summary>
/// The two end points of a route.
/// </summary>
public interface ITrainRoute {
    /// <summary>Name of the starting station.</summary>
    string From { get; }

    /// <summary>Name of the final station.</summary>
    string To { get; }
}
||||
|
||||
/// <summary>
/// Status reported for a train group.
/// </summary>
public interface ITrainStatus {
    /// <summary>Reported delay. NOTE(review): unit and sign convention are not visible here; confirm against the scraper.</summary>
    int Delay { get; }

    /// <summary>Station the status refers to.</summary>
    string Station { get; }

    /// <summary>Kind of event the status refers to.</summary>
    StatusKind State { get; }
}
||||
|
||||
/// <summary>
/// Description of one stop of a train group.
/// </summary>
public interface ITrainStopDescription {
    /// <summary>Name of the station.</summary>
    string Name { get; }

    /// <summary>Kilometre figure reported for the stop.</summary>
    int Km { get; }

    /// <summary>
    /// The time the train waits in the station in seconds
    /// </summary>
    int? StoppingTime { get; }

    /// <summary>Platform of the stop; <c>null</c> when not set.</summary>
    string? Platform { get; }

    /// <summary>Arrival details; <c>null</c> when not set.</summary>
    ITrainStopArrDep? Arrival { get; }

    /// <summary>Departure details; <c>null</c> when not set.</summary>
    ITrainStopArrDep? Departure { get; }

    /// <summary>Notes attached to the stop.</summary>
    IReadOnlyList<object> Notes { get; }
}
||||
|
||||
/// <summary>
/// Base contract shared by every note attached to a stop.
/// </summary>
public interface ITrainStopNote {
    /// <summary>Discriminator telling which kind of note this is.</summary>
    NoteKind Kind { get; }
}
||||
|
||||
/// <summary>
/// Note carrying the rank and number a train changes to.
/// </summary>
public interface ITrainStopTrainNumberChangeNote : ITrainStopNote {
    /// <summary>New rank.</summary>
    string Rank { get; }

    /// <summary>New number, kept as text.</summary>
    string Number { get; }
}
||||
|
||||
/// <summary>
/// Note carrying the rank, number and date a train departs as.
/// </summary>
public interface ITrainStopDepartsAsNote : ITrainStopNote {
    /// <summary>Rank the train departs with.</summary>
    string Rank { get; }

    /// <summary>Number the train departs with, kept as text.</summary>
    string Number { get; }

    /// <summary>Date associated with the departure.</summary>
    DateTimeOffset DepartureDate { get; }
}
||||
|
||||
/// <summary>
/// Note about wagons being detached.
/// </summary>
public interface ITrainStopDetachingWagonsNote : ITrainStopNote {
    /// <summary>Station named by the note.</summary>
    string Station { get; }
}
||||
|
||||
/// <summary>
/// Note about wagons being received.
/// </summary>
public interface ITrainStopReceivingWagonsNote : ITrainStopNote {
    /// <summary>Station named by the note.</summary>
    string Station { get; }
}
||||
|
||||
/// <summary>
/// Arrival or departure information of a stop.
/// </summary>
public interface ITrainStopArrDep {
    /// <summary>Scheduled moment of the arrival or departure.</summary>
    DateTimeOffset ScheduleTime { get; }

    /// <summary>Status of the arrival or departure; <c>null</c> when not set.</summary>
    IStatus? Status { get; }
}
||||
|
||||
#endregion |
||||
|
||||
/// <summary>
/// Kind of event a train status refers to.
/// Converted to and from JSON by <see cref="StatusKindConverter"/>.
/// </summary>
[JsonConverter(typeof(StatusKindConverter))]
public enum StatusKind {
    Passing = 0,
    Arrival = 1,
    Departure = 2,
}
||||
|
||||
/// <summary>
/// Discriminator for the notes that can be attached to a stop.
/// Converted to and from JSON by <see cref="NoteKindConverter"/>.
/// </summary>
[JsonConverter(typeof(NoteKindConverter))]
public enum NoteKind {
    TrainNumberChange = 0,
    DetachingWagons = 1,
    ReceivingWagons = 2,
    DepartsAs = 3,
}
||||
|
||||
#region Implementations |
||||
|
||||
/// <summary>
/// Implementation of <see cref="ITrainScrapeResult"/> that is filled in while scraping.
/// </summary>
internal record TrainScrapeResult : ITrainScrapeResult {
    private List<ITrainGroup> ModifyableGroups { get; set; } = new();

    public string Rank { get; set; } = "";
    public string Number { get; set; } = "";
    public string Date { get; set; } = "";
    public string Operator { get; set; } = "";

    /// <summary>Read-only wrapper over the groups added so far.</summary>
    public IReadOnlyList<ITrainGroup> Groups => ModifyableGroups.AsReadOnly();

    private void AddTrainGroup(ITrainGroup trainGroup) => ModifyableGroups.Add(trainGroup);

    /// <summary>
    /// Creates an empty <see cref="TrainGroup"/>, hands it to
    /// <paramref name="configurator"/> and then appends it to <see cref="Groups"/>.
    /// </summary>
    internal void AddTrainGroup(Action<TrainGroup> configurator) {
        TrainGroup group = new();
        configurator(group);
        AddTrainGroup(group);
    }
}
||||
|
||||
/// <summary>
/// Implementation of <see cref="ITrainGroup"/> that is filled in while scraping.
/// </summary>
internal record TrainGroup : ITrainGroup {
    private List<ITrainStopDescription> ModifyableStations { get; set; } = new();

    public ITrainRoute Route { get; init; } = new TrainRoute();
    public ITrainStatus? Status { get; private set; }

    /// <summary>Read-only wrapper over the stops added so far.</summary>
    public IReadOnlyList<ITrainStopDescription> Stations => ModifyableStations.AsReadOnly();

    private void AddStopDescription(ITrainStopDescription stopDescription) =>
        ModifyableStations.Add(stopDescription);

    /// <summary>
    /// Creates an empty <see cref="TrainStopDescription"/>, hands it to
    /// <paramref name="configurator"/> and then appends it to <see cref="Stations"/>.
    /// </summary>
    internal void AddStopDescription(Action<TrainStopDescription> configurator) {
        TrainStopDescription stop = new();
        configurator(stop);
        AddStopDescription(stop);
    }

    /// <summary>
    /// Lets <paramref name="configurator"/> edit the current route.
    /// The cast fails if <see cref="Route"/> was initialised with something
    /// other than a <see cref="TrainRoute"/>.
    /// </summary>
    internal void ConfigureRoute(Action<TrainRoute> configurator) => configurator((TrainRoute)Route);

    /// <summary>
    /// Creates an empty <see cref="TrainStatus"/>, hands it to
    /// <paramref name="configurator"/> and then stores it in <see cref="Status"/>.
    /// </summary>
    internal void MakeStatus(Action<TrainStatus> configurator) {
        TrainStatus status = new();
        configurator(status);
        Status = status;
    }
}
||||
|
||||
/// <summary>
/// Implementation of <see cref="ITrainRoute"/>; both ends start out as empty strings.
/// </summary>
internal record TrainRoute : ITrainRoute {
    public string From { get; set; } = "";
    public string To { get; set; } = "";
}
||||
|
||||
/// <summary>
/// Implementation of <see cref="ITrainStatus"/> with settable values.
/// </summary>
internal record TrainStatus : ITrainStatus {
    /// <inheritdoc/>
    public int Delay { get; set; }

    /// <inheritdoc/>
    public string Station { get; set; } = "";

    /// <inheritdoc/>
    public StatusKind State { get; set; }
}
||||
|
||||
/// <summary>
/// Implementation of <see cref="ITrainStopDescription"/> that is filled in while scraping.
/// </summary>
internal record TrainStopDescription : ITrainStopDescription {
    private List<ITrainStopNote> ModifyableNotes { get; } = new();

    public string Name { get; set; } = "";
    public int Km { get; set; }
    public int? StoppingTime { get; set; }
    public string? Platform { get; set; }
    public ITrainStopArrDep? Arrival { get; private set; }
    public ITrainStopArrDep? Departure { get; private set; }

    /// <summary>
    /// Read-only wrapper over the notes added so far.
    /// NOTE(review): presumably exposed as <c>object</c> so that serializers use each
    /// note's runtime type; confirm before narrowing the element type.
    /// </summary>
    public IReadOnlyList<object> Notes => ModifyableNotes.AsReadOnly();

    /// <summary>
    /// Creates an empty <see cref="TrainStopArrDep"/>, hands it to
    /// <paramref name="configurator"/> and stores it in <see cref="Arrival"/>.
    /// </summary>
    internal void MakeArrival(Action<TrainStopArrDep> configurator) {
        TrainStopArrDep newArrival = new();
        configurator(newArrival);
        Arrival = newArrival;
    }

    /// <summary>
    /// Creates an empty <see cref="TrainStopArrDep"/>, hands it to
    /// <paramref name="configurator"/> and stores it in <see cref="Departure"/>.
    /// </summary>
    internal void MakeDeparture(Action<TrainStopArrDep> configurator) {
        TrainStopArrDep newDeparture = new();
        configurator(newDeparture);
        Departure = newDeparture;
    }

    private sealed class DepartsAsNote : ITrainStopDepartsAsNote {
        public NoteKind Kind => NoteKind.DepartsAs;
        public string Rank { get; set; } = "";
        public string Number { get; set; } = "";
        public DateTimeOffset DepartureDate { get; set; }
    }

    private sealed class TrainNumberChangeNote : ITrainStopTrainNumberChangeNote {
        public NoteKind Kind => NoteKind.TrainNumberChange;
        public string Rank { get; set; } = "";
        public string Number { get; set; } = "";
    }

    private sealed class ReceivingWagonsNote : ITrainStopReceivingWagonsNote {
        public NoteKind Kind => NoteKind.ReceivingWagons;
        public string Station { get; set; } = "";
    }

    // Implements the detaching interface: it previously implemented
    // ITrainStopReceivingWagonsNote, so type checks against
    // ITrainStopDetachingWagonsNote never matched a detaching note.
    private sealed class DetachingWagonsNote : ITrainStopDetachingWagonsNote {
        public NoteKind Kind => NoteKind.DetachingWagons;
        public string Station { get; set; } = "";
    }

    /// <summary>Appends a note of kind <see cref="NoteKind.DepartsAs"/>.</summary>
    internal void AddDepartsAsNote(string rank, string number, DateTimeOffset departureDate) {
        ModifyableNotes.Add(new DepartsAsNote { Rank = rank, Number = number, DepartureDate = departureDate });
    }

    /// <summary>Appends a note of kind <see cref="NoteKind.TrainNumberChange"/>.</summary>
    internal void AddTrainNumberChangeNote(string rank, string number) {
        ModifyableNotes.Add(new TrainNumberChangeNote { Rank = rank, Number = number });
    }

    /// <summary>Appends a note of kind <see cref="NoteKind.ReceivingWagons"/>.</summary>
    internal void AddReceivingWagonsNote(string station) {
        ModifyableNotes.Add(new ReceivingWagonsNote { Station = station });
    }

    /// <summary>Appends a note of kind <see cref="NoteKind.DetachingWagons"/>.</summary>
    internal void AddDetachingWagonsNote(string station) {
        ModifyableNotes.Add(new DetachingWagonsNote { Station = station });
    }
}
||||
|
||||
/// <summary>
/// Implementation of <see cref="ITrainStopArrDep"/>.
/// </summary>
public record TrainStopArrDep : ITrainStopArrDep {
    /// <inheritdoc/>
    public DateTimeOffset ScheduleTime { get; set; }

    /// <inheritdoc/>
    public IStatus? Status { get; private set; }

    /// <summary>
    /// Creates an empty status object, hands it to <paramref name="configurator"/>
    /// and stores it in <see cref="Status"/>.
    /// </summary>
    internal void MakeStatus(Action<Status.Status> configurator) {
        Status.Status created = new();
        configurator(created);
        Status = created;
    }
}
||||
|
||||
#endregion |
||||
|
||||
#region JSON Converters |
||||
|
||||
namespace JsonConverters { |
||||
/// <summary>
/// Converter factory that reads and writes <see cref="StatusKind"/> as the
/// lower-case strings "passing", "arrival" and "departure".
/// </summary>
internal class StatusKindConverter : JsonConverterFactory {
    public override bool CanConvert(Type typeToConvert) => typeToConvert == typeof(StatusKind);

    public override JsonConverter? CreateConverter(Type typeToConvert, JsonSerializerOptions options) =>
        new Converter();

    private class Converter : JsonConverter<StatusKind> {
        public override StatusKind Read(
            ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options
        ) {
            var text = reader.GetString();
            switch (text) {
                case "arrival":
                    return StatusKind.Arrival;
                case "departure":
                    return StatusKind.Departure;
                case "passing":
                    return StatusKind.Passing;
                default:
                    // Any other string (or null) is not a known status kind
                    throw new NotImplementedException();
            }
        }

        public override void Write(Utf8JsonWriter writer, StatusKind value, JsonSerializerOptions options) {
            string text;
            switch (value) {
                case StatusKind.Passing:
                    text = "passing";
                    break;
                case StatusKind.Arrival:
                    text = "arrival";
                    break;
                case StatusKind.Departure:
                    text = "departure";
                    break;
                default:
                    throw new NotImplementedException();
            }
            writer.WriteStringValue(text);
        }
    }
}
||||
|
||||
/// <summary>
/// Converter factory that reads and writes <see cref="NoteKind"/> as the
/// camel-case strings "departsAs", "trainNumberChange", "receivingWagons"
/// and "detachingWagons".
/// </summary>
internal class NoteKindConverter : JsonConverterFactory {
    public override bool CanConvert(Type typeToConvert) => typeToConvert == typeof(NoteKind);

    public override JsonConverter? CreateConverter(Type typeToConvert, JsonSerializerOptions options) =>
        new Converter();

    private class Converter : JsonConverter<NoteKind> {
        public override NoteKind Read(
            ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options
        ) {
            var text = reader.GetString();
            switch (text) {
                case "departsAs":
                    return NoteKind.DepartsAs;
                case "trainNumberChange":
                    return NoteKind.TrainNumberChange;
                case "receivingWagons":
                    return NoteKind.ReceivingWagons;
                case "detachingWagons":
                    return NoteKind.DetachingWagons;
                default:
                    // Any other string (or null) is not a known note kind
                    throw new NotImplementedException();
            }
        }

        public override void Write(Utf8JsonWriter writer, NoteKind value, JsonSerializerOptions options) {
            string text;
            switch (value) {
                case NoteKind.DepartsAs:
                    text = "departsAs";
                    break;
                case NoteKind.TrainNumberChange:
                    text = "trainNumberChange";
                    break;
                case NoteKind.DetachingWagons:
                    text = "detachingWagons";
                    break;
                case NoteKind.ReceivingWagons:
                    text = "receivingWagons";
                    break;
                default:
                    throw new NotImplementedException();
            }
            writer.WriteStringValue(text);
        }
    }
}
||||
} |
||||
|
||||
#endregion |
||||
} |
||||
using System; |
||||
using System.Collections.Generic; |
||||
using InfoferScraper.Models.Status; |
||||
using Newtonsoft.Json; |
||||
using Newtonsoft.Json.Converters; |
||||
using Newtonsoft.Json.Serialization; |
||||
|
||||
namespace InfoferScraper.Models.Train { |
||||
#region Interfaces |
||||
|
||||
/// <summary>
/// Top-level result produced when scraping a train.
/// </summary>
public interface ITrainScrapeResult {
    /// <summary>Rank of the train.</summary>
    string Rank { get; }

    /// <summary>Number of the train, kept as text.</summary>
    string Number { get; }

    /// <summary>
    /// Date in the DD.MM.YYYY format.
    /// This date is taken as-is from the result.
    /// </summary>
    string Date { get; }

    /// <summary>Name of the operator of the train.</summary>
    string Operator { get; }

    /// <summary>Groups the train is made of.</summary>
    IReadOnlyList<ITrainGroup> Groups { get; }
}
||||
|
||||
/// <summary>
/// A group of a train: its route, optional status and the list of stations.
/// </summary>
public interface ITrainGroup {
    /// <summary>End points of the group's route.</summary>
    ITrainRoute Route { get; }

    /// <summary>Status of the group; <c>null</c> when none was set.</summary>
    ITrainStatus? Status { get; }

    /// <summary>Stops of the group.</summary>
    IReadOnlyList<ITrainStopDescription> Stations { get; }
}
||||
|
||||
/// <summary>
/// The two end points of a route.
/// </summary>
public interface ITrainRoute {
    /// <summary>Name of the starting station.</summary>
    string From { get; }

    /// <summary>Name of the final station.</summary>
    string To { get; }
}
||||
|
||||
/// <summary>
/// Status reported for a train group.
/// </summary>
public interface ITrainStatus {
    /// <summary>Reported delay. NOTE(review): unit and sign convention are not visible here; confirm against the scraper.</summary>
    int Delay { get; }

    /// <summary>Station the status refers to.</summary>
    string Station { get; }

    /// <summary>Kind of event the status refers to.</summary>
    StatusKind State { get; }
}
||||
|
||||
/// <summary>
/// Description of one stop of a train group.
/// </summary>
public interface ITrainStopDescription {
    /// <summary>Name of the station.</summary>
    string Name { get; }

    /// <summary>Name of the station in the form used for links. NOTE(review): exact format not visible here; confirm.</summary>
    string LinkName { get; }

    /// <summary>Kilometre figure reported for the stop.</summary>
    int Km { get; }

    /// <summary>
    /// The time the train waits in the station in seconds
    /// </summary>
    int? StoppingTime { get; }

    /// <summary>Platform of the stop; <c>null</c> when not set.</summary>
    string? Platform { get; }

    /// <summary>Arrival details; <c>null</c> when not set.</summary>
    ITrainStopArrDep? Arrival { get; }

    /// <summary>Departure details; <c>null</c> when not set.</summary>
    ITrainStopArrDep? Departure { get; }

    /// <summary>Notes attached to the stop.</summary>
    IReadOnlyList<object> Notes { get; }
}
||||
|
||||
/// <summary>
/// Base contract shared by every note attached to a stop.
/// </summary>
public interface ITrainStopNote {
    /// <summary>Discriminator telling which kind of note this is.</summary>
    NoteKind Kind { get; }
}
||||
|
||||
/// <summary>
/// Note carrying the rank and number a train changes to.
/// </summary>
public interface ITrainStopTrainNumberChangeNote : ITrainStopNote {
    /// <summary>New rank.</summary>
    string Rank { get; }

    /// <summary>New number, kept as text.</summary>
    string Number { get; }
}
||||
|
||||
/// <summary>
/// Note carrying the rank, number and date a train departs as.
/// </summary>
public interface ITrainStopDepartsAsNote : ITrainStopNote {
    /// <summary>Rank the train departs with.</summary>
    string Rank { get; }

    /// <summary>Number the train departs with, kept as text.</summary>
    string Number { get; }

    /// <summary>Date associated with the departure.</summary>
    DateTimeOffset DepartureDate { get; }
}
||||
|
||||
/// <summary>
/// Note about wagons being detached.
/// </summary>
public interface ITrainStopDetachingWagonsNote : ITrainStopNote {
    /// <summary>Station named by the note.</summary>
    string Station { get; }
}
||||
|
||||
/// <summary>
/// Note about wagons being received.
/// </summary>
public interface ITrainStopReceivingWagonsNote : ITrainStopNote {
    /// <summary>Station named by the note.</summary>
    string Station { get; }
}
||||
|
||||
/// <summary>
/// Arrival or departure information of a stop.
/// </summary>
public interface ITrainStopArrDep {
    /// <summary>Scheduled moment of the arrival or departure.</summary>
    DateTimeOffset ScheduleTime { get; }

    /// <summary>Status of the arrival or departure; <c>null</c> when not set.</summary>
    IStatus? Status { get; }
}
||||
|
||||
#endregion |
||||
|
||||
/// <summary>
/// Kind of event a train status refers to.
/// Serialized as a string through <c>StringEnumConverter</c> with <c>CamelCaseNamingStrategy</c>.
/// </summary>
[JsonConverter(typeof(StringEnumConverter), typeof(CamelCaseNamingStrategy))]
public enum StatusKind {
    Passing = 0,
    Arrival = 1,
    Departure = 2,
}
||||
|
||||
/// <summary>
/// Discriminator for the notes that can be attached to a stop.
/// Serialized as a string through <c>StringEnumConverter</c> with <c>CamelCaseNamingStrategy</c>.
/// </summary>
[JsonConverter(typeof(StringEnumConverter), typeof(CamelCaseNamingStrategy))]
public enum NoteKind {
    TrainNumberChange = 0,
    DetachingWagons = 1,
    ReceivingWagons = 2,
    DepartsAs = 3,
}
||||
|
||||
#region Implementations |
||||
|
||||
/// <summary>
/// Implementation of <see cref="ITrainScrapeResult"/> that is filled in while scraping.
/// </summary>
internal record TrainScrapeResult : ITrainScrapeResult {
    private List<ITrainGroup> ModifyableGroups { get; set; } = new();

    public string Rank { get; set; } = "";
    public string Number { get; set; } = "";
    public string Date { get; set; } = "";
    public string Operator { get; set; } = "";

    /// <summary>Read-only wrapper over the groups added so far.</summary>
    public IReadOnlyList<ITrainGroup> Groups => ModifyableGroups.AsReadOnly();

    private void AddTrainGroup(ITrainGroup trainGroup) => ModifyableGroups.Add(trainGroup);

    /// <summary>
    /// Creates an empty <see cref="TrainGroup"/>, hands it to
    /// <paramref name="configurator"/> and then appends it to <see cref="Groups"/>.
    /// </summary>
    internal void AddTrainGroup(Action<TrainGroup> configurator) {
        TrainGroup group = new();
        configurator(group);
        AddTrainGroup(group);
    }
}
||||
|
||||
/// <summary>
/// Implementation of <see cref="ITrainGroup"/> that is filled in while scraping.
/// </summary>
internal record TrainGroup : ITrainGroup {
    private List<ITrainStopDescription> ModifyableStations { get; set; } = new();

    public ITrainRoute Route { get; init; } = new TrainRoute();
    public ITrainStatus? Status { get; private set; }

    /// <summary>Read-only wrapper over the stops added so far.</summary>
    public IReadOnlyList<ITrainStopDescription> Stations => ModifyableStations.AsReadOnly();

    private void AddStopDescription(ITrainStopDescription stopDescription) =>
        ModifyableStations.Add(stopDescription);

    /// <summary>
    /// Creates an empty <see cref="TrainStopDescription"/>, hands it to
    /// <paramref name="configurator"/> and then appends it to <see cref="Stations"/>.
    /// </summary>
    internal void AddStopDescription(Action<TrainStopDescription> configurator) {
        TrainStopDescription stop = new();
        configurator(stop);
        AddStopDescription(stop);
    }

    /// <summary>
    /// Lets <paramref name="configurator"/> edit the current route.
    /// The cast fails if <see cref="Route"/> was initialised with something
    /// other than a <see cref="TrainRoute"/>.
    /// </summary>
    internal void ConfigureRoute(Action<TrainRoute> configurator) => configurator((TrainRoute)Route);

    /// <summary>
    /// Creates an empty <see cref="TrainStatus"/>, hands it to
    /// <paramref name="configurator"/> and then stores it in <see cref="Status"/>.
    /// </summary>
    internal void MakeStatus(Action<TrainStatus> configurator) {
        TrainStatus status = new();
        configurator(status);
        Status = status;
    }
}
||||
|
||||
/// <summary>
/// Implementation of <see cref="ITrainRoute"/>; both ends start out as empty strings.
/// </summary>
internal record TrainRoute : ITrainRoute {
    public string From { get; set; } = "";
    public string To { get; set; } = "";
}
||||
|
||||
/// <summary>
/// Implementation of <see cref="ITrainStatus"/> with settable values.
/// </summary>
internal record TrainStatus : ITrainStatus {
    /// <inheritdoc/>
    public int Delay { get; set; }

    /// <inheritdoc/>
    public string Station { get; set; } = "";

    /// <inheritdoc/>
    public StatusKind State { get; set; }
}
||||
|
||||
/// <summary>
/// Implementation of <see cref="ITrainStopDescription"/> that is filled in while scraping.
/// </summary>
internal record TrainStopDescription : ITrainStopDescription {
    private List<ITrainStopNote> ModifyableNotes { get; } = new();

    public string Name { get; set; } = "";
    public string LinkName { get; set; } = "";
    public int Km { get; set; }
    public int? StoppingTime { get; set; }
    public string? Platform { get; set; }
    public ITrainStopArrDep? Arrival { get; private set; }
    public ITrainStopArrDep? Departure { get; private set; }

    /// <summary>
    /// Read-only wrapper over the notes added so far.
    /// NOTE(review): presumably exposed as <c>object</c> so that serializers use each
    /// note's runtime type; confirm before narrowing the element type.
    /// </summary>
    public IReadOnlyList<object> Notes => ModifyableNotes.AsReadOnly();

    /// <summary>
    /// Creates an empty <see cref="TrainStopArrDep"/>, hands it to
    /// <paramref name="configurator"/> and stores it in <see cref="Arrival"/>.
    /// </summary>
    internal void MakeArrival(Action<TrainStopArrDep> configurator) {
        TrainStopArrDep newArrival = new();
        configurator(newArrival);
        Arrival = newArrival;
    }

    /// <summary>
    /// Creates an empty <see cref="TrainStopArrDep"/>, hands it to
    /// <paramref name="configurator"/> and stores it in <see cref="Departure"/>.
    /// </summary>
    internal void MakeDeparture(Action<TrainStopArrDep> configurator) {
        TrainStopArrDep newDeparture = new();
        configurator(newDeparture);
        Departure = newDeparture;
    }

    private sealed class DepartsAsNote : ITrainStopDepartsAsNote {
        public NoteKind Kind => NoteKind.DepartsAs;
        public string Rank { get; set; } = "";
        public string Number { get; set; } = "";
        public DateTimeOffset DepartureDate { get; set; }
    }

    private sealed class TrainNumberChangeNote : ITrainStopTrainNumberChangeNote {
        public NoteKind Kind => NoteKind.TrainNumberChange;
        public string Rank { get; set; } = "";
        public string Number { get; set; } = "";
    }

    private sealed class ReceivingWagonsNote : ITrainStopReceivingWagonsNote {
        public NoteKind Kind => NoteKind.ReceivingWagons;
        public string Station { get; set; } = "";
    }

    // Implements the detaching interface: it previously implemented
    // ITrainStopReceivingWagonsNote, so type checks against
    // ITrainStopDetachingWagonsNote never matched a detaching note.
    private sealed class DetachingWagonsNote : ITrainStopDetachingWagonsNote {
        public NoteKind Kind => NoteKind.DetachingWagons;
        public string Station { get; set; } = "";
    }

    /// <summary>Appends a note of kind <see cref="NoteKind.DepartsAs"/>.</summary>
    internal void AddDepartsAsNote(string rank, string number, DateTimeOffset departureDate) {
        ModifyableNotes.Add(new DepartsAsNote { Rank = rank, Number = number, DepartureDate = departureDate });
    }

    /// <summary>Appends a note of kind <see cref="NoteKind.TrainNumberChange"/>.</summary>
    internal void AddTrainNumberChangeNote(string rank, string number) {
        ModifyableNotes.Add(new TrainNumberChangeNote { Rank = rank, Number = number });
    }

    /// <summary>Appends a note of kind <see cref="NoteKind.ReceivingWagons"/>.</summary>
    internal void AddReceivingWagonsNote(string station) {
        ModifyableNotes.Add(new ReceivingWagonsNote { Station = station });
    }

    /// <summary>Appends a note of kind <see cref="NoteKind.DetachingWagons"/>.</summary>
    internal void AddDetachingWagonsNote(string station) {
        ModifyableNotes.Add(new DetachingWagonsNote { Station = station });
    }
}
||||
|
||||
/// <summary>
/// Implementation of <see cref="ITrainStopArrDep"/>.
/// </summary>
public record TrainStopArrDep : ITrainStopArrDep {
    /// <inheritdoc/>
    public DateTimeOffset ScheduleTime { get; set; }

    /// <inheritdoc/>
    public IStatus? Status { get; private set; }

    /// <summary>
    /// Creates an empty status object, hands it to <paramref name="configurator"/>
    /// and stores it in <see cref="Status"/>.
    /// </summary>
    internal void MakeStatus(Action<Status.Status> configurator) {
        Status.Status created = new();
        configurator(created);
        Status = created;
    }
}
||||
|
||||
#endregion |
||||
} |
||||
|
@ -0,0 +1,220 @@
|
||||
using System; |
||||
using System.Collections.Generic; |
||||
using System.Linq; |
||||
using System.Net; |
||||
using System.Net.Http; |
||||
using System.Text.RegularExpressions; |
||||
using System.Threading.Tasks; |
||||
using AngleSharp; |
||||
using AngleSharp.Dom; |
||||
using AngleSharp.Html.Dom; |
||||
using Flurl; |
||||
using InfoferScraper.Models.Train; |
||||
using NodaTime; |
||||
using NodaTime.Extensions; |
||||
using scraper.Models.Itinerary; |
||||
|
||||
namespace InfoferScraper.Scrapers; |
||||
|
||||
/// <summary>
/// Scrapes itineraries between two stations from the Infofer timetable website.
/// </summary>
public class RouteScraper {
    private const string BaseUrl = "https://mersultrenurilor.infofer.ro/ro-RO/";
    private static readonly DateTimeZone BucharestTz = DateTimeZoneProviders.Tzdb["Europe/Bucharest"];

    // Groups: 1 = km, 2 = train rank, 3 = train number
    private static readonly Regex KmTrainRankNoRegex = new(@"^([0-9]+)\skm\scu\s([A-Z-]+)\s([0-9]+)$");
    // Groups: 1 = operator name
    private static readonly Regex OperatorRegex = new(@$"^Operat\sde\s([{Utils.RoLetters}\s]+)$");
    // Groups: 1 = Ple/Sos marker, 2 = day, 3 = month abbreviation, 4 = hour, 5 = minute
    private static readonly Regex DepArrRegex = new(@"^(Ple|Sos)\s([0-9]+)\s([a-z]+)\.?\s([0-9]+):([0-9]+)$");

    /// <summary>Month abbreviations, as matched by <see cref="DepArrRegex"/>, mapped to month numbers.</summary>
    private static readonly Dictionary<string, int> Months = new() {
        ["ian"] = 1,
        ["feb"] = 2,
        ["mar"] = 3,
        ["apr"] = 4,
        ["mai"] = 5,
        ["iun"] = 6,
        ["iul"] = 7,
        ["aug"] = 8,
        ["sep"] = 9,
        ["oct"] = 10,
        ["noi"] = 11,
        ["dec"] = 12,
    };

    private readonly CookieContainer cookieContainer = new();

    private readonly HttpClient httpClient;

    /// <summary>
    /// Creates a scraper with its own cookie container.
    /// </summary>
    /// <param name="httpClientHandler">
    /// Optional handler to build the HTTP client on. Its <c>CookieContainer</c> and
    /// <c>UseCookies</c> values are overwritten by this constructor.
    /// </param>
    public RouteScraper(HttpClientHandler? httpClientHandler = null) {
        httpClientHandler ??= new HttpClientHandler();
        httpClientHandler.CookieContainer = cookieContainer;
        httpClientHandler.UseCookies = true;

        httpClient = new HttpClient(httpClientHandler) {
            BaseAddress = new Uri(BaseUrl),
            DefaultRequestVersion = new Version(2, 0),
        };
    }

    /// <summary>
    /// Looks up the itineraries between two stations.
    /// </summary>
    /// <param name="from">Departure station, used as a URL path segment.</param>
    /// <param name="to">Arrival station, used as a URL path segment.</param>
    /// <param name="dateOverride">
    /// Optional date to query for; it is converted to the Bucharest time zone before being sent.
    /// </param>
    /// <returns>
    /// The itineraries found, or <c>null</c> when the response contains no
    /// <c>body &gt; div</c> element.
    /// </returns>
    /// <exception cref="FormatException">
    /// A departure or arrival text, or a km/rank/number text, does not have the expected shape.
    /// </exception>
    public async Task<List<IItinerary>?> Scrape(string from, string to, DateTimeOffset? dateOverride = null) {
        var dateOverrideInstant = dateOverride?.ToInstant().InZone(BucharestTz);
        dateOverride = dateOverrideInstant?.ToDateTimeOffset();

        var asConfig = Configuration.Default;
        var asContext = BrowsingContext.New(asConfig);

        var firstUrl = "Rute-trenuri"
            .AppendPathSegment(from)
            .AppendPathSegment(to);
        if (dateOverride is { } queryDate) {
            // Invariant culture keeps the Gregorian calendar regardless of the host's locale
            firstUrl = firstUrl.SetQueryParam(
                "DepartureDate",
                queryDate.ToString("d.MM.yyyy", System.Globalization.CultureInfo.InvariantCulture)
            );
        }
        firstUrl = firstUrl.SetQueryParam("OrderingTypeId", "0");
        firstUrl = firstUrl.SetQueryParam("TimeSelectionId", "0");
        firstUrl = firstUrl.SetQueryParam("MinutesInDay", "0");
        firstUrl = firstUrl.SetQueryParam("ConnectionsTypeId", "1");
        firstUrl = firstUrl.SetQueryParam("BetweenTrainsMinimumMinutes", "5");
        firstUrl = firstUrl.SetQueryParam("ChangeStationName", "");

        var firstResponse = await httpClient.GetStringAsync(firstUrl);
        var firstDocument = await asContext.OpenAsync(req => req.Content(firstResponse));
        var firstForm = firstDocument.GetElementById("form-search")!;

        // The named inputs of the search form are posted back as-is to get the results
        var firstResult = firstForm
            .QuerySelectorAll<IHtmlInputElement>("input")
            .Where(elem => elem.Name != null)
            .ToDictionary(elem => elem.Name!, elem => elem.Value);

        var secondUrl = "".AppendPathSegments("Itineraries", "GetItineraries");
        using var secondResponse = await httpClient.PostAsync(
            secondUrl,
#pragma warning disable CS8620
            new FormUrlEncodedContent(firstResult)
#pragma warning restore CS8620
        );
        var secondResponseContent = await secondResponse.Content.ReadAsStringAsync();
        var secondDocument = await asContext.OpenAsync(
            req => req.Content(secondResponseContent)
        );

        var (itineraryInfoDiv, _) = secondDocument
            .QuerySelectorAll("body > div");

        if (itineraryInfoDiv == null) {
            return null;
        }

        // The scraped dates carry no year and are interpreted as Bucharest-local below,
        // so the reference "now" must be Bucharest-local too, not the host's local time.
        var now = SystemClock.Instance.GetCurrentInstant().InZone(BucharestTz).LocalDateTime;

        var itinerariesLi = secondDocument
            .QuerySelectorAll("body > ul > li");
        var itineraries = new List<IItinerary>();
        foreach (var itineraryLi in itinerariesLi) {
            var itinerary = new Itinerary();

            var cardDivs = itineraryLi.QuerySelectorAll(":scope > div > div > div > div");
            var detailsDivs = cardDivs.Last()
                .QuerySelectorAll(":scope > div > div")[1]
                .QuerySelectorAll(":scope > div");
            var trainItineraryAndDetailsLis = detailsDivs[0]
                .QuerySelectorAll(":scope > ul > li");
            var stations = new List<string>();
            var details = new List<ItineraryTrain>();
            // List items alternate: station, train detail, station, train detail, ..., station
            foreach (var (idx, li) in trainItineraryAndDetailsLis.Select((li, idx) => (idx, li))) {
                if (idx % 2 == 0) {
                    // Station
                    stations.Add(
                        li
                            .QuerySelectorAll(":scope > div > div > div > div")[1]
                            .Text()
                            .WithCollapsedSpaces()
                    );
                }
                else {
                    // Detail
                    var detailColumns = li.QuerySelectorAll(":scope > div > div");
                    var leftSideDivs = detailColumns[0].QuerySelectorAll(":scope > div");

                    var departureDateText = leftSideDivs[0]
                        .QuerySelectorAll(":scope > div")[1]
                        .Text()
                        .WithCollapsedSpaces();
                    var departureDate = ParseDepArrDate(departureDateText, now);

                    var arrivalDateText = leftSideDivs[3]
                        .QuerySelectorAll(":scope > div")[1]
                        .Text()
                        .WithCollapsedSpaces();
                    var arrivalDate = ParseDepArrDate(arrivalDateText, now);

                    var rightSideDivs = detailColumns[1].QuerySelectorAll(":scope > div > div");
                    var kmRankNumberText = rightSideDivs[0]
                        .QuerySelectorAll(":scope > div > div")[0]
                        .Text()
                        .WithCollapsedSpaces();
                    var kmRankNumberMatch = KmTrainRankNoRegex.Match(kmRankNumberText);

                    var operatorText = rightSideDivs[0]
                        .QuerySelectorAll(":scope > div > div")[1]
                        .Text()
                        .WithCollapsedSpaces();
                    var operatorMatch = OperatorRegex.Match(operatorText);

                    var train = new ItineraryTrain {
                        ArrivalDate = BucharestTz.AtLeniently(arrivalDate).ToDateTimeOffset(),
                        DepartureDate = BucharestTz.AtLeniently(departureDate).ToDateTimeOffset(),
                        Km = int.Parse(kmRankNumberMatch.Groups[1].Value),
                        TrainRank = kmRankNumberMatch.Groups[2].Value,
                        TrainNumber = kmRankNumberMatch.Groups[3].Value,
                        Operator = operatorMatch.Groups[1].Value,
                    };

                    // Only every second div (odd index) holds a stop name
                    foreach (var div in leftSideDivs[2]
                                 .QuerySelectorAll(":scope > div")
                                 .Where((_, i) => i % 2 != 0)) {
                        var text = div.Text().WithCollapsedSpaces();
                        if (text == "Nu sunt stații intermediare.") continue;
                        train.AddIntermediateStop(text);
                    }

                    details.Add(train);
                }
            }
            // Pair consecutive stations (from, to) with the train detail found between them
            foreach (var ((iFrom, iTo), detail) in stations.Zip(stations.Skip(1)).Zip(details)) {
                detail.From = iFrom;
                detail.To = iTo;
                itinerary.AddTrain(detail);
            }

            itineraries.Add(itinerary);
        }

        return itineraries;
    }

    /// <summary>
    /// Parses a departure/arrival text matched by <see cref="DepArrRegex"/> into a local date-time.
    /// The text has no year: the year of <paramref name="now"/> is assumed, and one year is
    /// added when that would place the result more than one day before <paramref name="now"/>.
    /// </summary>
    /// <param name="text">Text to parse, with spaces already collapsed.</param>
    /// <param name="now">Reference local date-time used to infer the year.</param>
    /// <exception cref="FormatException"><paramref name="text"/> does not have the expected shape.</exception>
    private static LocalDateTime ParseDepArrDate(string text, LocalDateTime now) {
        var match = DepArrRegex.Match(text);
        if (!match.Success || !Months.TryGetValue(match.Groups[3].Value, out var month)) {
            throw new FormatException($"Unexpected departure/arrival text: \"{text}\"");
        }

        var date = new LocalDateTime(
            now.Year,
            month,
            int.Parse(match.Groups[2].Value),
            int.Parse(match.Groups[4].Value),
            int.Parse(match.Groups[5].Value),
            0
        );
        return date < now.PlusDays(-1) ? date.PlusYears(1) : date;
    }
}
@ -1,239 +1,261 @@
|
||||
using System; |
||||
using System.Collections.Generic; |
||||
using System.Linq; |
||||
using System.Net; |
||||
using System.Net.Http; |
||||
using System.Text.RegularExpressions; |
||||
using System.Threading.Tasks; |
||||
using AngleSharp; |
||||
using AngleSharp.Dom; |
||||
using AngleSharp.Html.Dom; |
||||
using Flurl; |
||||
using InfoferScraper.Models.Train; |
||||
using NodaTime; |
||||
using NodaTime.Extensions; |
||||
using scraper.Exceptions; |
||||
|
||||
namespace InfoferScraper.Scrapers { |
||||
/// <summary>
/// Scrapes the details of one train (rank, number, operator, route, stops, live status and notes)
/// from the Infofer timetable site rooted at <see cref="BaseUrl"/>.
/// </summary>
public static class TrainScraper {
    private const string BaseUrl = "https://mersultrenurilor.infofer.ro/ro-RO/";

    // Page heading: "<rank> <number> în <date>"; the date is later split on '.' into day, month, year.
    private static readonly Regex TrainInfoRegex = new(@"^([A-Z-]+)\s([0-9]+)\sîn\s([0-9.]+)$");
    private static readonly Regex OperatorRegex = new(@"^Operat\sde\s(.+)$");

    private static readonly Regex RouteRegex =
        new(@$"^Parcurs\stren\s([{Utils.RoLetters} ]+)[-–]([{Utils.RoLetters}\s]+)$");

    // Status line: optional delay in minutes, late/early wording, kind of event, station name.
    private static readonly Regex SlRegex =
        new(
            @"^(?:Fără|([0-9]+)\smin)\s(întârziere|mai\sdevreme)\sla\s(trecerea\sfără\soprire\sprin|sosirea\sîn|plecarea\sdin)\s(.+)\.$");

    // Keyed by the first character of SlRegex's third capture ("trecerea…", "sosirea…", "plecarea…").
    private static readonly Dictionary<char, StatusKind> SlStateMap = new() {
        { 't', StatusKind.Passing },
        { 's', StatusKind.Arrival },
        { 'p', StatusKind.Departure },
    };

    private static readonly Regex KmRegex = new(@"^km\s([0-9]+)$");
    private static readonly Regex StoppingTimeRegex = new(@"^([0-9]+)\s(min|sec)\soprire$");
    private static readonly Regex PlatformRegex = new(@"^linia\s(.+)$");

    private static readonly Regex StationArrdepStatusRegex =
        new(@"^(?:(la timp)|(?:((?:\+|-)[0-9]+) min \((?:(?:întârziere)|(?:mai devreme))\)))(\*?)$");

    private static readonly Regex TrainNumberChangeNoteRegex =
        new(@"^Trenul își schimbă numărul în\s([A-Z-]+)\s([0-9]+)$");
    private static readonly Regex DepartsAsNoteRegex =
        new(@"^Trenul pleacă cu numărul\s([A-Z-]+)\s([0-9]+)\sîn\s([0-9]{2}).([0-9]{2}).([0-9]{4})$");
    private static readonly Regex ReceivingWagonsNoteRegex =
        new(@"^Trenul primește vagoane de la\s(.+)\.$");
    private static readonly Regex DetachingWagonsNoteRegex =
        new(@"^Trenul detașează vagoane pentru stația\s(.+)\.$");

    private static readonly DateTimeZone BucharestTz = DateTimeZoneProviders.Tzdb["Europe/Bucharest"];

    private static readonly CookieContainer CookieContainer = new();
    private static readonly HttpClient HttpClient = new(new HttpClientHandler {
        CookieContainer = CookieContainer,
        UseCookies = true,
    }) {
        BaseAddress = new Uri(BaseUrl),
        DefaultRequestVersion = new Version(2, 0),
    };

    /// <summary>
    /// Fetches the train page, re-posts its search form and parses the resulting HTML.
    /// </summary>
    /// <param name="trainNumber">Train number, appended as a path segment after "Tren".</param>
    /// <param name="dateOverride">
    /// Optional date; it is converted to Bucharest time and sent as the "Date" query parameter.
    /// </param>
    /// <returns>
    /// The parsed result, or <c>null</c> when the result page yields no train-info element.
    /// </returns>
    /// <exception cref="TrainNotThisDayException">
    /// Thrown when the train-info element is present but the results element is missing.
    /// </exception>
    public static async Task<ITrainScrapeResult?> Scrape(string trainNumber, DateTimeOffset? dateOverride = null) {
        var dateOverrideInstant = dateOverride?.ToInstant().InZone(BucharestTz);
        dateOverride = dateOverrideInstant?.ToDateTimeOffset();
        TrainScrapeResult result = new();

        var asConfig = Configuration.Default;
        var asContext = BrowsingContext.New(asConfig);

        var firstUrl = "Tren"
            .AppendPathSegment(trainNumber);
        if (dateOverride != null) {
            // Format with the invariant culture so the query value does not depend on the
            // calendar/digits of the thread's current culture.
            firstUrl = firstUrl.SetQueryParam(
                "Date",
                dateOverride.Value.ToString("d.MM.yyyy", System.Globalization.CultureInfo.InvariantCulture)
            );
        }
        var firstResponse = await HttpClient.GetStringAsync(firstUrl);
        var firstDocument = await asContext.OpenAsync(req => req.Content(firstResponse));
        var firstForm = firstDocument.GetElementById("form-search")!;

        // Every named input of the search form is posted back unchanged.
        var firstResult = firstForm
            .QuerySelectorAll<IHtmlInputElement>("input")
            .Where(elem => elem.Name != null)
            .ToDictionary(elem => elem.Name!, elem => elem.Value);

        var secondUrl = "".AppendPathSegments("Trains", "TrainsResult");
        // The response is disposed when the method exits; its content is read right below.
        using var secondResponse = await HttpClient.PostAsync(
            secondUrl,
#pragma warning disable CS8620
            new FormUrlEncodedContent(firstResult)
#pragma warning restore CS8620
        );
        var secondResponseContent = await secondResponse.Content.ReadAsStringAsync();
        var secondDocument = await asContext.OpenAsync(
            req => req.Content(secondResponseContent)
        );

        // First and fourth top-level divs of the body.
        // NOTE(review): presumably the project's sequence deconstruction yields null for
        // missing elements, which is what the two null checks rely on; confirm.
        var (trainInfoDiv, (_, (_, (resultsDiv, _)))) = secondDocument
            .QuerySelectorAll("body > div");
        if (trainInfoDiv == null) {
            return null;
        }
        if (resultsDiv == null) {
            throw new TrainNotThisDayException();
        }
        trainInfoDiv = trainInfoDiv.QuerySelectorAll(":scope > div > div").First();

        (result.Rank, (result.Number, (result.Date, _))) = (TrainInfoRegex.Match(
            trainInfoDiv.QuerySelector(":scope > h2")!.Text().WithCollapsedSpaces()
        ).Groups as IEnumerable<Group>).Select(g => g.Value).Skip(1);
        var (scrapedDateD, (scrapedDateM, (scrapedDateY, _))) = result.Date
            .Split('.')
            .Select(int.Parse);
        var date = new DateTime(scrapedDateY, scrapedDateM, scrapedDateD);

        result.Operator = (OperatorRegex.Match(
            trainInfoDiv.QuerySelector(":scope > p")!.Text().WithCollapsedSpaces()
        ).Groups as IEnumerable<Group>).Skip(1).First().Value;

        foreach (var groupDiv in resultsDiv.QuerySelectorAll(":scope > div")) {
            result.AddTrainGroup(group => {
                var statusDiv = groupDiv.QuerySelectorAll(":scope > div").First();
                var routeText = statusDiv.QuerySelector(":scope > h4")!.Text().WithCollapsedSpaces();
                group.ConfigureRoute(route => {
                    (route.From, (route.To, _)) = (RouteRegex.Match(routeText).Groups as IEnumerable<Group>).Skip(1)
                        .Select(g => g.Value);
                });

                try {
                    var statusLineMatch =
                        SlRegex.Match(statusDiv.QuerySelector(":scope > div")!.Text().WithCollapsedSpaces());
                    var (slmDelay, (slmLate, (slmArrival, (slmStation, _)))) =
                        (statusLineMatch.Groups as IEnumerable<Group>).Skip(1).Select(g => g.Value);
                    group.MakeStatus(status => {
                        // No minutes captured means "Fără" (no delay); "mai devreme" is stored as negative.
                        status.Delay = string.IsNullOrEmpty(slmDelay) ? 0 :
                            slmLate == "întârziere" ? int.Parse(slmDelay) : -int.Parse(slmDelay);
                        status.Station = slmStation;
                        status.State = SlStateMap[slmArrival[0]];
                    });
                }
                catch {
                    // Best effort: a missing or unparseable status line leaves the group without a status.
                }

                // The sequencer is shared by all stops of the group and consumed in page order
                // (arrival, then departure, stop by stop), so the calls below must stay in this order.
                Utils.DateTimeSequencer dtSeq = new(date.Year, date.Month, date.Day);
                var stations = statusDiv.QuerySelectorAll(":scope > ul > li");
                foreach (var station in stations) {
                    group.AddStopDescription(stopDescription => {
                        var (left, (middle, (right, _))) = station
                            .QuerySelectorAll(":scope > div > div");
                        var (stopDetails, (stopNotes, _)) = middle
                            .QuerySelectorAll(":scope > div > div > div");

                        // Queried once: [0] name, [1] km, [2] stopping time, [3] platform.
                        var detailDivs = stopDetails.QuerySelectorAll(":scope > div");

                        stopDescription.Name = detailDivs[0]
                            .Text()
                            .WithCollapsedSpaces();
                        var scrapedKm = detailDivs[1]
                            .Text()
                            .WithCollapsedSpaces();
                        stopDescription.Km = int.Parse(
                            (KmRegex.Match(scrapedKm).Groups as IEnumerable<Group>).Skip(1).First().Value
                        );
                        var scrapedStoppingTime = detailDivs[2]
                            .Text()
                            .WithCollapsedSpaces();
                        if (!string.IsNullOrEmpty(scrapedStoppingTime)) {
                            var (stValue, (stMinsec, _)) =
                                (StoppingTimeRegex.Match(scrapedStoppingTime).Groups as IEnumerable<Group>)
                                .Skip(1)
                                .Select(g => g.Value);
                            // Minutes are multiplied by 60, so the stored value is in seconds.
                            stopDescription.StoppingTime = int.Parse(stValue);
                            if (stMinsec == "min") stopDescription.StoppingTime *= 60;
                        }

                        var scrapedPlatform = detailDivs[3]
                            .Text()
                            .WithCollapsedSpaces();
                        if (!string.IsNullOrEmpty(scrapedPlatform))
                            stopDescription.Platform = PlatformRegex.Match(scrapedPlatform).Groups[1].Value;

                        // Fills the scheduled time and, when present, the live status of one
                        // arrival/departure cell. Throws OperationCanceledException when the
                        // cell is empty so that the caller can skip it.
                        void ScrapeTime(IElement element, ref TrainStopArrDep arrDep) {
                            var parts = element.QuerySelectorAll(":scope > div > div > div");
                            if (parts.Length == 0) throw new OperationCanceledException();
                            var time = parts[0];
                            var scrapedTime = time.Text().WithCollapsedSpaces();
                            var (stHour, (stMin, _)) = scrapedTime.Split(':').Select(int.Parse);
                            arrDep.ScheduleTime = BucharestTz.AtLeniently(dtSeq.Next(stHour, stMin).ToLocalDateTime())
                                .ToDateTimeOffset();

                            if (parts.Length < 2) return;

                            var statusElement = parts[1];
                            var (onTime, (delay, (approx, _))) = (StationArrdepStatusRegex.Match(
                                statusElement.Text().WithCollapsedSpaces(replaceWith: " ")
                            ).Groups as IEnumerable<Group>).Skip(1).Select(g => g.Value);
                            arrDep.MakeStatus(status => {
                                status.Delay = string.IsNullOrEmpty(onTime) ? int.Parse(delay) : 0;
                                // A trailing '*' in the status text marks the value as not real.
                                status.Real = string.IsNullOrEmpty(approx);
                            });
                        }

                        try {
                            stopDescription.MakeArrival(arrival => { ScrapeTime(left, ref arrival); });
                        }
                        catch (OperationCanceledException) { }

                        try {
                            stopDescription.MakeDeparture(departure => { ScrapeTime(right, ref departure); });
                        }
                        catch (OperationCanceledException) { }

                        foreach (var noteDiv in stopNotes.QuerySelectorAll(":scope > div > div")) {
                            var noteText = noteDiv.Text().WithCollapsedSpaces();
                            Match trainNumberChangeMatch, departsAsMatch, detachingWagons, receivingWagons;
                            if ((trainNumberChangeMatch = TrainNumberChangeNoteRegex.Match(noteText)).Success) {
                                stopDescription.AddTrainNumberChangeNote(trainNumberChangeMatch.Groups[1].Value, trainNumberChangeMatch.Groups[2].Value);
                            }
                            else if ((departsAsMatch = DepartsAsNoteRegex.Match(noteText)).Success) {
                                var groups = departsAsMatch.Groups;
                                // Captures 3, 4, 5 are day, month, year.
                                var departureDate = BucharestTz.AtStrictly(new(int.Parse(groups[5].Value), int.Parse(groups[4].Value), int.Parse(groups[3].Value), 0, 0));
                                stopDescription.AddDepartsAsNote(groups[1].Value, groups[2].Value, departureDate.ToDateTimeOffset());
                            }
                            else if ((detachingWagons = DetachingWagonsNoteRegex.Match(noteText)).Success) {
                                stopDescription.AddDetachingWagonsNote(detachingWagons.Groups[1].Value);
                            }
                            else if ((receivingWagons = ReceivingWagonsNoteRegex.Match(noteText)).Success) {
                                stopDescription.AddReceivingWagonsNote(receivingWagons.Groups[1].Value);
                            }
                        }
                    });
                }
            });
        }
        return result;
    }
}
||||
} // namespace |
||||
using System; |
||||
using System.Collections.Generic; |
||||
using System.Linq; |
||||
using System.Net; |
||||
using System.Net.Http; |
||||
using System.Text.RegularExpressions; |
||||
using System.Threading.Tasks; |
||||
using AngleSharp; |
||||
using AngleSharp.Dom; |
||||
using AngleSharp.Html.Dom; |
||||
using Flurl; |
||||
using InfoferScraper.Models.Train; |
||||
using NodaTime; |
||||
using NodaTime.Extensions; |
||||
using scraper.Exceptions; |
||||
|
||||
namespace InfoferScraper.Scrapers { |
||||
/// <summary>
/// Scrapes the details of one train (rank, number, operator, route, stops, live status and notes)
/// from the Infofer timetable site rooted at <see cref="BaseUrl"/>. Each instance owns its own
/// cookie container and HTTP client.
/// </summary>
public class TrainScraper {
    private const string BaseUrl = "https://mersultrenurilor.infofer.ro/ro-RO/";

    // Page heading: "<rank> <number> în <date>"; the date is later split on '.' into day, month, year.
    private static readonly Regex TrainInfoRegex = new(@"^([A-Z-]+)\s([0-9]+)\sîn\s([0-9.]+)$");
    private static readonly Regex OperatorRegex = new(@"^Operat\sde\s(.+)$");

    private static readonly Regex RouteRegex =
        new(@$"^Parcurs\stren\s([{Utils.RoLetters} ]+)[-–]([{Utils.RoLetters}\s]+)$");

    // Status line: optional delay in minutes, late/early wording, kind of event, station name.
    private static readonly Regex SlRegex =
        new(
            @"^(?:Fără|([0-9]+)\smin)\s(întârziere|mai\sdevreme)\sla\s(trecerea\sfără\soprire\sprin|sosirea\sîn|plecarea\sdin)\s(.+)\.$");

    // Keyed by the first character of SlRegex's third capture ("trecerea…", "sosirea…", "plecarea…").
    private static readonly Dictionary<char, StatusKind> SlStateMap = new() {
        { 't', StatusKind.Passing },
        { 's', StatusKind.Arrival },
        { 'p', StatusKind.Departure },
    };

    private static readonly Regex KmRegex = new(@"^km\s([0-9]+)$");
    private static readonly Regex StoppingTimeRegex = new(@"^([0-9]+)\s(min|sec)\soprire$");
    private static readonly Regex PlatformRegex = new(@"^linia\s(.+)$");

    private static readonly Regex StationArrdepStatusRegex =
        new(@"^(?:(la timp)|(?:((?:\+|-)[0-9]+) min \((?:(?:întârziere)|(?:mai devreme))\)))(\*?)$");

    private static readonly Regex TrainNumberChangeNoteRegex =
        new(@"^Trenul își schimbă numărul în\s([A-Z-]+)\s([0-9]+)$");
    private static readonly Regex DepartsAsNoteRegex =
        new(@"^Trenul pleacă cu numărul\s([A-Z-]+)\s([0-9]+)\sîn\s([0-9]{2}).([0-9]{2}).([0-9]{4})$");
    private static readonly Regex ReceivingWagonsNoteRegex =
        new(@"^Trenul primește vagoane de la\s(.+)\.$");
    private static readonly Regex DetachingWagonsNoteRegex =
        new(@"^Trenul detașează vagoane pentru stația\s(.+)\.$");

    private static readonly DateTimeZone BucharestTz = DateTimeZoneProviders.Tzdb["Europe/Bucharest"];

    private readonly CookieContainer cookieContainer = new();
    private readonly HttpClient httpClient;

    /// <summary>
    /// Creates a scraper whose HTTP client targets <see cref="BaseUrl"/> and requests HTTP/2.
    /// </summary>
    /// <param name="httpClientHandler">
    /// Optional handler to build the client on (for example one configured with a proxy).
    /// Its <c>CookieContainer</c> and <c>UseCookies</c> are overwritten with this instance's
    /// cookie container. When omitted, a fresh handler is created.
    /// </param>
    public TrainScraper(HttpClientHandler? httpClientHandler = null)
    {
        httpClientHandler ??= new HttpClientHandler();
        httpClientHandler.CookieContainer = cookieContainer;
        httpClientHandler.UseCookies = true;

        httpClient = new HttpClient(httpClientHandler) {
            BaseAddress = new Uri(BaseUrl),
            DefaultRequestVersion = new Version(2, 0),
        };
    }

    /// <summary>
    /// Fetches the train page, re-posts its search form and parses the resulting HTML.
    /// </summary>
    /// <param name="trainNumber">Train number, appended as a path segment after "Tren".</param>
    /// <param name="dateOverride">
    /// Optional date; it is converted to Bucharest time and sent as the "Date" query parameter.
    /// </param>
    /// <returns>
    /// The parsed result, or <c>null</c> when the result page yields no train-info element.
    /// </returns>
    /// <exception cref="TrainNotThisDayException">
    /// Thrown when the train-info element is present but the results element is missing.
    /// </exception>
    public async Task<ITrainScrapeResult?> Scrape(string trainNumber, DateTimeOffset? dateOverride = null) {
        var dateOverrideInstant = dateOverride?.ToInstant().InZone(BucharestTz);
        dateOverride = dateOverrideInstant?.ToDateTimeOffset();
        TrainScrapeResult result = new();

        var asConfig = Configuration.Default;
        var asContext = BrowsingContext.New(asConfig);

        var firstUrl = "Tren"
            .AppendPathSegment(trainNumber);
        if (dateOverride != null) {
            // Format with the invariant culture so the query value does not depend on the
            // calendar/digits of the thread's current culture.
            firstUrl = firstUrl.SetQueryParam(
                "Date",
                dateOverride.Value.ToString("d.MM.yyyy", System.Globalization.CultureInfo.InvariantCulture)
            );
        }
        var firstResponse = await httpClient.GetStringAsync(firstUrl);
        var firstDocument = await asContext.OpenAsync(req => req.Content(firstResponse));
        var firstForm = firstDocument.GetElementById("form-search")!;

        // Every named input of the search form is posted back unchanged.
        var firstResult = firstForm
            .QuerySelectorAll<IHtmlInputElement>("input")
            .Where(elem => elem.Name != null)
            .ToDictionary(elem => elem.Name!, elem => elem.Value);

        var secondUrl = "".AppendPathSegments("Trains", "TrainsResult");
        // The response is disposed when the method exits; its content is read right below.
        using var secondResponse = await httpClient.PostAsync(
            secondUrl,
#pragma warning disable CS8620
            new FormUrlEncodedContent(firstResult)
#pragma warning restore CS8620
        );
        var secondResponseContent = await secondResponse.Content.ReadAsStringAsync();
        var secondDocument = await asContext.OpenAsync(
            req => req.Content(secondResponseContent)
        );

        // First and fourth top-level divs of the body.
        // NOTE(review): presumably the project's sequence deconstruction yields null for
        // missing elements, which is what the two null checks rely on; confirm.
        var (trainInfoDiv, (_, (_, (resultsDiv, _)))) = secondDocument
            .QuerySelectorAll("body > div");
        if (trainInfoDiv == null) {
            return null;
        }
        if (resultsDiv == null) {
            throw new TrainNotThisDayException();
        }
        trainInfoDiv = trainInfoDiv.QuerySelectorAll(":scope > div > div").First();

        (result.Rank, (result.Number, (result.Date, _))) = (TrainInfoRegex.Match(
            trainInfoDiv.QuerySelector(":scope > h2")!.Text().WithCollapsedSpaces()
        ).Groups as IEnumerable<Group>).Select(g => g.Value).Skip(1);
        var (scrapedDateD, (scrapedDateM, (scrapedDateY, _))) = result.Date
            .Split('.')
            .Select(int.Parse);
        var date = new DateTime(scrapedDateY, scrapedDateM, scrapedDateD);

        result.Operator = (OperatorRegex.Match(
            trainInfoDiv.QuerySelector(":scope > p")!.Text().WithCollapsedSpaces()
        ).Groups as IEnumerable<Group>).Skip(1).First().Value;

        foreach (var groupDiv in resultsDiv.QuerySelectorAll(":scope > div")) {
            result.AddTrainGroup(group => {
                var statusDiv = groupDiv.QuerySelectorAll(":scope > div").First();
                var routeText = statusDiv.QuerySelector(":scope > h4")!.Text().WithCollapsedSpaces();
                group.ConfigureRoute(route => {
                    (route.From, (route.To, _)) = (RouteRegex.Match(routeText).Groups as IEnumerable<Group>).Skip(1)
                        .Select(g => g.Value);
                });

                try {
                    var statusLineMatch =
                        SlRegex.Match(statusDiv.QuerySelector(":scope > div")!.Text().WithCollapsedSpaces());
                    var (slmDelay, (slmLate, (slmArrival, (slmStation, _)))) =
                        (statusLineMatch.Groups as IEnumerable<Group>).Skip(1).Select(g => g.Value);
                    group.MakeStatus(status => {
                        // No minutes captured means "Fără" (no delay); "mai devreme" is stored as negative.
                        status.Delay = string.IsNullOrEmpty(slmDelay) ? 0 :
                            slmLate == "întârziere" ? int.Parse(slmDelay) : -int.Parse(slmDelay);
                        status.Station = slmStation;
                        status.State = SlStateMap[slmArrival[0]];
                    });
                }
                catch {
                    // Best effort: a missing or unparseable status line leaves the group without a status.
                }

                // The sequencer is shared by all stops of the group and consumed in page order
                // (arrival, then departure, stop by stop), so the calls below must stay in this order.
                Utils.DateTimeSequencer dtSeq = new(date.Year, date.Month, date.Day);
                var stations = statusDiv.QuerySelectorAll(":scope > ul > li");
                foreach (var station in stations) {
                    group.AddStopDescription(stopDescription => {
                        var (left, (middle, (right, _))) = station
                            .QuerySelectorAll(":scope > div > div");
                        var (stopDetails, (stopNotes, _)) = middle
                            .QuerySelectorAll(":scope > div > div > div");

                        // Queried once: [0] name (and link), [1] km, [2] stopping time, [3] platform.
                        var detailDivs = stopDetails.QuerySelectorAll(":scope > div");

                        stopDescription.Name = detailDivs[0]
                            .Text()
                            .WithCollapsedSpaces();
                        // Last path segment of the station link's href.
                        stopDescription.LinkName = new Flurl.Url(detailDivs[0]
                            .QuerySelector(":scope a")
                            .Attributes["href"]
                            .Value).PathSegments.Last();
                        var scrapedKm = detailDivs[1]
                            .Text()
                            .WithCollapsedSpaces();
                        stopDescription.Km = int.Parse(
                            (KmRegex.Match(scrapedKm).Groups as IEnumerable<Group>).Skip(1).First().Value
                        );
                        var scrapedStoppingTime = detailDivs[2]
                            .Text()
                            .WithCollapsedSpaces();
                        if (!string.IsNullOrEmpty(scrapedStoppingTime)) {
                            var (stValue, (stMinsec, _)) =
                                (StoppingTimeRegex.Match(scrapedStoppingTime).Groups as IEnumerable<Group>)
                                .Skip(1)
                                .Select(g => g.Value);
                            // Minutes are multiplied by 60, so the stored value is in seconds.
                            stopDescription.StoppingTime = int.Parse(stValue);
                            if (stMinsec == "min") stopDescription.StoppingTime *= 60;
                        }

                        var scrapedPlatform = detailDivs[3]
                            .Text()
                            .WithCollapsedSpaces();
                        if (!string.IsNullOrEmpty(scrapedPlatform))
                            stopDescription.Platform = PlatformRegex.Match(scrapedPlatform).Groups[1].Value;

                        // Fills the scheduled time and, when present, the live status of one
                        // arrival/departure cell. Throws OperationCanceledException when the
                        // cell is empty so that the caller can skip it.
                        void ScrapeTime(IElement element, ref TrainStopArrDep arrDep) {
                            var parts = element.QuerySelectorAll(":scope > div > div > div");
                            if (parts.Length == 0) throw new OperationCanceledException();
                            var time = parts[0];
                            var scrapedTime = time.Text().WithCollapsedSpaces();
                            var (stHour, (stMin, _)) = scrapedTime.Split(':').Select(int.Parse);
                            arrDep.ScheduleTime = BucharestTz.AtLeniently(dtSeq.Next(stHour, stMin).ToLocalDateTime())
                                .ToDateTimeOffset();

                            if (parts.Length < 2) return;

                            var statusElement = parts[1];
                            var (onTime, (delay, (approx, _))) = (StationArrdepStatusRegex.Match(
                                statusElement.Text().WithCollapsedSpaces(replaceWith: " ")
                            ).Groups as IEnumerable<Group>).Skip(1).Select(g => g.Value);
                            arrDep.MakeStatus(status => {
                                // NOTE(review): delay is presumably null only when the status text
                                // did not match the regex at all; that case is flagged as cancelled.
                                if (string.IsNullOrEmpty(onTime) && delay == null) {
                                    status.Cancelled = true;
                                }
                                else {
                                    status.Delay = string.IsNullOrEmpty(onTime) ? int.Parse(delay) : 0;
                                }
                                // A trailing '*' in the status text marks the value as not real.
                                status.Real = string.IsNullOrEmpty(approx);
                            });
                        }

                        try {
                            stopDescription.MakeArrival(arrival => { ScrapeTime(left, ref arrival); });
                        }
                        catch (OperationCanceledException) { }

                        try {
                            stopDescription.MakeDeparture(departure => { ScrapeTime(right, ref departure); });
                        }
                        catch (OperationCanceledException) { }

                        foreach (var noteDiv in stopNotes.QuerySelectorAll(":scope > div > div")) {
                            var noteText = noteDiv.Text().WithCollapsedSpaces();
                            Match trainNumberChangeMatch, departsAsMatch, detachingWagons, receivingWagons;
                            if ((trainNumberChangeMatch = TrainNumberChangeNoteRegex.Match(noteText)).Success) {
                                stopDescription.AddTrainNumberChangeNote(trainNumberChangeMatch.Groups[1].Value, trainNumberChangeMatch.Groups[2].Value);
                            }
                            else if ((departsAsMatch = DepartsAsNoteRegex.Match(noteText)).Success) {
                                var groups = departsAsMatch.Groups;
                                // Captures 3, 4, 5 are day, month, year.
                                var departureDate = BucharestTz.AtStrictly(new(int.Parse(groups[5].Value), int.Parse(groups[4].Value), int.Parse(groups[3].Value), 0, 0));
                                stopDescription.AddDepartsAsNote(groups[1].Value, groups[2].Value, departureDate.ToDateTimeOffset());
                            }
                            else if ((detachingWagons = DetachingWagonsNoteRegex.Match(noteText)).Success) {
                                stopDescription.AddDetachingWagonsNote(detachingWagons.Groups[1].Value);
                            }
                            else if ((receivingWagons = ReceivingWagonsNoteRegex.Match(noteText)).Success) {
                                stopDescription.AddReceivingWagonsNote(receivingWagons.Groups[1].Value);
                            }
                        }
                    });
                }
            });
        }
        return result;
    }
}
||||
} // namespace |
||||
|
@ -0,0 +1,40 @@
|
||||
using System; |
||||
using System.Collections.Generic; |
||||
using System.Threading.Tasks; |
||||
using Microsoft.AspNetCore.Http; |
||||
using Microsoft.AspNetCore.Mvc; |
||||
using scraper.Models.Itinerary; |
||||
using Server.Services.Interfaces; |
||||
|
||||
namespace Server.Controllers.V3; |
||||
|
||||
/// <summary>
/// Controller in the "v3" API group, routed at <c>/v3/[controller]</c>, that serves itinerary lookups.
/// </summary>
[ApiController]
[ApiExplorerSettings(GroupName = "v3")]
[Route("/v3/[controller]")]
public class ItinerariesController : Controller
{
    private IDataManager DataManager { get; }
    private IDatabase Database { get; }

    /// <summary>Stores the injected services.</summary>
    public ItinerariesController(IDataManager dataManager, IDatabase database)
    {
        DataManager = dataManager;
        Database = database;
    }

    /// <summary>
    /// Looks up itineraries between two stations through <see cref="IDataManager.FetchItineraries"/>.
    /// </summary>
    /// <param name="from">Value of the <c>from</c> query parameter.</param>
    /// <param name="to">Value of the <c>to</c> query parameter.</param>
    /// <param name="date">Optional value of the <c>date</c> query parameter.</param>
    /// <returns>200 with the itineraries, or 404 when the data manager returns <c>null</c>.</returns>
    [HttpGet("")]
    [ProducesResponseType(typeof(IEnumerable<IItinerary>), StatusCodes.Status200OK)]
    [ProducesResponseType(StatusCodes.Status404NotFound)]
    public async Task<ActionResult<IEnumerable<IItinerary>>> FindItineraries(
        [FromQuery] string from,
        [FromQuery] string to,
        [FromQuery] DateTimeOffset? date
    )
    {
        var found = await DataManager.FetchItineraries(from, to, date);
        if (found != null)
        {
            return Ok(found);
        }

        return NotFound();
    }
}
@ -0,0 +1,5 @@
|
||||
namespace Server.Models.Database; |
||||
|
||||
/// <summary>
/// Settings record holding a connection string and a database name.
/// The parameterless constructor sets both to the empty string.
/// </summary>
public record MongoSettings(string ConnectionString, string DatabaseName)
{
    public MongoSettings()
        : this(ConnectionString: "", DatabaseName: "")
    {
    }
}
@ -0,0 +1,17 @@
|
||||
using MongoDB.Bson; |
||||
using MongoDB.Bson.Serialization.Attributes; |
||||
using Newtonsoft.Json; |
||||
|
||||
namespace Server.Models.Database; |
||||
|
||||
/// <summary>
/// Record with a BSON id, a name and the ObjectId of a listing.
/// NOTE(review): presumably an alternative station name pointing at a station listing; confirm.
/// The parameterless constructor yields a null id, an empty name and a null listing id.
/// </summary>
public record StationAlias(
    [property: BsonId]
    [property: BsonRepresentation(BsonType.ObjectId)]
    [property: JsonProperty(NullValueHandling = NullValueHandling.Ignore)]
    string? Id,
    string Name,
    [property: BsonRepresentation(BsonType.ObjectId)]
    string? ListingId
)
{
    public StationAlias()
        : this(Id: null, Name: "", ListingId: null)
    {
    }
}
@ -0,0 +1,18 @@
|
||||
using System.Collections.Generic; |
||||
using MongoDB.Bson; |
||||
using MongoDB.Bson.Serialization.Attributes; |
||||
using Newtonsoft.Json; |
||||
|
||||
namespace Server.Models.Database; |
||||
|
||||
/// <summary>
/// Record with a BSON id, a station name and a list of strings named <c>StoppedAtBy</c>.
/// NOTE(review): presumably the trains that stop at the station; confirm.
/// </summary>
public record StationListing(
    [property: BsonId]
    [property: BsonRepresentation(BsonType.ObjectId)]
    [property: JsonProperty(NullValueHandling = NullValueHandling.Ignore)]
    string? Id,
    string Name,
    List<string> StoppedAtBy
)
{
    /// <summary>Creates a listing with a null id, an empty name and an empty list.</summary>
    public StationListing()
        : this(Id: null, Name: "", StoppedAtBy: new List<string>())
    {
    }

    /// <summary>Creates a listing with a null id.</summary>
    public StationListing(string name, List<string> stoppedAtBy)
        : this(Id: null, Name: name, StoppedAtBy: stoppedAtBy)
    {
    }
}
@ -0,0 +1,20 @@
|
||||
using MongoDB.Bson; |
||||
using MongoDB.Bson.Serialization.Attributes; |
||||
using Newtonsoft.Json; |
||||
|
||||
namespace Server.Models.Database; |
||||
|
||||
/// <summary>
/// Record with a BSON id, a train's rank, number and company, and the ObjectId of its
/// latest description (null when there is none).
/// </summary>
public record TrainListing(
    [property: BsonId]
    [property: BsonRepresentation(BsonType.ObjectId)]
    [property: JsonProperty(NullValueHandling = NullValueHandling.Ignore)]
    string? Id,
    string Rank,
    string Number,
    string Company,
    [property: BsonRepresentation(BsonType.ObjectId)]
    string? LatestDescription
)
{
    /// <summary>Creates a listing with null ids and empty rank, number and company.</summary>
    public TrainListing()
        : this(Id: null, Rank: "", Number: "", Company: "", LatestDescription: null)
    {
    }

    /// <summary>Creates a listing with a null id and no latest description.</summary>
    public TrainListing(string rank, string number, string company)
        : this(Id: null, Rank: rank, Number: number, Company: company, LatestDescription: null)
    {
    }
}
@ -0,0 +1,9 @@
|
||||
namespace Server.Models; |
||||
|
||||
/// <summary>
/// Proxy settings record: a URL plus optional credentials.
/// The parameterless constructor sets an empty URL and no credentials.
/// </summary>
public record ProxySettings(string Url, ProxyCredentials? Credentials = null)
{
    public ProxySettings()
        : this(Url: "")
    {
    }
}
||||
|
||||
/// <summary>
/// Username/password pair. The parameterless constructor sets both to the empty string.
/// </summary>
public record ProxyCredentials(string Username, string Password)
{
    public ProxyCredentials()
        : this(Username: "", Password: "")
    {
    }
}
@ -1,11 +1,14 @@
|
||||
using System; |
||||
using System.Collections.Generic; |
||||
using System.Threading.Tasks; |
||||
using InfoferScraper.Models.Train; |
||||
using InfoferScraper.Models.Station; |
||||
using scraper.Models.Itinerary; |
||||
|
||||
namespace Server.Services.Interfaces; |
||||
|
||||
/// <summary>
/// Asynchronous access to station, train and itinerary data.
/// Each method returns a task whose result may be <c>null</c>.
/// </summary>
public interface IDataManager
{
    /// <summary>Fetches the scrape result for a station on the given date.</summary>
    Task<IStationScrapeResult?> FetchStation(string stationName, DateTimeOffset date);

    /// <summary>Fetches the scrape result for a train on the given date.</summary>
    Task<ITrainScrapeResult?> FetchTrain(string trainNumber, DateTimeOffset date);

    /// <summary>Fetches the itineraries between two stations, optionally for a given date.</summary>
    Task<IReadOnlyList<IItinerary>?> FetchItineraries(string from, string to, DateTimeOffset? date = null);
}
||||
|
@ -0,0 +1,38 @@
|
||||
using System; |
||||
using System.Threading; |
||||
using System.Threading.Tasks; |
||||
|
||||
namespace Server.Utils; |
||||
|
||||
// Inspired from: https://stackoverflow.com/a/57517920 |
||||
/// <summary>
/// Caps how many asynchronous operations run at the same time by making each one
/// acquire a slot of a <see cref="SemaphoreSlim"/> first.
/// </summary>
public class AsyncThrottle
{
    private readonly SemaphoreSlim openConnectionSemaphore;

    /// <param name="limit">Initial and maximum number of slots of the semaphore.</param>
    public AsyncThrottle(int limit)
    {
        openConnectionSemaphore = new SemaphoreSlim(limit, limit);
    }

    /// <summary>
    /// Awaits an already-created task inside a slot. Only the awaiting is gated here;
    /// the task itself was created by the caller before the slot was acquired.
    /// </summary>
    public async Task<T> MakeRequest<T>(Task<T> task)
    {
        return await MakeRequest(() => task);
    }

    /// <summary>
    /// Waits for a free slot, invokes <paramref name="taskCreator"/>, awaits its task and
    /// returns the result. The slot is released even when the operation throws.
    /// </summary>
    public async Task<T> MakeRequest<T>(Func<Task<T>> taskCreator)
    {
        await openConnectionSemaphore.WaitAsync();
        try
        {
            return await taskCreator();
        }
        finally
        {
            openConnectionSemaphore.Release();
        }
    }

    /// <summary>Result-less counterpart of the overload taking a <see cref="Task{T}"/>.</summary>
    public async Task MakeRequest(Task task)
    {
        await MakeRequest(() => task);
    }

    /// <summary>
    /// Waits for a free slot, invokes <paramref name="taskCreator"/> and awaits its task.
    /// The slot is released even when the operation throws.
    /// </summary>
    public async Task MakeRequest(Func<Task> taskCreator)
    {
        await openConnectionSemaphore.WaitAsync();
        try
        {
            await taskCreator();
        }
        finally
        {
            openConnectionSemaphore.Release();
        }
    }
}
@ -0,0 +1,7 @@
|
||||
using NodaTime; |
||||
|
||||
namespace Server.Utils; |
||||
|
||||
/// <summary>Shared constants.</summary>
public static class Constants
{
    /// <summary>The "Europe/Bucharest" zone looked up in the TZDB provider.</summary>
    public static readonly DateTimeZone BucharestTz =
        DateTimeZoneProviders.Tzdb["Europe/Bucharest"];
}
@ -0,0 +1,33 @@
|
||||
using System.Collections.Generic; |
||||
using System.Threading.Tasks; |
||||
using MongoDB.Driver; |
||||
|
||||
namespace Server.Utils; |
||||
|
||||
/// <summary>
/// Flattens the batches of an <see cref="IAsyncCursor{T}"/> into single elements, exposing the
/// <c>MoveNextAsync</c>/<c>Current</c> pair that <c>await foreach</c> looks for.
/// </summary>
/// <param name="Cursor">The cursor whose batches are enumerated.</param>
public record IAsyncCusorAsyncEnumerator<T>(IAsyncCursor<T> Cursor) {
    // Enumerator over the batch currently being consumed; null until the first batch is fetched.
    private IEnumerator<T>? enumerator = null;

    /// <summary>
    /// The element selected by the last <see cref="MoveNextAsync"/> call that returned <c>true</c>.
    /// </summary>
    public T Current => enumerator!.Current;

    /// <summary>
    /// Advances to the next element, fetching further batches from the cursor when the current
    /// one is exhausted.
    /// </summary>
    /// <returns><c>true</c> when <see cref="Current"/> holds a new element; <c>false</c> at the end.</returns>
    public async Task<bool> MoveNextAsync() {
        // Keep consuming the batch already in hand.
        if (enumerator != null && enumerator.MoveNext()) {
            return true;
        }

        // Fetch batches until one actually contains an element. The enumerator of a fresh batch
        // must be advanced once before Current is valid, and an empty batch must not be
        // reported as an element.
        while (await Cursor.MoveNextAsync()) {
            enumerator?.Dispose();
            enumerator = Cursor.Current.GetEnumerator();
            if (enumerator.MoveNext()) {
                return true;
            }
        }

        return false;
    }
}
||||
|
||||
/// <summary>Extension methods for <see cref="IAsyncCursor{T}"/>.</summary>
public static class IAsyncCursorExtensions
{
    /// <summary>Wraps the cursor in an <see cref="IAsyncCusorAsyncEnumerator{T}"/>.</summary>
    public static IAsyncCusorAsyncEnumerator<T> GetAsyncEnumerator<T>(this IAsyncCursor<T> cursor) =>
        new IAsyncCusorAsyncEnumerator<T>(cursor);
}
Loading…
Reference in new issue