Kenneth Bruen
2 years ago
2 changed files with 562 additions and 555 deletions
@ -1,316 +1,318 @@ |
|||||||
using System; |
using System; |
||||||
using System.Collections.Generic; |
using System.Collections.Generic; |
||||||
using System.Text.Json; |
using System.Text.Json; |
||||||
using System.Text.Json.Serialization; |
using System.Text.Json.Serialization; |
||||||
using InfoferScraper.Models.Status; |
using InfoferScraper.Models.Status; |
||||||
using InfoferScraper.Models.Train.JsonConverters; |
using InfoferScraper.Models.Train.JsonConverters; |
||||||
|
|
||||||
namespace InfoferScraper.Models.Train { |
namespace InfoferScraper.Models.Train { |
||||||
#region Interfaces |
#region Interfaces |
||||||
|
|
||||||
/// <summary>
/// Read-only view of everything scraped for a single train.
/// </summary>
public interface ITrainScrapeResult {
    /// <summary>Rank of the train, as text.</summary>
    string Rank { get; }

    /// <summary>Number of the train, kept as text.</summary>
    string Number { get; }

    /// <summary>
    /// Date in the DD.MM.YYYY format.
    /// This date is taken as-is from the result.
    /// </summary>
    string Date { get; }

    /// <summary>Name of the operator of the train.</summary>
    string Operator { get; }

    /// <summary>Groups that make up the train; each carries its own route, status and stations.</summary>
    IReadOnlyList<ITrainGroup> Groups { get; }
}
||||||
|
|
||||||
/// <summary>
/// One group of a train: a route, an optional current status and the list of stops.
/// </summary>
public interface ITrainGroup {
    /// <summary>Endpoints of the route covered by this group.</summary>
    ITrainRoute Route { get; }

    /// <summary>Latest known status, or <c>null</c> when none is available.</summary>
    ITrainStatus? Status { get; }

    /// <summary>Stops of this group.</summary>
    IReadOnlyList<ITrainStopDescription> Stations { get; }
}
||||||
|
|
||||||
/// <summary>
/// The two endpoints of a route.
/// </summary>
public interface ITrainRoute {
    /// <summary>Name of the station the route starts from.</summary>
    string From { get; }

    /// <summary>Name of the station the route ends at.</summary>
    string To { get; }
}
||||||
|
|
||||||
/// <summary>
/// Latest reported position of a train group.
/// </summary>
public interface ITrainStatus {
    /// <summary>
    /// Reported delay. The scraper stores a positive value for a late train,
    /// a negative value for an early one and 0 when no delay is reported.
    /// </summary>
    int Delay { get; }

    /// <summary>Name of the station the status refers to.</summary>
    string Station { get; }

    /// <summary>Kind of event (passing, arrival or departure) the status describes.</summary>
    StatusKind State { get; }
}
||||||
|
|
||||||
/// <summary>
/// Description of a single stop on a train group's route.
/// </summary>
public interface ITrainStopDescription {
    /// <summary>Name of the station.</summary>
    string Name { get; }

    /// <summary>
    /// Link-friendly name of the station.
    /// NOTE(review): the exact format is not visible from this file - confirm against the scraper.
    /// </summary>
    string LinkName { get; }

    /// <summary>Kilometre marker of the stop.</summary>
    int Km { get; }

    /// <summary>
    /// The time the train waits in the station in seconds
    /// </summary>
    int? StoppingTime { get; }

    /// <summary>Platform or line of the stop, when known.</summary>
    string? Platform { get; }

    /// <summary>Arrival information, or <c>null</c> when there is none.</summary>
    ITrainStopArrDep? Arrival { get; }

    /// <summary>Departure information, or <c>null</c> when there is none.</summary>
    ITrainStopArrDep? Departure { get; }

    /// <summary>
    /// Notes attached to the stop. The list is typed as <c>object</c>;
    /// the implementation in this file stores <see cref="ITrainStopNote"/> instances in it.
    /// </summary>
    IReadOnlyList<object> Notes { get; }
}
||||||
/// <summary>
/// Common shape of every note attached to a stop.
/// </summary>
public interface ITrainStopNote {
    /// <summary>Discriminator telling which kind of note this is.</summary>
    NoteKind Kind { get; }
}
||||||
/// <summary>
/// Note stating that the train changes its number at this stop.
/// </summary>
public interface ITrainStopTrainNumberChangeNote : ITrainStopNote {
    /// <summary>Rank of the train after the change.</summary>
    string Rank { get; }

    /// <summary>Number of the train after the change.</summary>
    string Number { get; }
}
||||||
/// <summary>
/// Note stating that the train departs from this stop under another rank and number.
/// </summary>
public interface ITrainStopDepartsAsNote : ITrainStopNote {
    /// <summary>Rank the train departs with.</summary>
    string Rank { get; }

    /// <summary>Number the train departs with.</summary>
    string Number { get; }

    /// <summary>Date of that departure.</summary>
    DateTimeOffset DepartureDate { get; }
}
||||||
/// <summary>
/// Note stating that wagons are detached at this stop.
/// </summary>
public interface ITrainStopDetachingWagonsNote : ITrainStopNote {
    /// <summary>Station the detached wagons are meant for.</summary>
    string Station { get; }
}
||||||
/// <summary>
/// Note stating that wagons are received at this stop.
/// </summary>
public interface ITrainStopReceivingWagonsNote : ITrainStopNote {
    /// <summary>Station the received wagons come from.</summary>
    string Station { get; }
}
||||||
/// <summary>
/// Arrival or departure details of a stop.
/// </summary>
public interface ITrainStopArrDep {
    /// <summary>Scheduled time of the arrival or departure.</summary>
    DateTimeOffset ScheduleTime { get; }

    /// <summary>Status attached to the arrival or departure, or <c>null</c> when there is none.</summary>
    IStatus? Status { get; }
}
||||||
#endregion |
|
||||||
|
#endregion |
||||||
/// <summary>
/// Kind of event a train status refers to.
/// Converted to and from JSON by <see cref="StatusKindConverter"/>.
/// </summary>
[JsonConverter(typeof(StatusKindConverter))]
public enum StatusKind {
    /// <summary>The train passes through the station.</summary>
    Passing = 0,

    /// <summary>The train arrives at the station.</summary>
    Arrival = 1,

    /// <summary>The train departs from the station.</summary>
    Departure = 2,
}
||||||
/// <summary>
/// Discriminator for the notes that can be attached to a stop.
/// Converted to and from JSON by <see cref="NoteKindConverter"/>.
/// </summary>
[JsonConverter(typeof(NoteKindConverter))]
public enum NoteKind {
    /// <summary>The train changes its number.</summary>
    TrainNumberChange = 0,

    /// <summary>Wagons are detached.</summary>
    DetachingWagons = 1,

    /// <summary>Wagons are received.</summary>
    ReceivingWagons = 2,

    /// <summary>The train departs under another rank and number.</summary>
    DepartsAs = 3,
}
||||||
#region Implementations |
|
||||||
|
#region Implementations |
||||||
/// <summary>
/// Mutable implementation of <see cref="ITrainScrapeResult"/> that the scraper fills in.
/// </summary>
internal record TrainScrapeResult : ITrainScrapeResult {
    // Backing list; exposed to consumers only through the read-only Groups view.
    private List<ITrainGroup> ModifyableGroups { get; set; } = new();

    public string Rank { get; set; } = "";

    public string Number { get; set; } = "";

    public string Date { get; set; } = "";

    public string Operator { get; set; } = "";

    public IReadOnlyList<ITrainGroup> Groups => ModifyableGroups.AsReadOnly();

    /// <summary>
    /// Creates a new <see cref="TrainGroup"/>, hands it to <paramref name="configurator"/>
    /// to be filled in, then appends it to <see cref="Groups"/>.
    /// </summary>
    /// <param name="configurator">Callback that populates the freshly created group.</param>
    internal void AddTrainGroup(Action<TrainGroup> configurator) {
        var created = new TrainGroup();
        configurator(created);
        // Appended only after the callback returns, so a throwing callback adds nothing.
        ModifyableGroups.Add(created);
    }
}
||||||
/// <summary>
/// Mutable implementation of <see cref="ITrainGroup"/> that the scraper fills in.
/// </summary>
internal record TrainGroup : ITrainGroup {
    // Backing list; exposed to consumers only through the read-only Stations view.
    private List<ITrainStopDescription> ModifyableStations { get; set; } = new();

    public ITrainRoute Route { get; init; } = new TrainRoute();

    public ITrainStatus? Status { get; private set; }

    public IReadOnlyList<ITrainStopDescription> Stations => ModifyableStations.AsReadOnly();

    /// <summary>
    /// Creates a new <see cref="TrainStopDescription"/>, lets <paramref name="configurator"/>
    /// fill it in, then appends it to <see cref="Stations"/>.
    /// </summary>
    /// <param name="configurator">Callback that populates the freshly created stop.</param>
    internal void AddStopDescription(Action<TrainStopDescription> configurator) {
        var created = new TrainStopDescription();
        configurator(created);
        // Appended only after the callback returns, so a throwing callback adds nothing.
        ModifyableStations.Add(created);
    }

    /// <summary>
    /// Lets <paramref name="configurator"/> mutate the current <see cref="Route"/>.
    /// The route is cast to <see cref="TrainRoute"/>, so the cast fails if
    /// <see cref="Route"/> was initialised with another implementation.
    /// </summary>
    /// <param name="configurator">Callback that edits the route in place.</param>
    internal void ConfigureRoute(Action<TrainRoute> configurator) {
        var mutableRoute = (TrainRoute)Route;
        configurator(mutableRoute);
    }

    /// <summary>
    /// Creates a new <see cref="TrainStatus"/>, lets <paramref name="configurator"/>
    /// fill it in, then stores it in <see cref="Status"/>.
    /// </summary>
    /// <param name="configurator">Callback that populates the freshly created status.</param>
    internal void MakeStatus(Action<TrainStatus> configurator) {
        var created = new TrainStatus();
        configurator(created);
        Status = created;
    }
}
||||||
/// <summary>
/// Mutable implementation of <see cref="ITrainRoute"/>; both endpoints start out empty.
/// </summary>
internal record TrainRoute : ITrainRoute {
    public string From { get; set; } = "";

    public string To { get; set; } = "";
}
||||||
/// <summary>
/// Mutable implementation of <see cref="ITrainStatus"/>.
/// </summary>
internal record TrainStatus : ITrainStatus {
    public int Delay { get; set; } = 0;

    public string Station { get; set; } = "";

    // Starts as the enum's zero value, which is StatusKind.Passing.
    public StatusKind State { get; set; } = default;
}
||||||
/// <summary>
/// Mutable implementation of <see cref="ITrainStopDescription"/> that the scraper fills in.
/// </summary>
internal record TrainStopDescription : ITrainStopDescription {
    // Backing list; exposed to consumers only through the read-only Notes view.
    private List<ITrainStopNote> ModifyableNotes { get; } = new();

    public string Name { get; set; } = "";

    public string LinkName { get; set; } = "";

    public int Km { get; set; }

    public int? StoppingTime { get; set; }

    public string? Platform { get; set; }

    public ITrainStopArrDep? Arrival { get; private set; }

    public ITrainStopArrDep? Departure { get; private set; }

    public IReadOnlyList<object> Notes => ModifyableNotes.AsReadOnly();

    /// <summary>
    /// Creates a new <see cref="TrainStopArrDep"/>, lets <paramref name="configurator"/>
    /// fill it in, then stores it in <see cref="Arrival"/>.
    /// </summary>
    /// <param name="configurator">Callback that populates the arrival.</param>
    internal void MakeArrival(Action<TrainStopArrDep> configurator) {
        TrainStopArrDep newArrival = new();
        configurator(newArrival);
        Arrival = newArrival;
    }

    /// <summary>
    /// Creates a new <see cref="TrainStopArrDep"/>, lets <paramref name="configurator"/>
    /// fill it in, then stores it in <see cref="Departure"/>.
    /// </summary>
    /// <param name="configurator">Callback that populates the departure.</param>
    internal void MakeDeparture(Action<TrainStopArrDep> configurator) {
        TrainStopArrDep newDeparture = new();
        configurator(newDeparture);
        Departure = newDeparture;
    }

    private sealed class DepartsAsNote : ITrainStopDepartsAsNote {
        public NoteKind Kind => NoteKind.DepartsAs;
        public string Rank { get; set; } = "";
        public string Number { get; set; } = "";
        public DateTimeOffset DepartureDate { get; set; }
    }

    private sealed class TrainNumberChangeNote : ITrainStopTrainNumberChangeNote {
        public NoteKind Kind => NoteKind.TrainNumberChange;
        public string Rank { get; set; } = "";
        public string Number { get; set; } = "";
    }

    private sealed class ReceivingWagonsNote : ITrainStopReceivingWagonsNote {
        public NoteKind Kind => NoteKind.ReceivingWagons;
        public string Station { get; set; } = "";
    }

    // Implements the detaching-wagons interface (it previously implemented the
    // receiving-wagons one), so a type test on a note agrees with its Kind.
    private sealed class DetachingWagonsNote : ITrainStopDetachingWagonsNote {
        public NoteKind Kind => NoteKind.DetachingWagons;
        public string Station { get; set; } = "";
    }

    /// <summary>Appends a "departs as" note to <see cref="Notes"/>.</summary>
    /// <param name="rank">Rank the train departs with.</param>
    /// <param name="number">Number the train departs with.</param>
    /// <param name="departureDate">Date of that departure.</param>
    internal void AddDepartsAsNote(string rank, string number, DateTimeOffset departureDate) {
        ModifyableNotes.Add(new DepartsAsNote { Rank = rank, Number = number, DepartureDate = departureDate });
    }

    /// <summary>Appends a "train number change" note to <see cref="Notes"/>.</summary>
    /// <param name="rank">Rank after the change.</param>
    /// <param name="number">Number after the change.</param>
    internal void AddTrainNumberChangeNote(string rank, string number) {
        ModifyableNotes.Add(new TrainNumberChangeNote { Rank = rank, Number = number });
    }

    /// <summary>Appends a "receiving wagons" note to <see cref="Notes"/>.</summary>
    /// <param name="station">Station named in the note.</param>
    internal void AddReceivingWagonsNote(string station) {
        ModifyableNotes.Add(new ReceivingWagonsNote { Station = station });
    }

    /// <summary>Appends a "detaching wagons" note to <see cref="Notes"/>.</summary>
    /// <param name="station">Station named in the note.</param>
    internal void AddDetachingWagonsNote(string station) {
        ModifyableNotes.Add(new DetachingWagonsNote { Station = station });
    }
}
||||||
/// <summary>
/// Mutable implementation of <see cref="ITrainStopArrDep"/>.
/// </summary>
public record TrainStopArrDep : ITrainStopArrDep {
    public DateTimeOffset ScheduleTime { get; set; }

    public IStatus? Status { get; private set; }

    /// <summary>
    /// Creates a new status object, lets <paramref name="configurator"/> fill it in,
    /// then stores it in <see cref="Status"/>.
    /// </summary>
    /// <param name="configurator">Callback that populates the freshly created status.</param>
    internal void MakeStatus(Action<Status.Status> configurator) {
        // In type position "Status.Status" names the Status class of the Status
        // namespace, not the Status property declared above.
        var created = new Status.Status();
        configurator(created);
        Status = created;
    }
}
||||||
|
|
||||||
#region JSON Converters |
#endregion |
||||||
|
|
||||||
namespace JsonConverters { |
#region JSON Converters |
||||||
/// <summary>
/// Converter factory that maps <see cref="StatusKind"/> to and from the JSON strings
/// "passing", "arrival" and "departure".
/// </summary>
internal class StatusKindConverter : JsonConverterFactory {
    /// <summary>Accepts exactly <see cref="StatusKind"/>.</summary>
    public override bool CanConvert(Type typeToConvert) => typeToConvert == typeof(StatusKind);

    /// <summary>Always hands out a fresh instance of the private converter.</summary>
    public override JsonConverter? CreateConverter(Type typeToConvert, JsonSerializerOptions options) =>
        new Converter();

    private class Converter : JsonConverter<StatusKind> {
        /// <summary>Reads the current token as a string and maps it to a <see cref="StatusKind"/>.</summary>
        /// <exception cref="NotImplementedException">The string is not one of the known names.</exception>
        public override StatusKind Read(
            ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options
        ) {
            string? text = reader.GetString();
            switch (text) {
                case "arrival":
                    return StatusKind.Arrival;
                case "departure":
                    return StatusKind.Departure;
                case "passing":
                    return StatusKind.Passing;
                default:
                    throw new NotImplementedException();
            }
        }

        /// <summary>Writes <paramref name="value"/> as its lower-case JSON name.</summary>
        /// <exception cref="NotImplementedException">The value is not a known member.</exception>
        public override void Write(Utf8JsonWriter writer, StatusKind value, JsonSerializerOptions options) {
            string text;
            switch (value) {
                case StatusKind.Passing:
                    text = "passing";
                    break;
                case StatusKind.Arrival:
                    text = "arrival";
                    break;
                case StatusKind.Departure:
                    text = "departure";
                    break;
                default:
                    throw new NotImplementedException();
            }
            writer.WriteStringValue(text);
        }
    }
}
||||||
/// <summary>
/// Converter factory that maps <see cref="NoteKind"/> to and from the JSON strings
/// "departsAs", "trainNumberChange", "receivingWagons" and "detachingWagons".
/// </summary>
internal class NoteKindConverter : JsonConverterFactory {
    /// <summary>Accepts exactly <see cref="NoteKind"/>.</summary>
    public override bool CanConvert(Type typeToConvert) => typeToConvert == typeof(NoteKind);

    /// <summary>Always hands out a fresh instance of the private converter.</summary>
    public override JsonConverter? CreateConverter(Type typeToConvert, JsonSerializerOptions options) =>
        new Converter();

    private class Converter : JsonConverter<NoteKind> {
        /// <summary>Reads the current token as a string and maps it to a <see cref="NoteKind"/>.</summary>
        /// <exception cref="NotImplementedException">The string is not one of the known names.</exception>
        public override NoteKind Read(
            ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options
        ) {
            string? text = reader.GetString();
            switch (text) {
                case "departsAs":
                    return NoteKind.DepartsAs;
                case "trainNumberChange":
                    return NoteKind.TrainNumberChange;
                case "receivingWagons":
                    return NoteKind.ReceivingWagons;
                case "detachingWagons":
                    return NoteKind.DetachingWagons;
                default:
                    throw new NotImplementedException();
            }
        }

        /// <summary>Writes <paramref name="value"/> as its camel-case JSON name.</summary>
        /// <exception cref="NotImplementedException">The value is not a known member.</exception>
        public override void Write(Utf8JsonWriter writer, NoteKind value, JsonSerializerOptions options) {
            string text;
            switch (value) {
                case NoteKind.DepartsAs:
                    text = "departsAs";
                    break;
                case NoteKind.TrainNumberChange:
                    text = "trainNumberChange";
                    break;
                case NoteKind.DetachingWagons:
                    text = "detachingWagons";
                    break;
                case NoteKind.ReceivingWagons:
                    text = "receivingWagons";
                    break;
                default:
                    throw new NotImplementedException();
            }
            writer.WriteStringValue(text);
        }
    }
}
||||||
#endregion |
} |
||||||
} |
|
||||||
|
#endregion |
||||||
|
} |
||||||
|
@ -1,239 +1,244 @@ |
|||||||
using System; |
using System; |
||||||
using System.Collections.Generic; |
using System.Collections.Generic; |
||||||
using System.Linq; |
using System.Linq; |
||||||
using System.Net; |
using System.Net; |
||||||
using System.Net.Http; |
using System.Net.Http; |
||||||
using System.Text.RegularExpressions; |
using System.Text.RegularExpressions; |
||||||
using System.Threading.Tasks; |
using System.Threading.Tasks; |
||||||
using AngleSharp; |
using AngleSharp; |
||||||
using AngleSharp.Dom; |
using AngleSharp.Dom; |
||||||
using AngleSharp.Html.Dom; |
using AngleSharp.Html.Dom; |
||||||
using Flurl; |
using Flurl; |
||||||
using InfoferScraper.Models.Train; |
using InfoferScraper.Models.Train; |
||||||
using NodaTime; |
using NodaTime; |
||||||
using NodaTime.Extensions; |
using NodaTime.Extensions; |
||||||
using scraper.Exceptions; |
using scraper.Exceptions; |
||||||
|
|
||||||
namespace InfoferScraper.Scrapers { |
namespace InfoferScraper.Scrapers { |
||||||
public static class TrainScraper { |
public static class TrainScraper { |
||||||
// Base address of the site; used as HttpClient.BaseAddress, so request URLs are relative to it.
private const string BaseUrl = "https://mersultrenurilor.infofer.ro/ro-RO/";

// Heading text: captures rank, number and date, in that order.
private static readonly Regex TrainInfoRegex = new Regex(@"^([A-Z-]+)\s([0-9]+)\sîn\s([0-9.]+)$");

// Operator line: captures the operator name.
private static readonly Regex OperatorRegex = new Regex(@"^Operat\sde\s(.+)$");

// Route heading: captures the two endpoints separated by a hyphen or an en dash.
private static readonly Regex RouteRegex = new Regex(
    @$"^Parcurs\stren\s([{Utils.RoLetters} ]+)[-–]([{Utils.RoLetters}\s]+)$"
);

// Status line: captures optional delay in minutes, late/early wording,
// event wording (passing / arrival / departure) and the station name.
private static readonly Regex SlRegex = new Regex(
    @"^(?:Fără|([0-9]+)\smin)\s(întârziere|mai\sdevreme)\sla\s(trecerea\sfără\soprire\sprin|sosirea\sîn|plecarea\sdin)\s(.+)\.$"
);

// Keyed by the first character of the event wording captured by SlRegex.
private static readonly Dictionary<char, StatusKind> SlStateMap = new Dictionary<char, StatusKind> {
    ['t'] = StatusKind.Passing,
    ['s'] = StatusKind.Arrival,
    ['p'] = StatusKind.Departure,
};

// Per-stop detail patterns: kilometre marker, stopping time with its unit, platform/line.
private static readonly Regex KmRegex = new Regex(@"^km\s([0-9]+)$");
private static readonly Regex StoppingTimeRegex = new Regex(@"^([0-9]+)\s(min|sec)\soprire$");
private static readonly Regex PlatformRegex = new Regex(@"^linia\s(.+)$");

// Arrival/departure status of a stop: "la timp" or a signed number of minutes,
// optionally followed by an asterisk.
private static readonly Regex StationArrdepStatusRegex = new Regex(
    @"^(?:(la timp)|(?:((?:\+|-)[0-9]+) min \((?:(?:întârziere)|(?:mai devreme))\)))(\*?)$"
);

// "Train changes its number" note: captures the new rank and number.
private static readonly Regex TrainNumberChangeNoteRegex = new Regex(
    @"^Trenul își schimbă numărul în\s([A-Z-]+)\s([0-9]+)$"
);
||||||
// "Train departs with number" note: captures rank, number, then day, month and year.
// The date separators are escaped so that only a literal '.' is accepted between the
// parts; an unescaped '.' would match any character.
private static readonly Regex DepartsAsNoteRegex =
    new(@"^Trenul pleacă cu numărul\s([A-Z-]+)\s([0-9]+)\sîn\s([0-9]{2})\.([0-9]{2})\.([0-9]{4})$");
||||||
// "Train receives wagons from" note: captures the station name before the final period.
private static readonly Regex ReceivingWagonsNoteRegex = new Regex(
    @"^Trenul primește vagoane de la\s(.+)\.$"
);

// "Train detaches wagons for station" note: captures the station name before the final period.
private static readonly Regex DetachingWagonsNoteRegex = new Regex(
    @"^Trenul detașează vagoane pentru stația\s(.+)\.$"
);

// Time zone used to interpret the date override passed to Scrape.
private static readonly DateTimeZone BucharestTz = DateTimeZoneProviders.Tzdb["Europe/Bucharest"];

// Cookie jar attached to the shared client's handler.
private static readonly CookieContainer CookieContainer = new CookieContainer();

// Single client shared by all scrapes; request URLs are resolved against BaseUrl.
private static readonly HttpClient HttpClient = new HttpClient(new HttpClientHandler {
    UseCookies = true,
    CookieContainer = CookieContainer,
}) {
    DefaultRequestVersion = new Version(2, 0),
    BaseAddress = new Uri(BaseUrl),
};
||||||
|
|
||||||
public static async Task<ITrainScrapeResult?> Scrape(string trainNumber, DateTimeOffset? dateOverride = null) { |
public static async Task<ITrainScrapeResult?> Scrape(string trainNumber, DateTimeOffset? dateOverride = null) { |
||||||
var dateOverrideInstant = dateOverride?.ToInstant().InZone(BucharestTz); |
var dateOverrideInstant = dateOverride?.ToInstant().InZone(BucharestTz); |
||||||
dateOverride = dateOverrideInstant?.ToDateTimeOffset(); |
dateOverride = dateOverrideInstant?.ToDateTimeOffset(); |
||||||
TrainScrapeResult result = new(); |
TrainScrapeResult result = new(); |
||||||
|
|
||||||
var asConfig = Configuration.Default; |
var asConfig = Configuration.Default; |
||||||
var asContext = BrowsingContext.New(asConfig); |
var asContext = BrowsingContext.New(asConfig); |
||||||
|
|
||||||
var firstUrl = "Tren" |
var firstUrl = "Tren" |
||||||
.AppendPathSegment(trainNumber); |
.AppendPathSegment(trainNumber); |
||||||
if (dateOverride != null) { |
if (dateOverride != null) { |
||||||
firstUrl = firstUrl.SetQueryParam("Date", $"{dateOverride:d.MM.yyyy}"); |
firstUrl = firstUrl.SetQueryParam("Date", $"{dateOverride:d.MM.yyyy}"); |
||||||
} |
} |
||||||
var firstResponse = await HttpClient.GetStringAsync(firstUrl); |
var firstResponse = await HttpClient.GetStringAsync(firstUrl); |
||||||
var firstDocument = await asContext.OpenAsync(req => req.Content(firstResponse)); |
var firstDocument = await asContext.OpenAsync(req => req.Content(firstResponse)); |
||||||
var firstForm = firstDocument.GetElementById("form-search")!; |
var firstForm = firstDocument.GetElementById("form-search")!; |
||||||
|
|
||||||
var firstResult = firstForm |
var firstResult = firstForm |
||||||
.QuerySelectorAll<IHtmlInputElement>("input") |
.QuerySelectorAll<IHtmlInputElement>("input") |
||||||
.Where(elem => elem.Name != null) |
.Where(elem => elem.Name != null) |
||||||
.ToDictionary(elem => elem.Name!, elem => elem.Value); |
.ToDictionary(elem => elem.Name!, elem => elem.Value); |
||||||
|
|
||||||
var secondUrl = "".AppendPathSegments("Trains", "TrainsResult"); |
var secondUrl = "".AppendPathSegments("Trains", "TrainsResult"); |
||||||
var secondResponse = await HttpClient.PostAsync( |
var secondResponse = await HttpClient.PostAsync( |
||||||
secondUrl, |
secondUrl, |
||||||
#pragma warning disable CS8620 |
#pragma warning disable CS8620 |
||||||
new FormUrlEncodedContent(firstResult) |
new FormUrlEncodedContent(firstResult) |
||||||
#pragma warning restore CS8620 |
#pragma warning restore CS8620 |
||||||
); |
); |
||||||
var secondResponseContent = await secondResponse.Content.ReadAsStringAsync(); |
var secondResponseContent = await secondResponse.Content.ReadAsStringAsync(); |
||||||
var secondDocument = await asContext.OpenAsync( |
var secondDocument = await asContext.OpenAsync( |
||||||
req => req.Content(secondResponseContent) |
req => req.Content(secondResponseContent) |
||||||
); |
); |
||||||
|
|
||||||
var (trainInfoDiv, (_, (_, (resultsDiv, _)))) = secondDocument |
var (trainInfoDiv, (_, (_, (resultsDiv, _)))) = secondDocument |
||||||
.QuerySelectorAll("body > div"); |
.QuerySelectorAll("body > div"); |
||||||
if (trainInfoDiv == null) { |
if (trainInfoDiv == null) { |
||||||
return null; |
return null; |
||||||
} |
} |
||||||
if (resultsDiv == null) { |
if (resultsDiv == null) { |
||||||
throw new TrainNotThisDayException(); |
throw new TrainNotThisDayException(); |
||||||
} |
} |
||||||
trainInfoDiv = trainInfoDiv.QuerySelectorAll(":scope > div > div").First(); |
trainInfoDiv = trainInfoDiv.QuerySelectorAll(":scope > div > div").First(); |
||||||
|
|
||||||
(result.Rank, (result.Number, (result.Date, _))) = (TrainInfoRegex.Match( |
(result.Rank, (result.Number, (result.Date, _))) = (TrainInfoRegex.Match( |
||||||
trainInfoDiv.QuerySelector(":scope > h2")!.Text().WithCollapsedSpaces() |
trainInfoDiv.QuerySelector(":scope > h2")!.Text().WithCollapsedSpaces() |
||||||
).Groups as IEnumerable<Group>).Select(group => group.Value).Skip(1); |
).Groups as IEnumerable<Group>).Select(group => group.Value).Skip(1); |
||||||
var (scrapedDateD, (scrapedDateM, (scrapedDateY, _))) = result.Date |
var (scrapedDateD, (scrapedDateM, (scrapedDateY, _))) = result.Date |
||||||
.Split('.') |
.Split('.') |
||||||
.Select(int.Parse); |
.Select(int.Parse); |
||||||
var date = new DateTime(scrapedDateY, scrapedDateM, scrapedDateD); |
var date = new DateTime(scrapedDateY, scrapedDateM, scrapedDateD); |
||||||
|
|
||||||
result.Operator = (OperatorRegex.Match( |
result.Operator = (OperatorRegex.Match( |
||||||
trainInfoDiv.QuerySelector(":scope > p")!.Text().WithCollapsedSpaces() |
trainInfoDiv.QuerySelector(":scope > p")!.Text().WithCollapsedSpaces() |
||||||
).Groups as IEnumerable<Group>).Skip(1).First().Value; |
).Groups as IEnumerable<Group>).Skip(1).First().Value; |
||||||
|
|
||||||
foreach (var groupDiv in resultsDiv.QuerySelectorAll(":scope > div")) { |
foreach (var groupDiv in resultsDiv.QuerySelectorAll(":scope > div")) { |
||||||
result.AddTrainGroup(group => { |
result.AddTrainGroup(group => { |
||||||
var statusDiv = groupDiv.QuerySelectorAll(":scope > div").First(); |
var statusDiv = groupDiv.QuerySelectorAll(":scope > div").First(); |
||||||
var routeText = statusDiv.QuerySelector(":scope > h4")!.Text().WithCollapsedSpaces(); |
var routeText = statusDiv.QuerySelector(":scope > h4")!.Text().WithCollapsedSpaces(); |
||||||
group.ConfigureRoute(route => { |
group.ConfigureRoute(route => { |
||||||
(route.From, (route.To, _)) = (RouteRegex.Match(routeText).Groups as IEnumerable<Group>).Skip(1) |
(route.From, (route.To, _)) = (RouteRegex.Match(routeText).Groups as IEnumerable<Group>).Skip(1) |
||||||
.Select(group => group.Value); |
.Select(group => group.Value); |
||||||
}); |
}); |
||||||
|
|
||||||
try { |
try { |
||||||
var statusLineMatch = |
var statusLineMatch = |
||||||
SlRegex.Match(statusDiv.QuerySelector(":scope > div")!.Text().WithCollapsedSpaces()); |
SlRegex.Match(statusDiv.QuerySelector(":scope > div")!.Text().WithCollapsedSpaces()); |
||||||
var (slmDelay, (slmLate, (slmArrival, (slmStation, _)))) = |
var (slmDelay, (slmLate, (slmArrival, (slmStation, _)))) = |
||||||
(statusLineMatch.Groups as IEnumerable<Group>).Skip(1).Select(group => group.Value); |
(statusLineMatch.Groups as IEnumerable<Group>).Skip(1).Select(group => group.Value); |
||||||
group.MakeStatus(status => { |
group.MakeStatus(status => { |
||||||
status.Delay = string.IsNullOrEmpty(slmDelay) ? 0 : |
status.Delay = string.IsNullOrEmpty(slmDelay) ? 0 : |
||||||
slmLate == "întârziere" ? int.Parse(slmDelay) : -int.Parse(slmDelay); |
slmLate == "întârziere" ? int.Parse(slmDelay) : -int.Parse(slmDelay); |
||||||
status.Station = slmStation; |
status.Station = slmStation; |
||||||
status.State = SlStateMap[slmArrival[0]]; |
status.State = SlStateMap[slmArrival[0]]; |
||||||
}); |
}); |
||||||
} |
} |
||||||
catch { |
catch { |
||||||
// ignored |
// ignored |
||||||
} |
} |
||||||
|
|
||||||
Utils.DateTimeSequencer dtSeq = new(date.Year, date.Month, date.Day); |
Utils.DateTimeSequencer dtSeq = new(date.Year, date.Month, date.Day); |
||||||
var stations = statusDiv.QuerySelectorAll(":scope > ul > li"); |
var stations = statusDiv.QuerySelectorAll(":scope > ul > li"); |
||||||
foreach (var station in stations) { |
foreach (var station in stations) { |
||||||
group.AddStopDescription(stopDescription => { |
group.AddStopDescription(stopDescription => { |
||||||
var (left, (middle, (right, _))) = station |
var (left, (middle, (right, _))) = station |
||||||
.QuerySelectorAll(":scope > div > div"); |
.QuerySelectorAll(":scope > div > div"); |
||||||
var (stopDetails, (stopNotes, _)) = middle |
var (stopDetails, (stopNotes, _)) = middle |
||||||
.QuerySelectorAll(":scope > div > div > div"); |
.QuerySelectorAll(":scope > div > div > div"); |
||||||
stopDescription.Name = stopDetails |
stopDescription.Name = stopDetails |
||||||
.QuerySelectorAll(":scope > div")[0] |
.QuerySelectorAll(":scope > div")[0] |
||||||
.Text() |
.Text() |
||||||
.WithCollapsedSpaces(); |
.WithCollapsedSpaces(); |
||||||
var scrapedKm = stopDetails |
stopDescription.LinkName = new Flurl.Url(stopDetails |
||||||
.QuerySelectorAll(":scope > div")[1] |
.QuerySelectorAll(":scope > div")[0] |
||||||
.Text() |
.QuerySelector(":scope a") |
||||||
.WithCollapsedSpaces(); |
.Attributes["href"] |
||||||
stopDescription.Km = int.Parse( |
.Value).PathSegments.Last(); |
||||||
(KmRegex.Match(scrapedKm).Groups as IEnumerable<Group>).Skip(1).First().Value |
var scrapedKm = stopDetails |
||||||
); |
.QuerySelectorAll(":scope > div")[1] |
||||||
var scrapedStoppingTime = stopDetails |
.Text() |
||||||
.QuerySelectorAll(":scope > div")[2] |
.WithCollapsedSpaces(); |
||||||
.Text() |
stopDescription.Km = int.Parse( |
||||||
.WithCollapsedSpaces(); |
(KmRegex.Match(scrapedKm).Groups as IEnumerable<Group>).Skip(1).First().Value |
||||||
if (!string.IsNullOrEmpty(scrapedStoppingTime)) { |
); |
||||||
var (stValue, (stMinsec, _)) = |
var scrapedStoppingTime = stopDetails |
||||||
(StoppingTimeRegex.Match(scrapedStoppingTime).Groups as IEnumerable<Group>) |
.QuerySelectorAll(":scope > div")[2] |
||||||
.Skip(1) |
.Text() |
||||||
.Select(group => group.Value); |
.WithCollapsedSpaces(); |
||||||
stopDescription.StoppingTime = int.Parse(stValue); |
if (!string.IsNullOrEmpty(scrapedStoppingTime)) { |
||||||
if (stMinsec == "min") stopDescription.StoppingTime *= 60; |
var (stValue, (stMinsec, _)) = |
||||||
} |
(StoppingTimeRegex.Match(scrapedStoppingTime).Groups as IEnumerable<Group>) |
||||||
|
.Skip(1) |
||||||
var scrapedPlatform = stopDetails |
.Select(group => group.Value); |
||||||
.QuerySelectorAll(":scope > div")[3] |
stopDescription.StoppingTime = int.Parse(stValue); |
||||||
.Text() |
if (stMinsec == "min") stopDescription.StoppingTime *= 60; |
||||||
.WithCollapsedSpaces(); |
} |
||||||
if (!string.IsNullOrEmpty(scrapedPlatform)) |
|
||||||
stopDescription.Platform = PlatformRegex.Match(scrapedPlatform).Groups[1].Value; |
var scrapedPlatform = stopDetails |
||||||
|
.QuerySelectorAll(":scope > div")[3] |
||||||
void ScrapeTime(IElement element, ref TrainStopArrDep arrDep) { |
.Text() |
||||||
var parts = element.QuerySelectorAll(":scope > div > div > div"); |
.WithCollapsedSpaces(); |
||||||
if (parts.Length == 0) throw new OperationCanceledException(); |
if (!string.IsNullOrEmpty(scrapedPlatform)) |
||||||
var time = parts[0]; |
stopDescription.Platform = PlatformRegex.Match(scrapedPlatform).Groups[1].Value; |
||||||
var scrapedTime = time.Text().WithCollapsedSpaces(); |
|
||||||
var (stHour, (stMin, _)) = scrapedTime.Split(':').Select(int.Parse); |
void ScrapeTime(IElement element, ref TrainStopArrDep arrDep) { |
||||||
arrDep.ScheduleTime = BucharestTz.AtLeniently(dtSeq.Next(stHour, stMin).ToLocalDateTime()) |
var parts = element.QuerySelectorAll(":scope > div > div > div"); |
||||||
.ToDateTimeOffset(); |
if (parts.Length == 0) throw new OperationCanceledException(); |
||||||
|
var time = parts[0]; |
||||||
if (parts.Length < 2) return; |
var scrapedTime = time.Text().WithCollapsedSpaces(); |
||||||
|
var (stHour, (stMin, _)) = scrapedTime.Split(':').Select(int.Parse); |
||||||
var statusElement = parts[1]; |
arrDep.ScheduleTime = BucharestTz.AtLeniently(dtSeq.Next(stHour, stMin).ToLocalDateTime()) |
||||||
var (onTime, (delay, (approx, _))) = (StationArrdepStatusRegex.Match( |
.ToDateTimeOffset(); |
||||||
statusElement.Text().WithCollapsedSpaces(replaceWith: " ") |
|
||||||
).Groups as IEnumerable<Group>).Skip(1).Select(group => group.Value); |
if (parts.Length < 2) return; |
||||||
arrDep.MakeStatus(status => { |
|
||||||
status.Delay = string.IsNullOrEmpty(onTime) ? int.Parse(delay) : 0; |
var statusElement = parts[1]; |
||||||
status.Real = string.IsNullOrEmpty(approx); |
var (onTime, (delay, (approx, _))) = (StationArrdepStatusRegex.Match( |
||||||
}); |
statusElement.Text().WithCollapsedSpaces(replaceWith: " ") |
||||||
} |
).Groups as IEnumerable<Group>).Skip(1).Select(group => group.Value); |
||||||
|
arrDep.MakeStatus(status => { |
||||||
try { |
status.Delay = string.IsNullOrEmpty(onTime) ? int.Parse(delay) : 0; |
||||||
stopDescription.MakeArrival(arrival => { ScrapeTime(left, ref arrival); }); |
status.Real = string.IsNullOrEmpty(approx); |
||||||
} |
}); |
||||||
catch (OperationCanceledException) { } |
} |
||||||
|
|
||||||
try { |
try { |
||||||
stopDescription.MakeDeparture(departure => { ScrapeTime(right, ref departure); }); |
stopDescription.MakeArrival(arrival => { ScrapeTime(left, ref arrival); }); |
||||||
} |
} |
||||||
catch (OperationCanceledException) { } |
catch (OperationCanceledException) { } |
||||||
|
|
||||||
foreach (var noteDiv in stopNotes.QuerySelectorAll(":scope > div > div")) { |
try { |
||||||
var noteText = noteDiv.Text().WithCollapsedSpaces(); |
stopDescription.MakeDeparture(departure => { ScrapeTime(right, ref departure); }); |
||||||
Match trainNumberChangeMatch, departsAsMatch, detachingWagons, receivingWagons; |
} |
||||||
if ((trainNumberChangeMatch = TrainNumberChangeNoteRegex.Match(noteText)).Success) { |
catch (OperationCanceledException) { } |
||||||
stopDescription.AddTrainNumberChangeNote(trainNumberChangeMatch.Groups[1].Value, trainNumberChangeMatch.Groups[2].Value); |
|
||||||
} |
foreach (var noteDiv in stopNotes.QuerySelectorAll(":scope > div > div")) { |
||||||
else if ((departsAsMatch = DepartsAsNoteRegex.Match(noteText)).Success) { |
var noteText = noteDiv.Text().WithCollapsedSpaces(); |
||||||
var groups = departsAsMatch.Groups; |
Match trainNumberChangeMatch, departsAsMatch, detachingWagons, receivingWagons; |
||||||
var departureDate = BucharestTz.AtStrictly(new(int.Parse(groups[5].Value), int.Parse(groups[4].Value), int.Parse(groups[3].Value), 0, 0)); |
if ((trainNumberChangeMatch = TrainNumberChangeNoteRegex.Match(noteText)).Success) { |
||||||
stopDescription.AddDepartsAsNote(groups[1].Value, groups[2].Value, departureDate.ToDateTimeOffset()); |
stopDescription.AddTrainNumberChangeNote(trainNumberChangeMatch.Groups[1].Value, trainNumberChangeMatch.Groups[2].Value); |
||||||
} |
} |
||||||
else if ((detachingWagons = DetachingWagonsNoteRegex.Match(noteText)).Success) { |
else if ((departsAsMatch = DepartsAsNoteRegex.Match(noteText)).Success) { |
||||||
stopDescription.AddDetachingWagonsNote(detachingWagons.Groups[1].Value); |
var groups = departsAsMatch.Groups; |
||||||
} |
var departureDate = BucharestTz.AtStrictly(new(int.Parse(groups[5].Value), int.Parse(groups[4].Value), int.Parse(groups[3].Value), 0, 0)); |
||||||
else if ((receivingWagons = ReceivingWagonsNoteRegex.Match(noteText)).Success) { |
stopDescription.AddDepartsAsNote(groups[1].Value, groups[2].Value, departureDate.ToDateTimeOffset()); |
||||||
stopDescription.AddReceivingWagonsNote(receivingWagons.Groups[1].Value); |
} |
||||||
} |
else if ((detachingWagons = DetachingWagonsNoteRegex.Match(noteText)).Success) { |
||||||
} |
stopDescription.AddDetachingWagonsNote(detachingWagons.Groups[1].Value); |
||||||
}); |
} |
||||||
} |
else if ((receivingWagons = ReceivingWagonsNoteRegex.Match(noteText)).Success) { |
||||||
}); |
stopDescription.AddReceivingWagonsNote(receivingWagons.Groups[1].Value); |
||||||
} |
} |
||||||
return result; |
} |
||||||
} |
}); |
||||||
} |
} |
||||||
} // namespace |
}); |
||||||
|
} |
||||||
|
return result; |
||||||
|
} |
||||||
|
} |
||||||
|
} // namespace |
||||||
|
Loading…
Reference in new issue