Compare commits
No commits in common. '422b4727c0b6cd4e0d994db503dc74f0213b8b35' and '169e128ca1cb2a1d1cb126250a37b06fc2e89643' have entirely different histories.
422b4727c0
...
169e128ca1
34 changed files with 761 additions and 1486 deletions
@ -1,62 +0,0 @@ |
|||||||
using System; |
|
||||||
using System.Collections.Generic; |
|
||||||
|
|
||||||
namespace scraper.Models.Itinerary; |
|
||||||
|
|
||||||
#region Interfaces |
|
||||||
|
|
||||||
/// <summary>
/// Read-only view of an itinerary: the list of trains it is made of.
/// </summary>
public interface IItinerary {
    /// <summary>The trains that make up this itinerary.</summary>
    IReadOnlyList<IItineraryTrain> Trains { get; }
}
|
||||||
|
|
||||||
/// <summary>
/// Read-only description of a single train within an itinerary.
/// </summary>
public interface IItineraryTrain {
    /// <summary>Station where this train is boarded.</summary>
    string From { get; }

    /// <summary>Station where this train is left.</summary>
    string To { get; }

    /// <summary>Stops between <see cref="From"/> and <see cref="To"/>.</summary>
    IReadOnlyList<string> IntermediateStops { get; }

    /// <summary>Departure moment from <see cref="From"/>.</summary>
    DateTimeOffset DepartureDate { get; }

    /// <summary>Arrival moment at <see cref="To"/>.</summary>
    DateTimeOffset ArrivalDate { get; }

    /// <summary>Distance covered by this train, in kilometres.</summary>
    int Km { get; }

    /// <summary>Name of the company operating the train.</summary>
    string Operator { get; }

    /// <summary>Rank (category) of the train.</summary>
    string TrainRank { get; }

    /// <summary>Number identifying the train.</summary>
    string TrainNumber { get; }
}
|
||||||
|
|
||||||
#endregion |
|
||||||
|
|
||||||
#region Implementations |
|
||||||
|
|
||||||
/// <summary>
/// Mutable implementation of <see cref="IItinerary"/>. Trains are appended through
/// the internal <c>AddTrain</c> overloads; consumers only see a read-only view.
/// </summary>
internal record Itinerary : IItinerary {
    // Backing storage; never replaced, only appended to.
    private List<IItineraryTrain> ModifyableTrains { get; } = new();

    /// <summary>
    /// Read-only wrapper over the trains added so far. A wrapper is returned (rather than
    /// the list itself) so callers cannot cast the result back to a mutable list.
    /// </summary>
    public IReadOnlyList<IItineraryTrain> Trains => ModifyableTrains.AsReadOnly();

    /// <summary>Appends an already built train to the itinerary.</summary>
    /// <param name="train">The train to append.</param>
    /// <exception cref="ArgumentNullException"><paramref name="train"/> is null.</exception>
    internal void AddTrain(IItineraryTrain train) {
        if (train == null) {
            throw new ArgumentNullException(nameof(train));
        }
        ModifyableTrains.Add(train);
    }

    /// <summary>
    /// Creates a new <see cref="ItineraryTrain"/>, lets <paramref name="configurator"/>
    /// fill it in, then appends it to the itinerary.
    /// </summary>
    /// <param name="configurator">Callback that populates the freshly created train.</param>
    /// <exception cref="ArgumentNullException"><paramref name="configurator"/> is null.</exception>
    internal void AddTrain(Action<ItineraryTrain> configurator) {
        if (configurator == null) {
            throw new ArgumentNullException(nameof(configurator));
        }
        ItineraryTrain newTrain = new();
        configurator(newTrain);
        AddTrain(newTrain);
    }
}
|
||||||
|
|
||||||
/// <summary>
/// Mutable implementation of <see cref="IItineraryTrain"/>. Setters and
/// <see cref="AddIntermediateStop"/> are internal; string properties start out empty.
/// </summary>
internal record ItineraryTrain : IItineraryTrain {
    // Backing storage for IntermediateStops.
    private List<string> ModifyableIntermediateStops { get; set; } = new();

    /// <inheritdoc/>
    public string From { get; internal set; } = "";

    /// <inheritdoc/>
    public string To { get; internal set; } = "";

    /// <inheritdoc/>
    public IReadOnlyList<string> IntermediateStops => ModifyableIntermediateStops;

    /// <inheritdoc/>
    public DateTimeOffset DepartureDate { get; internal set; }

    /// <inheritdoc/>
    public DateTimeOffset ArrivalDate { get; internal set; }

    /// <inheritdoc/>
    public int Km { get; internal set; }

    /// <inheritdoc/>
    public string Operator { get; internal set; } = "";

    /// <inheritdoc/>
    public string TrainRank { get; internal set; } = "";

    /// <inheritdoc/>
    public string TrainNumber { get; internal set; } = "";

    /// <summary>Appends <paramref name="stop"/> to the list of intermediate stops.</summary>
    internal void AddIntermediateStop(string stop) => ModifyableIntermediateStops.Add(stop);
}
|
||||||
|
|
||||||
#endregion |
|
@ -1,246 +1,316 @@ |
|||||||
using System; |
using System; |
||||||
using System.Collections.Generic; |
using System.Collections.Generic; |
||||||
using InfoferScraper.Models.Status; |
using System.Text.Json; |
||||||
using Newtonsoft.Json; |
using System.Text.Json.Serialization; |
||||||
using Newtonsoft.Json.Converters; |
using InfoferScraper.Models.Status; |
||||||
using Newtonsoft.Json.Serialization; |
using InfoferScraper.Models.Train.JsonConverters; |
||||||
|
|
||||||
namespace InfoferScraper.Models.Train { |
namespace InfoferScraper.Models.Train { |
||||||
#region Interfaces |
#region Interfaces |
||||||
|
|
||||||
public interface ITrainScrapeResult { |
public interface ITrainScrapeResult { |
||||||
public string Rank { get; } |
public string Rank { get; } |
||||||
|
|
||||||
public string Number { get; } |
public string Number { get; } |
||||||
|
|
||||||
/// <summary> |
/// <summary> |
||||||
/// Date in the DD.MM.YYYY format |
/// Date in the DD.MM.YYYY format |
||||||
/// This date is taken as-is from the result. |
/// This date is taken as-is from the result. |
||||||
/// </summary> |
/// </summary> |
||||||
public string Date { get; } |
public string Date { get; } |
||||||
|
|
||||||
public string Operator { get; } |
public string Operator { get; } |
||||||
|
|
||||||
public IReadOnlyList<ITrainGroup> Groups { get; } |
public IReadOnlyList<ITrainGroup> Groups { get; } |
||||||
} |
} |
||||||
|
|
||||||
public interface ITrainGroup { |
public interface ITrainGroup { |
||||||
public ITrainRoute Route { get; } |
public ITrainRoute Route { get; } |
||||||
|
|
||||||
public ITrainStatus? Status { get; } |
public ITrainStatus? Status { get; } |
||||||
public IReadOnlyList<ITrainStopDescription> Stations { get; } |
public IReadOnlyList<ITrainStopDescription> Stations { get; } |
||||||
} |
} |
||||||
|
|
||||||
public interface ITrainRoute { |
public interface ITrainRoute { |
||||||
public string From { get; } |
public string From { get; } |
||||||
public string To { get; } |
public string To { get; } |
||||||
} |
} |
||||||
|
|
||||||
public interface ITrainStatus { |
public interface ITrainStatus { |
||||||
public int Delay { get; } |
public int Delay { get; } |
||||||
public string Station { get; } |
public string Station { get; } |
||||||
public StatusKind State { get; } |
public StatusKind State { get; } |
||||||
} |
} |
||||||
|
|
||||||
public interface ITrainStopDescription { |
public interface ITrainStopDescription { |
||||||
public string Name { get; } |
public string Name { get; } |
||||||
public string LinkName { get; } |
public int Km { get; } |
||||||
public int Km { get; } |
|
||||||
|
/// <summary> |
||||||
/// <summary> |
/// The time the train waits in the station in seconds |
||||||
/// The time the train waits in the station in seconds |
/// </summary> |
||||||
/// </summary> |
public int? StoppingTime { get; } |
||||||
public int? StoppingTime { get; } |
|
||||||
|
public string? Platform { get; } |
||||||
public string? Platform { get; } |
public ITrainStopArrDep? Arrival { get; } |
||||||
public ITrainStopArrDep? Arrival { get; } |
public ITrainStopArrDep? Departure { get; } |
||||||
public ITrainStopArrDep? Departure { get; } |
|
||||||
|
public IReadOnlyList<object> Notes { get; } |
||||||
public IReadOnlyList<object> Notes { get; } |
} |
||||||
} |
|
||||||
|
public interface ITrainStopNote { |
||||||
public interface ITrainStopNote { |
public NoteKind Kind { get; } |
||||||
public NoteKind Kind { get; } |
} |
||||||
} |
|
||||||
|
public interface ITrainStopTrainNumberChangeNote : ITrainStopNote { |
||||||
public interface ITrainStopTrainNumberChangeNote : ITrainStopNote { |
public string Rank { get; } |
||||||
public string Rank { get; } |
public string Number { get; } |
||||||
public string Number { get; } |
} |
||||||
} |
|
||||||
|
public interface ITrainStopDepartsAsNote : ITrainStopNote { |
||||||
public interface ITrainStopDepartsAsNote : ITrainStopNote { |
public string Rank { get; } |
||||||
public string Rank { get; } |
public string Number { get; } |
||||||
public string Number { get; } |
public DateTimeOffset DepartureDate { get; } |
||||||
public DateTimeOffset DepartureDate { get; } |
} |
||||||
} |
|
||||||
|
public interface ITrainStopDetachingWagonsNote : ITrainStopNote { |
||||||
public interface ITrainStopDetachingWagonsNote : ITrainStopNote { |
public string Station { get; } |
||||||
public string Station { get; } |
} |
||||||
} |
|
||||||
|
public interface ITrainStopReceivingWagonsNote : ITrainStopNote { |
||||||
public interface ITrainStopReceivingWagonsNote : ITrainStopNote { |
public string Station { get; } |
||||||
public string Station { get; } |
} |
||||||
} |
|
||||||
|
public interface ITrainStopArrDep { |
||||||
public interface ITrainStopArrDep { |
public DateTimeOffset ScheduleTime { get; } |
||||||
public DateTimeOffset ScheduleTime { get; } |
public IStatus? Status { get; } |
||||||
public IStatus? Status { get; } |
} |
||||||
} |
|
||||||
|
#endregion |
||||||
#endregion |
|
||||||
|
[JsonConverter(typeof(StatusKindConverter))] |
||||||
[JsonConverter(typeof(StringEnumConverter), typeof(CamelCaseNamingStrategy))] |
public enum StatusKind { |
||||||
public enum StatusKind { |
Passing, |
||||||
Passing, |
Arrival, |
||||||
Arrival, |
Departure, |
||||||
Departure, |
} |
||||||
} |
|
||||||
|
[JsonConverter(typeof(NoteKindConverter))] |
||||||
[JsonConverter(typeof(StringEnumConverter), typeof(CamelCaseNamingStrategy))] |
public enum NoteKind { |
||||||
public enum NoteKind { |
TrainNumberChange, |
||||||
TrainNumberChange, |
DetachingWagons, |
||||||
DetachingWagons, |
ReceivingWagons, |
||||||
ReceivingWagons, |
DepartsAs, |
||||||
DepartsAs, |
} |
||||||
} |
|
||||||
|
#region Implementations |
||||||
#region Implementations |
|
||||||
|
internal record TrainScrapeResult : ITrainScrapeResult { |
||||||
internal record TrainScrapeResult : ITrainScrapeResult { |
private List<ITrainGroup> ModifyableGroups { get; set; } = new(); |
||||||
private List<ITrainGroup> ModifyableGroups { get; set; } = new(); |
public string Rank { get; set; } = ""; |
||||||
public string Rank { get; set; } = ""; |
public string Number { get; set; } = ""; |
||||||
public string Number { get; set; } = ""; |
public string Date { get; set; } = ""; |
||||||
public string Date { get; set; } = ""; |
public string Operator { get; set; } = ""; |
||||||
public string Operator { get; set; } = ""; |
public IReadOnlyList<ITrainGroup> Groups => ModifyableGroups.AsReadOnly(); |
||||||
public IReadOnlyList<ITrainGroup> Groups => ModifyableGroups.AsReadOnly(); |
|
||||||
|
private void AddTrainGroup(ITrainGroup trainGroup) { |
||||||
private void AddTrainGroup(ITrainGroup trainGroup) { |
ModifyableGroups.Add(trainGroup); |
||||||
ModifyableGroups.Add(trainGroup); |
} |
||||||
} |
|
||||||
|
internal void AddTrainGroup(Action<TrainGroup> configurator) { |
||||||
internal void AddTrainGroup(Action<TrainGroup> configurator) { |
TrainGroup newTrainGroup = new(); |
||||||
TrainGroup newTrainGroup = new(); |
configurator(newTrainGroup); |
||||||
configurator(newTrainGroup); |
AddTrainGroup(newTrainGroup); |
||||||
AddTrainGroup(newTrainGroup); |
} |
||||||
} |
} |
||||||
} |
|
||||||
|
internal record TrainGroup : ITrainGroup { |
||||||
internal record TrainGroup : ITrainGroup { |
private List<ITrainStopDescription> ModifyableStations { get; set; } = new(); |
||||||
private List<ITrainStopDescription> ModifyableStations { get; set; } = new(); |
public ITrainRoute Route { get; init; } = new TrainRoute(); |
||||||
public ITrainRoute Route { get; init; } = new TrainRoute(); |
public ITrainStatus? Status { get; private set; } |
||||||
public ITrainStatus? Status { get; private set; } |
public IReadOnlyList<ITrainStopDescription> Stations => ModifyableStations.AsReadOnly(); |
||||||
public IReadOnlyList<ITrainStopDescription> Stations => ModifyableStations.AsReadOnly(); |
|
||||||
|
private void AddStopDescription(ITrainStopDescription stopDescription) { |
||||||
private void AddStopDescription(ITrainStopDescription stopDescription) { |
ModifyableStations.Add(stopDescription); |
||||||
ModifyableStations.Add(stopDescription); |
} |
||||||
} |
|
||||||
|
internal void AddStopDescription(Action<TrainStopDescription> configurator) { |
||||||
internal void AddStopDescription(Action<TrainStopDescription> configurator) { |
TrainStopDescription newStopDescription = new(); |
||||||
TrainStopDescription newStopDescription = new(); |
configurator(newStopDescription); |
||||||
configurator(newStopDescription); |
AddStopDescription(newStopDescription); |
||||||
AddStopDescription(newStopDescription); |
} |
||||||
} |
|
||||||
|
internal void ConfigureRoute(Action<TrainRoute> configurator) { |
||||||
internal void ConfigureRoute(Action<TrainRoute> configurator) { |
configurator((TrainRoute)Route); |
||||||
configurator((TrainRoute)Route); |
} |
||||||
} |
|
||||||
|
internal void MakeStatus(Action<TrainStatus> configurator) { |
||||||
internal void MakeStatus(Action<TrainStatus> configurator) { |
TrainStatus newStatus = new(); |
||||||
TrainStatus newStatus = new(); |
configurator(newStatus); |
||||||
configurator(newStatus); |
Status = newStatus; |
||||||
Status = newStatus; |
} |
||||||
} |
} |
||||||
} |
|
||||||
|
internal record TrainRoute : ITrainRoute { |
||||||
internal record TrainRoute : ITrainRoute { |
public TrainRoute() { |
||||||
public TrainRoute() { |
From = ""; |
||||||
From = ""; |
To = ""; |
||||||
To = ""; |
} |
||||||
} |
|
||||||
|
public string From { get; set; } |
||||||
public string From { get; set; } |
public string To { get; set; } |
||||||
public string To { get; set; } |
} |
||||||
} |
|
||||||
|
internal record TrainStatus : ITrainStatus { |
||||||
internal record TrainStatus : ITrainStatus { |
public int Delay { get; set; } |
||||||
public int Delay { get; set; } |
public string Station { get; set; } = ""; |
||||||
public string Station { get; set; } = ""; |
public StatusKind State { get; set; } |
||||||
public StatusKind State { get; set; } |
} |
||||||
} |
|
||||||
|
internal record TrainStopDescription : ITrainStopDescription { |
||||||
internal record TrainStopDescription : ITrainStopDescription { |
private List<ITrainStopNote> ModifyableNotes { get; } = new(); |
||||||
private List<ITrainStopNote> ModifyableNotes { get; } = new(); |
public string Name { get; set; } = ""; |
||||||
public string Name { get; set; } = ""; |
public int Km { get; set; } |
||||||
public string LinkName { get; set; } = ""; |
public int? StoppingTime { get; set; } |
||||||
public int Km { get; set; } |
public string? Platform { get; set; } |
||||||
public int? StoppingTime { get; set; } |
public ITrainStopArrDep? Arrival { get; private set; } |
||||||
public string? Platform { get; set; } |
public ITrainStopArrDep? Departure { get; private set; } |
||||||
public ITrainStopArrDep? Arrival { get; private set; } |
public IReadOnlyList<object> Notes => ModifyableNotes.AsReadOnly(); |
||||||
public ITrainStopArrDep? Departure { get; private set; } |
|
||||||
public IReadOnlyList<object> Notes => ModifyableNotes.AsReadOnly(); |
internal void MakeArrival(Action<TrainStopArrDep> configurator) { |
||||||
|
TrainStopArrDep newArrival = new(); |
||||||
internal void MakeArrival(Action<TrainStopArrDep> configurator) { |
configurator(newArrival); |
||||||
TrainStopArrDep newArrival = new(); |
Arrival = newArrival; |
||||||
configurator(newArrival); |
} |
||||||
Arrival = newArrival; |
|
||||||
} |
internal void MakeDeparture(Action<TrainStopArrDep> configurator) { |
||||||
|
TrainStopArrDep newDeparture = new(); |
||||||
internal void MakeDeparture(Action<TrainStopArrDep> configurator) { |
configurator(newDeparture); |
||||||
TrainStopArrDep newDeparture = new(); |
Departure = newDeparture; |
||||||
configurator(newDeparture); |
} |
||||||
Departure = newDeparture; |
|
||||||
} |
class DepartsAsNote : ITrainStopDepartsAsNote { |
||||||
|
public NoteKind Kind => NoteKind.DepartsAs; |
||||||
class DepartsAsNote : ITrainStopDepartsAsNote { |
public string Rank { get; set; } = ""; |
||||||
public NoteKind Kind => NoteKind.DepartsAs; |
public string Number { get; set; } = ""; |
||||||
public string Rank { get; set; } = ""; |
public DateTimeOffset DepartureDate { get; set; } |
||||||
public string Number { get; set; } = ""; |
} |
||||||
public DateTimeOffset DepartureDate { get; set; } |
|
||||||
} |
class TrainNumberChangeNote : ITrainStopTrainNumberChangeNote { |
||||||
|
public NoteKind Kind => NoteKind.TrainNumberChange; |
||||||
class TrainNumberChangeNote : ITrainStopTrainNumberChangeNote { |
public string Rank { get; set; } = ""; |
||||||
public NoteKind Kind => NoteKind.TrainNumberChange; |
public string Number { get; set; } = ""; |
||||||
public string Rank { get; set; } = ""; |
} |
||||||
public string Number { get; set; } = ""; |
|
||||||
} |
class ReceivingWagonsNote : ITrainStopReceivingWagonsNote { |
||||||
|
public NoteKind Kind => NoteKind.ReceivingWagons; |
||||||
class ReceivingWagonsNote : ITrainStopReceivingWagonsNote { |
public string Station { get; set; } = ""; |
||||||
public NoteKind Kind => NoteKind.ReceivingWagons; |
} |
||||||
public string Station { get; set; } = ""; |
|
||||||
} |
class DetachingWagonsNote : ITrainStopReceivingWagonsNote { |
||||||
|
public NoteKind Kind => NoteKind.DetachingWagons; |
||||||
class DetachingWagonsNote : ITrainStopReceivingWagonsNote { |
public string Station { get; set; } = ""; |
||||||
public NoteKind Kind => NoteKind.DetachingWagons; |
} |
||||||
public string Station { get; set; } = ""; |
|
||||||
} |
internal void AddDepartsAsNote(string rank, string number, DateTimeOffset departureDate) { |
||||||
|
ModifyableNotes.Add(new DepartsAsNote { Rank = rank, Number = number, DepartureDate = departureDate }); |
||||||
internal void AddDepartsAsNote(string rank, string number, DateTimeOffset departureDate) { |
} |
||||||
ModifyableNotes.Add(new DepartsAsNote { Rank = rank, Number = number, DepartureDate = departureDate }); |
|
||||||
} |
internal void AddTrainNumberChangeNote(string rank, string number) { |
||||||
|
ModifyableNotes.Add(new TrainNumberChangeNote { Rank = rank, Number = number }); |
||||||
internal void AddTrainNumberChangeNote(string rank, string number) { |
} |
||||||
ModifyableNotes.Add(new TrainNumberChangeNote { Rank = rank, Number = number }); |
|
||||||
} |
internal void AddReceivingWagonsNote(string station) { |
||||||
|
ModifyableNotes.Add(new ReceivingWagonsNote { Station = station }); |
||||||
internal void AddReceivingWagonsNote(string station) { |
} |
||||||
ModifyableNotes.Add(new ReceivingWagonsNote { Station = station }); |
|
||||||
} |
internal void AddDetachingWagonsNote(string station) { |
||||||
|
ModifyableNotes.Add(new DetachingWagonsNote { Station = station }); |
||||||
internal void AddDetachingWagonsNote(string station) { |
} |
||||||
ModifyableNotes.Add(new DetachingWagonsNote { Station = station }); |
} |
||||||
} |
|
||||||
} |
public record TrainStopArrDep : ITrainStopArrDep { |
||||||
|
public DateTimeOffset ScheduleTime { get; set; } |
||||||
public record TrainStopArrDep : ITrainStopArrDep { |
public IStatus? Status { get; private set; } |
||||||
public DateTimeOffset ScheduleTime { get; set; } |
|
||||||
public IStatus? Status { get; private set; } |
internal void MakeStatus(Action<Status.Status> configurator) { |
||||||
|
Status.Status newStatus = new(); |
||||||
internal void MakeStatus(Action<Status.Status> configurator) { |
configurator(newStatus); |
||||||
Status.Status newStatus = new(); |
Status = newStatus; |
||||||
configurator(newStatus); |
} |
||||||
Status = newStatus; |
} |
||||||
} |
|
||||||
} |
#endregion |
||||||
|
|
||||||
#endregion |
#region JSON Converters |
||||||
} |
|
||||||
|
namespace JsonConverters { |
||||||
|
/// <summary>
/// Converter factory that serializes <see cref="StatusKind"/> as the lower-case strings
/// <c>"passing"</c>, <c>"arrival"</c> and <c>"departure"</c>.
/// </summary>
internal class StatusKindConverter : JsonConverterFactory {
    /// <summary>Only <see cref="StatusKind"/> itself is handled.</summary>
    public override bool CanConvert(Type typeToConvert) {
        return typeToConvert == typeof(StatusKind);
    }

    /// <summary>Returns a new converter instance for <see cref="StatusKind"/>.</summary>
    public override JsonConverter? CreateConverter(Type typeToConvert, JsonSerializerOptions options) {
        return new Converter();
    }

    private class Converter : JsonConverter<StatusKind> {
        /// <summary>Parses the string form of a <see cref="StatusKind"/>.</summary>
        /// <exception cref="JsonException">
        /// The current token is not a string, or the string is not a known value.
        /// </exception>
        public override StatusKind Read(
            ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options
        ) {
            // Report malformed input as a JSON error instead of letting GetString()
            // fail with InvalidOperationException on non-string tokens.
            if (reader.TokenType != JsonTokenType.String) {
                throw new JsonException(
                    $"Expected a JSON string for {nameof(StatusKind)}, got {reader.TokenType}."
                );
            }

            var text = reader.GetString();
            return text switch {
                "arrival" => StatusKind.Arrival,
                "departure" => StatusKind.Departure,
                "passing" => StatusKind.Passing,
                _ => throw new JsonException($"Unknown {nameof(StatusKind)} value '{text}'."),
            };
        }

        /// <summary>Writes <paramref name="value"/> as its lower-case string form.</summary>
        /// <exception cref="NotImplementedException">
        /// <paramref name="value"/> is not one of the declared enum members.
        /// </exception>
        public override void Write(Utf8JsonWriter writer, StatusKind value, JsonSerializerOptions options) {
            writer.WriteStringValue(value switch {
                StatusKind.Passing => "passing",
                StatusKind.Arrival => "arrival",
                StatusKind.Departure => "departure",
                _ => throw new NotImplementedException()
            });
        }
    }
}
||||||
|
|
||||||
|
/// <summary>
/// Converter factory that serializes <see cref="NoteKind"/> as the camel-case strings
/// <c>"departsAs"</c>, <c>"trainNumberChange"</c>, <c>"receivingWagons"</c> and
/// <c>"detachingWagons"</c>.
/// </summary>
internal class NoteKindConverter : JsonConverterFactory {
    /// <summary>Only <see cref="NoteKind"/> itself is handled.</summary>
    public override bool CanConvert(Type typeToConvert) {
        return typeToConvert == typeof(NoteKind);
    }

    /// <summary>Returns a new converter instance for <see cref="NoteKind"/>.</summary>
    public override JsonConverter? CreateConverter(Type typeToConvert, JsonSerializerOptions options) {
        return new Converter();
    }

    private class Converter : JsonConverter<NoteKind> {
        /// <summary>Parses the string form of a <see cref="NoteKind"/>.</summary>
        /// <exception cref="JsonException">
        /// The current token is not a string, or the string is not a known value.
        /// </exception>
        public override NoteKind Read(
            ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options
        ) {
            // Report malformed input as a JSON error instead of letting GetString()
            // fail with InvalidOperationException on non-string tokens.
            if (reader.TokenType != JsonTokenType.String) {
                throw new JsonException(
                    $"Expected a JSON string for {nameof(NoteKind)}, got {reader.TokenType}."
                );
            }

            var text = reader.GetString();
            return text switch {
                "departsAs" => NoteKind.DepartsAs,
                "trainNumberChange" => NoteKind.TrainNumberChange,
                "receivingWagons" => NoteKind.ReceivingWagons,
                "detachingWagons" => NoteKind.DetachingWagons,
                _ => throw new JsonException($"Unknown {nameof(NoteKind)} value '{text}'."),
            };
        }

        /// <summary>Writes <paramref name="value"/> as its camel-case string form.</summary>
        /// <exception cref="NotImplementedException">
        /// <paramref name="value"/> is not one of the declared enum members.
        /// </exception>
        public override void Write(Utf8JsonWriter writer, NoteKind value, JsonSerializerOptions options) {
            writer.WriteStringValue(value switch {
                NoteKind.DepartsAs => "departsAs",
                NoteKind.TrainNumberChange => "trainNumberChange",
                NoteKind.DetachingWagons => "detachingWagons",
                NoteKind.ReceivingWagons => "receivingWagons",
                _ => throw new NotImplementedException()
            });
        }
    }
}
||||||
|
} |
||||||
|
|
||||||
|
#endregion |
||||||
|
} |
||||||
|
@ -1,220 +0,0 @@ |
|||||||
using System; |
|
||||||
using System.Collections.Generic; |
|
||||||
using System.Linq; |
|
||||||
using System.Net; |
|
||||||
using System.Net.Http; |
|
||||||
using System.Text.RegularExpressions; |
|
||||||
using System.Threading.Tasks; |
|
||||||
using AngleSharp; |
|
||||||
using AngleSharp.Dom; |
|
||||||
using AngleSharp.Html.Dom; |
|
||||||
using Flurl; |
|
||||||
using InfoferScraper.Models.Train; |
|
||||||
using NodaTime; |
|
||||||
using NodaTime.Extensions; |
|
||||||
using scraper.Models.Itinerary; |
|
||||||
|
|
||||||
namespace InfoferScraper.Scrapers; |
|
||||||
|
|
||||||
/// <summary>
/// Scrapes itineraries between two stations from the site at <see cref="BaseUrl"/>.
/// </summary>
public class RouteScraper {
    private const string BaseUrl = "https://mersultrenurilor.infofer.ro/ro-RO/";
    private static readonly DateTimeZone BucharestTz = DateTimeZoneProviders.Tzdb["Europe/Bucharest"];

    // "<km> km cu <rank> <number>"
    private static readonly Regex KmTrainRankNoRegex = new(@"^([0-9]+)\skm\scu\s([A-Z-]+)\s([0-9]+)$");
    // "Operat de <operator>"
    private static readonly Regex OperatorRegex = new(@$"^Operat\sde\s([{Utils.RoLetters}\s]+)$");
    // "(Ple|Sos) <day> <month abbreviation>[.] <hour>:<minute>" - note that no year is present
    private static readonly Regex DepArrRegex = new(@"^(Ple|Sos)\s([0-9]+)\s([a-z]+)\.?\s([0-9]+):([0-9]+)$");

    // Month abbreviations as matched by DepArrRegex, mapped to month numbers.
    private static readonly Dictionary<string, int> Months = new Dictionary<string, int>() {
        ["ian"] = 1,
        ["feb"] = 2,
        ["mar"] = 3,
        ["apr"] = 4,
        ["mai"] = 5,
        ["iun"] = 6,
        ["iul"] = 7,
        ["aug"] = 8,
        ["sep"] = 9,
        ["oct"] = 10,
        ["noi"] = 11,
        ["dec"] = 12,
    };

    private readonly CookieContainer cookieContainer = new();

    private readonly HttpClient httpClient;

    /// <summary>
    /// Creates a scraper. The handler (a new one when none is supplied) is configured to use
    /// this instance's own cookie container; any container previously set on a supplied
    /// handler is replaced.
    /// </summary>
    /// <param name="httpClientHandler">Optional handler to build the HTTP client on.</param>
    public RouteScraper(HttpClientHandler? httpClientHandler = null) {
        httpClientHandler ??= new HttpClientHandler();
        httpClientHandler.CookieContainer = cookieContainer;
        httpClientHandler.UseCookies = true;

        httpClient = new HttpClient(httpClientHandler) {
            BaseAddress = new Uri(BaseUrl),
            DefaultRequestVersion = new Version(2, 0),
        };
    }

    /// <summary>
    /// Fetches the itineraries from <paramref name="from"/> to <paramref name="to"/>.
    /// </summary>
    /// <param name="from">Departure station, used as a URL path segment.</param>
    /// <param name="to">Arrival station, used as a URL path segment.</param>
    /// <param name="dateOverride">
    /// Optional departure date. It is converted to Bucharest time before being sent.
    /// </param>
    /// <returns>
    /// The parsed itineraries, or <c>null</c> when the itineraries response contains no
    /// <c>body &gt; div</c> element (presumably meaning nothing was found - confirm against the site).
    /// </returns>
    /// <exception cref="InvalidOperationException">The search page has no <c>form-search</c> element.</exception>
    /// <exception cref="FormatException">A date or distance/train text does not have the expected shape.</exception>
    public async Task<List<IItinerary>?> Scrape(string from, string to, DateTimeOffset? dateOverride = null) {
        var asContext = BrowsingContext.New(Configuration.Default);

        var firstUrl = "Rute-trenuri"
            .AppendPathSegment(from)
            .AppendPathSegment(to);
        if (dateOverride is { } requestedDate) {
            var bucharestDate = requestedDate.ToInstant().InZone(BucharestTz).ToDateTimeOffset();
            // Invariant culture: the query value must not depend on the host's calendar settings.
            firstUrl = firstUrl.SetQueryParam(
                "DepartureDate",
                bucharestDate.ToString("d.MM.yyyy", System.Globalization.CultureInfo.InvariantCulture)
            );
        }
        firstUrl = firstUrl.SetQueryParam("OrderingTypeId", "0");
        firstUrl = firstUrl.SetQueryParam("TimeSelectionId", "0");
        firstUrl = firstUrl.SetQueryParam("MinutesInDay", "0");
        firstUrl = firstUrl.SetQueryParam("ConnectionsTypeId", "1");
        firstUrl = firstUrl.SetQueryParam("BetweenTrainsMinimumMinutes", "5");
        firstUrl = firstUrl.SetQueryParam("ChangeStationName", "");

        // Step 1: load the search page and collect every named input of its form.
        var firstResponse = await httpClient.GetStringAsync(firstUrl);
        var firstDocument = await asContext.OpenAsync(req => req.Content(firstResponse));
        var firstForm = firstDocument.GetElementById("form-search");
        if (firstForm == null) {
            throw new InvalidOperationException("The search page does not contain the 'form-search' element.");
        }

        var firstResult = firstForm
            .QuerySelectorAll<IHtmlInputElement>("input")
            .Where(elem => elem.Name != null)
            .ToDictionary(elem => elem.Name!, elem => elem.Value);

        // Step 2: post those inputs back to obtain the itineraries fragment.
        var secondUrl = "".AppendPathSegments("Itineraries", "GetItineraries");
        using var secondResponse = await httpClient.PostAsync(
            secondUrl,
#pragma warning disable CS8620
            new FormUrlEncodedContent(firstResult)
#pragma warning restore CS8620
        );
        var secondResponseContent = await secondResponse.Content.ReadAsStringAsync();
        var secondDocument = await asContext.OpenAsync(
            req => req.Content(secondResponseContent)
        );

        var (itineraryInfoDiv, _) = secondDocument
            .QuerySelectorAll("body > div");

        if (itineraryInfoDiv == null) {
            return null;
        }

        // The scraped times are Bucharest wall-clock times, so "now" must be taken in the
        // same zone rather than in whatever zone the host machine runs in.
        var now = SystemClock.Instance.GetCurrentInstant().InZone(BucharestTz).LocalDateTime;

        var itinerariesLi = secondDocument
            .QuerySelectorAll("body > ul > li");
        var itineraries = new List<IItinerary>();
        foreach (var itineraryLi in itinerariesLi) {
            var itinerary = new Itinerary();

            var cardDivs = itineraryLi.QuerySelectorAll(":scope > div > div > div > div");
            var detailsDivs = cardDivs.Last()
                .QuerySelectorAll(":scope > div > div")[1]
                .QuerySelectorAll(":scope > div");
            var trainItineraryAndDetailsLis = detailsDivs[0]
                .QuerySelectorAll(":scope > ul > li");
            var stations = new List<string>();
            var details = new List<ItineraryTrain>();
            // List items alternate: station, train detail, station, train detail, ..., station.
            foreach (var (idx, li) in trainItineraryAndDetailsLis.Select((li, idx) => (idx, li))) {
                if (idx % 2 == 0) {
                    // Station
                    stations.Add(
                        li
                            .QuerySelectorAll(":scope > div > div > div > div")[1]
                            .Text()
                            .WithCollapsedSpaces()
                    );
                }
                else {
                    // Detail
                    details.Add(ParseTrainDetail(li, now));
                }
            }

            // Train i runs from station i to station i + 1.
            foreach (var ((iFrom, iTo), detail) in stations.Zip(stations.Skip(1)).Zip(details)) {
                detail.From = iFrom;
                detail.To = iTo;
                itinerary.AddTrain(detail);
            }

            itineraries.Add(itinerary);
        }

        return itineraries;
    }

    // Builds one train (everything except From/To) out of a "detail" list item.
    private static ItineraryTrain ParseTrainDetail(IElement li, LocalDateTime now) {
        var detailColumns = li.QuerySelectorAll(":scope > div > div");
        var leftSideDivs = detailColumns[0].QuerySelectorAll(":scope > div");

        var departureDate = ParseDepArr(
            leftSideDivs[0].QuerySelectorAll(":scope > div")[1].Text().WithCollapsedSpaces(),
            now
        );
        var arrivalDate = ParseDepArr(
            leftSideDivs[3].QuerySelectorAll(":scope > div")[1].Text().WithCollapsedSpaces(),
            now
        );

        var rightSideDivs = detailColumns[1].QuerySelectorAll(":scope > div > div");
        var kmRankNumberText = rightSideDivs[0]
            .QuerySelectorAll(":scope > div > div")[0]
            .Text()
            .WithCollapsedSpaces();
        var kmRankNumberMatch = KmTrainRankNoRegex.Match(kmRankNumberText);
        if (!kmRankNumberMatch.Success) {
            throw new FormatException($"Unrecognised distance/train text: '{kmRankNumberText}'");
        }

        var operatorText = rightSideDivs[0]
            .QuerySelectorAll(":scope > div > div")[1]
            .Text()
            .WithCollapsedSpaces();
        // A non-matching operator text is tolerated and yields an empty operator.
        var operatorMatch = OperatorRegex.Match(operatorText);

        var train = new ItineraryTrain {
            ArrivalDate = BucharestTz.AtLeniently(arrivalDate).ToDateTimeOffset(),
            DepartureDate = BucharestTz.AtLeniently(departureDate).ToDateTimeOffset(),
            Km = int.Parse(kmRankNumberMatch.Groups[1].Value),
            TrainRank = kmRankNumberMatch.Groups[2].Value,
            TrainNumber = kmRankNumberMatch.Groups[3].Value,
            Operator = operatorMatch.Groups[1].Value,
        };

        // Only every second child div carries a stop name.
        foreach (var div in leftSideDivs[2]
                     .QuerySelectorAll(":scope > div")
                     .Where((_, i) => i % 2 != 0)) {
            var text = div.Text().WithCollapsedSpaces();
            if (text == "Nu sunt stații intermediare.") continue;
            train.AddIntermediateStop(text);
        }

        return train;
    }

    // Parses a departure/arrival text matched by DepArrRegex. The text carries no year, so the
    // current year is assumed and bumped by one when the result would lie more than a day
    // in the past.
    private static LocalDateTime ParseDepArr(string text, LocalDateTime now) {
        var match = DepArrRegex.Match(text);
        if (!match.Success || !Months.TryGetValue(match.Groups[3].Value, out var month)) {
            throw new FormatException($"Unrecognised departure/arrival text: '{text}'");
        }

        var date = new LocalDateTime(
            now.Year,
            month,
            int.Parse(match.Groups[2].Value),
            int.Parse(match.Groups[4].Value),
            int.Parse(match.Groups[5].Value),
            0
        );
        return date < now.PlusDays(-1) ? date.PlusYears(1) : date;
    }
}
|
@ -1,261 +1,239 @@ |
|||||||
using System; |
using System; |
||||||
using System.Collections.Generic; |
using System.Collections.Generic; |
||||||
using System.Linq; |
using System.Linq; |
||||||
using System.Net; |
using System.Net; |
||||||
using System.Net.Http; |
using System.Net.Http; |
||||||
using System.Text.RegularExpressions; |
using System.Text.RegularExpressions; |
||||||
using System.Threading.Tasks; |
using System.Threading.Tasks; |
||||||
using AngleSharp; |
using AngleSharp; |
||||||
using AngleSharp.Dom; |
using AngleSharp.Dom; |
||||||
using AngleSharp.Html.Dom; |
using AngleSharp.Html.Dom; |
||||||
using Flurl; |
using Flurl; |
||||||
using InfoferScraper.Models.Train; |
using InfoferScraper.Models.Train; |
||||||
using NodaTime; |
using NodaTime; |
||||||
using NodaTime.Extensions; |
using NodaTime.Extensions; |
||||||
using scraper.Exceptions; |
using scraper.Exceptions; |
||||||
|
|
||||||
namespace InfoferScraper.Scrapers { |
namespace InfoferScraper.Scrapers { |
||||||
public class TrainScraper { |
public static class TrainScraper { |
||||||
private const string BaseUrl = "https://mersultrenurilor.infofer.ro/ro-RO/"; |
private const string BaseUrl = "https://mersultrenurilor.infofer.ro/ro-RO/"; |
||||||
private static readonly Regex TrainInfoRegex = new(@"^([A-Z-]+)\s([0-9]+)\sîn\s([0-9.]+)$"); |
private static readonly Regex TrainInfoRegex = new(@"^([A-Z-]+)\s([0-9]+)\sîn\s([0-9.]+)$"); |
||||||
private static readonly Regex OperatorRegex = new(@"^Operat\sde\s(.+)$"); |
private static readonly Regex OperatorRegex = new(@"^Operat\sde\s(.+)$"); |
||||||
|
|
||||||
private static readonly Regex RouteRegex = |
private static readonly Regex RouteRegex = |
||||||
new(@$"^Parcurs\stren\s([{Utils.RoLetters} ]+)[-–]([{Utils.RoLetters}\s]+)$"); |
new(@$"^Parcurs\stren\s([{Utils.RoLetters} ]+)[-–]([{Utils.RoLetters}\s]+)$"); |
||||||
|
|
||||||
private static readonly Regex SlRegex = |
private static readonly Regex SlRegex = |
||||||
new( |
new( |
||||||
@"^(?:Fără|([0-9]+)\smin)\s(întârziere|mai\sdevreme)\sla\s(trecerea\sfără\soprire\sprin|sosirea\sîn|plecarea\sdin)\s(.+)\.$"); |
@"^(?:Fără|([0-9]+)\smin)\s(întârziere|mai\sdevreme)\sla\s(trecerea\sfără\soprire\sprin|sosirea\sîn|plecarea\sdin)\s(.+)\.$"); |
||||||
|
|
||||||
private static readonly Dictionary<char, StatusKind> SlStateMap = new() { |
private static readonly Dictionary<char, StatusKind> SlStateMap = new() { |
||||||
{ 't', StatusKind.Passing }, |
{ 't', StatusKind.Passing }, |
||||||
{ 's', StatusKind.Arrival }, |
{ 's', StatusKind.Arrival }, |
||||||
{ 'p', StatusKind.Departure }, |
{ 'p', StatusKind.Departure }, |
||||||
}; |
}; |
||||||
|
|
||||||
private static readonly Regex KmRegex = new(@"^km\s([0-9]+)$"); |
private static readonly Regex KmRegex = new(@"^km\s([0-9]+)$"); |
||||||
private static readonly Regex StoppingTimeRegex = new(@"^([0-9]+)\s(min|sec)\soprire$"); |
private static readonly Regex StoppingTimeRegex = new(@"^([0-9]+)\s(min|sec)\soprire$"); |
||||||
private static readonly Regex PlatformRegex = new(@"^linia\s(.+)$"); |
private static readonly Regex PlatformRegex = new(@"^linia\s(.+)$"); |
||||||
|
|
||||||
private static readonly Regex StationArrdepStatusRegex = |
private static readonly Regex StationArrdepStatusRegex = |
||||||
new(@"^(?:(la timp)|(?:((?:\+|-)[0-9]+) min \((?:(?:întârziere)|(?:mai devreme))\)))(\*?)$"); |
new(@"^(?:(la timp)|(?:((?:\+|-)[0-9]+) min \((?:(?:întârziere)|(?:mai devreme))\)))(\*?)$"); |
||||||
|
|
||||||
private static readonly Regex TrainNumberChangeNoteRegex = |
private static readonly Regex TrainNumberChangeNoteRegex = |
||||||
new(@"^Trenul își schimbă numărul în\s([A-Z-]+)\s([0-9]+)$"); |
new(@"^Trenul își schimbă numărul în\s([A-Z-]+)\s([0-9]+)$"); |
||||||
private static readonly Regex DepartsAsNoteRegex = |
private static readonly Regex DepartsAsNoteRegex = |
||||||
new(@"^Trenul pleacă cu numărul\s([A-Z-]+)\s([0-9]+)\sîn\s([0-9]{2}).([0-9]{2}).([0-9]{4})$"); |
new(@"^Trenul pleacă cu numărul\s([A-Z-]+)\s([0-9]+)\sîn\s([0-9]{2}).([0-9]{2}).([0-9]{4})$"); |
||||||
private static readonly Regex ReceivingWagonsNoteRegex = |
private static readonly Regex ReceivingWagonsNoteRegex = |
||||||
new(@"^Trenul primește vagoane de la\s(.+)\.$"); |
new(@"^Trenul primește vagoane de la\s(.+)\.$"); |
||||||
private static readonly Regex DetachingWagonsNoteRegex = |
private static readonly Regex DetachingWagonsNoteRegex = |
||||||
new(@"^Trenul detașează vagoane pentru stația\s(.+)\.$"); |
new(@"^Trenul detașează vagoane pentru stația\s(.+)\.$"); |
||||||
|
|
||||||
private static readonly DateTimeZone BucharestTz = DateTimeZoneProviders.Tzdb["Europe/Bucharest"]; |
private static readonly DateTimeZone BucharestTz = DateTimeZoneProviders.Tzdb["Europe/Bucharest"]; |
||||||
|
|
||||||
private readonly CookieContainer cookieContainer = new(); |
private static readonly CookieContainer CookieContainer = new(); |
||||||
private readonly HttpClient httpClient; |
private static readonly HttpClient HttpClient = new(new HttpClientHandler { |
||||||
|
CookieContainer = CookieContainer, |
||||||
public TrainScraper(HttpClientHandler? httpClientHandler = null) |
UseCookies = true, |
||||||
{ |
}) { |
||||||
if (httpClientHandler == null) { |
BaseAddress = new Uri(BaseUrl), |
||||||
httpClientHandler = new HttpClientHandler { |
DefaultRequestVersion = new Version(2, 0), |
||||||
CookieContainer = cookieContainer, |
}; |
||||||
UseCookies = true, |
|
||||||
}; |
public static async Task<ITrainScrapeResult?> Scrape(string trainNumber, DateTimeOffset? dateOverride = null) { |
||||||
} |
var dateOverrideInstant = dateOverride?.ToInstant().InZone(BucharestTz); |
||||||
else { |
dateOverride = dateOverrideInstant?.ToDateTimeOffset(); |
||||||
httpClientHandler.CookieContainer = cookieContainer; |
TrainScrapeResult result = new(); |
||||||
httpClientHandler.UseCookies = true; |
|
||||||
} |
var asConfig = Configuration.Default; |
||||||
httpClient = new HttpClient(httpClientHandler) { |
var asContext = BrowsingContext.New(asConfig); |
||||||
BaseAddress = new Uri(BaseUrl), |
|
||||||
DefaultRequestVersion = new Version(2, 0), |
var firstUrl = "Tren" |
||||||
}; |
.AppendPathSegment(trainNumber); |
||||||
} |
if (dateOverride != null) { |
||||||
|
firstUrl = firstUrl.SetQueryParam("Date", $"{dateOverride:d.MM.yyyy}"); |
||||||
public async Task<ITrainScrapeResult?> Scrape(string trainNumber, DateTimeOffset? dateOverride = null) { |
} |
||||||
var dateOverrideInstant = dateOverride?.ToInstant().InZone(BucharestTz); |
var firstResponse = await HttpClient.GetStringAsync(firstUrl); |
||||||
dateOverride = dateOverrideInstant?.ToDateTimeOffset(); |
var firstDocument = await asContext.OpenAsync(req => req.Content(firstResponse)); |
||||||
TrainScrapeResult result = new(); |
var firstForm = firstDocument.GetElementById("form-search")!; |
||||||
|
|
||||||
var asConfig = Configuration.Default; |
var firstResult = firstForm |
||||||
var asContext = BrowsingContext.New(asConfig); |
.QuerySelectorAll<IHtmlInputElement>("input") |
||||||
|
.Where(elem => elem.Name != null) |
||||||
var firstUrl = "Tren" |
.ToDictionary(elem => elem.Name!, elem => elem.Value); |
||||||
.AppendPathSegment(trainNumber); |
|
||||||
if (dateOverride != null) { |
var secondUrl = "".AppendPathSegments("Trains", "TrainsResult"); |
||||||
firstUrl = firstUrl.SetQueryParam("Date", $"{dateOverride:d.MM.yyyy}"); |
var secondResponse = await HttpClient.PostAsync( |
||||||
} |
secondUrl, |
||||||
var firstResponse = await httpClient.GetStringAsync(firstUrl); |
#pragma warning disable CS8620 |
||||||
var firstDocument = await asContext.OpenAsync(req => req.Content(firstResponse)); |
new FormUrlEncodedContent(firstResult) |
||||||
var firstForm = firstDocument.GetElementById("form-search")!; |
#pragma warning restore CS8620 |
||||||
|
); |
||||||
var firstResult = firstForm |
var secondResponseContent = await secondResponse.Content.ReadAsStringAsync(); |
||||||
.QuerySelectorAll<IHtmlInputElement>("input") |
var secondDocument = await asContext.OpenAsync( |
||||||
.Where(elem => elem.Name != null) |
req => req.Content(secondResponseContent) |
||||||
.ToDictionary(elem => elem.Name!, elem => elem.Value); |
); |
||||||
|
|
||||||
var secondUrl = "".AppendPathSegments("Trains", "TrainsResult"); |
var (trainInfoDiv, (_, (_, (resultsDiv, _)))) = secondDocument |
||||||
var secondResponse = await httpClient.PostAsync( |
.QuerySelectorAll("body > div"); |
||||||
secondUrl, |
if (trainInfoDiv == null) { |
||||||
#pragma warning disable CS8620 |
return null; |
||||||
new FormUrlEncodedContent(firstResult) |
} |
||||||
#pragma warning restore CS8620 |
if (resultsDiv == null) { |
||||||
); |
throw new TrainNotThisDayException(); |
||||||
var secondResponseContent = await secondResponse.Content.ReadAsStringAsync(); |
} |
||||||
var secondDocument = await asContext.OpenAsync( |
trainInfoDiv = trainInfoDiv.QuerySelectorAll(":scope > div > div").First(); |
||||||
req => req.Content(secondResponseContent) |
|
||||||
); |
(result.Rank, (result.Number, (result.Date, _))) = (TrainInfoRegex.Match( |
||||||
|
trainInfoDiv.QuerySelector(":scope > h2")!.Text().WithCollapsedSpaces() |
||||||
var (trainInfoDiv, (_, (_, (resultsDiv, _)))) = secondDocument |
).Groups as IEnumerable<Group>).Select(group => group.Value).Skip(1); |
||||||
.QuerySelectorAll("body > div"); |
var (scrapedDateD, (scrapedDateM, (scrapedDateY, _))) = result.Date |
||||||
if (trainInfoDiv == null) { |
.Split('.') |
||||||
return null; |
.Select(int.Parse); |
||||||
} |
var date = new DateTime(scrapedDateY, scrapedDateM, scrapedDateD); |
||||||
if (resultsDiv == null) { |
|
||||||
throw new TrainNotThisDayException(); |
result.Operator = (OperatorRegex.Match( |
||||||
} |
trainInfoDiv.QuerySelector(":scope > p")!.Text().WithCollapsedSpaces() |
||||||
trainInfoDiv = trainInfoDiv.QuerySelectorAll(":scope > div > div").First(); |
).Groups as IEnumerable<Group>).Skip(1).First().Value; |
||||||
|
|
||||||
(result.Rank, (result.Number, (result.Date, _))) = (TrainInfoRegex.Match( |
foreach (var groupDiv in resultsDiv.QuerySelectorAll(":scope > div")) { |
||||||
trainInfoDiv.QuerySelector(":scope > h2")!.Text().WithCollapsedSpaces() |
result.AddTrainGroup(group => { |
||||||
).Groups as IEnumerable<Group>).Select(group => group.Value).Skip(1); |
var statusDiv = groupDiv.QuerySelectorAll(":scope > div").First(); |
||||||
var (scrapedDateD, (scrapedDateM, (scrapedDateY, _))) = result.Date |
var routeText = statusDiv.QuerySelector(":scope > h4")!.Text().WithCollapsedSpaces(); |
||||||
.Split('.') |
group.ConfigureRoute(route => { |
||||||
.Select(int.Parse); |
(route.From, (route.To, _)) = (RouteRegex.Match(routeText).Groups as IEnumerable<Group>).Skip(1) |
||||||
var date = new DateTime(scrapedDateY, scrapedDateM, scrapedDateD); |
.Select(group => group.Value); |
||||||
|
}); |
||||||
result.Operator = (OperatorRegex.Match( |
|
||||||
trainInfoDiv.QuerySelector(":scope > p")!.Text().WithCollapsedSpaces() |
try { |
||||||
).Groups as IEnumerable<Group>).Skip(1).First().Value; |
var statusLineMatch = |
||||||
|
SlRegex.Match(statusDiv.QuerySelector(":scope > div")!.Text().WithCollapsedSpaces()); |
||||||
foreach (var groupDiv in resultsDiv.QuerySelectorAll(":scope > div")) { |
var (slmDelay, (slmLate, (slmArrival, (slmStation, _)))) = |
||||||
result.AddTrainGroup(group => { |
(statusLineMatch.Groups as IEnumerable<Group>).Skip(1).Select(group => group.Value); |
||||||
var statusDiv = groupDiv.QuerySelectorAll(":scope > div").First(); |
group.MakeStatus(status => { |
||||||
var routeText = statusDiv.QuerySelector(":scope > h4")!.Text().WithCollapsedSpaces(); |
status.Delay = string.IsNullOrEmpty(slmDelay) ? 0 : |
||||||
group.ConfigureRoute(route => { |
slmLate == "întârziere" ? int.Parse(slmDelay) : -int.Parse(slmDelay); |
||||||
(route.From, (route.To, _)) = (RouteRegex.Match(routeText).Groups as IEnumerable<Group>).Skip(1) |
status.Station = slmStation; |
||||||
.Select(group => group.Value); |
status.State = SlStateMap[slmArrival[0]]; |
||||||
}); |
}); |
||||||
|
} |
||||||
try { |
catch { |
||||||
var statusLineMatch = |
// ignored |
||||||
SlRegex.Match(statusDiv.QuerySelector(":scope > div")!.Text().WithCollapsedSpaces()); |
} |
||||||
var (slmDelay, (slmLate, (slmArrival, (slmStation, _)))) = |
|
||||||
(statusLineMatch.Groups as IEnumerable<Group>).Skip(1).Select(group => group.Value); |
Utils.DateTimeSequencer dtSeq = new(date.Year, date.Month, date.Day); |
||||||
group.MakeStatus(status => { |
var stations = statusDiv.QuerySelectorAll(":scope > ul > li"); |
||||||
status.Delay = string.IsNullOrEmpty(slmDelay) ? 0 : |
foreach (var station in stations) { |
||||||
slmLate == "întârziere" ? int.Parse(slmDelay) : -int.Parse(slmDelay); |
group.AddStopDescription(stopDescription => { |
||||||
status.Station = slmStation; |
var (left, (middle, (right, _))) = station |
||||||
status.State = SlStateMap[slmArrival[0]]; |
.QuerySelectorAll(":scope > div > div"); |
||||||
}); |
var (stopDetails, (stopNotes, _)) = middle |
||||||
} |
.QuerySelectorAll(":scope > div > div > div"); |
||||||
catch { |
stopDescription.Name = stopDetails |
||||||
// ignored |
.QuerySelectorAll(":scope > div")[0] |
||||||
} |
.Text() |
||||||
|
.WithCollapsedSpaces(); |
||||||
Utils.DateTimeSequencer dtSeq = new(date.Year, date.Month, date.Day); |
var scrapedKm = stopDetails |
||||||
var stations = statusDiv.QuerySelectorAll(":scope > ul > li"); |
.QuerySelectorAll(":scope > div")[1] |
||||||
foreach (var station in stations) { |
.Text() |
||||||
group.AddStopDescription(stopDescription => { |
.WithCollapsedSpaces(); |
||||||
var (left, (middle, (right, _))) = station |
stopDescription.Km = int.Parse( |
||||||
.QuerySelectorAll(":scope > div > div"); |
(KmRegex.Match(scrapedKm).Groups as IEnumerable<Group>).Skip(1).First().Value |
||||||
var (stopDetails, (stopNotes, _)) = middle |
); |
||||||
.QuerySelectorAll(":scope > div > div > div"); |
var scrapedStoppingTime = stopDetails |
||||||
stopDescription.Name = stopDetails |
.QuerySelectorAll(":scope > div")[2] |
||||||
.QuerySelectorAll(":scope > div")[0] |
.Text() |
||||||
.Text() |
.WithCollapsedSpaces(); |
||||||
.WithCollapsedSpaces(); |
if (!string.IsNullOrEmpty(scrapedStoppingTime)) { |
||||||
stopDescription.LinkName = new Flurl.Url(stopDetails |
var (stValue, (stMinsec, _)) = |
||||||
.QuerySelectorAll(":scope > div")[0] |
(StoppingTimeRegex.Match(scrapedStoppingTime).Groups as IEnumerable<Group>) |
||||||
.QuerySelector(":scope a") |
.Skip(1) |
||||||
.Attributes["href"] |
.Select(group => group.Value); |
||||||
.Value).PathSegments.Last(); |
stopDescription.StoppingTime = int.Parse(stValue); |
||||||
var scrapedKm = stopDetails |
if (stMinsec == "min") stopDescription.StoppingTime *= 60; |
||||||
.QuerySelectorAll(":scope > div")[1] |
} |
||||||
.Text() |
|
||||||
.WithCollapsedSpaces(); |
var scrapedPlatform = stopDetails |
||||||
stopDescription.Km = int.Parse( |
.QuerySelectorAll(":scope > div")[3] |
||||||
(KmRegex.Match(scrapedKm).Groups as IEnumerable<Group>).Skip(1).First().Value |
.Text() |
||||||
); |
.WithCollapsedSpaces(); |
||||||
var scrapedStoppingTime = stopDetails |
if (!string.IsNullOrEmpty(scrapedPlatform)) |
||||||
.QuerySelectorAll(":scope > div")[2] |
stopDescription.Platform = PlatformRegex.Match(scrapedPlatform).Groups[1].Value; |
||||||
.Text() |
|
||||||
.WithCollapsedSpaces(); |
void ScrapeTime(IElement element, ref TrainStopArrDep arrDep) { |
||||||
if (!string.IsNullOrEmpty(scrapedStoppingTime)) { |
var parts = element.QuerySelectorAll(":scope > div > div > div"); |
||||||
var (stValue, (stMinsec, _)) = |
if (parts.Length == 0) throw new OperationCanceledException(); |
||||||
(StoppingTimeRegex.Match(scrapedStoppingTime).Groups as IEnumerable<Group>) |
var time = parts[0]; |
||||||
.Skip(1) |
var scrapedTime = time.Text().WithCollapsedSpaces(); |
||||||
.Select(group => group.Value); |
var (stHour, (stMin, _)) = scrapedTime.Split(':').Select(int.Parse); |
||||||
stopDescription.StoppingTime = int.Parse(stValue); |
arrDep.ScheduleTime = BucharestTz.AtLeniently(dtSeq.Next(stHour, stMin).ToLocalDateTime()) |
||||||
if (stMinsec == "min") stopDescription.StoppingTime *= 60; |
.ToDateTimeOffset(); |
||||||
} |
|
||||||
|
if (parts.Length < 2) return; |
||||||
var scrapedPlatform = stopDetails |
|
||||||
.QuerySelectorAll(":scope > div")[3] |
var statusElement = parts[1]; |
||||||
.Text() |
var (onTime, (delay, (approx, _))) = (StationArrdepStatusRegex.Match( |
||||||
.WithCollapsedSpaces(); |
statusElement.Text().WithCollapsedSpaces(replaceWith: " ") |
||||||
if (!string.IsNullOrEmpty(scrapedPlatform)) |
).Groups as IEnumerable<Group>).Skip(1).Select(group => group.Value); |
||||||
stopDescription.Platform = PlatformRegex.Match(scrapedPlatform).Groups[1].Value; |
arrDep.MakeStatus(status => { |
||||||
|
status.Delay = string.IsNullOrEmpty(onTime) ? int.Parse(delay) : 0; |
||||||
void ScrapeTime(IElement element, ref TrainStopArrDep arrDep) { |
status.Real = string.IsNullOrEmpty(approx); |
||||||
var parts = element.QuerySelectorAll(":scope > div > div > div"); |
}); |
||||||
if (parts.Length == 0) throw new OperationCanceledException(); |
} |
||||||
var time = parts[0]; |
|
||||||
var scrapedTime = time.Text().WithCollapsedSpaces(); |
try { |
||||||
var (stHour, (stMin, _)) = scrapedTime.Split(':').Select(int.Parse); |
stopDescription.MakeArrival(arrival => { ScrapeTime(left, ref arrival); }); |
||||||
arrDep.ScheduleTime = BucharestTz.AtLeniently(dtSeq.Next(stHour, stMin).ToLocalDateTime()) |
} |
||||||
.ToDateTimeOffset(); |
catch (OperationCanceledException) { } |
||||||
|
|
||||||
if (parts.Length < 2) return; |
try { |
||||||
|
stopDescription.MakeDeparture(departure => { ScrapeTime(right, ref departure); }); |
||||||
var statusElement = parts[1]; |
} |
||||||
var (onTime, (delay, (approx, _))) = (StationArrdepStatusRegex.Match( |
catch (OperationCanceledException) { } |
||||||
statusElement.Text().WithCollapsedSpaces(replaceWith: " ") |
|
||||||
).Groups as IEnumerable<Group>).Skip(1).Select(group => group.Value); |
foreach (var noteDiv in stopNotes.QuerySelectorAll(":scope > div > div")) { |
||||||
arrDep.MakeStatus(status => { |
var noteText = noteDiv.Text().WithCollapsedSpaces(); |
||||||
if (string.IsNullOrEmpty(onTime) && delay == null) { |
Match trainNumberChangeMatch, departsAsMatch, detachingWagons, receivingWagons; |
||||||
status.Cancelled = true; |
if ((trainNumberChangeMatch = TrainNumberChangeNoteRegex.Match(noteText)).Success) { |
||||||
} |
stopDescription.AddTrainNumberChangeNote(trainNumberChangeMatch.Groups[1].Value, trainNumberChangeMatch.Groups[2].Value); |
||||||
else { |
} |
||||||
status.Delay = string.IsNullOrEmpty(onTime) ? int.Parse(delay) : 0; |
else if ((departsAsMatch = DepartsAsNoteRegex.Match(noteText)).Success) { |
||||||
} |
var groups = departsAsMatch.Groups; |
||||||
status.Real = string.IsNullOrEmpty(approx); |
var departureDate = BucharestTz.AtStrictly(new(int.Parse(groups[5].Value), int.Parse(groups[4].Value), int.Parse(groups[3].Value), 0, 0)); |
||||||
}); |
stopDescription.AddDepartsAsNote(groups[1].Value, groups[2].Value, departureDate.ToDateTimeOffset()); |
||||||
} |
} |
||||||
|
else if ((detachingWagons = DetachingWagonsNoteRegex.Match(noteText)).Success) { |
||||||
try { |
stopDescription.AddDetachingWagonsNote(detachingWagons.Groups[1].Value); |
||||||
stopDescription.MakeArrival(arrival => { ScrapeTime(left, ref arrival); }); |
} |
||||||
} |
else if ((receivingWagons = ReceivingWagonsNoteRegex.Match(noteText)).Success) { |
||||||
catch (OperationCanceledException) { } |
stopDescription.AddReceivingWagonsNote(receivingWagons.Groups[1].Value); |
||||||
|
} |
||||||
try { |
} |
||||||
stopDescription.MakeDeparture(departure => { ScrapeTime(right, ref departure); }); |
}); |
||||||
} |
} |
||||||
catch (OperationCanceledException) { } |
}); |
||||||
|
} |
||||||
foreach (var noteDiv in stopNotes.QuerySelectorAll(":scope > div > div")) { |
return result; |
||||||
var noteText = noteDiv.Text().WithCollapsedSpaces(); |
} |
||||||
Match trainNumberChangeMatch, departsAsMatch, detachingWagons, receivingWagons; |
} |
||||||
if ((trainNumberChangeMatch = TrainNumberChangeNoteRegex.Match(noteText)).Success) { |
} // namespace |
||||||
stopDescription.AddTrainNumberChangeNote(trainNumberChangeMatch.Groups[1].Value, trainNumberChangeMatch.Groups[2].Value); |
|
||||||
} |
|
||||||
else if ((departsAsMatch = DepartsAsNoteRegex.Match(noteText)).Success) { |
|
||||||
var groups = departsAsMatch.Groups; |
|
||||||
var departureDate = BucharestTz.AtStrictly(new(int.Parse(groups[5].Value), int.Parse(groups[4].Value), int.Parse(groups[3].Value), 0, 0)); |
|
||||||
stopDescription.AddDepartsAsNote(groups[1].Value, groups[2].Value, departureDate.ToDateTimeOffset()); |
|
||||||
} |
|
||||||
else if ((detachingWagons = DetachingWagonsNoteRegex.Match(noteText)).Success) { |
|
||||||
stopDescription.AddDetachingWagonsNote(detachingWagons.Groups[1].Value); |
|
||||||
} |
|
||||||
else if ((receivingWagons = ReceivingWagonsNoteRegex.Match(noteText)).Success) { |
|
||||||
stopDescription.AddReceivingWagonsNote(receivingWagons.Groups[1].Value); |
|
||||||
} |
|
||||||
} |
|
||||||
}); |
|
||||||
} |
|
||||||
}); |
|
||||||
} |
|
||||||
return result; |
|
||||||
} |
|
||||||
} |
|
||||||
} // namespace |
|
||||||
|
@ -1,40 +0,0 @@ |
|||||||
using System; |
|
||||||
using System.Collections.Generic; |
|
||||||
using System.Threading.Tasks; |
|
||||||
using Microsoft.AspNetCore.Http; |
|
||||||
using Microsoft.AspNetCore.Mvc; |
|
||||||
using scraper.Models.Itinerary; |
|
||||||
using Server.Services.Interfaces; |
|
||||||
|
|
||||||
namespace Server.Controllers.V3; |
|
||||||
|
|
||||||
[ApiController] |
|
||||||
[ApiExplorerSettings(GroupName = "v3")] |
|
||||||
[Route("/v3/[controller]")]
|
|
||||||
public class ItinerariesController : Controller { |
|
||||||
private IDataManager DataManager { get; } |
|
||||||
private IDatabase Database { get; } |
|
||||||
|
|
||||||
public ItinerariesController(IDataManager dataManager, IDatabase database) { |
|
||||||
this.DataManager = dataManager; |
|
||||||
this.Database = database; |
|
||||||
} |
|
||||||
|
|
||||||
|
|
||||||
[HttpGet("")] |
|
||||||
[ProducesResponseType(typeof(IEnumerable<IItinerary>), StatusCodes.Status200OK)] |
|
||||||
[ProducesResponseType(StatusCodes.Status404NotFound)] |
|
||||||
public async Task<ActionResult<IEnumerable<IItinerary>>> FindItineraries( |
|
||||||
[FromQuery] string from, |
|
||||||
[FromQuery] string to, |
|
||||||
[FromQuery] DateTimeOffset? date |
|
||||||
) { |
|
||||||
var itineraries = await DataManager.FetchItineraries(from, to, date); |
|
||||||
|
|
||||||
if (itineraries == null) { |
|
||||||
return NotFound(); |
|
||||||
} |
|
||||||
|
|
||||||
return Ok(itineraries); |
|
||||||
} |
|
||||||
} |
|
@ -1,5 +0,0 @@ |
|||||||
namespace Server.Models.Database; |
|
||||||
|
|
||||||
public record MongoSettings(string ConnectionString, string DatabaseName) { |
|
||||||
public MongoSettings() : this("", "") { } |
|
||||||
} |
|
@ -1,17 +0,0 @@ |
|||||||
using MongoDB.Bson; |
|
||||||
using MongoDB.Bson.Serialization.Attributes; |
|
||||||
using Newtonsoft.Json; |
|
||||||
|
|
||||||
namespace Server.Models.Database; |
|
||||||
|
|
||||||
public record StationAlias( |
|
||||||
[property: BsonId] |
|
||||||
[property: BsonRepresentation(BsonType.ObjectId)] |
|
||||||
[property: JsonProperty(NullValueHandling = NullValueHandling.Ignore)] |
|
||||||
string? Id, |
|
||||||
string Name, |
|
||||||
[property: BsonRepresentation(BsonType.ObjectId)] |
|
||||||
string? ListingId |
|
||||||
) { |
|
||||||
public StationAlias() : this(null, "", null) { } |
|
||||||
} |
|
@ -1,18 +0,0 @@ |
|||||||
using System.Collections.Generic; |
|
||||||
using MongoDB.Bson; |
|
||||||
using MongoDB.Bson.Serialization.Attributes; |
|
||||||
using Newtonsoft.Json; |
|
||||||
|
|
||||||
namespace Server.Models.Database; |
|
||||||
|
|
||||||
public record StationListing( |
|
||||||
[property: BsonId] |
|
||||||
[property: BsonRepresentation(BsonType.ObjectId)] |
|
||||||
[property: JsonProperty(NullValueHandling = NullValueHandling.Ignore)] |
|
||||||
string? Id, |
|
||||||
string Name, |
|
||||||
List<string> StoppedAtBy |
|
||||||
) { |
|
||||||
public StationListing() : this(null, "", new()) { } |
|
||||||
public StationListing(string name, List<string> stoppedAtBy) : this(null, name, stoppedAtBy) { } |
|
||||||
} |
|
@ -1,20 +0,0 @@ |
|||||||
using MongoDB.Bson; |
|
||||||
using MongoDB.Bson.Serialization.Attributes; |
|
||||||
using Newtonsoft.Json; |
|
||||||
|
|
||||||
namespace Server.Models.Database; |
|
||||||
|
|
||||||
public record TrainListing( |
|
||||||
[property: BsonId] |
|
||||||
[property: BsonRepresentation(BsonType.ObjectId)] |
|
||||||
[property: JsonProperty(NullValueHandling = NullValueHandling.Ignore)] |
|
||||||
string? Id, |
|
||||||
string Rank, |
|
||||||
string Number, |
|
||||||
string Company, |
|
||||||
[property: BsonRepresentation(BsonType.ObjectId)] |
|
||||||
string? LatestDescription |
|
||||||
) { |
|
||||||
public TrainListing() : this(null, "", "", "", null) { } |
|
||||||
public TrainListing(string rank, string number, string company) : this(null, rank, number, company, null) { } |
|
||||||
} |
|
@ -1,9 +0,0 @@ |
|||||||
namespace Server.Models; |
|
||||||
|
|
||||||
public record ProxySettings(string Url, ProxyCredentials? Credentials = null) { |
|
||||||
public ProxySettings() : this("") { } |
|
||||||
} |
|
||||||
|
|
||||||
public record ProxyCredentials(string Username, string Password) { |
|
||||||
public ProxyCredentials() : this("", "") { } |
|
||||||
} |
|
@ -1,14 +1,11 @@ |
|||||||
using System; |
using System; |
||||||
using System.Collections.Generic; |
|
||||||
using System.Threading.Tasks; |
using System.Threading.Tasks; |
||||||
using InfoferScraper.Models.Train; |
using InfoferScraper.Models.Train; |
||||||
using InfoferScraper.Models.Station; |
using InfoferScraper.Models.Station; |
||||||
using scraper.Models.Itinerary; |
|
||||||
|
|
||||||
namespace Server.Services.Interfaces; |
namespace Server.Services.Interfaces; |
||||||
|
|
||||||
public interface IDataManager { |
public interface IDataManager { |
||||||
public Task<IStationScrapeResult?> FetchStation(string stationName, DateTimeOffset date); |
public Task<IStationScrapeResult?> FetchStation(string stationName, DateTimeOffset date); |
||||||
public Task<ITrainScrapeResult?> FetchTrain(string trainNumber, DateTimeOffset date); |
public Task<ITrainScrapeResult?> FetchTrain(string trainNumber, DateTimeOffset date); |
||||||
public Task<IReadOnlyList<IItinerary>?> FetchItineraries(string from, string to, DateTimeOffset? date = null); |
|
||||||
} |
} |
||||||
|
@ -1,38 +0,0 @@ |
|||||||
using System; |
|
||||||
using System.Threading; |
|
||||||
using System.Threading.Tasks; |
|
||||||
|
|
||||||
namespace Server.Utils; |
|
||||||
|
|
||||||
// Inspired from: https://stackoverflow.com/a/57517920 |
|
||||||
public class AsyncThrottle { |
|
||||||
private readonly SemaphoreSlim openConnectionSemaphore; |
|
||||||
|
|
||||||
public AsyncThrottle(int limit) { |
|
||||||
openConnectionSemaphore = new(limit, limit); |
|
||||||
} |
|
||||||
|
|
||||||
public async Task<T> MakeRequest<T>(Task<T> task) => await MakeRequest(() => task); |
|
||||||
public async Task<T> MakeRequest<T>(Func<Task<T>> taskCreator) { |
|
||||||
await openConnectionSemaphore.WaitAsync(); |
|
||||||
try { |
|
||||||
var result = await taskCreator(); |
|
||||||
return result; |
|
||||||
} |
|
||||||
finally { |
|
||||||
openConnectionSemaphore.Release(); |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
|
|
||||||
public async Task MakeRequest(Task task) => await MakeRequest(() => task); |
|
||||||
public async Task MakeRequest(Func<Task> taskCreator) { |
|
||||||
await openConnectionSemaphore.WaitAsync(); |
|
||||||
try { |
|
||||||
await taskCreator(); |
|
||||||
} |
|
||||||
finally { |
|
||||||
openConnectionSemaphore.Release(); |
|
||||||
} |
|
||||||
} |
|
||||||
} |
|
@ -1,7 +0,0 @@ |
|||||||
using NodaTime; |
|
||||||
|
|
||||||
namespace Server.Utils; |
|
||||||
|
|
||||||
public static class Constants { |
|
||||||
public static readonly DateTimeZone BucharestTz = DateTimeZoneProviders.Tzdb["Europe/Bucharest"]; |
|
||||||
} |
|
@ -1,33 +0,0 @@ |
|||||||
using System.Collections.Generic; |
|
||||||
using System.Threading.Tasks; |
|
||||||
using MongoDB.Driver; |
|
||||||
|
|
||||||
namespace Server.Utils; |
|
||||||
|
|
||||||
public record IAsyncCusorAsyncEnumerator<T>(IAsyncCursor<T> Cursor) { |
|
||||||
private IEnumerator<T>? enumerator = null; |
|
||||||
|
|
||||||
public T Current => enumerator!.Current; |
|
||||||
|
|
||||||
public async Task<bool> MoveNextAsync() { |
|
||||||
bool result; |
|
||||||
if (enumerator != null) { |
|
||||||
result = enumerator.MoveNext(); |
|
||||||
if (result) return true; |
|
||||||
} |
|
||||||
|
|
||||||
result = await Cursor.MoveNextAsync(); |
|
||||||
if (result) { |
|
||||||
enumerator = Cursor.Current.GetEnumerator(); |
|
||||||
return true; |
|
||||||
} |
|
||||||
|
|
||||||
return false; |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
public static class IAsyncCursorExtensions { |
|
||||||
public static IAsyncCusorAsyncEnumerator<T> GetAsyncEnumerator<T>(this IAsyncCursor<T> cursor) { |
|
||||||
return new(cursor); |
|
||||||
} |
|
||||||
} |
|
Loading…
Reference in new issue