Browse Source

Add LinkName to stations when querying a train

master
Kenneth Bruen 2 years ago
parent
commit
5785e4a98b
Signed by: kbruen
GPG Key ID: C1980A470C3EE5B1
  1. 634
      scraper/src/Models/Train.cs
  2. 483
      scraper/src/Scrapers/Train.cs

634
scraper/src/Models/Train.cs

@ -1,316 +1,318 @@
using System; using System;
using System.Collections.Generic; using System.Collections.Generic;
using System.Text.Json; using System.Text.Json;
using System.Text.Json.Serialization; using System.Text.Json.Serialization;
using InfoferScraper.Models.Status; using InfoferScraper.Models.Status;
using InfoferScraper.Models.Train.JsonConverters; using InfoferScraper.Models.Train.JsonConverters;
namespace InfoferScraper.Models.Train {
    #region Interfaces

    /// <summary>
    ///     Read-only view of everything scraped for one train on one date.
    /// </summary>
    public interface ITrainScrapeResult {
        /// <summary>Train rank (category code), as printed next to the train number.</summary>
        public string Rank { get; }

        /// <summary>Train number, kept as text.</summary>
        public string Number { get; }

        /// <summary>
        ///     Date in the DD.MM.YYYY format
        ///     This date is taken as-is from the result.
        /// </summary>
        public string Date { get; }

        /// <summary>Name of the operator running the train.</summary>
        public string Operator { get; }

        /// <summary>The groups (route sections) that make up the train.</summary>
        public IReadOnlyList<ITrainGroup> Groups { get; }
    }

    /// <summary>
    ///     One group of a train: a route, an optional live status and the list of stops.
    /// </summary>
    public interface ITrainGroup {
        public ITrainRoute Route { get; }

        /// <summary>Live status of the group; <c>null</c> when none was set.</summary>
        public ITrainStatus? Status { get; }

        public IReadOnlyList<ITrainStopDescription> Stations { get; }
    }

    /// <summary>
    ///     Endpoints of a train group's route.
    /// </summary>
    public interface ITrainRoute {
        public string From { get; }
        public string To { get; }
    }

    /// <summary>
    ///     Latest known position report for a train group.
    /// </summary>
    public interface ITrainStatus {
        /// <summary>Reported delay; the scraper fills it in minutes, negative when the train is early.</summary>
        public int Delay { get; }

        /// <summary>Station the report refers to.</summary>
        public string Station { get; }

        /// <summary>Whether the train passed through, arrived at or departed from <see cref="Station" />.</summary>
        public StatusKind State { get; }
    }

    /// <summary>
    ///     Description of a single stop on a train group's route.
    /// </summary>
    public interface ITrainStopDescription {
        /// <summary>Display name of the station.</summary>
        public string Name { get; }

        /// <summary>
        ///     Identifier of the station as used in links: the scraper fills it with the last
        ///     path segment of the station's hyperlink.
        /// </summary>
        public string LinkName { get; }

        /// <summary>Kilometre mark of the station along the route.</summary>
        public int Km { get; }

        /// <summary>
        ///     The time the train waits in the station in seconds
        /// </summary>
        public int? StoppingTime { get; }

        /// <summary>Platform ("linia") the train uses; <c>null</c> when not published.</summary>
        public string? Platform { get; }

        /// <summary>Arrival information; <c>null</c> when the stop has no arrival.</summary>
        public ITrainStopArrDep? Arrival { get; }

        /// <summary>Departure information; <c>null</c> when the stop has no departure.</summary>
        public ITrainStopArrDep? Departure { get; }

        /// <summary>
        ///     Notes attached to the stop. Every element added by the implementation in this
        ///     file is an <see cref="ITrainStopNote" />.
        /// </summary>
        // NOTE(review): presumably typed as object so that serialization uses each note's
        // runtime type - confirm before narrowing the element type.
        public IReadOnlyList<object> Notes { get; }
    }

    /// <summary>
    ///     Base contract of every stop note; <see cref="Kind" /> discriminates the concrete note.
    /// </summary>
    public interface ITrainStopNote {
        public NoteKind Kind { get; }
    }

    /// <summary>
    ///     Note saying that the train changes its number at this stop.
    /// </summary>
    public interface ITrainStopTrainNumberChangeNote : ITrainStopNote {
        public string Rank { get; }
        public string Number { get; }
    }

    /// <summary>
    ///     Note saying that the train departs under another number on a given date.
    /// </summary>
    public interface ITrainStopDepartsAsNote : ITrainStopNote {
        public string Rank { get; }
        public string Number { get; }
        public DateTimeOffset DepartureDate { get; }
    }

    /// <summary>
    ///     Note saying that the train detaches wagons for <see cref="Station" />.
    /// </summary>
    public interface ITrainStopDetachingWagonsNote : ITrainStopNote {
        public string Station { get; }
    }

    /// <summary>
    ///     Note saying that the train receives wagons from <see cref="Station" />.
    /// </summary>
    public interface ITrainStopReceivingWagonsNote : ITrainStopNote {
        public string Station { get; }
    }

    /// <summary>
    ///     Scheduled time of an arrival or departure plus its optional live status.
    /// </summary>
    public interface ITrainStopArrDep {
        public DateTimeOffset ScheduleTime { get; }
        public IStatus? Status { get; }
    }

    #endregion

    /// <summary>
    ///     Kind of event a train status report refers to. Serialized as a lowercase string
    ///     by <see cref="StatusKindConverter" />.
    /// </summary>
    [JsonConverter(typeof(StatusKindConverter))]
    public enum StatusKind {
        Passing,
        Arrival,
        Departure,
    }

    /// <summary>
    ///     Discriminator of stop notes. Serialized as a camelCase string by
    ///     <see cref="NoteKindConverter" />.
    /// </summary>
    [JsonConverter(typeof(NoteKindConverter))]
    public enum NoteKind {
        TrainNumberChange,
        DetachingWagons,
        ReceivingWagons,
        DepartsAs,
    }

    #region Implementations

    /// <summary>
    ///     Mutable implementation of <see cref="ITrainScrapeResult" /> that the scraper fills in.
    /// </summary>
    internal record TrainScrapeResult : ITrainScrapeResult {
        private List<ITrainGroup> ModifyableGroups { get; set; } = new();
        public string Rank { get; set; } = "";
        public string Number { get; set; } = "";
        public string Date { get; set; } = "";
        public string Operator { get; set; } = "";
        public IReadOnlyList<ITrainGroup> Groups => ModifyableGroups.AsReadOnly();

        private void AddTrainGroup(ITrainGroup trainGroup) {
            ModifyableGroups.Add(trainGroup);
        }

        /// <summary>
        ///     Creates a new <see cref="TrainGroup" />, lets <paramref name="configurator" />
        ///     populate it and then appends it to <see cref="Groups" />.
        /// </summary>
        internal void AddTrainGroup(Action<TrainGroup> configurator) {
            TrainGroup newTrainGroup = new();
            configurator(newTrainGroup);
            AddTrainGroup(newTrainGroup);
        }
    }

    /// <summary>
    ///     Mutable implementation of <see cref="ITrainGroup" />.
    /// </summary>
    internal record TrainGroup : ITrainGroup {
        private List<ITrainStopDescription> ModifyableStations { get; set; } = new();
        public ITrainRoute Route { get; init; } = new TrainRoute();
        public ITrainStatus? Status { get; private set; }
        public IReadOnlyList<ITrainStopDescription> Stations => ModifyableStations.AsReadOnly();

        private void AddStopDescription(ITrainStopDescription stopDescription) {
            ModifyableStations.Add(stopDescription);
        }

        /// <summary>
        ///     Creates a new <see cref="TrainStopDescription" />, lets
        ///     <paramref name="configurator" /> populate it and appends it to <see cref="Stations" />.
        /// </summary>
        internal void AddStopDescription(Action<TrainStopDescription> configurator) {
            TrainStopDescription newStopDescription = new();
            configurator(newStopDescription);
            AddStopDescription(newStopDescription);
        }

        /// <summary>
        ///     Hands the current route to <paramref name="configurator" /> for in-place editing.
        /// </summary>
        internal void ConfigureRoute(Action<TrainRoute> configurator) {
            // The cast throws InvalidCastException if Route was initialised with
            // another ITrainRoute implementation.
            configurator((TrainRoute)Route);
        }

        /// <summary>
        ///     Builds a new <see cref="TrainStatus" /> through <paramref name="configurator" />
        ///     and stores it in <see cref="Status" />. If the configurator throws,
        ///     <see cref="Status" /> is left untouched.
        /// </summary>
        internal void MakeStatus(Action<TrainStatus> configurator) {
            TrainStatus newStatus = new();
            configurator(newStatus);
            Status = newStatus;
        }
    }

    /// <summary>
    ///     Mutable implementation of <see cref="ITrainRoute" />; both ends start as empty strings.
    /// </summary>
    internal record TrainRoute : ITrainRoute {
        public TrainRoute() {
            From = "";
            To = "";
        }

        public string From { get; set; }
        public string To { get; set; }
    }

    /// <summary>
    ///     Mutable implementation of <see cref="ITrainStatus" />.
    /// </summary>
    internal record TrainStatus : ITrainStatus {
        public int Delay { get; set; }
        public string Station { get; set; } = "";
        public StatusKind State { get; set; }
    }

    /// <summary>
    ///     Mutable implementation of <see cref="ITrainStopDescription" />.
    /// </summary>
    internal record TrainStopDescription : ITrainStopDescription {
        private List<ITrainStopNote> ModifyableNotes { get; } = new();
        public string Name { get; set; } = "";
        public string LinkName { get; set; } = "";
        public int Km { get; set; }
        public int? StoppingTime { get; set; }
        public string? Platform { get; set; }
        public ITrainStopArrDep? Arrival { get; private set; }
        public ITrainStopArrDep? Departure { get; private set; }
        public IReadOnlyList<object> Notes => ModifyableNotes.AsReadOnly();

        /// <summary>
        ///     Builds a new <see cref="TrainStopArrDep" /> through <paramref name="configurator" />
        ///     and stores it in <see cref="Arrival" />. If the configurator throws,
        ///     <see cref="Arrival" /> is left untouched.
        /// </summary>
        internal void MakeArrival(Action<TrainStopArrDep> configurator) {
            TrainStopArrDep newArrival = new();
            configurator(newArrival);
            Arrival = newArrival;
        }

        /// <summary>
        ///     Builds a new <see cref="TrainStopArrDep" /> through <paramref name="configurator" />
        ///     and stores it in <see cref="Departure" />. If the configurator throws,
        ///     <see cref="Departure" /> is left untouched.
        /// </summary>
        internal void MakeDeparture(Action<TrainStopArrDep> configurator) {
            TrainStopArrDep newDeparture = new();
            configurator(newDeparture);
            Departure = newDeparture;
        }

        private class DepartsAsNote : ITrainStopDepartsAsNote {
            public NoteKind Kind => NoteKind.DepartsAs;
            public string Rank { get; set; } = "";
            public string Number { get; set; } = "";
            public DateTimeOffset DepartureDate { get; set; }
        }

        private class TrainNumberChangeNote : ITrainStopTrainNumberChangeNote {
            public NoteKind Kind => NoteKind.TrainNumberChange;
            public string Rank { get; set; } = "";
            public string Number { get; set; } = "";
        }

        private class ReceivingWagonsNote : ITrainStopReceivingWagonsNote {
            public NoteKind Kind => NoteKind.ReceivingWagons;
            public string Station { get; set; } = "";
        }

        // Implements the detaching-wagons contract so that the interface agrees with Kind;
        // it previously implemented ITrainStopReceivingWagonsNote, which made type checks
        // against the note interfaces report the wrong kind of note.
        private class DetachingWagonsNote : ITrainStopDetachingWagonsNote {
            public NoteKind Kind => NoteKind.DetachingWagons;
            public string Station { get; set; } = "";
        }

        /// <summary>Appends a "departs as" note to <see cref="Notes" />.</summary>
        internal void AddDepartsAsNote(string rank, string number, DateTimeOffset departureDate) {
            ModifyableNotes.Add(new DepartsAsNote { Rank = rank, Number = number, DepartureDate = departureDate });
        }

        /// <summary>Appends a "train number change" note to <see cref="Notes" />.</summary>
        internal void AddTrainNumberChangeNote(string rank, string number) {
            ModifyableNotes.Add(new TrainNumberChangeNote { Rank = rank, Number = number });
        }

        /// <summary>Appends a "receiving wagons" note to <see cref="Notes" />.</summary>
        internal void AddReceivingWagonsNote(string station) {
            ModifyableNotes.Add(new ReceivingWagonsNote { Station = station });
        }

        /// <summary>Appends a "detaching wagons" note to <see cref="Notes" />.</summary>
        internal void AddDetachingWagonsNote(string station) {
            ModifyableNotes.Add(new DetachingWagonsNote { Station = station });
        }
    }

    /// <summary>
    ///     Mutable implementation of <see cref="ITrainStopArrDep" />.
    /// </summary>
    public record TrainStopArrDep : ITrainStopArrDep {
        public DateTimeOffset ScheduleTime { get; set; }
        public IStatus? Status { get; private set; }

        /// <summary>
        ///     Builds a new status object through <paramref name="configurator" /> and stores it
        ///     in <see cref="Status" />. If the configurator throws, <see cref="Status" /> is
        ///     left untouched.
        /// </summary>
        internal void MakeStatus(Action<Status.Status> configurator) {
            Status.Status newStatus = new();
            configurator(newStatus);
            Status = newStatus;
        }
    }

    #endregion

    #region JSON Converters

    namespace JsonConverters {
        /// <summary>
        ///     Converts <see cref="StatusKind" /> to and from the strings
        ///     "passing", "arrival" and "departure".
        /// </summary>
        internal class StatusKindConverter : JsonConverterFactory {
            public override bool CanConvert(Type typeToConvert) {
                return typeToConvert == typeof(StatusKind);
            }

            public override JsonConverter? CreateConverter(Type typeToConvert, JsonSerializerOptions options) {
                return new Converter();
            }

            private class Converter : JsonConverter<StatusKind> {
                /// <exception cref="NotImplementedException">The string is not one of the known values.</exception>
                public override StatusKind Read(
                    ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options
                ) {
                    return reader.GetString() switch {
                        "arrival" => StatusKind.Arrival,
                        "departure" => StatusKind.Departure,
                        "passing" => StatusKind.Passing,
                        _ => throw new NotImplementedException()
                    };
                }

                /// <exception cref="NotImplementedException">The value is not a declared enum member.</exception>
                public override void Write(Utf8JsonWriter writer, StatusKind value, JsonSerializerOptions options) {
                    writer.WriteStringValue(value switch {
                        StatusKind.Passing => "passing",
                        StatusKind.Arrival => "arrival",
                        StatusKind.Departure => "departure",
                        _ => throw new NotImplementedException()
                    });
                }
            }
        }

        /// <summary>
        ///     Converts <see cref="NoteKind" /> to and from the strings "departsAs",
        ///     "trainNumberChange", "receivingWagons" and "detachingWagons".
        /// </summary>
        internal class NoteKindConverter : JsonConverterFactory {
            public override bool CanConvert(Type typeToConvert) {
                return typeToConvert == typeof(NoteKind);
            }

            public override JsonConverter? CreateConverter(Type typeToConvert, JsonSerializerOptions options) {
                return new Converter();
            }

            private class Converter : JsonConverter<NoteKind> {
                /// <exception cref="NotImplementedException">The string is not one of the known values.</exception>
                public override NoteKind Read(
                    ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options
                ) {
                    return reader.GetString() switch {
                        "departsAs" => NoteKind.DepartsAs,
                        "trainNumberChange" => NoteKind.TrainNumberChange,
                        "receivingWagons" => NoteKind.ReceivingWagons,
                        "detachingWagons" => NoteKind.DetachingWagons,
                        _ => throw new NotImplementedException()
                    };
                }

                /// <exception cref="NotImplementedException">The value is not a declared enum member.</exception>
                public override void Write(Utf8JsonWriter writer, NoteKind value, JsonSerializerOptions options) {
                    writer.WriteStringValue(value switch {
                        NoteKind.DepartsAs => "departsAs",
                        NoteKind.TrainNumberChange => "trainNumberChange",
                        NoteKind.DetachingWagons => "detachingWagons",
                        NoteKind.ReceivingWagons => "receivingWagons",
                        _ => throw new NotImplementedException()
                    });
                }
            }
        }
    }

    #endregion
}

483
scraper/src/Scrapers/Train.cs

@ -1,239 +1,244 @@
using System; using System;
using System.Collections.Generic; using System.Collections.Generic;
using System.Linq; using System.Linq;
using System.Net; using System.Net;
using System.Net.Http; using System.Net.Http;
using System.Text.RegularExpressions; using System.Text.RegularExpressions;
using System.Threading.Tasks; using System.Threading.Tasks;
using AngleSharp; using AngleSharp;
using AngleSharp.Dom; using AngleSharp.Dom;
using AngleSharp.Html.Dom; using AngleSharp.Html.Dom;
using Flurl; using Flurl;
using InfoferScraper.Models.Train; using InfoferScraper.Models.Train;
using NodaTime; using NodaTime;
using NodaTime.Extensions; using NodaTime.Extensions;
using scraper.Exceptions; using scraper.Exceptions;
namespace InfoferScraper.Scrapers {
    /// <summary>
    ///     Scrapes the itinerary and live status of a train from the site at <c>BaseUrl</c>.
    /// </summary>
    public static class TrainScraper {
        private const string BaseUrl = "https://mersultrenurilor.infofer.ro/ro-RO/";

        // Page title: "<rank> <number> în <date>".
        private static readonly Regex TrainInfoRegex = new(@"^([A-Z-]+)\s([0-9]+)\sîn\s([0-9.]+)$");

        private static readonly Regex OperatorRegex = new(@"^Operat\sde\s(.+)$");

        // Group heading: "Parcurs tren <from>-<to>".
        private static readonly Regex RouteRegex =
            new(@$"^Parcurs\stren\s([{Utils.RoLetters} ]+)[-–]([{Utils.RoLetters}\s]+)$");

        // Status line. Groups: 1 = minutes (empty for "Fără"), 2 = late/early wording,
        // 3 = passing/arrival/departure wording, 4 = station name.
        private static readonly Regex SlRegex =
            new(
                @"^(?:Fără|([0-9]+)\smin)\s(întârziere|mai\sdevreme)\sla\s(trecerea\sfără\soprire\sprin|sosirea\sîn|plecarea\sdin)\s(.+)\.$");

        // Keyed by the first letter of group 3 of SlRegex.
        private static readonly Dictionary<char, StatusKind> SlStateMap = new() {
            { 't', StatusKind.Passing },
            { 's', StatusKind.Arrival },
            { 'p', StatusKind.Departure },
        };

        private static readonly Regex KmRegex = new(@"^km\s([0-9]+)$");

        private static readonly Regex StoppingTimeRegex = new(@"^([0-9]+)\s(min|sec)\soprire$");

        private static readonly Regex PlatformRegex = new(@"^linia\s(.+)$");

        // Per-stop status. Groups: 1 = "la timp" (on time), 2 = signed delay in minutes,
        // 3 = trailing "*" marker.
        private static readonly Regex StationArrdepStatusRegex =
            new(@"^(?:(la timp)|(?:((?:\+|-)[0-9]+) min \((?:(?:întârziere)|(?:mai devreme))\)))(\*?)$");

        private static readonly Regex TrainNumberChangeNoteRegex =
            new(@"^Trenul își schimbă numărul în\s([A-Z-]+)\s([0-9]+)$");

        private static readonly Regex DepartsAsNoteRegex =
            new(@"^Trenul pleacă cu numărul\s([A-Z-]+)\s([0-9]+)\sîn\s([0-9]{2}).([0-9]{2}).([0-9]{4})$");

        private static readonly Regex ReceivingWagonsNoteRegex =
            new(@"^Trenul primește vagoane de la\s(.+)\.$");

        private static readonly Regex DetachingWagonsNoteRegex =
            new(@"^Trenul detașează vagoane pentru stația\s(.+)\.$");

        private static readonly DateTimeZone BucharestTz = DateTimeZoneProviders.Tzdb["Europe/Bucharest"];

        private static readonly CookieContainer CookieContainer = new();

        // One shared client (with a shared cookie container) for every request.
        private static readonly HttpClient HttpClient = new(new HttpClientHandler {
            CookieContainer = CookieContainer,
            UseCookies = true,
        }) {
            BaseAddress = new Uri(BaseUrl),
            DefaultRequestVersion = new Version(2, 0),
        };

        /// <summary>
        ///     Downloads and parses the page of train <paramref name="trainNumber" />.
        /// </summary>
        /// <param name="trainNumber">Train number, appended to the request path as-is.</param>
        /// <param name="dateOverride">
        ///     Optional date to query; it is converted to the Europe/Bucharest zone before
        ///     being formatted into the request. When <c>null</c> no date is sent.
        /// </param>
        /// <returns>
        ///     The scraped result, or <c>null</c> when the result page does not contain the
        ///     train information section.
        /// </returns>
        /// <exception cref="TrainNotThisDayException">
        ///     The train information section is present but the results section is missing.
        /// </exception>
        public static async Task<ITrainScrapeResult?> Scrape(string trainNumber, DateTimeOffset? dateOverride = null) {
            var dateOverrideInstant = dateOverride?.ToInstant().InZone(BucharestTz);
            dateOverride = dateOverrideInstant?.ToDateTimeOffset();
            TrainScrapeResult result = new();

            var asConfig = Configuration.Default;
            var asContext = BrowsingContext.New(asConfig);

            // Step 1: load the search page; its hidden form fields are needed for step 2.
            var firstUrl = "Tren"
                .AppendPathSegment(trainNumber);
            if (dateOverride != null) {
                firstUrl = firstUrl.SetQueryParam("Date", $"{dateOverride:d.MM.yyyy}");
            }
            var firstResponse = await HttpClient.GetStringAsync(firstUrl);
            var firstDocument = await asContext.OpenAsync(req => req.Content(firstResponse));
            var firstForm = firstDocument.GetElementById("form-search")!;

            var firstResult = firstForm
                .QuerySelectorAll<IHtmlInputElement>("input")
                .Where(elem => elem.Name != null)
                .ToDictionary(elem => elem.Name!, elem => elem.Value);

            // Step 2: post the form back to obtain the actual result markup.
            var secondUrl = "".AppendPathSegments("Trains", "TrainsResult");
            // Disposed when the method completes; the body is read into a string right below.
            using var secondResponse = await HttpClient.PostAsync(
                secondUrl,
#pragma warning disable CS8620
                new FormUrlEncodedContent(firstResult)
#pragma warning restore CS8620
            );
            var secondResponseContent = await secondResponse.Content.ReadAsStringAsync();
            var secondDocument = await asContext.OpenAsync(
                req => req.Content(secondResponseContent)
            );

            // First top-level div carries the train info, the fourth one the results.
            var (trainInfoDiv, (_, (_, (resultsDiv, _)))) = secondDocument
                .QuerySelectorAll("body > div");
            if (trainInfoDiv == null) {
                return null;
            }
            if (resultsDiv == null) {
                throw new TrainNotThisDayException();
            }
            trainInfoDiv = trainInfoDiv.QuerySelectorAll(":scope > div > div").First();

            (result.Rank, (result.Number, (result.Date, _))) = (TrainInfoRegex.Match(
                trainInfoDiv.QuerySelector(":scope > h2")!.Text().WithCollapsedSpaces()
            ).Groups as IEnumerable<Group>).Select(group => group.Value).Skip(1);
            var (scrapedDateD, (scrapedDateM, (scrapedDateY, _))) = result.Date
                .Split('.')
                .Select(int.Parse);
            var date = new DateTime(scrapedDateY, scrapedDateM, scrapedDateD);

            result.Operator = (OperatorRegex.Match(
                trainInfoDiv.QuerySelector(":scope > p")!.Text().WithCollapsedSpaces()
            ).Groups as IEnumerable<Group>).Skip(1).First().Value;

            foreach (var groupDiv in resultsDiv.QuerySelectorAll(":scope > div")) {
                result.AddTrainGroup(group => {
                    var statusDiv = groupDiv.QuerySelectorAll(":scope > div").First();
                    var routeText = statusDiv.QuerySelector(":scope > h4")!.Text().WithCollapsedSpaces();
                    group.ConfigureRoute(route => {
                        (route.From, (route.To, _)) = (RouteRegex.Match(routeText).Groups as IEnumerable<Group>).Skip(1)
                            .Select(g => g.Value);
                    });

                    // The status line is optional: any failure to find or parse it leaves
                    // the group without a status instead of failing the whole scrape.
                    try {
                        var statusLineMatch =
                            SlRegex.Match(statusDiv.QuerySelector(":scope > div")!.Text().WithCollapsedSpaces());
                        var (slmDelay, (slmLate, (slmArrival, (slmStation, _)))) =
                            (statusLineMatch.Groups as IEnumerable<Group>).Skip(1).Select(g => g.Value);
                        group.MakeStatus(status => {
                            status.Delay = string.IsNullOrEmpty(slmDelay) ? 0 :
                                slmLate == "întârziere" ? int.Parse(slmDelay) : -int.Parse(slmDelay);
                            status.Station = slmStation;
                            status.State = SlStateMap[slmArrival[0]];
                        });
                    }
                    catch {
                        // ignored
                    }

                    // NOTE(review): DateTimeSequencer is defined in Utils; presumably it advances
                    // to the next day when the times go backwards - confirm. It is fed in page
                    // order (arrival before departure, stop after stop), so statement order matters.
                    Utils.DateTimeSequencer dtSeq = new(date.Year, date.Month, date.Day);
                    var stations = statusDiv.QuerySelectorAll(":scope > ul > li");
                    foreach (var station in stations) {
                        group.AddStopDescription(stopDescription => {
                            var (left, (middle, (right, _))) = station
                                .QuerySelectorAll(":scope > div > div");
                            var (stopDetails, (stopNotes, _)) = middle
                                .QuerySelectorAll(":scope > div > div > div");

                            // Cells, in order: name, km, stopping time, platform.
                            var detailCells = stopDetails.QuerySelectorAll(":scope > div");

                            stopDescription.Name = detailCells[0]
                                .Text()
                                .WithCollapsedSpaces();

                            // A station cell without a link keeps LinkName at its default ("")
                            // instead of aborting the scrape with a NullReferenceException.
                            var stationHref = detailCells[0]
                                .QuerySelector(":scope a")
                                ?.Attributes["href"]
                                ?.Value;
                            if (!string.IsNullOrEmpty(stationHref)) {
                                stopDescription.LinkName =
                                    new Flurl.Url(stationHref).PathSegments.LastOrDefault() ?? "";
                            }

                            var scrapedKm = detailCells[1]
                                .Text()
                                .WithCollapsedSpaces();
                            stopDescription.Km = int.Parse(
                                (KmRegex.Match(scrapedKm).Groups as IEnumerable<Group>).Skip(1).First().Value
                            );

                            var scrapedStoppingTime = detailCells[2]
                                .Text()
                                .WithCollapsedSpaces();
                            if (!string.IsNullOrEmpty(scrapedStoppingTime)) {
                                var (stValue, (stMinsec, _)) =
                                    (StoppingTimeRegex.Match(scrapedStoppingTime).Groups as IEnumerable<Group>)
                                    .Skip(1)
                                    .Select(g => g.Value);
                                stopDescription.StoppingTime = int.Parse(stValue);
                                // Normalise to seconds.
                                if (stMinsec == "min") stopDescription.StoppingTime *= 60;
                            }

                            var scrapedPlatform = detailCells[3]
                                .Text()
                                .WithCollapsedSpaces();
                            if (!string.IsNullOrEmpty(scrapedPlatform))
                                stopDescription.Platform = PlatformRegex.Match(scrapedPlatform).Groups[1].Value;

                            // Fills arrDep from one side column of the stop. Throws
                            // OperationCanceledException when the column is empty so that the
                            // caller can leave the arrival/departure unset.
                            void ScrapeTime(IElement element, ref TrainStopArrDep arrDep) {
                                var parts = element.QuerySelectorAll(":scope > div > div > div");
                                if (parts.Length == 0) throw new OperationCanceledException();
                                var time = parts[0];
                                var scrapedTime = time.Text().WithCollapsedSpaces();
                                var (stHour, (stMin, _)) = scrapedTime.Split(':').Select(int.Parse);
                                arrDep.ScheduleTime = BucharestTz.AtLeniently(dtSeq.Next(stHour, stMin).ToLocalDateTime())
                                    .ToDateTimeOffset();

                                // No second part means no live status for this time.
                                if (parts.Length < 2) return;

                                var statusElement = parts[1];
                                var (onTime, (delay, (approx, _))) = (StationArrdepStatusRegex.Match(
                                    statusElement.Text().WithCollapsedSpaces(replaceWith: " ")
                                ).Groups as IEnumerable<Group>).Skip(1).Select(g => g.Value);
                                arrDep.MakeStatus(status => {
                                    status.Delay = string.IsNullOrEmpty(onTime) ? int.Parse(delay) : 0;
                                    // Real is false when the trailing "*" marker is present.
                                    status.Real = string.IsNullOrEmpty(approx);
                                });
                            }

                            try {
                                stopDescription.MakeArrival(arrival => { ScrapeTime(left, ref arrival); });
                            }
                            catch (OperationCanceledException) { }

                            try {
                                stopDescription.MakeDeparture(departure => { ScrapeTime(right, ref departure); });
                            }
                            catch (OperationCanceledException) { }

                            // Notes that match none of the known sentences are skipped.
                            foreach (var noteDiv in stopNotes.QuerySelectorAll(":scope > div > div")) {
                                var noteText = noteDiv.Text().WithCollapsedSpaces();
                                Match trainNumberChangeMatch, departsAsMatch, detachingWagons, receivingWagons;
                                if ((trainNumberChangeMatch = TrainNumberChangeNoteRegex.Match(noteText)).Success) {
                                    stopDescription.AddTrainNumberChangeNote(trainNumberChangeMatch.Groups[1].Value, trainNumberChangeMatch.Groups[2].Value);
                                }
                                else if ((departsAsMatch = DepartsAsNoteRegex.Match(noteText)).Success) {
                                    var groups = departsAsMatch.Groups;
                                    // Groups 3/4/5 are day/month/year; the note date is midnight Bucharest time.
                                    var departureDate = BucharestTz.AtStrictly(new(int.Parse(groups[5].Value), int.Parse(groups[4].Value), int.Parse(groups[3].Value), 0, 0));
                                    stopDescription.AddDepartsAsNote(groups[1].Value, groups[2].Value, departureDate.ToDateTimeOffset());
                                }
                                else if ((detachingWagons = DetachingWagonsNoteRegex.Match(noteText)).Success) {
                                    stopDescription.AddDetachingWagonsNote(detachingWagons.Groups[1].Value);
                                }
                                else if ((receivingWagons = ReceivingWagonsNoteRegex.Match(noteText)).Success) {
                                    stopDescription.AddReceivingWagonsNote(receivingWagons.Groups[1].Value);
                                }
                            }
                        });
                    }
                });
            }
            return result;
        }
    }
} // namespace

Loading…
Cancel
Save