From 4b4e04fb75d7baf2d5506b934c20640be5dbc2f7 Mon Sep 17 00:00:00 2001 From: Dan Cojocaru Date: Sat, 6 Aug 2022 00:05:10 +0300 Subject: [PATCH] Add departure date (midnight) for arr/dep trains --- scraper/src/Models/Station.cs | 2 ++ scraper/src/Scrapers/Station.cs | 18 ++++++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/scraper/src/Models/Station.cs b/scraper/src/Models/Station.cs index 8682eef..400f598 100644 --- a/scraper/src/Models/Station.cs +++ b/scraper/src/Models/Station.cs @@ -33,6 +33,7 @@ namespace InfoferScraper.Models.Station { /// Arrivals -> Departure station; Departures -> Destination station /// public string Terminus { get; } + public DateTimeOffset DepartureDate { get; } } public interface IStationStatus : IStatus { @@ -96,6 +97,7 @@ namespace InfoferScraper.Models.Station { public string Rank { get; internal set; } = ""; public IReadOnlyList<string> Route => _modifyableRoute.AsReadOnly(); public string Terminus { get; internal set; } = ""; + public DateTimeOffset DepartureDate { get; internal set; } internal void AddRouteStation(string station) => _modifyableRoute.Add(station); } diff --git a/scraper/src/Scrapers/Station.cs b/scraper/src/Scrapers/Station.cs index 4ebe5c6..488b8d7 100644 --- a/scraper/src/Scrapers/Station.cs +++ b/scraper/src/Scrapers/Station.cs @@ -26,6 +26,8 @@ namespace InfoferScraper.Scrapers { ); private static readonly Regex PlatformRegex = new(@"^linia\s([A-Za-z0-9]+)$"); + + private static readonly Regex TrainUrlDateRegex = new(@"Date=([0-9]{2}).([0-9]{2}).([0-9]{4})"); private static readonly DateTimeZone BucharestTz = DateTimeZoneProviders.Tzdb["Europe/Bucharest"]; @@ -140,6 +142,22 @@ namespace InfoferScraper.Scrapers { .QuerySelector(":scope > a")! .Text() .WithCollapsedSpaces(); + var trainUri = new Uri( + "http://localhost" + trainDiv + .QuerySelectorAll(":scope > div > div > div")[1] + .QuerySelector(":scope > a")! + .GetAttribute("href")! 
+ ); + var (trainDepDay, (trainDepMonth, (trainDepYear, _))) = TrainUrlDateRegex + .Match(trainUri.Query) + .Groups + .Values + .Skip(1) + .Select(g => int.Parse(g.Value)); + arrDep.ModifyableTrain.DepartureDate = BucharestTz + .AtStartOfDay(new(trainDepYear, trainDepMonth, trainDepDay)) + .ToDateTimeOffset() + .ToUniversalTime(); arrDep.ModifyableTrain.Terminus = destDiv .QuerySelectorAll(":scope > div > div > div")[1] .Text()