Compare commits

..

No commits in common. '422b4727c0b6cd4e0d994db503dc74f0213b8b35' and '169e128ca1cb2a1d1cb126250a37b06fc2e89643' have entirely different histories.

  1. 2
      .vscode/launch.json
  2. 2
      ConsoleTest/ConsoleTest.csproj
  3. 36
      ConsoleTest/Program.cs
  4. 6
      Dockerfile
  5. 6
      docker-compose.yml
  6. 3
      scraper/scraper.csproj
  7. 62
      scraper/src/Models/Itinerary.cs
  8. 3
      scraper/src/Models/Station.cs
  9. 3
      scraper/src/Models/Status.cs
  10. 562
      scraper/src/Models/Train.cs
  11. 220
      scraper/src/Scrapers/Route.cs
  12. 47
      scraper/src/Scrapers/Station.cs
  13. 500
      scraper/src/Scrapers/Train.cs
  14. 3
      server/Controllers/V2/StationsController.cs
  15. 3
      server/Controllers/V2/TrainsController.cs
  16. 40
      server/Controllers/V3/ItinerariesController.cs
  17. 3
      server/Controllers/V3/StationsController.cs
  18. 3
      server/Controllers/V3/TrainsController.cs
  19. 5
      server/Models/Database/MongoSettings.cs
  20. 17
      server/Models/Database/StationAlias.cs
  21. 18
      server/Models/Database/StationListing.cs
  22. 20
      server/Models/Database/TrainListing.cs
  23. 9
      server/Models/ProxySettings.cs
  24. 68
      server/Services/Implementations/DataManager.cs
  25. 430
      server/Services/Implementations/Database.cs
  26. 3
      server/Services/Interfaces/IDataManager.cs
  27. 18
      server/Services/Interfaces/IDatabase.cs
  28. 57
      server/Startup.cs
  29. 38
      server/Utils/AsyncThrottle.cs
  30. 7
      server/Utils/Constants.cs
  31. 33
      server/Utils/IAsyncCusorAsyncAdapter.cs
  32. 6
      server/appsettings.Development.json
  33. 4
      server/appsettings.json
  34. 10
      server/server.csproj

2
.vscode/launch.json vendored

@ -6,7 +6,7 @@
"type": "coreclr", "type": "coreclr",
"request": "launch", "request": "launch",
"preLaunchTask": "buildConsoleTest", "preLaunchTask": "buildConsoleTest",
"program": "${workspaceFolder}/ConsoleTest/bin/Debug/net7.0/ConsoleTest.dll", "program": "${workspaceFolder}/ConsoleTest/bin/Debug/net6.0/ConsoleTest.dll",
"args": [], "args": [],
"cwd": "${workspaceFolder}", "cwd": "${workspaceFolder}",
"stopAtEntry": false, "stopAtEntry": false,

2
ConsoleTest/ConsoleTest.csproj

@ -6,7 +6,7 @@
<PropertyGroup> <PropertyGroup>
<OutputType>Exe</OutputType> <OutputType>Exe</OutputType>
<TargetFrameworks>net6.0;net7.0;net8.0</TargetFrameworks> <TargetFramework>net6.0</TargetFramework>
</PropertyGroup> </PropertyGroup>
</Project> </Project>

36
ConsoleTest/Program.cs

@ -1,5 +1,4 @@
using System; using System;
using System.Linq;
using System.Text.Json; using System.Text.Json;
using System.Threading.Tasks; using System.Threading.Tasks;
using InfoferScraper; using InfoferScraper;
@ -8,7 +7,6 @@ using InfoferScraper.Scrapers;
while (true) { while (true) {
Console.WriteLine("1. Scrape Train"); Console.WriteLine("1. Scrape Train");
Console.WriteLine("2. Scrape Station"); Console.WriteLine("2. Scrape Station");
Console.WriteLine("3. Scrape Itineraries");
Console.WriteLine("0. Exit"); Console.WriteLine("0. Exit");
var input = Console.ReadLine()?.Trim(); var input = Console.ReadLine()?.Trim();
@ -19,9 +17,6 @@ while (true) {
case "2": case "2":
await PrintStation(); await PrintStation();
break; break;
case "3":
await ScrapeItineraries();
break;
case null: case null:
case "0": case "0":
goto INPUT_LOOP_BREAK; goto INPUT_LOOP_BREAK;
@ -40,7 +35,7 @@ async Task PrintTrain() {
Console.WriteLine( Console.WriteLine(
JsonSerializer.Serialize( JsonSerializer.Serialize(
await new TrainScraper().Scrape(trainNumber), await TrainScraper.Scrape(trainNumber),
new JsonSerializerOptions { new JsonSerializerOptions {
PropertyNamingPolicy = JsonNamingPolicy.CamelCase, PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
WriteIndented = true, WriteIndented = true,
@ -58,7 +53,7 @@ async Task PrintStation() {
Console.WriteLine( Console.WriteLine(
JsonSerializer.Serialize( JsonSerializer.Serialize(
await new StationScraper().Scrape(stationName), await StationScraper.Scrape(stationName),
new JsonSerializerOptions { new JsonSerializerOptions {
PropertyNamingPolicy = JsonNamingPolicy.CamelCase, PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
WriteIndented = true, WriteIndented = true,
@ -66,30 +61,3 @@ async Task PrintStation() {
) )
); );
} }
// Prompts for a departure and an arrival station on the console, scrapes the
// itineraries between them and prints every train of every itinerary.
// Returns without scraping when either prompt hits end-of-input.
async Task ScrapeItineraries() {
    Console.Write("From station: ");
    var from = Console.ReadLine();
    Console.Write("To station: ");
    var to = Console.ReadLine();
    if (from is null || to is null) return;

    var data = await new RouteScraper().Scrape(from, to);
    if (data is null) {
        // RouteScraper.Scrape is declared as returning a nullable list;
        // without this guard the Count access below would throw.
        Console.WriteLine("No itineraries found.");
        return;
    }

    Console.WriteLine($"{data.Count} itineraries:");
    Console.WriteLine();

    // Prints one "HH:mm station" line for a departure or an arrival.
    void PrintArrDepLine(DateTimeOffset date, string station) {
        Console.WriteLine($"{date:HH:mm} {station}");
    }

    foreach (var itinerary in data) {
        foreach (var train in itinerary.Trains) {
            PrintArrDepLine(train.DepartureDate, train.From);
            Console.WriteLine($" {train.TrainRank,-4} {train.TrainNumber,-5} ({train.Operator}), {train.Km,3} km via {string.Join(", ", train.IntermediateStops)}");
            PrintArrDepLine(train.ArrivalDate, train.To);
        }
        // Blank line between itineraries.
        Console.WriteLine();
    }
}

6
Dockerfile

@ -1,5 +1,5 @@
# https://hub.docker.com/_/microsoft-dotnet # https://hub.docker.com/_/microsoft-dotnet
FROM mcr.microsoft.com/dotnet/sdk:7.0 AS build FROM mcr.microsoft.com/dotnet/sdk:6.0 AS build
WORKDIR /source WORKDIR /source
# copy csproj and restore as distinct layers # copy csproj and restore as distinct layers
@ -14,10 +14,10 @@ COPY server/. ./server/
COPY scraper/. ./scraper/ COPY scraper/. ./scraper/
COPY ConsoleTest/. ./ConsoleTest/ COPY ConsoleTest/. ./ConsoleTest/
WORKDIR /source/server WORKDIR /source/server
RUN dotnet publish -f net7.0 -c release -o /app --no-restore RUN dotnet publish -c release -o /app --no-restore
# final stage/image # final stage/image
FROM mcr.microsoft.com/dotnet/aspnet:7.0 FROM mcr.microsoft.com/dotnet/aspnet:6.0
WORKDIR /app WORKDIR /app
COPY --from=build /app ./ COPY --from=build /app ./
ENV INSIDE_DOCKER=true ENV INSIDE_DOCKER=true

6
docker-compose.yml

@ -5,6 +5,8 @@ services:
image: new_infofer_scraper image: new_infofer_scraper
build: . build: .
ports: ports:
- ${PORT:-5001}:80 - ${PORT:-5000}:80
environment: environment:
DB_DIR: /data - DB_DIR=/data
volumes:
- ./data:/data

3
scraper/scraper.csproj

@ -2,14 +2,13 @@
<PropertyGroup> <PropertyGroup>
<Nullable>enable</Nullable> <Nullable>enable</Nullable>
<TargetFrameworks>net6.0;net7.0;net8.0</TargetFrameworks> <TargetFramework>net6.0</TargetFramework>
</PropertyGroup> </PropertyGroup>
<ItemGroup> <ItemGroup>
<PackageReference Include="AngleSharp" Version="0.16.0" /> <PackageReference Include="AngleSharp" Version="0.16.0" />
<PackageReference Include="Flurl" Version="3.0.2" /> <PackageReference Include="Flurl" Version="3.0.2" />
<PackageReference Include="Jetbrains.Annotations" Version="2021.2.0" /> <PackageReference Include="Jetbrains.Annotations" Version="2021.2.0" />
<PackageReference Include="Newtonsoft.Json" Version="13.0.3" />
<PackageReference Include="NodaTime" Version="3.0.5" /> <PackageReference Include="NodaTime" Version="3.0.5" />
</ItemGroup> </ItemGroup>

62
scraper/src/Models/Itinerary.cs

@ -1,62 +0,0 @@
using System;
using System.Collections.Generic;
namespace scraper.Models.Itinerary;
#region Interfaces
/// <summary>
/// A single itinerary, exposed as the ordered list of trains it consists of.
/// </summary>
public interface IItinerary {
    /// <summary>
    /// The trains that make up this itinerary.
    /// </summary>
    IReadOnlyList<IItineraryTrain> Trains { get; }
}
/// <summary>
/// One train taken as part of an itinerary.
/// </summary>
public interface IItineraryTrain {
    /// <summary>
    /// Station where this train is boarded.
    /// </summary>
    string From { get; }

    /// <summary>
    /// Station where this train is left.
    /// </summary>
    string To { get; }

    /// <summary>
    /// Stops between <see cref="From"/> and <see cref="To"/>; may be empty.
    /// </summary>
    IReadOnlyList<string> IntermediateStops { get; }

    /// <summary>
    /// Departure moment from <see cref="From"/>.
    /// </summary>
    DateTimeOffset DepartureDate { get; }

    /// <summary>
    /// Arrival moment at <see cref="To"/>.
    /// </summary>
    DateTimeOffset ArrivalDate { get; }

    /// <summary>
    /// Kilometre figure reported for this train
    /// (presumably the distance travelled on it; confirm against the scraper).
    /// </summary>
    int Km { get; }

    /// <summary>
    /// Name of the operator of the train.
    /// </summary>
    string Operator { get; }

    /// <summary>
    /// Rank (category) of the train.
    /// </summary>
    string TrainRank { get; }

    /// <summary>
    /// Number of the train.
    /// </summary>
    string TrainNumber { get; }
}
#endregion
#region Implementations
/// <summary>
/// Mutable-from-inside implementation of <see cref="IItinerary"/>;
/// trains are appended through the <c>AddTrain</c> overloads.
/// </summary>
internal record Itinerary : IItinerary {
    // Backing list; exposed read-only through Trains.
    private List<IItineraryTrain> ModifyableTrains { get; set; } = new();

    public IReadOnlyList<IItineraryTrain> Trains => ModifyableTrains;

    /// <summary>
    /// Appends an already built train to the itinerary.
    /// </summary>
    internal void AddTrain(IItineraryTrain train) => ModifyableTrains.Add(train);

    /// <summary>
    /// Creates a new <see cref="ItineraryTrain"/>, lets <paramref name="configurator"/>
    /// fill it in and then appends it to the itinerary.
    /// </summary>
    internal void AddTrain(Action<ItineraryTrain> configurator) {
        var configuredTrain = new ItineraryTrain();
        configurator(configuredTrain);
        ModifyableTrains.Add(configuredTrain);
    }
}
/// <summary>
/// Implementation of <see cref="IItineraryTrain"/> whose values can only be
/// set from inside the assembly.
/// </summary>
internal record ItineraryTrain : IItineraryTrain {
    // Backing list; exposed read-only through IntermediateStops.
    private List<string> ModifyableIntermediateStops { get; set; } = new();

    public string From { get; internal set; } = string.Empty;
    public string To { get; internal set; } = string.Empty;
    public IReadOnlyList<string> IntermediateStops => ModifyableIntermediateStops;
    public DateTimeOffset DepartureDate { get; internal set; } = default;
    public DateTimeOffset ArrivalDate { get; internal set; } = default;
    public int Km { get; internal set; }
    public string Operator { get; internal set; } = string.Empty;
    public string TrainRank { get; internal set; } = string.Empty;
    public string TrainNumber { get; internal set; } = string.Empty;

    /// <summary>
    /// Appends <paramref name="stop"/> to the list of intermediate stops.
    /// </summary>
    internal void AddIntermediateStop(string stop) => ModifyableIntermediateStops.Add(stop);
}
#endregion

3
scraper/src/Models/Station.cs

@ -37,6 +37,9 @@ namespace InfoferScraper.Models.Station {
} }
public interface IStationStatus : IStatus { public interface IStationStatus : IStatus {
new int Delay { get; }
new bool Real { get; }
public bool Cancelled { get; }
public string? Platform { get; } public string? Platform { get; }
} }

3
scraper/src/Models/Status.cs

@ -6,13 +6,10 @@ namespace InfoferScraper.Models.Status {
/// Determines whether delay was actually reported or is an approximation /// Determines whether delay was actually reported or is an approximation
/// </summary> /// </summary>
public bool Real { get; } public bool Real { get; }
public bool Cancelled { get; }
} }
internal record Status : IStatus { internal record Status : IStatus {
public int Delay { get; set; } public int Delay { get; set; }
public bool Real { get; set; } public bool Real { get; set; }
public bool Cancelled { get; set; }
} }
} }

562
scraper/src/Models/Train.cs

@ -1,246 +1,316 @@
using System; using System;
using System.Collections.Generic; using System.Collections.Generic;
using InfoferScraper.Models.Status; using System.Text.Json;
using Newtonsoft.Json; using System.Text.Json.Serialization;
using Newtonsoft.Json.Converters; using InfoferScraper.Models.Status;
using Newtonsoft.Json.Serialization; using InfoferScraper.Models.Train.JsonConverters;
namespace InfoferScraper.Models.Train { namespace InfoferScraper.Models.Train {
#region Interfaces #region Interfaces
public interface ITrainScrapeResult { public interface ITrainScrapeResult {
public string Rank { get; } public string Rank { get; }
public string Number { get; } public string Number { get; }
/// <summary> /// <summary>
/// Date in the DD.MM.YYYY format /// Date in the DD.MM.YYYY format
/// This date is taken as-is from the result. /// This date is taken as-is from the result.
/// </summary> /// </summary>
public string Date { get; } public string Date { get; }
public string Operator { get; } public string Operator { get; }
public IReadOnlyList<ITrainGroup> Groups { get; } public IReadOnlyList<ITrainGroup> Groups { get; }
} }
public interface ITrainGroup { public interface ITrainGroup {
public ITrainRoute Route { get; } public ITrainRoute Route { get; }
public ITrainStatus? Status { get; } public ITrainStatus? Status { get; }
public IReadOnlyList<ITrainStopDescription> Stations { get; } public IReadOnlyList<ITrainStopDescription> Stations { get; }
} }
public interface ITrainRoute { public interface ITrainRoute {
public string From { get; } public string From { get; }
public string To { get; } public string To { get; }
} }
public interface ITrainStatus { public interface ITrainStatus {
public int Delay { get; } public int Delay { get; }
public string Station { get; } public string Station { get; }
public StatusKind State { get; } public StatusKind State { get; }
} }
public interface ITrainStopDescription { public interface ITrainStopDescription {
public string Name { get; } public string Name { get; }
public string LinkName { get; } public int Km { get; }
public int Km { get; }
/// <summary>
/// <summary> /// The time the train waits in the station in seconds
/// The time the train waits in the station in seconds /// </summary>
/// </summary> public int? StoppingTime { get; }
public int? StoppingTime { get; }
public string? Platform { get; }
public string? Platform { get; } public ITrainStopArrDep? Arrival { get; }
public ITrainStopArrDep? Arrival { get; } public ITrainStopArrDep? Departure { get; }
public ITrainStopArrDep? Departure { get; }
public IReadOnlyList<object> Notes { get; }
public IReadOnlyList<object> Notes { get; } }
}
public interface ITrainStopNote {
public interface ITrainStopNote { public NoteKind Kind { get; }
public NoteKind Kind { get; } }
}
public interface ITrainStopTrainNumberChangeNote : ITrainStopNote {
public interface ITrainStopTrainNumberChangeNote : ITrainStopNote { public string Rank { get; }
public string Rank { get; } public string Number { get; }
public string Number { get; } }
}
public interface ITrainStopDepartsAsNote : ITrainStopNote {
public interface ITrainStopDepartsAsNote : ITrainStopNote { public string Rank { get; }
public string Rank { get; } public string Number { get; }
public string Number { get; } public DateTimeOffset DepartureDate { get; }
public DateTimeOffset DepartureDate { get; } }
}
public interface ITrainStopDetachingWagonsNote : ITrainStopNote {
public interface ITrainStopDetachingWagonsNote : ITrainStopNote { public string Station { get; }
public string Station { get; } }
}
public interface ITrainStopReceivingWagonsNote : ITrainStopNote {
public interface ITrainStopReceivingWagonsNote : ITrainStopNote { public string Station { get; }
public string Station { get; } }
}
public interface ITrainStopArrDep {
public interface ITrainStopArrDep { public DateTimeOffset ScheduleTime { get; }
public DateTimeOffset ScheduleTime { get; } public IStatus? Status { get; }
public IStatus? Status { get; } }
}
#endregion
#endregion
[JsonConverter(typeof(StatusKindConverter))]
[JsonConverter(typeof(StringEnumConverter), typeof(CamelCaseNamingStrategy))] public enum StatusKind {
public enum StatusKind { Passing,
Passing, Arrival,
Arrival, Departure,
Departure, }
}
[JsonConverter(typeof(NoteKindConverter))]
[JsonConverter(typeof(StringEnumConverter), typeof(CamelCaseNamingStrategy))] public enum NoteKind {
public enum NoteKind { TrainNumberChange,
TrainNumberChange, DetachingWagons,
DetachingWagons, ReceivingWagons,
ReceivingWagons, DepartsAs,
DepartsAs, }
}
#region Implementations
#region Implementations
internal record TrainScrapeResult : ITrainScrapeResult {
internal record TrainScrapeResult : ITrainScrapeResult { private List<ITrainGroup> ModifyableGroups { get; set; } = new();
private List<ITrainGroup> ModifyableGroups { get; set; } = new(); public string Rank { get; set; } = "";
public string Rank { get; set; } = ""; public string Number { get; set; } = "";
public string Number { get; set; } = ""; public string Date { get; set; } = "";
public string Date { get; set; } = ""; public string Operator { get; set; } = "";
public string Operator { get; set; } = ""; public IReadOnlyList<ITrainGroup> Groups => ModifyableGroups.AsReadOnly();
public IReadOnlyList<ITrainGroup> Groups => ModifyableGroups.AsReadOnly();
private void AddTrainGroup(ITrainGroup trainGroup) {
private void AddTrainGroup(ITrainGroup trainGroup) { ModifyableGroups.Add(trainGroup);
ModifyableGroups.Add(trainGroup); }
}
internal void AddTrainGroup(Action<TrainGroup> configurator) {
internal void AddTrainGroup(Action<TrainGroup> configurator) { TrainGroup newTrainGroup = new();
TrainGroup newTrainGroup = new(); configurator(newTrainGroup);
configurator(newTrainGroup); AddTrainGroup(newTrainGroup);
AddTrainGroup(newTrainGroup); }
} }
}
internal record TrainGroup : ITrainGroup {
internal record TrainGroup : ITrainGroup { private List<ITrainStopDescription> ModifyableStations { get; set; } = new();
private List<ITrainStopDescription> ModifyableStations { get; set; } = new(); public ITrainRoute Route { get; init; } = new TrainRoute();
public ITrainRoute Route { get; init; } = new TrainRoute(); public ITrainStatus? Status { get; private set; }
public ITrainStatus? Status { get; private set; } public IReadOnlyList<ITrainStopDescription> Stations => ModifyableStations.AsReadOnly();
public IReadOnlyList<ITrainStopDescription> Stations => ModifyableStations.AsReadOnly();
private void AddStopDescription(ITrainStopDescription stopDescription) {
private void AddStopDescription(ITrainStopDescription stopDescription) { ModifyableStations.Add(stopDescription);
ModifyableStations.Add(stopDescription); }
}
internal void AddStopDescription(Action<TrainStopDescription> configurator) {
internal void AddStopDescription(Action<TrainStopDescription> configurator) { TrainStopDescription newStopDescription = new();
TrainStopDescription newStopDescription = new(); configurator(newStopDescription);
configurator(newStopDescription); AddStopDescription(newStopDescription);
AddStopDescription(newStopDescription); }
}
internal void ConfigureRoute(Action<TrainRoute> configurator) {
internal void ConfigureRoute(Action<TrainRoute> configurator) { configurator((TrainRoute)Route);
configurator((TrainRoute)Route); }
}
internal void MakeStatus(Action<TrainStatus> configurator) {
internal void MakeStatus(Action<TrainStatus> configurator) { TrainStatus newStatus = new();
TrainStatus newStatus = new(); configurator(newStatus);
configurator(newStatus); Status = newStatus;
Status = newStatus; }
} }
}
internal record TrainRoute : ITrainRoute {
internal record TrainRoute : ITrainRoute { public TrainRoute() {
public TrainRoute() { From = "";
From = ""; To = "";
To = ""; }
}
public string From { get; set; }
public string From { get; set; } public string To { get; set; }
public string To { get; set; } }
}
internal record TrainStatus : ITrainStatus {
internal record TrainStatus : ITrainStatus { public int Delay { get; set; }
public int Delay { get; set; } public string Station { get; set; } = "";
public string Station { get; set; } = ""; public StatusKind State { get; set; }
public StatusKind State { get; set; } }
}
internal record TrainStopDescription : ITrainStopDescription {
internal record TrainStopDescription : ITrainStopDescription { private List<ITrainStopNote> ModifyableNotes { get; } = new();
private List<ITrainStopNote> ModifyableNotes { get; } = new(); public string Name { get; set; } = "";
public string Name { get; set; } = ""; public int Km { get; set; }
public string LinkName { get; set; } = ""; public int? StoppingTime { get; set; }
public int Km { get; set; } public string? Platform { get; set; }
public int? StoppingTime { get; set; } public ITrainStopArrDep? Arrival { get; private set; }
public string? Platform { get; set; } public ITrainStopArrDep? Departure { get; private set; }
public ITrainStopArrDep? Arrival { get; private set; } public IReadOnlyList<object> Notes => ModifyableNotes.AsReadOnly();
public ITrainStopArrDep? Departure { get; private set; }
public IReadOnlyList<object> Notes => ModifyableNotes.AsReadOnly(); internal void MakeArrival(Action<TrainStopArrDep> configurator) {
TrainStopArrDep newArrival = new();
internal void MakeArrival(Action<TrainStopArrDep> configurator) { configurator(newArrival);
TrainStopArrDep newArrival = new(); Arrival = newArrival;
configurator(newArrival); }
Arrival = newArrival;
} internal void MakeDeparture(Action<TrainStopArrDep> configurator) {
TrainStopArrDep newDeparture = new();
internal void MakeDeparture(Action<TrainStopArrDep> configurator) { configurator(newDeparture);
TrainStopArrDep newDeparture = new(); Departure = newDeparture;
configurator(newDeparture); }
Departure = newDeparture;
} class DepartsAsNote : ITrainStopDepartsAsNote {
public NoteKind Kind => NoteKind.DepartsAs;
class DepartsAsNote : ITrainStopDepartsAsNote { public string Rank { get; set; } = "";
public NoteKind Kind => NoteKind.DepartsAs; public string Number { get; set; } = "";
public string Rank { get; set; } = ""; public DateTimeOffset DepartureDate { get; set; }
public string Number { get; set; } = ""; }
public DateTimeOffset DepartureDate { get; set; }
} class TrainNumberChangeNote : ITrainStopTrainNumberChangeNote {
public NoteKind Kind => NoteKind.TrainNumberChange;
class TrainNumberChangeNote : ITrainStopTrainNumberChangeNote { public string Rank { get; set; } = "";
public NoteKind Kind => NoteKind.TrainNumberChange; public string Number { get; set; } = "";
public string Rank { get; set; } = ""; }
public string Number { get; set; } = "";
} class ReceivingWagonsNote : ITrainStopReceivingWagonsNote {
public NoteKind Kind => NoteKind.ReceivingWagons;
class ReceivingWagonsNote : ITrainStopReceivingWagonsNote { public string Station { get; set; } = "";
public NoteKind Kind => NoteKind.ReceivingWagons; }
public string Station { get; set; } = "";
} class DetachingWagonsNote : ITrainStopReceivingWagonsNote {
public NoteKind Kind => NoteKind.DetachingWagons;
class DetachingWagonsNote : ITrainStopReceivingWagonsNote { public string Station { get; set; } = "";
public NoteKind Kind => NoteKind.DetachingWagons; }
public string Station { get; set; } = "";
} internal void AddDepartsAsNote(string rank, string number, DateTimeOffset departureDate) {
ModifyableNotes.Add(new DepartsAsNote { Rank = rank, Number = number, DepartureDate = departureDate });
internal void AddDepartsAsNote(string rank, string number, DateTimeOffset departureDate) { }
ModifyableNotes.Add(new DepartsAsNote { Rank = rank, Number = number, DepartureDate = departureDate });
} internal void AddTrainNumberChangeNote(string rank, string number) {
ModifyableNotes.Add(new TrainNumberChangeNote { Rank = rank, Number = number });
internal void AddTrainNumberChangeNote(string rank, string number) { }
ModifyableNotes.Add(new TrainNumberChangeNote { Rank = rank, Number = number });
} internal void AddReceivingWagonsNote(string station) {
ModifyableNotes.Add(new ReceivingWagonsNote { Station = station });
internal void AddReceivingWagonsNote(string station) { }
ModifyableNotes.Add(new ReceivingWagonsNote { Station = station });
} internal void AddDetachingWagonsNote(string station) {
ModifyableNotes.Add(new DetachingWagonsNote { Station = station });
internal void AddDetachingWagonsNote(string station) { }
ModifyableNotes.Add(new DetachingWagonsNote { Station = station }); }
}
} public record TrainStopArrDep : ITrainStopArrDep {
public DateTimeOffset ScheduleTime { get; set; }
public record TrainStopArrDep : ITrainStopArrDep { public IStatus? Status { get; private set; }
public DateTimeOffset ScheduleTime { get; set; }
public IStatus? Status { get; private set; } internal void MakeStatus(Action<Status.Status> configurator) {
Status.Status newStatus = new();
internal void MakeStatus(Action<Status.Status> configurator) { configurator(newStatus);
Status.Status newStatus = new(); Status = newStatus;
configurator(newStatus); }
Status = newStatus; }
}
} #endregion
#endregion #region JSON Converters
}
namespace JsonConverters {
/// <summary>
/// Converter factory that (de)serializes <see cref="StatusKind"/> as one of the
/// strings <c>"passing"</c>, <c>"arrival"</c> or <c>"departure"</c>.
/// </summary>
internal class StatusKindConverter : JsonConverterFactory {
    /// <summary>
    /// Only <see cref="StatusKind"/> itself is handled.
    /// </summary>
    public override bool CanConvert(Type typeToConvert) {
        return typeToConvert == typeof(StatusKind);
    }

    public override JsonConverter? CreateConverter(Type typeToConvert, JsonSerializerOptions options) {
        return new Converter();
    }

    private class Converter : JsonConverter<StatusKind> {
        /// <summary>
        /// Reads a <see cref="StatusKind"/> from its string form.
        /// </summary>
        /// <exception cref="JsonException">
        /// The current token is not a string or the string is not a known value.
        /// </exception>
        public override StatusKind Read(
            ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options
        ) {
            // Report malformed input as a JSON error instead of letting
            // GetString() fail with an InvalidOperationException.
            if (reader.TokenType != JsonTokenType.String) {
                throw new JsonException(
                    $"Expected a string for {nameof(StatusKind)} but found token {reader.TokenType}."
                );
            }

            var text = reader.GetString();
            return text switch {
                "arrival" => StatusKind.Arrival,
                "departure" => StatusKind.Departure,
                "passing" => StatusKind.Passing,
                _ => throw new JsonException($"Unknown {nameof(StatusKind)} value '{text}'."),
            };
        }

        /// <summary>
        /// Writes <paramref name="value"/> as its string form.
        /// </summary>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="value"/> is not a defined <see cref="StatusKind"/>.
        /// </exception>
        public override void Write(Utf8JsonWriter writer, StatusKind value, JsonSerializerOptions options) {
            writer.WriteStringValue(value switch {
                StatusKind.Passing => "passing",
                StatusKind.Arrival => "arrival",
                StatusKind.Departure => "departure",
                _ => throw new ArgumentOutOfRangeException(
                    nameof(value), value, $"Undefined {nameof(StatusKind)} value."
                ),
            });
        }
    }
}
/// <summary>
/// Converter factory that (de)serializes <see cref="NoteKind"/> as one of the
/// strings <c>"departsAs"</c>, <c>"trainNumberChange"</c>,
/// <c>"receivingWagons"</c> or <c>"detachingWagons"</c>.
/// </summary>
internal class NoteKindConverter : JsonConverterFactory {
    /// <summary>
    /// Only <see cref="NoteKind"/> itself is handled.
    /// </summary>
    public override bool CanConvert(Type typeToConvert) {
        return typeToConvert == typeof(NoteKind);
    }

    public override JsonConverter? CreateConverter(Type typeToConvert, JsonSerializerOptions options) {
        return new Converter();
    }

    private class Converter : JsonConverter<NoteKind> {
        /// <summary>
        /// Reads a <see cref="NoteKind"/> from its string form.
        /// </summary>
        /// <exception cref="JsonException">
        /// The current token is not a string or the string is not a known value.
        /// </exception>
        public override NoteKind Read(
            ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options
        ) {
            // Report malformed input as a JSON error instead of letting
            // GetString() fail with an InvalidOperationException.
            if (reader.TokenType != JsonTokenType.String) {
                throw new JsonException(
                    $"Expected a string for {nameof(NoteKind)} but found token {reader.TokenType}."
                );
            }

            var text = reader.GetString();
            return text switch {
                "departsAs" => NoteKind.DepartsAs,
                "trainNumberChange" => NoteKind.TrainNumberChange,
                "receivingWagons" => NoteKind.ReceivingWagons,
                "detachingWagons" => NoteKind.DetachingWagons,
                _ => throw new JsonException($"Unknown {nameof(NoteKind)} value '{text}'."),
            };
        }

        /// <summary>
        /// Writes <paramref name="value"/> as its string form.
        /// </summary>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="value"/> is not a defined <see cref="NoteKind"/>.
        /// </exception>
        public override void Write(Utf8JsonWriter writer, NoteKind value, JsonSerializerOptions options) {
            writer.WriteStringValue(value switch {
                NoteKind.DepartsAs => "departsAs",
                NoteKind.TrainNumberChange => "trainNumberChange",
                NoteKind.DetachingWagons => "detachingWagons",
                NoteKind.ReceivingWagons => "receivingWagons",
                _ => throw new ArgumentOutOfRangeException(
                    nameof(value), value, $"Undefined {nameof(NoteKind)} value."
                ),
            });
        }
    }
}
}
#endregion
}

220
scraper/src/Scrapers/Route.cs

@ -1,220 +0,0 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net;
using System.Net.Http;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using AngleSharp;
using AngleSharp.Dom;
using AngleSharp.Html.Dom;
using Flurl;
using InfoferScraper.Models.Train;
using NodaTime;
using NodaTime.Extensions;
using scraper.Models.Itinerary;
namespace InfoferScraper.Scrapers;
/// <summary>
/// Scrapes the itineraries between two stations from the Infofer website.
/// Each instance owns its own cookie container and <see cref="HttpClient"/>.
/// </summary>
public class RouteScraper {
    private const string BaseUrl = "https://mersultrenurilor.infofer.ro/ro-RO/";
    private static readonly DateTimeZone BucharestTz = DateTimeZoneProviders.Tzdb["Europe/Bucharest"];

    // "<km> km cu <rank> <number>"
    private static readonly Regex KmTrainRankNoRegex = new(@"^([0-9]+)\skm\scu\s([A-Z-]+)\s([0-9]+)$");
    // "Operat de <operator>"
    private static readonly Regex OperatorRegex = new(@$"^Operat\sde\s([{Utils.RoLetters}\s]+)$");
    // "Ple|Sos <day> <month abbreviation>[.] <hour>:<minute>"
    private static readonly Regex DepArrRegex = new(@"^(Ple|Sos)\s([0-9]+)\s([a-z]+)\.?\s([0-9]+):([0-9]+)$");

    // Month abbreviations as matched by DepArrRegex, mapped to month numbers.
    private static readonly Dictionary<string, int> Months = new Dictionary<string, int>() {
        ["ian"] = 1,
        ["feb"] = 2,
        ["mar"] = 3,
        ["apr"] = 4,
        ["mai"] = 5,
        ["iun"] = 6,
        ["iul"] = 7,
        ["aug"] = 8,
        ["sep"] = 9,
        ["oct"] = 10,
        ["noi"] = 11,
        ["dec"] = 12,
    };

    private readonly CookieContainer cookieContainer = new();
    private readonly HttpClient httpClient;

    /// <summary>
    /// Creates a scraper.
    /// </summary>
    /// <param name="httpClientHandler">
    /// Optional handler to send requests through. Its <c>CookieContainer</c> and
    /// <c>UseCookies</c> are overwritten so that this scraper's cookies are used.
    /// </param>
    public RouteScraper(HttpClientHandler? httpClientHandler = null) {
        if (httpClientHandler == null) {
            httpClientHandler = new HttpClientHandler {
                CookieContainer = cookieContainer,
                UseCookies = true,
            };
        }
        else {
            httpClientHandler.CookieContainer = cookieContainer;
            httpClientHandler.UseCookies = true;
        }
        httpClient = new HttpClient(httpClientHandler) {
            BaseAddress = new Uri(BaseUrl),
            DefaultRequestVersion = new Version(2, 0),
        };
    }

    /// <summary>
    /// Scrapes the itineraries from <paramref name="from"/> to <paramref name="to"/>.
    /// </summary>
    /// <param name="from">Departure station, used as a path segment of the search URL.</param>
    /// <param name="to">Arrival station, used as a path segment of the search URL.</param>
    /// <param name="dateOverride">
    /// Optional departure date; it is converted to the Europe/Bucharest zone before being sent.
    /// </param>
    /// <returns>
    /// The itineraries found, or <c>null</c> when the results page contains no
    /// <c>body &gt; div</c> element.
    /// </returns>
    /// <exception cref="FormatException">
    /// A departure/arrival or distance/train text does not have the expected shape.
    /// </exception>
    public async Task<List<IItinerary>?> Scrape(string from, string to, DateTimeOffset? dateOverride = null) {
        var dateOverrideInstant = dateOverride?.ToInstant().InZone(BucharestTz);
        dateOverride = dateOverrideInstant?.ToDateTimeOffset();

        var asConfig = Configuration.Default;
        var asContext = BrowsingContext.New(asConfig);

        // First request: the search page, whose form inputs are needed for the second request.
        var firstUrl = "Rute-trenuri"
            .AppendPathSegment(from)
            .AppendPathSegment(to);
        if (dateOverride != null) {
            firstUrl = firstUrl.SetQueryParam("DepartureDate", $"{dateOverride:d.MM.yyyy}");
        }
        firstUrl = firstUrl.SetQueryParam("OrderingTypeId", "0");
        firstUrl = firstUrl.SetQueryParam("TimeSelectionId", "0");
        firstUrl = firstUrl.SetQueryParam("MinutesInDay", "0");
        firstUrl = firstUrl.SetQueryParam("ConnectionsTypeId", "1");
        firstUrl = firstUrl.SetQueryParam("BetweenTrainsMinimumMinutes", "5");
        firstUrl = firstUrl.SetQueryParam("ChangeStationName", "");

        var firstResponse = await httpClient.GetStringAsync(firstUrl);
        var firstDocument = await asContext.OpenAsync(req => req.Content(firstResponse));
        var firstForm = firstDocument.GetElementById("form-search")!;

        var firstResult = firstForm
            .QuerySelectorAll<IHtmlInputElement>("input")
            .Where(elem => elem.Name != null)
            .ToDictionary(elem => elem.Name!, elem => elem.Value);

        // Second request: post the form values back to obtain the itineraries.
        var secondUrl = "".AppendPathSegments("Itineraries", "GetItineraries");
        using var secondResponse = await httpClient.PostAsync(
            secondUrl,
#pragma warning disable CS8620
            new FormUrlEncodedContent(firstResult)
#pragma warning restore CS8620
        );
        var secondResponseContent = await secondResponse.Content.ReadAsStringAsync();
        var secondDocument = await asContext.OpenAsync(
            req => req.Content(secondResponseContent)
        );

        // NOTE(review): relies on a Deconstruct extension defined elsewhere in the
        // project; presumably it yields the first matched element (or null) — confirm.
        var (itineraryInfoDiv, _) = secondDocument
            .QuerySelectorAll("body > div");

        if (itineraryInfoDiv == null) {
            return null;
        }

        var itinerariesLi = secondDocument
            .QuerySelectorAll("body > ul > li");

        // The scraped times are interpreted in the Bucharest zone, so "now" must be
        // taken in that zone as well for the year roll-over check to be consistent.
        var now = SystemClock.Instance.GetCurrentInstant().InZone(BucharestTz).LocalDateTime;

        var itineraries = new List<IItinerary>();
        foreach (var itineraryLi in itinerariesLi) {
            var itinerary = new Itinerary();

            var cardDivs = itineraryLi.QuerySelectorAll(":scope > div > div > div > div");
            var detailsDivs = cardDivs.Last()
                .QuerySelectorAll(":scope > div > div")[1]
                .QuerySelectorAll(":scope > div");
            var trainItineraryAndDetailsLis = detailsDivs[0]
                .QuerySelectorAll(":scope > ul > li");

            var stations = new List<string>();
            var details = new List<ItineraryTrain>();
            // Even items are stations, odd items are the details of the train between them.
            foreach (var (idx, li) in trainItineraryAndDetailsLis.Select((li, idx) => (idx, li))) {
                if (idx % 2 == 0) {
                    // Station
                    stations.Add(
                        li
                            .QuerySelectorAll(":scope > div > div > div > div")[1]
                            .Text()
                            .WithCollapsedSpaces()
                    );
                }
                else {
                    // Detail
                    var detailColumns = li.QuerySelectorAll(":scope > div > div");
                    var leftSideDivs = detailColumns[0].QuerySelectorAll(":scope > div");

                    var departureDateText = leftSideDivs[0]
                        .QuerySelectorAll(":scope > div")[1]
                        .Text()
                        .WithCollapsedSpaces();
                    var departureDate = ParseDepArrDate(departureDateText, now);

                    var arrivalDateText = leftSideDivs[3]
                        .QuerySelectorAll(":scope > div")[1]
                        .Text()
                        .WithCollapsedSpaces();
                    var arrivalDate = ParseDepArrDate(arrivalDateText, now);

                    var rightSideDivs = detailColumns[1].QuerySelectorAll(":scope > div > div");
                    var kmRankNumberText = rightSideDivs[0]
                        .QuerySelectorAll(":scope > div > div")[0]
                        .Text()
                        .WithCollapsedSpaces();
                    var kmRankNumberMatch = KmTrainRankNoRegex.Match(kmRankNumberText);
                    if (!kmRankNumberMatch.Success) {
                        throw new FormatException($"Unexpected distance/train text: '{kmRankNumberText}'");
                    }

                    var operatorText = rightSideDivs[0]
                        .QuerySelectorAll(":scope > div > div")[1]
                        .Text()
                        .WithCollapsedSpaces();
                    // A failed match leaves the operator empty (best effort).
                    var operatorMatch = OperatorRegex.Match(operatorText);

                    var train = new ItineraryTrain {
                        ArrivalDate = BucharestTz.AtLeniently(arrivalDate).ToDateTimeOffset(),
                        DepartureDate = BucharestTz.AtLeniently(departureDate).ToDateTimeOffset(),
                        Km = int.Parse(kmRankNumberMatch.Groups[1].Value),
                        TrainRank = kmRankNumberMatch.Groups[2].Value,
                        TrainNumber = kmRankNumberMatch.Groups[3].Value,
                        Operator = operatorMatch.Groups[1].Value,
                    };

                    // Only every second div (odd index) is read as an intermediate stop.
                    foreach (var div in leftSideDivs[2]
                                 .QuerySelectorAll(":scope > div")
                                 .Where((_, i) => i % 2 != 0)) {
                        var text = div.Text().WithCollapsedSpaces();
                        if (text == "Nu sunt stații intermediare.") continue;
                        train.AddIntermediateStop(text);
                    }

                    details.Add(train);
                }
            }

            // Pair consecutive stations (from, to) with the train detail listed between them.
            foreach (var ((iFrom, iTo), detail) in stations.Zip(stations.Skip(1)).Zip(details)) {
                detail.From = iFrom;
                detail.To = iTo;
                itinerary.AddTrain(detail);
            }

            itineraries.Add(itinerary);
        }

        return itineraries;
    }

    /// <summary>
    /// Parses a departure/arrival text matched by <see cref="DepArrRegex"/> into a local date-time.
    /// The text carries no year: the year of <paramref name="now"/> is used, and the result is
    /// moved one year forward when it would lie more than a day before <paramref name="now"/>.
    /// </summary>
    /// <exception cref="FormatException">
    /// The text does not match the expected pattern or names an unknown month.
    /// </exception>
    private static LocalDateTime ParseDepArrDate(string text, LocalDateTime now) {
        var match = DepArrRegex.Match(text);
        if (!match.Success || !Months.TryGetValue(match.Groups[3].Value, out var month)) {
            throw new FormatException($"Unexpected departure/arrival text: '{text}'");
        }

        var date = new LocalDateTime(
            now.Year,
            month,
            int.Parse(match.Groups[2].Value),
            int.Parse(match.Groups[4].Value),
            int.Parse(match.Groups[5].Value),
            0
        );
        if (date < now.PlusDays(-1)) {
            date = date.PlusYears(1);
        }
        return date;
    }
}

47
scraper/src/Scrapers/Station.cs

@ -14,7 +14,7 @@ using NodaTime;
using NodaTime.Extensions; using NodaTime.Extensions;
namespace InfoferScraper.Scrapers { namespace InfoferScraper.Scrapers {
public class StationScraper { public static class StationScraper {
private static readonly Regex StationInfoRegex = new($@"^([{Utils.RoLetters}.0-9 ]+)\sîn\s([0-9.]+)$"); private static readonly Regex StationInfoRegex = new($@"^([{Utils.RoLetters}.0-9 ]+)\sîn\s([0-9.]+)$");
private static readonly Regex StoppingTimeRegex = new( private static readonly Regex StoppingTimeRegex = new(
@ -28,36 +28,25 @@ namespace InfoferScraper.Scrapers {
private static readonly Regex PlatformRegex = new(@"^linia\s([A-Za-z0-9]+)$"); private static readonly Regex PlatformRegex = new(@"^linia\s([A-Za-z0-9]+)$");
private static readonly Regex TrainUrlDateRegex = new(@"Date=([0-9]{2}).([0-9]{2}).([0-9]{4})"); private static readonly Regex TrainUrlDateRegex = new(@"Date=([0-9]{2}).([0-9]{2}).([0-9]{4})");
private static readonly DateTimeZone BucharestTz = DateTimeZoneProviders.Tzdb["Europe/Bucharest"]; private static readonly DateTimeZone BucharestTz = DateTimeZoneProviders.Tzdb["Europe/Bucharest"];
private const string BaseUrl = "https://mersultrenurilor.infofer.ro/ro-RO/"; private const string BaseUrl = "https://mersultrenurilor.infofer.ro/ro-RO/";
private readonly CookieContainer cookieContainer = new(); private static readonly CookieContainer CookieContainer = new();
private readonly HttpClient httpClient;
public StationScraper(HttpClientHandler? httpClientHandler = null) { private static readonly HttpClient HttpClient = new(new HttpClientHandler {
if (httpClientHandler == null) { CookieContainer = CookieContainer,
httpClientHandler = new HttpClientHandler { UseCookies = true,
CookieContainer = cookieContainer, }) {
UseCookies = true, BaseAddress = new Uri(BaseUrl),
}; DefaultRequestVersion = new Version(2, 0),
} };
else {
httpClientHandler.CookieContainer = cookieContainer;
httpClientHandler.UseCookies = true;
}
httpClient = new HttpClient(httpClientHandler) {
BaseAddress = new Uri(BaseUrl),
DefaultRequestVersion = new Version(2, 0),
};
}
public async Task<IStationScrapeResult> Scrape(string stationName, DateTimeOffset? date = null) { public static async Task<IStationScrapeResult> Scrape(string stationName, DateTimeOffset? date = null) {
var dateInstant = date?.ToInstant().InZone(BucharestTz); var dateInstant = date?.ToInstant().InZone(BucharestTz);
date = dateInstant?.ToDateTimeOffset(); date = dateInstant?.ToDateTimeOffset();
stationName = stationName.RoLettersToEn(); stationName = stationName.RoLettersToEn();
var result = new StationScrapeResult(); var result = new StationScrapeResult();
@ -70,7 +59,7 @@ namespace InfoferScraper.Scrapers {
if (date != null) { if (date != null) {
firstUrl = firstUrl.SetQueryParam("Date", $"{date:d.MM.yyyy}"); firstUrl = firstUrl.SetQueryParam("Date", $"{date:d.MM.yyyy}");
} }
var firstResponse = await httpClient.GetStringAsync(firstUrl); var firstResponse = await HttpClient.GetStringAsync(firstUrl);
var firstDocument = await asContext.OpenAsync(req => req.Content(firstResponse)); var firstDocument = await asContext.OpenAsync(req => req.Content(firstResponse));
var firstForm = firstDocument.GetElementById("form-search")!; var firstForm = firstDocument.GetElementById("form-search")!;
@ -80,7 +69,7 @@ namespace InfoferScraper.Scrapers {
.ToDictionary(elem => elem.Name!, elem => elem.Value); .ToDictionary(elem => elem.Name!, elem => elem.Value);
var secondUrl = "".AppendPathSegments("Stations", "StationsResult"); var secondUrl = "".AppendPathSegments("Stations", "StationsResult");
var secondResponse = await httpClient.PostAsync( var secondResponse = await HttpClient.PostAsync(
secondUrl, secondUrl,
#pragma warning disable CS8620 #pragma warning disable CS8620
new FormUrlEncodedContent(firstResult) new FormUrlEncodedContent(firstResult)
@ -178,9 +167,9 @@ namespace InfoferScraper.Scrapers {
.Text() .Text()
.WithCollapsedSpaces(); .WithCollapsedSpaces();
foreach (var station in routeDiv.QuerySelectorAll(":scope > div > div")[1] foreach (var station in routeDiv.QuerySelectorAll(":scope > div > div")[1]
.Text() .Text()
.WithCollapsedSpaces() .WithCollapsedSpaces()
.Split(" - ")) { .Split(" - ")) {
arrDep.ModifyableTrain.AddRouteStation(station); arrDep.ModifyableTrain.AddRouteStation(station);
} }
@ -193,7 +182,7 @@ namespace InfoferScraper.Scrapers {
.QuerySelectorAll(":scope > div"); .QuerySelectorAll(":scope > div");
var delayDiv = statusDivComponents[0]; var delayDiv = statusDivComponents[0];
var (delayMin, (approx, _)) = (StatusRegex.Match( var (delayMin, (approx, _)) = (StatusRegex.Match(
delayDiv delayDiv
.Text() .Text()

500
scraper/src/Scrapers/Train.cs

@ -1,261 +1,239 @@
using System; using System;
using System.Collections.Generic; using System.Collections.Generic;
using System.Linq; using System.Linq;
using System.Net; using System.Net;
using System.Net.Http; using System.Net.Http;
using System.Text.RegularExpressions; using System.Text.RegularExpressions;
using System.Threading.Tasks; using System.Threading.Tasks;
using AngleSharp; using AngleSharp;
using AngleSharp.Dom; using AngleSharp.Dom;
using AngleSharp.Html.Dom; using AngleSharp.Html.Dom;
using Flurl; using Flurl;
using InfoferScraper.Models.Train; using InfoferScraper.Models.Train;
using NodaTime; using NodaTime;
using NodaTime.Extensions; using NodaTime.Extensions;
using scraper.Exceptions; using scraper.Exceptions;
namespace InfoferScraper.Scrapers { namespace InfoferScraper.Scrapers {
public class TrainScraper { public static class TrainScraper {
private const string BaseUrl = "https://mersultrenurilor.infofer.ro/ro-RO/"; private const string BaseUrl = "https://mersultrenurilor.infofer.ro/ro-RO/";
private static readonly Regex TrainInfoRegex = new(@"^([A-Z-]+)\s([0-9]+)\sîn\s([0-9.]+)$"); private static readonly Regex TrainInfoRegex = new(@"^([A-Z-]+)\s([0-9]+)\sîn\s([0-9.]+)$");
private static readonly Regex OperatorRegex = new(@"^Operat\sde\s(.+)$"); private static readonly Regex OperatorRegex = new(@"^Operat\sde\s(.+)$");
private static readonly Regex RouteRegex = private static readonly Regex RouteRegex =
new(@$"^Parcurs\stren\s([{Utils.RoLetters} ]+)[-–]([{Utils.RoLetters}\s]+)$"); new(@$"^Parcurs\stren\s([{Utils.RoLetters} ]+)[-–]([{Utils.RoLetters}\s]+)$");
private static readonly Regex SlRegex = private static readonly Regex SlRegex =
new( new(
@"^(?:Fără|([0-9]+)\smin)\s(întârziere|mai\sdevreme)\sla\s(trecerea\sfără\soprire\sprin|sosirea\sîn|plecarea\sdin)\s(.+)\.$"); @"^(?:Fără|([0-9]+)\smin)\s(întârziere|mai\sdevreme)\sla\s(trecerea\sfără\soprire\sprin|sosirea\sîn|plecarea\sdin)\s(.+)\.$");
private static readonly Dictionary<char, StatusKind> SlStateMap = new() { private static readonly Dictionary<char, StatusKind> SlStateMap = new() {
{ 't', StatusKind.Passing }, { 't', StatusKind.Passing },
{ 's', StatusKind.Arrival }, { 's', StatusKind.Arrival },
{ 'p', StatusKind.Departure }, { 'p', StatusKind.Departure },
}; };
private static readonly Regex KmRegex = new(@"^km\s([0-9]+)$"); private static readonly Regex KmRegex = new(@"^km\s([0-9]+)$");
private static readonly Regex StoppingTimeRegex = new(@"^([0-9]+)\s(min|sec)\soprire$"); private static readonly Regex StoppingTimeRegex = new(@"^([0-9]+)\s(min|sec)\soprire$");
private static readonly Regex PlatformRegex = new(@"^linia\s(.+)$"); private static readonly Regex PlatformRegex = new(@"^linia\s(.+)$");
private static readonly Regex StationArrdepStatusRegex = private static readonly Regex StationArrdepStatusRegex =
new(@"^(?:(la timp)|(?:((?:\+|-)[0-9]+) min \((?:(?:întârziere)|(?:mai devreme))\)))(\*?)$"); new(@"^(?:(la timp)|(?:((?:\+|-)[0-9]+) min \((?:(?:întârziere)|(?:mai devreme))\)))(\*?)$");
private static readonly Regex TrainNumberChangeNoteRegex = private static readonly Regex TrainNumberChangeNoteRegex =
new(@"^Trenul își schimbă numărul în\s([A-Z-]+)\s([0-9]+)$"); new(@"^Trenul își schimbă numărul în\s([A-Z-]+)\s([0-9]+)$");
private static readonly Regex DepartsAsNoteRegex = private static readonly Regex DepartsAsNoteRegex =
new(@"^Trenul pleacă cu numărul\s([A-Z-]+)\s([0-9]+)\sîn\s([0-9]{2}).([0-9]{2}).([0-9]{4})$"); new(@"^Trenul pleacă cu numărul\s([A-Z-]+)\s([0-9]+)\sîn\s([0-9]{2}).([0-9]{2}).([0-9]{4})$");
private static readonly Regex ReceivingWagonsNoteRegex = private static readonly Regex ReceivingWagonsNoteRegex =
new(@"^Trenul primește vagoane de la\s(.+)\.$"); new(@"^Trenul primește vagoane de la\s(.+)\.$");
private static readonly Regex DetachingWagonsNoteRegex = private static readonly Regex DetachingWagonsNoteRegex =
new(@"^Trenul detașează vagoane pentru stația\s(.+)\.$"); new(@"^Trenul detașează vagoane pentru stația\s(.+)\.$");
private static readonly DateTimeZone BucharestTz = DateTimeZoneProviders.Tzdb["Europe/Bucharest"]; private static readonly DateTimeZone BucharestTz = DateTimeZoneProviders.Tzdb["Europe/Bucharest"];
private readonly CookieContainer cookieContainer = new(); private static readonly CookieContainer CookieContainer = new();
private readonly HttpClient httpClient; private static readonly HttpClient HttpClient = new(new HttpClientHandler {
CookieContainer = CookieContainer,
public TrainScraper(HttpClientHandler? httpClientHandler = null) UseCookies = true,
{ }) {
if (httpClientHandler == null) { BaseAddress = new Uri(BaseUrl),
httpClientHandler = new HttpClientHandler { DefaultRequestVersion = new Version(2, 0),
CookieContainer = cookieContainer, };
UseCookies = true,
}; public static async Task<ITrainScrapeResult?> Scrape(string trainNumber, DateTimeOffset? dateOverride = null) {
} var dateOverrideInstant = dateOverride?.ToInstant().InZone(BucharestTz);
else { dateOverride = dateOverrideInstant?.ToDateTimeOffset();
httpClientHandler.CookieContainer = cookieContainer; TrainScrapeResult result = new();
httpClientHandler.UseCookies = true;
} var asConfig = Configuration.Default;
httpClient = new HttpClient(httpClientHandler) { var asContext = BrowsingContext.New(asConfig);
BaseAddress = new Uri(BaseUrl),
DefaultRequestVersion = new Version(2, 0), var firstUrl = "Tren"
}; .AppendPathSegment(trainNumber);
} if (dateOverride != null) {
firstUrl = firstUrl.SetQueryParam("Date", $"{dateOverride:d.MM.yyyy}");
public async Task<ITrainScrapeResult?> Scrape(string trainNumber, DateTimeOffset? dateOverride = null) { }
var dateOverrideInstant = dateOverride?.ToInstant().InZone(BucharestTz); var firstResponse = await HttpClient.GetStringAsync(firstUrl);
dateOverride = dateOverrideInstant?.ToDateTimeOffset(); var firstDocument = await asContext.OpenAsync(req => req.Content(firstResponse));
TrainScrapeResult result = new(); var firstForm = firstDocument.GetElementById("form-search")!;
var asConfig = Configuration.Default; var firstResult = firstForm
var asContext = BrowsingContext.New(asConfig); .QuerySelectorAll<IHtmlInputElement>("input")
.Where(elem => elem.Name != null)
var firstUrl = "Tren" .ToDictionary(elem => elem.Name!, elem => elem.Value);
.AppendPathSegment(trainNumber);
if (dateOverride != null) { var secondUrl = "".AppendPathSegments("Trains", "TrainsResult");
firstUrl = firstUrl.SetQueryParam("Date", $"{dateOverride:d.MM.yyyy}"); var secondResponse = await HttpClient.PostAsync(
} secondUrl,
var firstResponse = await httpClient.GetStringAsync(firstUrl); #pragma warning disable CS8620
var firstDocument = await asContext.OpenAsync(req => req.Content(firstResponse)); new FormUrlEncodedContent(firstResult)
var firstForm = firstDocument.GetElementById("form-search")!; #pragma warning restore CS8620
);
var firstResult = firstForm var secondResponseContent = await secondResponse.Content.ReadAsStringAsync();
.QuerySelectorAll<IHtmlInputElement>("input") var secondDocument = await asContext.OpenAsync(
.Where(elem => elem.Name != null) req => req.Content(secondResponseContent)
.ToDictionary(elem => elem.Name!, elem => elem.Value); );
var secondUrl = "".AppendPathSegments("Trains", "TrainsResult"); var (trainInfoDiv, (_, (_, (resultsDiv, _)))) = secondDocument
var secondResponse = await httpClient.PostAsync( .QuerySelectorAll("body > div");
secondUrl, if (trainInfoDiv == null) {
#pragma warning disable CS8620 return null;
new FormUrlEncodedContent(firstResult) }
#pragma warning restore CS8620 if (resultsDiv == null) {
); throw new TrainNotThisDayException();
var secondResponseContent = await secondResponse.Content.ReadAsStringAsync(); }
var secondDocument = await asContext.OpenAsync( trainInfoDiv = trainInfoDiv.QuerySelectorAll(":scope > div > div").First();
req => req.Content(secondResponseContent)
); (result.Rank, (result.Number, (result.Date, _))) = (TrainInfoRegex.Match(
trainInfoDiv.QuerySelector(":scope > h2")!.Text().WithCollapsedSpaces()
var (trainInfoDiv, (_, (_, (resultsDiv, _)))) = secondDocument ).Groups as IEnumerable<Group>).Select(group => group.Value).Skip(1);
.QuerySelectorAll("body > div"); var (scrapedDateD, (scrapedDateM, (scrapedDateY, _))) = result.Date
if (trainInfoDiv == null) { .Split('.')
return null; .Select(int.Parse);
} var date = new DateTime(scrapedDateY, scrapedDateM, scrapedDateD);
if (resultsDiv == null) {
throw new TrainNotThisDayException(); result.Operator = (OperatorRegex.Match(
} trainInfoDiv.QuerySelector(":scope > p")!.Text().WithCollapsedSpaces()
trainInfoDiv = trainInfoDiv.QuerySelectorAll(":scope > div > div").First(); ).Groups as IEnumerable<Group>).Skip(1).First().Value;
(result.Rank, (result.Number, (result.Date, _))) = (TrainInfoRegex.Match( foreach (var groupDiv in resultsDiv.QuerySelectorAll(":scope > div")) {
trainInfoDiv.QuerySelector(":scope > h2")!.Text().WithCollapsedSpaces() result.AddTrainGroup(group => {
).Groups as IEnumerable<Group>).Select(group => group.Value).Skip(1); var statusDiv = groupDiv.QuerySelectorAll(":scope > div").First();
var (scrapedDateD, (scrapedDateM, (scrapedDateY, _))) = result.Date var routeText = statusDiv.QuerySelector(":scope > h4")!.Text().WithCollapsedSpaces();
.Split('.') group.ConfigureRoute(route => {
.Select(int.Parse); (route.From, (route.To, _)) = (RouteRegex.Match(routeText).Groups as IEnumerable<Group>).Skip(1)
var date = new DateTime(scrapedDateY, scrapedDateM, scrapedDateD); .Select(group => group.Value);
});
result.Operator = (OperatorRegex.Match(
trainInfoDiv.QuerySelector(":scope > p")!.Text().WithCollapsedSpaces() try {
).Groups as IEnumerable<Group>).Skip(1).First().Value; var statusLineMatch =
SlRegex.Match(statusDiv.QuerySelector(":scope > div")!.Text().WithCollapsedSpaces());
foreach (var groupDiv in resultsDiv.QuerySelectorAll(":scope > div")) { var (slmDelay, (slmLate, (slmArrival, (slmStation, _)))) =
result.AddTrainGroup(group => { (statusLineMatch.Groups as IEnumerable<Group>).Skip(1).Select(group => group.Value);
var statusDiv = groupDiv.QuerySelectorAll(":scope > div").First(); group.MakeStatus(status => {
var routeText = statusDiv.QuerySelector(":scope > h4")!.Text().WithCollapsedSpaces(); status.Delay = string.IsNullOrEmpty(slmDelay) ? 0 :
group.ConfigureRoute(route => { slmLate == "întârziere" ? int.Parse(slmDelay) : -int.Parse(slmDelay);
(route.From, (route.To, _)) = (RouteRegex.Match(routeText).Groups as IEnumerable<Group>).Skip(1) status.Station = slmStation;
.Select(group => group.Value); status.State = SlStateMap[slmArrival[0]];
}); });
}
try { catch {
var statusLineMatch = // ignored
SlRegex.Match(statusDiv.QuerySelector(":scope > div")!.Text().WithCollapsedSpaces()); }
var (slmDelay, (slmLate, (slmArrival, (slmStation, _)))) =
(statusLineMatch.Groups as IEnumerable<Group>).Skip(1).Select(group => group.Value); Utils.DateTimeSequencer dtSeq = new(date.Year, date.Month, date.Day);
group.MakeStatus(status => { var stations = statusDiv.QuerySelectorAll(":scope > ul > li");
status.Delay = string.IsNullOrEmpty(slmDelay) ? 0 : foreach (var station in stations) {
slmLate == "întârziere" ? int.Parse(slmDelay) : -int.Parse(slmDelay); group.AddStopDescription(stopDescription => {
status.Station = slmStation; var (left, (middle, (right, _))) = station
status.State = SlStateMap[slmArrival[0]]; .QuerySelectorAll(":scope > div > div");
}); var (stopDetails, (stopNotes, _)) = middle
} .QuerySelectorAll(":scope > div > div > div");
catch { stopDescription.Name = stopDetails
// ignored .QuerySelectorAll(":scope > div")[0]
} .Text()
.WithCollapsedSpaces();
Utils.DateTimeSequencer dtSeq = new(date.Year, date.Month, date.Day); var scrapedKm = stopDetails
var stations = statusDiv.QuerySelectorAll(":scope > ul > li"); .QuerySelectorAll(":scope > div")[1]
foreach (var station in stations) { .Text()
group.AddStopDescription(stopDescription => { .WithCollapsedSpaces();
var (left, (middle, (right, _))) = station stopDescription.Km = int.Parse(
.QuerySelectorAll(":scope > div > div"); (KmRegex.Match(scrapedKm).Groups as IEnumerable<Group>).Skip(1).First().Value
var (stopDetails, (stopNotes, _)) = middle );
.QuerySelectorAll(":scope > div > div > div"); var scrapedStoppingTime = stopDetails
stopDescription.Name = stopDetails .QuerySelectorAll(":scope > div")[2]
.QuerySelectorAll(":scope > div")[0] .Text()
.Text() .WithCollapsedSpaces();
.WithCollapsedSpaces(); if (!string.IsNullOrEmpty(scrapedStoppingTime)) {
stopDescription.LinkName = new Flurl.Url(stopDetails var (stValue, (stMinsec, _)) =
.QuerySelectorAll(":scope > div")[0] (StoppingTimeRegex.Match(scrapedStoppingTime).Groups as IEnumerable<Group>)
.QuerySelector(":scope a") .Skip(1)
.Attributes["href"] .Select(group => group.Value);
.Value).PathSegments.Last(); stopDescription.StoppingTime = int.Parse(stValue);
var scrapedKm = stopDetails if (stMinsec == "min") stopDescription.StoppingTime *= 60;
.QuerySelectorAll(":scope > div")[1] }
.Text()
.WithCollapsedSpaces(); var scrapedPlatform = stopDetails
stopDescription.Km = int.Parse( .QuerySelectorAll(":scope > div")[3]
(KmRegex.Match(scrapedKm).Groups as IEnumerable<Group>).Skip(1).First().Value .Text()
); .WithCollapsedSpaces();
var scrapedStoppingTime = stopDetails if (!string.IsNullOrEmpty(scrapedPlatform))
.QuerySelectorAll(":scope > div")[2] stopDescription.Platform = PlatformRegex.Match(scrapedPlatform).Groups[1].Value;
.Text()
.WithCollapsedSpaces(); void ScrapeTime(IElement element, ref TrainStopArrDep arrDep) {
if (!string.IsNullOrEmpty(scrapedStoppingTime)) { var parts = element.QuerySelectorAll(":scope > div > div > div");
var (stValue, (stMinsec, _)) = if (parts.Length == 0) throw new OperationCanceledException();
(StoppingTimeRegex.Match(scrapedStoppingTime).Groups as IEnumerable<Group>) var time = parts[0];
.Skip(1) var scrapedTime = time.Text().WithCollapsedSpaces();
.Select(group => group.Value); var (stHour, (stMin, _)) = scrapedTime.Split(':').Select(int.Parse);
stopDescription.StoppingTime = int.Parse(stValue); arrDep.ScheduleTime = BucharestTz.AtLeniently(dtSeq.Next(stHour, stMin).ToLocalDateTime())
if (stMinsec == "min") stopDescription.StoppingTime *= 60; .ToDateTimeOffset();
}
if (parts.Length < 2) return;
var scrapedPlatform = stopDetails
.QuerySelectorAll(":scope > div")[3] var statusElement = parts[1];
.Text() var (onTime, (delay, (approx, _))) = (StationArrdepStatusRegex.Match(
.WithCollapsedSpaces(); statusElement.Text().WithCollapsedSpaces(replaceWith: " ")
if (!string.IsNullOrEmpty(scrapedPlatform)) ).Groups as IEnumerable<Group>).Skip(1).Select(group => group.Value);
stopDescription.Platform = PlatformRegex.Match(scrapedPlatform).Groups[1].Value; arrDep.MakeStatus(status => {
status.Delay = string.IsNullOrEmpty(onTime) ? int.Parse(delay) : 0;
void ScrapeTime(IElement element, ref TrainStopArrDep arrDep) { status.Real = string.IsNullOrEmpty(approx);
var parts = element.QuerySelectorAll(":scope > div > div > div"); });
if (parts.Length == 0) throw new OperationCanceledException(); }
var time = parts[0];
var scrapedTime = time.Text().WithCollapsedSpaces(); try {
var (stHour, (stMin, _)) = scrapedTime.Split(':').Select(int.Parse); stopDescription.MakeArrival(arrival => { ScrapeTime(left, ref arrival); });
arrDep.ScheduleTime = BucharestTz.AtLeniently(dtSeq.Next(stHour, stMin).ToLocalDateTime()) }
.ToDateTimeOffset(); catch (OperationCanceledException) { }
if (parts.Length < 2) return; try {
stopDescription.MakeDeparture(departure => { ScrapeTime(right, ref departure); });
var statusElement = parts[1]; }
var (onTime, (delay, (approx, _))) = (StationArrdepStatusRegex.Match( catch (OperationCanceledException) { }
statusElement.Text().WithCollapsedSpaces(replaceWith: " ")
).Groups as IEnumerable<Group>).Skip(1).Select(group => group.Value); foreach (var noteDiv in stopNotes.QuerySelectorAll(":scope > div > div")) {
arrDep.MakeStatus(status => { var noteText = noteDiv.Text().WithCollapsedSpaces();
if (string.IsNullOrEmpty(onTime) && delay == null) { Match trainNumberChangeMatch, departsAsMatch, detachingWagons, receivingWagons;
status.Cancelled = true; if ((trainNumberChangeMatch = TrainNumberChangeNoteRegex.Match(noteText)).Success) {
} stopDescription.AddTrainNumberChangeNote(trainNumberChangeMatch.Groups[1].Value, trainNumberChangeMatch.Groups[2].Value);
else { }
status.Delay = string.IsNullOrEmpty(onTime) ? int.Parse(delay) : 0; else if ((departsAsMatch = DepartsAsNoteRegex.Match(noteText)).Success) {
} var groups = departsAsMatch.Groups;
status.Real = string.IsNullOrEmpty(approx); var departureDate = BucharestTz.AtStrictly(new(int.Parse(groups[5].Value), int.Parse(groups[4].Value), int.Parse(groups[3].Value), 0, 0));
}); stopDescription.AddDepartsAsNote(groups[1].Value, groups[2].Value, departureDate.ToDateTimeOffset());
} }
else if ((detachingWagons = DetachingWagonsNoteRegex.Match(noteText)).Success) {
try { stopDescription.AddDetachingWagonsNote(detachingWagons.Groups[1].Value);
stopDescription.MakeArrival(arrival => { ScrapeTime(left, ref arrival); }); }
} else if ((receivingWagons = ReceivingWagonsNoteRegex.Match(noteText)).Success) {
catch (OperationCanceledException) { } stopDescription.AddReceivingWagonsNote(receivingWagons.Groups[1].Value);
}
try { }
stopDescription.MakeDeparture(departure => { ScrapeTime(right, ref departure); }); });
} }
catch (OperationCanceledException) { } });
}
foreach (var noteDiv in stopNotes.QuerySelectorAll(":scope > div > div")) { return result;
var noteText = noteDiv.Text().WithCollapsedSpaces(); }
Match trainNumberChangeMatch, departsAsMatch, detachingWagons, receivingWagons; }
if ((trainNumberChangeMatch = TrainNumberChangeNoteRegex.Match(noteText)).Success) { } // namespace
stopDescription.AddTrainNumberChangeNote(trainNumberChangeMatch.Groups[1].Value, trainNumberChangeMatch.Groups[2].Value);
}
else if ((departsAsMatch = DepartsAsNoteRegex.Match(noteText)).Success) {
var groups = departsAsMatch.Groups;
var departureDate = BucharestTz.AtStrictly(new(int.Parse(groups[5].Value), int.Parse(groups[4].Value), int.Parse(groups[3].Value), 0, 0));
stopDescription.AddDepartsAsNote(groups[1].Value, groups[2].Value, departureDate.ToDateTimeOffset());
}
else if ((detachingWagons = DetachingWagonsNoteRegex.Match(noteText)).Success) {
stopDescription.AddDetachingWagonsNote(detachingWagons.Groups[1].Value);
}
else if ((receivingWagons = ReceivingWagonsNoteRegex.Match(noteText)).Success) {
stopDescription.AddReceivingWagonsNote(receivingWagons.Groups[1].Value);
}
}
});
}
});
}
return result;
}
}
} // namespace

3
server/Controllers/V2/StationsController.cs

@ -1,6 +1,5 @@
using System.Collections.Generic; using System.Collections.Generic;
using Microsoft.AspNetCore.Mvc; using Microsoft.AspNetCore.Mvc;
using Server.Models.Database;
using Server.Services.Interfaces; using Server.Services.Interfaces;
namespace Server.Controllers.V2; namespace Server.Controllers.V2;
@ -16,7 +15,7 @@ public class StationsController : Controller {
} }
[HttpGet("")] [HttpGet("")]
public ActionResult<IEnumerable<StationListing>> ListStations() { public ActionResult<IEnumerable<IStationRecord>> ListStations() {
return Ok(Database.Stations); return Ok(Database.Stations);
} }
} }

3
server/Controllers/V2/TrainsController.cs

@ -1,6 +1,5 @@
using System.Collections.Generic; using System.Collections.Generic;
using Microsoft.AspNetCore.Mvc; using Microsoft.AspNetCore.Mvc;
using Server.Models.Database;
using Server.Services.Interfaces; using Server.Services.Interfaces;
namespace Server.Controllers.V2; namespace Server.Controllers.V2;
@ -16,7 +15,7 @@ public class TrainsController : Controller {
} }
[HttpGet("")] [HttpGet("")]
public ActionResult<IEnumerable<TrainListing>> ListTrains() { public ActionResult<IEnumerable<ITrainRecord>> ListTrains() {
return Ok(Database.Trains); return Ok(Database.Trains);
} }
} }

40
server/Controllers/V3/ItinerariesController.cs

@ -1,40 +0,0 @@
using System;
using System.Collections.Generic;
using System.Threading.Tasks;
using Microsoft.AspNetCore.Http;
using Microsoft.AspNetCore.Mvc;
using scraper.Models.Itinerary;
using Server.Services.Interfaces;
namespace Server.Controllers.V3;
/// <summary>
/// V3 API controller routed at <c>/v3/[controller]</c> that answers itinerary
/// lookups by delegating to the injected <see cref="IDataManager"/>.
/// </summary>
[ApiController]
[ApiExplorerSettings(GroupName = "v3")]
[Route("/v3/[controller]")]
public class ItinerariesController : Controller {
    private IDataManager DataManager { get; }
    private IDatabase Database { get; }

    /// <summary>Stores the two injected services on the controller.</summary>
    /// <param name="dataManager">Service used to fetch itineraries.</param>
    /// <param name="database">Database service; kept on the controller but not read by the action below.</param>
    public ItinerariesController(IDataManager dataManager, IDatabase database) {
        (DataManager, Database) = (dataManager, database);
    }

    /// <summary>
    /// Looks up itineraries between two stations, optionally for a given date.
    /// </summary>
    /// <param name="from">Query-string value forwarded as the origin.</param>
    /// <param name="to">Query-string value forwarded as the destination.</param>
    /// <param name="date">Optional query-string date forwarded unchanged.</param>
    /// <returns>
    /// 200 with the itineraries when the data manager returns a result,
    /// 404 when it returns <c>null</c>.
    /// </returns>
    [HttpGet("")]
    [ProducesResponseType(typeof(IEnumerable<IItinerary>), StatusCodes.Status200OK)]
    [ProducesResponseType(StatusCodes.Status404NotFound)]
    public async Task<ActionResult<IEnumerable<IItinerary>>> FindItineraries(
        [FromQuery] string from,
        [FromQuery] string to,
        [FromQuery] DateTimeOffset? date
    ) {
        var found = await DataManager.FetchItineraries(from, to, date);
        if (found is not null) {
            return Ok(found);
        }

        return NotFound();
    }
}

3
server/Controllers/V3/StationsController.cs

@ -4,7 +4,6 @@ using System.Threading.Tasks;
using InfoferScraper.Models.Station; using InfoferScraper.Models.Station;
using Microsoft.AspNetCore.Http; using Microsoft.AspNetCore.Http;
using Microsoft.AspNetCore.Mvc; using Microsoft.AspNetCore.Mvc;
using Server.Models.Database;
using Server.Services.Interfaces; using Server.Services.Interfaces;
namespace Server.Controllers.V3; namespace Server.Controllers.V3;
@ -22,7 +21,7 @@ public class StationsController : Controller {
} }
[HttpGet("")] [HttpGet("")]
public ActionResult<IEnumerable<StationListing>> ListStations() { public ActionResult<IEnumerable<IStationRecord>> ListStations() {
return Ok(Database.Stations); return Ok(Database.Stations);
} }

3
server/Controllers/V3/TrainsController.cs

@ -5,7 +5,6 @@ using InfoferScraper.Models.Train;
using Microsoft.AspNetCore.Http; using Microsoft.AspNetCore.Http;
using Microsoft.AspNetCore.Mvc; using Microsoft.AspNetCore.Mvc;
using scraper.Exceptions; using scraper.Exceptions;
using Server.Models.Database;
using Server.Services.Interfaces; using Server.Services.Interfaces;
namespace Server.Controllers.V3; namespace Server.Controllers.V3;
@ -23,7 +22,7 @@ public class TrainsController : Controller {
} }
[HttpGet("")] [HttpGet("")]
public ActionResult<IEnumerable<TrainListing>> ListTrains() { public ActionResult<IEnumerable<ITrainRecord>> ListTrains() {
return Ok(Database.Trains); return Ok(Database.Trains);
} }

5
server/Models/Database/MongoSettings.cs

@ -1,5 +0,0 @@
namespace Server.Models.Database;
/// <summary>
/// Settings record holding a connection string and a database name
/// (presumably for the MongoDB-backed store, judging by the type name).
/// </summary>
/// <param name="ConnectionString">Connection string value.</param>
/// <param name="DatabaseName">Database name value.</param>
public record MongoSettings(string ConnectionString, string DatabaseName) {
    /// <summary>Creates an instance with both values set to the empty string.</summary>
    public MongoSettings()
        : this(ConnectionString: string.Empty, DatabaseName: string.Empty) {
    }
}

17
server/Models/Database/StationAlias.cs

@ -1,17 +0,0 @@
using MongoDB.Bson;
using MongoDB.Bson.Serialization.Attributes;
using Newtonsoft.Json;
namespace Server.Models.Database;
/// <summary>
/// Database record pairing a station name with the id of a listing.
/// </summary>
/// <param name="Id">
/// BSON document id, stored with the ObjectId representation; left out of
/// Newtonsoft JSON output when <c>null</c>.
/// </param>
/// <param name="Name">Name carried by this alias.</param>
/// <param name="ListingId">
/// Id stored with the ObjectId representation (presumably the aliased
/// station listing — confirm against the database code).
/// </param>
public record StationAlias(
    [property: BsonId, BsonRepresentation(BsonType.ObjectId), JsonProperty(NullValueHandling = NullValueHandling.Ignore)]
    string? Id,
    string Name,
    [property: BsonRepresentation(BsonType.ObjectId)]
    string? ListingId
) {
    /// <summary>Creates an alias with no id, an empty name and no listing id.</summary>
    public StationAlias()
        : this(Id: null, Name: string.Empty, ListingId: null) {
    }
}

18
server/Models/Database/StationListing.cs

@ -1,18 +0,0 @@
using System.Collections.Generic;
using MongoDB.Bson;
using MongoDB.Bson.Serialization.Attributes;
using Newtonsoft.Json;
namespace Server.Models.Database;
/// <summary>
/// Database record for a station together with a list of strings named
/// <c>StoppedAtBy</c> (presumably the trains stopping there — confirm
/// against the database code).
/// </summary>
/// <param name="Id">
/// BSON document id, stored with the ObjectId representation; left out of
/// Newtonsoft JSON output when <c>null</c>.
/// </param>
/// <param name="Name">Station name.</param>
/// <param name="StoppedAtBy">List of string entries associated with the station.</param>
public record StationListing(
    [property: BsonId, BsonRepresentation(BsonType.ObjectId), JsonProperty(NullValueHandling = NullValueHandling.Ignore)]
    string? Id,
    string Name,
    List<string> StoppedAtBy
) {
    /// <summary>Creates a listing with no id, an empty name and an empty list.</summary>
    public StationListing()
        : this(Id: null, Name: string.Empty, StoppedAtBy: new List<string>()) {
    }

    /// <summary>Creates a listing without an id from a name and a list.</summary>
    public StationListing(string name, List<string> stoppedAtBy)
        : this(Id: null, Name: name, StoppedAtBy: stoppedAtBy) {
    }
}

20
server/Models/Database/TrainListing.cs

@ -1,20 +0,0 @@
using MongoDB.Bson;
using MongoDB.Bson.Serialization.Attributes;
using Newtonsoft.Json;
namespace Server.Models.Database;
/// <summary>
/// Database record describing a train by rank, number and company, with an
/// optional id pointing at its latest description.
/// </summary>
/// <param name="Id">
/// BSON document id, stored with the ObjectId representation; left out of
/// Newtonsoft JSON output when <c>null</c>.
/// </param>
/// <param name="Rank">Train rank.</param>
/// <param name="Number">Train number.</param>
/// <param name="Company">Company value for the train.</param>
/// <param name="LatestDescription">
/// Optional id stored with the ObjectId representation (presumably a
/// reference to another document — confirm against the database code).
/// </param>
public record TrainListing(
    [property: BsonId, BsonRepresentation(BsonType.ObjectId), JsonProperty(NullValueHandling = NullValueHandling.Ignore)]
    string? Id,
    string Rank,
    string Number,
    string Company,
    [property: BsonRepresentation(BsonType.ObjectId)]
    string? LatestDescription
) {
    /// <summary>Creates a listing with no ids and empty rank, number and company.</summary>
    public TrainListing()
        : this(Id: null, Rank: string.Empty, Number: string.Empty, Company: string.Empty, LatestDescription: null) {
    }

    /// <summary>Creates a listing without ids from rank, number and company.</summary>
    public TrainListing(string rank, string number, string company)
        : this(Id: null, Rank: rank, Number: number, Company: company, LatestDescription: null) {
    }
}

9
server/Models/ProxySettings.cs

@ -1,9 +0,0 @@
namespace Server.Models;
/// <summary>
/// Proxy settings record: a URL plus optional credentials.
/// </summary>
/// <param name="Url">Proxy URL value.</param>
/// <param name="Credentials">Optional credentials; <c>null</c> when omitted.</param>
public record ProxySettings(string Url, ProxyCredentials? Credentials = null) {
    /// <summary>Creates settings with an empty URL and no credentials.</summary>
    public ProxySettings()
        : this(Url: string.Empty, Credentials: null) {
    }
}
/// <summary>
/// Username/password pair record.
/// </summary>
/// <param name="Username">User name value.</param>
/// <param name="Password">Password value.</param>
public record ProxyCredentials(string Username, string Password) {
    /// <summary>Creates credentials with an empty user name and an empty password.</summary>
    public ProxyCredentials()
        : this(Username: string.Empty, Password: string.Empty) {
    }
}

68
server/Services/Implementations/DataManager.cs

@ -1,94 +1,47 @@
using System; using System;
using System.Collections.Generic; using System.Collections.Generic;
using System.Diagnostics;
using System.Net;
using System.Net.Http;
using System.Threading.Tasks; using System.Threading.Tasks;
using InfoferScraper;
using InfoferScraper.Models.Station;
using InfoferScraper.Models.Train; using InfoferScraper.Models.Train;
using Microsoft.Extensions.Logging; using InfoferScraper.Models.Station;
using scraper.Models.Itinerary;
using Server.Models;
using Server.Services.Interfaces; using Server.Services.Interfaces;
using Server.Utils; using Server.Utils;
using InfoferScraper;
namespace Server.Services.Implementations { namespace Server.Services.Implementations {
public class DataManager : IDataManager { public class DataManager : IDataManager {
private ILogger<DataManager> Logger { get; }
private IDatabase Database { get; } private IDatabase Database { get; }
private NodaTime.IDateTimeZoneProvider TzProvider { get; } private NodaTime.IDateTimeZoneProvider TzProvider { get; }
private NodaTime.DateTimeZone CfrTimeZone => TzProvider["Europe/Bucharest"]; private NodaTime.DateTimeZone CfrTimeZone => TzProvider["Europe/Bucharest"];
public DataManager(NodaTime.IDateTimeZoneProvider tzProvider, IDatabase database, ILogger<DataManager> logger, ProxySettings? proxySettings) { public DataManager(NodaTime.IDateTimeZoneProvider tzProvider, IDatabase database) {
this.TzProvider = tzProvider; this.TzProvider = tzProvider;
this.Database = database; this.Database = database;
this.Logger = logger;
HttpClientHandler httpClientHandler = new (){
UseProxy = proxySettings != null,
Proxy = proxySettings == null ? null : new WebProxy(proxySettings.Url),
DefaultProxyCredentials = proxySettings?.Credentials == null ? null : new NetworkCredential(proxySettings.Credentials.Username, proxySettings.Credentials.Password),
};
InfoferScraper.Scrapers.StationScraper stationScraper = new(httpClientHandler);
InfoferScraper.Scrapers.TrainScraper trainScraper = new(httpClientHandler);
InfoferScraper.Scrapers.RouteScraper routeScraper = new(httpClientHandler);
stationCache = new(async (t) => { stationCache = new(async (t) => {
var (stationName, date) = t; var (stationName, date) = t;
Logger.LogDebug("Fetching station {StationName} for date {Date}", stationName, date);
var zonedDate = new NodaTime.LocalDate(date.Year, date.Month, date.Day).AtStartOfDayInZone(CfrTimeZone); var zonedDate = new NodaTime.LocalDate(date.Year, date.Month, date.Day).AtStartOfDayInZone(CfrTimeZone);
var station = await stationScraper.Scrape(stationName, zonedDate.ToDateTimeOffset()); var station = await InfoferScraper.Scrapers.StationScraper.Scrape(stationName, zonedDate.ToDateTimeOffset());
if (station != null) { if (station != null) {
_ = Task.Run(async () => { await Database.OnStationData(station);
var watch = Stopwatch.StartNew();
await Database.OnStationData(station);
var ms = watch.ElapsedMilliseconds;
Logger.LogInformation("OnStationData timing: {StationDataMs} ms", ms);
});
} }
return station; return station;
}, TimeSpan.FromMinutes(1)); }, TimeSpan.FromMinutes(1));
trainCache = new(async (t) => { trainCache = new(async (t) => {
var (trainNumber, date) = t; var (trainNumber, date) = t;
Logger.LogDebug("Fetching train {TrainNumber} for date {Date}", trainNumber, date);
var zonedDate = new NodaTime.LocalDate(date.Year, date.Month, date.Day).AtStartOfDayInZone(CfrTimeZone); var zonedDate = new NodaTime.LocalDate(date.Year, date.Month, date.Day).AtStartOfDayInZone(CfrTimeZone);
var train = await trainScraper.Scrape(trainNumber, zonedDate.ToDateTimeOffset()); var train = await InfoferScraper.Scrapers.TrainScraper.Scrape(trainNumber, zonedDate.ToDateTimeOffset());
if (train != null) { if (train != null) {
_ = Task.Run(async () => { await Database.OnTrainData(train);
var watch = Stopwatch.StartNew();
await Database.OnTrainData(train);
var ms = watch.ElapsedMilliseconds;
Logger.LogInformation("OnTrainData timing: {StationDataMs} ms", ms);
});
} }
return train; return train;
}, TimeSpan.FromSeconds(30)); }, TimeSpan.FromSeconds(30));
itinerariesCache = new(async (t) => {
var (from, to, date) = t;
Logger.LogDebug("Fetching itinerary from {From} to {To} for date {Date}", from, to, date);
var zonedDate = new NodaTime.LocalDate(date.Year, date.Month, date.Day).AtStartOfDayInZone(CfrTimeZone);
var itineraries = await routeScraper.Scrape(from, to, zonedDate.ToDateTimeOffset());
if (itineraries != null) {
_ = Task.Run(async () => {
var watch = Stopwatch.StartNew();
await Database.OnItineraries(itineraries);
var ms = watch.ElapsedMilliseconds;
Logger.LogInformation("OnItineraries timing: {StationDataMs} ms", ms);
});
}
return itineraries;
}, TimeSpan.FromMinutes(1));
} }
private readonly AsyncCache<(string, DateOnly), IStationScrapeResult?> stationCache; private readonly AsyncCache<(string, DateOnly), IStationScrapeResult?> stationCache;
private readonly AsyncCache<(string, DateOnly), ITrainScrapeResult?> trainCache; private readonly AsyncCache<(string, DateOnly), ITrainScrapeResult?> trainCache;
private readonly AsyncCache<(string, string, DateOnly), IReadOnlyList<IItinerary>?> itinerariesCache;
public Task<IStationScrapeResult?> FetchStation(string stationName, DateTimeOffset date) { public Task<IStationScrapeResult?> FetchStation(string stationName, DateTimeOffset date) {
var cfrDateTime = new NodaTime.ZonedDateTime(NodaTime.Instant.FromDateTimeOffset(date), CfrTimeZone); var cfrDateTime = new NodaTime.ZonedDateTime(NodaTime.Instant.FromDateTimeOffset(date), CfrTimeZone);
@ -103,12 +56,5 @@ namespace Server.Services.Implementations {
return trainCache.GetItem((trainNumber, cfrDate)); return trainCache.GetItem((trainNumber, cfrDate));
} }
/// <summary>
/// Looks up the itineraries between two stations through <c>itinerariesCache</c>.
/// </summary>
/// <param name="from">Departure station name, used as the first part of the cache key.</param>
/// <param name="to">Arrival station name, used as the second part of the cache key.</param>
/// <param name="date">
/// Moment whose calendar day, as observed in <c>CfrTimeZone</c>, selects the day to query.
/// When omitted, <see cref="DateTimeOffset.Now"/> is used.
/// </param>
/// <returns>Whatever the cache yields for the (from, to, day) key; may be <c>null</c>.</returns>
public async Task<IReadOnlyList<IItinerary>?> FetchItineraries(string from, string to, DateTimeOffset? date = null) {
    var requestedMoment = date ?? DateTimeOffset.Now;
    var instant = NodaTime.Instant.FromDateTimeOffset(requestedMoment);
    // Re-express the instant in the CFR zone so the calendar day matches local railway time.
    var inCfrZone = new NodaTime.ZonedDateTime(instant, CfrTimeZone);
    var day = new DateOnly(inCfrZone.Year, inCfrZone.Month, inCfrZone.Day);
    var itineraries = await itinerariesCache.GetItem((from, to, day));
    return itineraries;
}
} }
} }

430
server/Services/Implementations/Database.cs

@ -2,347 +2,187 @@ using System;
using System.Collections.Generic; using System.Collections.Generic;
using System.IO; using System.IO;
using System.Linq; using System.Linq;
using System.Threading; using System.Text.Json;
using System.Text.Json.Nodes;
using System.Text.Json.Serialization;
using System.Threading.Tasks; using System.Threading.Tasks;
using InfoferScraper.Models.Station;
using Microsoft.Extensions.Logging; using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using MongoDB.Bson;
using MongoDB.Bson.Serialization.Attributes;
using MongoDB.Driver;
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
using Newtonsoft.Json.Serialization;
using scraper.Models.Itinerary;
using Server.Models.Database;
using Server.Utils;
namespace Server.Services.Implementations; namespace Server.Services.Implementations;
public class Database : Server.Services.Interfaces.IDatabase { public class Database : Server.Services.Interfaces.IDatabase {
private static readonly JsonSerializerSettings jsonSerializerSettings = new() { private static readonly JsonSerializerOptions serializerOptions = new() {
ContractResolver = new DefaultContractResolver { PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
NamingStrategy = new CamelCaseNamingStrategy(),
},
}; };
private ILogger<Database> Logger { get; } private ILogger<Database> Logger { get; }
public DbRecord DbData { get; private set; } = new(3); private bool shouldCommitOnEveryChange = true;
private bool dbDataDirty = false;
private bool stationsDirty = false;
private bool trainsDirty = false;
public IReadOnlyList<StationListing> Stations => stationListingsCollection public DbRecord DbData { get; private set; } = new(2);
.Aggregate(PipelineDefinition<StationListing, StationListing>.Create( private List<StationRecord> stations = new();
"{ $addFields: { stoppedAtCount: { $size: \"$stoppedAtBy\" } } }", private List<TrainRecord> trains = new();
"{ $sort: { stoppedAtCount: -1 } }",
"{ $unset: \"stoppedAtCount\" }" public IReadOnlyList<Server.Services.Interfaces.IStationRecord> Stations => stations;
)) public IReadOnlyList<Server.Services.Interfaces.ITrainRecord> Trains => trains;
.ToList();
public IReadOnlyList<TrainListing> Trains => trainListingsCollection.FindSync(_ => true).ToList();
public IReadOnlyList<StationAlias> StationAliases => stationAliasCollection.FindSync(_ => true).ToList();
private static readonly string DbDir = Environment.GetEnvironmentVariable("DB_DIR") ?? Path.Join(Environment.CurrentDirectory, "db"); private static readonly string DbDir = Environment.GetEnvironmentVariable("DB_DIR") ?? Path.Join(Environment.CurrentDirectory, "db");
private static readonly string DbFile = Path.Join(DbDir, "db.json"); private static readonly string DbFile = Path.Join(DbDir, "db.json");
private static readonly string StationsFile = Path.Join(DbDir, "stations.json"); private static readonly string StationsFile = Path.Join(DbDir, "stations.json");
private static readonly string TrainsFile = Path.Join(DbDir, "trains.json"); private static readonly string TrainsFile = Path.Join(DbDir, "trains.json");
private readonly IMongoDatabase db; public IDisposable MakeDbTransaction() {
private readonly IMongoCollection<DbRecord> dbRecordCollection; shouldCommitOnEveryChange = false;
private readonly IMongoCollection<TrainListing> trainListingsCollection; return new Server.Utils.ActionDisposable(() => {
private readonly IMongoCollection<StationListing> stationListingsCollection; if (dbDataDirty) File.WriteAllText(DbFile, JsonSerializer.Serialize(DbData, serializerOptions));
private readonly IMongoCollection<StationAlias> stationAliasCollection; if (stationsDirty) {
private readonly AsyncThrottle throttle; stations.Sort((s1, s2) => s2.StoppedAtBy.Count.CompareTo(s1.StoppedAtBy.Count));
File.WriteAllText(StationsFile, JsonSerializer.Serialize(stations, serializerOptions));
private readonly Dictionary<string, string> trainObjectIds = new(); }
private readonly Dictionary<string, string> stationObjectIds = new(); if (trainsDirty) File.WriteAllText(TrainsFile, JsonSerializer.Serialize(trains, serializerOptions));
dbDataDirty = stationsDirty = trainsDirty = false;
shouldCommitOnEveryChange = true;
});
}
public Database(ILogger<Database> logger, IOptions<MongoSettings> mongoSettings) { public Database(ILogger<Database> logger) {
Logger = logger; Logger = logger;
var settings = MongoClientSettings.FromConnectionString(mongoSettings.Value.ConnectionString); if (!Directory.Exists(DbDir)) {
settings.ServerApi = new(ServerApiVersion.V1); Logger.LogDebug("Creating directory: {DbDir}", DbDir);
settings.MaxConnectionPoolSize = 10000; Directory.CreateDirectory(DbDir);
MongoClient mongoClient = new(settings); }
Logger.LogDebug("Created monogClient");
throttle = new(mongoClient.Settings.MaxConnectionPoolSize / 2);
db = mongoClient.GetDatabase(mongoSettings.Value.DatabaseName) ?? throw new NullReferenceException("Unable to get Mongo database");
Logger.LogDebug("Created db");
dbRecordCollection = db.GetCollection<DbRecord>("db");
trainListingsCollection = db.GetCollection<TrainListing>("trainListings");
stationListingsCollection = db.GetCollection<StationListing>("stationListings");
stationAliasCollection = db.GetCollection<StationAlias>("stationAliases");
Migration(); Migration();
Task.Run(async () => await Initialize()); if (File.Exists(DbFile)) {
DbData = JsonSerializer.Deserialize<DbRecord>(File.ReadAllText(DbFile), serializerOptions)!;
}
else {
File.WriteAllText(DbFile, JsonSerializer.Serialize(DbData, serializerOptions));
}
if (File.Exists(StationsFile)) {
stations = JsonSerializer.Deserialize<List<StationRecord>>(File.ReadAllText(StationsFile), serializerOptions)!;
}
if (File.Exists(TrainsFile)) {
trains = JsonSerializer.Deserialize<List<TrainRecord>>(File.ReadAllText(TrainsFile), serializerOptions)!;
}
} }
private void Migration() { private void Migration() {
if (!File.Exists(DbFile) && File.Exists(TrainsFile)) { if (!File.Exists(DbFile)) {
// using var _ = Logger.BeginScope("Migrating DB version 1 -> 2");
Logger.LogInformation("Migrating DB version 1 -> 2"); Logger.LogInformation("Migrating DB version 1 -> 2");
if (File.Exists(StationsFile)) { if (File.Exists(StationsFile)) {
Logger.LogDebug("Converting StationsFile"); Logger.LogDebug("Converting StationsFile");
var oldStations = JToken.Parse(File.ReadAllText(StationsFile)); var oldStations = JsonNode.Parse(File.ReadAllText(StationsFile));
List<StationListing> stations = new();
if (oldStations != null) { if (oldStations != null) {
Logger.LogDebug("Found {StationsCount} stations", oldStations.Children().Count()); Logger.LogDebug("Found {StationsCount} stations", oldStations.AsArray().Count);
foreach (var station in oldStations.Children()) { foreach (var station in oldStations.AsArray()) {
if (station == null) continue; if (station == null) continue;
station["stoppedAtBy"] = new JArray(station["stoppedAtBy"]!.Children().Select(num => (JToken)(num!).ToString()!).ToArray()); station["stoppedAtBy"] = new JsonArray(station["stoppedAtBy"]!.AsArray().Select(num => (JsonNode)(num!).ToString()!).ToArray());
} }
stations = oldStations.ToObject<List<StationListing>>(JsonSerializer.Create(jsonSerializerSettings))!; stations = JsonSerializer.Deserialize<List<StationRecord>>(oldStations, serializerOptions)!;
} }
Logger.LogDebug("Rewriting StationsFile"); Logger.LogDebug("Rewriting StationsFile");
File.WriteAllText(StationsFile, JsonConvert.SerializeObject(stations, jsonSerializerSettings)); File.WriteAllText(StationsFile, JsonSerializer.Serialize(stations, serializerOptions));
} }
if (File.Exists(TrainsFile)) { if (File.Exists(TrainsFile)) {
Logger.LogDebug("Converting TrainsFile"); Logger.LogDebug("Converting TrainsFile");
var oldTrains = JToken.Parse(File.ReadAllText(TrainsFile)); var oldTrains = JsonNode.Parse(File.ReadAllText(TrainsFile));
List<TrainListing> trains = new();
if (oldTrains != null) { if (oldTrains != null) {
Logger.LogDebug("Found {TrainsCount} trains", oldTrains.Children().Count()); Logger.LogDebug("Found {TrainsCount} trains", oldTrains.AsArray().Count);
foreach (var train in oldTrains.Children()) { foreach (var train in oldTrains.AsArray()) {
if (train == null) continue; if (train == null) continue;
train["number"] = train["numberString"]; train["number"] = train["numberString"];
train["numberString"]?.Remove(); train.AsObject().Remove("numberString");
} }
trains = oldTrains.ToObject<List<TrainListing>>(JsonSerializer.Create(jsonSerializerSettings))!; trains = JsonSerializer.Deserialize<List<TrainRecord>>(oldTrains, serializerOptions)!;
} }
Logger.LogDebug("Rewriting TrainsFile"); Logger.LogDebug("Rewriting TrainsFile");
File.WriteAllText(TrainsFile, JsonConvert.SerializeObject(trains, jsonSerializerSettings)); File.WriteAllText(TrainsFile, JsonSerializer.Serialize(trains, serializerOptions));
} }
DbData = new(2); DbData = new(2);
File.WriteAllText(DbFile, JsonConvert.SerializeObject(DbData, jsonSerializerSettings)); File.WriteAllText(DbFile, JsonSerializer.Serialize(DbData, serializerOptions));
Migration(); Migration();
} }
else if (File.Exists(DbFile)) {
var oldDbData = JToken.Parse(File.ReadAllText(DbFile));
if (((int?)oldDbData?["version"]) == 2) {
Logger.LogInformation("Migrating DB version 2 -> 3 (transition from fs+JSON to MongoDB)");
if (File.Exists(StationsFile)) {
Logger.LogDebug("Converting StationsFile");
var stations = JsonConvert.DeserializeObject<List<StationListing>>(File.ReadAllText(StationsFile));
stationListingsCollection.InsertMany(stations);
File.Delete(StationsFile);
}
if (File.Exists(TrainsFile)) {
Logger.LogDebug("Converting TrainsFile");
var trains = JsonConvert.DeserializeObject<List<TrainListing>>(File.ReadAllText(TrainsFile));
trainListingsCollection.InsertMany(trains);
File.Delete(TrainsFile);
}
File.Delete(DbFile);
try {
Directory.Delete(DbDir);
}
catch (Exception) {
// Deleting of the directory is optional; may not be allowed in Docker or similar
}
var x = dbRecordCollection.FindSync(_ => true).ToList()!;
if (x.Count != 0) {
Logger.LogWarning("db collection contained data when migrating to V3");
using (var _ = Logger.BeginScope("Already existing data:")) {
foreach (var dbRecord in x) {
Logger.LogInformation("Id: {Id}, Version: {Version}", dbRecord.Id, dbRecord.Version);
}
}
Logger.LogInformation("Backing up existing data");
var backupDbRecordCollection = db.GetCollection<DbRecord>("db-backup");
backupDbRecordCollection.InsertMany(x);
Logger.LogDebug("Removing existing data");
dbRecordCollection.DeleteMany(_ => true);
}
dbRecordCollection.InsertOne(new(3));
Migration();
}
else {
throw new("Unexpected Database version, only DB Version 2 uses DbFile");
}
}
else { else {
var datas = dbRecordCollection.FindSync(_ => true).ToList(); var oldDbData = JsonNode.Parse(File.ReadAllText(DbFile));
if (datas.Count == 0) { if (((int?)oldDbData?["version"]) == 2) {
Logger.LogInformation("No db record found, new database"); Logger.LogInformation("DB Version: 2; noop");
dbRecordCollection.InsertOne(DbData);
}
else {
DbData = datas[0];
}
if (DbData.Version == 3) {
Logger.LogInformation("Using MongoDB Database Version 3; noop");
} }
else { else {
throw new($"Unexpected Database version: {DbData.Version}"); throw new Exception("Unexpected Database version");
} }
} }
} }
/// <summary>
/// Warms up <c>stationObjectIds</c> by reading every document of
/// <c>stationAliasCollection</c> and recording alias name → listing id.
/// </summary>
/// <remarks>
/// Entries that are null or have no <c>ListingId</c> are skipped.
/// The indexer is used instead of <c>Add</c> so that a name that is already present
/// (duplicate alias, or already cached by another method of this class) overwrites
/// the entry instead of throwing and aborting the rest of the warm-up.
/// </remarks>
private async Task Initialize() {
    await foreach (var entry in await stationAliasCollection.FindAsync(_ => true)) {
        if (entry?.ListingId is null) continue;
        // Same write style as every other writer of stationObjectIds; never throws on an existing key.
        stationObjectIds[entry.Name] = entry.ListingId;
    }
}
private readonly SemaphoreSlim insertTrainLock = new (1, 1);
public async Task<string> FoundTrain(string rank, string number, string company) { public async Task<string> FoundTrain(string rank, string number, string company) {
number = string.Join("", number.TakeWhile(c => c is >= '0' and <= '9')); number = string.Join("", number.TakeWhile(c => '0' <= c && c <= '9'));
// If there is a matching ObjectId, then it's already in the database if (!trains.Where(train => train.Number == number).Any()) {
if (trainObjectIds.ContainsKey(number)) return number; Logger.LogDebug("Found train {Rank} {Number} from {Company}", rank, number, company);
await insertTrainLock.WaitAsync(); trains.Add(new(number, rank, company));
try { if (shouldCommitOnEveryChange) {
var possibleTrains = await (await throttle.MakeRequest(() => trainListingsCollection.FindAsync( await File.WriteAllTextAsync(TrainsFile, JsonSerializer.Serialize(trains, serializerOptions));
Builders<TrainListing>.Filter.Eq("number", number)
))).ToListAsync();
if (possibleTrains.Count == 0) {
Logger.LogDebug("Found train {Rank} {Number} from {Company}", rank, number, company);
TrainListing listing = new(number: number, rank: rank, company: company);
await throttle.MakeRequest(() => trainListingsCollection.InsertOneAsync(listing));
if (listing.Id != null) {
trainObjectIds[number] = listing.Id;
}
} }
else { else {
foreach (var possibleTrain in possibleTrains) { trainsDirty = true;
trainObjectIds[possibleTrain.Number] = possibleTrain.Id!;
}
} }
} }
finally {
insertTrainLock.Release();
}
return number; return number;
} }
private readonly SemaphoreSlim insertStationLock = new (1, 1);
public async Task FoundStation(string name) { public async Task FoundStation(string name) {
// if (!await throttle.MakeRequest(() => stationListingsCollection.Find(Builders<StationListing>.Filter.Eq("name", name)).AnyAsync())) { if (!stations.Where(station => station.Name == name).Any()) {
// Logger.LogDebug("Found station {StationName}", name);
// await throttle.MakeRequest(() => stationListingsCollection.InsertOneAsync(new(name, new())));
// }
// If there is a matching ObjectId, then it's already in the database
if (stationObjectIds.ContainsKey(name)) return;
await insertStationLock.WaitAsync();
UpdateResult update;
try {
update = await stationListingsCollection.UpdateOneAsync(
Builders<StationListing>.Filter.Eq("name", name),
Builders<StationListing>.Update.Combine(
Builders<StationListing>.Update.SetOnInsert("name", name),
Builders<StationListing>.Update.SetOnInsert("stoppedAtBy", new List<string>())
),
new UpdateOptions {
IsUpsert = true,
}
);
if (update.IsAcknowledged && update.ModifiedCount > 0) {
var listingId = update.UpsertedId.AsObjectId.ToString();
stationObjectIds[name] = listingId;
await stationAliasCollection.UpdateOneAsync(
Builders<StationAlias>.Filter.Eq("name", name),
Builders<StationAlias>.Update.Combine(
Builders<StationAlias>.Update.SetOnInsert("name", name),
Builders<StationAlias>.Update.SetOnInsert("listingId", listingId)
),
new UpdateOptions { IsUpsert = true }
);
}
}
finally {
insertStationLock.Release();
}
if (update.IsAcknowledged && update.MatchedCount == 0) {
Logger.LogDebug("Found station {StationName}", name); Logger.LogDebug("Found station {StationName}", name);
stations.Add(new(name, new()));
if (shouldCommitOnEveryChange) {
await File.WriteAllTextAsync(StationsFile, JsonSerializer.Serialize(stations, serializerOptions));
}
else {
stationsDirty = true;
}
} }
} }
/// <summary>
/// Makes sure every station in <paramref name="names"/> has a listing document and a
/// cached id in <c>stationObjectIds</c>.
/// </summary>
/// <param name="names">
/// Station names to register. The sequence is enumerated once; repeated names are
/// collapsed so that a single call never inserts the same station twice.
/// </param>
/// <remarks>
/// Steps: drop names whose id is already cached; query <c>stationListingsCollection</c>
/// for the remaining names and cache the ids found; insert listings (with an empty
/// stopped-at list) for the names that are still unknown and cache their new ids.
/// </remarks>
public async Task FoundStations(IEnumerable<string> names) {
    // De-duplicate up front: callers may pass the same name more than once.
    var unknownStations = names
        .Distinct()
        .Where(name => !stationObjectIds.ContainsKey(name))
        .ToList();
    if (unknownStations.Count == 0) return;

    var existingStations = await (await stationListingsCollection.FindAsync(
        Builders<StationListing>.Filter.StringIn("name", unknownStations.Select((n) => new StringOrRegularExpression(n)))
    )).ToListAsync();

    // Collect the names once so the removal below is a set lookup per element.
    var existingNames = new HashSet<string>();
    foreach (var existingStation in existingStations) {
        stationObjectIds[existingStation.Name] = existingStation.Id!;
        existingNames.Add(existingStation.Name);
    }

    unknownStations.RemoveAll(existingNames.Contains);
    if (unknownStations.Count == 0) return;

    var unknownStationListings = unknownStations.Select((s) => new StationListing(s, new())).ToList();
    await stationListingsCollection.InsertManyAsync(unknownStationListings);
    foreach (var listing in unknownStationListings) {
        stationObjectIds[listing.Name] = listing.Id!;
    }
    Logger.LogDebug("Found stations {StationNames}", unknownStations);
}
public async Task FoundTrainAtStation(string stationName, string trainNumber) { public async Task FoundTrainAtStation(string stationName, string trainNumber) {
trainNumber = string.Join("", trainNumber.TakeWhile(c => c is >= '0' and <= '9')); trainNumber = string.Join("", trainNumber.TakeWhile(c => '0' <= c && c <= '9'));
await FoundStation(stationName); await FoundStation(stationName);
UpdateResult updateResult; var dirty = false;
if (stationObjectIds.ContainsKey(stationName)) { for (var i = 0; i < stations.Count; i++) {
updateResult = await throttle.MakeRequest(() => stationListingsCollection.UpdateOneAsync( if (stations[i].Name == stationName) {
Builders<StationListing>.Filter.Eq("_id", ObjectId.Parse(stationObjectIds[stationName])), if (!stations[i].StoppedAtBy.Contains(trainNumber)) {
Builders<StationListing>.Update.AddToSet("stoppedAtBy", trainNumber) Logger.LogDebug("Found train {TrainNumber} at station {StationName}", trainNumber, stationName);
)); stations[i].ActualStoppedAtBy.Add(trainNumber);
} dirty = true;
else { }
updateResult = await throttle.MakeRequest(() => stationListingsCollection.UpdateOneAsync( break;
Builders<StationListing>.Filter.Eq("name", stationName), }
Builders<StationListing>.Update.AddToSet("stoppedAtBy", trainNumber)
));
}
if (updateResult.IsAcknowledged && updateResult.ModifiedCount > 0) {
Logger.LogDebug("Found train {TrainNumber} at station {StationName}", trainNumber, stationName);
}
}
public async Task FoundTrainAtStations(IEnumerable<string> stationNames, string trainNumber) {
trainNumber = string.Join("", trainNumber.TakeWhile(c => c is >= '0' and <= '9'));
var enumerable = stationNames as string[] ?? stationNames.ToArray();
await FoundStations(enumerable);
var objectIds = enumerable
.Select<string, ObjectId?>((stationName) => stationObjectIds.ContainsKey(stationName) ? ObjectId.Parse(stationObjectIds[stationName]) : null)
.ToList();
UpdateResult updateResult;
if (!objectIds.Any((id) => id is null)) {
updateResult = await throttle.MakeRequest(() => stationListingsCollection.UpdateManyAsync(
Builders<StationListing>.Filter.In("_id", objectIds),
Builders<StationListing>.Update.AddToSet("stoppedAtBy", trainNumber)
));
}
else {
updateResult = await throttle.MakeRequest(() => stationListingsCollection.UpdateManyAsync(
Builders<StationListing>.Filter.StringIn("name", enumerable.Select(sn => new StringOrRegularExpression(sn))),
Builders<StationListing>.Update.AddToSet("stoppedAtBy", trainNumber)
));
} }
if (updateResult.IsAcknowledged && updateResult.ModifiedCount > 0) { if (dirty) {
Logger.LogDebug("Found train {TrainNumber} at stations {StationNames}", trainNumber, stationNames); if (shouldCommitOnEveryChange) {
stations.Sort((s1, s2) => s2.StoppedAtBy.Count.CompareTo(s1.StoppedAtBy.Count));
await File.WriteAllTextAsync(StationsFile, JsonSerializer.Serialize(stations, serializerOptions));
}
else {
stationsDirty = true;
}
} }
} }
public async Task OnTrainData(InfoferScraper.Models.Train.ITrainScrapeResult trainData) { public async Task OnTrainData(InfoferScraper.Models.Train.ITrainScrapeResult trainData) {
using var _ = MakeDbTransaction();
var trainNumber = await FoundTrain(trainData.Rank, trainData.Number, trainData.Operator); var trainNumber = await FoundTrain(trainData.Rank, trainData.Number, trainData.Operator);
await FoundTrainAtStations( foreach (var group in trainData.Groups) {
trainData.Groups foreach (var station in group.Stations) {
.SelectMany(g => g.Stations) await FoundTrainAtStation(station.Name, trainNumber);
.Select(trainStop => trainStop.Name) }
.Distinct(), }
trainNumber
);
} }
public async Task OnStationData(InfoferScraper.Models.Station.IStationScrapeResult stationData) { public async Task OnStationData(InfoferScraper.Models.Station.IStationScrapeResult stationData) {
@ -351,40 +191,48 @@ public class Database : Server.Services.Interfaces.IDatabase {
async Task ProcessTrain(InfoferScraper.Models.Station.IStationArrDep train) { async Task ProcessTrain(InfoferScraper.Models.Station.IStationArrDep train) {
var trainNumber = train.Train.Number; var trainNumber = train.Train.Number;
trainNumber = await FoundTrain(train.Train.Rank, trainNumber, train.Train.Operator); trainNumber = await FoundTrain(train.Train.Rank, trainNumber, train.Train.Operator);
await FoundTrainAtStations(Enumerable.Repeat(stationName, 1).Concat(train.Train.Route).Distinct(), trainNumber); await FoundTrainAtStation(stationName, trainNumber);
if (train.Train.Route.Count != 0) {
foreach (var station in train.Train.Route) {
await FoundTrainAtStation(station, trainNumber);
}
}
} }
List<IStationArrDep> arrdep = new(); using var _ = MakeDbTransaction();
if (stationData.Arrivals != null) { if (stationData.Arrivals != null) {
arrdep.AddRange(stationData.Arrivals); foreach (var train in stationData.Arrivals) {
await ProcessTrain(train);
}
} }
if (stationData.Departures != null) { if (stationData.Departures != null) {
arrdep.AddRange(stationData.Departures); foreach (var train in stationData.Departures) {
await ProcessTrain(train);
}
} }
}
}
foreach (var train in arrdep.DistinctBy((t) => t.Train.Number)) { public record DbRecord(int Version);
await ProcessTrain(train);
} public record StationRecord : Server.Services.Interfaces.IStationRecord {
[JsonPropertyName("stoppedAtBy")]
public List<string> ActualStoppedAtBy { get; init; }
public string Name { get; init; }
[JsonIgnore]
public IReadOnlyList<string> StoppedAtBy => ActualStoppedAtBy;
public StationRecord() {
Name = "";
ActualStoppedAtBy = new();
} }
public async Task OnItineraries(IReadOnlyList<IItinerary> itineraries) { public StationRecord(string name, List<string> stoppedAtBy) {
foreach (var itinerary in itineraries) { Name = name;
foreach (var train in itinerary.Trains) { ActualStoppedAtBy = stoppedAtBy;
await FoundTrainAtStations(
train.IntermediateStops.Concat(new[] { train.From, train.To }),
train.TrainNumber
);
}
}
} }
} }
public record DbRecord( public record TrainRecord(string Number, string Rank, string Company) : Server.Services.Interfaces.ITrainRecord;
[property: BsonId]
[property: BsonRepresentation(BsonType.ObjectId)]
[property: JsonProperty(NullValueHandling = NullValueHandling.Ignore)]
string? Id,
int Version
) {
public DbRecord(int version) : this(null, version) { }
}

3
server/Services/Interfaces/IDataManager.cs

@ -1,14 +1,11 @@
using System; using System;
using System.Collections.Generic;
using System.Threading.Tasks; using System.Threading.Tasks;
using InfoferScraper.Models.Train; using InfoferScraper.Models.Train;
using InfoferScraper.Models.Station; using InfoferScraper.Models.Station;
using scraper.Models.Itinerary;
namespace Server.Services.Interfaces; namespace Server.Services.Interfaces;
public interface IDataManager { public interface IDataManager {
public Task<IStationScrapeResult?> FetchStation(string stationName, DateTimeOffset date); public Task<IStationScrapeResult?> FetchStation(string stationName, DateTimeOffset date);
public Task<ITrainScrapeResult?> FetchTrain(string trainNumber, DateTimeOffset date); public Task<ITrainScrapeResult?> FetchTrain(string trainNumber, DateTimeOffset date);
public Task<IReadOnlyList<IItinerary>?> FetchItineraries(string from, string to, DateTimeOffset? date = null);
} }

18
server/Services/Interfaces/IDatabase.cs

@ -2,19 +2,27 @@ using System.Collections.Generic;
using System.Threading.Tasks; using System.Threading.Tasks;
using InfoferScraper.Models.Train; using InfoferScraper.Models.Train;
using InfoferScraper.Models.Station; using InfoferScraper.Models.Station;
using scraper.Models.Itinerary;
using Server.Models.Database;
namespace Server.Services.Interfaces; namespace Server.Services.Interfaces;
public interface IDatabase { public interface IDatabase {
public IReadOnlyList<StationListing> Stations { get; } public IReadOnlyList<IStationRecord> Stations { get; }
public IReadOnlyList<TrainListing> Trains { get; } public IReadOnlyList<ITrainRecord> Trains { get; }
public Task<string> FoundTrain(string rank, string number, string company); public Task<string> FoundTrain(string rank, string number, string company);
public Task FoundStation(string name); public Task FoundStation(string name);
public Task FoundTrainAtStation(string stationName, string trainName); public Task FoundTrainAtStation(string stationName, string trainName);
public Task OnTrainData(ITrainScrapeResult trainData); public Task OnTrainData(ITrainScrapeResult trainData);
public Task OnStationData(IStationScrapeResult stationData); public Task OnStationData(IStationScrapeResult stationData);
public Task OnItineraries(IReadOnlyList<IItinerary> itineraries); }
public interface IStationRecord {
public string Name { get; }
public IReadOnlyList<string> StoppedAtBy { get; }
}
public interface ITrainRecord {
public string Rank { get; }
public string Number { get; }
public string Company { get; }
} }

57
server/Startup.cs

@ -1,17 +1,13 @@
using System; using System;
using System.Net; using System.Net;
using System.Text.Json;
using Microsoft.AspNetCore.Builder; using Microsoft.AspNetCore.Builder;
using Microsoft.AspNetCore.Hosting; using Microsoft.AspNetCore.Hosting;
using Microsoft.AspNetCore.Http;
using Microsoft.AspNetCore.HttpOverrides; using Microsoft.AspNetCore.HttpOverrides;
using Microsoft.Extensions.Configuration; using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection; using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Hosting; using Microsoft.Extensions.Hosting;
using Microsoft.OpenApi.Models; using Microsoft.OpenApi.Models;
using MongoDB.Bson.Serialization.Conventions;
using Newtonsoft.Json.Serialization;
using Server.Models;
using Server.Models.Database;
using Server.Services.Implementations; using Server.Services.Implementations;
using Server.Services.Interfaces; using Server.Services.Interfaces;
@ -30,33 +26,12 @@ namespace Server {
options.KnownProxies.Add(Dns.GetHostAddresses("host.docker.internal")[0]); options.KnownProxies.Add(Dns.GetHostAddresses("host.docker.internal")[0]);
}); });
} }
services.Configure<ProxySettings>(Configuration.GetSection("Proxy"));
services.Configure<MongoSettings>(Configuration.GetSection("TrainDataMongo"));
var conventionPack = new ConventionPack { new CamelCaseElementNameConvention() };
ConventionRegistry.Register("camelCase", conventionPack, _ => true);
services.AddSingleton<IDataManager, DataManager>(); services.AddSingleton<IDataManager, DataManager>();
services.AddSingleton<IDatabase, Database>(); services.AddSingleton<IDatabase, Database>();
services.AddSingleton(NodaTime.DateTimeZoneProviders.Tzdb); services.AddSingleton<NodaTime.IDateTimeZoneProvider>(NodaTime.DateTimeZoneProviders.Tzdb);
services.AddSingleton<IFileStorage>((serviceProvider) => {
var conf = serviceProvider.GetRequiredService<IConfiguration>();
var section = conf.GetSection("FileStorage");
switch (section["Type"]) {
case "local": {
var dir = section["Directory"];
return new LocalFileStorage(dir!);
}
default:
throw new Exception("Unable to configure FileStorage");
}
});
services.AddControllers() services.AddControllers()
.AddNewtonsoftJson(options => { .AddJsonOptions(options => {
options.SerializerSettings.ContractResolver = new DefaultContractResolver { options.JsonSerializerOptions.PropertyNamingPolicy = JsonNamingPolicy.CamelCase;
NamingStrategy = new CamelCaseNamingStrategy(),
};
}); });
services.AddSwaggerGen(c => { services.AddSwaggerGen(c => {
c.SwaggerDoc("v1", new OpenApiInfo { Title = "InfoTren Scraper", Version = "v1" }); c.SwaggerDoc("v1", new OpenApiInfo { Title = "InfoTren Scraper", Version = "v1" });
@ -82,30 +57,6 @@ namespace Server {
c.SwaggerEndpoint("/swagger/v1/swagger.json", "InfoTren Scraper v1"); c.SwaggerEndpoint("/swagger/v1/swagger.json", "InfoTren Scraper v1");
}); });
app.MapWhen(x => x.Request.Path.StartsWithSegments("/rapidoc"), appBuilder => {
appBuilder.Run(async context => {
context.Response.ContentType = "text/html";
await context.Response.WriteAsync(
"""
<!doctype html> <!-- Important: must specify -->
<html>
<head>
<meta charset="utf-8"> <!-- Important: rapi-doc uses utf8 characters -->
<script type="module" src="https://unpkg.com/rapidoc/dist/rapidoc-min.js"></script>
</head>
<body>
<rapi-doc
spec-url="/swagger/v3/swagger.json"
theme = "dark"
> </rapi-doc>
</body>
</html>
"""
);
});
});
// app.UseHttpsRedirection(); // app.UseHttpsRedirection();
app.UseRouting(); app.UseRouting();

38
server/Utils/AsyncThrottle.cs

@ -1,38 +0,0 @@
using System;
using System.Threading;
using System.Threading.Tasks;
namespace Server.Utils;
// Inspired from: https://stackoverflow.com/a/57517920
/// <summary>
/// Caps how many asynchronous operations are awaited at the same time by making each
/// caller take a slot from a <see cref="SemaphoreSlim"/> before its operation is awaited.
/// </summary>
public class AsyncThrottle {
    private readonly SemaphoreSlim slots;

    /// <summary>Creates a throttle with <paramref name="limit"/> slots, all initially free.</summary>
    /// <param name="limit">Initial and maximum count handed to the semaphore.</param>
    public AsyncThrottle(int limit) {
        slots = new SemaphoreSlim(limit, limit);
    }

    /// <summary>
    /// Awaits an existing task while holding a slot. The task object is created by the
    /// caller before this method runs, so only the await is gated, not the task's creation.
    /// </summary>
    public async Task<T> MakeRequest<T>(Task<T> task) {
        return await MakeRequest<T>(() => task);
    }

    /// <summary>
    /// Waits for a free slot, invokes <paramref name="taskCreator"/>, awaits the task it
    /// returns and hands back its result. The slot is released whether the operation
    /// completes or throws.
    /// </summary>
    public async Task<T> MakeRequest<T>(Func<Task<T>> taskCreator) {
        await slots.WaitAsync();
        try {
            return await taskCreator();
        }
        finally {
            slots.Release();
        }
    }

    /// <summary>Result-less counterpart of the overload taking an existing <see cref="Task{T}"/>.</summary>
    public async Task MakeRequest(Task task) {
        await MakeRequest(() => task);
    }

    /// <summary>
    /// Result-less counterpart of the factory overload: waits for a slot, runs and awaits
    /// the created task, then releases the slot even on failure.
    /// </summary>
    public async Task MakeRequest(Func<Task> taskCreator) {
        await slots.WaitAsync();
        try {
            await taskCreator();
        }
        finally {
            slots.Release();
        }
    }
}

7
server/Utils/Constants.cs

@ -1,7 +0,0 @@
using NodaTime;
namespace Server.Utils;
/// <summary>
/// Static holder for application-wide constant values.
/// </summary>
public static class Constants {
/// <summary>
/// The time zone obtained from NodaTime's TZDB provider under the id "Europe/Bucharest".
/// </summary>
public static readonly DateTimeZone BucharestTz = DateTimeZoneProviders.Tzdb["Europe/Bucharest"];
}

33
server/Utils/IAsyncCusorAsyncAdapter.cs

@ -1,33 +0,0 @@
using System.Collections.Generic;
using System.Threading.Tasks;
using MongoDB.Driver;
namespace Server.Utils;
/// <summary>
/// Flattens the batches of an <see cref="IAsyncCursor{T}"/> into a sequence of single items,
/// exposing the <c>Current</c> / <c>MoveNextAsync</c> shape of an async enumerator
/// (presumably so a cursor can be consumed with <c>await foreach</c> - confirm against callers).
/// </summary>
/// <param name="Cursor">The cursor whose batches are iterated.</param>
public record IAsyncCusorAsyncEnumerator<T>(IAsyncCursor<T> Cursor) {
    // Enumerator over the batch currently exposed by Cursor.Current; null until a batch has been fetched.
    private IEnumerator<T>? enumerator = null;

    /// <summary>
    /// The item the enumerator is positioned on. Only meaningful after
    /// <see cref="MoveNextAsync"/> has returned <c>true</c>.
    /// </summary>
    public T Current => enumerator!.Current;

    /// <summary>
    /// Advances to the next item, fetching further batches from the cursor as needed.
    /// </summary>
    /// <returns>
    /// <c>true</c> when <see cref="Current"/> now refers to a valid item;
    /// <c>false</c> when the cursor reports that no more batches are available.
    /// </returns>
    public async Task<bool> MoveNextAsync() {
        while (true) {
            // Report success only once the batch enumerator is positioned on a real element.
            // Returning right after fetching a batch (without advancing) would expose
            // Current before the first MoveNext(), which is undefined for IEnumerator<T>.
            if (enumerator != null && enumerator.MoveNext()) {
                return true;
            }

            // Current batch is exhausted (or none fetched yet): ask the cursor for the next one.
            if (!await Cursor.MoveNextAsync()) {
                return false;
            }

            // Release the finished batch enumerator before replacing it. Looping again also
            // skips over batches that turn out to be empty.
            enumerator?.Dispose();
            enumerator = Cursor.Current.GetEnumerator();
        }
    }
}
/// <summary>
/// Extension methods for <see cref="IAsyncCursor{T}"/>.
/// </summary>
public static class IAsyncCursorExtensions {
    /// <summary>
    /// Wraps <paramref name="cursor"/> in an <see cref="IAsyncCusorAsyncEnumerator{T}"/>.
    /// </summary>
    /// <param name="cursor">The cursor to wrap.</param>
    /// <returns>A new enumerator adapter over <paramref name="cursor"/>.</returns>
    public static IAsyncCusorAsyncEnumerator<T> GetAsyncEnumerator<T>(this IAsyncCursor<T> cursor)
        => new IAsyncCusorAsyncEnumerator<T>(cursor);
}

6
server/appsettings.Development.json

@ -5,9 +5,5 @@
"Microsoft": "Warning", "Microsoft": "Warning",
"Microsoft.Hosting.Lifetime": "Information" "Microsoft.Hosting.Lifetime": "Information"
} }
}, }
"TrainDataMongo": {
"ConnectionString": "mongodb://localhost:27017",
"DatabaseName": "NewInfoferScraper"
},
} }

4
server/appsettings.json

@ -9,9 +9,5 @@
"Microsoft.Hosting.Lifetime": "Information" "Microsoft.Hosting.Lifetime": "Information"
} }
}, },
"TrainDataMongo": {
"ConnectionString": "mongodb://mongo:27017",
"DatabaseName": "NewInfoferScraper"
},
"AllowedHosts": "*" "AllowedHosts": "*"
} }

10
server/server.csproj

@ -4,16 +4,12 @@
<Nullable>enable</Nullable> <Nullable>enable</Nullable>
<AssemblyName>Server</AssemblyName> <AssemblyName>Server</AssemblyName>
<RootNamespace>Server</RootNamespace> <RootNamespace>Server</RootNamespace>
<LangVersion>11</LangVersion> <TargetFramework>net6.0</TargetFramework>
<TargetFrameworks>net6.0;net7.0;net8.0</TargetFrameworks>
</PropertyGroup> </PropertyGroup>
<ItemGroup> <ItemGroup>
<PackageReference Include="Microsoft.AspNetCore.Mvc.NewtonsoftJson" Version="6.0.21" />
<PackageReference Include="Microsoft.Data.Sqlite" Version="6.0.1" /> <PackageReference Include="Microsoft.Data.Sqlite" Version="6.0.1" />
<PackageReference Include="Microsoft.EntityFrameworkCore.Sqlite" Version="5.0.13" /> <PackageReference Include="Microsoft.EntityFrameworkCore.Sqlite" Version="5.0.13" />
<PackageReference Include="MongoDB.Analyzer" Version="1.1.0" />
<PackageReference Include="MongoDB.Driver" Version="2.19.1" />
<PackageReference Include="Nanoid" Version="2.1.0" /> <PackageReference Include="Nanoid" Version="2.1.0" />
<PackageReference Include="Swashbuckle.AspNetCore" Version="5.6.3" /> <PackageReference Include="Swashbuckle.AspNetCore" Version="5.6.3" />
</ItemGroup> </ItemGroup>
@ -22,4 +18,8 @@
<ProjectReference Include="..\scraper\scraper.csproj" /> <ProjectReference Include="..\scraper\scraper.csproj" />
</ItemGroup> </ItemGroup>
<ItemGroup>
<Folder Include="Utils" />
</ItemGroup>
</Project> </Project>

Loading…
Cancel
Save