diff --git a/ConsoleTest/ConsoleTest.csproj b/ConsoleTest/ConsoleTest.csproj
index 7c6a3da..40e087c 100644
--- a/ConsoleTest/ConsoleTest.csproj
+++ b/ConsoleTest/ConsoleTest.csproj
@@ -6,7 +6,7 @@
Exe
- net6.0;net7.0
+ net6.0;net7.0;net8.0
diff --git a/ConsoleTest/Program.cs b/ConsoleTest/Program.cs
index c4d66da..0ce4b90 100644
--- a/ConsoleTest/Program.cs
+++ b/ConsoleTest/Program.cs
@@ -40,7 +40,7 @@ async Task PrintTrain() {
Console.WriteLine(
JsonSerializer.Serialize(
- await TrainScraper.Scrape(trainNumber),
+ await new TrainScraper().Scrape(trainNumber),
new JsonSerializerOptions {
PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
WriteIndented = true,
@@ -58,7 +58,7 @@ async Task PrintStation() {
Console.WriteLine(
JsonSerializer.Serialize(
- await StationScraper.Scrape(stationName),
+ await new StationScraper().Scrape(stationName),
new JsonSerializerOptions {
PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
WriteIndented = true,
@@ -74,7 +74,7 @@ async Task ScrapeItineraries() {
if (from == null || to == null) return;
- var data = await RouteScraper.Scrape(from, to);
+ var data = await new RouteScraper().Scrape(from, to);
Console.WriteLine($"{data.Count} itineraries:");
Console.WriteLine();
diff --git a/scraper/scraper.csproj b/scraper/scraper.csproj
index 4dfa9a6..25c1486 100644
--- a/scraper/scraper.csproj
+++ b/scraper/scraper.csproj
@@ -2,7 +2,7 @@
enable
- net6.0;net7.0
+ net6.0;net7.0;net8.0
diff --git a/scraper/src/Scrapers/Route.cs b/scraper/src/Scrapers/Route.cs
index 110bd5f..3a224c6 100644
--- a/scraper/src/Scrapers/Route.cs
+++ b/scraper/src/Scrapers/Route.cs
@@ -16,20 +16,10 @@ using scraper.Models.Itinerary;
namespace InfoferScraper.Scrapers;
-public static class RouteScraper {
+public class RouteScraper {
private const string BaseUrl = "https://mersultrenurilor.infofer.ro/ro-RO/";
private static readonly DateTimeZone BucharestTz = DateTimeZoneProviders.Tzdb["Europe/Bucharest"];
- private static readonly CookieContainer CookieContainer = new();
-
- private static readonly HttpClient HttpClient = new(new HttpClientHandler {
- CookieContainer = CookieContainer,
- UseCookies = true,
- }) {
- BaseAddress = new Uri(BaseUrl),
- DefaultRequestVersion = new Version(2, 0),
- };
-
private static readonly Regex KmTrainRankNoRegex = new(@"^([0-9]+)\skm\scu\s([A-Z-]+)\s([0-9]+)$");
private static readonly Regex OperatorRegex = new(@$"^Operat\sde\s([{Utils.RoLetters}\s]+)$");
private static readonly Regex DepArrRegex = new(@"^(Ple|Sos)\s([0-9]+)\s([a-z]+)\.?\s([0-9]+):([0-9]+)$");
@@ -49,7 +39,28 @@ public static class RouteScraper {
["dec"] = 12,
};
- public static async Task?> Scrape(string from, string to, DateTimeOffset? dateOverride = null) {
+ private readonly CookieContainer cookieContainer = new();
+
+ private readonly HttpClient httpClient;
+
+    /// <summary>
+    /// Creates a route scraper whose HTTP client is rooted at <see cref="BaseUrl"/> and
+    /// uses HTTP/2 as its default request version.
+    /// </summary>
+    /// <param name="httpClientHandler">
+    /// Optional handler to send requests through; a new one is created when <c>null</c>.
+    /// The handler's <c>CookieContainer</c> is replaced with this scraper's own container and
+    /// <c>UseCookies</c> is switched on, so a handler passed to several scrapers ends up
+    /// using the cookie container of whichever scraper was constructed last.
+    /// </param>
+    public RouteScraper(HttpClientHandler? httpClientHandler = null) {
+        // A supplied handler and a freshly created one receive exactly the same cookie setup.
+        httpClientHandler ??= new HttpClientHandler();
+        httpClientHandler.CookieContainer = cookieContainer;
+        httpClientHandler.UseCookies = true;
+
+        httpClient = new HttpClient(httpClientHandler) {
+            BaseAddress = new Uri(BaseUrl),
+            DefaultRequestVersion = new Version(2, 0),
+        };
+    }
+
+ public async Task?> Scrape(string from, string to, DateTimeOffset? dateOverride = null) {
var dateOverrideInstant = dateOverride?.ToInstant().InZone(BucharestTz);
dateOverride = dateOverrideInstant?.ToDateTimeOffset();
TrainScrapeResult result = new();
@@ -70,7 +81,7 @@ public static class RouteScraper {
firstUrl = firstUrl.SetQueryParam("BetweenTrainsMinimumMinutes", "5");
firstUrl = firstUrl.SetQueryParam("ChangeStationName", "");
- var firstResponse = await HttpClient.GetStringAsync(firstUrl);
+ var firstResponse = await httpClient.GetStringAsync(firstUrl);
var firstDocument = await asContext.OpenAsync(req => req.Content(firstResponse));
var firstForm = firstDocument.GetElementById("form-search")!;
@@ -80,7 +91,7 @@ public static class RouteScraper {
.ToDictionary(elem => elem.Name!, elem => elem.Value);
var secondUrl = "".AppendPathSegments("Itineraries", "GetItineraries");
- var secondResponse = await HttpClient.PostAsync(
+ var secondResponse = await httpClient.PostAsync(
secondUrl,
#pragma warning disable CS8620
new FormUrlEncodedContent(firstResult)
@@ -90,10 +101,10 @@ public static class RouteScraper {
var secondDocument = await asContext.OpenAsync(
req => req.Content(secondResponseContent)
);
-
+
var (itineraryInfoDiv, _) = secondDocument
.QuerySelectorAll("body > div");
-
+
if (itineraryInfoDiv == null) {
return null;
}
@@ -103,7 +114,7 @@ public static class RouteScraper {
var itineraries = new List();
foreach (var itineraryLi in itinerariesLi) {
var itinerary = new Itinerary();
-
+
var cardDivs = itineraryLi.QuerySelectorAll(":scope > div > div > div > div");
var detailsDivs = cardDivs.Last()
.QuerySelectorAll(":scope > div > div")[1]
@@ -127,7 +138,7 @@ public static class RouteScraper {
// Detail
var detailColumns = li.QuerySelectorAll(":scope > div > div");
var leftSideDivs = detailColumns[0].QuerySelectorAll(":scope > div");
-
+
var departureDateText = leftSideDivs[0]
.QuerySelectorAll(":scope > div")[1]
.Text()
@@ -144,7 +155,7 @@ public static class RouteScraper {
if (departureDate < now.PlusDays(-1)) {
departureDate = departureDate.PlusYears(1);
}
-
+
var arrivalDateText = leftSideDivs[3]
.QuerySelectorAll(":scope > div")[1]
.Text()
@@ -168,7 +179,7 @@ public static class RouteScraper {
.Text()
.WithCollapsedSpaces();
var kmRankNumberMatch = KmTrainRankNoRegex.Match(kmRankNumberText);
-
+
var operatorText = rightSideDivs[0]
.QuerySelectorAll(":scope > div > div")[1]
.Text()
@@ -191,7 +202,7 @@ public static class RouteScraper {
if (text == "Nu sunt stații intermediare.") continue;
train.AddIntermediateStop(div.Text().WithCollapsedSpaces());
}
-
+
details.Add(train);
}
}
@@ -200,10 +211,10 @@ public static class RouteScraper {
detail.To = iTo;
itinerary.AddTrain(detail);
}
-
+
itineraries.Add(itinerary);
}
return itineraries;
}
-}
\ No newline at end of file
+}
diff --git a/scraper/src/Scrapers/Station.cs b/scraper/src/Scrapers/Station.cs
index 488b8d7..ac1315d 100644
--- a/scraper/src/Scrapers/Station.cs
+++ b/scraper/src/Scrapers/Station.cs
@@ -14,7 +14,7 @@ using NodaTime;
using NodaTime.Extensions;
namespace InfoferScraper.Scrapers {
- public static class StationScraper {
+ public class StationScraper {
private static readonly Regex StationInfoRegex = new($@"^([{Utils.RoLetters}.0-9 ]+)\sîn\s([0-9.]+)$");
private static readonly Regex StoppingTimeRegex = new(
@@ -28,25 +28,36 @@ namespace InfoferScraper.Scrapers {
private static readonly Regex PlatformRegex = new(@"^linia\s([A-Za-z0-9]+)$");
private static readonly Regex TrainUrlDateRegex = new(@"Date=([0-9]{2}).([0-9]{2}).([0-9]{4})");
-
+
private static readonly DateTimeZone BucharestTz = DateTimeZoneProviders.Tzdb["Europe/Bucharest"];
private const string BaseUrl = "https://mersultrenurilor.infofer.ro/ro-RO/";
- private static readonly CookieContainer CookieContainer = new();
+ private readonly CookieContainer cookieContainer = new();
+
+ private readonly HttpClient httpClient;
- private static readonly HttpClient HttpClient = new(new HttpClientHandler {
- CookieContainer = CookieContainer,
- UseCookies = true,
- }) {
- BaseAddress = new Uri(BaseUrl),
- DefaultRequestVersion = new Version(2, 0),
- };
+        /// <summary>
+        /// Creates a station scraper whose HTTP client is rooted at <see cref="BaseUrl"/> and
+        /// uses HTTP/2 as its default request version.
+        /// </summary>
+        /// <param name="httpClientHandler">
+        /// Optional handler to send requests through; a new one is created when <c>null</c>.
+        /// The handler's <c>CookieContainer</c> is replaced with this scraper's own container and
+        /// <c>UseCookies</c> is switched on, so a handler passed to several scrapers ends up
+        /// using the cookie container of whichever scraper was constructed last.
+        /// </param>
+        public StationScraper(HttpClientHandler? httpClientHandler = null) {
+            // A supplied handler and a freshly created one receive exactly the same cookie setup.
+            httpClientHandler ??= new HttpClientHandler();
+            httpClientHandler.CookieContainer = cookieContainer;
+            httpClientHandler.UseCookies = true;
+
+            httpClient = new HttpClient(httpClientHandler) {
+                BaseAddress = new Uri(BaseUrl),
+                DefaultRequestVersion = new Version(2, 0),
+            };
+        }
- public static async Task Scrape(string stationName, DateTimeOffset? date = null) {
+ public async Task Scrape(string stationName, DateTimeOffset? date = null) {
var dateInstant = date?.ToInstant().InZone(BucharestTz);
date = dateInstant?.ToDateTimeOffset();
-
+
stationName = stationName.RoLettersToEn();
var result = new StationScrapeResult();
@@ -59,7 +70,7 @@ namespace InfoferScraper.Scrapers {
if (date != null) {
firstUrl = firstUrl.SetQueryParam("Date", $"{date:d.MM.yyyy}");
}
- var firstResponse = await HttpClient.GetStringAsync(firstUrl);
+ var firstResponse = await httpClient.GetStringAsync(firstUrl);
var firstDocument = await asContext.OpenAsync(req => req.Content(firstResponse));
var firstForm = firstDocument.GetElementById("form-search")!;
@@ -69,7 +80,7 @@ namespace InfoferScraper.Scrapers {
.ToDictionary(elem => elem.Name!, elem => elem.Value);
var secondUrl = "".AppendPathSegments("Stations", "StationsResult");
- var secondResponse = await HttpClient.PostAsync(
+ var secondResponse = await httpClient.PostAsync(
secondUrl,
#pragma warning disable CS8620
new FormUrlEncodedContent(firstResult)
@@ -167,9 +178,9 @@ namespace InfoferScraper.Scrapers {
.Text()
.WithCollapsedSpaces();
foreach (var station in routeDiv.QuerySelectorAll(":scope > div > div")[1]
- .Text()
- .WithCollapsedSpaces()
- .Split(" - ")) {
+ .Text()
+ .WithCollapsedSpaces()
+ .Split(" - ")) {
arrDep.ModifyableTrain.AddRouteStation(station);
}
@@ -182,7 +193,7 @@ namespace InfoferScraper.Scrapers {
.QuerySelectorAll(":scope > div");
var delayDiv = statusDivComponents[0];
-
+
var (delayMin, (approx, _)) = (StatusRegex.Match(
delayDiv
.Text()
diff --git a/scraper/src/Scrapers/Train.cs b/scraper/src/Scrapers/Train.cs
index abd2c40..e043851 100644
--- a/scraper/src/Scrapers/Train.cs
+++ b/scraper/src/Scrapers/Train.cs
@@ -15,7 +15,7 @@ using NodaTime.Extensions;
using scraper.Exceptions;
namespace InfoferScraper.Scrapers {
- public static class TrainScraper {
+ public class TrainScraper {
private const string BaseUrl = "https://mersultrenurilor.infofer.ro/ro-RO/";
private static readonly Regex TrainInfoRegex = new(@"^([A-Z-]+)\s([0-9]+)\sîn\s([0-9.]+)$");
private static readonly Regex OperatorRegex = new(@"^Operat\sde\s(.+)$");
@@ -51,16 +51,28 @@ namespace InfoferScraper.Scrapers {
private static readonly DateTimeZone BucharestTz = DateTimeZoneProviders.Tzdb["Europe/Bucharest"];
- private static readonly CookieContainer CookieContainer = new();
- private static readonly HttpClient HttpClient = new(new HttpClientHandler {
- CookieContainer = CookieContainer,
- UseCookies = true,
- }) {
- BaseAddress = new Uri(BaseUrl),
- DefaultRequestVersion = new Version(2, 0),
- };
+ private readonly CookieContainer cookieContainer = new();
+ private readonly HttpClient httpClient;
+
+        /// <summary>
+        /// Creates a train scraper whose HTTP client is rooted at <see cref="BaseUrl"/> and
+        /// uses HTTP/2 as its default request version.
+        /// </summary>
+        /// <param name="httpClientHandler">
+        /// Optional handler to send requests through; a new one is created when <c>null</c>.
+        /// The handler's <c>CookieContainer</c> is replaced with this scraper's own container and
+        /// <c>UseCookies</c> is switched on, so a handler passed to several scrapers ends up
+        /// using the cookie container of whichever scraper was constructed last.
+        /// </param>
+        public TrainScraper(HttpClientHandler? httpClientHandler = null) {
+            // A supplied handler and a freshly created one receive exactly the same cookie setup.
+            httpClientHandler ??= new HttpClientHandler();
+            httpClientHandler.CookieContainer = cookieContainer;
+            httpClientHandler.UseCookies = true;
+
+            httpClient = new HttpClient(httpClientHandler) {
+                BaseAddress = new Uri(BaseUrl),
+                DefaultRequestVersion = new Version(2, 0),
+            };
+        }
- public static async Task Scrape(string trainNumber, DateTimeOffset? dateOverride = null) {
+ public async Task Scrape(string trainNumber, DateTimeOffset? dateOverride = null) {
var dateOverrideInstant = dateOverride?.ToInstant().InZone(BucharestTz);
dateOverride = dateOverrideInstant?.ToDateTimeOffset();
TrainScrapeResult result = new();
@@ -73,7 +85,7 @@ namespace InfoferScraper.Scrapers {
if (dateOverride != null) {
firstUrl = firstUrl.SetQueryParam("Date", $"{dateOverride:d.MM.yyyy}");
}
- var firstResponse = await HttpClient.GetStringAsync(firstUrl);
+ var firstResponse = await httpClient.GetStringAsync(firstUrl);
var firstDocument = await asContext.OpenAsync(req => req.Content(firstResponse));
var firstForm = firstDocument.GetElementById("form-search")!;
@@ -83,7 +95,7 @@ namespace InfoferScraper.Scrapers {
.ToDictionary(elem => elem.Name!, elem => elem.Value);
var secondUrl = "".AppendPathSegments("Trains", "TrainsResult");
- var secondResponse = await HttpClient.PostAsync(
+ var secondResponse = await httpClient.PostAsync(
secondUrl,
#pragma warning disable CS8620
new FormUrlEncodedContent(firstResult)
diff --git a/server/Models/ProxySettings.cs b/server/Models/ProxySettings.cs
new file mode 100644
index 0000000..389e9c3
--- /dev/null
+++ b/server/Models/ProxySettings.cs
@@ -0,0 +1,9 @@
+namespace Server.Models;
+
+/// <summary>
+/// Describes the HTTP proxy that outgoing requests should be routed through.
+/// </summary>
+/// <param name="Url">Address of the proxy server.</param>
+/// <param name="Credentials">Credentials for the proxy, or <c>null</c> when none are supplied.</param>
+public record ProxySettings(string Url, ProxyCredentials? Credentials = null) {
+ // NOTE(review): presumably needed so the record can be created by configuration binding — confirm.
+ /// <summary>Creates settings with an empty <c>Url</c> and no credentials.</summary>
+ public ProxySettings() : this("") { }
+}
+
+/// <summary>
+/// User name and password pair attached to a <see cref="ProxySettings"/> instance.
+/// </summary>
+/// <param name="Username">User name to authenticate with.</param>
+/// <param name="Password">Password belonging to <paramref name="Username"/>.</param>
+public record ProxyCredentials(string Username, string Password) {
+ // NOTE(review): presumably needed so the record can be created by configuration binding — confirm.
+ /// <summary>Creates credentials with an empty user name and an empty password.</summary>
+ public ProxyCredentials() : this("", "") { }
+}
diff --git a/server/Services/Implementations/DataManager.cs b/server/Services/Implementations/DataManager.cs
index f5b99c8..a8e0e25 100644
--- a/server/Services/Implementations/DataManager.cs
+++ b/server/Services/Implementations/DataManager.cs
@@ -1,12 +1,15 @@
using System;
using System.Collections.Generic;
using System.Diagnostics;
+using System.Net;
+using System.Net.Http;
using System.Threading.Tasks;
using InfoferScraper;
using InfoferScraper.Models.Station;
using InfoferScraper.Models.Train;
using Microsoft.Extensions.Logging;
using scraper.Models.Itinerary;
+using Server.Models;
using Server.Services.Interfaces;
using Server.Utils;
@@ -18,17 +21,26 @@ namespace Server.Services.Implementations {
private NodaTime.IDateTimeZoneProvider TzProvider { get; }
private NodaTime.DateTimeZone CfrTimeZone => TzProvider["Europe/Bucharest"];
- public DataManager(NodaTime.IDateTimeZoneProvider tzProvider, IDatabase database, ILogger logger) {
+ public DataManager(NodaTime.IDateTimeZoneProvider tzProvider, IDatabase database, ILogger logger, ProxySettings? proxySettings) {
this.TzProvider = tzProvider;
this.Database = database;
this.Logger = logger;
+            // Build the proxy once. An empty Url (what the parameterless ProxySettings
+            // constructor produces) is treated the same as having no proxy settings at all.
+            WebProxy? webProxy = null;
+            if (proxySettings != null && !string.IsNullOrEmpty(proxySettings.Url)) {
+                var proxyCredentials = proxySettings.Credentials;
+                webProxy = new WebProxy(proxySettings.Url) {
+                    // Credentials must be attached to the WebProxy itself:
+                    // HttpClientHandler.DefaultProxyCredentials only applies to the system
+                    // default proxy, i.e. when Proxy is left null.
+                    Credentials = proxyCredentials == null
+                        ? null
+                        : new NetworkCredential(proxyCredentials.Username, proxyCredentials.Password),
+                };
+            }
+
+            // Every scraper gets its own handler: the scraper constructors overwrite the
+            // handler's CookieContainer, so a shared handler would leave all three scrapers
+            // on the cookie container of whichever scraper was constructed last.
+            HttpClientHandler CreateHandler() => new() {
+                UseProxy = webProxy != null,
+                Proxy = webProxy,
+            };
+
+            InfoferScraper.Scrapers.StationScraper stationScraper = new(CreateHandler());
+            InfoferScraper.Scrapers.TrainScraper trainScraper = new(CreateHandler());
+            InfoferScraper.Scrapers.RouteScraper routeScraper = new(CreateHandler());
+
stationCache = new(async (t) => {
var (stationName, date) = t;
Logger.LogDebug("Fetching station {StationName} for date {Date}", stationName, date);
var zonedDate = new NodaTime.LocalDate(date.Year, date.Month, date.Day).AtStartOfDayInZone(CfrTimeZone);
- var station = await InfoferScraper.Scrapers.StationScraper.Scrape(stationName, zonedDate.ToDateTimeOffset());
+ var station = await stationScraper.Scrape(stationName, zonedDate.ToDateTimeOffset());
if (station != null) {
_ = Task.Run(async () => {
var watch = Stopwatch.StartNew();
@@ -44,7 +56,7 @@ namespace Server.Services.Implementations {
Logger.LogDebug("Fetching train {TrainNumber} for date {Date}", trainNumber, date);
var zonedDate = new NodaTime.LocalDate(date.Year, date.Month, date.Day).AtStartOfDayInZone(CfrTimeZone);
- var train = await InfoferScraper.Scrapers.TrainScraper.Scrape(trainNumber, zonedDate.ToDateTimeOffset());
+ var train = await trainScraper.Scrape(trainNumber, zonedDate.ToDateTimeOffset());
if (train != null) {
_ = Task.Run(async () => {
var watch = Stopwatch.StartNew();
@@ -60,7 +72,7 @@ namespace Server.Services.Implementations {
Logger.LogDebug("Fetching itinerary from {From} to {To} for date {Date}", from, to, date);
var zonedDate = new NodaTime.LocalDate(date.Year, date.Month, date.Day).AtStartOfDayInZone(CfrTimeZone);
- var itineraries = await InfoferScraper.Scrapers.RouteScraper.Scrape(from, to, zonedDate.ToDateTimeOffset());
+ var itineraries = await routeScraper.Scrape(from, to, zonedDate.ToDateTimeOffset());
if (itineraries != null) {
_ = Task.Run(async () => {
var watch = Stopwatch.StartNew();
@@ -99,4 +111,4 @@ namespace Server.Services.Implementations {
return await itinerariesCache.GetItem((from, to, cfrDate));
}
}
-}
\ No newline at end of file
+}
diff --git a/server/Startup.cs b/server/Startup.cs
index ffaea98..4c573df 100644
--- a/server/Startup.cs
+++ b/server/Startup.cs
@@ -10,6 +10,7 @@ using Microsoft.Extensions.Hosting;
using Microsoft.OpenApi.Models;
using MongoDB.Bson.Serialization.Conventions;
using Newtonsoft.Json.Serialization;
+using Server.Models;
using Server.Models.Database;
using Server.Services.Implementations;
using Server.Services.Interfaces;
@@ -30,12 +31,27 @@ namespace Server {
});
}
+ services.Configure(Configuration.GetSection("Proxy"));
services.Configure(Configuration.GetSection("TrainDataMongo"));
var conventionPack = new ConventionPack { new CamelCaseElementNameConvention() };
ConventionRegistry.Register("camelCase", conventionPack, _ => true);
services.AddSingleton();
services.AddSingleton();
- services.AddSingleton(NodaTime.DateTimeZoneProviders.Tzdb);
+ services.AddSingleton(NodaTime.DateTimeZoneProviders.Tzdb);
+
+            // Chooses the file storage implementation from the "FileStorage" configuration
+            // section. Fails fast with a descriptive error when the section is unusable,
+            // instead of handing a null directory to the storage implementation.
+            services.AddSingleton((serviceProvider) => {
+                var conf = serviceProvider.GetRequiredService();
+                var section = conf.GetSection("FileStorage");
+                var storageType = section["Type"];
+                switch (storageType) {
+                    case "local": {
+                        var dir = section["Directory"];
+                        if (string.IsNullOrEmpty(dir)) {
+                            throw new InvalidOperationException(
+                                "Unable to configure FileStorage: \"Directory\" is required for the \"local\" type"
+                            );
+                        }
+                        return new LocalFileStorage(dir);
+                    }
+                    default:
+                        throw new InvalidOperationException(
+                            $"Unable to configure FileStorage: unsupported Type \"{storageType}\""
+                        );
+                }
+            });
+
services.AddControllers()
.AddNewtonsoftJson(options => {
options.SerializerSettings.ContractResolver = new DefaultContractResolver {
diff --git a/server/Utils/Constants.cs b/server/Utils/Constants.cs
new file mode 100644
index 0000000..d59b5cf
--- /dev/null
+++ b/server/Utils/Constants.cs
@@ -0,0 +1,7 @@
+using NodaTime;
+
+namespace Server.Utils;
+
+/// <summary>
+/// Shared constant values.
+/// </summary>
+public static class Constants {
+ /// <summary>The <c>Europe/Bucharest</c> time zone, looked up in NodaTime's TZDB provider.</summary>
+ public static readonly DateTimeZone BucharestTz = DateTimeZoneProviders.Tzdb["Europe/Bucharest"];
+}
\ No newline at end of file
diff --git a/server/server.csproj b/server/server.csproj
index c06495a..ec644e6 100644
--- a/server/server.csproj
+++ b/server/server.csproj
@@ -5,7 +5,7 @@
Server
Server
11
- net6.0;net7.0
+ net6.0;net7.0;net8.0