Browse Source

Add proxy and .NET 8 support

Kenneth Bruen 3 months ago
parent
commit
422b4727c0
Signed by: kbruen
GPG Key ID: C1980A470C3EE5B1
  1. 2
      ConsoleTest/ConsoleTest.csproj
  2. 6
      ConsoleTest/Program.cs
  3. 2
      scraper/scraper.csproj
  4. 39
      scraper/src/Scrapers/Route.cs
  5. 41
      scraper/src/Scrapers/Station.cs
  6. 36
      scraper/src/Scrapers/Train.cs
  7. 9
      server/Models/ProxySettings.cs
  8. 20
      server/Services/Implementations/DataManager.cs
  9. 18
      server/Startup.cs
  10. 7
      server/Utils/Constants.cs
  11. 2
      server/server.csproj

2
ConsoleTest/ConsoleTest.csproj

@ -6,7 +6,7 @@
<PropertyGroup> <PropertyGroup>
<OutputType>Exe</OutputType> <OutputType>Exe</OutputType>
<TargetFrameworks>net6.0;net7.0</TargetFrameworks> <TargetFrameworks>net6.0;net7.0;net8.0</TargetFrameworks>
</PropertyGroup> </PropertyGroup>
</Project> </Project>

6
ConsoleTest/Program.cs

@ -40,7 +40,7 @@ async Task PrintTrain() {
Console.WriteLine( Console.WriteLine(
JsonSerializer.Serialize( JsonSerializer.Serialize(
await TrainScraper.Scrape(trainNumber), await new TrainScraper().Scrape(trainNumber),
new JsonSerializerOptions { new JsonSerializerOptions {
PropertyNamingPolicy = JsonNamingPolicy.CamelCase, PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
WriteIndented = true, WriteIndented = true,
@ -58,7 +58,7 @@ async Task PrintStation() {
Console.WriteLine( Console.WriteLine(
JsonSerializer.Serialize( JsonSerializer.Serialize(
await StationScraper.Scrape(stationName), await new StationScraper().Scrape(stationName),
new JsonSerializerOptions { new JsonSerializerOptions {
PropertyNamingPolicy = JsonNamingPolicy.CamelCase, PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
WriteIndented = true, WriteIndented = true,
@ -74,7 +74,7 @@ async Task ScrapeItineraries() {
if (from == null || to == null) return; if (from == null || to == null) return;
var data = await RouteScraper.Scrape(from, to); var data = await new RouteScraper().Scrape(from, to);
Console.WriteLine($"{data.Count} itineraries:"); Console.WriteLine($"{data.Count} itineraries:");
Console.WriteLine(); Console.WriteLine();

2
scraper/scraper.csproj

@ -2,7 +2,7 @@
<PropertyGroup> <PropertyGroup>
<Nullable>enable</Nullable> <Nullable>enable</Nullable>
<TargetFrameworks>net6.0;net7.0</TargetFrameworks> <TargetFrameworks>net6.0;net7.0;net8.0</TargetFrameworks>
</PropertyGroup> </PropertyGroup>
<ItemGroup> <ItemGroup>

39
scraper/src/Scrapers/Route.cs

@ -16,20 +16,10 @@ using scraper.Models.Itinerary;
namespace InfoferScraper.Scrapers; namespace InfoferScraper.Scrapers;
public static class RouteScraper { public class RouteScraper {
private const string BaseUrl = "https://mersultrenurilor.infofer.ro/ro-RO/"; private const string BaseUrl = "https://mersultrenurilor.infofer.ro/ro-RO/";
private static readonly DateTimeZone BucharestTz = DateTimeZoneProviders.Tzdb["Europe/Bucharest"]; private static readonly DateTimeZone BucharestTz = DateTimeZoneProviders.Tzdb["Europe/Bucharest"];
private static readonly CookieContainer CookieContainer = new();
private static readonly HttpClient HttpClient = new(new HttpClientHandler {
CookieContainer = CookieContainer,
UseCookies = true,
}) {
BaseAddress = new Uri(BaseUrl),
DefaultRequestVersion = new Version(2, 0),
};
private static readonly Regex KmTrainRankNoRegex = new(@"^([0-9]+)\skm\scu\s([A-Z-]+)\s([0-9]+)$"); private static readonly Regex KmTrainRankNoRegex = new(@"^([0-9]+)\skm\scu\s([A-Z-]+)\s([0-9]+)$");
private static readonly Regex OperatorRegex = new(@$"^Operat\sde\s([{Utils.RoLetters}\s]+)$"); private static readonly Regex OperatorRegex = new(@$"^Operat\sde\s([{Utils.RoLetters}\s]+)$");
private static readonly Regex DepArrRegex = new(@"^(Ple|Sos)\s([0-9]+)\s([a-z]+)\.?\s([0-9]+):([0-9]+)$"); private static readonly Regex DepArrRegex = new(@"^(Ple|Sos)\s([0-9]+)\s([a-z]+)\.?\s([0-9]+):([0-9]+)$");
@ -49,7 +39,28 @@ public static class RouteScraper {
["dec"] = 12, ["dec"] = 12,
}; };
public static async Task<List<IItinerary>?> Scrape(string from, string to, DateTimeOffset? dateOverride = null) { private readonly CookieContainer cookieContainer = new();
private readonly HttpClient httpClient;
/// <summary>
/// Creates a route scraper whose HTTP client sends its requests through the given handler.
/// </summary>
/// <param name="httpClientHandler">
/// Optional handler to build the HTTP client on. When omitted, a new handler is created.
/// In both cases the handler is pointed at this scraper's cookie container and has
/// cookie handling switched on, so a caller-supplied handler is modified.
/// </param>
public RouteScraper(HttpClientHandler? httpClientHandler = null) {
    // Fall back to a fresh handler, then apply the same cookie setup to whichever one is used.
    httpClientHandler ??= new HttpClientHandler();
    httpClientHandler.CookieContainer = cookieContainer;
    httpClientHandler.UseCookies = true;

    // Relative request URLs are resolved against BaseUrl; HTTP/2 is the default request version.
    httpClient = new HttpClient(httpClientHandler) {
        BaseAddress = new Uri(BaseUrl),
        DefaultRequestVersion = new Version(2, 0),
    };
}
public async Task<List<IItinerary>?> Scrape(string from, string to, DateTimeOffset? dateOverride = null) {
var dateOverrideInstant = dateOverride?.ToInstant().InZone(BucharestTz); var dateOverrideInstant = dateOverride?.ToInstant().InZone(BucharestTz);
dateOverride = dateOverrideInstant?.ToDateTimeOffset(); dateOverride = dateOverrideInstant?.ToDateTimeOffset();
TrainScrapeResult result = new(); TrainScrapeResult result = new();
@ -70,7 +81,7 @@ public static class RouteScraper {
firstUrl = firstUrl.SetQueryParam("BetweenTrainsMinimumMinutes", "5"); firstUrl = firstUrl.SetQueryParam("BetweenTrainsMinimumMinutes", "5");
firstUrl = firstUrl.SetQueryParam("ChangeStationName", ""); firstUrl = firstUrl.SetQueryParam("ChangeStationName", "");
var firstResponse = await HttpClient.GetStringAsync(firstUrl); var firstResponse = await httpClient.GetStringAsync(firstUrl);
var firstDocument = await asContext.OpenAsync(req => req.Content(firstResponse)); var firstDocument = await asContext.OpenAsync(req => req.Content(firstResponse));
var firstForm = firstDocument.GetElementById("form-search")!; var firstForm = firstDocument.GetElementById("form-search")!;
@ -80,7 +91,7 @@ public static class RouteScraper {
.ToDictionary(elem => elem.Name!, elem => elem.Value); .ToDictionary(elem => elem.Name!, elem => elem.Value);
var secondUrl = "".AppendPathSegments("Itineraries", "GetItineraries"); var secondUrl = "".AppendPathSegments("Itineraries", "GetItineraries");
var secondResponse = await HttpClient.PostAsync( var secondResponse = await httpClient.PostAsync(
secondUrl, secondUrl,
#pragma warning disable CS8620 #pragma warning disable CS8620
new FormUrlEncodedContent(firstResult) new FormUrlEncodedContent(firstResult)

41
scraper/src/Scrapers/Station.cs

@ -14,7 +14,7 @@ using NodaTime;
using NodaTime.Extensions; using NodaTime.Extensions;
namespace InfoferScraper.Scrapers { namespace InfoferScraper.Scrapers {
public static class StationScraper { public class StationScraper {
private static readonly Regex StationInfoRegex = new($@"^([{Utils.RoLetters}.0-9 ]+)\sîn\s([0-9.]+)$"); private static readonly Regex StationInfoRegex = new($@"^([{Utils.RoLetters}.0-9 ]+)\sîn\s([0-9.]+)$");
private static readonly Regex StoppingTimeRegex = new( private static readonly Regex StoppingTimeRegex = new(
@ -33,17 +33,28 @@ namespace InfoferScraper.Scrapers {
private const string BaseUrl = "https://mersultrenurilor.infofer.ro/ro-RO/"; private const string BaseUrl = "https://mersultrenurilor.infofer.ro/ro-RO/";
private static readonly CookieContainer CookieContainer = new(); private readonly CookieContainer cookieContainer = new();
private static readonly HttpClient HttpClient = new(new HttpClientHandler { private readonly HttpClient httpClient;
CookieContainer = CookieContainer,
UseCookies = true,
}) {
BaseAddress = new Uri(BaseUrl),
DefaultRequestVersion = new Version(2, 0),
};
public static async Task<IStationScrapeResult> Scrape(string stationName, DateTimeOffset? date = null) { public StationScraper(HttpClientHandler? httpClientHandler = null) {
if (httpClientHandler == null) {
httpClientHandler = new HttpClientHandler {
CookieContainer = cookieContainer,
UseCookies = true,
};
}
else {
httpClientHandler.CookieContainer = cookieContainer;
httpClientHandler.UseCookies = true;
}
httpClient = new HttpClient(httpClientHandler) {
BaseAddress = new Uri(BaseUrl),
DefaultRequestVersion = new Version(2, 0),
};
}
public async Task<IStationScrapeResult> Scrape(string stationName, DateTimeOffset? date = null) {
var dateInstant = date?.ToInstant().InZone(BucharestTz); var dateInstant = date?.ToInstant().InZone(BucharestTz);
date = dateInstant?.ToDateTimeOffset(); date = dateInstant?.ToDateTimeOffset();
@ -59,7 +70,7 @@ namespace InfoferScraper.Scrapers {
if (date != null) { if (date != null) {
firstUrl = firstUrl.SetQueryParam("Date", $"{date:d.MM.yyyy}"); firstUrl = firstUrl.SetQueryParam("Date", $"{date:d.MM.yyyy}");
} }
var firstResponse = await HttpClient.GetStringAsync(firstUrl); var firstResponse = await httpClient.GetStringAsync(firstUrl);
var firstDocument = await asContext.OpenAsync(req => req.Content(firstResponse)); var firstDocument = await asContext.OpenAsync(req => req.Content(firstResponse));
var firstForm = firstDocument.GetElementById("form-search")!; var firstForm = firstDocument.GetElementById("form-search")!;
@ -69,7 +80,7 @@ namespace InfoferScraper.Scrapers {
.ToDictionary(elem => elem.Name!, elem => elem.Value); .ToDictionary(elem => elem.Name!, elem => elem.Value);
var secondUrl = "".AppendPathSegments("Stations", "StationsResult"); var secondUrl = "".AppendPathSegments("Stations", "StationsResult");
var secondResponse = await HttpClient.PostAsync( var secondResponse = await httpClient.PostAsync(
secondUrl, secondUrl,
#pragma warning disable CS8620 #pragma warning disable CS8620
new FormUrlEncodedContent(firstResult) new FormUrlEncodedContent(firstResult)
@ -167,9 +178,9 @@ namespace InfoferScraper.Scrapers {
.Text() .Text()
.WithCollapsedSpaces(); .WithCollapsedSpaces();
foreach (var station in routeDiv.QuerySelectorAll(":scope > div > div")[1] foreach (var station in routeDiv.QuerySelectorAll(":scope > div > div")[1]
.Text() .Text()
.WithCollapsedSpaces() .WithCollapsedSpaces()
.Split(" - ")) { .Split(" - ")) {
arrDep.ModifyableTrain.AddRouteStation(station); arrDep.ModifyableTrain.AddRouteStation(station);
} }

36
scraper/src/Scrapers/Train.cs

@ -15,7 +15,7 @@ using NodaTime.Extensions;
using scraper.Exceptions; using scraper.Exceptions;
namespace InfoferScraper.Scrapers { namespace InfoferScraper.Scrapers {
public static class TrainScraper { public class TrainScraper {
private const string BaseUrl = "https://mersultrenurilor.infofer.ro/ro-RO/"; private const string BaseUrl = "https://mersultrenurilor.infofer.ro/ro-RO/";
private static readonly Regex TrainInfoRegex = new(@"^([A-Z-]+)\s([0-9]+)\sîn\s([0-9.]+)$"); private static readonly Regex TrainInfoRegex = new(@"^([A-Z-]+)\s([0-9]+)\sîn\s([0-9.]+)$");
private static readonly Regex OperatorRegex = new(@"^Operat\sde\s(.+)$"); private static readonly Regex OperatorRegex = new(@"^Operat\sde\s(.+)$");
@ -51,16 +51,28 @@ namespace InfoferScraper.Scrapers {
private static readonly DateTimeZone BucharestTz = DateTimeZoneProviders.Tzdb["Europe/Bucharest"]; private static readonly DateTimeZone BucharestTz = DateTimeZoneProviders.Tzdb["Europe/Bucharest"];
private static readonly CookieContainer CookieContainer = new(); private readonly CookieContainer cookieContainer = new();
private static readonly HttpClient HttpClient = new(new HttpClientHandler { private readonly HttpClient httpClient;
CookieContainer = CookieContainer,
UseCookies = true, public TrainScraper(HttpClientHandler? httpClientHandler = null)
}) { {
BaseAddress = new Uri(BaseUrl), if (httpClientHandler == null) {
DefaultRequestVersion = new Version(2, 0), httpClientHandler = new HttpClientHandler {
}; CookieContainer = cookieContainer,
UseCookies = true,
};
}
else {
httpClientHandler.CookieContainer = cookieContainer;
httpClientHandler.UseCookies = true;
}
httpClient = new HttpClient(httpClientHandler) {
BaseAddress = new Uri(BaseUrl),
DefaultRequestVersion = new Version(2, 0),
};
}
public static async Task<ITrainScrapeResult?> Scrape(string trainNumber, DateTimeOffset? dateOverride = null) { public async Task<ITrainScrapeResult?> Scrape(string trainNumber, DateTimeOffset? dateOverride = null) {
var dateOverrideInstant = dateOverride?.ToInstant().InZone(BucharestTz); var dateOverrideInstant = dateOverride?.ToInstant().InZone(BucharestTz);
dateOverride = dateOverrideInstant?.ToDateTimeOffset(); dateOverride = dateOverrideInstant?.ToDateTimeOffset();
TrainScrapeResult result = new(); TrainScrapeResult result = new();
@ -73,7 +85,7 @@ namespace InfoferScraper.Scrapers {
if (dateOverride != null) { if (dateOverride != null) {
firstUrl = firstUrl.SetQueryParam("Date", $"{dateOverride:d.MM.yyyy}"); firstUrl = firstUrl.SetQueryParam("Date", $"{dateOverride:d.MM.yyyy}");
} }
var firstResponse = await HttpClient.GetStringAsync(firstUrl); var firstResponse = await httpClient.GetStringAsync(firstUrl);
var firstDocument = await asContext.OpenAsync(req => req.Content(firstResponse)); var firstDocument = await asContext.OpenAsync(req => req.Content(firstResponse));
var firstForm = firstDocument.GetElementById("form-search")!; var firstForm = firstDocument.GetElementById("form-search")!;
@ -83,7 +95,7 @@ namespace InfoferScraper.Scrapers {
.ToDictionary(elem => elem.Name!, elem => elem.Value); .ToDictionary(elem => elem.Name!, elem => elem.Value);
var secondUrl = "".AppendPathSegments("Trains", "TrainsResult"); var secondUrl = "".AppendPathSegments("Trains", "TrainsResult");
var secondResponse = await HttpClient.PostAsync( var secondResponse = await httpClient.PostAsync(
secondUrl, secondUrl,
#pragma warning disable CS8620 #pragma warning disable CS8620
new FormUrlEncodedContent(firstResult) new FormUrlEncodedContent(firstResult)

9
server/Models/ProxySettings.cs

@ -0,0 +1,9 @@
namespace Server.Models;
/// <summary>
/// Settings describing the HTTP proxy that outgoing requests should go through.
/// </summary>
/// <param name="Url">Address of the proxy server.</param>
/// <param name="Credentials">Credentials for the proxy, or <c>null</c> when none are supplied.</param>
public record ProxySettings(
    string Url,
    ProxyCredentials? Credentials = null
) {
    /// <summary>
    /// Parameterless constructor: empty URL and no credentials.
    /// </summary>
    public ProxySettings() : this(Url: "") { }
}
/// <summary>
/// User name / password pair used to authenticate against a proxy.
/// </summary>
/// <param name="Username">User name sent to the proxy.</param>
/// <param name="Password">Password sent to the proxy.</param>
public record ProxyCredentials(string Username, string Password) {
    /// <summary>
    /// Parameterless constructor: empty user name and empty password.
    /// </summary>
    public ProxyCredentials() : this("", "") { }

    /// <summary>
    /// Returns the record's text form with the password redacted.
    /// </summary>
    /// <remarks>
    /// The compiler-generated record <c>ToString()</c> prints every public property,
    /// which would put the password in clear text wherever the record is logged or dumped.
    /// </remarks>
    public override string ToString() =>
        $"{nameof(ProxyCredentials)} {{ {nameof(Username)} = {Username}, {nameof(Password)} = *** }}";
}

20
server/Services/Implementations/DataManager.cs

@ -1,12 +1,15 @@
using System; using System;
using System.Collections.Generic; using System.Collections.Generic;
using System.Diagnostics; using System.Diagnostics;
using System.Net;
using System.Net.Http;
using System.Threading.Tasks; using System.Threading.Tasks;
using InfoferScraper; using InfoferScraper;
using InfoferScraper.Models.Station; using InfoferScraper.Models.Station;
using InfoferScraper.Models.Train; using InfoferScraper.Models.Train;
using Microsoft.Extensions.Logging; using Microsoft.Extensions.Logging;
using scraper.Models.Itinerary; using scraper.Models.Itinerary;
using Server.Models;
using Server.Services.Interfaces; using Server.Services.Interfaces;
using Server.Utils; using Server.Utils;
@ -18,17 +21,26 @@ namespace Server.Services.Implementations {
private NodaTime.IDateTimeZoneProvider TzProvider { get; } private NodaTime.IDateTimeZoneProvider TzProvider { get; }
private NodaTime.DateTimeZone CfrTimeZone => TzProvider["Europe/Bucharest"]; private NodaTime.DateTimeZone CfrTimeZone => TzProvider["Europe/Bucharest"];
public DataManager(NodaTime.IDateTimeZoneProvider tzProvider, IDatabase database, ILogger<DataManager> logger) { public DataManager(NodaTime.IDateTimeZoneProvider tzProvider, IDatabase database, ILogger<DataManager> logger, ProxySettings? proxySettings) {
this.TzProvider = tzProvider; this.TzProvider = tzProvider;
this.Database = database; this.Database = database;
this.Logger = logger; this.Logger = logger;
// Builds a handler configured from proxySettings. A new handler is created for every
// scraper because each scraper constructor assigns its own CookieContainer to the
// handler it receives; sharing one handler would leave all scrapers on the cookie
// container of whichever scraper was constructed last.
HttpClientHandler CreateHttpClientHandler() {
    // No settings, or an empty URL (the ProxySettings parameterless default), means no proxy.
    if (proxySettings == null || string.IsNullOrWhiteSpace(proxySettings.Url)) {
        return new HttpClientHandler { UseProxy = false };
    }

    WebProxy proxy = new(proxySettings.Url);
    if (proxySettings.Credentials != null) {
        // Credentials for an explicitly configured proxy belong on the WebProxy itself.
        // HttpClientHandler.DefaultProxyCredentials only applies to the default (system)
        // proxy, i.e. when Proxy is null, so setting it here would have no effect.
        proxy.Credentials = new NetworkCredential(
            proxySettings.Credentials.Username,
            proxySettings.Credentials.Password
        );
    }

    return new HttpClientHandler {
        UseProxy = true,
        Proxy = proxy,
    };
}

InfoferScraper.Scrapers.StationScraper stationScraper = new(CreateHttpClientHandler());
InfoferScraper.Scrapers.TrainScraper trainScraper = new(CreateHttpClientHandler());
InfoferScraper.Scrapers.RouteScraper routeScraper = new(CreateHttpClientHandler());
stationCache = new(async (t) => { stationCache = new(async (t) => {
var (stationName, date) = t; var (stationName, date) = t;
Logger.LogDebug("Fetching station {StationName} for date {Date}", stationName, date); Logger.LogDebug("Fetching station {StationName} for date {Date}", stationName, date);
var zonedDate = new NodaTime.LocalDate(date.Year, date.Month, date.Day).AtStartOfDayInZone(CfrTimeZone); var zonedDate = new NodaTime.LocalDate(date.Year, date.Month, date.Day).AtStartOfDayInZone(CfrTimeZone);
var station = await InfoferScraper.Scrapers.StationScraper.Scrape(stationName, zonedDate.ToDateTimeOffset()); var station = await stationScraper.Scrape(stationName, zonedDate.ToDateTimeOffset());
if (station != null) { if (station != null) {
_ = Task.Run(async () => { _ = Task.Run(async () => {
var watch = Stopwatch.StartNew(); var watch = Stopwatch.StartNew();
@ -44,7 +56,7 @@ namespace Server.Services.Implementations {
Logger.LogDebug("Fetching train {TrainNumber} for date {Date}", trainNumber, date); Logger.LogDebug("Fetching train {TrainNumber} for date {Date}", trainNumber, date);
var zonedDate = new NodaTime.LocalDate(date.Year, date.Month, date.Day).AtStartOfDayInZone(CfrTimeZone); var zonedDate = new NodaTime.LocalDate(date.Year, date.Month, date.Day).AtStartOfDayInZone(CfrTimeZone);
var train = await InfoferScraper.Scrapers.TrainScraper.Scrape(trainNumber, zonedDate.ToDateTimeOffset()); var train = await trainScraper.Scrape(trainNumber, zonedDate.ToDateTimeOffset());
if (train != null) { if (train != null) {
_ = Task.Run(async () => { _ = Task.Run(async () => {
var watch = Stopwatch.StartNew(); var watch = Stopwatch.StartNew();
@ -60,7 +72,7 @@ namespace Server.Services.Implementations {
Logger.LogDebug("Fetching itinerary from {From} to {To} for date {Date}", from, to, date); Logger.LogDebug("Fetching itinerary from {From} to {To} for date {Date}", from, to, date);
var zonedDate = new NodaTime.LocalDate(date.Year, date.Month, date.Day).AtStartOfDayInZone(CfrTimeZone); var zonedDate = new NodaTime.LocalDate(date.Year, date.Month, date.Day).AtStartOfDayInZone(CfrTimeZone);
var itineraries = await InfoferScraper.Scrapers.RouteScraper.Scrape(from, to, zonedDate.ToDateTimeOffset()); var itineraries = await routeScraper.Scrape(from, to, zonedDate.ToDateTimeOffset());
if (itineraries != null) { if (itineraries != null) {
_ = Task.Run(async () => { _ = Task.Run(async () => {
var watch = Stopwatch.StartNew(); var watch = Stopwatch.StartNew();

18
server/Startup.cs

@ -10,6 +10,7 @@ using Microsoft.Extensions.Hosting;
using Microsoft.OpenApi.Models; using Microsoft.OpenApi.Models;
using MongoDB.Bson.Serialization.Conventions; using MongoDB.Bson.Serialization.Conventions;
using Newtonsoft.Json.Serialization; using Newtonsoft.Json.Serialization;
using Server.Models;
using Server.Models.Database; using Server.Models.Database;
using Server.Services.Implementations; using Server.Services.Implementations;
using Server.Services.Interfaces; using Server.Services.Interfaces;
@ -30,12 +31,27 @@ namespace Server {
}); });
} }
services.Configure<ProxySettings>(Configuration.GetSection("Proxy"));
services.Configure<MongoSettings>(Configuration.GetSection("TrainDataMongo")); services.Configure<MongoSettings>(Configuration.GetSection("TrainDataMongo"));
var conventionPack = new ConventionPack { new CamelCaseElementNameConvention() }; var conventionPack = new ConventionPack { new CamelCaseElementNameConvention() };
ConventionRegistry.Register("camelCase", conventionPack, _ => true); ConventionRegistry.Register("camelCase", conventionPack, _ => true);
services.AddSingleton<IDataManager, DataManager>(); services.AddSingleton<IDataManager, DataManager>();
services.AddSingleton<IDatabase, Database>(); services.AddSingleton<IDatabase, Database>();
services.AddSingleton<NodaTime.IDateTimeZoneProvider>(NodaTime.DateTimeZoneProviders.Tzdb); services.AddSingleton(NodaTime.DateTimeZoneProviders.Tzdb);
// Registers the IFileStorage implementation selected by the "FileStorage" configuration
// section. Only Type = "local" is supported; it requires a "Directory" value.
services.AddSingleton<IFileStorage>((serviceProvider) => {
    var conf = serviceProvider.GetRequiredService<IConfiguration>();
    var section = conf.GetSection("FileStorage");
    var storageType = section["Type"];
    switch (storageType) {
        case "local": {
            var dir = section["Directory"];
            if (dir == null) {
                // Fail with a clear configuration error instead of handing null to the storage.
                throw new InvalidOperationException("Unable to configure FileStorage: missing FileStorage:Directory for type 'local'");
            }
            return new LocalFileStorage(dir);
        }
        default:
            // InvalidOperationException still derives from Exception, so existing catch blocks keep working.
            throw new InvalidOperationException($"Unable to configure FileStorage: unsupported type '{storageType}'");
    }
});
services.AddControllers() services.AddControllers()
.AddNewtonsoftJson(options => { .AddNewtonsoftJson(options => {
options.SerializerSettings.ContractResolver = new DefaultContractResolver { options.SerializerSettings.ContractResolver = new DefaultContractResolver {

7
server/Utils/Constants.cs

@ -0,0 +1,7 @@
using NodaTime;
namespace Server.Utils;
/// <summary>
/// Shared constant values for the server.
/// </summary>
public static class Constants {
    // TZDB identifier looked up for BucharestTz.
    private const string BucharestZoneId = "Europe/Bucharest";

    /// <summary>
    /// The "Europe/Bucharest" time zone, resolved from the TZDB provider.
    /// </summary>
    public static readonly DateTimeZone BucharestTz = DateTimeZoneProviders.Tzdb[BucharestZoneId];
}

2
server/server.csproj

@ -5,7 +5,7 @@
<AssemblyName>Server</AssemblyName> <AssemblyName>Server</AssemblyName>
<RootNamespace>Server</RootNamespace> <RootNamespace>Server</RootNamespace>
<LangVersion>11</LangVersion> <LangVersion>11</LangVersion>
<TargetFrameworks>net6.0;net7.0</TargetFrameworks> <TargetFrameworks>net6.0;net7.0;net8.0</TargetFrameworks>
</PropertyGroup> </PropertyGroup>
<ItemGroup> <ItemGroup>

Loading…
Cancel
Save