Browse Source

Add proxy and .NET 8 support

Kenneth Bruen 3 months ago
parent
commit
422b4727c0
Signed by: kbruen
GPG Key ID: C1980A470C3EE5B1
  1. 2
      ConsoleTest/ConsoleTest.csproj
  2. 6
      ConsoleTest/Program.cs
  3. 2
      scraper/scraper.csproj
  4. 39
      scraper/src/Scrapers/Route.cs
  5. 27
      scraper/src/Scrapers/Station.cs
  6. 28
      scraper/src/Scrapers/Train.cs
  7. 9
      server/Models/ProxySettings.cs
  8. 20
      server/Services/Implementations/DataManager.cs
  9. 18
      server/Startup.cs
  10. 7
      server/Utils/Constants.cs
  11. 2
      server/server.csproj

2
ConsoleTest/ConsoleTest.csproj

@ -6,7 +6,7 @@
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFrameworks>net6.0;net7.0</TargetFrameworks>
<TargetFrameworks>net6.0;net7.0;net8.0</TargetFrameworks>
</PropertyGroup>
</Project>

6
ConsoleTest/Program.cs

@ -40,7 +40,7 @@ async Task PrintTrain() {
Console.WriteLine(
JsonSerializer.Serialize(
await TrainScraper.Scrape(trainNumber),
await new TrainScraper().Scrape(trainNumber),
new JsonSerializerOptions {
PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
WriteIndented = true,
@ -58,7 +58,7 @@ async Task PrintStation() {
Console.WriteLine(
JsonSerializer.Serialize(
await StationScraper.Scrape(stationName),
await new StationScraper().Scrape(stationName),
new JsonSerializerOptions {
PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
WriteIndented = true,
@ -74,7 +74,7 @@ async Task ScrapeItineraries() {
if (from == null || to == null) return;
var data = await RouteScraper.Scrape(from, to);
var data = await new RouteScraper().Scrape(from, to);
Console.WriteLine($"{data.Count} itineraries:");
Console.WriteLine();

2
scraper/scraper.csproj

@ -2,7 +2,7 @@
<PropertyGroup>
<Nullable>enable</Nullable>
<TargetFrameworks>net6.0;net7.0</TargetFrameworks>
<TargetFrameworks>net6.0;net7.0;net8.0</TargetFrameworks>
</PropertyGroup>
<ItemGroup>

39
scraper/src/Scrapers/Route.cs

@ -16,20 +16,10 @@ using scraper.Models.Itinerary;
namespace InfoferScraper.Scrapers;
public static class RouteScraper {
public class RouteScraper {
private const string BaseUrl = "https://mersultrenurilor.infofer.ro/ro-RO/";
private static readonly DateTimeZone BucharestTz = DateTimeZoneProviders.Tzdb["Europe/Bucharest"];
private static readonly CookieContainer CookieContainer = new();
private static readonly HttpClient HttpClient = new(new HttpClientHandler {
CookieContainer = CookieContainer,
UseCookies = true,
}) {
BaseAddress = new Uri(BaseUrl),
DefaultRequestVersion = new Version(2, 0),
};
private static readonly Regex KmTrainRankNoRegex = new(@"^([0-9]+)\skm\scu\s([A-Z-]+)\s([0-9]+)$");
private static readonly Regex OperatorRegex = new(@$"^Operat\sde\s([{Utils.RoLetters}\s]+)$");
private static readonly Regex DepArrRegex = new(@"^(Ple|Sos)\s([0-9]+)\s([a-z]+)\.?\s([0-9]+):([0-9]+)$");
@ -49,7 +39,28 @@ public static class RouteScraper {
["dec"] = 12,
};
public static async Task<List<IItinerary>?> Scrape(string from, string to, DateTimeOffset? dateOverride = null) {
private readonly CookieContainer cookieContainer = new();
private readonly HttpClient httpClient;
/// <summary>
/// Creates a route scraper whose <see cref="HttpClient"/> sends requests relative to
/// <c>BaseUrl</c> and stores cookies in this instance's own cookie container.
/// </summary>
/// <param name="httpClientHandler">
/// Optional handler to build the client on (for example one configured with a proxy).
/// When <c>null</c>, a fresh <see cref="HttpClientHandler"/> is created.
/// </param>
public RouteScraper(HttpClientHandler? httpClientHandler = null) {
    // Whether supplied or freshly created, the handler ends up with cookies enabled
    // and this scraper's container installed. A caller-supplied handler is mutated.
    var handler = httpClientHandler ?? new HttpClientHandler();
    handler.CookieContainer = cookieContainer;
    handler.UseCookies = true;

    httpClient = new HttpClient(handler);
    httpClient.BaseAddress = new Uri(BaseUrl);
    httpClient.DefaultRequestVersion = new Version(2, 0);
}
public async Task<List<IItinerary>?> Scrape(string from, string to, DateTimeOffset? dateOverride = null) {
var dateOverrideInstant = dateOverride?.ToInstant().InZone(BucharestTz);
dateOverride = dateOverrideInstant?.ToDateTimeOffset();
TrainScrapeResult result = new();
@ -70,7 +81,7 @@ public static class RouteScraper {
firstUrl = firstUrl.SetQueryParam("BetweenTrainsMinimumMinutes", "5");
firstUrl = firstUrl.SetQueryParam("ChangeStationName", "");
var firstResponse = await HttpClient.GetStringAsync(firstUrl);
var firstResponse = await httpClient.GetStringAsync(firstUrl);
var firstDocument = await asContext.OpenAsync(req => req.Content(firstResponse));
var firstForm = firstDocument.GetElementById("form-search")!;
@ -80,7 +91,7 @@ public static class RouteScraper {
.ToDictionary(elem => elem.Name!, elem => elem.Value);
var secondUrl = "".AppendPathSegments("Itineraries", "GetItineraries");
var secondResponse = await HttpClient.PostAsync(
var secondResponse = await httpClient.PostAsync(
secondUrl,
#pragma warning disable CS8620
new FormUrlEncodedContent(firstResult)

27
scraper/src/Scrapers/Station.cs

@ -14,7 +14,7 @@ using NodaTime;
using NodaTime.Extensions;
namespace InfoferScraper.Scrapers {
public static class StationScraper {
public class StationScraper {
private static readonly Regex StationInfoRegex = new($@"^([{Utils.RoLetters}.0-9 ]+)\sîn\s([0-9.]+)$");
private static readonly Regex StoppingTimeRegex = new(
@ -33,17 +33,28 @@ namespace InfoferScraper.Scrapers {
private const string BaseUrl = "https://mersultrenurilor.infofer.ro/ro-RO/";
private static readonly CookieContainer CookieContainer = new();
private readonly CookieContainer cookieContainer = new();
private static readonly HttpClient HttpClient = new(new HttpClientHandler {
CookieContainer = CookieContainer,
private readonly HttpClient httpClient;
/// <summary>
/// Creates a station scraper. The given handler (or a newly created one when
/// <paramref name="httpClientHandler"/> is <c>null</c>) gets cookies enabled and this
/// instance's cookie container assigned, then backs an <see cref="HttpClient"/> whose
/// base address is <c>BaseUrl</c> and whose default request version is 2.0.
/// </summary>
/// <param name="httpClientHandler">Optional handler to reuse; it is modified in place.</param>
public StationScraper(HttpClientHandler? httpClientHandler = null) {
if (httpClientHandler == null) {
httpClientHandler = new HttpClientHandler {
CookieContainer = cookieContainer,
UseCookies = true,
}) {
};
}
else {
httpClientHandler.CookieContainer = cookieContainer;
httpClientHandler.UseCookies = true;
}
httpClient = new HttpClient(httpClientHandler) {
BaseAddress = new Uri(BaseUrl),
DefaultRequestVersion = new Version(2, 0),
};
}
public static async Task<IStationScrapeResult> Scrape(string stationName, DateTimeOffset? date = null) {
public async Task<IStationScrapeResult> Scrape(string stationName, DateTimeOffset? date = null) {
var dateInstant = date?.ToInstant().InZone(BucharestTz);
date = dateInstant?.ToDateTimeOffset();
@ -59,7 +70,7 @@ namespace InfoferScraper.Scrapers {
if (date != null) {
firstUrl = firstUrl.SetQueryParam("Date", $"{date:d.MM.yyyy}");
}
var firstResponse = await HttpClient.GetStringAsync(firstUrl);
var firstResponse = await httpClient.GetStringAsync(firstUrl);
var firstDocument = await asContext.OpenAsync(req => req.Content(firstResponse));
var firstForm = firstDocument.GetElementById("form-search")!;
@ -69,7 +80,7 @@ namespace InfoferScraper.Scrapers {
.ToDictionary(elem => elem.Name!, elem => elem.Value);
var secondUrl = "".AppendPathSegments("Stations", "StationsResult");
var secondResponse = await HttpClient.PostAsync(
var secondResponse = await httpClient.PostAsync(
secondUrl,
#pragma warning disable CS8620
new FormUrlEncodedContent(firstResult)

28
scraper/src/Scrapers/Train.cs

@ -15,7 +15,7 @@ using NodaTime.Extensions;
using scraper.Exceptions;
namespace InfoferScraper.Scrapers {
public static class TrainScraper {
public class TrainScraper {
private const string BaseUrl = "https://mersultrenurilor.infofer.ro/ro-RO/";
private static readonly Regex TrainInfoRegex = new(@"^([A-Z-]+)\s([0-9]+)\sîn\s([0-9.]+)$");
private static readonly Regex OperatorRegex = new(@"^Operat\sde\s(.+)$");
@ -51,16 +51,28 @@ namespace InfoferScraper.Scrapers {
private static readonly DateTimeZone BucharestTz = DateTimeZoneProviders.Tzdb["Europe/Bucharest"];
private static readonly CookieContainer CookieContainer = new();
private static readonly HttpClient HttpClient = new(new HttpClientHandler {
CookieContainer = CookieContainer,
private readonly CookieContainer cookieContainer = new();
private readonly HttpClient httpClient;
/// <summary>
/// Creates a train scraper. The given handler (or a newly created one when
/// <paramref name="httpClientHandler"/> is <c>null</c>) gets cookies enabled and this
/// instance's cookie container assigned, then backs an <see cref="HttpClient"/> whose
/// base address is <c>BaseUrl</c> and whose default request version is 2.0.
/// </summary>
/// <param name="httpClientHandler">Optional handler to reuse; it is modified in place.</param>
public TrainScraper(HttpClientHandler? httpClientHandler = null)
{
if (httpClientHandler == null) {
httpClientHandler = new HttpClientHandler {
CookieContainer = cookieContainer,
UseCookies = true,
}) {
};
}
else {
httpClientHandler.CookieContainer = cookieContainer;
httpClientHandler.UseCookies = true;
}
httpClient = new HttpClient(httpClientHandler) {
BaseAddress = new Uri(BaseUrl),
DefaultRequestVersion = new Version(2, 0),
};
}
public static async Task<ITrainScrapeResult?> Scrape(string trainNumber, DateTimeOffset? dateOverride = null) {
public async Task<ITrainScrapeResult?> Scrape(string trainNumber, DateTimeOffset? dateOverride = null) {
var dateOverrideInstant = dateOverride?.ToInstant().InZone(BucharestTz);
dateOverride = dateOverrideInstant?.ToDateTimeOffset();
TrainScrapeResult result = new();
@ -73,7 +85,7 @@ namespace InfoferScraper.Scrapers {
if (dateOverride != null) {
firstUrl = firstUrl.SetQueryParam("Date", $"{dateOverride:d.MM.yyyy}");
}
var firstResponse = await HttpClient.GetStringAsync(firstUrl);
var firstResponse = await httpClient.GetStringAsync(firstUrl);
var firstDocument = await asContext.OpenAsync(req => req.Content(firstResponse));
var firstForm = firstDocument.GetElementById("form-search")!;
@ -83,7 +95,7 @@ namespace InfoferScraper.Scrapers {
.ToDictionary(elem => elem.Name!, elem => elem.Value);
var secondUrl = "".AppendPathSegments("Trains", "TrainsResult");
var secondResponse = await HttpClient.PostAsync(
var secondResponse = await httpClient.PostAsync(
secondUrl,
#pragma warning disable CS8620
new FormUrlEncodedContent(firstResult)

9
server/Models/ProxySettings.cs

@ -0,0 +1,9 @@
namespace Server.Models;
/// <summary>
/// Proxy configuration: the proxy URL plus optional credentials.
/// </summary>
/// <param name="Url">Address of the proxy.</param>
/// <param name="Credentials">Credentials for the proxy, or <c>null</c> when none are given.</param>
public record ProxySettings(string Url, ProxyCredentials? Credentials = null) {
/// <summary>
/// Parameterless constructor: sets <c>Url</c> to the empty string and leaves
/// <c>Credentials</c> at its <c>null</c> default.
/// </summary>
// NOTE(review): presumably exists so configuration binding can instantiate the record — confirm.
public ProxySettings() : this("") { }
}
/// <summary>
/// Username/password pair used to authenticate against a proxy.
/// </summary>
/// <param name="Username">Account name sent to the proxy.</param>
/// <param name="Password">Secret sent to the proxy; never included in <c>ToString()</c> output.</param>
public record ProxyCredentials(string Username, string Password) {
    /// <summary>
    /// Parameterless constructor: sets both <c>Username</c> and <c>Password</c> to the empty string.
    /// </summary>
    public ProxyCredentials() : this("", "") { }

    /// <summary>
    /// Supplies the member list for the compiler-generated <c>ToString()</c>.
    /// The synthesized version would print every positional member, including the
    /// password, so any log line or exception message that formats this record (or a
    /// record containing it) would leak the secret. The password is masked instead.
    /// </summary>
    /// <param name="builder">Builder the generated <c>ToString()</c> is writing into.</param>
    /// <returns><c>true</c>, because members were written.</returns>
    protected virtual bool PrintMembers(System.Text.StringBuilder builder) {
        builder.Append("Username = ").Append(Username).Append(", Password = ***");
        return true;
    }
}

20
server/Services/Implementations/DataManager.cs

@ -1,12 +1,15 @@
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Net;
using System.Net.Http;
using System.Threading.Tasks;
using InfoferScraper;
using InfoferScraper.Models.Station;
using InfoferScraper.Models.Train;
using Microsoft.Extensions.Logging;
using scraper.Models.Itinerary;
using Server.Models;
using Server.Services.Interfaces;
using Server.Utils;
@ -18,17 +21,26 @@ namespace Server.Services.Implementations {
private NodaTime.IDateTimeZoneProvider TzProvider { get; }
private NodaTime.DateTimeZone CfrTimeZone => TzProvider["Europe/Bucharest"];
public DataManager(NodaTime.IDateTimeZoneProvider tzProvider, IDatabase database, ILogger<DataManager> logger) {
public DataManager(NodaTime.IDateTimeZoneProvider tzProvider, IDatabase database, ILogger<DataManager> logger, ProxySettings? proxySettings) {
this.TzProvider = tzProvider;
this.Database = database;
this.Logger = logger;
// Build the proxy first so its credentials can be attached to the WebProxy itself.
// HttpClientHandler.DefaultProxyCredentials is consulted only for the system default
// proxy (UseProxy = true with Proxy = null); credentials assigned there are not sent
// to an explicitly configured proxy, so authentication would silently be skipped.
WebProxy? webProxy = null;
if (proxySettings != null) {
    webProxy = new WebProxy(proxySettings.Url);
    if (proxySettings.Credentials != null) {
        webProxy.Credentials = new NetworkCredential(
            proxySettings.Credentials.Username,
            proxySettings.Credentials.Password
        );
    }
}
// Without proxy settings the handler is told not to use any proxy at all.
HttpClientHandler httpClientHandler = new() {
    UseProxy = webProxy != null,
    Proxy = webProxy,
};
InfoferScraper.Scrapers.StationScraper stationScraper = new(httpClientHandler);
InfoferScraper.Scrapers.TrainScraper trainScraper = new(httpClientHandler);
InfoferScraper.Scrapers.RouteScraper routeScraper = new(httpClientHandler);
stationCache = new(async (t) => {
var (stationName, date) = t;
Logger.LogDebug("Fetching station {StationName} for date {Date}", stationName, date);
var zonedDate = new NodaTime.LocalDate(date.Year, date.Month, date.Day).AtStartOfDayInZone(CfrTimeZone);
var station = await InfoferScraper.Scrapers.StationScraper.Scrape(stationName, zonedDate.ToDateTimeOffset());
var station = await stationScraper.Scrape(stationName, zonedDate.ToDateTimeOffset());
if (station != null) {
_ = Task.Run(async () => {
var watch = Stopwatch.StartNew();
@ -44,7 +56,7 @@ namespace Server.Services.Implementations {
Logger.LogDebug("Fetching train {TrainNumber} for date {Date}", trainNumber, date);
var zonedDate = new NodaTime.LocalDate(date.Year, date.Month, date.Day).AtStartOfDayInZone(CfrTimeZone);
var train = await InfoferScraper.Scrapers.TrainScraper.Scrape(trainNumber, zonedDate.ToDateTimeOffset());
var train = await trainScraper.Scrape(trainNumber, zonedDate.ToDateTimeOffset());
if (train != null) {
_ = Task.Run(async () => {
var watch = Stopwatch.StartNew();
@ -60,7 +72,7 @@ namespace Server.Services.Implementations {
Logger.LogDebug("Fetching itinerary from {From} to {To} for date {Date}", from, to, date);
var zonedDate = new NodaTime.LocalDate(date.Year, date.Month, date.Day).AtStartOfDayInZone(CfrTimeZone);
var itineraries = await InfoferScraper.Scrapers.RouteScraper.Scrape(from, to, zonedDate.ToDateTimeOffset());
var itineraries = await routeScraper.Scrape(from, to, zonedDate.ToDateTimeOffset());
if (itineraries != null) {
_ = Task.Run(async () => {
var watch = Stopwatch.StartNew();

18
server/Startup.cs

@ -10,6 +10,7 @@ using Microsoft.Extensions.Hosting;
using Microsoft.OpenApi.Models;
using MongoDB.Bson.Serialization.Conventions;
using Newtonsoft.Json.Serialization;
using Server.Models;
using Server.Models.Database;
using Server.Services.Implementations;
using Server.Services.Interfaces;
@ -30,12 +31,27 @@ namespace Server {
});
}
services.Configure<ProxySettings>(Configuration.GetSection("Proxy"));
services.Configure<MongoSettings>(Configuration.GetSection("TrainDataMongo"));
var conventionPack = new ConventionPack { new CamelCaseElementNameConvention() };
ConventionRegistry.Register("camelCase", conventionPack, _ => true);
services.AddSingleton<IDataManager, DataManager>();
services.AddSingleton<IDatabase, Database>();
services.AddSingleton<NodaTime.IDateTimeZoneProvider>(NodaTime.DateTimeZoneProviders.Tzdb);
services.AddSingleton(NodaTime.DateTimeZoneProviders.Tzdb);
// Register the IFileStorage implementation chosen by the "FileStorage" configuration section.
services.AddSingleton<IFileStorage>(provider => {
    var fileStorageConfig = provider
        .GetRequiredService<IConfiguration>()
        .GetSection("FileStorage");

    // "local" is the only recognised type; its constructor receives the "Directory" value.
    // Any other (or missing) type is a configuration error.
    return fileStorageConfig["Type"] switch {
        "local" => new LocalFileStorage(fileStorageConfig["Directory"]!),
        _ => throw new Exception("Unable to configure FileStorage"),
    };
});
services.AddControllers()
.AddNewtonsoftJson(options => {
options.SerializerSettings.ContractResolver = new DefaultContractResolver {

7
server/Utils/Constants.cs

@ -0,0 +1,7 @@
using NodaTime;
namespace Server.Utils;
/// <summary>
/// Shared constant values for the server.
/// </summary>
public static class Constants {
/// <summary>
/// The "Europe/Bucharest" time zone, looked up once from NodaTime's TZDB provider.
/// </summary>
public static readonly DateTimeZone BucharestTz = DateTimeZoneProviders.Tzdb["Europe/Bucharest"];
}

2
server/server.csproj

@ -5,7 +5,7 @@
<AssemblyName>Server</AssemblyName>
<RootNamespace>Server</RootNamespace>
<LangVersion>11</LangVersion>
<TargetFrameworks>net6.0;net7.0</TargetFrameworks>
<TargetFrameworks>net6.0;net7.0;net8.0</TargetFrameworks>
</PropertyGroup>
<ItemGroup>

Loading…
Cancel
Save