Browse Source

Add itinerary scraping

master
Kenneth Bruen 2 years ago
parent
commit
b66b8f79b1
Signed by: kbruen
GPG Key ID: C1980A470C3EE5B1
  1. 2
      scraper/src/Scrapers/Route.cs
  2. 40
      server/Controllers/V3/ItinerariesController.cs
  3. 25
      server/Services/Implementations/DataManager.cs
  4. 12
      server/Services/Implementations/Database.cs
  5. 3
      server/Services/Interfaces/IDataManager.cs
  6. 2
      server/Services/Interfaces/IDatabase.cs
  7. 33
      server/Utils/IAsyncCusorAsyncAdapter.cs

2
scraper/src/Scrapers/Route.cs

@ -187,6 +187,8 @@ public static class RouteScraper {
foreach (var div in leftSideDivs[2] foreach (var div in leftSideDivs[2]
.QuerySelectorAll(":scope > div") .QuerySelectorAll(":scope > div")
.Where((_, i) => i % 2 != 0)) { .Where((_, i) => i % 2 != 0)) {
var text = div.Text().WithCollapsedSpaces();
if (text == "Nu sunt stații intermediare.") continue;
train.AddIntermediateStop(div.Text().WithCollapsedSpaces()); train.AddIntermediateStop(div.Text().WithCollapsedSpaces());
} }

40
server/Controllers/V3/ItinerariesController.cs

@ -0,0 +1,40 @@
using System;
using System.Collections.Generic;
using System.Threading.Tasks;
using Microsoft.AspNetCore.Http;
using Microsoft.AspNetCore.Mvc;
using scraper.Models.Itinerary;
using Server.Services.Interfaces;
namespace Server.Controllers.V3;
/// <summary>
/// API v3 controller exposing itinerary lookups between two stations.
/// </summary>
[ApiController]
[ApiExplorerSettings(GroupName = "v3")]
[Route("/v3/[controller]")]
public class ItinerariesController : Controller {
    private IDataManager DataManager { get; }
    private IDatabase Database { get; }

    /// <summary>
    /// Stores the injected services used by the controller's actions.
    /// </summary>
    /// <param name="dataManager">Service used to fetch itineraries.</param>
    /// <param name="database">Database service (kept for parity with the other injected services).</param>
    public ItinerariesController(IDataManager dataManager, IDatabase database)
        => (DataManager, Database) = (dataManager, database);

    /// <summary>
    /// Looks up itineraries between <paramref name="from"/> and <paramref name="to"/>.
    /// </summary>
    /// <param name="from">Departure station, taken from the query string.</param>
    /// <param name="to">Arrival station, taken from the query string.</param>
    /// <param name="date">Optional date of travel, taken from the query string.</param>
    /// <returns>
    /// 200 with the itineraries when the data manager returns a result,
    /// 404 when it returns <c>null</c>.
    /// </returns>
    [HttpGet("")]
    [ProducesResponseType(typeof(IEnumerable<IItinerary>), StatusCodes.Status200OK)]
    [ProducesResponseType(StatusCodes.Status404NotFound)]
    public async Task<ActionResult<IEnumerable<IItinerary>>> FindItineraries(
        [FromQuery] string from,
        [FromQuery] string to,
        [FromQuery] DateTimeOffset? date
    ) {
        var found = await DataManager.FetchItineraries(from, to, date);
        if (found is not null) {
            return Ok(found);
        }
        return NotFound();
    }
}

25
server/Services/Implementations/DataManager.cs

@ -8,6 +8,7 @@ using Server.Services.Interfaces;
using Server.Utils; using Server.Utils;
using InfoferScraper; using InfoferScraper;
using Microsoft.Extensions.Logging; using Microsoft.Extensions.Logging;
using scraper.Models.Itinerary;
namespace Server.Services.Implementations { namespace Server.Services.Implementations {
public class DataManager : IDataManager { public class DataManager : IDataManager {
@ -52,10 +53,27 @@ namespace Server.Services.Implementations {
} }
return train; return train;
}, TimeSpan.FromSeconds(30)); }, TimeSpan.FromSeconds(30));
itinerariesCache = new(async (t) => {
var (from, to, date) = t;
var zonedDate = new NodaTime.LocalDate(date.Year, date.Month, date.Day).AtStartOfDayInZone(CfrTimeZone);
var itineraries = await InfoferScraper.Scrapers.RouteScraper.Scrape(from, to, zonedDate.ToDateTimeOffset());
if (itineraries != null) {
_ = Task.Run(async () => {
var watch = Stopwatch.StartNew();
await Database.OnItineraries(itineraries);
var ms = watch.ElapsedMilliseconds;
Logger.LogInformation("OnItineraries timing: {StationDataMs} ms", ms);
});
}
return itineraries;
}, TimeSpan.FromMinutes(1));
} }
private readonly AsyncCache<(string, DateOnly), IStationScrapeResult?> stationCache; private readonly AsyncCache<(string, DateOnly), IStationScrapeResult?> stationCache;
private readonly AsyncCache<(string, DateOnly), ITrainScrapeResult?> trainCache; private readonly AsyncCache<(string, DateOnly), ITrainScrapeResult?> trainCache;
private readonly AsyncCache<(string, string, DateOnly), IReadOnlyList<IItinerary>?> itinerariesCache;
public Task<IStationScrapeResult?> FetchStation(string stationName, DateTimeOffset date) { public Task<IStationScrapeResult?> FetchStation(string stationName, DateTimeOffset date) {
var cfrDateTime = new NodaTime.ZonedDateTime(NodaTime.Instant.FromDateTimeOffset(date), CfrTimeZone); var cfrDateTime = new NodaTime.ZonedDateTime(NodaTime.Instant.FromDateTimeOffset(date), CfrTimeZone);
@ -70,5 +88,12 @@ namespace Server.Services.Implementations {
return trainCache.GetItem((trainNumber, cfrDate)); return trainCache.GetItem((trainNumber, cfrDate));
} }
/// <summary>
/// Returns the itineraries between two stations for the calendar day that
/// <paramref name="date"/> falls on in <c>CfrTimeZone</c>, going through the itineraries cache.
/// </summary>
/// <param name="from">Departure station.</param>
/// <param name="to">Arrival station.</param>
/// <param name="date">Moment of interest; defaults to the current time when omitted.</param>
/// <returns>The cached or freshly fetched itineraries, or <c>null</c> if the cache yields none.</returns>
public async Task<IReadOnlyList<IItinerary>?> FetchItineraries(string from, string to, DateTimeOffset? date = null) {
    var requestedMoment = date ?? DateTimeOffset.Now;
    var instant = NodaTime.Instant.FromDateTimeOffset(requestedMoment);
    var inCfrZone = new NodaTime.ZonedDateTime(instant, CfrTimeZone);
    // The cache is keyed by day, so only the date part in the CFR zone matters.
    var cacheKey = (from, to, new DateOnly(inCfrZone.Year, inCfrZone.Month, inCfrZone.Day));
    return await itinerariesCache.GetItem(cacheKey);
}
} }
} }

12
server/Services/Implementations/Database.cs

@ -13,6 +13,7 @@ using Microsoft.Extensions.Options;
using MongoDB.Bson; using MongoDB.Bson;
using MongoDB.Bson.Serialization.Attributes; using MongoDB.Bson.Serialization.Attributes;
using MongoDB.Driver; using MongoDB.Driver;
using scraper.Models.Itinerary;
using Server.Models.Database; using Server.Models.Database;
using Server.Utils; using Server.Utils;
@ -339,6 +340,17 @@ public class Database : Server.Services.Interfaces.IDatabase {
await ProcessTrain(train); await ProcessTrain(train);
} }
} }
/// <summary>
/// Walks every train of every itinerary and reports it, together with all of its
/// stations, to <c>FoundTrainAtStations</c>.
/// </summary>
/// <param name="itineraries">Itineraries whose trains should be processed.</param>
public async Task OnItineraries(IReadOnlyList<IItinerary> itineraries) {
    foreach (var route in itineraries) {
        foreach (var leg in route.Trains) {
            // Station order passed on: intermediate stops first, then origin and destination.
            var endpoints = new[] { leg.From, leg.To };
            var stations = leg.IntermediateStops.Concat(endpoints);
            await FoundTrainAtStations(stations, leg.TrainNumber);
        }
    }
}
} }
public record DbRecord( public record DbRecord(

3
server/Services/Interfaces/IDataManager.cs

@ -1,11 +1,14 @@
using System; using System;
using System.Collections.Generic;
using System.Threading.Tasks; using System.Threading.Tasks;
using InfoferScraper.Models.Train; using InfoferScraper.Models.Train;
using InfoferScraper.Models.Station; using InfoferScraper.Models.Station;
using scraper.Models.Itinerary;
namespace Server.Services.Interfaces; namespace Server.Services.Interfaces;
public interface IDataManager { public interface IDataManager {
public Task<IStationScrapeResult?> FetchStation(string stationName, DateTimeOffset date); public Task<IStationScrapeResult?> FetchStation(string stationName, DateTimeOffset date);
public Task<ITrainScrapeResult?> FetchTrain(string trainNumber, DateTimeOffset date); public Task<ITrainScrapeResult?> FetchTrain(string trainNumber, DateTimeOffset date);
public Task<IReadOnlyList<IItinerary>?> FetchItineraries(string from, string to, DateTimeOffset? date = null);
} }

2
server/Services/Interfaces/IDatabase.cs

@ -2,6 +2,7 @@ using System.Collections.Generic;
using System.Threading.Tasks; using System.Threading.Tasks;
using InfoferScraper.Models.Train; using InfoferScraper.Models.Train;
using InfoferScraper.Models.Station; using InfoferScraper.Models.Station;
using scraper.Models.Itinerary;
using Server.Models.Database; using Server.Models.Database;
namespace Server.Services.Interfaces; namespace Server.Services.Interfaces;
@ -15,4 +16,5 @@ public interface IDatabase {
public Task FoundTrainAtStation(string stationName, string trainName); public Task FoundTrainAtStation(string stationName, string trainName);
public Task OnTrainData(ITrainScrapeResult trainData); public Task OnTrainData(ITrainScrapeResult trainData);
public Task OnStationData(IStationScrapeResult stationData); public Task OnStationData(IStationScrapeResult stationData);
public Task OnItineraries(IReadOnlyList<IItinerary> itineraries);
} }

33
server/Utils/IAsyncCusorAsyncAdapter.cs

@ -0,0 +1,33 @@
using System.Collections.Generic;
using System.Threading.Tasks;
using MongoDB.Driver;
namespace Server.Utils;
/// <summary>
/// Adapts a batch-oriented <see cref="IAsyncCursor{T}"/> to the element-by-element
/// <c>MoveNextAsync</c>/<c>Current</c> shape, flattening the cursor's batches.
/// </summary>
/// <param name="Cursor">The cursor whose batches are enumerated.</param>
public record IAsyncCusorAsyncEnumerator<T>(IAsyncCursor<T> Cursor) {
    // Enumerator over the batch currently being consumed; null until the first batch is fetched.
    private IEnumerator<T>? enumerator = null;

    /// <summary>
    /// The element the enumerator is positioned on. Only valid after
    /// <see cref="MoveNextAsync"/> has returned <c>true</c>.
    /// </summary>
    public T Current => enumerator!.Current;

    /// <summary>
    /// Advances to the next element, fetching further batches from the cursor as needed.
    /// </summary>
    /// <returns><c>true</c> if positioned on an element; <c>false</c> once the cursor is exhausted.</returns>
    public async Task<bool> MoveNextAsync() {
        // Fast path: the current batch still has elements.
        if (enumerator != null && enumerator.MoveNext()) {
            return true;
        }

        // Fetch batches until one actually contains an element. The fresh batch
        // enumerator must be advanced before Current is read, otherwise the caller
        // would observe an undefined element at the start of each batch (and an
        // empty batch would be reported as an element).
        while (await Cursor.MoveNextAsync()) {
            enumerator?.Dispose();
            enumerator = Cursor.Current.GetEnumerator();
            if (enumerator.MoveNext()) {
                return true;
            }
        }

        return false;
    }
}
/// <summary>
/// Extension methods for <see cref="IAsyncCursor{T}"/>.
/// </summary>
public static class IAsyncCursorExtensions {
    /// <summary>
    /// Wraps <paramref name="cursor"/> in an <see cref="IAsyncCusorAsyncEnumerator{T}"/>.
    /// </summary>
    /// <param name="cursor">The cursor to wrap.</param>
    /// <returns>A new enumerator adapter over <paramref name="cursor"/>.</returns>
    public static IAsyncCusorAsyncEnumerator<T> GetAsyncEnumerator<T>(this IAsyncCursor<T> cursor)
        => new IAsyncCusorAsyncEnumerator<T>(cursor);
}
Loading…
Cancel
Save