Browse Source

Improve speed by caching and using ObjectIDs

Also: Switch to C# 11, .NET 7
Also: Add /rapidoc
master
Kenneth Bruen 2 years ago
parent
commit
de5d85cff4
Signed by: kbruen
GPG Key ID: C1980A470C3EE5B1
  1. 6
      Dockerfile
  2. 2
      scraper/scraper.csproj
  3. 9
      server/Models/Database/TrainListing.cs
  4. 20
      server/Services/Implementations/DataManager.cs
  5. 85
      server/Services/Implementations/Database.cs
  6. 25
      server/Startup.cs
  7. 3
      server/server.csproj

6
Dockerfile

@ -1,5 +1,5 @@
# https://hub.docker.com/_/microsoft-dotnet # https://hub.docker.com/_/microsoft-dotnet
FROM mcr.microsoft.com/dotnet/sdk:6.0 AS build FROM mcr.microsoft.com/dotnet/sdk:7.0 AS build
WORKDIR /source WORKDIR /source
# copy csproj and restore as distinct layers # copy csproj and restore as distinct layers
@ -14,10 +14,10 @@ COPY server/. ./server/
COPY scraper/. ./scraper/ COPY scraper/. ./scraper/
COPY ConsoleTest/. ./ConsoleTest/ COPY ConsoleTest/. ./ConsoleTest/
WORKDIR /source/server WORKDIR /source/server
RUN dotnet publish -c release -o /app --no-restore RUN dotnet publish -f net7.0 -c release -o /app --no-restore
# final stage/image # final stage/image
FROM mcr.microsoft.com/dotnet/aspnet:6.0 FROM mcr.microsoft.com/dotnet/aspnet:7.0
WORKDIR /app WORKDIR /app
COPY --from=build /app ./ COPY --from=build /app ./
ENV INSIDE_DOCKER=true ENV INSIDE_DOCKER=true

2
scraper/scraper.csproj

@ -2,7 +2,7 @@
<PropertyGroup> <PropertyGroup>
<Nullable>enable</Nullable> <Nullable>enable</Nullable>
<TargetFramework>net6.0</TargetFramework> <TargetFrameworks>net6.0;net7.0</TargetFrameworks>
</PropertyGroup> </PropertyGroup>
<ItemGroup> <ItemGroup>

9
server/Models/Database/TrainListing.cs

@ -1,3 +1,4 @@
using System.Collections.Generic;
using System.Text.Json.Serialization; using System.Text.Json.Serialization;
using MongoDB.Bson; using MongoDB.Bson;
using MongoDB.Bson.Serialization.Attributes; using MongoDB.Bson.Serialization.Attributes;
@ -11,8 +12,10 @@ public record TrainListing(
string? Id, string? Id,
string Rank, string Rank,
string Number, string Number,
string Company string Company,
[property: BsonRepresentation(BsonType.ObjectId)]
string? LatestDescription
) { ) {
public TrainListing() : this(null, "", "", "") { } public TrainListing() : this(null, "", "", "", null) { }
public TrainListing(string rank, string number, string company) : this(null, rank, number, company) { } public TrainListing(string rank, string number, string company) : this(null, rank, number, company, null) { }
} }

20
server/Services/Implementations/DataManager.cs

@ -1,22 +1,26 @@
using System; using System;
using System.Collections.Generic; using System.Collections.Generic;
using System.Diagnostics;
using System.Threading.Tasks; using System.Threading.Tasks;
using InfoferScraper.Models.Train; using InfoferScraper.Models.Train;
using InfoferScraper.Models.Station; using InfoferScraper.Models.Station;
using Server.Services.Interfaces; using Server.Services.Interfaces;
using Server.Utils; using Server.Utils;
using InfoferScraper; using InfoferScraper;
using Microsoft.Extensions.Logging;
namespace Server.Services.Implementations { namespace Server.Services.Implementations {
public class DataManager : IDataManager { public class DataManager : IDataManager {
private ILogger<DataManager> Logger { get; }
private IDatabase Database { get; } private IDatabase Database { get; }
private NodaTime.IDateTimeZoneProvider TzProvider { get; } private NodaTime.IDateTimeZoneProvider TzProvider { get; }
private NodaTime.DateTimeZone CfrTimeZone => TzProvider["Europe/Bucharest"]; private NodaTime.DateTimeZone CfrTimeZone => TzProvider["Europe/Bucharest"];
public DataManager(NodaTime.IDateTimeZoneProvider tzProvider, IDatabase database) { public DataManager(NodaTime.IDateTimeZoneProvider tzProvider, IDatabase database, ILogger<DataManager> logger) {
this.TzProvider = tzProvider; this.TzProvider = tzProvider;
this.Database = database; this.Database = database;
this.Logger = logger;
stationCache = new(async (t) => { stationCache = new(async (t) => {
var (stationName, date) = t; var (stationName, date) = t;
@ -24,7 +28,12 @@ namespace Server.Services.Implementations {
var station = await InfoferScraper.Scrapers.StationScraper.Scrape(stationName, zonedDate.ToDateTimeOffset()); var station = await InfoferScraper.Scrapers.StationScraper.Scrape(stationName, zonedDate.ToDateTimeOffset());
if (station != null) { if (station != null) {
_ = Task.Run(async () => await Database.OnStationData(station)); _ = Task.Run(async () => {
var watch = Stopwatch.StartNew();
await Database.OnStationData(station);
var ms = watch.ElapsedMilliseconds;
Logger.LogInformation("OnStationData timing: {StationDataMs} ms", ms);
});
} }
return station; return station;
}, TimeSpan.FromMinutes(1)); }, TimeSpan.FromMinutes(1));
@ -34,7 +43,12 @@ namespace Server.Services.Implementations {
var train = await InfoferScraper.Scrapers.TrainScraper.Scrape(trainNumber, zonedDate.ToDateTimeOffset()); var train = await InfoferScraper.Scrapers.TrainScraper.Scrape(trainNumber, zonedDate.ToDateTimeOffset());
if (train != null) { if (train != null) {
_ = Task.Run(async () => await Database.OnTrainData(train)); _ = Task.Run(async () => {
var watch = Stopwatch.StartNew();
await Database.OnTrainData(train);
var ms = watch.ElapsedMilliseconds;
Logger.LogInformation("OnTrainData timing: {StationDataMs} ms", ms);
});
} }
return train; return train;
}, TimeSpan.FromSeconds(30)); }, TimeSpan.FromSeconds(30));

85
server/Services/Implementations/Database.cs

@ -47,6 +47,9 @@ public class Database : Server.Services.Interfaces.IDatabase {
private readonly IMongoCollection<StationListing> stationListingsCollection; private readonly IMongoCollection<StationListing> stationListingsCollection;
private readonly AsyncThrottle throttle; private readonly AsyncThrottle throttle;
private readonly Dictionary<string, string> trainObjectIds = new();
private readonly Dictionary<string, string> stationObjectIds = new();
public Database(ILogger<Database> logger, IOptions<MongoSettings> mongoSettings) { public Database(ILogger<Database> logger, IOptions<MongoSettings> mongoSettings) {
Logger = logger; Logger = logger;
@ -169,14 +172,25 @@ public class Database : Server.Services.Interfaces.IDatabase {
private readonly SemaphoreSlim insertTrainLock = new (1, 1); private readonly SemaphoreSlim insertTrainLock = new (1, 1);
public async Task<string> FoundTrain(string rank, string number, string company) { public async Task<string> FoundTrain(string rank, string number, string company) {
number = string.Join("", number.TakeWhile(c => c is >= '0' and <= '9')); number = string.Join("", number.TakeWhile(c => c is >= '0' and <= '9'));
// If there is a matching ObjectId, then it's already in the database
if (trainObjectIds.ContainsKey(number)) return number;
await insertTrainLock.WaitAsync(); await insertTrainLock.WaitAsync();
try { try {
if (!await (await throttle.MakeRequest(() => var possibleTrains = await (await throttle.MakeRequest(() => trainListingsCollection.FindAsync(
trainListingsCollection.FindAsync(Builders<TrainListing>.Filter.Eq("number", number)))) Builders<TrainListing>.Filter.Eq("number", number)
.AnyAsync()) { ))).ToListAsync();
if (possibleTrains.Count == 0) {
Logger.LogDebug("Found train {Rank} {Number} from {Company}", rank, number, company); Logger.LogDebug("Found train {Rank} {Number} from {Company}", rank, number, company);
await throttle.MakeRequest(() => TrainListing listing = new(number: number, rank: rank, company: company);
trainListingsCollection.InsertOneAsync(new(number: number, rank: rank, company: company))); await throttle.MakeRequest(() => trainListingsCollection.InsertOneAsync(listing));
if (listing.Id != null) {
trainObjectIds[number] = listing.Id;
}
}
else {
foreach (var possibleTrain in possibleTrains) {
trainObjectIds[possibleTrain.Number] = possibleTrain.Id!;
}
} }
} }
finally { finally {
@ -192,7 +206,11 @@ public class Database : Server.Services.Interfaces.IDatabase {
// if (!await throttle.MakeRequest(() => stationListingsCollection.Find(Builders<StationListing>.Filter.Eq("name", name)).AnyAsync())) { // if (!await throttle.MakeRequest(() => stationListingsCollection.Find(Builders<StationListing>.Filter.Eq("name", name)).AnyAsync())) {
// Logger.LogDebug("Found station {StationName}", name); // Logger.LogDebug("Found station {StationName}", name);
// await throttle.MakeRequest(() => stationListingsCollection.InsertOneAsync(new(name, new()))); // await throttle.MakeRequest(() => stationListingsCollection.InsertOneAsync(new(name, new())));
// } // }
// If there is a matching ObjectId, then it's already in the database
if (stationObjectIds.ContainsKey(name)) return;
await insertStationLock.WaitAsync(); await insertStationLock.WaitAsync();
UpdateResult update; UpdateResult update;
try { try {
@ -206,6 +224,9 @@ public class Database : Server.Services.Interfaces.IDatabase {
IsUpsert = true, IsUpsert = true,
} }
); );
if (update.IsAcknowledged && update.ModifiedCount > 0) {
stationObjectIds[name] = update.UpsertedId.AsObjectId.ToString();
}
} }
finally { finally {
insertStationLock.Release(); insertStationLock.Release();
@ -217,27 +238,45 @@ public class Database : Server.Services.Interfaces.IDatabase {
} }
public async Task FoundStations(IEnumerable<string> names) { public async Task FoundStations(IEnumerable<string> names) {
var enumerable = names as string[] ?? names.ToArray(); var unknownStations = names.ToList();
if (unknownStations.All(s => stationObjectIds.ContainsKey(s))) {
return;
}
unknownStations.RemoveAll(s => stationObjectIds.ContainsKey(s));
var existingStations = await (await stationListingsCollection.FindAsync( var existingStations = await (await stationListingsCollection.FindAsync(
Builders<StationListing>.Filter.StringIn("name", enumerable.Select((n) => new StringOrRegularExpression(n))) Builders<StationListing>.Filter.StringIn("name", unknownStations.Select((n) => new StringOrRegularExpression(n)))
)).ToListAsync(); )).ToListAsync();
var notExistingStations = enumerable.Where((n) => !existingStations.Select((s) => s.Name).Contains(n)).ToList(); foreach (var existingStation in existingStations) {
if (notExistingStations.Count == 0) return; stationObjectIds[existingStation.Name] = existingStation.Id!;
await stationListingsCollection.InsertManyAsync( }
notExistingStations.Select(
(s) => new StationListing(s, new()) unknownStations.RemoveAll(s => existingStations.Select(st => st.Name).Contains(s));
) if (unknownStations.Count == 0) return;
); var unknownStationListings = unknownStations.Select((s) => new StationListing(s, new())).ToList();
Logger.LogDebug("Found stations {StationNames}", notExistingStations); await stationListingsCollection.InsertManyAsync(unknownStationListings);
foreach (var listing in unknownStationListings) {
stationObjectIds[listing.Name] = listing.Id!;
}
Logger.LogDebug("Found stations {StationNames}", unknownStations);
} }
public async Task FoundTrainAtStation(string stationName, string trainNumber) { public async Task FoundTrainAtStation(string stationName, string trainNumber) {
trainNumber = string.Join("", trainNumber.TakeWhile(c => c is >= '0' and <= '9')); trainNumber = string.Join("", trainNumber.TakeWhile(c => c is >= '0' and <= '9'));
await FoundStation(stationName); await FoundStation(stationName);
var updateResult = await throttle.MakeRequest(() => stationListingsCollection.UpdateOneAsync( UpdateResult updateResult;
if (stationObjectIds.ContainsKey(stationName)) {
updateResult = await throttle.MakeRequest(() => stationListingsCollection.UpdateOneAsync(
Builders<StationListing>.Filter.Eq("_id", ObjectId.Parse(stationObjectIds[stationName])),
Builders<StationListing>.Update.AddToSet("stoppedAtBy", trainNumber)
));
}
else {
updateResult = await throttle.MakeRequest(() => stationListingsCollection.UpdateOneAsync(
Builders<StationListing>.Filter.Eq("name", stationName), Builders<StationListing>.Filter.Eq("name", stationName),
Builders<StationListing>.Update.AddToSet("stoppedAtBy", trainNumber) Builders<StationListing>.Update.AddToSet("stoppedAtBy", trainNumber)
)); ));
}
if (updateResult.IsAcknowledged && updateResult.ModifiedCount > 0) { if (updateResult.IsAcknowledged && updateResult.ModifiedCount > 0) {
Logger.LogDebug("Found train {TrainNumber} at station {StationName}", trainNumber, stationName); Logger.LogDebug("Found train {TrainNumber} at station {StationName}", trainNumber, stationName);
} }
@ -247,10 +286,22 @@ public class Database : Server.Services.Interfaces.IDatabase {
trainNumber = string.Join("", trainNumber.TakeWhile(c => c is >= '0' and <= '9')); trainNumber = string.Join("", trainNumber.TakeWhile(c => c is >= '0' and <= '9'));
var enumerable = stationNames as string[] ?? stationNames.ToArray(); var enumerable = stationNames as string[] ?? stationNames.ToArray();
await FoundStations(enumerable); await FoundStations(enumerable);
var updateResult = await throttle.MakeRequest(() => stationListingsCollection.UpdateManyAsync( var objectIds = enumerable
.Select<string, ObjectId?>((stationName) => stationObjectIds.ContainsKey(stationName) ? ObjectId.Parse(stationObjectIds[stationName]) : null)
.ToList();
UpdateResult updateResult;
if (!objectIds.Any((id) => id is null)) {
updateResult = await throttle.MakeRequest(() => stationListingsCollection.UpdateManyAsync(
Builders<StationListing>.Filter.In("_id", objectIds),
Builders<StationListing>.Update.AddToSet("stoppedAtBy", trainNumber)
));
}
else {
updateResult = await throttle.MakeRequest(() => stationListingsCollection.UpdateManyAsync(
Builders<StationListing>.Filter.StringIn("name", enumerable.Select(sn => new StringOrRegularExpression(sn))), Builders<StationListing>.Filter.StringIn("name", enumerable.Select(sn => new StringOrRegularExpression(sn))),
Builders<StationListing>.Update.AddToSet("stoppedAtBy", trainNumber) Builders<StationListing>.Update.AddToSet("stoppedAtBy", trainNumber)
)); ));
}
if (updateResult.IsAcknowledged && updateResult.ModifiedCount > 0) { if (updateResult.IsAcknowledged && updateResult.ModifiedCount > 0) {
Logger.LogDebug("Found train {TrainNumber} at stations {StationNames}", trainNumber, stationNames); Logger.LogDebug("Found train {TrainNumber} at stations {StationNames}", trainNumber, stationNames);
} }

25
server/Startup.cs

@ -3,6 +3,7 @@ using System.Net;
using System.Text.Json; using System.Text.Json;
using Microsoft.AspNetCore.Builder; using Microsoft.AspNetCore.Builder;
using Microsoft.AspNetCore.Hosting; using Microsoft.AspNetCore.Hosting;
using Microsoft.AspNetCore.Http;
using Microsoft.AspNetCore.HttpOverrides; using Microsoft.AspNetCore.HttpOverrides;
using Microsoft.Extensions.Configuration; using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection; using Microsoft.Extensions.DependencyInjection;
@ -63,6 +64,30 @@ namespace Server {
c.SwaggerEndpoint("/swagger/v1/swagger.json", "InfoTren Scraper v1"); c.SwaggerEndpoint("/swagger/v1/swagger.json", "InfoTren Scraper v1");
}); });
app.MapWhen(x => x.Request.Path.StartsWithSegments("/rapidoc"), appBuilder => {
appBuilder.Run(async context => {
context.Response.ContentType = "text/html";
await context.Response.WriteAsync(
"""
<!doctype html> <!-- Important: must specify -->
<html>
<head>
<meta charset="utf-8"> <!-- Important: rapi-doc uses utf8 characters -->
<script type="module" src="https://unpkg.com/rapidoc/dist/rapidoc-min.js"></script>
</head>
<body>
<rapi-doc
spec-url="/swagger/v3/swagger.json"
theme = "dark"
> </rapi-doc>
</body>
</html>
"""
);
});
});
// app.UseHttpsRedirection(); // app.UseHttpsRedirection();
app.UseRouting(); app.UseRouting();

3
server/server.csproj

@ -4,7 +4,8 @@
<Nullable>enable</Nullable> <Nullable>enable</Nullable>
<AssemblyName>Server</AssemblyName> <AssemblyName>Server</AssemblyName>
<RootNamespace>Server</RootNamespace> <RootNamespace>Server</RootNamespace>
<TargetFramework>net6.0</TargetFramework> <LangVersion>11</LangVersion>
<TargetFrameworks>net6.0;net7.0</TargetFrameworks>
</PropertyGroup> </PropertyGroup>
<ItemGroup> <ItemGroup>

Loading…
Cancel
Save