Browse Source

Improve speed by caching and using ObjectIDs

Also: Switch to C# 11, .NET 7
Also: Added /rapidoc
master
Kenneth Bruen 2 years ago
parent
commit
de5d85cff4
Signed by: kbruen
GPG Key ID: C1980A470C3EE5B1
  1. 6
      Dockerfile
  2. 2
      scraper/scraper.csproj
  3. 9
      server/Models/Database/TrainListing.cs
  4. 20
      server/Services/Implementations/DataManager.cs
  5. 97
      server/Services/Implementations/Database.cs
  6. 25
      server/Startup.cs
  7. 3
      server/server.csproj

6
Dockerfile

@ -1,5 +1,5 @@
# https://hub.docker.com/_/microsoft-dotnet
FROM mcr.microsoft.com/dotnet/sdk:6.0 AS build
FROM mcr.microsoft.com/dotnet/sdk:7.0 AS build
WORKDIR /source
# copy csproj and restore as distinct layers
@ -14,10 +14,10 @@ COPY server/. ./server/
COPY scraper/. ./scraper/
COPY ConsoleTest/. ./ConsoleTest/
WORKDIR /source/server
RUN dotnet publish -c release -o /app --no-restore
RUN dotnet publish -f net7.0 -c release -o /app --no-restore
# final stage/image
FROM mcr.microsoft.com/dotnet/aspnet:6.0
FROM mcr.microsoft.com/dotnet/aspnet:7.0
WORKDIR /app
COPY --from=build /app ./
ENV INSIDE_DOCKER=true

2
scraper/scraper.csproj

@ -2,7 +2,7 @@
<PropertyGroup>
<Nullable>enable</Nullable>
<TargetFramework>net6.0</TargetFramework>
<TargetFrameworks>net6.0;net7.0</TargetFrameworks>
</PropertyGroup>
<ItemGroup>

9
server/Models/Database/TrainListing.cs

@ -1,3 +1,4 @@
using System.Collections.Generic;
using System.Text.Json.Serialization;
using MongoDB.Bson;
using MongoDB.Bson.Serialization.Attributes;
@ -11,8 +12,10 @@ public record TrainListing(
string? Id,
string Rank,
string Number,
string Company
string Company,
[property: BsonRepresentation(BsonType.ObjectId)]
string? LatestDescription
) {
public TrainListing() : this(null, "", "", "") { }
public TrainListing(string rank, string number, string company) : this(null, rank, number, company) { }
public TrainListing() : this(null, "", "", "", null) { }
public TrainListing(string rank, string number, string company) : this(null, rank, number, company, null) { }
}

20
server/Services/Implementations/DataManager.cs

@ -1,22 +1,26 @@
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Threading.Tasks;
using InfoferScraper.Models.Train;
using InfoferScraper.Models.Station;
using Server.Services.Interfaces;
using Server.Utils;
using InfoferScraper;
using Microsoft.Extensions.Logging;
namespace Server.Services.Implementations {
public class DataManager : IDataManager {
private ILogger<DataManager> Logger { get; }
private IDatabase Database { get; }
private NodaTime.IDateTimeZoneProvider TzProvider { get; }
private NodaTime.DateTimeZone CfrTimeZone => TzProvider["Europe/Bucharest"];
public DataManager(NodaTime.IDateTimeZoneProvider tzProvider, IDatabase database) {
public DataManager(NodaTime.IDateTimeZoneProvider tzProvider, IDatabase database, ILogger<DataManager> logger) {
this.TzProvider = tzProvider;
this.Database = database;
this.Logger = logger;
stationCache = new(async (t) => {
var (stationName, date) = t;
@ -24,7 +28,12 @@ namespace Server.Services.Implementations {
var station = await InfoferScraper.Scrapers.StationScraper.Scrape(stationName, zonedDate.ToDateTimeOffset());
if (station != null) {
_ = Task.Run(async () => await Database.OnStationData(station));
_ = Task.Run(async () => {
var watch = Stopwatch.StartNew();
await Database.OnStationData(station);
var ms = watch.ElapsedMilliseconds;
Logger.LogInformation("OnStationData timing: {StationDataMs} ms", ms);
});
}
return station;
}, TimeSpan.FromMinutes(1));
@ -34,7 +43,12 @@ namespace Server.Services.Implementations {
var train = await InfoferScraper.Scrapers.TrainScraper.Scrape(trainNumber, zonedDate.ToDateTimeOffset());
if (train != null) {
_ = Task.Run(async () => await Database.OnTrainData(train));
// Fire-and-forget: the task is discarded (not awaited) before the scraped train is returned.
_ = Task.Run(async () => {
    // Measure how long the database update for this train takes.
    var watch = Stopwatch.StartNew();
    await Database.OnTrainData(train);
    var ms = watch.ElapsedMilliseconds;
    // The structured-log property is named for the train path; it previously reused the
    // station path's {StationDataMs}, which filed train timings under the wrong property.
    Logger.LogInformation("OnTrainData timing: {TrainDataMs} ms", ms);
});
}
return train;
}, TimeSpan.FromSeconds(30));

97
server/Services/Implementations/Database.cs

@ -47,6 +47,9 @@ public class Database : Server.Services.Interfaces.IDatabase {
private readonly IMongoCollection<StationListing> stationListingsCollection;
private readonly AsyncThrottle throttle;
// Caches of the ObjectId (stored as a string) of already-known listings, keyed by train number
// and by station name respectively.
// ConcurrentDictionary is used because these caches are checked before the insert semaphores are
// taken and are written from code paths that hold no lock; a plain Dictionary does not support
// concurrent readers and writers.
private readonly System.Collections.Concurrent.ConcurrentDictionary<string, string> trainObjectIds = new();
private readonly System.Collections.Concurrent.ConcurrentDictionary<string, string> stationObjectIds = new();
public Database(ILogger<Database> logger, IOptions<MongoSettings> mongoSettings) {
Logger = logger;
@ -169,14 +172,25 @@ public class Database : Server.Services.Interfaces.IDatabase {
private readonly SemaphoreSlim insertTrainLock = new (1, 1);
public async Task<string> FoundTrain(string rank, string number, string company) {
number = string.Join("", number.TakeWhile(c => c is >= '0' and <= '9'));
// If there is a matching ObjectId, then it's already in the database
if (trainObjectIds.ContainsKey(number)) return number;
await insertTrainLock.WaitAsync();
try {
if (!await (await throttle.MakeRequest(() =>
trainListingsCollection.FindAsync(Builders<TrainListing>.Filter.Eq("number", number))))
.AnyAsync()) {
var possibleTrains = await (await throttle.MakeRequest(() => trainListingsCollection.FindAsync(
Builders<TrainListing>.Filter.Eq("number", number)
))).ToListAsync();
if (possibleTrains.Count == 0) {
Logger.LogDebug("Found train {Rank} {Number} from {Company}", rank, number, company);
await throttle.MakeRequest(() =>
trainListingsCollection.InsertOneAsync(new(number: number, rank: rank, company: company)));
TrainListing listing = new(number: number, rank: rank, company: company);
await throttle.MakeRequest(() => trainListingsCollection.InsertOneAsync(listing));
if (listing.Id != null) {
trainObjectIds[number] = listing.Id;
}
}
else {
foreach (var possibleTrain in possibleTrains) {
trainObjectIds[possibleTrain.Number] = possibleTrain.Id!;
}
}
}
finally {
@ -192,7 +206,11 @@ public class Database : Server.Services.Interfaces.IDatabase {
// if (!await throttle.MakeRequest(() => stationListingsCollection.Find(Builders<StationListing>.Filter.Eq("name", name)).AnyAsync())) {
// Logger.LogDebug("Found station {StationName}", name);
// await throttle.MakeRequest(() => stationListingsCollection.InsertOneAsync(new(name, new())));
// }
// If there is a matching ObjectId, then it's already in the database
if (stationObjectIds.ContainsKey(name)) return;
await insertStationLock.WaitAsync();
UpdateResult update;
try {
@ -206,6 +224,9 @@ public class Database : Server.Services.Interfaces.IDatabase {
IsUpsert = true,
}
);
// Cache the ObjectId only when the upsert actually inserted a new document: UpsertedId is
// populated in that case alone. The previous guard (ModifiedCount > 0) described the opposite
// case, an existing document being modified, where UpsertedId is not set and dereferencing it fails.
if (update.IsAcknowledged && update.UpsertedId is { IsObjectId: true } upsertedId) {
    stationObjectIds[name] = upsertedId.AsObjectId.ToString();
}
}
finally {
insertStationLock.Release();
@ -217,27 +238,45 @@ public class Database : Server.Services.Interfaces.IDatabase {
}
public async Task FoundStations(IEnumerable<string> names) {
var enumerable = names as string[] ?? names.ToArray();
var unknownStations = names.ToList();
if (unknownStations.All(s => stationObjectIds.ContainsKey(s))) {
return;
}
unknownStations.RemoveAll(s => stationObjectIds.ContainsKey(s));
var existingStations = await (await stationListingsCollection.FindAsync(
Builders<StationListing>.Filter.StringIn("name", enumerable.Select((n) => new StringOrRegularExpression(n)))
Builders<StationListing>.Filter.StringIn("name", unknownStations.Select((n) => new StringOrRegularExpression(n)))
)).ToListAsync();
var notExistingStations = enumerable.Where((n) => !existingStations.Select((s) => s.Name).Contains(n)).ToList();
if (notExistingStations.Count == 0) return;
await stationListingsCollection.InsertManyAsync(
notExistingStations.Select(
(s) => new StationListing(s, new())
)
);
Logger.LogDebug("Found stations {StationNames}", notExistingStations);
foreach (var existingStation in existingStations) {
stationObjectIds[existingStation.Name] = existingStation.Id!;
}
unknownStations.RemoveAll(s => existingStations.Select(st => st.Name).Contains(s));
if (unknownStations.Count == 0) return;
var unknownStationListings = unknownStations.Select((s) => new StationListing(s, new())).ToList();
await stationListingsCollection.InsertManyAsync(unknownStationListings);
foreach (var listing in unknownStationListings) {
stationObjectIds[listing.Name] = listing.Id!;
}
Logger.LogDebug("Found stations {StationNames}", unknownStations);
}
public async Task FoundTrainAtStation(string stationName, string trainNumber) {
trainNumber = string.Join("", trainNumber.TakeWhile(c => c is >= '0' and <= '9'));
await FoundStation(stationName);
var updateResult = await throttle.MakeRequest(() => stationListingsCollection.UpdateOneAsync(
Builders<StationListing>.Filter.Eq("name", stationName),
Builders<StationListing>.Update.AddToSet("stoppedAtBy", trainNumber)
));
UpdateResult updateResult;
if (stationObjectIds.ContainsKey(stationName)) {
updateResult = await throttle.MakeRequest(() => stationListingsCollection.UpdateOneAsync(
Builders<StationListing>.Filter.Eq("_id", ObjectId.Parse(stationObjectIds[stationName])),
Builders<StationListing>.Update.AddToSet("stoppedAtBy", trainNumber)
));
}
else {
updateResult = await throttle.MakeRequest(() => stationListingsCollection.UpdateOneAsync(
Builders<StationListing>.Filter.Eq("name", stationName),
Builders<StationListing>.Update.AddToSet("stoppedAtBy", trainNumber)
));
}
if (updateResult.IsAcknowledged && updateResult.ModifiedCount > 0) {
Logger.LogDebug("Found train {TrainNumber} at station {StationName}", trainNumber, stationName);
}
@ -247,10 +286,22 @@ public class Database : Server.Services.Interfaces.IDatabase {
trainNumber = string.Join("", trainNumber.TakeWhile(c => c is >= '0' and <= '9'));
var enumerable = stationNames as string[] ?? stationNames.ToArray();
await FoundStations(enumerable);
var updateResult = await throttle.MakeRequest(() => stationListingsCollection.UpdateManyAsync(
Builders<StationListing>.Filter.StringIn("name", enumerable.Select(sn => new StringOrRegularExpression(sn))),
Builders<StationListing>.Update.AddToSet("stoppedAtBy", trainNumber)
));
var objectIds = enumerable
.Select<string, ObjectId?>((stationName) => stationObjectIds.ContainsKey(stationName) ? ObjectId.Parse(stationObjectIds[stationName]) : null)
.ToList();
UpdateResult updateResult;
if (!objectIds.Any((id) => id is null)) {
updateResult = await throttle.MakeRequest(() => stationListingsCollection.UpdateManyAsync(
Builders<StationListing>.Filter.In("_id", objectIds),
Builders<StationListing>.Update.AddToSet("stoppedAtBy", trainNumber)
));
}
else {
updateResult = await throttle.MakeRequest(() => stationListingsCollection.UpdateManyAsync(
Builders<StationListing>.Filter.StringIn("name", enumerable.Select(sn => new StringOrRegularExpression(sn))),
Builders<StationListing>.Update.AddToSet("stoppedAtBy", trainNumber)
));
}
if (updateResult.IsAcknowledged && updateResult.ModifiedCount > 0) {
Logger.LogDebug("Found train {TrainNumber} at stations {StationNames}", trainNumber, stationNames);
}

25
server/Startup.cs

@ -3,6 +3,7 @@ using System.Net;
using System.Text.Json;
using Microsoft.AspNetCore.Builder;
using Microsoft.AspNetCore.Hosting;
using Microsoft.AspNetCore.Http;
using Microsoft.AspNetCore.HttpOverrides;
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
@ -63,6 +64,30 @@ namespace Server {
c.SwaggerEndpoint("/swagger/v1/swagger.json", "InfoTren Scraper v1");
});
app.MapWhen(x => x.Request.Path.StartsWithSegments("/rapidoc"), appBuilder => {
appBuilder.Run(async context => {
context.Response.ContentType = "text/html";
await context.Response.WriteAsync(
"""
<!doctype html> <!-- Important: must specify -->
<html>
<head>
<meta charset="utf-8"> <!-- Important: rapi-doc uses utf8 characters -->
<script type="module" src="https://unpkg.com/rapidoc/dist/rapidoc-min.js"></script>
</head>
<body>
<rapi-doc
spec-url="/swagger/v3/swagger.json"
theme = "dark"
> </rapi-doc>
</body>
</html>
"""
);
});
});
// app.UseHttpsRedirection();
app.UseRouting();

3
server/server.csproj

@ -4,7 +4,8 @@
<Nullable>enable</Nullable>
<AssemblyName>Server</AssemblyName>
<RootNamespace>Server</RootNamespace>
<TargetFramework>net6.0</TargetFramework>
<LangVersion>11</LangVersion>
<TargetFrameworks>net6.0;net7.0</TargetFrameworks>
</PropertyGroup>
<ItemGroup>

Loading…
Cancel
Save