diff --git a/Dockerfile b/Dockerfile index d38e4ae..32c7e4b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,5 @@ # https://hub.docker.com/_/microsoft-dotnet -FROM mcr.microsoft.com/dotnet/sdk:6.0 AS build +FROM mcr.microsoft.com/dotnet/sdk:7.0 AS build WORKDIR /source # copy csproj and restore as distinct layers @@ -14,10 +14,10 @@ COPY server/. ./server/ COPY scraper/. ./scraper/ COPY ConsoleTest/. ./ConsoleTest/ WORKDIR /source/server -RUN dotnet publish -c release -o /app --no-restore +RUN dotnet publish -f net7.0 -c release -o /app --no-restore # final stage/image -FROM mcr.microsoft.com/dotnet/aspnet:6.0 +FROM mcr.microsoft.com/dotnet/aspnet:7.0 WORKDIR /app COPY --from=build /app ./ ENV INSIDE_DOCKER=true diff --git a/scraper/scraper.csproj b/scraper/scraper.csproj index 2a103a0..391f257 100644 --- a/scraper/scraper.csproj +++ b/scraper/scraper.csproj @@ -2,7 +2,7 @@ enable - net6.0 + net6.0;net7.0 diff --git a/server/Models/Database/TrainListing.cs b/server/Models/Database/TrainListing.cs index bebe0f4..0277b53 100644 --- a/server/Models/Database/TrainListing.cs +++ b/server/Models/Database/TrainListing.cs @@ -1,3 +1,4 @@ +using System.Collections.Generic; using System.Text.Json.Serialization; using MongoDB.Bson; using MongoDB.Bson.Serialization.Attributes; @@ -11,8 +12,10 @@ public record TrainListing( string? Id, string Rank, string Number, - string Company + string Company, + [property: BsonRepresentation(BsonType.ObjectId)] + string? LatestDescription ) { - public TrainListing() : this(null, "", "", "") { } - public TrainListing(string rank, string number, string company) : this(null, rank, number, company) { } + public TrainListing() : this(null, "", "", "", null) { } + public TrainListing(string rank, string number, string company) : this(null, rank, number, company, null) { } } \ No newline at end of file diff --git a/server/Services/Implementations/DataManager.cs b/server/Services/Implementations/DataManager.cs index 2ede489..ac0d4e7 100644 --- a/server/Services/Implementations/DataManager.cs +++ b/server/Services/Implementations/DataManager.cs @@ -1,22 +1,26 @@ using System; using System.Collections.Generic; +using System.Diagnostics; using System.Threading.Tasks; using InfoferScraper.Models.Train; using InfoferScraper.Models.Station; using Server.Services.Interfaces; using Server.Utils; using InfoferScraper; +using Microsoft.Extensions.Logging; namespace Server.Services.Implementations { public class DataManager : IDataManager { + private ILogger Logger { get; } private IDatabase Database { get; } private NodaTime.IDateTimeZoneProvider TzProvider { get; } private NodaTime.DateTimeZone CfrTimeZone => TzProvider["Europe/Bucharest"]; - public DataManager(NodaTime.IDateTimeZoneProvider tzProvider, IDatabase database) { + public DataManager(NodaTime.IDateTimeZoneProvider tzProvider, IDatabase database, ILogger logger) { this.TzProvider = tzProvider; this.Database = database; + this.Logger = logger; stationCache = new(async (t) => { var (stationName, date) = t; @@ -24,7 +28,12 @@ namespace Server.Services.Implementations { var station = await InfoferScraper.Scrapers.StationScraper.Scrape(stationName, zonedDate.ToDateTimeOffset()); if (station != null) { - _ = Task.Run(async () => await Database.OnStationData(station)); + _ = Task.Run(async () => { + var watch = Stopwatch.StartNew(); + await Database.OnStationData(station); + var ms = watch.ElapsedMilliseconds; + Logger.LogInformation("OnStationData timing: {StationDataMs} ms", ms); + }); } return station; }, TimeSpan.FromMinutes(1)); @@ -34,7 +43,12 @@ namespace Server.Services.Implementations { var train = await InfoferScraper.Scrapers.TrainScraper.Scrape(trainNumber, zonedDate.ToDateTimeOffset()); if (train != null) { - _ = Task.Run(async () => await Database.OnTrainData(train)); + _ = Task.Run(async () => { + var watch = Stopwatch.StartNew(); + await Database.OnTrainData(train); + var ms = watch.ElapsedMilliseconds; + Logger.LogInformation("OnTrainData timing: {StationDataMs} ms", ms); + }); } return train; }, TimeSpan.FromSeconds(30)); diff --git a/server/Services/Implementations/Database.cs b/server/Services/Implementations/Database.cs index 5c1368a..ce30355 100644 --- a/server/Services/Implementations/Database.cs +++ b/server/Services/Implementations/Database.cs @@ -47,6 +47,9 @@ public class Database : Server.Services.Interfaces.IDatabase { private readonly IMongoCollection stationListingsCollection; private readonly AsyncThrottle throttle; + private readonly Dictionary trainObjectIds = new(); + private readonly Dictionary stationObjectIds = new(); + public Database(ILogger logger, IOptions mongoSettings) { Logger = logger; @@ -169,14 +172,25 @@ public class Database : Server.Services.Interfaces.IDatabase { private readonly SemaphoreSlim insertTrainLock = new (1, 1); public async Task FoundTrain(string rank, string number, string company) { number = string.Join("", number.TakeWhile(c => c is >= '0' and <= '9')); + // If there is a matching ObjectId, then it's already in the database + if (trainObjectIds.ContainsKey(number)) return number; await insertTrainLock.WaitAsync(); try { - if (!await (await throttle.MakeRequest(() => - trainListingsCollection.FindAsync(Builders.Filter.Eq("number", number)))) - .AnyAsync()) { + var possibleTrains = await (await throttle.MakeRequest(() => trainListingsCollection.FindAsync( + Builders.Filter.Eq("number", number) + ))).ToListAsync(); + if (possibleTrains.Count == 0) { Logger.LogDebug("Found train {Rank} {Number} from {Company}", rank, number, company); - await throttle.MakeRequest(() => - trainListingsCollection.InsertOneAsync(new(number: number, rank: rank, company: company))); + TrainListing listing = new(number: number, rank: rank, company: company); + await throttle.MakeRequest(() => trainListingsCollection.InsertOneAsync(listing)); + if (listing.Id != null) { + trainObjectIds[number] = listing.Id; + } + } + else { + foreach (var possibleTrain in possibleTrains) { + trainObjectIds[possibleTrain.Number] = possibleTrain.Id!; + } } } finally { @@ -192,7 +206,11 @@ public class Database : Server.Services.Interfaces.IDatabase { // if (!await throttle.MakeRequest(() => stationListingsCollection.Find(Builders.Filter.Eq("name", name)).AnyAsync())) { // Logger.LogDebug("Found station {StationName}", name); // await throttle.MakeRequest(() => stationListingsCollection.InsertOneAsync(new(name, new()))); + // } + // If there is a matching ObjectId, then it's already in the database + if (stationObjectIds.ContainsKey(name)) return; + await insertStationLock.WaitAsync(); UpdateResult update; try { @@ -206,6 +224,9 @@ public class Database : Server.Services.Interfaces.IDatabase { IsUpsert = true, } ); + if (update.IsAcknowledged && update.ModifiedCount > 0) { + stationObjectIds[name] = update.UpsertedId.AsObjectId.ToString(); + } } finally { insertStationLock.Release(); @@ -217,27 +238,45 @@ public class Database : Server.Services.Interfaces.IDatabase { } public async Task FoundStations(IEnumerable names) { - var enumerable = names as string[] ?? names.ToArray(); + var unknownStations = names.ToList(); + if (unknownStations.All(s => stationObjectIds.ContainsKey(s))) { + return; + } + + unknownStations.RemoveAll(s => stationObjectIds.ContainsKey(s)); var existingStations = await (await stationListingsCollection.FindAsync( - Builders.Filter.StringIn("name", enumerable.Select((n) => new StringOrRegularExpression(n))) + Builders.Filter.StringIn("name", unknownStations.Select((n) => new StringOrRegularExpression(n))) )).ToListAsync(); - var notExistingStations = enumerable.Where((n) => !existingStations.Select((s) => s.Name).Contains(n)).ToList(); - if (notExistingStations.Count == 0) return; - await stationListingsCollection.InsertManyAsync( - notExistingStations.Select( - (s) => new StationListing(s, new()) - ) - ); - Logger.LogDebug("Found stations {StationNames}", notExistingStations); + foreach (var existingStation in existingStations) { + stationObjectIds[existingStation.Name] = existingStation.Id!; + } + + unknownStations.RemoveAll(s => existingStations.Select(st => st.Name).Contains(s)); + if (unknownStations.Count == 0) return; + var unknownStationListings = unknownStations.Select((s) => new StationListing(s, new())).ToList(); + await stationListingsCollection.InsertManyAsync(unknownStationListings); + foreach (var listing in unknownStationListings) { + stationObjectIds[listing.Name] = listing.Id!; + } + Logger.LogDebug("Found stations {StationNames}", unknownStations); } public async Task FoundTrainAtStation(string stationName, string trainNumber) { trainNumber = string.Join("", trainNumber.TakeWhile(c => c is >= '0' and <= '9')); await FoundStation(stationName); - var updateResult = await throttle.MakeRequest(() => stationListingsCollection.UpdateOneAsync( - Builders.Filter.Eq("name", stationName), - Builders.Update.AddToSet("stoppedAtBy", trainNumber) - )); + UpdateResult updateResult; + if (stationObjectIds.ContainsKey(stationName)) { + updateResult = await throttle.MakeRequest(() => stationListingsCollection.UpdateOneAsync( + Builders.Filter.Eq("_id", ObjectId.Parse(stationObjectIds[stationName])), + Builders.Update.AddToSet("stoppedAtBy", trainNumber) + )); + } + else { + updateResult = await throttle.MakeRequest(() => stationListingsCollection.UpdateOneAsync( + Builders.Filter.Eq("name", stationName), + Builders.Update.AddToSet("stoppedAtBy", trainNumber) + )); + } if (updateResult.IsAcknowledged && updateResult.ModifiedCount > 0) { Logger.LogDebug("Found train {TrainNumber} at station {StationName}", trainNumber, stationName); } @@ -247,10 +286,22 @@ public class Database : Server.Services.Interfaces.IDatabase { trainNumber = string.Join("", trainNumber.TakeWhile(c => c is >= '0' and <= '9')); var enumerable = stationNames as string[] ?? stationNames.ToArray(); await FoundStations(enumerable); - var updateResult = await throttle.MakeRequest(() => stationListingsCollection.UpdateManyAsync( - Builders.Filter.StringIn("name", enumerable.Select(sn => new StringOrRegularExpression(sn))), - Builders.Update.AddToSet("stoppedAtBy", trainNumber) - )); + var objectIds = enumerable + .Select((stationName) => stationObjectIds.ContainsKey(stationName) ? ObjectId.Parse(stationObjectIds[stationName]) : null) + .ToList(); + UpdateResult updateResult; + if (!objectIds.Any((id) => id is null)) { + updateResult = await throttle.MakeRequest(() => stationListingsCollection.UpdateManyAsync( + Builders.Filter.In("_id", objectIds), + Builders.Update.AddToSet("stoppedAtBy", trainNumber) + )); + } + else { + updateResult = await throttle.MakeRequest(() => stationListingsCollection.UpdateManyAsync( + Builders.Filter.StringIn("name", enumerable.Select(sn => new StringOrRegularExpression(sn))), + Builders.Update.AddToSet("stoppedAtBy", trainNumber) + )); + } if (updateResult.IsAcknowledged && updateResult.ModifiedCount > 0) { Logger.LogDebug("Found train {TrainNumber} at stations {StationNames}", trainNumber, stationNames); } diff --git a/server/Startup.cs b/server/Startup.cs index fa2c664..08ce2c3 100644 --- a/server/Startup.cs +++ b/server/Startup.cs @@ -3,6 +3,7 @@ using System.Net; using System.Text.Json; using Microsoft.AspNetCore.Builder; using Microsoft.AspNetCore.Hosting; +using Microsoft.AspNetCore.Http; using Microsoft.AspNetCore.HttpOverrides; using Microsoft.Extensions.Configuration; using Microsoft.Extensions.DependencyInjection; @@ -63,6 +64,30 @@ namespace Server { c.SwaggerEndpoint("/swagger/v1/swagger.json", "InfoTren Scraper v1"); }); + app.MapWhen(x => x.Request.Path.StartsWithSegments("/rapidoc"), appBuilder => { + appBuilder.Run(async context => { + context.Response.ContentType = "text/html"; + + await context.Response.WriteAsync( + """ + + + + + + + + + + + """ + ); + }); + }); + // app.UseHttpsRedirection(); app.UseRouting(); diff --git a/server/server.csproj b/server/server.csproj index 550d7a0..ba6e02d 100644 --- a/server/server.csproj +++ b/server/server.csproj @@ -4,7 +4,8 @@ enable Server Server - net6.0 + 11 + net6.0;net7.0