diff --git a/.dockerignore b/.dockerignore index 38f9a76..b233715 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,16 +1,8 @@ -# Docker -.dockerignore -Dockerfile - -# CPython compiler output -__pycache__ -*.pyc - -# Python package stuff -reqlib-metadata -pyproject.toml -*.egg-info - -# VS Code -.vscode - +**/bin +**/obj +**/out +**/.vscode +**/.vs +**/omnisharp.json +.dotnet +.Microsoft.DotNet.ImageBuilder \ No newline at end of file diff --git a/.github/workflows/build-image.yml b/.github/workflows/build-image.yml index 227e702..ec7f2c2 100644 --- a/.github/workflows/build-image.yml +++ b/.github/workflows/build-image.yml @@ -19,5 +19,6 @@ jobs: - name: Publish uses: docker/build-push-action@v2 with: + context: . tags: ${{ format('ghcr.io/{0}/new_infofer_scraper:latest', github.actor) }} push: true diff --git a/.gitignore b/.gitignore index 19fe82c..8bf1b90 100644 --- a/.gitignore +++ b/.gitignore @@ -1,11 +1,448 @@ -# CPython compiler output -__pycache__ -*.pyc +## Ignore Visual Studio temporary files, build results, and +## files generated by popular Visual Studio add-ons. +## +## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore + +# User-specific files +*.rsuser +*.suo +*.user +*.userosscache +*.sln.docstates + +# User-specific files (MonoDevelop/Xamarin Studio) +*.userprefs + +# Mono auto generated files +mono_crash.* + +# Build results +[Dd]ebug/ +[Dd]ebugPublic/ +[Rr]elease/ +[Rr]eleases/ +x64/ +x86/ +[Aa][Rr][Mm]/ +[Aa][Rr][Mm]64/ +bld/ +[Bb]in/ +[Oo]bj/ +[Ll]og/ +[Ll]ogs/ + +# Visual Studio 2015/2017 cache/options directory +.vs/ +# Uncomment if you have tasks that create the project's static files in wwwroot +#wwwroot/ + +# Visual Studio 2017 auto generated files +Generated\ Files/ + +# MSTest test Results +[Tt]est[Rr]esult*/ +[Bb]uild[Ll]og.* + +# NUnit +*.VisualState.xml +TestResult.xml +nunit-*.xml + +# Build Results of an ATL Project +[Dd]ebugPS/ +[Rr]eleasePS/ +dlldata.c + +# Benchmark Results +BenchmarkDotNet.Artifacts/ + +# .NET Core +project.lock.json +project.fragment.lock.json +artifacts/ -# Python package stuff -reqlib-metadata -pyproject.toml -*.egg-info +# Tye +.tye/ + +# StyleCop +StyleCopReport.xml + +# Files built by Visual Studio +*_i.c +*_p.c +*_h.h +*.ilk +*.meta +*.obj +*.iobj +*.pch +*.pdb +*.ipdb +*.pgc +*.pgd +*.rsp +*.sbr +*.tlb +*.tli +*.tlh +*.tmp +*.tmp_proj +*_wpftmp.csproj +*.log +*.vspscc +*.vssscc +.builds +*.pidb +*.svclog +*.scc + +# Chutzpah Test files +_Chutzpah* + +# Visual C++ cache files +ipch/ +*.aps +*.ncb +*.opendb +*.opensdf +*.sdf +*.cachefile +*.VC.db +*.VC.VC.opendb + +# Visual Studio profiler +*.psess +*.vsp +*.vspx +*.sap + +# Visual Studio Trace Files +*.e2e + +# TFS 2012 Local Workspace +$tf/ + +# Guidance Automation Toolkit +*.gpState + +# ReSharper is a .NET coding add-in +_ReSharper*/ +*.[Rr]e[Ss]harper +*.DotSettings.user + +# TeamCity is a build add-in +_TeamCity* + +# DotCover is a Code Coverage Tool +*.dotCover + +# AxoCover is a Code Coverage Tool +.axoCover/* +!.axoCover/settings.json + +# Coverlet is a free, cross platform Code Coverage Tool +coverage*[.json, .xml, .info] + +# Visual Studio code coverage results +*.coverage +*.coveragexml + +# NCrunch +_NCrunch_* +.*crunch*.local.xml +nCrunchTemp_* + +# MightyMoose +*.mm.* +AutoTest.Net/ + +# Web workbench (sass) +.sass-cache/ + +# Installshield output folder +[Ee]xpress/ + +# DocProject is a documentation generator add-in +DocProject/buildhelp/ +DocProject/Help/*.HxT +DocProject/Help/*.HxC +DocProject/Help/*.hhc +DocProject/Help/*.hhk +DocProject/Help/*.hhp +DocProject/Help/Html2 +DocProject/Help/html + +# Click-Once directory +publish/ + +# Publish Web Output +*.[Pp]ublish.xml +*.azurePubxml +# Note: Comment the next line if you want to checkin your web deploy settings, +# but database connection strings (with potential passwords) will be unencrypted +*.pubxml +*.publishproj + +# Microsoft Azure Web App publish settings. Comment the next line if you want to +# checkin your Azure Web App publish settings, but sensitive information contained +# in these scripts will be unencrypted +PublishScripts/ + +# NuGet Packages +*.nupkg +# NuGet Symbol Packages +*.snupkg +# The packages folder can be ignored because of Package Restore +**/[Pp]ackages/* +# except build/, which is used as an MSBuild target. +!**/[Pp]ackages/build/ +# Uncomment if necessary however generally it will be regenerated when needed +#!**/[Pp]ackages/repositories.config +# NuGet v3's project.json files produces more ignorable files +*.nuget.props +*.nuget.targets + +# Microsoft Azure Build Output +csx/ +*.build.csdef + +# Microsoft Azure Emulator +ecf/ +rcf/ + +# Windows Store app package directories and files +AppPackages/ +BundleArtifacts/ +Package.StoreAssociation.xml +_pkginfo.txt +*.appx +*.appxbundle +*.appxupload + +# Visual Studio cache files +# files ending in .cache can be ignored +*.[Cc]ache +# but keep track of directories ending in .cache +!?*.[Cc]ache/ + +# Others +ClientBin/ +~$* +*~ +*.dbmdl +*.dbproj.schemaview +*.jfm +*.pfx +*.publishsettings +orleans.codegen.cs + +# Including strong name files can present a security risk +# (https://github.com/github/gitignore/pull/2483#issue-259490424) +#*.snk + +# Since there are multiple workflows, uncomment next line to ignore bower_components +# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) +#bower_components/ + +# RIA/Silverlight projects +Generated_Code/ + +# Backup & report files from converting an old project file +# to a newer Visual Studio version. Backup files are not needed, +# because we have git ;-) +_UpgradeReport_Files/ +Backup*/ +UpgradeLog*.XML +UpgradeLog*.htm +ServiceFabricBackup/ +*.rptproj.bak + +# SQL Server files +*.mdf +*.ldf +*.ndf + +# Business Intelligence projects +*.rdl.data +*.bim.layout +*.bim_*.settings +*.rptproj.rsuser +*- [Bb]ackup.rdl +*- [Bb]ackup ([0-9]).rdl +*- [Bb]ackup ([0-9][0-9]).rdl + +# Microsoft Fakes +FakesAssemblies/ + +# GhostDoc plugin setting file +*.GhostDoc.xml + +# Node.js Tools for Visual Studio +.ntvs_analysis.dat +node_modules/ + +# Visual Studio 6 build log +*.plg + +# Visual Studio 6 workspace options file +*.opt + +# Visual Studio 6 auto-generated workspace file (contains which files were open etc.) +*.vbw + +# Visual Studio LightSwitch build output +**/*.HTMLClient/GeneratedArtifacts +**/*.DesktopClient/GeneratedArtifacts +**/*.DesktopClient/ModelManifest.xml +**/*.Server/GeneratedArtifacts +**/*.Server/ModelManifest.xml +_Pvt_Extensions + +# Paket dependency manager +.paket/paket.exe +paket-files/ + +# FAKE - F# Make +.fake/ -# VS Code -.vscode +# Ionide - VsCode extension for F# Support +.ionide/ + +# CodeRush personal settings +.cr/personal + +# Python Tools for Visual Studio (PTVS) +__pycache__/ +*.pyc + +# Cake - Uncomment if you are using it +# tools/** +# !tools/packages.config + +# Tabs Studio +*.tss + +# Telerik's JustMock configuration file +*.jmconfig + +# BizTalk build output +*.btp.cs +*.btm.cs +*.odx.cs +*.xsd.cs + +# OpenCover UI analysis results +OpenCover/ + +# Azure Stream Analytics local run output +ASALocalRun/ + +# MSBuild Binary and Structured Log +*.binlog + +# NVidia Nsight GPU debugger configuration file +*.nvuser + +# MFractors (Xamarin productivity tool) working folder +.mfractor/ + +# Local History for Visual Studio +.localhistory/ + +# BeatPulse healthcheck temp database +healthchecksdb + +# Backup folder for Package Reference Convert tool in Visual Studio 2017 +MigrationBackup/ + +# Ionide (cross platform F# VS Code tools) working folder +.ionide/ + +## +## Visual studio for Mac +## + + +# globs +Makefile.in +*.userprefs +*.usertasks +config.make +config.status +aclocal.m4 +install-sh +autom4te.cache/ +*.tar.gz +tarballs/ +test-results/ + +# Mac bundle stuff +*.dmg +*.app + +# content below from: https://github.com/github/gitignore/blob/master/Global/macOS.gitignore +# General +.DS_Store +.AppleDouble +.LSOverride + +# Icon must end with two \r +Icon + + +# Thumbnails +._* + +# Files that might appear in the root of a volume +.DocumentRevisions-V100 +.fseventsd +.Spotlight-V100 +.TemporaryItems +.Trashes +.VolumeIcon.icns +.com.apple.timemachine.donotpresent + +# Directories potentially created on remote AFP share +.AppleDB +.AppleDesktop +Network Trash Folder +Temporary Items +.apdisk + +# content below from: https://github.com/github/gitignore/blob/master/Global/Windows.gitignore +# Windows thumbnail cache files +Thumbs.db +ehthumbs.db +ehthumbs_vista.db + +# Dump file +*.stackdump + +# Folder config file +[Dd]esktop.ini + +# Recycle Bin used on file shares +$RECYCLE.BIN/ + +# Windows Installer files +*.cab +*.msi +*.msix +*.msm +*.msp + +# Windows shortcuts +*.lnk + +# JetBrains Rider +.idea/ +*.sln.iml + +## +## Visual Studio Code +## +.vscode/* +!.vscode/settings.json +!.vscode/tasks.json +!.vscode/launch.json +!.vscode/extensions.json diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000..aeb8ac6 --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,46 @@ +{ + "version": "0.2.0", + "configurations": [ + { + "name": "ConsoleTest", + "type": "coreclr", + "request": "launch", + "preLaunchTask": "buildConsoleTest", + "program": "${workspaceFolder}/ConsoleTest/bin/Debug/net6.0/ConsoleTest.dll", + "args": [], + "cwd": "${workspaceFolder}", + "stopAtEntry": false, + "console": "integratedTerminal" + }, + { + // Use IntelliSense to find out which attributes exist for C# debugging + // Use hover for the description of the existing attributes + // For further information visit https://github.com/OmniSharp/omnisharp-vscode/blob/master/debugger-launchjson.md + "name": ".NET Core Launch (web)", + "type": "coreclr", + "request": "launch", + "preLaunchTask": "build", + // If you have changed target frameworks, make sure to update the program path. + "program": "${workspaceFolder}/server/bin/Debug/net6.0/Server.dll", + "args": [], + "cwd": "${workspaceFolder}/server", + "stopAtEntry": false, + // Enable launching a web browser when ASP.NET Core starts. For more information: https://aka.ms/VSCode-CS-LaunchJson-WebBrowser + // "serverReadyAction": { + // "action": "openExternally", + // "pattern": "\\bNow listening on:\\s+(https?://\\S+)" + // }, + "env": { + "ASPNETCORE_ENVIRONMENT": "Development" + }, + "sourceFileMap": { + "/Views": "${workspaceFolder}/Views" + } + }, + { + "name": ".NET Core Attach", + "type": "coreclr", + "request": "attach" + } + ] +} \ No newline at end of file diff --git a/.vscode/tasks.json b/.vscode/tasks.json new file mode 100644 index 0000000..1cb8e13 --- /dev/null +++ b/.vscode/tasks.json @@ -0,0 +1,53 @@ +{ + "version": "2.0.0", + "tasks": [ + { + "label": "build", + "command": "dotnet", + "type": "process", + "args": [ + "build", + "${workspaceFolder}/server/server.csproj", + "/property:GenerateFullPaths=true", + "/consoleloggerparameters:NoSummary" + ], + "problemMatcher": "$msCompile" + }, + { + "label": "buildConsoleTest", + "command": "dotnet", + "type": "process", + "args": [ + "build", + "${workspaceFolder}/ConsoleTest/ConsoleTest.csproj", + "/property:GenerateFullPaths=true", + "/consoleloggerparameters:NoSummary" + ], + "problemMatcher": "$msCompile" + }, + { + "label": "publish", + "command": "dotnet", + "type": "process", + "args": [ + "publish", + "${workspaceFolder}/server/server.csproj", + "/property:GenerateFullPaths=true", + "/consoleloggerparameters:NoSummary" + ], + "problemMatcher": "$msCompile" + }, + { + "label": "watch", + "command": "dotnet", + "type": "process", + "args": [ + "watch", + "run", + "--project", + "${workspaceFolder}/server/server.csproj" + ], + "problemMatcher": "$msCompile" + } + ] +} \ No newline at end of file diff --git a/ConsoleTest/.vscode/launch.json b/ConsoleTest/.vscode/launch.json new file mode 100644 index 0000000..cd971a6 --- /dev/null +++ b/ConsoleTest/.vscode/launch.json @@ -0,0 +1,26 @@ +{ + "version": "0.2.0", + "configurations": [ + { + // Use IntelliSense to find out which attributes exist for C# debugging + // Use hover for the description of the existing attributes + // For further information visit https://github.com/OmniSharp/omnisharp-vscode/blob/master/debugger-launchjson.md + "name": ".NET Core Launch (console)", + "type": "coreclr", + "request": "launch", + "preLaunchTask": "build", + // If you have changed target frameworks, make sure to update the program path. + "program": "${workspaceFolder}/bin/Debug/net5.0/ConsoleTest.dll", + "args": [], + "cwd": "${workspaceFolder}", + // For more information about the 'console' field, see https://aka.ms/VSCode-CS-LaunchJson-Console + "console": "internalConsole", + "stopAtEntry": false + }, + { + "name": ".NET Core Attach", + "type": "coreclr", + "request": "attach" + } + ] +} \ No newline at end of file diff --git a/ConsoleTest/.vscode/tasks.json b/ConsoleTest/.vscode/tasks.json new file mode 100644 index 0000000..ac5a31f --- /dev/null +++ b/ConsoleTest/.vscode/tasks.json @@ -0,0 +1,42 @@ +{ + "version": "2.0.0", + "tasks": [ + { + "label": "build", + "command": "dotnet", + "type": "process", + "args": [ + "build", + "${workspaceFolder}/ConsoleTest.csproj", + "/property:GenerateFullPaths=true", + "/consoleloggerparameters:NoSummary" + ], + "problemMatcher": "$msCompile" + }, + { + "label": "publish", + "command": "dotnet", + "type": "process", + "args": [ + "publish", + "${workspaceFolder}/ConsoleTest.csproj", + "/property:GenerateFullPaths=true", + "/consoleloggerparameters:NoSummary" + ], + "problemMatcher": "$msCompile" + }, + { + "label": "watch", + "command": "dotnet", + "type": "process", + "args": [ + "watch", + "run", + "${workspaceFolder}/ConsoleTest.csproj", + "/property:GenerateFullPaths=true", + "/consoleloggerparameters:NoSummary" + ], + "problemMatcher": "$msCompile" + } + ] +} \ No newline at end of file diff --git a/ConsoleTest/ConsoleTest.csproj b/ConsoleTest/ConsoleTest.csproj new file mode 100644 index 0000000..4df0a6c --- /dev/null +++ b/ConsoleTest/ConsoleTest.csproj @@ -0,0 +1,12 @@ + + + + + + + + Exe + net6.0 + + + diff --git a/ConsoleTest/Program.cs b/ConsoleTest/Program.cs new file mode 100644 index 0000000..470e4f0 --- /dev/null +++ b/ConsoleTest/Program.cs @@ -0,0 +1,63 @@ +using System; +using System.Text.Json; +using System.Threading.Tasks; +using InfoferScraper; +using InfoferScraper.Scrapers; + +while (true) { + Console.WriteLine("1. Scrape Train"); + Console.WriteLine("2. Scrape Station"); + Console.WriteLine("0. Exit"); + + var input = Console.ReadLine()?.Trim(); + switch (input) { + case "1": + await PrintTrain(); + break; + case "2": + await PrintStation(); + break; + case null: + case "0": + goto INPUT_LOOP_BREAK; + } + Console.WriteLine(); +} +INPUT_LOOP_BREAK:; + +async Task PrintTrain() { + Console.Write("Train number: "); + var trainNumber = Console.ReadLine()?.Trim(); + + if (trainNumber == null) { + return; + } + + Console.WriteLine( + JsonSerializer.Serialize( + await TrainScraper.Scrape(trainNumber), + new JsonSerializerOptions { + PropertyNamingPolicy = JsonNamingPolicy.CamelCase, + WriteIndented = true, + } + ) + ); +} +async Task PrintStation() { + Console.Write("Station name: "); + var stationName = Console.ReadLine()?.Trim(); + + if (stationName == null) { + return; + } + + Console.WriteLine( + JsonSerializer.Serialize( + await StationScraper.Scrape(stationName), + new JsonSerializerOptions { + PropertyNamingPolicy = JsonNamingPolicy.CamelCase, + WriteIndented = true, + } + ) + ); +} diff --git a/Dockerfile b/Dockerfile index 364d64a..38bcd9d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,23 +1,23 @@ -FROM python:slim - -RUN pip install pipenv - -WORKDIR /var/app/scraper -COPY scraper/Pipfil* ./ -COPY scraper/setup.py ./ -WORKDIR /var/app/server -COPY server/Pipfil* ./ -RUN pipenv install -RUN pipenv graph - -WORKDIR /var/app/scraper -COPY scraper . -WORKDIR /var/app/server -COPY server . -RUN rm server/scraper -RUN ln -s /var/app/scraper ./server/scraper - -ENV PORT 5000 -EXPOSE ${PORT} - -CMD ["pipenv", "run", "python3", "-m", "main"] +# https://hub.docker.com/_/microsoft-dotnet +FROM mcr.microsoft.com/dotnet/sdk:6.0 AS build +WORKDIR /source + +# copy csproj and restore as distinct layers +COPY *.sln . +COPY server/*.csproj ./server/ +COPY scraper/*.csproj ./scraper/ +COPY ConsoleTest/*.csproj ./ConsoleTest/ +RUN dotnet restore + +# copy everything else and build app +COPY server/. ./server/ +COPY scraper/. ./scraper/ +COPY ConsoleTest/. ./ConsoleTest/ +WORKDIR /source/server +RUN dotnet publish -c release -o /app --no-restore + +# final stage/image +FROM mcr.microsoft.com/dotnet/aspnet:6.0 +WORKDIR /app +COPY --from=build /app ./ +ENTRYPOINT ["dotnet", "Server.dll"] diff --git a/Pipfile b/Pipfile deleted file mode 100644 index 71e4f7c..0000000 --- a/Pipfile +++ /dev/null @@ -1,11 +0,0 @@ -[[source]] -url = "https://pypi.org/simple" -verify_ssl = true -name = "pypi" - -[packages] - -[dev-packages] - -[requires] -python_version = "3.9" diff --git a/Pipfile.lock b/Pipfile.lock deleted file mode 100644 index 8aee7e8..0000000 --- a/Pipfile.lock +++ /dev/null @@ -1,20 +0,0 @@ -{ - "_meta": { - "hash": { - "sha256": "a36a5392bb1e8bbc06bfaa0761e52593cf2d83b486696bf54667ba8da616c839" - }, - "pipfile-spec": 6, - "requires": { - "python_version": "3.9" - }, - "sources": [ - { - "name": "pypi", - "url": "https://pypi.org/simple", - "verify_ssl": true - } - ] - }, - "default": {}, - "develop": {} -} diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..d7519b7 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,12 @@ +version: '3' + +services: + infofer_scraper: + image: new_infofer_scraper + build: . + ports: + - ${PORT:-5000}:80 + environment: + - DB_DIR=/data + volumes: + - ./data:/data diff --git a/new-infofer-scraper.sln b/new-infofer-scraper.sln new file mode 100644 index 0000000..bbdb29a --- /dev/null +++ b/new-infofer-scraper.sln @@ -0,0 +1,62 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 16 +VisualStudioVersion = 16.6.30114.105 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "scraper", "scraper\scraper.csproj", "{E08BC25C-B39B-40F9-8114-A8D6545EE1C1}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "server", "server\server.csproj", "{C2D22A33-5317-47A3-B28A-E151224D3E46}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ConsoleTest", "ConsoleTest\ConsoleTest.csproj", "{0D8E3B5F-2511-4174-8129-275500753585}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Any CPU = Debug|Any CPU + Debug|x64 = Debug|x64 + Debug|x86 = Debug|x86 + Release|Any CPU = Release|Any CPU + Release|x64 = Release|x64 + Release|x86 = Release|x86 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {E08BC25C-B39B-40F9-8114-A8D6545EE1C1}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {E08BC25C-B39B-40F9-8114-A8D6545EE1C1}.Debug|Any CPU.Build.0 = Debug|Any CPU + {E08BC25C-B39B-40F9-8114-A8D6545EE1C1}.Debug|x64.ActiveCfg = Debug|Any CPU + {E08BC25C-B39B-40F9-8114-A8D6545EE1C1}.Debug|x64.Build.0 = Debug|Any CPU + {E08BC25C-B39B-40F9-8114-A8D6545EE1C1}.Debug|x86.ActiveCfg = Debug|Any CPU + {E08BC25C-B39B-40F9-8114-A8D6545EE1C1}.Debug|x86.Build.0 = Debug|Any CPU + {E08BC25C-B39B-40F9-8114-A8D6545EE1C1}.Release|Any CPU.ActiveCfg = Release|Any CPU + {E08BC25C-B39B-40F9-8114-A8D6545EE1C1}.Release|Any CPU.Build.0 = Release|Any CPU + {E08BC25C-B39B-40F9-8114-A8D6545EE1C1}.Release|x64.ActiveCfg = Release|Any CPU + {E08BC25C-B39B-40F9-8114-A8D6545EE1C1}.Release|x64.Build.0 = Release|Any CPU + {E08BC25C-B39B-40F9-8114-A8D6545EE1C1}.Release|x86.ActiveCfg = Release|Any CPU + {E08BC25C-B39B-40F9-8114-A8D6545EE1C1}.Release|x86.Build.0 = Release|Any CPU + {C2D22A33-5317-47A3-B28A-E151224D3E46}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {C2D22A33-5317-47A3-B28A-E151224D3E46}.Debug|Any CPU.Build.0 = Debug|Any CPU + {C2D22A33-5317-47A3-B28A-E151224D3E46}.Debug|x64.ActiveCfg = Debug|Any CPU + {C2D22A33-5317-47A3-B28A-E151224D3E46}.Debug|x64.Build.0 = Debug|Any CPU + {C2D22A33-5317-47A3-B28A-E151224D3E46}.Debug|x86.ActiveCfg = Debug|Any CPU + {C2D22A33-5317-47A3-B28A-E151224D3E46}.Debug|x86.Build.0 = Debug|Any CPU + {C2D22A33-5317-47A3-B28A-E151224D3E46}.Release|Any CPU.ActiveCfg = Release|Any CPU + {C2D22A33-5317-47A3-B28A-E151224D3E46}.Release|Any CPU.Build.0 = Release|Any CPU + {C2D22A33-5317-47A3-B28A-E151224D3E46}.Release|x64.ActiveCfg = Release|Any CPU + {C2D22A33-5317-47A3-B28A-E151224D3E46}.Release|x64.Build.0 = Release|Any CPU + {C2D22A33-5317-47A3-B28A-E151224D3E46}.Release|x86.ActiveCfg = Release|Any CPU + {C2D22A33-5317-47A3-B28A-E151224D3E46}.Release|x86.Build.0 = Release|Any CPU + {0D8E3B5F-2511-4174-8129-275500753585}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {0D8E3B5F-2511-4174-8129-275500753585}.Debug|Any CPU.Build.0 = Debug|Any CPU + {0D8E3B5F-2511-4174-8129-275500753585}.Debug|x64.ActiveCfg = Debug|Any CPU + {0D8E3B5F-2511-4174-8129-275500753585}.Debug|x64.Build.0 = Debug|Any CPU + {0D8E3B5F-2511-4174-8129-275500753585}.Debug|x86.ActiveCfg = Debug|Any CPU + {0D8E3B5F-2511-4174-8129-275500753585}.Debug|x86.Build.0 = Debug|Any CPU + {0D8E3B5F-2511-4174-8129-275500753585}.Release|Any CPU.ActiveCfg = Release|Any CPU + {0D8E3B5F-2511-4174-8129-275500753585}.Release|Any CPU.Build.0 = Release|Any CPU + {0D8E3B5F-2511-4174-8129-275500753585}.Release|x64.ActiveCfg = Release|Any CPU + {0D8E3B5F-2511-4174-8129-275500753585}.Release|x64.Build.0 = Release|Any CPU + {0D8E3B5F-2511-4174-8129-275500753585}.Release|x86.ActiveCfg = Release|Any CPU + {0D8E3B5F-2511-4174-8129-275500753585}.Release|x86.Build.0 = Release|Any CPU + EndGlobalSection +EndGlobal diff --git a/omnisharp.json b/omnisharp.json new file mode 100644 index 0000000..4220f19 --- /dev/null +++ b/omnisharp.json @@ -0,0 +1,24 @@ +{ + "$schema": "https://json.schemastore.org/omnisharp", + "FormattingOptions": { + "OrganizeImports": true, + "UseTabs": true, + "TabSize": 4, + "IndentationSize": 4, + "NewLinesForBracesInTypes": false, + "NewLinesForBracesInMethods": false, + "NewLinesForBracesInProperties": false, + "NewLinesForBracesInAccessors": false, + "NewLinesForBracesInAnonymousMethods": false, + "NewLinesForBracesInControlBlocks": false, + "NewLinesForBracesInAnonymousTypes": false, + "NewLinesForBracesInObjectCollectionArrayInitializers": false, + "NewLinesForBracesInLambdaExpressionBody": false, + "NewLineForElse": true, + "NewLineForCatch": true, + "NewLineForFinally": true, + "NewLineForMembersInObjectInit": false, + "NewLineForMembersInAnonymousTypes": false, + "NewLineForClausesInQuery": false + } +} \ No newline at end of file diff --git a/scraper/Pipfile b/scraper/Pipfile deleted file mode 100644 index 864234d..0000000 --- a/scraper/Pipfile +++ /dev/null @@ -1,14 +0,0 @@ -[[source]] -url = "https://pypi.org/simple" -verify_ssl = true -name = "pypi" - -[packages] -beautifulsoup4 = "*" -requests = "*" -pytz = "*" - -[dev-packages] - -[requires] -python_version = "3.9" diff --git a/scraper/Pipfile.lock b/scraper/Pipfile.lock deleted file mode 100644 index 1bb4905..0000000 --- a/scraper/Pipfile.lock +++ /dev/null @@ -1,85 +0,0 @@ -{ - "_meta": { - "hash": { - "sha256": "d7e3ebca9807b4f0c9dcac014554e9d1c9cb3a0c30b5c71b0b7cd4ccdc4934e1" - }, - "pipfile-spec": 6, - "requires": { - "python_version": "3.9" - }, - "sources": [ - { - "name": "pypi", - "url": "https://pypi.org/simple", - "verify_ssl": true - } - ] - }, - "default": { - "beautifulsoup4": { - "hashes": [ - "sha256:4c98143716ef1cb40bf7f39a8e3eec8f8b009509e74904ba3a7b315431577e35", - "sha256:84729e322ad1d5b4d25f805bfa05b902dd96450f43842c4e99067d5e1369eb25", - "sha256:fff47e031e34ec82bf17e00da8f592fe7de69aeea38be00523c04623c04fb666" - ], - "index": "pypi", - "version": "==4.9.3" - }, - "certifi": { - "hashes": [ - "sha256:2bbf76fd432960138b3ef6dda3dde0544f27cbf8546c458e60baf371917ba9ee", - "sha256:50b1e4f8446b06f41be7dd6338db18e0990601dce795c2b1686458aa7e8fa7d8" - ], - "version": "==2021.5.30" - }, - "charset-normalizer": { - "hashes": [ - "sha256:0c8911edd15d19223366a194a513099a302055a962bca2cec0f54b8b63175d8b", - "sha256:f23667ebe1084be45f6ae0538e4a5a865206544097e4e8bbcacf42cd02a348f3" - ], - "markers": "python_version >= '3'", - "version": "==2.0.4" - }, - "idna": { - "hashes": [ - "sha256:14475042e284991034cb48e06f6851428fb14c4dc953acd9be9a5e95c7b6dd7a", - "sha256:467fbad99067910785144ce333826c71fb0e63a425657295239737f7ecd125f3" - ], - "markers": "python_version >= '3'", - "version": "==3.2" - }, - "pytz": { - "hashes": [ - "sha256:83a4a90894bf38e243cf052c8b58f381bfe9a7a483f6a9cab140bc7f702ac4da", - "sha256:eb10ce3e7736052ed3623d49975ce333bcd712c7bb19a58b9e2089d4057d0798" - ], - "index": "pypi", - "version": "==2021.1" - }, - "requests": { - "hashes": [ - "sha256:6c1246513ecd5ecd4528a0906f910e8f0f9c6b8ec72030dc9fd154dc1a6efd24", - "sha256:b8aa58f8cf793ffd8782d3d8cb19e66ef36f7aba4353eec859e74678b01b07a7" - ], - "index": "pypi", - "version": "==2.26.0" - }, - "soupsieve": { - "hashes": [ - "sha256:052774848f448cf19c7e959adf5566904d525f33a3f8b6ba6f6f8f26ec7de0cc", - "sha256:c2c1c2d44f158cdbddab7824a9af8c4f83c76b1e23e049479aa432feb6c4c23b" - ], - "markers": "python_version >= '3'", - "version": "==2.2.1" - }, - "urllib3": { - "hashes": [ - "sha256:39fb8672126159acb139a7718dd10806104dec1e2f0f6c88aab05d17df10c8d4", - "sha256:f57b4c16c62fa2760b7e3d97c35b255512fb6b59a259730f36ba32ce9f8e342f" - ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'", - "version": "==1.26.6" - } - }, - "develop": {} -} diff --git a/scraper/__init__.py b/scraper/__init__.py deleted file mode 100644 index 426b2a3..0000000 --- a/scraper/__init__.py +++ /dev/null @@ -1 +0,0 @@ -__all__ = ['scraper'] diff --git a/scraper/main.py b/scraper/main.py deleted file mode 100644 index 7de5a40..0000000 --- a/scraper/main.py +++ /dev/null @@ -1,44 +0,0 @@ -from scraper import scrape - -_NO_DEFAULT = object() - -def check_yes_no(input: str, default=_NO_DEFAULT, considered_yes=None) -> bool: - input = str(input).strip().lower() - if not input: - if default == _NO_DEFAULT: - raise Exception('Empty input with no default') - return default - if not considered_yes: - considered_yes = ['y', 'yes', 't', 'true', '1'] - return input in considered_yes - -def main(): - train_no = int(input('Train number: ')) - use_yesterday = input('Train departed yesterday? [y/N] ') - data = scrape(train_no, use_yesterday=check_yes_no(use_yesterday, default=False)) - print(f'Train {train_no}\t{data["route"]["from"]}\t{data["route"]["to"]}') - print() - if 'status' in data and data['status']: - delay = data['status']['delay'] - if delay == 0: - delay = 'on time' - else: - delay = f'{delay} min' - state = data['status']['state'] - station = data['status']['station'] - print(f'Status: {delay}\t{state}\t{station}') - print() - for station in data['stations']: - if 'arrival' in station and station['arrival']: - print(station['arrival']['scheduleTime'], end='\t') - else: - print(end='\t') - print(station['name'], end='\t') - if 'departure' in station and station['departure']: - print(station['departure']['scheduleTime'], end='\t') - else: - print(end='\t') - print() - -if __name__ == '__main__': - main() \ No newline at end of file diff --git a/scraper/omnisharp.json b/scraper/omnisharp.json new file mode 120000 index 0000000..4ed1f68 --- /dev/null +++ b/scraper/omnisharp.json @@ -0,0 +1 @@ +../omnisharp.json \ No newline at end of file diff --git a/scraper/schemas.py b/scraper/schemas.py deleted file mode 100644 index 6509204..0000000 --- a/scraper/schemas.py +++ /dev/null @@ -1,20 +0,0 @@ -from contextlib import ExitStack as _ExitStack - -_es = _ExitStack() - -def _load_file(name: str): - import json - from os.path import join, dirname - dir = dirname(__file__) - - return json.load(_es.enter_context(open(join(dir, name)))) - -TRAIN_INFO_SCHEMA = { - 'v1': _load_file('scrape_train_schema.json'), - 'v2': _load_file('scrape_train_schema_v2.json'), -} -STATION_SCHEMA = { - 'v2': _load_file('scrape_station_schema_v2.json'), -} - -_es.close() diff --git a/scraper/scrape_station.py b/scraper/scrape_station.py deleted file mode 100644 index 335644f..0000000 --- a/scraper/scrape_station.py +++ /dev/null @@ -1,91 +0,0 @@ -import re - -from datetime import datetime, timedelta - -import pytz -import requests -from bs4 import BeautifulSoup - -from .utils import * - -# region regex definitions - -RO_LETTERS = r'A-Za-zăâîșțĂÂÎȚȘ' - -STATION_INFO_REGEX = re.compile(rf'^([{RO_LETTERS}.0-9 ]+) în ([0-9.]+)$') - -STOPPING_TIME_REGEX = re.compile(r'^(necunoscută \(stație terminus\))|(?:([0-9]+) (min|sec) \((?:începând cu|până la) ([0-9]{1,2}:[0-9]{2})\))$') - -# endregion - -def scrape(station_name: str): - station_name = ro_letters_to_en(station_name) - # Start scrapping session - s = requests.Session() - - r = s.get(build_url( - 'https://mersultrenurilor.infofer.ro/ro-RO/Statie/{station}', - station=station_name.replace(' ', '-'), - )) - - soup = BeautifulSoup(r.text, features='html.parser') - sform = soup.find(id='form-search') - result_data = { elem['name']: elem['value'] for elem in sform('input') } - - r = s.post('https://mersultrenurilor.infofer.ro/ro-RO/Stations/StationsResult', data=result_data) - soup = BeautifulSoup(r.text, features='html.parser') - - scraped = {} - - station_info_div, _, departures_div, arrivals_div, *_ = soup('div', recursive=False) - - scraped['stationName'], scraped['date'] = STATION_INFO_REGEX.match(collapse_space(station_info_div.h2.text)).groups() - date_d, date_m, date_y = (int(comp) for comp in scraped['date'].split('.')) - date = datetime(date_y, date_m, date_d) - dt_seq = DateTimeSequencer(date.year, date.month, date.day) - tz = pytz.timezone('Europe/Bucharest') - - def parse_arrdep_list(elem, end_station_field_name): - if elem.div.ul is None: - return None - - def parse_item(elem): - result = {} - - try: - data_div, status_div = elem('div', recursive=False) - except ValueError: - data_div, *_ = elem('div', recursive=False) - status_div = None - data_main_div, data_details_div = data_div('div', recursive=False) - time_div, dest_div, train_div, *_ = data_main_div('div', recursive=False) - operator_div, route_div, stopping_time_div = data_details_div.div('div', recursive=False) - - result['time'] = collapse_space(time_div.div.div('div', recursive=False)[1].text) - st_hr, st_min = (int(comp) for comp in result['time'].split(':')) - result['time'] = tz.localize(dt_seq(st_hr, st_min)).isoformat() - - unknown_st, st, minsec, st_opposite_time = STOPPING_TIME_REGEX.match( - collapse_space(stopping_time_div.div('div', recursive=False)[1].text) - ).groups() - if unknown_st: - result['stoppingTime'] = None - elif st: - minutes = minsec == 'min' - result['stoppingTime'] = int(st) * 60 if minutes else int(st) - - result['train'] = {} - result['train']['rank'] = collapse_space(train_div.div.div('div', recursive=False)[1].span.text) - result['train']['number'] = collapse_space(train_div.div.div('div', recursive=False)[1].a.text) - result['train'][end_station_field_name] = collapse_space(dest_div.div.div('div', recursive=False)[1].text) - result['train']['operator'] = collapse_space(operator_div.div('div', recursive=False)[1].text) - result['train']['route'] = collapse_space(route_div.div('div', recursive=False)[1].text).split(' - ') - - return result - - return [parse_item(elem) for elem in elem.div.ul('li', recursive=False)] - - scraped['departures'] = parse_arrdep_list(departures_div, 'destination') - scraped['arrivals'] = parse_arrdep_list(arrivals_div, 'origin') - - return scraped diff --git a/scraper/scrape_station_schema_v2.json b/scraper/scrape_station_schema_v2.json deleted file mode 100644 index a6b18f5..0000000 --- a/scraper/scrape_station_schema_v2.json +++ /dev/null @@ -1,138 +0,0 @@ -{ - "$schema": "http://json-schema.org/schema", - "title": "Train Info InfoFer Scrap Station Schema", - "description": "Results of scrapping InfoFer website for station arrival/departure info", - "definitions": { - "arrDepItem": { - "type": "object", - "properties": { - "time": { - "description": "Time of arrival/departure", - "type": "string", - "format": "date-time" - }, - "train": { - "type": "object", - "properties": { - "rank": { - "type": "string", - "examples": [ - "R", - "R-E", - "IR", - "IRN" - ] - }, - "number": { - "type": "string", - "examples": [ - "74", - "15934" - ] - }, - "operator": { - "type": "string", - "examples": [ - "CFR Călători", - "Softrans", - "Regio Călători" - ] - }, - "route": { - "description": "All the stations the train stops at", - "type": "array", - "items": { - "type": "string" - } - } - }, - "required": [ - "rank", - "number", - "operator" - ] - }, - "stoppingTime": { - "description": "The number of seconds the train stops in the station", - "type": [ - "integer", - "null" - ], - "minimum": 0 - } - }, - "required": [ - "time", - "train", - "stoppingTime" - ] - } - }, - "type": "object", - "properties": { - "arrivals": { - "type": ["array", "null"], - "items": { - "allOf": [ - { - "$ref": "#/definitions/arrDepItem" - }, - { - "type": "object", - "properties": { - "train": { - "type": "object", - "properties": { - "origin": { - "type": "string" - } - }, - "required": ["origin"] - } - }, - "required": ["train"] - } - ] - } - }, - "departures": { - "type": ["array", "null"], - "items": { - "allOf": [ - { - "$ref": "#/definitions/arrDepItem" - }, - { - "type": "object", - "properties": { - "train": { - "type": "object", - "properties": { - "destination": { - "type": "string" - } - }, - "required": ["destination"] - } - }, - "required": ["train"] - } - ] - } - }, - "stationName": { - "type": "string" - }, - "date": { - "description": "Date for which the data is provided (likely today)", - "type": "string", - "pattern": "^[0-9]{1,2}\\.[0-9]{2}\\.[0-9]{4}$" - } - }, - "required": [ - "arrivals", - "departures", - "stationName", - "date" - ] -} \ No newline at end of file diff --git a/scraper/scrape_train.py b/scraper/scrape_train.py deleted file mode 100644 index 02c9a5a..0000000 --- a/scraper/scrape_train.py +++ /dev/null @@ -1,146 +0,0 @@ -import re - -from datetime import datetime, timedelta - -import pytz -import requests -from bs4 import BeautifulSoup - -from .utils import * - -# region regex definitions - -TRAIN_INFO_REGEX = re.compile(r'^([A-Z-]+) ([0-9]+) în ([0-9.]+)$') - -OPERATOR_REGEX = re.compile(r'^Operat de (.+)$') - -SL_REGEX = re.compile(r'^(?:Fără|([0-9]+) min) (întârziere|mai devreme) la (trecerea fără oprire prin|sosirea în|plecarea din) (.+)\.$') -SL_STATE_MAP = { - 't': 'passing', - 's': 'arrival', - 'p': 'departure', -} - -RO_LETTERS = r'A-Za-zăâîșțĂÂÎȚȘ' - -ROUTE_REGEX = re.compile(rf'^Parcurs tren ([{RO_LETTERS} ]+)[-–]([{RO_LETTERS} ]+)$') - -KM_REGEX = re.compile(r'^km ([0-9]+)$') - -PLATFORM_REGEX = re.compile(r'^linia (.+)$') - -STOPPING_TIME_REGEX = re.compile(r'^([0-9]+) (min|sec) oprire$') - -STATION_DEPARR_STATUS_REGEX = re.compile(r'^(?:(la timp)|(?:((?:\+|-)[0-9]+) min \((?:(?:întârziere)|(?:mai devreme))\)))(\*?)$') - -# endregion - -def scrape(train_no: str, use_yesterday=False, date_override=None): - # Start scrapping session - s = requests.Session() - - date = datetime.today() - if use_yesterday: - date -= timedelta(days=1) - if date_override: - date = date_override - - r = s.get(build_url( - 'https://mersultrenurilor.infofer.ro/ro-RO/Tren/{train_no}', - train_no=train_no, - query=[ - ('Date', date.strftime('%d.%m.%Y')), - ], - )) - - soup = BeautifulSoup(r.text, features='html.parser') - sform = soup.find(id='form-search') - result_data = { elem['name']: elem['value'] for elem in sform('input') } - - r = s.post('https://mersultrenurilor.infofer.ro/ro-RO/Trains/TrainsResult', data=result_data) - soup = BeautifulSoup(r.text, features='html.parser') - - scraped = {} - - train_info_div, _, _, results_div, *_ = soup('div', recursive=False) - - train_info_div = train_info_div.div('div', recursive=False)[0] - - scraped['rank'], scraped['number'], scraped['date'] = TRAIN_INFO_REGEX.match(collapse_space(train_info_div.h2.text)).groups() - date_d, date_m, date_y = (int(comp) for comp in scraped['date'].split('.')) - date = datetime(date_y, date_m, date_d) - - scraped['operator'] = OPERATOR_REGEX.match(collapse_space(train_info_div.p.text)).groups()[0] - - results_div = results_div.div - status_div = results_div('div', recursive=False)[0] - route_text = collapse_space(status_div.h4.text) - route_from, route_to = ROUTE_REGEX.match(route_text).groups() - scraped['route'] = { - 'from': route_from, - 'to': route_to, - } - try: - status_line_match = SL_REGEX.match(collapse_space(status_div.div.text)) - slm_delay, slm_late, slm_arrival, slm_station = status_line_match.groups() - scraped['status'] = { - 'delay': (int(slm_delay) if slm_late == 'întârziere' else -int(slm_delay)) if slm_delay else 0, - 'station': slm_station, - 'state': SL_STATE_MAP[slm_arrival[0]], - } - except Exception: - scraped['status'] = None - - stations = status_div.ul('li', recursive=False) - scraped['stations'] = [] - dt_seq = DateTimeSequencer(date.year, date.month, date.day) - tz = pytz.timezone('Europe/Bucharest') - for station in stations: - station_scraped = {} - - left, middle, right = station.div('div', recursive=False) - station_scraped['name'] = collapse_space(middle.div.div('div', recursive=False)[0]('div', recursive=False)[0].text) - station_scraped['km'] = collapse_space(middle.div.div('div', recursive=False)[0]('div', recursive=False)[1].text) - station_scraped['km'] = int(KM_REGEX.match(station_scraped['km']).groups()[0]) - station_scraped['stoppingTime'] = collapse_space(middle.div.div('div', recursive=False)[0]('div', recursive=False)[2].text) - if not station_scraped['stoppingTime']: - station_scraped['stoppingTime'] = None - else: - st_value, st_minsec = STOPPING_TIME_REGEX.match(station_scraped['stoppingTime']).groups() - station_scraped['stoppingTime'] = int(st_value) - if st_minsec == 'min': - station_scraped['stoppingTime'] *= 60 - station_scraped['platform'] = collapse_space(middle.div.div('div', recursive=False)[0]('div', recursive=False)[3].text) - if not station_scraped['platform']: - station_scraped['platform'] = None - else: - station_scraped['platform'] = PLATFORM_REGEX.match(station_scraped['platform']).groups()[0] - - def scrape_time(elem, setter): - parts = elem.div.div('div', recursive=False) - if parts: - result = {} - - time, *_ = parts - result['scheduleTime'] = collapse_space(time.text) - st_hr, st_min = (int(comp) for comp in result['scheduleTime'].split(':')) - result['scheduleTime'] = tz.localize(dt_seq(st_hr, st_min)).isoformat() - if len(parts) >= 2: - _, status, *_ = parts - result['status'] = {} - on_time, delay, approx = STATION_DEPARR_STATUS_REGEX.match(collapse_space(status.text)).groups() - result['status']['delay'] = 0 if on_time else int(delay) - result['status']['real'] = not approx - else: - result['status'] = None - - setter(result) - else: - setter(None) - - scrape_time(left, lambda value: station_scraped.update(arrival=value)) - scrape_time(right, lambda value: station_scraped.update(departure=value)) - - scraped['stations'].append(station_scraped) - - return scraped diff --git a/scraper/scrape_train_schema.json b/scraper/scrape_train_schema.json deleted file mode 100644 index 2f588b6..0000000 --- a/scraper/scrape_train_schema.json +++ /dev/null @@ -1,134 +0,0 @@ -{ - "$schema": "http://json-schema.org/schema", - "title": "Train Info InfoFer Scrap Train Schema", - "description": "Results of scrapping InfoFer website for train info", - "definitions": { - "delayType": { - "description": "Delay of the train (negative for being early)", - "type": "integer" - }, - "stationArrDepTime": { - "description": "Time of arrival at/departure from station", - "type": ["object", "null"], - "properties": { - "scheduleTime": { - "description": "The time the train is scheduled to arrive/depart", - "type": "string", - "pattern": "^[0-9]{1,2}:[0-9]{2}$" - }, - "status": { - "type": ["object", "null"], - "properties": { - "delay": { - "$ref": "#/definitions/delayType" - }, - "real": { - "description": "Determines whether delay was actually reported or is an approximation", - "type": "boolean" - } - }, - "required": ["delay", "real"] - } - }, - "required": ["scheduleTime"] - } - }, - "type": "object", - "properties": { - "rank": { - "description": "The rank of the train", - "type": "string", - "examples": [ - "R", - "R-E", - "IR", - "IRN" - ] - }, - "number": { - "description": "The number of the train", - "type": "string", - "examples": [ - "74", - "15934" - ] - }, - "date": { - "description": "Date of departure from the first station (dd.mm.yyyy)", - "type": "string", - "pattern": "^[0-9]{1,2}\\.[0-9]{2}\\.[0-9]{4}$" - }, - "operator": { - "description": "Operator of the train", - "type": "string", - "examples": [ - "CFR Călători", - "Softrans", - "Regio Călători" - ] - }, - "route": { - "description": "Route of the train", - "type": "object", - "properties": { - "from": { - "type": "string" - }, - "to": { - "type": "string" - } - }, - "required": ["from", "to"] - }, - "status": { - "description": "Current status of the train", - "type": ["object", "null"], - "properties": { - "delay": { - "$ref": "#/definitions/delayType" - }, - "station": { - "type": "string" - }, - "state": { - "type": "string", - "enum": ["passing", "arrival", "departure"] - } - }, - "required": ["delay", "station", "state"] - }, - "stations": { - "description": "List of stations the train stops at", - "type": "array", - "items": { - "type": "object", - "properties": { - "name": { - "type": "string" - }, - "km": { - "description": "The distance the train travelled until reaching this station", - "type": "integer" - }, - "stoppingTime": { - "description": "The number of minutes the train is scheduled to stop in this station", - "type": ["integer", "null"], - "minimum": 0 - }, - "platform": { - "description": "The platform the train stopped at", - "type": ["string", "null"] - }, - "arrival": { - "$ref": "#/definitions/stationArrDepTime" - }, - "departure": { - "$ref": "#/definitions/stationArrDepTime" - } - }, - "required": ["name", "km"] - } - } - }, - "required": ["route", "stations", "rank", "number", "date", "operator"] -} \ No newline at end of file diff --git a/scraper/scrape_train_schema_v2.json b/scraper/scrape_train_schema_v2.json deleted file mode 100644 index 4a1433a..0000000 --- a/scraper/scrape_train_schema_v2.json +++ /dev/null @@ -1,134 +0,0 @@ -{ - "$schema": "http://json-schema.org/schema", - "title": "Train Info InfoFer Scrap Train Schema", - "description": "Results of scrapping InfoFer website for train info", - "definitions": { - "delayType": { - "description": "Delay of the train (negative for being early)", - "type": "integer" - }, - "stationArrDepTime": { - "description": "Time of arrival at/departure from station", - "type": ["object", "null"], - "properties": { - "scheduleTime": { - "description": "The time the train is scheduled to arrive/depart", - "type": "string", - "format": "date-time" - }, - "status": { - "type": ["object", "null"], - "properties": { - "delay": { - "$ref": "#/definitions/delayType" - }, - "real": { - "description": "Determines whether delay was actually reported or is an approximation", - "type": "boolean" - } - }, - "required": ["delay", "real"] - } - }, - "required": ["scheduleTime"] - } - }, - "type": "object", - "properties": { - "rank": { - "description": "The rank of the train", - "type": "string", - "examples": [ - "R", - "R-E", - "IR", - "IRN" - ] - }, - "number": { - "description": "The number of the train", - "type": "string", - "examples": [ - "74", - "15934" - ] - }, - "date": { - "description": "Date of departure from the first station (dd.mm.yyyy)", - "type": "string", - "pattern": "^[0-9]{1,2}\\.[0-9]{2}\\.[0-9]{4}$" - }, - "operator": { - "description": "Operator of the train", - "type": "string", - "examples": [ - "CFR Călători", - "Softrans", - "Regio Călători" - ] - }, - "route": { - "description": "Route of the train", - "type": "object", - "properties": { - "from": { - "type": "string" - }, - "to": { - "type": "string" - } - }, - "required": ["from", "to"] - }, - "status": { - "description": "Current status of the train", - "type": ["object", "null"], - "properties": { - "delay": { - "$ref": "#/definitions/delayType" - }, - "station": { - "type": "string" - }, - "state": { - "type": "string", - "enum": ["passing", "arrival", "departure"] - } - }, - "required": ["delay", "station", "state"] - }, - "stations": { - "description": "List of stations the train stops at", - "type": "array", - "items": { - "type": "object", - "properties": { - "name": { - "type": "string" - }, - "km": { - "description": "The distance the train travelled until reaching this station", - "type": "integer" - }, - "stoppingTime": { - "description": "The number of seconds the train is scheduled to stop in this station", - "type": ["integer", "null"], - "minimum": 1 - }, - "platform": { - "description": "The platform the train stopped at", - "type": ["string", "null"] - }, - "arrival": { - "$ref": "#/definitions/stationArrDepTime" - }, - "departure": { - "$ref": "#/definitions/stationArrDepTime" - } - }, - "required": ["name", "km"] - } - } - }, - "required": ["route", "stations", "rank", "number", "date", "operator"] -} \ No newline at end of file diff --git a/scraper/scraper.csproj b/scraper/scraper.csproj new file mode 100644 index 0000000..2a103a0 --- /dev/null +++ b/scraper/scraper.csproj @@ -0,0 +1,15 @@ + + + + enable + net6.0 + + + + + + + + + + diff --git a/scraper/scraper.py b/scraper/scraper.py deleted file mode 100644 index 8a594d9..0000000 --- a/scraper/scraper.py +++ /dev/null @@ -1,12 +0,0 @@ -#! /usr/bin/env python3 -from .scrape_train import scrape as scrape_train -from .scrape_station import scrape as scrape_station - -def main(): - train_no = 1538 - print(f'Testing package with train number {train_no}') - from pprint import pprint - pprint(scrape_train(train_no)) - -if __name__ == '__main__': - main() diff --git a/scraper/setup.py b/scraper/setup.py deleted file mode 100644 index ee96682..0000000 --- a/scraper/setup.py +++ /dev/null @@ -1,8 +0,0 @@ -from distutils.core import setup - -setup( - name='InfoFer_Scraper', - version='0.1', - author='Dan Cojocaru', - install_requires=['beautifulsoup4', 'requests', 'pytz'] -) \ No newline at end of file diff --git a/scraper/src/Exceptions/TrainNotThisDayException.cs b/scraper/src/Exceptions/TrainNotThisDayException.cs new file mode 100644 index 0000000..5fe1e06 --- /dev/null +++ b/scraper/src/Exceptions/TrainNotThisDayException.cs @@ -0,0 +1,16 @@ +using System; +using System.Runtime.Serialization; +using JetBrains.Annotations; + +namespace scraper.Exceptions { + /// + /// The train that the information was requested for might be running, + /// but it is not running on the requested day. + /// + public class TrainNotThisDayException : Exception { + public TrainNotThisDayException() : base() { } + protected TrainNotThisDayException([NotNull] SerializationInfo info, StreamingContext context) : base(info, context) { } + public TrainNotThisDayException([CanBeNull] string? message) : base(message) { } + public TrainNotThisDayException([CanBeNull] string? message, [CanBeNull] Exception? innerException) : base(message, innerException) { } + } +} \ No newline at end of file diff --git a/scraper/src/Models/Station.cs b/scraper/src/Models/Station.cs new file mode 100644 index 0000000..7752742 --- /dev/null +++ b/scraper/src/Models/Station.cs @@ -0,0 +1,109 @@ +using System; +using System.Collections.Generic; +using InfoferScraper.Models.Status; + +namespace InfoferScraper.Models.Station { + #region Interfaces + + public interface IStationScrapeResult { + public string StationName { get; } + /// + /// Date in the DD.MM.YYYY format + /// This date is taken as-is from the result. + /// + public string Date { get; } + + public IReadOnlyList? Arrivals { get; } + public IReadOnlyList? Departures { get; } + } + + public interface IStationArrDep { + public int? StoppingTime { get; } + public DateTimeOffset Time { get; } + public IStationTrain Train { get; } + public IStationStatus Status { get; } + } + + public interface IStationTrain { + public string Number { get; } + public string Operator { get; } + public string Rank { get; } + public IReadOnlyList Route { get; } + /// + /// Arrivals -> Departure station; Departures -> Destination station + /// + public string Terminus { get; } + } + + public interface IStationStatus : IStatus { + new int Delay { get; } + new bool Real { get; } + public string? Platform { get; } + } + + #endregion + + #region Implementations + + internal record StationScrapeResult : IStationScrapeResult { + private List? _modifyableArrivals = new(); + private List? _modifyableDepartures = new(); + + public string StationName { get; internal set; } = ""; + public string Date { get; internal set; } = ""; + public IReadOnlyList? Arrivals => _modifyableArrivals?.AsReadOnly(); + public IReadOnlyList? Departures => _modifyableDepartures?.AsReadOnly(); + + private void AddStationArrival(StationArrDep arrival) { + _modifyableArrivals ??= new List(); + _modifyableArrivals.Add(arrival); + } + + private void AddStationDeparture(StationArrDep departure) { + _modifyableDepartures ??= new List(); + _modifyableDepartures.Add(departure); + } + + internal void AddNewStationArrival(Action configurator) { + StationArrDep newStationArrDep = new(); + configurator(newStationArrDep); + AddStationArrival(newStationArrDep); + } + + internal void AddNewStationDeparture(Action configurator) { + StationArrDep newStationArrDep = new(); + configurator(newStationArrDep); + AddStationDeparture(newStationArrDep); + } + } + + internal record StationArrDep : IStationArrDep { + public int? StoppingTime { get; internal set; } + public DateTimeOffset Time { get; internal set; } + public IStationTrain Train => ModifyableTrain; + public IStationStatus Status => ModifyableStatus; + + internal readonly StationTrain ModifyableTrain = new(); + internal readonly StationStatus ModifyableStatus = new(); + } + + internal record StationTrain : IStationTrain { + private readonly List _modifyableRoute = new(); + + public string Number { get; internal set; } = ""; + public string Operator { get; internal set; } = ""; + public string Rank { get; internal set; } = ""; + public IReadOnlyList Route => _modifyableRoute.AsReadOnly(); + public string Terminus { get; internal set; } = ""; + + internal void AddRouteStation(string station) => _modifyableRoute.Add(station); + } + + internal record StationStatus : IStationStatus { + public int Delay { get; internal set; } + public bool Real { get; internal set; } + public string? Platform { get; internal set; } + } + + #endregion +} diff --git a/scraper/src/Models/Status.cs b/scraper/src/Models/Status.cs new file mode 100644 index 0000000..0b91f64 --- /dev/null +++ b/scraper/src/Models/Status.cs @@ -0,0 +1,15 @@ +namespace InfoferScraper.Models.Status { + public interface IStatus { + public int Delay { get; } + + /// + /// Determines whether delay was actually reported or is an approximation + /// + public bool Real { get; } + } + + internal record Status : IStatus { + public int Delay { get; set; } + public bool Real { get; set; } + } +} diff --git a/scraper/src/Models/Train.cs b/scraper/src/Models/Train.cs new file mode 100644 index 0000000..9ebdeb7 --- /dev/null +++ b/scraper/src/Models/Train.cs @@ -0,0 +1,316 @@ +using System; +using System.Collections.Generic; +using System.Text.Json; +using System.Text.Json.Serialization; +using InfoferScraper.Models.Status; +using InfoferScraper.Models.Train.JsonConverters; + +namespace InfoferScraper.Models.Train { + #region Interfaces + + public interface ITrainScrapeResult { + public string Rank { get; } + + public string Number { get; } + + /// + /// Date in the DD.MM.YYYY format + /// This date is taken as-is from the result. + /// + public string Date { get; } + + public string Operator { get; } + + public IReadOnlyList Groups { get; } + } + + public interface ITrainGroup { + public ITrainRoute Route { get; } + + public ITrainStatus? Status { get; } + public IReadOnlyList Stations { get; } + } + + public interface ITrainRoute { + public string From { get; } + public string To { get; } + } + + public interface ITrainStatus { + public int Delay { get; } + public string Station { get; } + public StatusKind State { get; } + } + + public interface ITrainStopDescription { + public string Name { get; } + public int Km { get; } + + /// + /// The time the train waits in the station in seconds + /// + public int? StoppingTime { get; } + + public string? Platform { get; } + public ITrainStopArrDep? Arrival { get; } + public ITrainStopArrDep? Departure { get; } + + public IReadOnlyList Notes { get; } + } + + public interface ITrainStopNote { + public NoteKind Kind { get; } + } + + public interface ITrainStopTrainNumberChangeNote : ITrainStopNote { + public string Rank { get; } + public string Number { get; } + } + + public interface ITrainStopDepartsAsNote : ITrainStopNote { + public string Rank { get; } + public string Number { get; } + public DateTimeOffset DepartureDate { get; } + } + + public interface ITrainStopDetachingWagonsNote : ITrainStopNote { + public string Station { get; } + } + + public interface ITrainStopReceivingWagonsNote : ITrainStopNote { + public string Station { get; } + } + + public interface ITrainStopArrDep { + public DateTimeOffset ScheduleTime { get; } + public IStatus? Status { get; } + } + + #endregion + + [JsonConverter(typeof(StatusKindConverter))] + public enum StatusKind { + Passing, + Arrival, + Departure, + } + + [JsonConverter(typeof(NoteKindConverter))] + public enum NoteKind { + TrainNumberChange, + DetachingWagons, + ReceivingWagons, + DepartsAs, + } + + #region Implementations + + internal record TrainScrapeResult : ITrainScrapeResult { + private List ModifyableGroups { get; set; } = new(); + public string Rank { get; set; } = ""; + public string Number { get; set; } = ""; + public string Date { get; set; } = ""; + public string Operator { get; set; } = ""; + public IReadOnlyList Groups => ModifyableGroups.AsReadOnly(); + + private void AddTrainGroup(ITrainGroup trainGroup) { + ModifyableGroups.Add(trainGroup); + } + + internal void AddTrainGroup(Action configurator) { + TrainGroup newTrainGroup = new(); + configurator(newTrainGroup); + AddTrainGroup(newTrainGroup); + } + } + + internal record TrainGroup : ITrainGroup { + private List ModifyableStations { get; set; } = new(); + public ITrainRoute Route { get; init; } = new TrainRoute(); + public ITrainStatus? Status { get; private set; } + public IReadOnlyList Stations => ModifyableStations.AsReadOnly(); + + private void AddStopDescription(ITrainStopDescription stopDescription) { + ModifyableStations.Add(stopDescription); + } + + internal void AddStopDescription(Action configurator) { + TrainStopDescription newStopDescription = new(); + configurator(newStopDescription); + AddStopDescription(newStopDescription); + } + + internal void ConfigureRoute(Action configurator) { + configurator((TrainRoute)Route); + } + + internal void MakeStatus(Action configurator) { + TrainStatus newStatus = new(); + configurator(newStatus); + Status = newStatus; + } + } + + internal record TrainRoute : ITrainRoute { + public TrainRoute() { + From = ""; + To = ""; + } + + public string From { get; set; } + public string To { get; set; } + } + + internal record TrainStatus : ITrainStatus { + public int Delay { get; set; } + public string Station { get; set; } = ""; + public StatusKind State { get; set; } + } + + internal record TrainStopDescription : ITrainStopDescription { + private List ModifyableNotes { get; } = new(); + public string Name { get; set; } = ""; + public int Km { get; set; } + public int? StoppingTime { get; set; } + public string? Platform { get; set; } + public ITrainStopArrDep? Arrival { get; private set; } + public ITrainStopArrDep? Departure { get; private set; } + public IReadOnlyList Notes => ModifyableNotes.AsReadOnly(); + + internal void MakeArrival(Action configurator) { + TrainStopArrDep newArrival = new(); + configurator(newArrival); + Arrival = newArrival; + } + + internal void MakeDeparture(Action configurator) { + TrainStopArrDep newDeparture = new(); + configurator(newDeparture); + Departure = newDeparture; + } + + class DepartsAsNote : ITrainStopDepartsAsNote { + public NoteKind Kind => NoteKind.DepartsAs; + public string Rank { get; set; } = ""; + public string Number { get; set; } = ""; + public DateTimeOffset DepartureDate { get; set; } + } + + class TrainNumberChangeNote : ITrainStopTrainNumberChangeNote { + public NoteKind Kind => NoteKind.TrainNumberChange; + public string Rank { get; set; } = ""; + public string Number { get; set; } = ""; + } + + class ReceivingWagonsNote : ITrainStopReceivingWagonsNote { + public NoteKind Kind => NoteKind.ReceivingWagons; + public string Station { get; set; } = ""; + } + + class DetachingWagonsNote : ITrainStopReceivingWagonsNote { + public NoteKind Kind => NoteKind.DetachingWagons; + public string Station { get; set; } = ""; + } + + internal void AddDepartsAsNote(string rank, string number, DateTimeOffset departureDate) { + ModifyableNotes.Add(new DepartsAsNote { Rank = rank, Number = number, DepartureDate = departureDate }); + } + + internal void AddTrainNumberChangeNote(string rank, string number) { + ModifyableNotes.Add(new TrainNumberChangeNote { Rank = rank, Number = number }); + } + + internal void AddReceivingWagonsNote(string station) { + ModifyableNotes.Add(new ReceivingWagonsNote { Station = station }); + } + + internal void AddDetachingWagonsNote(string station) { + ModifyableNotes.Add(new DetachingWagonsNote { Station = station }); + } + } + + public record TrainStopArrDep : ITrainStopArrDep { + public DateTimeOffset ScheduleTime { get; set; } + public IStatus? Status { get; private set; } + + internal void MakeStatus(Action configurator) { + Status.Status newStatus = new(); + configurator(newStatus); + Status = newStatus; + } + } + + #endregion + + #region JSON Converters + + namespace JsonConverters { + internal class StatusKindConverter : JsonConverterFactory { + public override bool CanConvert(Type typeToConvert) { + return typeToConvert == typeof(StatusKind); + } + + public override JsonConverter? CreateConverter(Type typeToConvert, JsonSerializerOptions options) { + return new Converter(); + } + + private class Converter : JsonConverter { + public override StatusKind Read( + ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options + ) { + return reader.GetString() switch { + "arrival" => StatusKind.Arrival, + "departure" => StatusKind.Departure, + "passing" => StatusKind.Passing, + _ => throw new NotImplementedException() + }; + } + + public override void Write(Utf8JsonWriter writer, StatusKind value, JsonSerializerOptions options) { + writer.WriteStringValue(value switch { + StatusKind.Passing => "passing", + StatusKind.Arrival => "arrival", + StatusKind.Departure => "departure", + _ => throw new NotImplementedException() + }); + } + } + } + + internal class NoteKindConverter : JsonConverterFactory { + public override bool CanConvert(Type typeToConvert) { + return typeToConvert == typeof(NoteKind); + } + + public override JsonConverter? CreateConverter(Type typeToConvert, JsonSerializerOptions options) { + return new Converter(); + } + + private class Converter : JsonConverter { + public override NoteKind Read( + ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options + ) { + return reader.GetString() switch { + "departsAs" => NoteKind.DepartsAs, + "trainNumberChange" => NoteKind.TrainNumberChange, + "receivingWagons" => NoteKind.ReceivingWagons, + "detachingWagons" => NoteKind.DetachingWagons, + _ => throw new NotImplementedException() + }; + } + + public override void Write(Utf8JsonWriter writer, NoteKind value, JsonSerializerOptions options) { + writer.WriteStringValue(value switch { + NoteKind.DepartsAs => "departsAs", + NoteKind.TrainNumberChange => "trainNumberChange", + NoteKind.DetachingWagons => "detachingWagons", + NoteKind.ReceivingWagons => "receivingWagons", + _ => throw new NotImplementedException() + }); + } + } + } + } + + #endregion +} diff --git a/scraper/src/Scrapers/Station.cs b/scraper/src/Scrapers/Station.cs new file mode 100644 index 0000000..920f281 --- /dev/null +++ b/scraper/src/Scrapers/Station.cs @@ -0,0 +1,190 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Net; +using System.Net.Http; +using System.Text.RegularExpressions; +using System.Threading.Tasks; +using AngleSharp; +using AngleSharp.Dom; +using AngleSharp.Html.Dom; +using Flurl; +using InfoferScraper.Models.Station; +using NodaTime; +using NodaTime.Extensions; + +namespace InfoferScraper.Scrapers { + public static class StationScraper { + private static readonly Regex StationInfoRegex = new($@"^([{Utils.RoLetters}.0-9 ]+)\sîn\s([0-9.]+)$"); + + private static readonly Regex StoppingTimeRegex = new( + @"^(necunoscută \(stație terminus\))|(?:([0-9]+) (min|sec) \((?:începând cu|până la) ([0-9]{1,2}:[0-9]{2})\))$" + ); + + private static readonly Regex StatusRegex = new( + @"^(?:la timp|([+-]?[0-9]+) min \((?:întârziere|mai devreme)\))(\*?)$" + ); + + private static readonly Regex PlatformRegex = new(@"^linia\s([A-Za-z0-9]+)$"); + + private static readonly DateTimeZone BucharestTz = DateTimeZoneProviders.Tzdb["Europe/Bucharest"]; + + private const string BaseUrl = "https://mersultrenurilor.infofer.ro/ro-RO/"; + + private static readonly CookieContainer CookieContainer = new(); + + private static readonly HttpClient HttpClient = new(new HttpClientHandler { + CookieContainer = CookieContainer, + UseCookies = true, + }) { + BaseAddress = new Uri(BaseUrl), + DefaultRequestVersion = new Version(2, 0), + }; + + public static async Task Scrape(string stationName, DateTimeOffset? date = null) { + var dateInstant = date?.ToInstant().InZone(BucharestTz); + date = dateInstant?.ToDateTimeOffset(); + + stationName = stationName.RoLettersToEn(); + + var result = new StationScrapeResult(); + + var asConfig = Configuration.Default; + var asContext = BrowsingContext.New(asConfig); + + var firstUrl = "Statie" + .AppendPathSegment(Regex.Replace(stationName, @"\s", "-")); + if (date != null) { + firstUrl = firstUrl.SetQueryParam("Date", $"{date:d.MM.yyyy}"); + } + var firstResponse = await HttpClient.GetStringAsync(firstUrl); + var firstDocument = await asContext.OpenAsync(req => req.Content(firstResponse)); + var firstForm = firstDocument.GetElementById("form-search")!; + + var firstResult = firstForm + .QuerySelectorAll("input") + .Where(elem => elem.Name != null) + .ToDictionary(elem => elem.Name!, elem => elem.Value); + + var secondUrl = "".AppendPathSegments("Stations", "StationsResult"); + var secondResponse = await HttpClient.PostAsync( + secondUrl, +#pragma warning disable CS8620 + new FormUrlEncodedContent(firstResult) +#pragma warning restore CS8620 + ); + var secondResponseContent = await secondResponse.Content.ReadAsStringAsync(); + var secondDocument = await asContext.OpenAsync( + req => req.Content(secondResponseContent) + ); + + var (stationInfoDiv, (_, (departuresDiv, (arrivalsDiv, _)))) = secondDocument + .QuerySelectorAll("body > div"); + + (result.StationName, (result.Date, _)) = (StationInfoRegex.Match( + stationInfoDiv + .QuerySelector(":scope > h2")! + .Text() + .WithCollapsedSpaces() + ).Groups as IEnumerable).Skip(1).Select(group => group.Value); + + var (dateDay, (dateMonth, (dateYear, _))) = result.Date.Split('.').Select(int.Parse); + Utils.DateTimeSequencer dtSeq = new(dateYear, dateMonth, dateDay); + + void ParseArrDepList(IElement element, Action> adder) { + if (element.QuerySelector(":scope > div > ul") == null) return; + + foreach (var trainElement in element.QuerySelectorAll(":scope > div > ul > li")) { + adder(arrDep => { + var divs = trainElement.QuerySelectorAll(":scope > div"); + var dataDiv = divs[0]; + var statusDiv = divs.Length >= 2 ? divs[1] : null; + + var (dataMainDiv, (dataDetailsDiv, _)) = dataDiv + .QuerySelectorAll(":scope > div"); + var (timeDiv, (destDiv, (trainDiv, _))) = dataMainDiv + .QuerySelectorAll(":scope > div"); + var (operatorDiv, (routeDiv, (stoppingTimeDiv, _))) = dataDetailsDiv + .QuerySelectorAll(":scope > div > div"); + + var timeResult = timeDiv + .QuerySelectorAll(":scope > div > div > div")[1] + .Text() + .WithCollapsedSpaces(); + var (stHr, (stMin, _)) = timeResult.Split(':').Select(int.Parse); + arrDep.Time = BucharestTz.AtLeniently( + dtSeq.Next(stHr, stMin).ToLocalDateTime() + ).ToDateTimeOffset(); + + // ReSharper disable once UnusedVariable // stOppositeTime: might be useful in the future + var (unknownSt, (st, (minsec, (stOppositeTime, _)))) = (StoppingTimeRegex.Match( + stoppingTimeDiv.QuerySelectorAll(":scope > div > div")[1] + .Text() + .WithCollapsedSpaces() + ).Groups as IEnumerable).Skip(1).Select(group => group.Value); + if (unknownSt.Length == 0 && st.Length > 0) { + arrDep.StoppingTime = int.Parse(st); + if (minsec == "min") { + arrDep.StoppingTime *= 60; + } + } + + arrDep.ModifyableTrain.Rank = trainDiv + .QuerySelectorAll(":scope > div > div > div")[1] + .QuerySelector(":scope > span")! + .Text() + .WithCollapsedSpaces(); + arrDep.ModifyableTrain.Number = trainDiv + .QuerySelectorAll(":scope > div > div > div")[1] + .QuerySelector(":scope > a")! + .Text() + .WithCollapsedSpaces(); + arrDep.ModifyableTrain.Terminus = destDiv + .QuerySelectorAll(":scope > div > div > div")[1] + .Text() + .WithCollapsedSpaces(); + arrDep.ModifyableTrain.Operator = operatorDiv + .QuerySelectorAll(":scope > div > div")[1] + .Text() + .WithCollapsedSpaces(); + foreach (var station in routeDiv.QuerySelectorAll(":scope > div > div")[1] + .Text() + .WithCollapsedSpaces() + .Split(" - ")) { + arrDep.ModifyableTrain.AddRouteStation(station); + } + + if (statusDiv == null) { + return; + } + + var statusDivComponents = statusDiv + .QuerySelectorAll(":scope > div")[0] + .QuerySelectorAll(":scope > div"); + + var delayDiv = statusDivComponents[0]; + + var (delayMin, (approx, _)) = (StatusRegex.Match( + delayDiv + .Text() + .WithCollapsedSpaces() + ).Groups as IEnumerable).Skip(1).Select(group => group.Value); + arrDep.ModifyableStatus.Real = string.IsNullOrEmpty(approx); + arrDep.ModifyableStatus.Delay = delayMin.Length == 0 ? 0 : int.Parse(delayMin); + + if (statusDivComponents.Length < 2) return; + + var platformDiv = statusDivComponents[1]; + arrDep.ModifyableStatus.Platform = PlatformRegex.Match(platformDiv.Text().WithCollapsedSpaces()) + .Groups[1].Value; + }); + } + } + + ParseArrDepList(departuresDiv, result.AddNewStationDeparture); + ParseArrDepList(arrivalsDiv, result.AddNewStationArrival); + + return result; + } + } +} diff --git a/scraper/src/Scrapers/Train.cs b/scraper/src/Scrapers/Train.cs new file mode 100644 index 0000000..ba40cff --- /dev/null +++ b/scraper/src/Scrapers/Train.cs @@ -0,0 +1,239 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Net; +using System.Net.Http; +using System.Text.RegularExpressions; +using System.Threading.Tasks; +using AngleSharp; +using AngleSharp.Dom; +using AngleSharp.Html.Dom; +using Flurl; +using InfoferScraper.Models.Train; +using NodaTime; +using NodaTime.Extensions; +using scraper.Exceptions; + +namespace InfoferScraper.Scrapers { + public static class TrainScraper { + private const string BaseUrl = "https://mersultrenurilor.infofer.ro/ro-RO/"; + private static readonly Regex TrainInfoRegex = new(@"^([A-Z-]+)\s([0-9]+)\sîn\s([0-9.]+)$"); + private static readonly Regex OperatorRegex = new(@"^Operat\sde\s(.+)$"); + + private static readonly Regex RouteRegex = + new(@$"^Parcurs\stren\s([{Utils.RoLetters} ]+)[-–]([{Utils.RoLetters}\s]+)$"); + + private static readonly Regex SlRegex = + new( + @"^(?:Fără|([0-9]+)\smin)\s(întârziere|mai\sdevreme)\sla\s(trecerea\sfără\soprire\sprin|sosirea\sîn|plecarea\sdin)\s(.+)\.$"); + + private static readonly Dictionary SlStateMap = new() { + { 't', StatusKind.Passing }, + { 's', StatusKind.Arrival }, + { 'p', StatusKind.Departure }, + }; + + private static readonly Regex KmRegex = new(@"^km\s([0-9]+)$"); + private static readonly Regex StoppingTimeRegex = new(@"^([0-9]+)\s(min|sec)\soprire$"); + private static readonly Regex PlatformRegex = new(@"^linia\s(.+)$"); + + private static readonly Regex StationArrdepStatusRegex = + new(@"^(?:(la timp)|(?:((?:\+|-)[0-9]+) min \((?:(?:întârziere)|(?:mai devreme))\)))(\*?)$"); + + private static readonly Regex TrainNumberChangeNoteRegex = + new(@"^Trenul își schimbă numărul în\s([A-Z]+)\s([0-9]+)$"); + private static readonly Regex DepartsAsNoteRegex = + new(@"^Trenul pleacă cu numărul\s([A-Z]+)\s([0-9]+)\sîn\s([0-9]{2}).([0-9]{2}).([0-9]{4})$"); + private static readonly Regex ReceivingWagonsNoteRegex = + new(@"^Trenul primește vagoane de la\s(.+)\.$"); + private static readonly Regex DetachingWagonsNoteRegex = + new(@"^Trenul detașează vagoane pentru stația\s(.+)\.$"); + + private static readonly DateTimeZone BucharestTz = DateTimeZoneProviders.Tzdb["Europe/Bucharest"]; + + private static readonly CookieContainer CookieContainer = new(); + private static readonly HttpClient HttpClient = new(new HttpClientHandler { + CookieContainer = CookieContainer, + UseCookies = true, + }) { + BaseAddress = new Uri(BaseUrl), + DefaultRequestVersion = new Version(2, 0), + }; + + public static async Task Scrape(string trainNumber, DateTimeOffset? dateOverride = null) { + var dateOverrideInstant = dateOverride?.ToInstant().InZone(BucharestTz); + dateOverride = dateOverrideInstant?.ToDateTimeOffset(); + TrainScrapeResult result = new(); + + var asConfig = Configuration.Default; + var asContext = BrowsingContext.New(asConfig); + + var firstUrl = "Tren" + .AppendPathSegment(trainNumber); + if (dateOverride != null) { + firstUrl = firstUrl.SetQueryParam("Date", $"{dateOverride:d.MM.yyyy}"); + } + var firstResponse = await HttpClient.GetStringAsync(firstUrl); + var firstDocument = await asContext.OpenAsync(req => req.Content(firstResponse)); + var firstForm = firstDocument.GetElementById("form-search")!; + + var firstResult = firstForm + .QuerySelectorAll("input") + .Where(elem => elem.Name != null) + .ToDictionary(elem => elem.Name!, elem => elem.Value); + + var secondUrl = "".AppendPathSegments("Trains", "TrainsResult"); + var secondResponse = await HttpClient.PostAsync( + secondUrl, +#pragma warning disable CS8620 + new FormUrlEncodedContent(firstResult) +#pragma warning restore CS8620 + ); + var secondResponseContent = await secondResponse.Content.ReadAsStringAsync(); + var secondDocument = await asContext.OpenAsync( + req => req.Content(secondResponseContent) + ); + + var (trainInfoDiv, (_, (_, (resultsDiv, _)))) = secondDocument + .QuerySelectorAll("body > div"); + if (trainInfoDiv == null) { + return null; + } + if (resultsDiv == null) { + throw new TrainNotThisDayException(); + } + trainInfoDiv = trainInfoDiv.QuerySelectorAll(":scope > div > div").First(); + + (result.Rank, (result.Number, (result.Date, _))) = (TrainInfoRegex.Match( + trainInfoDiv.QuerySelector(":scope > h2")!.Text().WithCollapsedSpaces() + ).Groups as IEnumerable).Select(group => group.Value).Skip(1); + var (scrapedDateD, (scrapedDateM, (scrapedDateY, _))) = result.Date + .Split('.') + .Select(int.Parse); + var date = new DateTime(scrapedDateY, scrapedDateM, scrapedDateD); + + result.Operator = (OperatorRegex.Match( + trainInfoDiv.QuerySelector(":scope > p")!.Text().WithCollapsedSpaces() + ).Groups as IEnumerable).Skip(1).First().Value; + + foreach (var groupDiv in resultsDiv.QuerySelectorAll(":scope > div")) { + result.AddTrainGroup(group => { + var statusDiv = groupDiv.QuerySelectorAll(":scope > div").First(); + var routeText = statusDiv.QuerySelector(":scope > h4")!.Text().WithCollapsedSpaces(); + group.ConfigureRoute(route => { + (route.From, (route.To, _)) = (RouteRegex.Match(routeText).Groups as IEnumerable).Skip(1) + .Select(group => group.Value); + }); + + try { + var statusLineMatch = + SlRegex.Match(statusDiv.QuerySelector(":scope > div")!.Text().WithCollapsedSpaces()); + var (slmDelay, (slmLate, (slmArrival, (slmStation, _)))) = + (statusLineMatch.Groups as IEnumerable).Skip(1).Select(group => group.Value); + group.MakeStatus(status => { + status.Delay = string.IsNullOrEmpty(slmDelay) ? 0 : + slmLate == "întârziere" ? int.Parse(slmDelay) : -int.Parse(slmDelay); + status.Station = slmStation; + status.State = SlStateMap[slmArrival[0]]; + }); + } + catch { + // ignored + } + + Utils.DateTimeSequencer dtSeq = new(date.Year, date.Month, date.Day); + var stations = statusDiv.QuerySelectorAll(":scope > ul > li"); + foreach (var station in stations) { + group.AddStopDescription(stopDescription => { + var (left, (middle, (right, _))) = station + .QuerySelectorAll(":scope > div > div"); + var (stopDetails, (stopNotes, _)) = middle + .QuerySelectorAll(":scope > div > div > div"); + stopDescription.Name = stopDetails + .QuerySelectorAll(":scope > div")[0] + .Text() + .WithCollapsedSpaces(); + var scrapedKm = stopDetails + .QuerySelectorAll(":scope > div")[1] + .Text() + .WithCollapsedSpaces(); + stopDescription.Km = int.Parse( + (KmRegex.Match(scrapedKm).Groups as IEnumerable).Skip(1).First().Value + ); + var scrapedStoppingTime = stopDetails + .QuerySelectorAll(":scope > div")[2] + .Text() + .WithCollapsedSpaces(); + if (!string.IsNullOrEmpty(scrapedStoppingTime)) { + var (stValue, (stMinsec, _)) = + (StoppingTimeRegex.Match(scrapedStoppingTime).Groups as IEnumerable) + .Skip(1) + .Select(group => group.Value); + stopDescription.StoppingTime = int.Parse(stValue); + if (stMinsec == "min") stopDescription.StoppingTime *= 60; + } + + var scrapedPlatform = stopDetails + .QuerySelectorAll(":scope > div")[3] + .Text() + .WithCollapsedSpaces(); + if (!string.IsNullOrEmpty(scrapedPlatform)) + stopDescription.Platform = PlatformRegex.Match(scrapedPlatform).Groups[1].Value; + + void ScrapeTime(IElement element, ref TrainStopArrDep arrDep) { + var parts = element.QuerySelectorAll(":scope > div > div > div"); + if (parts.Length == 0) throw new OperationCanceledException(); + var time = parts[0]; + var scrapedTime = time.Text().WithCollapsedSpaces(); + var (stHour, (stMin, _)) = scrapedTime.Split(':').Select(int.Parse); + arrDep.ScheduleTime = BucharestTz.AtLeniently(dtSeq.Next(stHour, stMin).ToLocalDateTime()) + .ToDateTimeOffset(); + + if (parts.Length < 2) return; + + var statusElement = parts[1]; + var (onTime, (delay, (approx, _))) = (StationArrdepStatusRegex.Match( + statusElement.Text().WithCollapsedSpaces(replaceWith: " ") + ).Groups as IEnumerable).Skip(1).Select(group => group.Value); + arrDep.MakeStatus(status => { + status.Delay = string.IsNullOrEmpty(onTime) ? int.Parse(delay) : 0; + status.Real = string.IsNullOrEmpty(approx); + }); + } + + try { + stopDescription.MakeArrival(arrival => { ScrapeTime(left, ref arrival); }); + } + catch (OperationCanceledException) { } + + try { + stopDescription.MakeDeparture(departure => { ScrapeTime(right, ref departure); }); + } + catch (OperationCanceledException) { } + + foreach (var noteDiv in stopNotes.QuerySelectorAll(":scope > div > div")) { + var noteText = noteDiv.Text().WithCollapsedSpaces(); + Match trainNumberChangeMatch, departsAsMatch, detachingWagons, receivingWagons; + if ((trainNumberChangeMatch = TrainNumberChangeNoteRegex.Match(noteText)).Success) { + stopDescription.AddTrainNumberChangeNote(trainNumberChangeMatch.Groups[1].Value, trainNumberChangeMatch.Groups[2].Value); + } + else if ((departsAsMatch = DepartsAsNoteRegex.Match(noteText)).Success) { + var groups = departsAsMatch.Groups; + var departureDate = BucharestTz.AtStrictly(new(int.Parse(groups[5].Value), int.Parse(groups[4].Value), int.Parse(groups[3].Value), 0, 0)); + stopDescription.AddDepartsAsNote(groups[1].Value, groups[2].Value, departureDate.ToDateTimeOffset()); + } + else if ((detachingWagons = DetachingWagonsNoteRegex.Match(noteText)).Success) { + stopDescription.AddDetachingWagonsNote(detachingWagons.Groups[1].Value); + } + else if ((receivingWagons = ReceivingWagonsNoteRegex.Match(noteText)).Success) { + stopDescription.AddReceivingWagonsNote(receivingWagons.Groups[1].Value); + } + } + }); + } + }); + } + return result; + } + } +} // namespace diff --git a/scraper/src/Utils/DateTimeSequencer.cs b/scraper/src/Utils/DateTimeSequencer.cs new file mode 100644 index 0000000..fd55eb7 --- /dev/null +++ b/scraper/src/Utils/DateTimeSequencer.cs @@ -0,0 +1,25 @@ +using System; + +namespace InfoferScraper { + public static partial class Utils { + public class DateTimeSequencer { + private DateTime _current; + + public DateTimeSequencer(int year, int month, int day) { + _current = new DateTime(year, month, day); + _current = _current.AddSeconds(-1); + } + + public DateTimeSequencer(DateTime startingDateTime) { + _current = startingDateTime.AddSeconds(-1); + } + + public DateTime Next(int hour, int minute = 0, int second = 0) { + DateTime potentialNewDate = new(_current.Year, _current.Month, _current.Day, hour, minute, second); + if (_current > potentialNewDate) potentialNewDate = potentialNewDate.AddDays(1); + _current = potentialNewDate; + return _current; + } + } + } +} diff --git a/scraper/src/Utils/DeconstructIEnumerable.cs b/scraper/src/Utils/DeconstructIEnumerable.cs new file mode 100644 index 0000000..d022d20 --- /dev/null +++ b/scraper/src/Utils/DeconstructIEnumerable.cs @@ -0,0 +1,18 @@ +using System.Collections.Generic; +using System.Diagnostics; + +namespace InfoferScraper { + public static partial class Utils { + [DebuggerStepThrough] + public static void Deconstruct(this IEnumerable enumerable, out T? first, out IEnumerable rest) { + var enumerator = enumerable.GetEnumerator(); + first = enumerator.MoveNext() ? enumerator.Current : default; + rest = enumerator.AsEnumerable(); + } + + [DebuggerStepThrough] + private static IEnumerable AsEnumerable(this IEnumerator enumerator) { + while (enumerator.MoveNext()) yield return enumerator.Current; + } + } +} diff --git a/scraper/src/Utils/RoLetters.cs b/scraper/src/Utils/RoLetters.cs new file mode 100644 index 0000000..c1beba7 --- /dev/null +++ b/scraper/src/Utils/RoLetters.cs @@ -0,0 +1,5 @@ +namespace InfoferScraper { + public static partial class Utils { + public const string RoLetters = @"A-Za-zăâîșțĂÂÎȚȘ"; + } +} diff --git a/scraper/src/Utils/RoLettersToEn.cs b/scraper/src/Utils/RoLettersToEn.cs new file mode 100644 index 0000000..a3c75d4 --- /dev/null +++ b/scraper/src/Utils/RoLettersToEn.cs @@ -0,0 +1,24 @@ +using System.Collections.Generic; +using System.Linq; +using System.Runtime.InteropServices.ComTypes; + +namespace InfoferScraper { + public static partial class Utils { + private static readonly Dictionary RoToEn = new() { + { 'ă', 'a' }, + { 'Ă', 'A' }, + { 'â', 'a' }, + { 'Â', 'A' }, + { 'î', 'i' }, + { 'Î', 'I' }, + { 'ș', 's' }, + { 'Ș', 'S' }, + { 'ț', 't' }, + { 'Ț', 'T' }, + }; + + public static string RoLettersToEn(this string str) { + return string.Concat(str.Select(letter => RoToEn.GetValueOrDefault(letter, letter))); + } + } +} \ No newline at end of file diff --git a/scraper/src/Utils/WithCollapsedSpaces.cs b/scraper/src/Utils/WithCollapsedSpaces.cs new file mode 100644 index 0000000..c6363e3 --- /dev/null +++ b/scraper/src/Utils/WithCollapsedSpaces.cs @@ -0,0 +1,12 @@ +using System.Text.RegularExpressions; + +namespace InfoferScraper { + public static partial class Utils { + private static readonly Regex WhitespaceRegex = new(@"(\s)\s*"); + + public static string WithCollapsedSpaces(this string str, bool trim = true, string replaceWith = "$1") { + var collapsed = WhitespaceRegex.Replace(str, replaceWith); + return trim ? collapsed.Trim() : collapsed; + } + } +} diff --git a/scraper/utils.py b/scraper/utils.py deleted file mode 100644 index ad314be..0000000 --- a/scraper/utils.py +++ /dev/null @@ -1,79 +0,0 @@ -import re - -from datetime import datetime, timedelta -from urllib.parse import urlencode, quote - -# From: https://en.wikipedia.org/wiki/Whitespace_character#Unicode -ASCII_WHITESPACE = [ - '\u0009', # HT; Character Tabulation - '\u000a', # LF - '\u000b', # VT; Line Tabulation - '\u000c', # FF; Form Feed - '\u000d', # CR - '\u0020', # Space -] - -WHITESPACE = ASCII_WHITESPACE + [ - '\u0085', # NEL; Next Line - '\u00a0', # No-break Space;   - '\u1680', # Ogham Space Mark - '\u2000', # En Quad - '\u2001', # Em Quad - '\u2002', # En Space - '\u2003', # Em Space - '\u2004', # Three-per-em Space - '\u2005', # Four-per-em Space - '\u2006', # Six-per-em Space - '\u2007', # Figure Space - '\u2008', # Punctuation Space - '\u2009', # Thin Space - '\u200A', # Hair Space - '\u2028', # Line Separator - '\u2029', # Paragraph Separator - '\u202f', # Narrow No-break Space - '\u205d', # Meduam Mathematical Space - '\u3000', # Ideographic Space -] - -WHITESPACE_REGEX = re.compile(rf'[{"".join(WHITESPACE)}]+', flags=re.MULTILINE) - -class DateTimeSequencer: - def __init__(self, year: int, month: int, day: int) -> None: - self.current = datetime(year, month, day, 0, 0, 0) - self.current -= timedelta(seconds=1) - - def __call__(self, hour: int, minute: int = 0, second: int = 0) -> datetime: - potential_new_date = datetime(self.current.year, self.current.month, self.current.day, hour, minute, second) - if (self.current > potential_new_date): - potential_new_date += timedelta(days=1) - self.current = potential_new_date - return self.current - -def collapse_space(string: str) -> str: - return WHITESPACE_REGEX.sub( - ' ', - string, - ).strip() - -def build_url(base: str, /, query: dict = {}, **kwargs): - result = base.format(**{ k: quote(str(v)) for k, v in kwargs.items() }) - if query: - result += '?' - result += urlencode(query) - return result - -RO_TO_EN = { - 'ă': 'a', - 'Ă': 'A', - 'â': 'a', - 'Â': 'A', - 'î': 'i', - 'Î': 'I', - 'ș': 's', - 'Ș': 'S', - 'ț': 't', - 'Ț': 'T', -} - -def ro_letters_to_en(string: str) -> str: - return ''.join((RO_TO_EN.get(letter, letter) for letter in string)) diff --git a/server/.vscode/launch.json b/server/.vscode/launch.json new file mode 100644 index 0000000..cb7858d --- /dev/null +++ b/server/.vscode/launch.json @@ -0,0 +1,35 @@ +{ + "version": "0.2.0", + "configurations": [ + { + // Use IntelliSense to find out which attributes exist for C# debugging + // Use hover for the description of the existing attributes + // For further information visit https://github.com/OmniSharp/omnisharp-vscode/blob/master/debugger-launchjson.md + "name": ".NET Core Launch (web)", + "type": "coreclr", + "request": "launch", + "preLaunchTask": "build", + // If you have changed target frameworks, make sure to update the program path. + "program": "${workspaceFolder}/bin/Debug/net5.0/server.dll", + "args": [], + "cwd": "${workspaceFolder}", + "stopAtEntry": false, + // Enable launching a web browser when ASP.NET Core starts. For more information: https://aka.ms/VSCode-CS-LaunchJson-WebBrowser + "serverReadyAction": { + "action": "openExternally", + "pattern": "\\bNow listening on:\\s+(https?://\\S+)" + }, + "env": { + "ASPNETCORE_ENVIRONMENT": "Development" + }, + "sourceFileMap": { + "/Views": "${workspaceFolder}/Views" + } + }, + { + "name": ".NET Core Attach", + "type": "coreclr", + "request": "attach" + } + ] +} \ No newline at end of file diff --git a/server/.vscode/tasks.json b/server/.vscode/tasks.json new file mode 100644 index 0000000..ca03107 --- /dev/null +++ b/server/.vscode/tasks.json @@ -0,0 +1,42 @@ +{ + "version": "2.0.0", + "tasks": [ + { + "label": "build", + "command": "dotnet", + "type": "process", + "args": [ + "build", + "${workspaceFolder}/server.csproj", + "/property:GenerateFullPaths=true", + "/consoleloggerparameters:NoSummary" + ], + "problemMatcher": "$msCompile" + }, + { + "label": "publish", + "command": "dotnet", + "type": "process", + "args": [ + "publish", + "${workspaceFolder}/server.csproj", + "/property:GenerateFullPaths=true", + "/consoleloggerparameters:NoSummary" + ], + "problemMatcher": "$msCompile" + }, + { + "label": "watch", + "command": "dotnet", + "type": "process", + "args": [ + "watch", + "run", + "${workspaceFolder}/server.csproj", + "/property:GenerateFullPaths=true", + "/consoleloggerparameters:NoSummary" + ], + "problemMatcher": "$msCompile" + } + ] +} \ No newline at end of file diff --git a/server/Controllers/V1/TrainController.cs b/server/Controllers/V1/TrainController.cs new file mode 100644 index 0000000..a49b919 --- /dev/null +++ b/server/Controllers/V1/TrainController.cs @@ -0,0 +1,73 @@ +using System; +using System.Linq; +using System.Threading.Tasks; +using Microsoft.AspNetCore.Mvc; +using Server.Services.Interfaces; + +namespace Server.Controllers.V1; + +[ApiController] +[ApiExplorerSettings(GroupName = "v1")] +[Route("/[controller]")] +public class TrainController : Controller { + private IDataManager DataManager { get; } + + public TrainController(IDataManager dataManager) { + this.DataManager = dataManager; + } + + [HttpGet("{trainNumber:int}")] + public async Task TrainInfo( + [FromRoute] int trainNumber + ) { + var result = (await DataManager.FetchTrain( + trainNumber.ToString(), + DateTimeOffset.Now + ))!; + return new Models.V1.TrainScrapeResult { + Date = result.Date, + Number = result.Number, + Operator = result.Operator, + Rank = result.Rank, + Route = { + From = result.Groups[0].Route.From, + To = result.Groups[0].Route.To, + }, + Stations = result.Groups[0].Stations.Select(station => new Models.V1.TrainStopDescription { + Arrival = station.Arrival == null + ? null + : new Models.V1.TrainStopArrDep { + ScheduleTime = station.Arrival.ScheduleTime.ToString("HH:mm"), + Status = station.Arrival.Status == null + ? null + : new Models.V1.Status { + Delay = station.Arrival.Status.Delay, + Real = station.Arrival.Status.Real, + }, + }, + Departure = station.Departure == null + ? null + : new Models.V1.TrainStopArrDep { + ScheduleTime = station.Departure.ScheduleTime.ToString("HH:mm"), + Status = station.Departure.Status == null + ? null + : new Models.V1.Status { + Delay = station.Departure.Status.Delay, + Real = station.Departure.Status.Real, + }, + }, + Km = station.Km, + Name = station.Name, + Platform = station.Platform, + StoppingTime = station.StoppingTime, + }).ToList(), + Status = result.Groups[0].Status == null + ? null + : new Models.V1.TrainStatus { + Delay = result.Groups[0].Status!.Delay, + State = result.Groups[0].Status!.State, + Station = result.Groups[0].Status!.Station, + }, + }; + } +} diff --git a/server/Controllers/V1/TrainsController.cs b/server/Controllers/V1/TrainsController.cs new file mode 100644 index 0000000..791d802 --- /dev/null +++ b/server/Controllers/V1/TrainsController.cs @@ -0,0 +1,22 @@ +using System.Collections.Generic; +using System.Linq; +using Microsoft.AspNetCore.Mvc; +using Server.Services.Interfaces; + +namespace Server.Controllers.V1; + +[ApiController] +[ApiExplorerSettings(GroupName = "v1")] +[Route("/[controller]")] +public class TrainsController : Controller { + private IDatabase Database { get; } + + public TrainsController(IDatabase database) { + this.Database = database; + } + + [HttpGet("")] + public ActionResult> ListTrains() { + return Ok(Database.Trains.Select(train => train.Number)); + } +} diff --git a/server/Controllers/V2/StationController.cs b/server/Controllers/V2/StationController.cs new file mode 100644 index 0000000..1bc9a75 --- /dev/null +++ b/server/Controllers/V2/StationController.cs @@ -0,0 +1,50 @@ +using System; +using System.Linq; +using System.Threading.Tasks; +using Microsoft.AspNetCore.Mvc; +using Server.Services.Interfaces; +using Server.Models.V2; + +namespace Server.Controllers.V2; + +[ApiController] +[ApiExplorerSettings(GroupName = "v2")] +[Route("/v2/[controller]")] +public class StationController : Controller { + private IDataManager DataManager { get; } + + public StationController(IDataManager dataManager) { + this.DataManager = dataManager; + } + + [HttpGet("{stationName}")] + public async Task StationInfo([FromRoute] string stationName) { + var result = (await DataManager.FetchStation(stationName, DateTimeOffset.Now))!; + return new StationScrapeResult { + Date = result.Date, + StationName = result.StationName, + Arrivals = result.Arrivals?.Select(arrival => new StationArrival { + Time = arrival.Time, + StoppingTime = arrival.StoppingTime, + Train = new StationArrivalTrain { + Number = arrival.Train.Number, + Operator = arrival.Train.Operator, + Origin = arrival.Train.Terminus, + Rank = arrival.Train.Rank, + Route = arrival.Train.Route.ToList(), + }, + })?.ToList(), + Departures = result.Departures?.Select(departure => new StationDeparture { + Time = departure.Time, + StoppingTime = departure.StoppingTime, + Train = new StationDepartureTrain { + Number = departure.Train.Number, + Operator = departure.Train.Operator, + Destination = departure.Train.Terminus, + Rank = departure.Train.Rank, + Route = departure.Train.Route.ToList(), + }, + })?.ToList(), + }; + } +} diff --git a/server/Controllers/V2/StationsController.cs b/server/Controllers/V2/StationsController.cs new file mode 100644 index 0000000..24a7750 --- /dev/null +++ b/server/Controllers/V2/StationsController.cs @@ -0,0 +1,21 @@ +using System.Collections.Generic; +using Microsoft.AspNetCore.Mvc; +using Server.Services.Interfaces; + +namespace Server.Controllers.V2; + +[ApiController] +[ApiExplorerSettings(GroupName = "v2")] +[Route("/v2/[controller]")] +public class StationsController : Controller { + private IDatabase Database { get; } + + public StationsController(IDatabase database) { + this.Database = database; + } + + [HttpGet("")] + public ActionResult> ListStations() { + return Ok(Database.Stations); + } +} diff --git a/server/Controllers/V2/TrainController.cs b/server/Controllers/V2/TrainController.cs new file mode 100644 index 0000000..d584fbd --- /dev/null +++ b/server/Controllers/V2/TrainController.cs @@ -0,0 +1,77 @@ +using System; +using System.Linq; +using System.Threading.Tasks; +using Microsoft.AspNetCore.Mvc; +using Server.Services.Interfaces; + +namespace Server.Controllers.V2; + +[ApiController] +[ApiExplorerSettings(GroupName = "v2")] +[Route("/v2/[controller]")] +public class TrainController : Controller { + private IDataManager DataManager { get; } + + public TrainController(IDataManager dataManager) { + this.DataManager = dataManager; + } + + [HttpGet("{trainNumber}")] + public async Task TrainInfo( + [FromRoute] string trainNumber, + [FromQuery] DateTimeOffset? date = null, + [FromQuery] string? useYesterday = null + ) { + if (useYesterday != null && + (new string[] { "y", "yes", "t", "true", "1" }).Contains(useYesterday?.Trim()?.ToLower())) { + date ??= DateTimeOffset.Now.Subtract(TimeSpan.FromDays(1)); + } + + var result = (await DataManager.FetchTrain(trainNumber, date ?? DateTimeOffset.Now))!; + return new Models.V2.TrainScrapeResult { + Date = result.Date, + Number = result.Number, + Operator = result.Operator, + Rank = result.Rank, + Route = { + From = result.Groups[0].Route.From, + To = result.Groups[0].Route.To, + }, + Stations = result.Groups[0].Stations.Select(station => new Models.V2.TrainStopDescription { + Arrival = station.Arrival == null + ? null + : new Models.V2.TrainStopArrDep { + ScheduleTime = station.Arrival.ScheduleTime.ToString("o"), + Status = station.Arrival.Status == null + ? null + : new Models.V2.Status { + Delay = station.Arrival.Status.Delay, + Real = station.Arrival.Status.Real, + }, + }, + Departure = station.Departure == null + ? null + : new Models.V2.TrainStopArrDep { + ScheduleTime = station.Departure.ScheduleTime.ToString("o"), + Status = station.Departure.Status == null + ? null + : new Models.V2.Status { + Delay = station.Departure.Status.Delay, + Real = station.Departure.Status.Real, + }, + }, + Km = station.Km, + Name = station.Name, + Platform = station.Platform, + StoppingTime = station.StoppingTime, + }).ToList(), + Status = result.Groups[0].Status == null + ? null + : new Models.V2.TrainStatus { + Delay = result.Groups[0].Status!.Delay, + State = result.Groups[0].Status!.State, + Station = result.Groups[0].Status!.Station, + }, + }; + } +} diff --git a/server/Controllers/V2/TrainsController.cs b/server/Controllers/V2/TrainsController.cs new file mode 100644 index 0000000..a3e32b2 --- /dev/null +++ b/server/Controllers/V2/TrainsController.cs @@ -0,0 +1,22 @@ +using System.Collections.Generic; +using Microsoft.AspNetCore.Mvc; +using Server.Services.Interfaces; + +namespace Server.Controllers.V2; + +[ApiController] +[ApiExplorerSettings(GroupName = "v2")] +[Route("/v2/[controller]")] +public class TrainsController : Controller { + private IDatabase Database { get; } + + public TrainsController(IDatabase database) { + this.Database = database; + } + + [HttpGet("")] + public ActionResult> ListTrains() { + return Ok(Database.Trains); + } +} + diff --git a/server/Controllers/V3/StationsController.cs b/server/Controllers/V3/StationsController.cs new file mode 100644 index 0000000..9e30fe5 --- /dev/null +++ b/server/Controllers/V3/StationsController.cs @@ -0,0 +1,44 @@ +using System; +using System.Collections.Generic; +using System.Threading.Tasks; +using InfoferScraper.Models.Station; +using Microsoft.AspNetCore.Http; +using Microsoft.AspNetCore.Mvc; +using Server.Services.Interfaces; + +namespace Server.Controllers.V3; + +[ApiController] +[ApiExplorerSettings(GroupName = "v3")] +[Route("/v3/[controller]")] +public class StationsController : Controller { + private IDataManager DataManager { get; } + private IDatabase Database { get; } + + public StationsController(IDataManager dataManager, IDatabase database) { + this.DataManager = dataManager; + this.Database = database; + } + + [HttpGet("")] + public ActionResult> ListStations() { + return Ok(Database.Stations); + } + + [HttpGet("{stationName}")] + [ProducesResponseType(typeof(IStationScrapeResult), StatusCodes.Status200OK)] + [ProducesResponseType(StatusCodes.Status404NotFound)] + public async Task> StationInfo( + [FromRoute] string stationName, + [FromQuery] DateTimeOffset? date = null, + [FromQuery] string? lastUpdateId = null + ) { + var result = await DataManager.FetchStation(stationName, date ?? DateTimeOffset.Now); + if (result == null) { + return NotFound(new { + Reason = "station_not_found", + }); + } + return Ok(result); + } +} diff --git a/server/Controllers/V3/TrainsController.cs b/server/Controllers/V3/TrainsController.cs new file mode 100644 index 0000000..55bfd34 --- /dev/null +++ b/server/Controllers/V3/TrainsController.cs @@ -0,0 +1,66 @@ +using System; +using System.Collections.Generic; +using System.Threading.Tasks; +using InfoferScraper.Models.Train; +using Microsoft.AspNetCore.Http; +using Microsoft.AspNetCore.Mvc; +using scraper.Exceptions; +using Server.Services.Interfaces; + +namespace Server.Controllers.V3; + +[ApiController] +[ApiExplorerSettings(GroupName = "v3")] +[Route("/v3/[controller]")] +public class TrainsController : Controller { + private IDataManager DataManager { get; } + private IDatabase Database { get; } + + public TrainsController(IDataManager dataManager, IDatabase database) { + this.DataManager = dataManager; + this.Database = database; + } + + [HttpGet("")] + public ActionResult> ListTrains() { + return Ok(Database.Trains); + } + + /// + /// Searches for information about a train + /// + /// The number of the train, without additional things such as the rank + /// The date when the train departs from the first station + /// Information about the train + /// If the train number requested cannot be found (invalid or not running on the requested date) + [HttpGet("{trainNumber}")] + [ProducesResponseType(typeof(ITrainScrapeResult), StatusCodes.Status200OK)] + [ProducesResponseType(StatusCodes.Status404NotFound)] + public async Task> TrainInfoV3( + [FromRoute] string trainNumber, + [FromQuery] DateTimeOffset? date = null + ) { + try { + var result = await DataManager.FetchTrain(trainNumber, date ?? DateTimeOffset.Now); + if (result == null) { + return NotFound(new { + Reason = "train_not_found", + }); + } + return Ok(result); + } catch (TrainNotThisDayException) { + return NotFound(new { + Reason = "not_running_today", + }); + } + // var (token, result) = await DataManager.GetNewTrainDataUpdate( + // trainNumber, + // date ?? DateTimeOffset.Now, + // lastUpdateId ?? "" + // ); + // Response.Headers.Add("X-Update-Id", new StringValues(token)); + // return Ok(result); + } + + +} diff --git a/server/Models/V1/TrainScrapeResult.cs b/server/Models/V1/TrainScrapeResult.cs new file mode 100644 index 0000000..5a29652 --- /dev/null +++ b/server/Models/V1/TrainScrapeResult.cs @@ -0,0 +1,57 @@ +using System.Collections.Generic; + +namespace Server.Models.V1 { + public record TrainScrapeResult { + public string Rank { get; internal set; } = ""; + + public string Number { get; internal set; } = ""; + + /// + /// Date in the DD.MM.YYYY format + /// This date is taken as-is from the result. + /// + public string Date { get; internal set; } = ""; + + public string Operator { get; internal set; } = ""; + + public TrainRoute Route { get; } = new(); + + public TrainStatus? Status { get; internal set; } = new(); + public List Stations { get; internal set; } = new(); + } + + public record TrainRoute { + public TrainRoute() { + From = ""; + To = ""; + } + + public string From { get; set; } + public string To { get; set; } + } + + public record TrainStatus { + public int Delay { get; set; } + public string Station { get; set; } = ""; + public InfoferScraper.Models.Train.StatusKind State { get; set; } + } + + public record TrainStopDescription { + public string Name { get; set; } = ""; + public int Km { get; set; } + public int? StoppingTime { get; set; } + public string? Platform { get; set; } + public TrainStopArrDep? Arrival { get; set; } + public TrainStopArrDep? Departure { get; set; } + } + + public record TrainStopArrDep { + public string ScheduleTime { get; set; } = ""; + public Status? Status { get; set; } + } + + public record Status { + public int Delay { get; set; } + public bool Real { get; set; } + } +} diff --git a/server/Models/V2/StationScrapeResult.cs b/server/Models/V2/StationScrapeResult.cs new file mode 100644 index 0000000..7eaeea4 --- /dev/null +++ b/server/Models/V2/StationScrapeResult.cs @@ -0,0 +1,39 @@ +using System; +using System.Collections.Generic; + +namespace Server.Models.V2 { + public record StationScrapeResult { + public string Date { get; internal set; } = ""; + public string StationName { get; internal set; } = ""; + public List? Arrivals { get; internal set; } + public List? Departures { get; internal set; } + } + + public record StationArrival { + public int? StoppingTime { get; internal set; } + public DateTimeOffset Time { get; internal set; } + public StationArrivalTrain Train { get; internal set; } = new(); + } + + public record StationArrivalTrain { + public string Number { get; internal set; } + public string Operator { get; internal set; } + public string Rank { get; internal set; } + public List Route { get; internal set; } + public string Origin { get; internal set; } + } + + public record StationDeparture { + public int? StoppingTime { get; internal set; } + public DateTimeOffset Time { get; internal set; } + public StationDepartureTrain Train { get; internal set; } = new(); + } + + public record StationDepartureTrain { + public string Number { get; internal set; } + public string Operator { get; internal set; } + public string Rank { get; internal set; } + public List Route { get; internal set; } + public string Destination { get; internal set; } + } +} diff --git a/server/Models/V2/TrainScrapeResult.cs b/server/Models/V2/TrainScrapeResult.cs new file mode 100644 index 0000000..bcf3b46 --- /dev/null +++ b/server/Models/V2/TrainScrapeResult.cs @@ -0,0 +1,57 @@ +using System.Collections.Generic; + +namespace Server.Models.V2 { + public record TrainScrapeResult { + public string Rank { get; internal set; } = ""; + + public string Number { get; internal set; } = ""; + + /// + /// Date in the DD.MM.YYYY format + /// This date is taken as-is from the result. + /// + public string Date { get; internal set; } = ""; + + public string Operator { get; internal set; } = ""; + + public TrainRoute Route { get; } = new(); + + public TrainStatus? Status { get; internal set; } = new(); + public List Stations { get; internal set; } = new(); + } + + public record TrainRoute { + public TrainRoute() { + From = ""; + To = ""; + } + + public string From { get; set; } + public string To { get; set; } + } + + public record TrainStatus { + public int Delay { get; set; } + public string Station { get; set; } = ""; + public InfoferScraper.Models.Train.StatusKind State { get; set; } + } + + public record TrainStopDescription { + public string Name { get; set; } = ""; + public int Km { get; set; } + public int? StoppingTime { get; set; } + public string? Platform { get; set; } + public TrainStopArrDep? Arrival { get; set; } + public TrainStopArrDep? Departure { get; set; } + } + + public record TrainStopArrDep { + public string ScheduleTime { get; set; } = ""; + public Status? Status { get; set; } + } + + public record Status { + public int Delay { get; set; } + public bool Real { get; set; } + } +} diff --git a/server/Pipfile b/server/Pipfile deleted file mode 100644 index 3bcaa3b..0000000 --- a/server/Pipfile +++ /dev/null @@ -1,15 +0,0 @@ -[[source]] -url = "https://pypi.org/simple" -verify_ssl = true -name = "pypi" - -[packages] -flask = "*" -gevent = "*" -scraper = { editable = true, path = '../scraper' } -jsonschema = "*" - -[dev-packages] - -[requires] -python_version = "3.9" diff --git a/server/Pipfile.lock b/server/Pipfile.lock deleted file mode 100644 index 7de0a9d..0000000 --- a/server/Pipfile.lock +++ /dev/null @@ -1,394 +0,0 @@ -{ - "_meta": { - "hash": { - "sha256": "3c7f09679bdd68674754a714ee39503cf1a3ae265400eea074fec83559246dff" - }, - "pipfile-spec": 6, - "requires": { - "python_version": "3.9" - }, - "sources": [ - { - "name": "pypi", - "url": "https://pypi.org/simple", - "verify_ssl": true - } - ] - }, - "default": { - "attrs": { - "hashes": [ - "sha256:149e90d6d8ac20db7a955ad60cf0e6881a3f20d37096140088356da6c716b0b1", - "sha256:ef6aaac3ca6cd92904cdd0d83f629a15f18053ec84e6432106f7a4d04ae4f5fb" - ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", - "version": "==21.2.0" - }, - "beautifulsoup4": { - "hashes": [ - "sha256:4c98143716ef1cb40bf7f39a8e3eec8f8b009509e74904ba3a7b315431577e35", - "sha256:84729e322ad1d5b4d25f805bfa05b902dd96450f43842c4e99067d5e1369eb25", - "sha256:fff47e031e34ec82bf17e00da8f592fe7de69aeea38be00523c04623c04fb666" - ], - "version": "==4.9.3" - }, - "certifi": { - "hashes": [ - "sha256:2bbf76fd432960138b3ef6dda3dde0544f27cbf8546c458e60baf371917ba9ee", - "sha256:50b1e4f8446b06f41be7dd6338db18e0990601dce795c2b1686458aa7e8fa7d8" - ], - "version": "==2021.5.30" - }, - "charset-normalizer": { - "hashes": [ - "sha256:0c8911edd15d19223366a194a513099a302055a962bca2cec0f54b8b63175d8b", - "sha256:f23667ebe1084be45f6ae0538e4a5a865206544097e4e8bbcacf42cd02a348f3" - ], - "markers": "python_version >= '3'", - "version": "==2.0.4" - }, - "click": { - "hashes": [ - "sha256:8c04c11192119b1ef78ea049e0a6f0463e4c48ef00a30160c704337586f3ad7a", - "sha256:fba402a4a47334742d782209a7c79bc448911afe1149d07bdabdf480b3e2f4b6" - ], - "markers": "python_version >= '3.6'", - "version": "==8.0.1" - }, - "flask": { - "hashes": [ - "sha256:1c4c257b1892aec1398784c63791cbaa43062f1f7aeb555c4da961b20ee68f55", - "sha256:a6209ca15eb63fc9385f38e452704113d679511d9574d09b2cf9183ae7d20dc9" - ], - "index": "pypi", - "version": "==2.0.1" - }, - "gevent": { - "hashes": [ - "sha256:02d1e8ca227d0ab0b7917fd7e411f9a534475e0a41fb6f434e9264b20155201a", - "sha256:0c7b4763514fec74c9fe6ad10c3de62d8fe7b926d520b1e35eb6887181b954ff", - "sha256:1c9c87b15f792af80edc950a83ab8ef4f3ba3889712211c2c42740ddb57b5492", - "sha256:23077d87d1589ac141c22923fd76853d2cc5b7e3c5e1f1f9cdf6ff23bc9790fc", - "sha256:37a469a99e6000b42dd0b9bbd9d716dbd66cdc6e5738f136f6a266c29b90ee99", - "sha256:3b600145dc0c5b39c6f89c2e91ec6c55eb0dd52dc8148228479ca42cded358e4", - "sha256:3f5ba654bdd3c774079b553fef535ede5b52c7abd224cb235a15da90ae36251b", - "sha256:43e93e1a4738c922a2416baf33f0afb0a20b22d3dba886720bc037cd02a98575", - "sha256:473f918bdf7d2096e391f66bd8ce1e969639aa235e710aaf750a37774bb585bd", - "sha256:4c94d27be9f0439b28eb8bd0f879e6142918c62092fda7fb96b6d06f01886b94", - "sha256:55ede95f41b74e7506fab293ad04cc7fc2b6f662b42281e9f2d668ad3817b574", - "sha256:6cad37a55e904879beef2a7e7c57c57d62fde2331fef1bec7f2b2a7ef14da6a2", - "sha256:72d4c2a8e65bbc702db76456841c7ddd6de2d9ab544a24aa74ad9c2b6411a269", - "sha256:75c29ed5148c916021d39d2fac90ccc0e19adf854626a34eaee012aa6b1fcb67", - "sha256:84e1af2dfb4ea9495cb914b00b6303ca0d54bf0a92e688a17e60f6b033873df2", - "sha256:8d8655ce581368b7e1ab42c8a3a166c0b43ea04e59970efbade9448864585e99", - "sha256:90131877d3ce1a05da1b718631860815b89ff44e93c42d168c9c9e8893b26318", - "sha256:9d46bea8644048ceac5737950c08fc89c37a66c34a56a6c9e3648726e60cb767", - "sha256:a8656d6e02bf47d7fa47728cf7a7cbf408f77ef1fad12afd9e0e3246c5de1707", - "sha256:aaf1451cd0d9c32f65a50e461084a0540be52b8ea05c18669c95b42e1f71592a", - "sha256:afc877ff4f277d0e51a1206d748fdab8c1e0256f7a05e1b1067abbed71c64da9", - "sha256:b10c3326edb76ec3049646dc5131608d6d3733b5adfc75d34852028ecc67c52c", - "sha256:ceec7c5f15fb2f9b767b194daa55246830db6c7c3c2f0b1c7e9e90cb4d01f3f9", - "sha256:e00dc0450f79253b7a3a7f2a28e6ca959c8d0d47c0f9fa2c57894c7974d5965f", - "sha256:e91632fdcf1c9a33e97e35f96edcbdf0b10e36cf53b58caa946dca4836bb688c", - "sha256:f39d5defda9443b5fb99a185050e94782fe7ac38f34f751b491142216ad23bc7" - ], - "index": "pypi", - "version": "==21.8.0" - }, - "greenlet": { - "hashes": [ - "sha256:04e1849c88aa56584d4a0a6e36af5ec7cc37993fdc1fda72b56aa1394a92ded3", - "sha256:05e72db813c28906cdc59bd0da7c325d9b82aa0b0543014059c34c8c4ad20e16", - "sha256:07e6d88242e09b399682b39f8dfa1e7e6eca66b305de1ff74ed9eb1a7d8e539c", - "sha256:090126004c8ab9cd0787e2acf63d79e80ab41a18f57d6448225bbfcba475034f", - "sha256:1796f2c283faab2b71c67e9b9aefb3f201fdfbee5cb55001f5ffce9125f63a45", - "sha256:2f89d74b4f423e756a018832cd7a0a571e0a31b9ca59323b77ce5f15a437629b", - "sha256:34e6675167a238bede724ee60fe0550709e95adaff6a36bcc97006c365290384", - "sha256:3e594015a2349ec6dcceda9aca29da8dc89e85b56825b7d1f138a3f6bb79dd4c", - "sha256:3f8fc59bc5d64fa41f58b0029794f474223693fd00016b29f4e176b3ee2cfd9f", - "sha256:3fc6a447735749d651d8919da49aab03c434a300e9f0af1c886d560405840fd1", - "sha256:40abb7fec4f6294225d2b5464bb6d9552050ded14a7516588d6f010e7e366dcc", - "sha256:44556302c0ab376e37939fd0058e1f0db2e769580d340fb03b01678d1ff25f68", - "sha256:476ba9435afaead4382fbab8f1882f75e3fb2285c35c9285abb3dd30237f9142", - "sha256:4870b018ca685ff573edd56b93f00a122f279640732bb52ce3a62b73ee5c4a92", - "sha256:4adaf53ace289ced90797d92d767d37e7cdc29f13bd3830c3f0a561277a4ae83", - "sha256:4eae94de9924bbb4d24960185363e614b1b62ff797c23dc3c8a7c75bbb8d187e", - "sha256:5317701c7ce167205c0569c10abc4bd01c7f4cf93f642c39f2ce975fa9b78a3c", - "sha256:5c3b735ccf8fc8048664ee415f8af5a3a018cc92010a0d7195395059b4b39b7d", - "sha256:5cde7ee190196cbdc078511f4df0be367af85636b84d8be32230f4871b960687", - "sha256:655ab836324a473d4cd8cf231a2d6f283ed71ed77037679da554e38e606a7117", - "sha256:6ce9d0784c3c79f3e5c5c9c9517bbb6c7e8aa12372a5ea95197b8a99402aa0e6", - "sha256:6e0696525500bc8aa12eae654095d2260db4dc95d5c35af2b486eae1bf914ccd", - "sha256:75ff270fd05125dce3303e9216ccddc541a9e072d4fc764a9276d44dee87242b", - "sha256:8039f5fe8030c43cd1732d9a234fdcbf4916fcc32e21745ca62e75023e4d4649", - "sha256:84488516639c3c5e5c0e52f311fff94ebc45b56788c2a3bfe9cf8e75670f4de3", - "sha256:84782c80a433d87530ae3f4b9ed58d4a57317d9918dfcc6a59115fa2d8731f2c", - "sha256:8ddb38fb6ad96c2ef7468ff73ba5c6876b63b664eebb2c919c224261ae5e8378", - "sha256:98b491976ed656be9445b79bc57ed21decf08a01aaaf5fdabf07c98c108111f6", - "sha256:990e0f5e64bcbc6bdbd03774ecb72496224d13b664aa03afd1f9b171a3269272", - "sha256:9b02e6039eafd75e029d8c58b7b1f3e450ca563ef1fe21c7e3e40b9936c8d03e", - "sha256:a11b6199a0b9dc868990456a2667167d0ba096c5224f6258e452bfbe5a9742c5", - "sha256:a414f8e14aa7bacfe1578f17c11d977e637d25383b6210587c29210af995ef04", - "sha256:a91ee268f059583176c2c8b012a9fce7e49ca6b333a12bbc2dd01fc1a9783885", - "sha256:ac991947ca6533ada4ce7095f0e28fe25d5b2f3266ad5b983ed4201e61596acf", - "sha256:b050dbb96216db273b56f0e5960959c2b4cb679fe1e58a0c3906fa0a60c00662", - "sha256:b97a807437b81f90f85022a9dcfd527deea38368a3979ccb49d93c9198b2c722", - "sha256:bad269e442f1b7ffa3fa8820b3c3aa66f02a9f9455b5ba2db5a6f9eea96f56de", - "sha256:bf3725d79b1ceb19e83fb1aed44095518c0fcff88fba06a76c0891cfd1f36837", - "sha256:c0f22774cd8294078bdf7392ac73cf00bfa1e5e0ed644bd064fdabc5f2a2f481", - "sha256:c1862f9f1031b1dee3ff00f1027fcd098ffc82120f43041fe67804b464bbd8a7", - "sha256:c8d4ed48eed7414ccb2aaaecbc733ed2a84c299714eae3f0f48db085342d5629", - "sha256:cf31e894dabb077a35bbe6963285d4515a387ff657bd25b0530c7168e48f167f", - "sha256:d15cb6f8706678dc47fb4e4f8b339937b04eda48a0af1cca95f180db552e7663", - "sha256:dfcb5a4056e161307d103bc013478892cfd919f1262c2bb8703220adcb986362", - "sha256:e02780da03f84a671bb4205c5968c120f18df081236d7b5462b380fd4f0b497b", - "sha256:e2002a59453858c7f3404690ae80f10c924a39f45f6095f18a985a1234c37334", - "sha256:e22a82d2b416d9227a500c6860cf13e74060cf10e7daf6695cbf4e6a94e0eee4", - "sha256:e41f72f225192d5d4df81dad2974a8943b0f2d664a2a5cfccdf5a01506f5523c", - "sha256:f253dad38605486a4590f9368ecbace95865fea0f2b66615d121ac91fd1a1563", - "sha256:fddfb31aa2ac550b938d952bca8a87f1db0f8dc930ffa14ce05b5c08d27e7fd1" - ], - "markers": "platform_python_implementation == 'CPython'", - "version": "==1.1.1" - }, - "idna": { - "hashes": [ - "sha256:14475042e284991034cb48e06f6851428fb14c4dc953acd9be9a5e95c7b6dd7a", - "sha256:467fbad99067910785144ce333826c71fb0e63a425657295239737f7ecd125f3" - ], - "markers": "python_version >= '3'", - "version": "==3.2" - }, - "infofer-scraper": { - "editable": true, - "path": "./../scraper" - }, - "itsdangerous": { - "hashes": [ - "sha256:5174094b9637652bdb841a3029700391451bd092ba3db90600dea710ba28e97c", - "sha256:9e724d68fc22902a1435351f84c3fb8623f303fffcc566a4cb952df8c572cff0" - ], - "markers": "python_version >= '3.6'", - "version": "==2.0.1" - }, - "jinja2": { - "hashes": [ - "sha256:1f06f2da51e7b56b8f238affdd6b4e2c61e39598a378cc49345bc1bd42a978a4", - "sha256:703f484b47a6af502e743c9122595cc812b0271f661722403114f71a79d0f5a4" - ], - "markers": "python_version >= '3.6'", - "version": "==3.0.1" - }, - "jsonschema": { - "hashes": [ - "sha256:4e5b3cf8216f577bee9ce139cbe72eca3ea4f292ec60928ff24758ce626cd163", - "sha256:c8a85b28d377cc7737e46e2d9f2b4f44ee3c0e1deac6bf46ddefc7187d30797a" - ], - "index": "pypi", - "version": "==3.2.0" - }, - "markupsafe": { - "hashes": [ - "sha256:01a9b8ea66f1658938f65b93a85ebe8bc016e6769611be228d797c9d998dd298", - "sha256:023cb26ec21ece8dc3907c0e8320058b2e0cb3c55cf9564da612bc325bed5e64", - "sha256:0446679737af14f45767963a1a9ef7620189912317d095f2d9ffa183a4d25d2b", - "sha256:0717a7390a68be14b8c793ba258e075c6f4ca819f15edfc2a3a027c823718567", - "sha256:0955295dd5eec6cb6cc2fe1698f4c6d84af2e92de33fbcac4111913cd100a6ff", - "sha256:0d4b31cc67ab36e3392bbf3862cfbadac3db12bdd8b02a2731f509ed5b829724", - "sha256:10f82115e21dc0dfec9ab5c0223652f7197feb168c940f3ef61563fc2d6beb74", - "sha256:168cd0a3642de83558a5153c8bd34f175a9a6e7f6dc6384b9655d2697312a646", - "sha256:1d609f577dc6e1aa17d746f8bd3c31aa4d258f4070d61b2aa5c4166c1539de35", - "sha256:1f2ade76b9903f39aa442b4aadd2177decb66525062db244b35d71d0ee8599b6", - "sha256:2a7d351cbd8cfeb19ca00de495e224dea7e7d919659c2841bbb7f420ad03e2d6", - "sha256:2d7d807855b419fc2ed3e631034685db6079889a1f01d5d9dac950f764da3dad", - "sha256:2ef54abee730b502252bcdf31b10dacb0a416229b72c18b19e24a4509f273d26", - "sha256:36bc903cbb393720fad60fc28c10de6acf10dc6cc883f3e24ee4012371399a38", - "sha256:37205cac2a79194e3750b0af2a5720d95f786a55ce7df90c3af697bfa100eaac", - "sha256:3c112550557578c26af18a1ccc9e090bfe03832ae994343cfdacd287db6a6ae7", - "sha256:3dd007d54ee88b46be476e293f48c85048603f5f516008bee124ddd891398ed6", - "sha256:47ab1e7b91c098ab893b828deafa1203de86d0bc6ab587b160f78fe6c4011f75", - "sha256:49e3ceeabbfb9d66c3aef5af3a60cc43b85c33df25ce03d0031a608b0a8b2e3f", - "sha256:4efca8f86c54b22348a5467704e3fec767b2db12fc39c6d963168ab1d3fc9135", - "sha256:53edb4da6925ad13c07b6d26c2a852bd81e364f95301c66e930ab2aef5b5ddd8", - "sha256:5855f8438a7d1d458206a2466bf82b0f104a3724bf96a1c781ab731e4201731a", - "sha256:594c67807fb16238b30c44bdf74f36c02cdf22d1c8cda91ef8a0ed8dabf5620a", - "sha256:5bb28c636d87e840583ee3adeb78172efc47c8b26127267f54a9c0ec251d41a9", - "sha256:60bf42e36abfaf9aff1f50f52644b336d4f0a3fd6d8a60ca0d054ac9f713a864", - "sha256:611d1ad9a4288cf3e3c16014564df047fe08410e628f89805e475368bd304914", - "sha256:6557b31b5e2c9ddf0de32a691f2312a32f77cd7681d8af66c2692efdbef84c18", - "sha256:693ce3f9e70a6cf7d2fb9e6c9d8b204b6b39897a2c4a1aa65728d5ac97dcc1d8", - "sha256:6a7fae0dd14cf60ad5ff42baa2e95727c3d81ded453457771d02b7d2b3f9c0c2", - "sha256:6c4ca60fa24e85fe25b912b01e62cb969d69a23a5d5867682dd3e80b5b02581d", - "sha256:6fcf051089389abe060c9cd7caa212c707e58153afa2c649f00346ce6d260f1b", - "sha256:7d91275b0245b1da4d4cfa07e0faedd5b0812efc15b702576d103293e252af1b", - "sha256:905fec760bd2fa1388bb5b489ee8ee5f7291d692638ea5f67982d968366bef9f", - "sha256:97383d78eb34da7e1fa37dd273c20ad4320929af65d156e35a5e2d89566d9dfb", - "sha256:984d76483eb32f1bcb536dc27e4ad56bba4baa70be32fa87152832cdd9db0833", - "sha256:99df47edb6bda1249d3e80fdabb1dab8c08ef3975f69aed437cb69d0a5de1e28", - "sha256:a30e67a65b53ea0a5e62fe23682cfe22712e01f453b95233b25502f7c61cb415", - "sha256:ab3ef638ace319fa26553db0624c4699e31a28bb2a835c5faca8f8acf6a5a902", - "sha256:add36cb2dbb8b736611303cd3bfcee00afd96471b09cda130da3581cbdc56a6d", - "sha256:b2f4bf27480f5e5e8ce285a8c8fd176c0b03e93dcc6646477d4630e83440c6a9", - "sha256:b7f2d075102dc8c794cbde1947378051c4e5180d52d276987b8d28a3bd58c17d", - "sha256:baa1a4e8f868845af802979fcdbf0bb11f94f1cb7ced4c4b8a351bb60d108145", - "sha256:be98f628055368795d818ebf93da628541e10b75b41c559fdf36d104c5787066", - "sha256:bf5d821ffabf0ef3533c39c518f3357b171a1651c1ff6827325e4489b0e46c3c", - "sha256:c47adbc92fc1bb2b3274c4b3a43ae0e4573d9fbff4f54cd484555edbf030baf1", - "sha256:d7f9850398e85aba693bb640262d3611788b1f29a79f0c93c565694658f4071f", - "sha256:d8446c54dc28c01e5a2dbac5a25f071f6653e6e40f3a8818e8b45d790fe6ef53", - "sha256:e0f138900af21926a02425cf736db95be9f4af72ba1bb21453432a07f6082134", - "sha256:e9936f0b261d4df76ad22f8fee3ae83b60d7c3e871292cd42f40b81b70afae85", - "sha256:f5653a225f31e113b152e56f154ccbe59eeb1c7487b39b9d9f9cdb58e6c79dc5", - "sha256:f826e31d18b516f653fe296d967d700fddad5901ae07c622bb3705955e1faa94", - "sha256:f8ba0e8349a38d3001fae7eadded3f6606f0da5d748ee53cc1dab1d6527b9509", - "sha256:f9081981fe268bd86831e5c75f7de206ef275defcb82bc70740ae6dc507aee51", - "sha256:fa130dd50c57d53368c9d59395cb5526eda596d3ffe36666cd81a44d56e48872" - ], - "markers": "python_version >= '3.6'", - "version": "==2.0.1" - }, - "pyrsistent": { - "hashes": [ - "sha256:097b96f129dd36a8c9e33594e7ebb151b1515eb52cceb08474c10a5479e799f2", - "sha256:2aaf19dc8ce517a8653746d98e962ef480ff34b6bc563fc067be6401ffb457c7", - "sha256:404e1f1d254d314d55adb8d87f4f465c8693d6f902f67eb6ef5b4526dc58e6ea", - "sha256:48578680353f41dca1ca3dc48629fb77dfc745128b56fc01096b2530c13fd426", - "sha256:4916c10896721e472ee12c95cdc2891ce5890898d2f9907b1b4ae0f53588b710", - "sha256:527be2bfa8dc80f6f8ddd65242ba476a6c4fb4e3aedbf281dfbac1b1ed4165b1", - "sha256:58a70d93fb79dc585b21f9d72487b929a6fe58da0754fa4cb9f279bb92369396", - "sha256:5e4395bbf841693eaebaa5bb5c8f5cdbb1d139e07c975c682ec4e4f8126e03d2", - "sha256:6b5eed00e597b5b5773b4ca30bd48a5774ef1e96f2a45d105db5b4ebb4bca680", - "sha256:73ff61b1411e3fb0ba144b8f08d6749749775fe89688093e1efef9839d2dcc35", - "sha256:772e94c2c6864f2cd2ffbe58bb3bdefbe2a32afa0acb1a77e472aac831f83427", - "sha256:773c781216f8c2900b42a7b638d5b517bb134ae1acbebe4d1e8f1f41ea60eb4b", - "sha256:a0c772d791c38bbc77be659af29bb14c38ced151433592e326361610250c605b", - "sha256:b29b869cf58412ca5738d23691e96d8aff535e17390128a1a52717c9a109da4f", - "sha256:c1a9ff320fa699337e05edcaae79ef8c2880b52720bc031b219e5b5008ebbdef", - "sha256:cd3caef37a415fd0dae6148a1b6957a8c5f275a62cca02e18474608cb263640c", - "sha256:d5ec194c9c573aafaceebf05fc400656722793dac57f254cd4741f3c27ae57b4", - "sha256:da6e5e818d18459fa46fac0a4a4e543507fe1110e808101277c5a2b5bab0cd2d", - "sha256:e79d94ca58fcafef6395f6352383fa1a76922268fa02caa2272fff501c2fdc78", - "sha256:f3ef98d7b76da5eb19c37fda834d50262ff9167c65658d1d8f974d2e4d90676b", - "sha256:f4c8cabb46ff8e5d61f56a037974228e978f26bfefce4f61a4b1ac0ba7a2ab72" - ], - "markers": "python_version >= '3.6'", - "version": "==0.18.0" - }, - "pytz": { - "hashes": [ - "sha256:83a4a90894bf38e243cf052c8b58f381bfe9a7a483f6a9cab140bc7f702ac4da", - "sha256:eb10ce3e7736052ed3623d49975ce333bcd712c7bb19a58b9e2089d4057d0798" - ], - "version": "==2021.1" - }, - "requests": { - "hashes": [ - "sha256:6c1246513ecd5ecd4528a0906f910e8f0f9c6b8ec72030dc9fd154dc1a6efd24", - "sha256:b8aa58f8cf793ffd8782d3d8cb19e66ef36f7aba4353eec859e74678b01b07a7" - ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'", - "version": "==2.26.0" - }, - "scraper": { - "editable": true, - "path": "../scraper" - }, - "six": { - "hashes": [ - "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926", - "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254" - ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", - "version": "==1.16.0" - }, - "soupsieve": { - "hashes": [ - "sha256:052774848f448cf19c7e959adf5566904d525f33a3f8b6ba6f6f8f26ec7de0cc", - "sha256:c2c1c2d44f158cdbddab7824a9af8c4f83c76b1e23e049479aa432feb6c4c23b" - ], - "markers": "python_version >= '3'", - "version": "==2.2.1" - }, - "urllib3": { - "hashes": [ - "sha256:39fb8672126159acb139a7718dd10806104dec1e2f0f6c88aab05d17df10c8d4", - "sha256:f57b4c16c62fa2760b7e3d97c35b255512fb6b59a259730f36ba32ce9f8e342f" - ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'", - "version": "==1.26.6" - }, - "werkzeug": { - "hashes": [ - "sha256:1de1db30d010ff1af14a009224ec49ab2329ad2cde454c8a708130642d579c42", - "sha256:6c1ec500dcdba0baa27600f6a22f6333d8b662d22027ff9f6202e3367413caa8" - ], - "markers": "python_version >= '3.6'", - "version": "==2.0.1" - }, - "zope.event": { - "hashes": [ - "sha256:2666401939cdaa5f4e0c08cf7f20c9b21423b95e88f4675b1443973bdb080c42", - "sha256:5e76517f5b9b119acf37ca8819781db6c16ea433f7e2062c4afc2b6fbedb1330" - ], - "version": "==4.5.0" - }, - "zope.interface": { - "hashes": [ - "sha256:08f9636e99a9d5410181ba0729e0408d3d8748026ea938f3b970a0249daa8192", - "sha256:0b465ae0962d49c68aa9733ba92a001b2a0933c317780435f00be7ecb959c702", - "sha256:0cba8477e300d64a11a9789ed40ee8932b59f9ee05f85276dbb4b59acee5dd09", - "sha256:0cee5187b60ed26d56eb2960136288ce91bcf61e2a9405660d271d1f122a69a4", - "sha256:0ea1d73b7c9dcbc5080bb8aaffb776f1c68e807767069b9ccdd06f27a161914a", - "sha256:0f91b5b948686659a8e28b728ff5e74b1be6bf40cb04704453617e5f1e945ef3", - "sha256:15e7d1f7a6ee16572e21e3576d2012b2778cbacf75eb4b7400be37455f5ca8bf", - "sha256:17776ecd3a1fdd2b2cd5373e5ef8b307162f581c693575ec62e7c5399d80794c", - "sha256:194d0bcb1374ac3e1e023961610dc8f2c78a0f5f634d0c737691e215569e640d", - "sha256:1c0e316c9add0db48a5b703833881351444398b04111188069a26a61cfb4df78", - "sha256:205e40ccde0f37496904572035deea747390a8b7dc65146d30b96e2dd1359a83", - "sha256:273f158fabc5ea33cbc936da0ab3d4ba80ede5351babc4f577d768e057651531", - "sha256:2876246527c91e101184f63ccd1d716ec9c46519cc5f3d5375a3351c46467c46", - "sha256:2c98384b254b37ce50eddd55db8d381a5c53b4c10ee66e1e7fe749824f894021", - "sha256:2e5a26f16503be6c826abca904e45f1a44ff275fdb7e9d1b75c10671c26f8b94", - "sha256:334701327f37c47fa628fc8b8d28c7d7730ce7daaf4bda1efb741679c2b087fc", - "sha256:3748fac0d0f6a304e674955ab1365d515993b3a0a865e16a11ec9d86fb307f63", - "sha256:3c02411a3b62668200910090a0dff17c0b25aaa36145082a5a6adf08fa281e54", - "sha256:3dd4952748521205697bc2802e4afac5ed4b02909bb799ba1fe239f77fd4e117", - "sha256:3f24df7124c323fceb53ff6168da70dbfbae1442b4f3da439cd441681f54fe25", - "sha256:469e2407e0fe9880ac690a3666f03eb4c3c444411a5a5fddfdabc5d184a79f05", - "sha256:4de4bc9b6d35c5af65b454d3e9bc98c50eb3960d5a3762c9438df57427134b8e", - "sha256:5208ebd5152e040640518a77827bdfcc73773a15a33d6644015b763b9c9febc1", - "sha256:52de7fc6c21b419078008f697fd4103dbc763288b1406b4562554bd47514c004", - "sha256:5bb3489b4558e49ad2c5118137cfeaf59434f9737fa9c5deefc72d22c23822e2", - "sha256:5dba5f530fec3f0988d83b78cc591b58c0b6eb8431a85edd1569a0539a8a5a0e", - "sha256:5dd9ca406499444f4c8299f803d4a14edf7890ecc595c8b1c7115c2342cadc5f", - "sha256:5f931a1c21dfa7a9c573ec1f50a31135ccce84e32507c54e1ea404894c5eb96f", - "sha256:63b82bb63de7c821428d513607e84c6d97d58afd1fe2eb645030bdc185440120", - "sha256:66c0061c91b3b9cf542131148ef7ecbecb2690d48d1612ec386de9d36766058f", - "sha256:6f0c02cbb9691b7c91d5009108f975f8ffeab5dff8f26d62e21c493060eff2a1", - "sha256:71aace0c42d53abe6fc7f726c5d3b60d90f3c5c055a447950ad6ea9cec2e37d9", - "sha256:7d97a4306898b05404a0dcdc32d9709b7d8832c0c542b861d9a826301719794e", - "sha256:7df1e1c05304f26faa49fa752a8c690126cf98b40b91d54e6e9cc3b7d6ffe8b7", - "sha256:8270252effc60b9642b423189a2fe90eb6b59e87cbee54549db3f5562ff8d1b8", - "sha256:867a5ad16892bf20e6c4ea2aab1971f45645ff3102ad29bd84c86027fa99997b", - "sha256:877473e675fdcc113c138813a5dd440da0769a2d81f4d86614e5d62b69497155", - "sha256:8892f89999ffd992208754851e5a052f6b5db70a1e3f7d54b17c5211e37a98c7", - "sha256:9a9845c4c6bb56e508651f005c4aeb0404e518c6f000d5a1123ab077ab769f5c", - "sha256:a1e6e96217a0f72e2b8629e271e1b280c6fa3fe6e59fa8f6701bec14e3354325", - "sha256:a8156e6a7f5e2a0ff0c5b21d6bcb45145efece1909efcbbbf48c56f8da68221d", - "sha256:a9506a7e80bcf6eacfff7f804c0ad5350c8c95b9010e4356a4b36f5322f09abb", - "sha256:af310ec8335016b5e52cae60cda4a4f2a60a788cbb949a4fbea13d441aa5a09e", - "sha256:b0297b1e05fd128d26cc2460c810d42e205d16d76799526dfa8c8ccd50e74959", - "sha256:bf68f4b2b6683e52bec69273562df15af352e5ed25d1b6641e7efddc5951d1a7", - "sha256:d0c1bc2fa9a7285719e5678584f6b92572a5b639d0e471bb8d4b650a1a910920", - "sha256:d4d9d6c1a455d4babd320203b918ccc7fcbefe308615c521062bc2ba1aa4d26e", - "sha256:db1fa631737dab9fa0b37f3979d8d2631e348c3b4e8325d6873c2541d0ae5a48", - "sha256:dd93ea5c0c7f3e25335ab7d22a507b1dc43976e1345508f845efc573d3d779d8", - "sha256:f44e517131a98f7a76696a7b21b164bcb85291cee106a23beccce454e1f433a4", - "sha256:f7ee479e96f7ee350db1cf24afa5685a5899e2b34992fb99e1f7c1b0b758d263" - ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", - "version": "==5.4.0" - } - }, - "develop": {} -} diff --git a/server/Program.cs b/server/Program.cs new file mode 100644 index 0000000..f254171 --- /dev/null +++ b/server/Program.cs @@ -0,0 +1,17 @@ +using System; +using Microsoft.AspNetCore.Hosting; +using Microsoft.Extensions.Hosting; + +namespace Server { + public class Program { + public static void Main(string[] args) { + Console.WriteLine($"Current directory: {Environment.CurrentDirectory}"); + CreateHostBuilder(args).Build().Run(); + } + + public static IHostBuilder CreateHostBuilder(string[] args) { + return Host.CreateDefaultBuilder(args) + .ConfigureWebHostDefaults(webBuilder => { webBuilder.UseStartup(); }); + } + } +} diff --git a/server/Properties/launchSettings.json b/server/Properties/launchSettings.json new file mode 100644 index 0000000..6f47537 --- /dev/null +++ b/server/Properties/launchSettings.json @@ -0,0 +1,31 @@ +{ + "$schema": "https://json.schemastore.org/launchsettings.json", + "iisSettings": { + "windowsAuthentication": false, + "anonymousAuthentication": true, + "iisExpress": { + "applicationUrl": "http://localhost:8771", + "sslPort": 44319 + } + }, + "profiles": { + "IIS Express": { + "commandName": "IISExpress", + "launchBrowser": true, + "launchUrl": "swagger", + "environmentVariables": { + "ASPNETCORE_ENVIRONMENT": "Development" + } + }, + "server": { + "commandName": "Project", + "dotnetRunMessages": "true", + "launchBrowser": true, + "launchUrl": "swagger", + "applicationUrl": "https://localhost:5001;http://localhost:5000", + "environmentVariables": { + "ASPNETCORE_ENVIRONMENT": "Development" + } + } + } +} diff --git a/server/Services/Implementations/DataManager.cs b/server/Services/Implementations/DataManager.cs new file mode 100644 index 0000000..23506ea --- /dev/null +++ b/server/Services/Implementations/DataManager.cs @@ -0,0 +1,60 @@ +using System; +using System.Collections.Generic; +using System.Threading.Tasks; +using InfoferScraper.Models.Train; +using InfoferScraper.Models.Station; +using Server.Services.Interfaces; +using Server.Utils; +using InfoferScraper; + +namespace Server.Services.Implementations { + public class DataManager : IDataManager { + private IDatabase Database { get; } + + private NodaTime.IDateTimeZoneProvider TzProvider { get; } + private NodaTime.DateTimeZone CfrTimeZone => TzProvider["Europe/Bucharest"]; + + public DataManager(NodaTime.IDateTimeZoneProvider tzProvider, IDatabase database) { + this.TzProvider = tzProvider; + this.Database = database; + + stationCache = new(async (t) => { + var (stationName, date) = t; + var zonedDate = new NodaTime.LocalDate(date.Year, date.Month, date.Day).AtStartOfDayInZone(CfrTimeZone); + + var station = await InfoferScraper.Scrapers.StationScraper.Scrape(stationName, zonedDate.ToDateTimeOffset()); + if (station != null) { + await Database.OnStationData(station); + } + return station; + }, TimeSpan.FromMinutes(1)); + trainCache = new(async (t) => { + var (trainNumber, date) = t; + var zonedDate = new NodaTime.LocalDate(date.Year, date.Month, date.Day).AtStartOfDayInZone(CfrTimeZone); + + var train = await InfoferScraper.Scrapers.TrainScraper.Scrape(trainNumber, zonedDate.ToDateTimeOffset()); + if (train != null) { + await Database.OnTrainData(train); + } + return train; + }, TimeSpan.FromSeconds(30)); + } + + private readonly AsyncCache<(string, DateOnly), IStationScrapeResult?> stationCache; + private readonly AsyncCache<(string, DateOnly), ITrainScrapeResult?> trainCache; + + public Task FetchStation(string stationName, DateTimeOffset date) { + var cfrDateTime = new NodaTime.ZonedDateTime(NodaTime.Instant.FromDateTimeOffset(date), CfrTimeZone); + var cfrDate = new DateOnly(cfrDateTime.Year, cfrDateTime.Month, cfrDateTime.Day); + + return stationCache.GetItem((stationName.RoLettersToEn().ToLowerInvariant(), cfrDate)); + } + + public Task FetchTrain(string trainNumber, DateTimeOffset date) { + var cfrDateTime = new NodaTime.ZonedDateTime(NodaTime.Instant.FromDateTimeOffset(date), CfrTimeZone); + var cfrDate = new DateOnly(cfrDateTime.Year, cfrDateTime.Month, cfrDateTime.Day); + + return trainCache.GetItem((trainNumber, cfrDate)); + } + } +} diff --git a/server/Services/Implementations/Database.cs b/server/Services/Implementations/Database.cs new file mode 100644 index 0000000..238d8c2 --- /dev/null +++ b/server/Services/Implementations/Database.cs @@ -0,0 +1,238 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Text.Json; +using System.Text.Json.Nodes; +using System.Text.Json.Serialization; +using System.Threading.Tasks; +using Microsoft.Extensions.Logging; + +namespace Server.Services.Implementations; + +public class Database : Server.Services.Interfaces.IDatabase { + private static readonly JsonSerializerOptions serializerOptions = new() { + PropertyNamingPolicy = JsonNamingPolicy.CamelCase, + }; + + private ILogger Logger { get; } + + private bool shouldCommitOnEveryChange = true; + private bool dbDataDirty = false; + private bool stationsDirty = false; + private bool trainsDirty = false; + + public DbRecord DbData { get; private set; } = new(2); + private List stations = new(); + private List trains = new(); + + public IReadOnlyList Stations => stations; + public IReadOnlyList Trains => trains; + + private static readonly string DbDir = Environment.GetEnvironmentVariable("DB_DIR") ?? Path.Join(Environment.CurrentDirectory, "db"); + private static readonly string DbFile = Path.Join(DbDir, "db.json"); + private static readonly string StationsFile = Path.Join(DbDir, "stations.json"); + private static readonly string TrainsFile = Path.Join(DbDir, "trains.json"); + + public IDisposable MakeDbTransaction() { + shouldCommitOnEveryChange = false; + return new Server.Utils.ActionDisposable(() => { + if (dbDataDirty) File.WriteAllText(DbFile, JsonSerializer.Serialize(DbData, serializerOptions)); + if (stationsDirty) { + stations.Sort((s1, s2) => s2.StoppedAtBy.Count.CompareTo(s1.StoppedAtBy.Count)); + File.WriteAllText(StationsFile, JsonSerializer.Serialize(stations, serializerOptions)); + } + if (trainsDirty) File.WriteAllText(TrainsFile, JsonSerializer.Serialize(trains, serializerOptions)); + dbDataDirty = stationsDirty = trainsDirty = false; + shouldCommitOnEveryChange = true; + }); + } + + public Database(ILogger logger) { + Logger = logger; + + if (!Directory.Exists(DbDir)) { + Logger.LogDebug("Creating directory: {DbDir}", DbDir); + Directory.CreateDirectory(DbDir); + } + + Migration(); + + if (File.Exists(DbFile)) { + DbData = JsonSerializer.Deserialize(File.ReadAllText(DbFile), serializerOptions)!; + } + else { + File.WriteAllText(DbFile, JsonSerializer.Serialize(DbData, serializerOptions)); + } + + if (File.Exists(StationsFile)) { + stations = JsonSerializer.Deserialize>(File.ReadAllText(StationsFile), serializerOptions)!; + } + + if (File.Exists(TrainsFile)) { + trains = JsonSerializer.Deserialize>(File.ReadAllText(TrainsFile), serializerOptions)!; + } + } + + private void Migration() { + if (!File.Exists(DbFile)) { +// using var _ = Logger.BeginScope("Migrating DB version 1 -> 2"); + Logger.LogInformation("Migrating DB version 1 -> 2"); + if (File.Exists(StationsFile)) { + Logger.LogDebug("Converting StationsFile"); + var oldStations = JsonNode.Parse(File.ReadAllText(StationsFile)); + if (oldStations != null) { + Logger.LogDebug("Found {StationsCount} stations", oldStations.AsArray().Count); + foreach (var station in oldStations.AsArray()) { + if (station == null) continue; + station["stoppedAtBy"] = new JsonArray(station["stoppedAtBy"]!.AsArray().Select(num => (JsonNode)(num!).ToString()!).ToArray()); + } + stations = JsonSerializer.Deserialize>(oldStations, serializerOptions)!; + } + Logger.LogDebug("Rewriting StationsFile"); + File.WriteAllText(StationsFile, JsonSerializer.Serialize(stations, serializerOptions)); + } + if (File.Exists(TrainsFile)) { + Logger.LogDebug("Converting TrainsFile"); + var oldTrains = JsonNode.Parse(File.ReadAllText(TrainsFile)); + if (oldTrains != null) { + Logger.LogDebug("Found {TrainsCount} trains", oldTrains.AsArray().Count); + foreach (var train in oldTrains.AsArray()) { + if (train == null) continue; + train["number"] = train["numberString"]; + train.AsObject().Remove("numberString"); + } + trains = JsonSerializer.Deserialize>(oldTrains, serializerOptions)!; + } + Logger.LogDebug("Rewriting TrainsFile"); + File.WriteAllText(TrainsFile, JsonSerializer.Serialize(trains, serializerOptions)); + } + DbData = new(2); + File.WriteAllText(DbFile, JsonSerializer.Serialize(DbData, serializerOptions)); + Migration(); + } + else { + var oldDbData = JsonNode.Parse(File.ReadAllText(DbFile)); + if (((int?)oldDbData?["version"]) == 2) { + Logger.LogInformation("DB Version: 2; noop"); + } + else { + throw new Exception("Unexpected Database version"); + } + } + } + + public async Task FoundTrain(string rank, string number, string company) { + number = string.Join("", number.TakeWhile(c => '0' <= c && c <= '9')); + if (!trains.Where(train => train.Number == number).Any()) { + Logger.LogDebug("Found train {Rank} {Number} from {Company}", rank, number, company); + trains.Add(new(number, rank, company)); + if (shouldCommitOnEveryChange) { + await File.WriteAllTextAsync(TrainsFile, JsonSerializer.Serialize(trains, serializerOptions)); + } + else { + trainsDirty = true; + } + } + return number; + } + + public async Task FoundStation(string name) { + if (!stations.Where(station => station.Name == name).Any()) { + Logger.LogDebug("Found station {StationName}", name); + stations.Add(new(name, new())); + if (shouldCommitOnEveryChange) { + await File.WriteAllTextAsync(StationsFile, JsonSerializer.Serialize(stations, serializerOptions)); + } + else { + stationsDirty = true; + } + } + } + + public async Task FoundTrainAtStation(string stationName, string trainNumber) { + trainNumber = string.Join("", trainNumber.TakeWhile(c => '0' <= c && c <= '9')); + await FoundStation(stationName); + var dirty = false; + for (var i = 0; i < stations.Count; i++) { + if (stations[i].Name == stationName) { + if (!stations[i].StoppedAtBy.Contains(trainNumber)) { + Logger.LogDebug("Found train {TrainNumber} at station {StationName}", trainNumber, stationName); + stations[i].ActualStoppedAtBy.Add(trainNumber); + dirty = true; + } + break; + } + } + if (dirty) { + if (shouldCommitOnEveryChange) { + stations.Sort((s1, s2) => s2.StoppedAtBy.Count.CompareTo(s1.StoppedAtBy.Count)); + await File.WriteAllTextAsync(StationsFile, JsonSerializer.Serialize(stations, serializerOptions)); + } + else { + stationsDirty = true; + } + } + } + + public async Task OnTrainData(InfoferScraper.Models.Train.ITrainScrapeResult trainData) { + using var _ = MakeDbTransaction(); + var trainNumber = await FoundTrain(trainData.Rank, trainData.Number, trainData.Operator); + foreach (var group in trainData.Groups) { + foreach (var station in group.Stations) { + await FoundTrainAtStation(station.Name, trainNumber); + } + } + } + + public async Task OnStationData(InfoferScraper.Models.Station.IStationScrapeResult stationData) { + var stationName = stationData.StationName; + + async Task ProcessTrain(InfoferScraper.Models.Station.IStationArrDep train) { + var trainNumber = train.Train.Number; + trainNumber = await FoundTrain(train.Train.Rank, trainNumber, train.Train.Operator); + await FoundTrainAtStation(stationName, trainNumber); + if (train.Train.Route.Count != 0) { + foreach (var station in train.Train.Route) { + await FoundTrainAtStation(station, trainNumber); + } + } + } + + using var _ = MakeDbTransaction(); + + if (stationData.Arrivals != null) { + foreach (var train in stationData.Arrivals) { + await ProcessTrain(train); + } + } + if (stationData.Departures != null) { + foreach (var train in stationData.Departures) { + await ProcessTrain(train); + } + } + } +} + +public record DbRecord(int Version); + +public record StationRecord : Server.Services.Interfaces.IStationRecord { + [JsonPropertyName("stoppedAtBy")] + public List ActualStoppedAtBy { get; init; } + + public string Name { get; init; } + [JsonIgnore] + public IReadOnlyList StoppedAtBy => ActualStoppedAtBy; + + public StationRecord() { + Name = ""; + ActualStoppedAtBy = new(); + } + + public StationRecord(string name, List stoppedAtBy) { + Name = name; + ActualStoppedAtBy = stoppedAtBy; + } +} + +public record TrainRecord(string Number, string Rank, string Company) : Server.Services.Interfaces.ITrainRecord; diff --git a/server/Services/Interfaces/IDataManager.cs b/server/Services/Interfaces/IDataManager.cs new file mode 100644 index 0000000..0a24dae --- /dev/null +++ b/server/Services/Interfaces/IDataManager.cs @@ -0,0 +1,11 @@ +using System; +using System.Threading.Tasks; +using InfoferScraper.Models.Train; +using InfoferScraper.Models.Station; + +namespace Server.Services.Interfaces; + +public interface IDataManager { + public Task FetchStation(string stationName, DateTimeOffset date); + public Task FetchTrain(string trainNumber, DateTimeOffset date); +} diff --git a/server/Services/Interfaces/IDatabase.cs b/server/Services/Interfaces/IDatabase.cs new file mode 100644 index 0000000..1df6ad4 --- /dev/null +++ b/server/Services/Interfaces/IDatabase.cs @@ -0,0 +1,28 @@ +using System.Collections.Generic; +using System.Threading.Tasks; +using InfoferScraper.Models.Train; +using InfoferScraper.Models.Station; + +namespace Server.Services.Interfaces; + +public interface IDatabase { + public IReadOnlyList Stations { get; } + public IReadOnlyList Trains { get; } + + public Task FoundTrain(string rank, string number, string company); + public Task FoundStation(string name); + public Task FoundTrainAtStation(string stationName, string trainName); + public Task OnTrainData(ITrainScrapeResult trainData); + public Task OnStationData(IStationScrapeResult stationData); +} + +public interface IStationRecord { + public string Name { get; } + public IReadOnlyList StoppedAtBy { get; } +} + +public interface ITrainRecord { + public string Rank { get; } + public string Number { get; } + public string Company { get; } +} diff --git a/server/Startup.cs b/server/Startup.cs new file mode 100644 index 0000000..5bf5af9 --- /dev/null +++ b/server/Startup.cs @@ -0,0 +1,56 @@ +using System.Text.Json; +using Microsoft.AspNetCore.Builder; +using Microsoft.AspNetCore.Hosting; +using Microsoft.Extensions.Configuration; +using Microsoft.Extensions.DependencyInjection; +using Microsoft.Extensions.Hosting; +using Microsoft.OpenApi.Models; +using Server.Services.Implementations; +using Server.Services.Interfaces; + +namespace Server { + public class Startup { + public Startup(IConfiguration configuration) { + Configuration = configuration; + } + + public IConfiguration Configuration { get; } + + // This method gets called by the runtime. Use this method to add services to the container. + public void ConfigureServices(IServiceCollection services) { + services.AddSingleton(); + services.AddSingleton(); + services.AddSingleton(NodaTime.DateTimeZoneProviders.Tzdb); + services.AddControllers() + .AddJsonOptions(options => { + options.JsonSerializerOptions.PropertyNamingPolicy = JsonNamingPolicy.CamelCase; + }); + services.AddSwaggerGen(c => { + c.SwaggerDoc("v1", new OpenApiInfo { Title = "InfoTren Scraper", Version = "v1" }); + c.SwaggerDoc("v2", new OpenApiInfo { Title = "InfoTren Scraper", Version = "v2" }); + c.SwaggerDoc("v3", new OpenApiInfo { Title = "InfoTren Scraper", Version = "v3" }); + }); + } + + // This method gets called by the runtime. Use this method to configure the HTTP request pipeline. + public void Configure(IApplicationBuilder app, IWebHostEnvironment env) { + if (env.IsDevelopment()) { + app.UseDeveloperExceptionPage(); + app.UseSwagger(); + app.UseSwaggerUI(c => { + c.SwaggerEndpoint("/swagger/v3/swagger.json", "InfoTren Scraper v3"); + c.SwaggerEndpoint("/swagger/v2/swagger.json", "InfoTren Scraper v2"); + c.SwaggerEndpoint("/swagger/v1/swagger.json", "InfoTren Scraper v1"); + }); + } + + // app.UseHttpsRedirection(); + + app.UseRouting(); + + app.UseAuthorization(); + + app.UseEndpoints(endpoints => { endpoints.MapControllers(); }); + } + } +} diff --git a/server/Utils/ActionDisposable.cs b/server/Utils/ActionDisposable.cs new file mode 100644 index 0000000..9e654ce --- /dev/null +++ b/server/Utils/ActionDisposable.cs @@ -0,0 +1,15 @@ +using System; + +namespace Server.Utils; + +public class ActionDisposable : IDisposable { + public Action Action { get; init; } + + public ActionDisposable(Action action) { + Action = action; + } + + public void Dispose() { + Action(); + } +} diff --git a/server/Utils/Cache.cs b/server/Utils/Cache.cs new file mode 100644 index 0000000..78cc2ea --- /dev/null +++ b/server/Utils/Cache.cs @@ -0,0 +1,69 @@ +using System; +using System.Collections.Generic; +using System.Threading.Tasks; + +namespace Server.Utils; + +public class Cache where TKey: notnull { + private readonly IDictionary cache; + + public Func Fetcher { get; init; } + public TimeSpan Validity { get; init; } + public bool StoreNull { get; init; } + + public Cache(Func fetcher, TimeSpan validity, bool storeNull = false) { + this.cache = new Dictionary(); + Fetcher = fetcher; + Validity = validity; + StoreNull = storeNull; + } + + public TValue GetItem(TKey key) { + if (cache.ContainsKey(key)) { + if (cache[key].FetchTime + Validity > DateTimeOffset.Now) { + return cache[key].Data; + } + else { + cache.Remove(key); + } + } + + var data = Fetcher(key); + if (data != null) { + cache[key] = (data, DateTimeOffset.Now); + } + return data; + } +} + +public class AsyncCache where TKey: notnull { + private readonly IDictionary cache; + + public Func> Fetcher { get; init; } + public TimeSpan Validity { get; init; } + public bool StoreNull { get; init; } + + public AsyncCache(Func> fetcher, TimeSpan validity, bool storeNull = false) { + this.cache = new Dictionary(); + Fetcher = fetcher; + Validity = validity; + StoreNull = storeNull; + } + + public async Task GetItem(TKey key) { + if (cache.ContainsKey(key)) { + if (cache[key].FetchTime + Validity > DateTimeOffset.Now) { + return cache[key].Data; + } + else { + cache.Remove(key); + } + } + + var data = await Fetcher(key); + if (data != null) { + cache[key] = (data, DateTimeOffset.Now); + } + return data; + } +} diff --git a/server/appsettings.Development.json b/server/appsettings.Development.json new file mode 100644 index 0000000..ab5b812 --- /dev/null +++ b/server/appsettings.Development.json @@ -0,0 +1,9 @@ +{ + "Logging": { + "LogLevel": { + "Default": "Debug", + "Microsoft": "Warning", + "Microsoft.Hosting.Lifetime": "Information" + } + } +} diff --git a/server/appsettings.json b/server/appsettings.json new file mode 100644 index 0000000..e409635 --- /dev/null +++ b/server/appsettings.json @@ -0,0 +1,13 @@ +{ + "ConnectionStrings": { + "caching": "Data Source=./caching.sqlite" + }, + "Logging": { + "LogLevel": { + "Default": "Information", + "Microsoft": "Warning", + "Microsoft.Hosting.Lifetime": "Information" + } + }, + "AllowedHosts": "*" +} diff --git a/server/main.py b/server/main.py deleted file mode 100644 index 7f1074b..0000000 --- a/server/main.py +++ /dev/null @@ -1,18 +0,0 @@ -from gevent.pywsgi import WSGIServer -from server.server import app - -def main(): - port = 5000 - - import os - try: - port = int(os.environ['PORT']) - except: - pass - - print(f'Starting server on port {port}') - http_server = WSGIServer(('', port), app) - http_server.serve_forever() - -if __name__ == '__main__': - main() diff --git a/server/omnisharp.json b/server/omnisharp.json new file mode 120000 index 0000000..4ed1f68 --- /dev/null +++ b/server/omnisharp.json @@ -0,0 +1 @@ +../omnisharp.json \ No newline at end of file diff --git a/server/server.csproj b/server/server.csproj new file mode 100644 index 0000000..ea47826 --- /dev/null +++ b/server/server.csproj @@ -0,0 +1,25 @@ + + + + enable + Server + Server + net6.0 + + + + + + + + + + + + + + + + + + diff --git a/server/server/__init__.py b/server/server/__init__.py deleted file mode 100644 index 7cd07a9..0000000 --- a/server/server/__init__.py +++ /dev/null @@ -1 +0,0 @@ -__all__ = ['server'] diff --git a/server/server/cache.py b/server/server/cache.py deleted file mode 100644 index d7e887d..0000000 --- a/server/server/cache.py +++ /dev/null @@ -1,18 +0,0 @@ -from datetime import date, datetime, timedelta - -_NO_DEFAULT = object() - -class CachedData: - def __init__(self, getter, initial_data=_NO_DEFAULT, validity=1000): - self.getter = getter - self.data = initial_data - self.last_refresh_date = datetime.now() - self.validity = timedelta(milliseconds=validity) - if initial_data == _NO_DEFAULT: - self.last_refresh_date -= self.validity - - def __call__(self, *args, **kwds): - if self.last_refresh_date + self.validity < datetime.now(): - self.data = self.getter() - self.last_refresh_date = datetime.now() - return self.data, self.last_refresh_date diff --git a/server/server/db.py b/server/server/db.py deleted file mode 100644 index 3136c21..0000000 --- a/server/server/db.py +++ /dev/null @@ -1,173 +0,0 @@ -# Globals -stations = [] -trains = [] -db_data = { - 'version': 2, -} - -# Examples -example_station = { - 'name': 'Gară', - 'stoppedAtBy': [123, 456] -} - -example_train = { - 'rank': 'IR', - 'numberString': '74', - 'number': 74, - 'company': 'CFR Călători' -} - -# Init - -import json -import os -from os import path, stat -from contextlib import contextmanager - -from .utils import take_while - -DB_DIR = os.environ.get('DB_DIR', '') or './db' -if not path.exists(DB_DIR): - os.mkdir(DB_DIR) - -DB_FILE = path.join(DB_DIR, 'db.json') - -STATIONS_FILE = path.join(DB_DIR, 'stations.json') - -TRAINS_FILE = path.join(DB_DIR, 'trains.json') - -def migration(): - global db_data - global trains - global stations - if not path.exists(DB_FILE): - print('[Migration] Migrating DB version 1 -> 2') - if path.exists(STATIONS_FILE): - with open(STATIONS_FILE) as f: - stations = json.load(f) - for i in range(len(stations)): - stations[i]['stoppedAtBy'] = [str(num) for num in stations[i]['stoppedAtBy']] - with open(STATIONS_FILE, 'w') as f: - json.dump(stations, f) - if path.exists(TRAINS_FILE): - with open(TRAINS_FILE) as f: - trains = json.load(f) - for i in range(len(trains)): - trains[i]['number'] = trains[i]['numberString'] - del trains[i]['numberString'] - with open(TRAINS_FILE, 'w') as f: - json.dump(trains, f) - db_data = { - 'version': 2, - } - with open(DB_FILE, 'w') as f: - json.dump(db_data, f) - migration() - else: - with open(DB_FILE) as f: - db_data = json.load(f) - if db_data['version'] == 2: - print('[Migration] DB Version: 2, noop') - -migration() - -if path.exists(DB_FILE): - with open(DB_FILE) as f: - db_data = json.load(f) -else: - with open(DB_FILE, 'w') as f: - json.dump(db_data, f) - -if path.exists(STATIONS_FILE): - with open(STATIONS_FILE) as f: - stations = json.load(f) - -if path.exists(TRAINS_FILE): - with open(TRAINS_FILE) as f: - trains = json.load(f) - -_should_commit_on_every_change = True - -@contextmanager -def db_transaction(): - global _should_commit_on_every_change - _should_commit_on_every_change = False - yield - with open(DB_FILE, 'w') as f: - json.dump(db_data, f) - with open(STATIONS_FILE, 'w') as f: - stations.sort(key=lambda s: len(s['stoppedAtBy']), reverse=True) - json.dump(stations, f) - with open(TRAINS_FILE, 'w') as f: - json.dump(trains, f) - _should_commit_on_every_change = True - -def found_train(rank: str, number: str, company: str) -> int: - number = ''.join(take_while(lambda s: str(s).isnumeric(), number)) - try: - next(filter(lambda tr: tr['number'] == number, trains)) - except StopIteration: - trains.append({ - 'number': number, - 'company': company, - 'rank': rank, - }) - if _should_commit_on_every_change: - with open(TRAINS_FILE, 'w') as f: - json.dump(trains, f) - return number - -def found_station(name: str): - try: - next(filter(lambda s: s['name'] == name, stations)) - except StopIteration: - stations.append({ - 'name': name, - 'stoppedAtBy': [], - }) - if _should_commit_on_every_change: - stations.sort(key=lambda s: len(s['stoppedAtBy']), reverse=True) - with open(STATIONS_FILE, 'w') as f: - json.dump(stations, f) - -def found_train_at_station(station_name: str, train_number: str): - train_number = ''.join(take_while(lambda s: str(s).isnumeric(), train_number)) - found_station(station_name) - for i in range(len(stations)): - if stations[i]['name'] == station_name: - if train_number not in stations[i]['stoppedAtBy']: - stations[i]['stoppedAtBy'].append(train_number) - break - if _should_commit_on_every_change: - stations.sort(key=lambda s: len(s['stoppedAtBy']), reverse=True) - with open(STATIONS_FILE, 'w') as f: - json.dump(stations, f) - -def on_train_data(train_data: dict): - with db_transaction(): - train_no = found_train(train_data['rank'], train_data['number'], train_data['operator']) - for station in train_data['stations']: - found_train_at_station(station['name'], train_no) - -def on_train_lookup_failure(train_no: str): - pass - -def on_station(station_data: dict): - station_name = station_data['stationName'] - - def process_train(train_data: dict): - train_number = train_data['train']['number'] - train_number = found_train(train_data['train']['rank'], train_number, train_data['train']['operator']) - found_train_at_station(station_name, train_number) - if 'route' in train_data['train'] and train_data['train']['route']: - for station in train_data['train']['route']: - found_train_at_station(station, train_number) - - with db_transaction(): - if station_data['arrivals']: - for train in station_data['arrivals']: - process_train(train) - if station_data['departures']: - for train in station_data['departures']: - process_train(train) diff --git a/server/server/flask_utils.py b/server/server/flask_utils.py deleted file mode 100644 index c43f00c..0000000 --- a/server/server/flask_utils.py +++ /dev/null @@ -1,29 +0,0 @@ -from flask import request as _f_request - -from .utils import filter_result as _filter_result - -def filtered_data(fn): - def filterer(*args, **kwargs): - filters = _f_request.args.get('filters', None) - if filters: - filters_raw = [f.split(':', 1) for f in filters.split(',')] - filters = {'.': []} - for key, value in filters_raw: - def add_to(obj, key, value): - if '.' in key: - prop, key = key.split('.', 1) - if prop not in filters: - obj[prop] = {'.': []} - add_to(obj[prop], key, value) - else: - obj['.'].append({key: value}) - add_to(filters, key, value) - properties = _f_request.args.get('properties', None) - if properties: - properties = properties.split(',') - - data = fn(*args, **kwargs) - - return _filter_result(data, properties, filters) - - return filterer diff --git a/server/server/scraper b/server/server/scraper deleted file mode 120000 index 294e447..0000000 --- a/server/server/scraper +++ /dev/null @@ -1 +0,0 @@ -../../scraper \ No newline at end of file diff --git a/server/server/server.py b/server/server/server.py deleted file mode 100644 index d7a8543..0000000 --- a/server/server/server.py +++ /dev/null @@ -1,65 +0,0 @@ -print(f'Server {__name__=}') - -import datetime - -from flask import Flask, jsonify, url_for -from jsonschema import validate - -from .cache import CachedData -from .scraper.schemas import TRAIN_INFO_SCHEMA -from .utils import get_hostname - -app = Flask(__name__) - -from .v2 import v2 -app.register_blueprint(v2.bp) - -@app.route('/') -def root(): - return 'Test' - -@app.route('/train/.schema.json') -def get_train_info_schema(): - return jsonify(TRAIN_INFO_SCHEMA['v1']) - -train_data_cache = {} - -@app.route('/train/') -def get_train_info(train_no: int): - def get_data(): - from .scraper.scraper import scrape_train - use_yesterday = False - result = scrape_train(train_no, use_yesterday=use_yesterday) - - from . import db - db.on_train_data(result) - - # Convert to v1 - # datetime ISO string to hh:mm - for i in range(len(result['stations'])): - if result['stations'][i]['arrival']: - date = datetime.datetime.fromisoformat(result['stations'][i]['arrival']['scheduleTime']) - result['stations'][i]['arrival']['scheduleTime'] = f'{date.hour}:{date.minute:02}' - if result['stations'][i]['departure']: - date = datetime.datetime.fromisoformat(result['stations'][i]['departure']['scheduleTime']) - result['stations'][i]['departure']['scheduleTime'] = f'{date.hour}:{date.minute:02}' - if 'stoppingTime' in result['stations'][i] and result['stations'][i]['stoppingTime']: - result['stations'][i]['stoppingTime'] //= 60 - - return result - if train_no not in train_data_cache: - train_data_cache[train_no] = CachedData(get_data, validity=1000 * 30) - data, fetch_time = train_data_cache[train_no]() - data['$schema'] = get_hostname() + url_for('.get_train_info_schema') - validate(data, schema=TRAIN_INFO_SCHEMA['v1']) - resp = jsonify(data) - resp.headers['X-Last-Fetched'] = fetch_time.isoformat() - return resp - -@app.route('/trains') -def get_trains(): - return jsonify(list(train_data_cache.keys())) - -if __name__ == '__main__': - print('Starting debug server on port 5001') - app.run(port=5000) diff --git a/server/server/utils.py b/server/server/utils.py deleted file mode 100644 index 8ebc85d..0000000 --- a/server/server/utils.py +++ /dev/null @@ -1,41 +0,0 @@ -def take_while(predicate, input): - for element in input: - if not predicate(element): - break - yield element - -_NO_DEFAULT = object() - -def check_yes_no(input: str, default=_NO_DEFAULT, considered_yes=None) -> bool: - input = str(input).strip().lower() - if not input: - if default == _NO_DEFAULT: - raise Exception('Empty input with no default') - return default - if not considered_yes: - considered_yes = ['y', 'yes', 't', 'true', '1'] - return input in considered_yes - -def get_hostname(): - import os - import platform - return os.getenv('HOSTNAME', os.getenv('COMPUTERNAME', platform.node())) - -def filter_result(data, properties=None, filters=None): - is_array = not hasattr(data, 'get') - result = data if is_array else [data] - - if filters: - # Todo: implement filters - pass - # def f(lst, filters): - # def condition(item): - - # return list(filter(condition, lst)) - # result = f(result, filters) - - if properties: - for i in range(len(result)): - result[i] = {p:result[i].get(p, None) for p in properties} - - return result if is_array else result[0] diff --git a/server/server/v2/__init__.py b/server/server/v2/__init__.py deleted file mode 100644 index 03c5bab..0000000 --- a/server/server/v2/__init__.py +++ /dev/null @@ -1 +0,0 @@ -__all__ = ['v2'] \ No newline at end of file diff --git a/server/server/v2/v2.py b/server/server/v2/v2.py deleted file mode 100644 index 47583a2..0000000 --- a/server/server/v2/v2.py +++ /dev/null @@ -1,94 +0,0 @@ -from datetime import date, datetime -import json -from flask import Blueprint, jsonify, request -from flask.helpers import url_for -from jsonschema import validate - -from .. import db -from ..cache import CachedData -from ..utils import check_yes_no, get_hostname -from ..flask_utils import filtered_data -from ..scraper.utils import ro_letters_to_en -from ..scraper.schemas import STATION_SCHEMA, TRAIN_INFO_SCHEMA - -bp = Blueprint('v2', __name__, url_prefix='/v2') - -@bp.get('/trains') -def get_known_trains(): - @filtered_data - def get_data(): - return db.trains - - result = get_data() - - return jsonify(result) - -@bp.get('/stations') -def get_known_stations(): - @filtered_data - def get_data(): - return db.stations - - result = get_data() - - return jsonify(result) - -train_data_cache = {} - -@bp.route('/train/.schema.json') -def get_train_info_schema(): - return jsonify(TRAIN_INFO_SCHEMA['v2']) - -@bp.route('/train/') -def get_train_info(train_no: str): - use_yesterday = check_yes_no(request.args.get('use_yesterday', ''), default=False) - date_override = request.args.get('date', default=None) - try: - date_override = datetime.fromisoformat(date_override) - except ValueError: - date_override = None - - @filtered_data - def get_data(): - from ..scraper.scraper import scrape_train - result = scrape_train(train_no, use_yesterday=use_yesterday, date_override=date_override) - db.on_train_data(result) - return result - if (train_no, use_yesterday) not in train_data_cache: - train_data_cache[(train_no, use_yesterday or date_override)] = CachedData(get_data, validity=1000 * 30) - data, fetch_time = train_data_cache[(train_no, use_yesterday or date_override)]() - data['$schema'] = get_hostname() + url_for('.get_train_info_schema') - validate(data, schema=TRAIN_INFO_SCHEMA['v2']) - resp = jsonify(data) - resp.headers['X-Last-Fetched'] = fetch_time.isoformat() - return resp - -station_cache = {} - -@bp.route('/station/.schema.json') -def get_station_schema(): - return jsonify(STATION_SCHEMA['v2']) - -@bp.route('/station/') -def get_station(station_name: str): - station_name = ro_letters_to_en(station_name.lower().replace(' ', '-')) - - def get_data(): - from ..scraper.scraper import scrape_station - result = scrape_station(station_name) - db.on_station(result) - return result - if station_name not in train_data_cache: - station_cache[station_name] = CachedData(get_data, validity=1000 * 30) - data, fetch_time = station_cache[station_name]() - data['$schema'] = get_hostname() + url_for('.get_station_schema') - validate(data, schema=STATION_SCHEMA['v2']) - - @filtered_data - def filter(data): - return data - - resp = jsonify(filter(data)) - resp.headers['X-Last-Fetched'] = fetch_time.isoformat() - return resp -