You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 

308 lines
8.8 KiB

#! /usr/bin/env ruby
require 'erb'
require 'json'
require 'open3'
require 'set'
# Date being archived: the local calendar date 24 hours before the script
# started, formatted as YYYY-MM-DD.
$datestr = (Time.now - (24 * 60 * 60)).strftime('%F')
# Scope-function style helpers made available on every object.
class Object
  # Yields the receiver to the block, ignores the block's value and returns
  # the receiver itself.
  def also
    tap { |receiver| yield receiver }
  end

  # Yields the receiver to the block and returns whatever the block returns.
  def let
    yield(self)
  end
end
# Produces the normalized form of a station name: lowercased, every whitespace
# character replaced by a plain space, and the letters ă, â, î, ș, ț replaced
# by a, a, i, s, t respectively.
# @param [String] station_name
# @return [String]
def normalize_station(station_name)
  lowered = station_name.downcase
  single_spaced = lowered.gsub(/\s/, ' ')
  # One character-for-character translation covers all five letters.
  single_spaced.tr('ăâîșț', 'aaist')
end
# Maps a station name to the `linkName` reported for it in train data.
# Filled by populate_link_name_cache; consulted when building station request
# URLs and archive file names.
$station_link_name_cache = {}
# Loads the data for one station on the date held in $datestr.
#
# The on-disk archive ("<date>/stations/<normalized name>.json") is consulted
# first. If that file is missing or cannot be read/parsed, the scraper API is
# queried through curl instead.
#
# @param [String] station_name
# @return [Object, nil] the parsed JSON document (object keys symbolized), or
#   nil when curl reports a failure or the response body is not valid JSON
def get_station(station_name)
  normalized = normalize_station(station_name)

  # Try getting from file
  begin
    cached = File.read(File.join($datestr, 'stations', "#{normalized}.json"))
    return JSON.parse(cached, symbolize_names: true)
  rescue StandardError
    # Best effort: any problem with the archived copy falls through to the network.
  end

  # Use the recorded link name for this station when there is one.
  link_name = $station_link_name_cache.fetch(normalized, normalized)
  url = "https://scraper.infotren.dcdev.ro/v3/stations/#{ERB::Util.url_encode(link_name)}?date=#{$datestr}"
  stdout, status = Open3.capture2('curl', '--silent', '--fail', '--show-error', url)
  return nil unless status.success?

  begin
    JSON.parse(stdout, symbolize_names: true)
  rescue JSON::ParserError
    # A completed transfer with a non-JSON body is reported like any other
    # failed fetch (nil) so callers can retry instead of the whole run aborting.
    nil
  end
end
# Loads the data for one train on the date held in $datestr.
#
# The on-disk archive ("<date>/trains/<train number>.json") is consulted first.
# If that file is missing or cannot be read/parsed, the scraper API is queried
# through curl instead.
#
# @param [String] train_number
# @return [Object, nil] the parsed JSON document (object keys symbolized), or
#   nil when curl reports a failure or the response body is not valid JSON
def get_train(train_number)
  # Try getting from file
  begin
    cached = File.read(File.join($datestr, 'trains', "#{train_number}.json"))
    return JSON.parse(cached, symbolize_names: true)
  rescue StandardError
    # Best effort: any problem with the archived copy falls through to the network.
  end

  url = "https://scraper.infotren.dcdev.ro/v3/trains/#{ERB::Util.url_encode(train_number)}?date=#{$datestr}"
  stdout, status = Open3.capture2('curl', '--silent', '--fail', '--show-error', url)
  return nil unless status.success?

  begin
    JSON.parse(stdout, symbolize_names: true)
  rescue JSON::ParserError
    # A completed transfer with a non-JSON body is reported like any other
    # failed fetch (nil) so callers can retry instead of the whole run aborting.
    nil
  end
end
# Records, for every station in every group of the given train, the station's
# :linkName under its :name in $station_link_name_cache.
#
# NOTE(review): keys are stored exactly as they appear in the train data, while
# the lookups elsewhere in this file pass normalize_station output as the key.
# Confirm the two are meant to line up.
#
# @param [Hash] train parsed train data with a :groups list
def populate_link_name_cache(train)
  train[:groups].each do |group|
    group[:stations].each { |stop| $station_link_name_cache.store(stop[:name], stop[:linkName]) }
  end
end
# Collects every route entry of every train listed under the station's
# :arrivals and then its :departures.
#
# @param [Hash] station parsed station data
# @return [Set] the distinct route entries, in first-seen order
def get_stations_from_station(station)
  names = Set.new
  %i[arrivals departures].each do |direction|
    station[direction].each do |entry|
      entry[:train][:route].each { |stop| names.add(stop) }
    end
  end
  names
end
# Calls the block up to `times` times and returns the first result that is
# not nil (a `false` result counts as a result).
#
# @param [Integer] times maximum number of calls
# @return [Object, nil] the first non-nil block result, or nil if every call
#   returned nil or `times` is zero
def reattempt(times)
  times.times do
    outcome = yield
    return outcome unless outcome.nil?
  end
  nil
end
# Collects the number of every train listed under the station's :arrivals and
# then its :departures.
#
# @param [Hash] station parsed station data
# @return [Set] the distinct train numbers, in first-seen order
def get_trains_from_station(station)
  numbers = Set.new
  %i[arrivals departures].each do |direction|
    station[direction].each { |entry| numbers.add(entry[:train][:number]) }
  end
  numbers
end
# Lists the :name of every station of every group of the train, in order and
# with duplicates kept.
#
# @param [Hash] train parsed train data with a :groups list
# @return [Array] station names
def get_stations_from_train(train)
  stops = train[:groups].flat_map { |group| group[:stations] }
  stops.map { |stop| stop[:name] }
end
# Console logger with two kinds of lines: permanent ones, and temporary ones
# that are wiped from the terminal by the next permanent line.
class Log
  def initialize
    # Number of temporary lines printed since the last permanent line.
    @to_erase = 0
  end

  # Prints a permanent line. First emits "cursor up + clear line" once per
  # pending temporary line, then resets the pending count.
  # @param [String] message
  def add(message)
    $stdout.print("\e[1A\e[2K" * @to_erase)
    @to_erase = 0
    $stdout.puts(message)
  end

  # Prints a temporary line, but only when standard output is a terminal;
  # otherwise does nothing.
  # @param [String] message
  def temporary_add(message)
    return unless $stdout.isatty

    $stdout.puts(message)
    @to_erase += 1
  end
end
# Crawls the scraper API for the date in $datestr and archives every station
# and train it can reach as JSON under "<date>/stations" and "<date>/trains".
#
# The crawl is seeded with a fixed list of root stations plus whatever an
# earlier run for the same date already saved. It then drains two work queues,
# always preferring trains over stations, until both are empty. Alongside the
# archive it writes start_time.txt, end_time.txt, failed_stations.txt,
# failed_trains.txt and failures.json, and finally records the archived date
# in ./last_executed_run.txt.
def main
  log = Log.new
  log.add "Creating archive for yesterday, #{$datestr}"

  stations_dir = File.join($datestr, 'stations')
  trains_dir = File.join($datestr, 'trains')

  existing = Dir.exist?($datestr)
  log.add 'Archive already exists. Merging.' if existing
  # Create whatever is missing, so an archive directory that lacks one of its
  # subdirectories can still be merged into instead of crashing the listing below.
  [$datestr, stations_dir, trains_dir].each do |dir|
    Dir.mkdir(dir) unless Dir.exist?(dir)
  end

  # @type [Array<String>]
  roots = ['București Nord', 'Brașov', 'Iași', 'Titan Sud', 'Oltenița']

  # Stations already archived, identified by the :stationName stored in each file.
  # @type [Array<String>]
  visited_stations = []
  Dir.children(stations_dir).each do |entry|
    name = entry.delete_suffix('.json')
    archived = get_station(name)
    if archived.nil?
      # An entry that can be neither read nor re-fetched is left out rather
      # than aborting the whole run.
      log.add "Could not load archived station #{name}, skipping it"
    else
      visited_stations << archived[:stationName]
    end
  end
  log.add "#{visited_stations.count} visited stations" if existing
  # @type [Array<String>]
  unvisited_stations = []
  # @type [Set<String>]
  failed_stations = Set.new

  # @type [Array<String>]
  visited_trains = Dir.children(trains_dir).map { |entry| entry.delete_suffix('.json') }
  log.add "#{visited_trains.count} visited trains" if existing
  # @type [Array<String>]
  unvisited_trains = []
  # @type [Set<String>]
  failed_trains = Set.new

  # Queues every name that is neither visited nor already queued and returns
  # how many were added. An item that failed earlier is in neither list, so it
  # is queued again when something else refers to it.
  enqueue_stations = lambda do |names|
    before = unvisited_stations.count
    names.each do |name|
      unvisited_stations << name unless visited_stations.include?(name) || unvisited_stations.include?(name)
    end
    unvisited_stations.count - before
  end
  # Same as above, for train numbers.
  enqueue_trains = lambda do |numbers|
    before = unvisited_trains.count
    numbers.each do |number|
      unvisited_trains << number unless visited_trains.include?(number) || unvisited_trains.include?(number)
    end
    unvisited_trains.count - before
  end
  # The two progress summary lines, reflecting the current counters.
  progress_lines = lambda do
    [
      "Stations: #{visited_stations.count} visited, #{failed_stations.count} failed, #{unvisited_stations.count} remaining",
      "Trains: #{visited_trains.count} visited, #{failed_trains.count} failed, #{unvisited_trains.count} remaining"
    ]
  end

  enqueue_stations.call(roots)

  # Get unvisited from visited
  visited_trains.each do |train_number|
    train = get_train(train_number)
    if train.nil?
      log.add "Could not load archived train #{train_number}, skipping it"
      next
    end
    populate_link_name_cache(train)
    enqueue_stations.call(get_stations_from_train(train))
  end
  visited_stations.each do |station_name|
    station = get_station(station_name)
    if station.nil?
      log.add "Could not load archived station #{station_name}, skipping it"
      next
    end
    enqueue_stations.call(get_stations_from_station(station))
    enqueue_trains.call(get_trains_from_station(station))
  end

  log.add "#{unvisited_stations.count} unvisited stations" unless unvisited_stations.empty?
  log.add "#{unvisited_trains.count} unvisited trains" unless unvisited_trains.empty?

  start_time = Time.now
  File.write(File.join($datestr, 'start_time.txt'), start_time.strftime('%FT%T%:z'))

  until unvisited_stations.empty? && unvisited_trains.empty?
    if !unvisited_trains.empty?
      # First visit all trains
      # @type [String]
      train_number = unvisited_trains.shift
      log.temporary_add "Getting train #{train_number}"
      train = reattempt(3) { get_train(train_number) }
      if train.nil?
        # Failed to get
        failed_trains << train_number
        log.add "Failed to get train #{train_number}"
      else
        populate_link_name_cache(train)
        visited_trains << train_number
        File.write(File.join(trains_dir, "#{train_number}.json"), JSON.dump(train))
        log.add "Got train #{train_number}"
        new_stations = enqueue_stations.call(get_stations_from_train(train))
        log.add "Found #{new_stations} new stations" unless new_stations.zero?
      end
    else
      # Then visit stations (the loop condition guarantees this queue is non-empty here)
      # @type [String]
      station_name = unvisited_stations.shift
      log.temporary_add "Getting station #{station_name}"
      station = reattempt(3) { get_station(station_name) }
      if station.nil?
        # Failed to get
        failed_stations << station_name
        log.add "Failed to get station #{station_name}"
      else
        visited_stations << station_name
        filename = normalize_station(station_name)
        filename = $station_link_name_cache.fetch(filename, filename)
        File.write(File.join(stations_dir, "#{filename}.json"), JSON.dump(station))
        log.add "Got station #{station_name}"
        new_stations = enqueue_stations.call(get_stations_from_station(station))
        new_trains = enqueue_trains.call(get_trains_from_station(station))
        log.add "Found #{new_stations} new stations" unless new_stations.zero?
        log.add "Found #{new_trains} new trains" unless new_trains.zero?
      end
    end
    progress_lines.call.each { |line| log.temporary_add line }
  end

  progress_lines.call.each { |line| log.add line }
  end_time = Time.now
  File.write(File.join($datestr, 'end_time.txt'), end_time.strftime('%FT%T%:z'))
  timespan = (end_time - start_time).to_i
  log.add "Finished in #{timespan}s (#{timespan / 3600}h #{timespan / 60 % 60}m #{timespan % 60}s)"

  # Finally, write failures log
  File.open(File.join($datestr, 'failed_stations.txt'), 'w') do |f|
    failed_stations.each { |name| f.puts name }
  end
  File.open(File.join($datestr, 'failed_trains.txt'), 'w') do |f|
    failed_trains.each { |number| f.puts number }
  end
  # Convert the sets to arrays first: the JSON generator has no Set support
  # unless an extension for it is loaded, and would otherwise write each set's
  # to_s text as a single string.
  File.write(File.join($datestr, 'failures.json'), JSON.dump({
    stations: failed_stations.to_a,
    trains: failed_trains.to_a
  }))
  File.write('./last_executed_run.txt', $datestr)
end
# Run the crawl only when this file is executed directly, not when it is loaded.
main if __FILE__ == $PROGRAM_NAME