#! /usr/bin/env ruby

require 'erb'
require 'json'
require 'open3'
require 'set'

# Date (YYYY-MM-DD) of the day being archived: 24 hours before the script started.
# It is used both as the archive directory name and as the `date` query parameter.
$datestr = (Time.now - 86400).strftime '%F'

# Base URL of the scraper API that station and train data is fetched from.
SCRAPER_BASE_URL = 'https://scraper.infotren.dcdev.ro/v3'

# Small scope helpers added to every object. They are kept for backward
# compatibility; the rest of this file uses the built-in equivalents.
class Object
  # Yields self to the block, then returns self (same contract as Object#tap).
  def also
    yield self
    self
  end

  # Yields self to the block and returns the block's result (same contract as Object#then).
  def let
    yield self
  end
end

# Normalizes a station name: lowercases it, turns every whitespace character
# into a plain space and strips the diacritics ă, â, î, ș, ț.
#
# @param [String] station_name
# @return [String]
def normalize_station station_name
  station_name
    .downcase
    .gsub(/\s/, ' ')
    .tr('ăâîșț', 'aaist')
end

# Maps station names (as stored by populate_link_name_cache) to their link names.
$station_link_name_cache = {}

# Reads a previously archived JSON document from today's archive directory.
#
# @param [String] kind subdirectory of the archive ('stations' or 'trains')
# @param [String] name file name without the `.json` extension
# @return [Object, nil] the parsed document (symbolized keys), or nil when the
#   file is missing, unreadable or not valid JSON
def read_archived_json kind, name
  JSON.parse(File.read(File.join($datestr, kind, "#{name}.json")), symbolize_names: true)
rescue StandardError
  # Best effort: any problem with the local copy means "fall back to the network".
  nil
end

# Downloads a JSON document from the scraper API using curl.
#
# @param [String] resource path below the API base, already URL-encoded
# @return [Object, nil] the parsed document (symbolized keys), or nil when curl
#   fails or the response body is not valid JSON
def fetch_json resource
  stdout, status = Open3.capture2('curl', '--silent', '--fail', '--show-error', "#{SCRAPER_BASE_URL}/#{resource}?date=#{$datestr}")
  return nil unless status.success?

  JSON.parse stdout, symbolize_names: true
rescue JSON::ParserError
  # A truncated or malformed body is treated like a failed request so that the
  # caller can retry instead of the whole run crashing.
  nil
end

# Returns the data for a station, preferring the copy already archived on disk
# and falling back to the scraper API.
#
# @param [String] station_name
# @return [Hash, nil] nil when the station could not be obtained
def get_station station_name
  normalized = normalize_station station_name

  # Try getting from file
  archived = read_archived_json 'stations', normalized
  return archived unless archived.nil?

  # NOTE(review): the cache is keyed by the raw names stored by
  # populate_link_name_cache, while this lookup uses the normalized name, so it
  # only hits when the raw name is already in normalized form — confirm intent.
  link_name = $station_link_name_cache.fetch(normalized, normalized)
  fetch_json "stations/#{ERB::Util.url_encode(link_name)}"
end

# Returns the data for a train, preferring the copy already archived on disk
# and falling back to the scraper API.
#
# @param [String] train_number
# @return [Hash, nil] nil when the train could not be obtained
def get_train train_number
  # Try getting from file
  archived = read_archived_json 'trains', train_number
  return archived unless archived.nil?

  fetch_json "trains/#{ERB::Util.url_encode(train_number)}"
end

# Records the name -> linkName pair of every station listed in a train's groups.
#
# @param [Hash] train
def populate_link_name_cache train
  train[:groups].each do |group|
    group[:stations].each do |stop|
      $station_link_name_cache[stop[:name]] = stop[:linkName]
    end
  end
end

# Collects every route entry of the trains arriving at or departing from a station.
#
# @param [Hash] station
# @return [Set]
def get_stations_from_station station
  Set.new.tap do |stations|
    %i[arrivals departures].each do |direction|
      # A missing list is treated as empty.
      (station[direction] || []).each do |entry|
        entry[:train][:route].each { |name| stations << name }
      end
    end
  end
end

# Calls the block up to `times` times and returns its first non-nil result.
#
# @param [Integer] times
# @return [Object, nil] nil when every attempt returned nil
def reattempt times
  times.times do
    result = yield
    return result unless result.nil?
  end
  nil
end

# Collects the numbers of the trains arriving at or departing from a station.
#
# @param [Hash] station
# @return [Set]
def get_trains_from_station station
  Set.new.tap do |trains|
    %i[arrivals departures].each do |direction|
      # A missing list is treated as empty.
      (station[direction] || []).each do |entry|
        trains << entry[:train][:number]
      end
    end
  end
end

# Lists the names of all stations in all groups of a train, in order.
#
# @param [Hash] train
# @return [Array]
def get_stations_from_train train
  train[:groups].flat_map do |group|
    group[:stations].map { |stop| stop[:name] }
  end
end

# Appends to `queue` every candidate that is neither visited nor already queued.
#
# @param [Array] queue
# @param [Enumerable] candidates
# @param [Array] visited
# @return [Integer] number of items that were appended
def enqueue_unseen queue, candidates, visited
  before = queue.count
  candidates.each do |item|
    queue << item unless visited.include?(item) || queue.include?(item)
  end
  queue.count - before
end

# Console logger supporting "temporary" lines that are erased from the
# terminal when the next permanent message is printed.
class Log
  def initialize
    # Number of temporary lines currently on screen.
    @to_erase = 0
  end

  # Erases any temporary lines, then prints a permanent message.
  def add message
    # "\e[1A" moves the cursor up one line, "\e[2K" clears that line.
    print "\e[1A\e[2K" * @to_erase
    @to_erase = 0
    puts message
  end

  # Prints a message that the next call to #add will erase.
  # Does nothing when stdout is not a terminal.
  def temporary_add message
    if $stdout.isatty
      puts message
      @to_erase += 1
    end
  end
end

# Crawls the station/train graph starting from a fixed list of root stations
# and stores every station and train as JSON under a directory named after
# $datestr. If that directory already exists, the crawl resumes from it.
# Exits with status 1 when nothing at all could be archived.
def main
  File.write('./last_attempted_run.txt', $datestr)

  log = Log.new
  log.add "Creating archive for yesterday, #{$datestr}"

  existing = Dir.exist? $datestr
  log.add 'Archive already exists. Merging.' if existing
  # Create each directory separately so that a partially created archive
  # (e.g. from an interrupted run) is completed rather than causing a crash.
  [$datestr, File.join($datestr, 'stations'), File.join($datestr, 'trains')].each do |dir|
    Dir.mkdir dir unless Dir.exist? dir
  end

  # @type [Array]
  roots = ['București Nord', 'Brașov', 'Iași', 'Titan Sud', 'Oltenița', 'Oravița', 'Anina']

  # @type [Array]
  visited_stations = Dir.children(File.join($datestr, 'stations'))
    .map { |entry| entry.delete_suffix '.json' }
    .map do |name|
      station = get_station name
      log.add "Could not load archived station #{name}" if station.nil?
      station && station[:stationName]
    end
    .compact
  log.add "#{visited_stations.count} visited stations" if existing
  # @type [Array]
  unvisited_stations = []
  # @type [Set]
  failed_stations = Set.new

  # @type [Array]
  visited_trains = Dir.children(File.join($datestr, 'trains'))
    .map { |entry| entry.delete_suffix '.json' }
  log.add "#{visited_trains.count} visited trains" if existing
  # @type [Array]
  unvisited_trains = []
  # @type [Set]
  failed_trains = Set.new

  enqueue_unseen unvisited_stations, roots, visited_stations

  # Get unvisited from visited
  visited_trains.each do |train_number|
    train = get_train train_number
    if train.nil?
      log.add "Could not load archived train #{train_number}"
      next
    end
    populate_link_name_cache train
    enqueue_unseen unvisited_stations, get_stations_from_train(train), visited_stations
  end
  visited_stations.each do |station_name|
    station = get_station station_name
    if station.nil?
      log.add "Could not load archived station #{station_name}"
      next
    end
    enqueue_unseen unvisited_stations, get_stations_from_station(station), visited_stations
    enqueue_unseen unvisited_trains, get_trains_from_station(station), visited_trains
  end

  log.add "#{unvisited_stations.count} unvisited stations" unless unvisited_stations.empty?
  log.add "#{unvisited_trains.count} unvisited trains" unless unvisited_trains.empty?

  # Builds the two progress summary lines from the current state.
  progress = lambda do
    [
      "Stations: #{visited_stations.count} visited, #{failed_stations.count} failed, #{unvisited_stations.count} remaining",
      "Trains: #{visited_trains.count} visited, #{failed_trains.count} failed, #{unvisited_trains.count} remaining",
    ]
  end

  start_time = Time.now
  File.write(File.join($datestr, 'start_time.txt'), start_time.strftime('%FT%T%:z'))

  # NOTE(review): an item that failed is not remembered as "seen", so it is
  # queued (and attempted) again whenever another train/station refers to it.
  until unvisited_stations.empty? && unvisited_trains.empty?
    if !unvisited_trains.empty?
      # First visit all trains
      # @type [String]
      train_number = unvisited_trains.shift
      log.temporary_add "Getting train #{train_number}"
      train = reattempt(3) { get_train train_number }
      if train.nil?
        # Failed to get
        failed_trains << train_number
        log.add "Failed to get train #{train_number}"
      else
        populate_link_name_cache train
        visited_trains << train_number
        # A later attempt succeeded, so this is no longer a failure.
        failed_trains.delete train_number
        File.write(File.join($datestr, 'trains', "#{train_number}.json"), JSON.dump(train))
        log.add "Got train #{train_number}"
        new_stations = enqueue_unseen unvisited_stations, get_stations_from_train(train), visited_stations
        log.add "Found #{new_stations} new stations" unless new_stations.zero?
      end
    else
      # Then visit stations
      # @type [String]
      station_name = unvisited_stations.shift
      log.temporary_add "Getting station #{station_name}"
      station = reattempt(3) { get_station station_name }
      if station.nil?
        # Failed to get
        failed_stations << station_name
        log.add "Failed to get station #{station_name}"
      else
        visited_stations << station_name
        # A later attempt succeeded, so this is no longer a failure.
        failed_stations.delete station_name
        filename = normalize_station(station_name)
        filename = $station_link_name_cache.fetch(filename, filename)
        File.write(File.join($datestr, 'stations', "#{filename}.json"), JSON.dump(station))
        log.add "Got station #{station_name}"
        new_stations = enqueue_unseen unvisited_stations, get_stations_from_station(station), visited_stations
        new_trains = enqueue_unseen unvisited_trains, get_trains_from_station(station), visited_trains
        log.add "Found #{new_stations} new stations" unless new_stations.zero?
        log.add "Found #{new_trains} new trains" unless new_trains.zero?
      end
    end
    progress.call.each { |line| log.temporary_add line }
  end

  progress.call.each { |line| log.add line }

  end_time = Time.now
  File.write(File.join($datestr, 'end_time.txt'), end_time.strftime('%FT%T%:z'))
  timespan = (end_time - start_time).to_i
  log.add "Finished in #{timespan}s (#{timespan / 3600}h #{timespan / 60 % 60}m #{timespan % 60}s)"

  # Finally, write failures log
  File.open(File.join($datestr, 'failed_stations.txt'), "w") do |f|
    failed_stations.each { |name| f.puts name }
  end
  File.open(File.join($datestr, 'failed_trains.txt'), "w") do |f|
    failed_trains.each { |number| f.puts number }
  end
  # Convert the sets to arrays: the JSON generator has no Set support by
  # default and would otherwise emit each set's to_s string.
  File.write(File.join($datestr, 'failures.json'), JSON.dump({
    stations: failed_stations.to_a,
    trains: failed_trains.to_a,
  }))

  File.write('./last_executed_run.txt', $datestr)

  exit 1 if visited_stations.empty? && visited_trains.empty?
end

if __FILE__ == $0
  main
end