diff --git a/scraper/scrape_station.py b/scraper/scrape_station.py index c8f0b88..4b08272 100644 --- a/scraper/scrape_station.py +++ b/scraper/scrape_station.py @@ -14,7 +14,7 @@ RO_LETTERS = r'A-Za-zăâîșțĂÂÎȚȘ' STATION_INFO_REGEX = re.compile(rf'^([{RO_LETTERS}. ]+) în ([0-9.]+)$') -STOPPING_TIME_REGEX = re.compile(r'^(necunoscută \(stație terminus\))|(?:([0-9]+) min \((?:începând cu|până la) ([0-9]{1,2}:[0-9]{2})\))$') +STOPPING_TIME_REGEX = re.compile(r'^(necunoscută \(stație terminus\))|(?:([0-9]+) (min|sec) \((?:începând cu|până la) ([0-9]{1,2}:[0-9]{2})\))$') # endregion @@ -62,13 +62,14 @@ def scrape(station_name: str): st_hr, st_min = (int(comp) for comp in result['time'].split(':')) result['time'] = tz.localize(dt_seq(st_hr, st_min)).isoformat() - unknown_st, st, st_opposite_time = STOPPING_TIME_REGEX.match( + unknown_st, st, minsec, st_opposite_time = STOPPING_TIME_REGEX.match( collapse_space(stopping_time_div.div('div', recursive=False)[1].text) ).groups() if unknown_st: result['stoppingTime'] = None elif st: - result['stoppingTime'] = int(st) + minutes = minsec == 'min' + result['stoppingTime'] = int(st) * 60 if minutes else int(st) result['train'] = {} result['train']['rank'] = collapse_space(train_div.div.div('div', recursive=False)[1].span.text) diff --git a/scraper/scrape_station_schema_v2.json b/scraper/scrape_station_schema_v2.json index 75e36c8..6233c1c 100644 --- a/scraper/scrape_station_schema_v2.json +++ b/scraper/scrape_station_schema_v2.json @@ -53,11 +53,12 @@ ] }, "stoppingTime": { + "description": "The number of seconds the train stops in the station", "type": [ "integer", "null" ], - "minimum": 1 + "minimum": 0 } }, "required": [ diff --git a/scraper/scrape_train.py b/scraper/scrape_train.py index df9c7ff..9f6aa13 100644 --- a/scraper/scrape_train.py +++ b/scraper/scrape_train.py @@ -29,7 +29,7 @@ KM_REGEX = re.compile(r'^km ([0-9]+)$') PLATFORM_REGEX = re.compile(r'^linia (.+)$') -STOPPING_TIME_REGEX = re.compile(r'^([0-9]+) min oprire$') +STOPPING_TIME_REGEX = re.compile(r'^([0-9]+) (min|sec) oprire$') STATION_DEPARR_STATUS_REGEX = re.compile(r'^(?:(la timp)|(?:((?:\+|-)[0-9]+) min \((?:(?:întârziere)|(?:mai devreme))\)))(\*?)$') @@ -106,7 +106,10 @@ def scrape(train_no: int, use_yesterday=False, date_override=None): if not station_scraped['stoppingTime']: station_scraped['stoppingTime'] = None else: - station_scraped['stoppingTime'] = int(STOPPING_TIME_REGEX.match(station_scraped['stoppingTime']).groups()[0]) + st_value, st_minsec = STOPPING_TIME_REGEX.match(station_scraped['stoppingTime']).groups() + station_scraped['stoppingTime'] = int(st_value) + if st_minsec == 'min': + station_scraped['stoppingTime'] *= 60 station_scraped['platform'] = collapse_space(middle.div.div('div', recursive=False)[0]('div', recursive=False)[3].text) if not station_scraped['platform']: station_scraped['platform'] = None diff --git a/scraper/scrape_train_schema.json b/scraper/scrape_train_schema.json index 541a657..2f588b6 100644 --- a/scraper/scrape_train_schema.json +++ b/scraper/scrape_train_schema.json @@ -113,7 +113,7 @@ "stoppingTime": { "description": "The number of minutes the train is scheduled to stop in this station", "type": ["integer", "null"], - "minimum": 1 + "minimum": 0 }, "platform": { "description": "The platform the train stopped at", diff --git a/scraper/scrape_train_schema_v2.json b/scraper/scrape_train_schema_v2.json index 837ea40..4a1433a 100644 --- a/scraper/scrape_train_schema_v2.json +++ b/scraper/scrape_train_schema_v2.json @@ -111,7 +111,7 @@ "type": "integer" }, "stoppingTime": { - "description": "The number of minutes the train is scheduled to stop in this station", + "description": "The number of seconds the train is scheduled to stop in this station", "type": ["integer", "null"], "minimum": 1 }, diff --git a/server/server/server.py b/server/server/server.py index 3380fcb..d7a8543 100644 --- a/server/server/server.py +++ b/server/server/server.py @@ -43,6 +43,8 @@ def get_train_info(train_no: int): if result['stations'][i]['departure']: date = datetime.datetime.fromisoformat(result['stations'][i]['departure']['scheduleTime']) result['stations'][i]['departure']['scheduleTime'] = f'{date.hour}:{date.minute:02}' + if 'stoppingTime' in result['stations'][i] and result['stations'][i]['stoppingTime']: + result['stations'][i]['stoppingTime'] //= 60 return result if train_no not in train_data_cache: