From bd40f9c0480938ddbb07ed322ddefb22738c1c79 Mon Sep 17 00:00:00 2001 From: Daniel Tsvetkov Date: Mon, 23 Sep 2019 22:50:22 +0200 Subject: [PATCH] timezone madness --- .gitignore | 2 +- src/tww/data/.cache.csv | 13 ++++++ src/tww/tww.py | 93 +++++++++++++++++++++++++++++++++-------- 3 files changed, 90 insertions(+), 18 deletions(-) diff --git a/.gitignore b/.gitignore index c7b996b..aabd304 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,4 @@ venv -data/.cache.csv +src/tww/data/.cache.csv __pycache__ .idea diff --git a/src/tww/data/.cache.csv b/src/tww/data/.cache.csv index 6dd48bc..750c1e0 100644 --- a/src/tww/data/.cache.csv +++ b/src/tww/data/.cache.csv @@ -4,3 +4,16 @@ bulgaria,43.0,25.0 usa,39.7563889,46.7511101 brazil,-10.0,-55.0 sydney,-33.86785,151.2073212 +sofia,42.6975135,23.3241463 +eastern,-40.9597222,175.6575012 +PT,40.44151435,-80.0100857539989 +PT,40.44151435,-80.0100857539989 +PT,40.44151435,-80.0100857539989 +new york,40.7142691,-74.0059738 +delhi,28.6666667,77.2166672 +krivodol,43.389659,23.502923804029 +vratsa,43.21,23.5625 +plovdiv,42.15,24.75 +studena,41.9158333,21.5305557 +burdo,9.5166667,45.5333328 +burgas,42.5,27.4666672 diff --git a/src/tww/tww.py b/src/tww/tww.py index 4b7b247..debc4a0 100644 --- a/src/tww/tww.py +++ b/src/tww/tww.py @@ -6,9 +6,13 @@ Find time now, in the past or future in any timezone or location. 
import argparse import logging import os +from collections import defaultdict import dateparser from datetime import datetime + +import pytz +from pytz import timezone from pytz.exceptions import UnknownTimeZoneError FUZZ_THRESHOLD = 70 @@ -73,18 +77,33 @@ def normalize_words_to_number(query): return normal +pytz_all_timezones = pytz.all_timezones +NORMALIZED_TZ_DICT = dict(zip([tz.lower() for tz in pytz_all_timezones], pytz_all_timezones)) + +NORMALIZED_TZ_ABBR = defaultdict(set) +TZ_ABBRS_REVERSE = defaultdict(set) +for x_tz in pytz_all_timezones: + dst_tzname = pytz.timezone(x_tz).localize(datetime.now(), is_dst=True).tzname() + nodst_tzname = pytz.timezone(x_tz).localize(datetime.now(), is_dst=False).tzname() + NORMALIZED_TZ_ABBR[dst_tzname.lower()].add(x_tz) + NORMALIZED_TZ_ABBR[nodst_tzname.lower()].add(x_tz) + TZ_ABBRS_REVERSE[x_tz].add(dst_tzname) + TZ_ABBRS_REVERSE[x_tz].add(nodst_tzname) + + def timezone_to_normal(query): """ Makes a timezone written in wrong capitalization to correct one as expected by IANA. 
E.g.: america/new_york -> America/New_York """ - import re - # The magic in the regex is that it splits by either / OR _ OR - # where the | are OR; and then the parens ( ) keep the splitting # entries in the list so that we can join later - normal = ''.join(x.capitalize() for x in re.split('(/|_|-)', query)) + + normal = NORMALIZED_TZ_DICT.get(query, "") + if not normal: + normal = NORMALIZED_TZ_ABBR.get(query, "") logger.debug("Normalized timezone: {} -> {}".format(query, normal)) return normal @@ -152,8 +171,8 @@ def resolve_location_local(query): try: result = heappop(heap) except IndexError: - logger.critical("Could not find location {}".format(query)) - exit(1) + logger.error("Could not find location {}".format(query)) + return "" ratio, location = result logger.debug("Location result ({}): {}".format(-ratio, location)) write_to_cache(query, location) @@ -207,17 +226,34 @@ def parse_query(query): return human_dt, human_tz_loc +def serialize_location(location): + return { + "name": location.name, + "latitude": location.latitude, + "longitude": location.longitude, + } + + def resolve_timezone(query): - result = "" + if not query: + query = "utc" + # if the human_tz_loc contains /, assume it's a timezone which could be + # incorrectly written with small letters - need Continent/City + normal_query = query.lower().strip() + found_from_iana_tz = NORMALIZED_TZ_DICT.get(normal_query, "") + found_from_abbr_tzs = list(NORMALIZED_TZ_ABBR.get(normal_query, set())) + normal_tz = found_from_iana_tz + if not normal_tz: + if found_from_abbr_tzs: + normal_tz = list(found_from_abbr_tzs)[0] + tz_abbrs = list(TZ_ABBRS_REVERSE.get(normal_tz, set())) + logger.debug("Normalized timezone: {} -> {}".format(query, normal_tz)) + local_location, remote_location = {}, {} try: - dateparser.parse(str(datetime.now()), settings={'TO_TIMEZONE': query}) - # if the human_tz_loc contains /, assume it's a timezone which could be - # incorrectly written with small letters - need Continent/City - 
if "/" in query: - result = timezone_to_normal(query) + pytz_result = timezone(normal_tz) except UnknownTimeZoneError: from timezonefinder import TimezoneFinder - logger.debug("No timezone: {}".format(query)) + logger.debug("No timezone: {}".format(normal_tz)) # if the human_tz_loc contains /, assume it's a timezone # the timezone could still be guessed badly, attempt to get the city # e.g.america/dallas @@ -228,14 +264,37 @@ def resolve_timezone(query): # we don't know this timezone one, assume location # Try to get from local file first location = resolve_location_local(query) - if not location: + if location: + local_location = serialize_location(location) + else: # finally go to remote location = resolve_location_remote(query) + if location: + remote_location = serialize_location(location) tzf = TimezoneFinder() loc_tz = tzf.timezone_at(lat=location.latitude, lng=location.longitude) logger.debug("Timezone: {}".format(loc_tz)) - result = loc_tz - return result + try: + pytz_result = timezone(loc_tz) + except UnknownTimeZoneError: + pytz_result = type('pytz', (), {"zone": ""}) + tz_name = pytz_result.zone + tz_abbr = pytz_result.localize(datetime.now()).strftime('%Z') if tz_name else "" + tz_offset = pytz_result.localize(datetime.now()).strftime('%z') if tz_name else "" + return { + "query": query, + "normal_query": normal_query, + "found_from_iana_tz": found_from_iana_tz, + "found_from_abbr_tzs": found_from_abbr_tzs, + "local_location": local_location, + "remote_location": remote_location, + "search_pytz": normal_tz, + "tz_abbrs": tz_abbrs, + "tz": normal_tz, + "tz_name": tz_name, + "tz_abbr": tz_abbr, + "tz_offset": tz_offset, + } def solve_query(human_dt, human_tz_loc): @@ -244,13 +303,13 @@ def solve_query(human_dt, human_tz_loc): result = dateparser.parse(human_dt, settings={'RETURN_AS_TIMEZONE_AWARE': True}) logger.debug("human_dt result: {}".format(result)) if human_tz_loc: - human_tz_loc = resolve_timezone(human_tz_loc) + human_tz_loc = 
resolve_timezone(human_tz_loc)["tz_name"] isofmt = result.isoformat() logger.debug("human_dt isofmt: {}".format(isofmt)) result = dateparser.parse(isofmt, settings={'TO_TIMEZONE': human_tz_loc}) logger.debug("human_dt to_timezone result: {}".format(result)) except UnknownTimeZoneError: - loc_tz = resolve_timezone(human_tz_loc) + loc_tz = resolve_timezone(human_tz_loc)["tz_name"] result = dateparser.parse(human_dt, settings={'TO_TIMEZONE': loc_tz}) return result