timezone madness

This commit is contained in:
Daniel Tsvetkov 2019-09-23 22:50:22 +02:00
parent f8278a2906
commit bd40f9c048
3 changed files with 90 additions and 18 deletions

2
.gitignore vendored
View File

@ -1,4 +1,4 @@
venv venv
data/.cache.csv src/tww/data/.cache.csv
__pycache__ __pycache__
.idea .idea

View File

@ -4,3 +4,16 @@ bulgaria,43.0,25.0
usa,39.7563889,46.7511101 usa,39.7563889,46.7511101
brazil,-10.0,-55.0 brazil,-10.0,-55.0
sydney,-33.86785,151.2073212 sydney,-33.86785,151.2073212
sofia,42.6975135,23.3241463
eastern,-40.9597222,175.6575012
PT,40.44151435,-80.0100857539989
PT,40.44151435,-80.0100857539989
PT,40.44151435,-80.0100857539989
new york,40.7142691,-74.0059738
delhi,28.6666667,77.2166672
krivodol,43.389659,23.502923804029
vratsa,43.21,23.5625
plovdiv,42.15,24.75
studena,41.9158333,21.5305557
burdo,9.5166667,45.5333328
burgas,42.5,27.4666672

1 zurich 47.3666667 8.5500002
4 usa 39.7563889 46.7511101
5 brazil -10.0 -55.0
6 sydney -33.86785 151.2073212
7 sofia 42.6975135 23.3241463
8 eastern -40.9597222 175.6575012
9 PT 40.44151435 -80.0100857539989
10 PT 40.44151435 -80.0100857539989
11 PT 40.44151435 -80.0100857539989
12 new york 40.7142691 -74.0059738
13 delhi 28.6666667 77.2166672
14 krivodol 43.389659 23.502923804029
15 vratsa 43.21 23.5625
16 plovdiv 42.15 24.75
17 studena 41.9158333 21.5305557
18 burdo 9.5166667 45.5333328
19 burgas 42.5 27.4666672

View File

@ -6,9 +6,13 @@ Find time now, in the past or future in any timezone or location.
import argparse import argparse
import logging import logging
import os import os
from collections import defaultdict
import dateparser import dateparser
from datetime import datetime from datetime import datetime
import pytz
from pytz import timezone
from pytz.exceptions import UnknownTimeZoneError from pytz.exceptions import UnknownTimeZoneError
FUZZ_THRESHOLD = 70 FUZZ_THRESHOLD = 70
@ -73,18 +77,33 @@ def normalize_words_to_number(query):
return normal return normal
# Lookup tables built once, at import time, from pytz's list of zone names.
pytz_all_timezones = pytz.all_timezones

# Lower-cased IANA name -> canonical IANA name,
# e.g. "america/new_york" -> "America/New_York".
NORMALIZED_TZ_DICT = {tz.lower(): tz for tz in pytz_all_timezones}

# Lower-cased abbreviation -> set of IANA zone names that report it.
NORMALIZED_TZ_ABBR = defaultdict(set)
# IANA zone name -> set of abbreviations it reports (original capitalization).
TZ_ABBRS_REVERSE = defaultdict(set)

# Take a single reference instant so every zone (and both is_dst variants)
# is evaluated against the same wall-clock time.
_tz_reference_now = datetime.now()
for x_tz in pytz_all_timezones:
    x_tzinfo = pytz.timezone(x_tz)
    # NOTE(review): per the pytz docs, is_dst only disambiguates ambiguous or
    # non-existent wall times, so both calls usually yield the same
    # abbreviation - confirm whether the intent was to collect both the
    # standard and the daylight-saving names of each zone.
    dst_tzname = x_tzinfo.localize(_tz_reference_now, is_dst=True).tzname()
    nodst_tzname = x_tzinfo.localize(_tz_reference_now, is_dst=False).tzname()
    for abbr in (dst_tzname, nodst_tzname):
        NORMALIZED_TZ_ABBR[abbr.lower()].add(x_tz)
        TZ_ABBRS_REVERSE[x_tz].add(abbr)
def timezone_to_normal(query):
    """
    Look up the canonical IANA spelling of a timezone written in any
    capitalization. E.g.:
    america/new_york -> America/New_York

    The keys of NORMALIZED_TZ_DICT and NORMALIZED_TZ_ABBR are lower-case, so
    the query is lower-cased and stripped before the lookup; otherwise an
    already correctly capitalized name would never match.

    :param query: timezone name or abbreviation as typed by the user
    :return: the canonical IANA name (str) when the query is a known zone
        name; otherwise whatever NORMALIZED_TZ_ABBR holds for the query
        (a set of zone names sharing that abbreviation); "" when nothing
        matches.
    """
    # Non-string input (e.g. None) is looked up unchanged and simply misses.
    key = query.lower().strip() if isinstance(query, str) else query
    normal = NORMALIZED_TZ_DICT.get(key, "")
    if not normal:
        # .get() on the defaultdict avoids inserting an empty set on a miss.
        normal = NORMALIZED_TZ_ABBR.get(key, "")
    logger.debug("Normalized timezone: {} -> {}".format(query, normal))
    return normal
@ -152,8 +171,8 @@ def resolve_location_local(query):
try: try:
result = heappop(heap) result = heappop(heap)
except IndexError: except IndexError:
logger.critical("Could not find location {}".format(query)) logger.error("Could not find location {}".format(query))
exit(1) return ""
ratio, location = result ratio, location = result
logger.debug("Location result ({}): {}".format(-ratio, location)) logger.debug("Location result ({}): {}".format(-ratio, location))
write_to_cache(query, location) write_to_cache(query, location)
@ -207,17 +226,34 @@ def parse_query(query):
return human_dt, human_tz_loc return human_dt, human_tz_loc
def serialize_location(location):
    """
    Convert a resolved location object into a plain dictionary.

    :param location: object exposing ``name``, ``latitude`` and
        ``longitude`` attributes
    :return: dict with the keys "name", "latitude" and "longitude" holding
        the corresponding attribute values
    """
    return {
        "name": location.name,
        "latitude": location.latitude,
        "longitude": location.longitude,
    }
def resolve_timezone(query): def resolve_timezone(query):
result = "" if not query:
try: query = "utc"
dateparser.parse(str(datetime.now()), settings={'TO_TIMEZONE': query})
# if the human_tz_loc contains /, assume it's a timezone which could be # if the human_tz_loc contains /, assume it's a timezone which could be
# incorrectly written with small letters - need Continent/City # incorrectly written with small letters - need Continent/City
if "/" in query: normal_query = query.lower().strip()
result = timezone_to_normal(query) found_from_iana_tz = NORMALIZED_TZ_DICT.get(normal_query, "")
found_from_abbr_tzs = list(NORMALIZED_TZ_ABBR.get(normal_query, set()))
normal_tz = found_from_iana_tz
if not normal_tz:
if found_from_abbr_tzs:
normal_tz = list(found_from_abbr_tzs)[0]
tz_abbrs = list(TZ_ABBRS_REVERSE.get(normal_tz, set()))
logger.debug("Normalized timezone: {} -> {}".format(query, normal_tz))
local_location, remote_location = {}, {}
try:
pytz_result = timezone(normal_tz)
except UnknownTimeZoneError: except UnknownTimeZoneError:
from timezonefinder import TimezoneFinder from timezonefinder import TimezoneFinder
logger.debug("No timezone: {}".format(query)) logger.debug("No timezone: {}".format(normal_tz))
# if the human_tz_loc contains /, assume it's a timezone # if the human_tz_loc contains /, assume it's a timezone
# the timezone could still be guessed badly, attempt to get the city # the timezone could still be guessed badly, attempt to get the city
# e.g.america/dallas # e.g.america/dallas
@ -228,14 +264,37 @@ def resolve_timezone(query):
# we don't know this timezone one, assume location # we don't know this timezone one, assume location
# Try to get from local file first # Try to get from local file first
location = resolve_location_local(query) location = resolve_location_local(query)
if not location: if location:
local_location = serialize_location(location)
else:
# finally go to remote # finally go to remote
location = resolve_location_remote(query) location = resolve_location_remote(query)
if location:
remote_location = serialize_location(location)
tzf = TimezoneFinder() tzf = TimezoneFinder()
loc_tz = tzf.timezone_at(lat=location.latitude, lng=location.longitude) loc_tz = tzf.timezone_at(lat=location.latitude, lng=location.longitude)
logger.debug("Timezone: {}".format(loc_tz)) logger.debug("Timezone: {}".format(loc_tz))
result = loc_tz try:
return result pytz_result = timezone(loc_tz)
except UnknownTimeZoneError:
pytz_result = type('pytz', (), {"zone": ""})
tz_name = pytz_result.zone
tz_abbr = pytz_result.localize(datetime.now()).strftime('%Z') if tz_name else ""
tz_offset = pytz_result.localize(datetime.now()).strftime('%z') if tz_name else ""
return {
"query": query,
"normal_query": normal_query,
"found_from_iana_tz": found_from_iana_tz,
"found_from_abbr_tzs": found_from_abbr_tzs,
"local_location": local_location,
"remote_location": remote_location,
"search_pytz": normal_tz,
"tz_abbrs": tz_abbrs,
"tz": normal_tz,
"tz_name": tz_name,
"tz_abbr": tz_abbr,
"tz_offset": tz_offset,
}
def solve_query(human_dt, human_tz_loc): def solve_query(human_dt, human_tz_loc):
@ -244,13 +303,13 @@ def solve_query(human_dt, human_tz_loc):
result = dateparser.parse(human_dt, settings={'RETURN_AS_TIMEZONE_AWARE': True}) result = dateparser.parse(human_dt, settings={'RETURN_AS_TIMEZONE_AWARE': True})
logger.debug("human_dt result: {}".format(result)) logger.debug("human_dt result: {}".format(result))
if human_tz_loc: if human_tz_loc:
human_tz_loc = resolve_timezone(human_tz_loc) human_tz_loc = resolve_timezone(human_tz_loc)["tz_name"]
isofmt = result.isoformat() isofmt = result.isoformat()
logger.debug("human_dt isofmt: {}".format(isofmt)) logger.debug("human_dt isofmt: {}".format(isofmt))
result = dateparser.parse(isofmt, settings={'TO_TIMEZONE': human_tz_loc}) result = dateparser.parse(isofmt, settings={'TO_TIMEZONE': human_tz_loc})
logger.debug("human_dt to_timezone result: {}".format(result)) logger.debug("human_dt to_timezone result: {}".format(result))
except UnknownTimeZoneError: except UnknownTimeZoneError:
loc_tz = resolve_timezone(human_tz_loc) loc_tz = resolve_timezone(human_tz_loc)["tz_name"]
result = dateparser.parse(human_dt, settings={'TO_TIMEZONE': loc_tz}) result = dateparser.parse(human_dt, settings={'TO_TIMEZONE': loc_tz})
return result return result