# Patch summary (text before the first "diff --git" line is ignored by patch tools).
#
# src/tww/tokenizer.py
#   - parse() is renamed to tokenize(); each regex_handlers entry now carries a
#     QUERY_TYPE_* tag, and handlers that raise or return None are dropped.
#   - adds the r_time_in / r_timezone / r_timezone_2 patterns and their handlers.
#   - adds pretty_print_dict(), dt_pretty(), resolve_query_type() and
#     resolve_query(); __main__ now prints the resolve_query() result as JSON.
# src/tww/tww.py
#   - adds find_from_offset(); tzinfo_from_offset() now returns a
#     (tzinfo, tznames) pair, or (None, []) when no name matches the offset.
#   - query_to_format_result() returns the unformatted result when fmt is falsy.
#   - format_result() logs an error instead of exiting the process.
#
# Review amendments, applied to added ("+") lines only; removed and context
# lines are unchanged, so the patch still targets the same base files:
#   1. format_result(): return None right after logging. Without it the
#      function falls through to result.strftime(fmt) with result = None.
#   2. handler_time_in_parser(): pass None as fmt, as handler_generic_parser()
#      does, so a datetime (not a formatted string) reaches dt_pretty().
#   The "index" blob ids below describe the patch before these amendments.
#
# NOTE(review): line breaks and indentation were rebuilt from Python syntax
# and the hunk line counts. Verify the whitespace of context lines against the
# base files before applying, or use `git apply --ignore-whitespace`.
# NOTE(review): still open, the code needed to fix these is outside the hunks:
#   - handler_time_s is annotated "-> int" but tagged QUERY_TYPE_DT, and
#     resolve_query() hands QUERY_TYPE_DT results to dt_pretty(), which calls
#     .strftime() on them.
#   - tokenize() wraps every result in a 1-tuple, yet resolve_query() reads
#     results[1] for QUERY_TYPE_DT_TR.
diff --git a/src/tww/tokenizer.py b/src/tww/tokenizer.py
index 8835645..ed09e25 100644
--- a/src/tww/tokenizer.py
+++ b/src/tww/tokenizer.py
@@ -1,10 +1,12 @@
+import json
 import re
-import sys
-from datetime import timedelta, datetime
-from pprint import pprint, pformat
+from datetime import datetime
 
+from pygments import highlight, lexers, formatters
+
+from tww import ISO_FORMAT, time_to_emoji
 from tww import resolve_timezone, dateparser_parse_dt, get_utcnow, get_s_since_epoch, get_ms_since_epoch, \
-    dt_tz_translation, DEFAULT_FORMAT, get_local_now
+    dt_tz_translation, DEFAULT_FORMAT, get_local_now, query_to_format_result
 
 r_generic = re.compile('(.*)', flags=re.IGNORECASE)
 r_time_in_epoch_s_now = re.compile('(?:time since epoch|seconds since epoch)', flags=re.IGNORECASE)
@@ -13,10 +15,13 @@ r_time_in_epoch_s3 = re.compile('(?:seconds)?\s*since\s*(.*)', flags=re.IGNORECA
 r_time_in_epoch_ms_now = re.compile('(?:milliseconds since epoch)', flags=re.IGNORECASE)
 r_time_in_epoch_ms2 = re.compile('(.*)?\s*(?:in|to)\s*(?:ms|milliseconds|miliseconds)', flags=re.IGNORECASE)
 r_time_in_epoch_ms3 = re.compile('(?:ms|milliseconds|miliseconds)?\s*since\s*(.*)', flags=re.IGNORECASE)
+r_time_in = re.compile('(?:time)?\s*in\s*(.*)', flags=re.IGNORECASE)
 r_time_since = re.compile('(?:time)?\s*since\s*(.*)', flags=re.IGNORECASE)
 r_time_until = re.compile('(?:time)?\s*until\s*(.*)', flags=re.IGNORECASE)
 r_time_between = re.compile('(?:time)?\s*between\s*(.*)\s*and\s*(.*)', flags=re.IGNORECASE)
 r_timezone_translation = re.compile('(.*)?\s(?:in|to)\s(.*)', flags=re.IGNORECASE)
+r_timezone = re.compile('(.*)?\s(?:timezone|timezones|tz)', flags=re.IGNORECASE)
+r_timezone_2 = re.compile('(?:timezone in|timezones in|tz in|timezone|timezones|tz)\s(.*)?', flags=re.IGNORECASE)
 
 
 def handler_time_s(dt_s: str) -> int:
@@ -74,23 +79,44 @@ def handler_timezone_translation(dt_s: str, timezone_like_s: str) -> dict:
 
 
 def handler_generic_parser(dt_s: str) -> datetime:
+    return query_to_format_result(dt_s, None)
+
+
+def handler_dateparser(dt_s: str) -> datetime:
     return dateparser_parse_dt(dt_s)
 
 
+def handler_time_in_parser(dt_s: str) -> datetime:
+    return query_to_format_result("now to {}".format(dt_s), None)
+
+
+def handler_timezone(timezone_s: str):
+    return resolve_timezone(timezone_s)
+
+
+QUERY_TYPE_DT_TR = "datetime_translation"
+QUERY_TYPE_DT = "datetime_details"
+QUERY_TYPE_TZ = "timezone"
+QUERY_TYPE_TD = "timedelta"
+
 regex_handlers = [
-    (r_time_in_epoch_s_now, handler_time_s_now_local),
-    (r_time_in_epoch_s_now, handler_time_s_now_utc),
-    (r_time_in_epoch_s2, handler_time_s),
-    (r_time_in_epoch_s3, handler_time_s),
-    (r_time_in_epoch_ms_now, handler_time_ms_now_local),
-    (r_time_in_epoch_ms_now, handler_time_ms_now_utc),
-    (r_time_in_epoch_ms2, handler_time_ms),
-    (r_time_in_epoch_ms3, handler_time_ms),
-    (r_timezone_translation, handler_timezone_translation),
-    (r_time_since, handler_time_since),
-    (r_time_until, handler_time_until),
-    (r_time_between, handler_time_diff),
-    (r_generic, handler_generic_parser),
+    (r_time_in_epoch_s_now, handler_time_s_now_local, QUERY_TYPE_DT),
+    (r_time_in_epoch_s_now, handler_time_s_now_utc, QUERY_TYPE_DT),
+    (r_time_in_epoch_s2, handler_time_s, QUERY_TYPE_DT),
+    (r_time_in_epoch_s3, handler_time_s, QUERY_TYPE_DT),
+    (r_time_in_epoch_ms_now, handler_time_ms_now_local, QUERY_TYPE_DT),
+    (r_time_in_epoch_ms_now, handler_time_ms_now_utc, QUERY_TYPE_DT),
+    (r_time_in_epoch_ms2, handler_time_ms, QUERY_TYPE_DT),
+    (r_time_in_epoch_ms3, handler_time_ms, QUERY_TYPE_DT),
+    (r_timezone_translation, handler_timezone_translation, QUERY_TYPE_DT_TR),
+    (r_time_since, handler_time_since, QUERY_TYPE_TD),
+    (r_time_until, handler_time_until, QUERY_TYPE_TD),
+    (r_time_between, handler_time_diff, QUERY_TYPE_TD),
+    (r_time_in, handler_time_in_parser, QUERY_TYPE_DT),
+    (r_timezone, handler_timezone, QUERY_TYPE_TZ),
+    (r_timezone_2, handler_timezone, QUERY_TYPE_TZ),
+    (r_generic, handler_dateparser, QUERY_TYPE_DT),
+    (r_generic, handler_generic_parser, QUERY_TYPE_DT),
 ]
 
 
@@ -104,16 +130,17 @@ def try_regex(r, s):
     return groups
 
 
-def parse(s):
+def tokenize(s):
     solutions = []
-    for r, h in regex_handlers:
+    for r, h, t in regex_handlers:
         g = try_regex(r, s)
         if g is not None:
             try:
                 result = h(*g)
             except Exception as e:
-                result = None
-            solutions.append((h.__name__, result))
+                continue
+            if result is not None:
+                solutions.append((h.__name__, (result, ), t))
     return solutions
 
 
@@ -139,20 +166,71 @@ def test():
         "now in dublin",
     ]
     for s in test_strings:
-        print("{} -> {}".format(s, parse(s)))
+        print("{} -> {}".format(s, tokenize(s)))
+
+
+def pretty_print_dict(obj):
+    formatted_json = json.dumps(obj, indent=2)
+    colorful_json = highlight(formatted_json, lexers.JsonLexer(), formatters.TerminalFormatter())
+    print(colorful_json)
+
+
+def dt_pretty(dt):
+    rv = {}
+    rv["iso8601_full"] = dt.strftime(ISO_FORMAT)
+    rv["iso8601_date"] = dt.strftime('%Y-%m-%d')
+    rv["iso8601_time"] = dt.strftime('%H:%M:%S')
+    rv["locale_dt"] = dt.strftime("%c")
+    rv["locale_day_of_week"] = dt.strftime("%A")
+    rv["locale_day_of_week_short"] = dt.strftime("%a")
+    rv["day_of_week_number"] = dt.strftime("%w")
+    rv["locale_month"] = dt.strftime("%B")
+    rv["locale_month_short"] = dt.strftime("%b")
+    rv["tz_name"] = dt.strftime("%Z")
+    rv["tz_offset"] = dt.strftime("%z")
+    rv["hh:mm"] = dt.strftime("%H:%M")
+    rv["locale_time"] = dt.strftime("%X")
+    rv["locale_date"] = dt.strftime("%x")
+    rv["emoji_time"] = time_to_emoji(dt)
+    rv["unix_s"] = get_s_since_epoch(dt)
+    rv["unix_ms"] = get_ms_since_epoch(dt)
+    return rv
+
+
+def resolve_query_type(query):
+    solutions = tokenize(query)
+    if not solutions:
+        dt = get_local_now()
+        return [["now", (dt,), QUERY_TYPE_DT]]
+    return solutions
+
+
+def resolve_query(query):
+    rv = {
+        "query": query,
+        "solutions": [],
+    }
+    solutions = resolve_query_type(query)
+    for solution in solutions:
+        element = {}
+        handler, results, query_type = solution
+        element["handler"] = handler
+        element["query_type"] = query_type
+        if query_type == QUERY_TYPE_DT:
+            element["dt"] = dt_pretty(results[0])
+        elif query_type == QUERY_TYPE_DT_TR:
+            element["src_dt"] = dt_pretty(results[0])
+            element["dst_dt"] = dt_pretty(results[1])
+        elif query_type == QUERY_TYPE_TZ:
+            element["tz"] = results[0]
+        elif query_type == QUERY_TYPE_TD:
+            element["timedelta"] = results[0]
+        rv["solutions"].append(element)
+    return rv
 
 
 if __name__ == "__main__":
-    query = "2020-02-07 11:25:58+0000 in seconds" #' '.join(sys.argv[1:])
-    results = parse(query)
-    for handler, result in results:
-        if type(result) is datetime:
-            print(" {} -> {}".format(handler, result.strftime(DEFAULT_FORMAT)))
-        elif type(result) is timedelta:
-            print(" {} -> {}".format(handler, result))
-        elif type(result) is dict:
-            print(" {} -> {}".format(handler, pformat(result)))
-        elif type(result) is None:
-            print(" {} -> Couldn't solve query".format(handler))
-        else:
-            print(" {} -> {}".format(handler, result))
+    query = "now in sofia"
+    # query = ' '.join(sys.argv[1:])
+    result = resolve_query(query)
+    pretty_print_dict(result)
diff --git a/src/tww/tww.py b/src/tww/tww.py
index ae2bf82..45d86dd 100644
--- a/src/tww/tww.py
+++ b/src/tww/tww.py
@@ -213,7 +213,7 @@ def parse_query(query):
     """
     Parses the user query to the datetime, tz/loc parts
     """
-    query = ' '.join(query)
+    # query = ' '.join(query)
     query = query.strip()
     if not query:
         logger.critical("Use a query like ['to' ]")
@@ -246,6 +246,32 @@ def serialize_location(location):
     }
 
 
+def find_from_offset(query):
+    for universal_alias in ["gmt", "utc", "+", "-"]:
+        if query.startswith(universal_alias):
+            splitted_query = query.split(universal_alias)
+            if len(splitted_query) != 2:
+                continue
+            offset = splitted_query[1]
+            if ':' not in offset:
+                try:
+                    hhs, mms = offset, "00"
+                except Exception:
+                    continue
+            else:
+                splitted_offset = offset.split(':')
+                if len(splitted_offset) != 2:
+                    continue
+                hhs, mms = splitted_offset
+            try:
+                if universal_alias in ["+", "-"]:
+                    return tzinfo_from_offset("{}{:02d}{:02d}".format(universal_alias, int(hhs), int(mms)))
+                return tzinfo_from_offset("+{:02d}{:02d}".format(int(hhs), int(mms)))
+            except Exception:
+                continue
+    return None, []
+
+
 def resolve_timezone(query):
     if not query:
         query = "utc"
@@ -254,10 +280,7 @@ def resolve_timezone(query):
     normal_query = query.lower().strip()
     found_from_iana_tz = NORMALIZED_TZ_DICT.get(normal_query, "")
     found_from_abbr_tzs = list(NORMALIZED_TZ_ABBR.get(normal_query, set()))
-    found_from_offset_tz = None
-    try:
-        found_from_offset_tz = tzinfo_from_offset(normal_query)
-    except: ...
+    found_from_offset_tz, offset_tzs = find_from_offset(normal_query)
     normal_tz = found_from_iana_tz
     if not normal_tz:
         if found_from_abbr_tzs:
@@ -304,7 +327,7 @@ def resolve_timezone(query):
         "normal_query": normal_query,
         "found_from_iana_tz": found_from_iana_tz,
         "found_from_abbr_tzs": found_from_abbr_tzs,
-        "found_from_offset_tzs": found_from_offset_tz,
+        "found_from_offset_tzs": offset_tzs,
         "local_location": local_location,
         "remote_location": remote_location,
         "search_pytz": normal_tz,
@@ -335,8 +358,8 @@ def solve_query(human_dt, human_tz_loc):
 
 def format_result(result, fmt):
     if result is None:
-        logger.critical("Could not solve query")
-        exit(1)
+        logger.error("Could not solve query")
+        return None
     logger.debug("Format: {}".format(fmt))
     format_result = result.strftime(fmt)
     logger.debug("Formated result: {} -> {}".format(result, format_result))
@@ -346,8 +369,9 @@ def format_result(result, fmt):
 def query_to_format_result(query, fmt=DEFAULT_FORMAT):
     human_dt, human_tz_loc = parse_query(query)
     result = solve_query(human_dt, human_tz_loc)
-    formated_result = format_result(result, fmt)
-    return formated_result
+    if fmt:
+        return format_result(result, fmt)
+    return result
 
 
 def main(args):
@@ -442,8 +466,10 @@ def tzinfo_from_offset(offset: str) -> pytz.timezone:
     tznames = TZ_OFFSETS.get(offset, [])
     for tzname in tznames:
         if tzname.startswith('Etc/GMT'):
-            return pytz.timezone(tzname)
-    return pytz.timezone(tznames[0])
+            return pytz.timezone(tzname), tznames
+    if tznames:
+        return pytz.timezone(tznames[0]), tznames
+    return None, []
 
 
 def dateparser_parse_dt(s: str):
@@ -467,7 +493,7 @@ def get_utcnow(tzaware: bool = True):
 
 def get_local_now(tzaware: bool = True):
     if tzaware:
-        return datetime.now().replace(tzinfo=tzinfo_from_offset(get_local_tz_offset()))
+        return datetime.now().replace(tzinfo=tzinfo_from_offset(get_local_tz_offset())[0])
     return datetime.utcnow()
 
 
@@ -546,4 +572,4 @@ def epoch_to_dt(seconds):
 def time_to_emoji(dt):
     seconds = get_s_since_epoch(dt)
     a = int((seconds / 900 - 3) / 2 % 24)
-    return chr(128336 + a // 2 + a % 2 * 12)
\ No newline at end of file
+    return chr(128336 + a // 2 + a % 2 * 12)