From fb953069f34a10213cb69609a82fe2fb565967e7 Mon Sep 17 00:00:00 2001 From: Daniel Tsvetkov Date: Wed, 12 Feb 2020 14:20:54 +0100 Subject: [PATCH] argparser --- src/tww/data/custom_dt.csv | 5 ++ src/tww/tokenizer.py | 160 ++++++++++++++++++++++++------------- src/tww/tww.py | 140 +++++++++++++++++++++++++++----- 3 files changed, 229 insertions(+), 76 deletions(-) create mode 100644 src/tww/data/custom_dt.csv diff --git a/src/tww/data/custom_dt.csv b/src/tww/data/custom_dt.csv new file mode 100644 index 0000000..4dce09b --- /dev/null +++ b/src/tww/data/custom_dt.csv @@ -0,0 +1,5 @@ +christmas,25 december +xmas,25 december +new years eve,31 december 23:59:59 +new years,31 december 23:59:59 +end of workday,17:00 diff --git a/src/tww/tokenizer.py b/src/tww/tokenizer.py index 795e0da..84c6e7a 100644 --- a/src/tww/tokenizer.py +++ b/src/tww/tokenizer.py @@ -1,3 +1,4 @@ +import argparse import json import locale import re @@ -5,12 +6,15 @@ import sys from datetime import datetime from pygments import highlight, lexers, formatters +from scalpl import Cut from localization import setlocale, resolve_locale from tww import ISO_FORMAT, time_to_emoji, time_ago, workday_diff, workhours_diff, td_remainders, td_totals, td_iso8601 from tww import resolve_timezone, dateparser_parse_dt, get_utcnow, get_s_since_epoch, get_ms_since_epoch, \ dt_tz_translation, get_local_now, query_to_format_result +custom_locale = None + r_generic = re.compile('(.*)', flags=re.IGNORECASE) r_time_in_epoch_s_now = re.compile('(?:time since epoch|seconds since epoch)', flags=re.IGNORECASE) r_time_in_epoch_s2 = re.compile('(.*)?\s*(?:in|to)\s*(?:epoch|seconds since epoch|seconds)', flags=re.IGNORECASE) @@ -33,42 +37,40 @@ r_timezone = re.compile('(.*)?\s(?:timezone|timezones|tz)', flags=re.IGNORECASE) r_timezone_2 = re.compile('(?:timezone in|timezones in|tz in|timezone|timezones|tz)\s(.*)?', flags=re.IGNORECASE) -def handler_time_s(dt_s: str) -> int: - return get_s_since_epoch(dateparser_parse_dt(dt_s)) 
+def handler_time(dt_s): + return dateparser_parse_dt(dt_s) -def handler_time_ms(dt_s: str) -> int: - return get_ms_since_epoch(dateparser_parse_dt(dt_s)) +def handler_time_now_local(): + return get_local_now() -def handler_time_s_now_local() -> int: - return get_s_since_epoch(get_local_now()) +def handler_time_now_utc(): + return get_utcnow() -def handler_time_ms_now_local() -> int: - return get_ms_since_epoch(get_local_now()) +def dt_normalize(start_dt, end_dt) -> (datetime, datetime): + if type(start_dt) is str: + start_dt = dateparser_parse_dt(start_dt) + if type(end_dt) is str: + end_dt = dateparser_parse_dt(end_dt) + return start_dt, end_dt -def handler_time_s_now_utc() -> int: - return get_s_since_epoch(get_utcnow()) - - -def handler_time_ms_now_utc() -> int: - return get_ms_since_epoch(get_utcnow()) - - -def handler_time_diff(start_dt: datetime, end_dt: datetime) -> dict: - diff = end_dt - start_dt +def handler_time_diff(start_dt, end_dt) -> dict: + start_dt, end_dt = dt_normalize(start_dt, end_dt) + diff = start_dt - end_dt return dict(start=dt_pretty(start_dt), end=dt_pretty(end_dt), diff=td_pretty(diff)) def handler_time_since_until(start_dt_s: str) -> dict: - return handler_time_diff(dateparser_parse_dt(start_dt_s), get_local_now()) + return handler_time_diff(start_dt_s, get_local_now()) -def handler_workdays_diff(start_dt: datetime, end_dt: datetime) -> dict: +def handler_workdays_diff(start_dt, end_dt) -> dict: + start_dt, end_dt = dt_normalize(start_dt, end_dt) diff = workday_diff(start_dt, end_dt) return dict(start=dt_pretty(start_dt), end=dt_pretty(end_dt), @@ -79,7 +81,8 @@ def handler_workdays_since_until(start_dt_s: str) -> dict: return handler_workdays_diff(dateparser_parse_dt(start_dt_s), get_local_now()) -def handler_workhours_diff(start_dt: datetime, end_dt: datetime) -> dict: +def handler_workhours_diff(start_dt, end_dt) -> dict: + start_dt, end_dt = dt_normalize(start_dt, end_dt) diff = workhours_diff(start_dt, end_dt) return 
dict(start=dt_pretty(start_dt), end=dt_pretty(end_dt), @@ -91,6 +94,8 @@ def handler_workhours_since_until(start_dt_s: str) -> dict: def handler_timezone_translation(dt_s: str, timezone_like_s: str) -> dict: + if dt_s.lower().strip() == "time": + dt_s = "now" src_dt = dateparser_parse_dt(dt_s) tz = resolve_timezone(timezone_like_s) if not tz: @@ -102,7 +107,7 @@ def handler_timezone_translation(dt_s: str, timezone_like_s: str) -> dict: def handler_generic_parser(dt_s: str) -> datetime: - return query_to_format_result(dt_s, None) + return dateparser_parse_dt(dt_s) def handler_time_in_parser(dt_s: str) -> datetime: @@ -118,29 +123,38 @@ QUERY_TYPE_DT = "datetime_details" QUERY_TYPE_TZ = "timezone" QUERY_TYPE_TD = "timedelta" +h_default = '' +h_unix_s = 'dt->unix_s' +h_unix_ms = 'dt->unix_ms' +h_tz_offset = 'tz->tz_offset' +h_time_in = 'dt->hh:mm' +h_translation = 'dst_dt->iso8601_full' +h_default_dt = 'dt->iso8601_full' +h_default_td = 'diff->duration_iso8601' + regex_handlers = [ - (r_time_in_epoch_s_now, handler_time_s_now_local, QUERY_TYPE_DT), - (r_time_in_epoch_s_now, handler_time_s_now_utc, QUERY_TYPE_DT), - (r_time_in_epoch_s2, handler_time_s, QUERY_TYPE_DT), - (r_time_in_epoch_s3, handler_time_s, QUERY_TYPE_DT), - (r_time_in_epoch_ms_now, handler_time_ms_now_local, QUERY_TYPE_DT), - (r_time_in_epoch_ms_now, handler_time_ms_now_utc, QUERY_TYPE_DT), - (r_time_in_epoch_ms2, handler_time_ms, QUERY_TYPE_DT), - (r_time_in_epoch_ms3, handler_time_ms, QUERY_TYPE_DT), - (r_timezone_translation, handler_timezone_translation, QUERY_TYPE_DT_TR), - (r_time_since, handler_time_since_until, QUERY_TYPE_TD), - (r_time_until, handler_time_since_until, QUERY_TYPE_TD), - (r_time_between, handler_time_diff, QUERY_TYPE_TD), - (r_workdays_since, handler_workdays_since_until, QUERY_TYPE_TD), - (r_workdays_until, handler_workdays_since_until, QUERY_TYPE_TD), - (r_workdays_between, handler_workdays_diff, QUERY_TYPE_TD), - (r_workhours_since, handler_workhours_since_until, 
QUERY_TYPE_TD), - (r_workhours_until, handler_workhours_since_until, QUERY_TYPE_TD), - (r_workhours_between, handler_workhours_diff, QUERY_TYPE_TD), - (r_time_in, handler_time_in_parser, QUERY_TYPE_DT), - (r_timezone, handler_timezone, QUERY_TYPE_TZ), - (r_timezone_2, handler_timezone, QUERY_TYPE_TZ), - (r_generic, handler_generic_parser, QUERY_TYPE_DT), + (r_time_in_epoch_s_now, handler_time_now_local, QUERY_TYPE_DT, h_unix_s), + (r_time_in_epoch_s_now, handler_time_now_utc, QUERY_TYPE_DT, h_unix_s), + (r_time_in_epoch_s2, handler_time, QUERY_TYPE_DT, h_unix_s), + (r_time_in_epoch_s3, handler_time, QUERY_TYPE_DT, h_unix_s), + (r_time_in_epoch_ms_now, handler_time_now_local, QUERY_TYPE_DT, h_unix_ms), + (r_time_in_epoch_ms_now, handler_time_now_utc, QUERY_TYPE_DT, h_unix_ms), + (r_time_in_epoch_ms2, handler_time, QUERY_TYPE_DT, h_unix_ms), + (r_time_in_epoch_ms3, handler_time, QUERY_TYPE_DT, h_unix_ms), + (r_timezone_translation, handler_timezone_translation, QUERY_TYPE_DT_TR, h_translation), + (r_time_since, handler_time_since_until, QUERY_TYPE_TD, h_default_td), + (r_time_until, handler_time_since_until, QUERY_TYPE_TD, h_default_td), + (r_time_between, handler_time_diff, QUERY_TYPE_TD, h_default_td), + (r_workdays_since, handler_workdays_since_until, QUERY_TYPE_TD, h_default_td), + (r_workdays_until, handler_workdays_since_until, QUERY_TYPE_TD, h_default_td), + (r_workdays_between, handler_workdays_diff, QUERY_TYPE_TD, h_default_td), + (r_workhours_since, handler_workhours_since_until, QUERY_TYPE_TD, h_default_td), + (r_workhours_until, handler_workhours_since_until, QUERY_TYPE_TD, h_default_td), + (r_workhours_between, handler_workhours_diff, QUERY_TYPE_TD, h_default_td), + (r_time_in, handler_time_in_parser, QUERY_TYPE_DT, h_time_in), + (r_timezone, handler_timezone, QUERY_TYPE_TZ, h_tz_offset), + (r_timezone_2, handler_timezone, QUERY_TYPE_TZ, h_tz_offset), + (r_generic, handler_generic_parser, QUERY_TYPE_DT, h_default_dt), ] @@ -156,7 +170,7 @@ def 
try_regex(r, s): def tokenize(s): solutions = [] - for r, h, t in regex_handlers: + for r, h, t, hi in regex_handlers: g = try_regex(r, s) if g is not None: try: @@ -164,7 +178,7 @@ def tokenize(s): except Exception as e: continue if result is not None: - solutions.append((h.__name__, result, t)) + solutions.append((h.__name__, result, t, hi)) return solutions @@ -174,6 +188,20 @@ def pretty_print_dict(obj): print(colorful_json) +def show_magic_results(obj, args): + for solution in obj['solutions']: + entry_proxy = Cut(solution, sep='->') + highlight_entry = solution["highlight"] + try: + highlight_result = entry_proxy[highlight_entry] + except Exception as e: + continue + if args.handlers: + print("{} -> {}".format(solution['handler'], highlight_result)) + else: + print(highlight_result) + + def dt_pretty(dt): rv = {} global custom_locale @@ -200,7 +228,7 @@ def dt_pretty(dt): def td_pretty(td): rv = { - "sign": '-' if td.days < 0 else '+', + "sign": '-' if td.days > 0 else '+', "in_the": 'future' if td.days < 0 else 'past', "time_ago": time_ago(td), "duration_iso8601": td_iso8601(td), @@ -214,7 +242,7 @@ def resolve_query_type(query): solutions = tokenize(query) if not solutions: dt = get_local_now() - return [["now", dt, QUERY_TYPE_DT]] + return [["now", dt, QUERY_TYPE_DT, h_default]] return solutions @@ -226,9 +254,10 @@ def resolve_query(query): solutions = resolve_query_type(query) for solution in solutions: element = {} - handler, results, query_type = solution + handler, results, query_type, hi = solution element["handler"] = handler element["query_type"] = query_type + element["highlight"] = hi try: if query_type == QUERY_TYPE_DT: element["dt"] = dt_pretty(results) @@ -241,8 +270,8 @@ def resolve_query(query): elif query_type == QUERY_TYPE_TD: element["timedelta"] = results rv["solutions"].append(element) - except Exception: - ... 
+        except Exception as e:
+            continue
     return rv
 
 
@@ -273,10 +302,37 @@ def test():
         print("{} -> {}".format(s, resolve_query(s)))
 
 
-if __name__ == "__main__":
-    custom_locale = ""
-    custom_locale = resolve_locale(custom_locale)
-    query = ' '.join(sys.argv[1:])
-    # query = "workhours until 2/12/2020 12:00"
+def parse_args():
+    """Parse the command-line arguments.
+
+    Returns the argparse namespace with ``query`` (a list of words),
+    ``locale``, ``handlers`` and ``full``.
+    """
+    parser = argparse.ArgumentParser()
+    # With nargs='*' the default must be a list: a bare string default would
+    # be split into single characters by the ' '.join() in main().
+    parser.add_argument('query', nargs='*', default=["now"], help="freeform")
+    parser.add_argument('--locale', dest='locale')
+    parser.add_argument('--handlers', dest='handlers', action='store_true')
+    parser.add_argument('--full', dest='full', action='store_true')
+    return parser.parse_args()
+
+
+def main(args):
+    """Resolve the query from the command line and print the result.
+
+    Prints the full JSON when ``--full`` is set, then the highlighted value
+    of every solution.
+    """
+    global custom_locale
+    custom_locale = resolve_locale(args.locale)
+    query = ' '.join(args.query)
     result = resolve_query(query)
-    pretty_print_dict(result)
+    if args.full:
+        pretty_print_dict(result)
+    show_magic_results(result, args)
+
+
+if __name__ == "__main__":
+    args = parse_args()
+    main(args)
diff --git a/src/tww/tww.py b/src/tww/tww.py
index 5eaba7a..e27721e 100644
--- a/src/tww/tww.py
+++ b/src/tww/tww.py
@@ -4,9 +4,11 @@ Find time now, in the past or future in any timezone or location.
""" import argparse +import csv import datetime import logging import os +import re from collections import defaultdict import dateparser @@ -18,6 +20,7 @@ from datetimerange import DateTimeRange from dateutil.parser import parse as dutil_parse from dateparser.timezone_parser import StaticTzInfo from dateutil.tz import gettz, tzlocal +from fuzzywuzzy import fuzz from pytz import timezone from pytz.exceptions import UnknownTimeZoneError @@ -134,8 +137,6 @@ def create_if_not_exists(fname): def write_to_cache(query, location): - import csv - logger.debug("Writing location to cache") with open(os.path.join(basepath, "data", ".cache.csv"), 'a+') as wf: cachewriter = csv.writer(wf) @@ -156,9 +157,7 @@ def resolve_location_local(query): """ Find a location by searching in local db of countries and cities """ - import csv from heapq import heappush, heappop - from fuzzywuzzy import fuzz query = query.lower() create_if_not_exists(os.path.join(basepath, "data", ".cache.csv")) @@ -473,10 +472,113 @@ def tzinfo_from_offset(offset: str) -> pytz.timezone: return None, [] -def dateparser_parse_dt(s: str): - # print("Dateparser query: {}".format(s)) +def custom_dt_parse(query): + with open(os.path.join(basepath, "data", "custom_dt.csv")) as f: + cfile = csv.reader(f) + for row in cfile: + entry = row[0] + fuzz_query = query.lower().strip() + fuzz_ratio = fuzz.ratio(fuzz_query, entry) + if fuzz_ratio >= 95: + return row[1] + return query + + +r_next = re.compile('(?:next)?\s*(.*)', flags=re.IGNORECASE) +r_prev = re.compile('(?:last|prev|previous)?\s*(.*)', flags=re.IGNORECASE) +r_this = re.compile('(?:this|that)?\s*(.*)', flags=re.IGNORECASE) + + +def get_local_now_parsed(s): + now = get_local_now() + now = now.replace(hour=0, minute=0, second=0, microsecond=0) + parsed = parse_dt(s) + parsed = parsed.replace(tzinfo=now.tzinfo) + return now, parsed + + +def get_week_start_end(dt): + start = dt - timedelta(days=dt.weekday()) + end = start + timedelta(days=6) + return start, end + + 
+def handler_next_weekday(s): + now, parsed = get_local_now_parsed(s) + week_start, week_end = get_week_start_end(now) + if parsed > week_end: + # parsed is in next week + return str(parsed) + if week_start <= parsed <= week_end: + # parsed is in this week + return str(parsed + timedelta(days=7)) + else: + # parsed is in previous week + return str(parsed + timedelta(days=14)) + + +def handler_prev_weekday(s): + now, parsed = get_local_now_parsed(s) + week_start, week_end = get_week_start_end(now) + if parsed > week_end: + # parsed is in next week + return str(parsed - timedelta(days=14)) + if week_start <= parsed <= week_end: + # parsed is in this week + return str(parsed - timedelta(days=7)) + else: + # parsed is in previous week + return str(parsed) + + +def handler_this_weekday(s): + now, parsed = get_local_now_parsed(s) + week_start, week_end = get_week_start_end(now) + if parsed > week_end: + # parsed is in next week + return str(parsed - timedelta(days=7)) + if week_start <= parsed <= week_end: + # parsed is in this week + return str(parsed) + else: + # parsed is in previous week + return str(parsed + timedelta(days=7)) + + +def try_regex(r, s): + try: + m = re.match(r, s) + except: + return None + if m: + groups = m.groups() + return groups + + +regex_handlers = [ + (r_next, handler_next_weekday), + (r_prev, handler_prev_weekday), + (r_this, handler_this_weekday), +] + + +def regex_parsers(s): + for r, h in regex_handlers: + g = try_regex(r, s) + if g is not None: + try: + result = h(*g) + except Exception as e: + continue + if result is not None: + return result + return s + + +def dateparser_parse_dt(s: str): + s = custom_dt_parse(s) + s = regex_parsers(s) parsed = parse_dt(s) - # print("Dateparser parsed query: {}".format(parsed)) if not parsed: parsed = dutil_parse(s) if not parsed: @@ -643,11 +745,11 @@ def workhours_diff(start, end, workhour_begin="09:00", workhour_end="17:00", wor def td_remainders(td): # split seconds to larger units seconds = 
td.total_seconds() - minutes, seconds = divmod(seconds, 60) - hours, minutes = divmod(minutes, 60) - days, hours = divmod(hours, 24) - months, days = divmod(days, 30.42) - years, months = divmod(months, 12) + minutes, seconds = divmod(abs(int(seconds)), 60) + hours, minutes = divmod(abs(int(minutes)), 60) + days, hours = divmod(abs(int(hours)), 24) + months, days = divmod(abs(int(days)), 30.42) + years, months = divmod(abs(int(months)), 12) years, months, days, hours, minutes, seconds = map(int, (years, months, days, hours, minutes, seconds)) years, months, days, hours, minutes, seconds = map(abs, (years, months, days, hours, minutes, seconds)) return dict( @@ -662,12 +764,12 @@ def td_remainders(td): def td_totals(td): seconds = td.total_seconds() - minutes = seconds // 60 - hours = seconds // (60 * 60) - days = seconds // (24 * 60 * 60) - weeks = seconds // (7 * 24 * 60 * 60) - months = seconds // (30 * 24 * 60 * 60) - years = seconds // (365 * 24 * 60 * 60) + minutes = seconds / 60 + hours = seconds / (60 * 60) + days = seconds / (24 * 60 * 60) + weeks = seconds / (7 * 24 * 60 * 60) + months = seconds / (30 * 24 * 60 * 60) + years = seconds / (365 * 24 * 60 * 60) years, months, weeks, days, hours, minutes, seconds = map(abs, (years, months, weeks, days, hours, minutes, seconds)) return dict( @@ -694,4 +796,4 @@ def td_iso8601(td): for short, timeframe in hms: if rem[timeframe]: fmt += "{}{}".format(rem[timeframe], short) - return fmt \ No newline at end of file + return fmt