general solution based tokenizer
This commit is contained in:
parent
559bd39e9a
commit
abbba47bb5
@ -1,10 +1,12 @@
|
||||
import json
|
||||
import re
|
||||
import sys
|
||||
from datetime import timedelta, datetime
|
||||
from pprint import pprint, pformat
|
||||
from datetime import datetime
|
||||
|
||||
from pygments import highlight, lexers, formatters
|
||||
|
||||
from tww import ISO_FORMAT, time_to_emoji
|
||||
from tww import resolve_timezone, dateparser_parse_dt, get_utcnow, get_s_since_epoch, get_ms_since_epoch, \
|
||||
dt_tz_translation, DEFAULT_FORMAT, get_local_now
|
||||
dt_tz_translation, DEFAULT_FORMAT, get_local_now, query_to_format_result
|
||||
|
||||
# Catch-all pattern: captures the whole query as a single group.
r_generic = re.compile(r'(.*)', flags=re.I)
# Matches a request for the current time expressed in seconds since the epoch.
r_time_in_epoch_s_now = re.compile(r'(?:time since epoch|seconds since epoch)', flags=re.I)
|
||||
@ -13,10 +15,13 @@ r_time_in_epoch_s3 = re.compile('(?:seconds)?\s*since\s*(.*)', flags=re.IGNORECA
|
||||
# NOTE: every pattern is a raw string so that ``\s`` reaches the regex engine
# verbatim instead of being an invalid string escape sequence.

# Current time in milliseconds since the epoch.
r_time_in_epoch_ms_now = re.compile(r'(?:milliseconds since epoch)', flags=re.IGNORECASE)
# "<datetime> in|to ms": group 1 is the datetime-like part.
r_time_in_epoch_ms2 = re.compile(r'(.*)?\s*(?:in|to)\s*(?:ms|milliseconds|miliseconds)', flags=re.IGNORECASE)
# "[ms] since <datetime>": group 1 is the datetime-like part.
r_time_in_epoch_ms3 = re.compile(r'(?:ms|milliseconds|miliseconds)?\s*since\s*(.*)', flags=re.IGNORECASE)
# "[time] in <place or timezone>": group 1 is everything after "in".
r_time_in = re.compile(r'(?:time)?\s*in\s*(.*)', flags=re.IGNORECASE)
# "[time] since <datetime>".
r_time_since = re.compile(r'(?:time)?\s*since\s*(.*)', flags=re.IGNORECASE)
# "[time] until <datetime>".
r_time_until = re.compile(r'(?:time)?\s*until\s*(.*)', flags=re.IGNORECASE)
# "[time] between <datetime> and <datetime>": two groups, one per endpoint.
r_time_between = re.compile(r'(?:time)?\s*between\s*(.*)\s*and\s*(.*)', flags=re.IGNORECASE)
# "<datetime> in|to <timezone or location>": groups are (source, target).
r_timezone_translation = re.compile(r'(.*)?\s(?:in|to)\s(.*)', flags=re.IGNORECASE)
# "<name> timezone|timezones|tz": group 1 is the name.
r_timezone = re.compile(r'(.*)?\s(?:timezone|timezones|tz)', flags=re.IGNORECASE)
# "timezone|timezones|tz [in] <name>": group 1 is the name.
r_timezone_2 = re.compile(r'(?:timezone in|timezones in|tz in|timezone|timezones|tz)\s(.*)?', flags=re.IGNORECASE)
|
||||
|
||||
|
||||
def handler_time_s(dt_s: str) -> int:
|
||||
@ -74,23 +79,44 @@ def handler_timezone_translation(dt_s: str, timezone_like_s: str) -> dict:
|
||||
|
||||
|
||||
def handler_generic_parser(dt_s: str) -> datetime:
    """Resolve a free-form query via ``query_to_format_result`` with formatting disabled.

    :param dt_s: the query text captured by the matching regex.
    :return: whatever ``query_to_format_result`` yields when called with ``fmt=None``.
    """
    unformatted = query_to_format_result(dt_s, None)
    return unformatted
|
||||
|
||||
|
||||
def handler_dateparser(dt_s: str) -> datetime:
    """Hand ``dt_s`` to ``dateparser_parse_dt`` and return its result unchanged.

    :param dt_s: the datetime-like text captured by the matching regex.
    """
    parsed = dateparser_parse_dt(dt_s)
    return parsed
|
||||
|
||||
|
||||
def handler_time_in_parser(dt_s: str) -> datetime:
    """Resolve "time in <place>" by rewriting it as the query ``"now to <place>"``.

    :param dt_s: the timezone/location text captured after "in".
    :return: the unformatted result of ``query_to_format_result``.
    """
    # Pass fmt=None explicitly: with the default format the helper returns a
    # formatted string, but this handler is registered as a datetime result and
    # its value is later fed to ``dt_pretty`` (which calls ``.strftime``).
    return query_to_format_result("now to {}".format(dt_s), None)
|
||||
|
||||
|
||||
def handler_timezone(timezone_s: str):
    """Look up ``timezone_s`` with ``resolve_timezone`` and return its result unchanged.

    :param timezone_s: the timezone-like text captured by the matching regex.
    """
    resolved = resolve_timezone(timezone_s)
    return resolved
|
||||
|
||||
|
||||
# Query-type tags attached to each regex handler; ``resolve_query`` branches on
# them to decide how a handler's result is rendered.
QUERY_TYPE_DT_TR = "datetime_translation"  # source datetime + translated datetime
QUERY_TYPE_DT = "datetime_details"         # a single datetime
QUERY_TYPE_TZ = "timezone"                 # timezone lookup result
QUERY_TYPE_TD = "timedelta"                # difference between two instants
|
||||
|
||||
# Registry consumed by ``tokenize``: each entry is (compiled pattern, handler,
# query type). ``tokenize`` unpacks exactly three fields per entry and tries
# every entry in order, so all rows must be 3-tuples and each pattern/handler
# pair should be listed once.
regex_handlers = [
    (r_time_in_epoch_s_now, handler_time_s_now_local, QUERY_TYPE_DT),
    (r_time_in_epoch_s_now, handler_time_s_now_utc, QUERY_TYPE_DT),
    (r_time_in_epoch_s2, handler_time_s, QUERY_TYPE_DT),
    (r_time_in_epoch_s3, handler_time_s, QUERY_TYPE_DT),
    (r_time_in_epoch_ms_now, handler_time_ms_now_local, QUERY_TYPE_DT),
    (r_time_in_epoch_ms_now, handler_time_ms_now_utc, QUERY_TYPE_DT),
    (r_time_in_epoch_ms2, handler_time_ms, QUERY_TYPE_DT),
    (r_time_in_epoch_ms3, handler_time_ms, QUERY_TYPE_DT),
    (r_timezone_translation, handler_timezone_translation, QUERY_TYPE_DT_TR),
    (r_time_since, handler_time_since, QUERY_TYPE_TD),
    (r_time_until, handler_time_until, QUERY_TYPE_TD),
    (r_time_between, handler_time_diff, QUERY_TYPE_TD),
    (r_time_in, handler_time_in_parser, QUERY_TYPE_DT),
    (r_timezone, handler_timezone, QUERY_TYPE_TZ),
    (r_timezone_2, handler_timezone, QUERY_TYPE_TZ),
    # The catch-all pattern is tried with both generic parsers.
    (r_generic, handler_dateparser, QUERY_TYPE_DT),
    (r_generic, handler_generic_parser, QUERY_TYPE_DT),
]
|
||||
|
||||
|
||||
@ -104,16 +130,17 @@ def try_regex(r, s):
|
||||
return groups
|
||||
|
||||
|
||||
def tokenize(s):
    """Run every registered regex handler against ``s`` and collect the successful ones.

    :param s: the raw user query.
    :return: a list of ``(handler_name, (result,), query_type)`` tuples, one per
        handler whose pattern matched and whose call returned a non-``None``
        value. The list is empty when nothing could be solved.
    """
    solutions = []
    for r, h, t in regex_handlers:
        g = try_regex(r, s)
        if g is None:
            continue
        try:
            result = h(*g)
        except Exception:
            # Best effort: a handler that cannot digest its captured groups
            # simply contributes no solution.
            continue
        if result is not None:
            solutions.append((h.__name__, (result,), t))
    return solutions
|
||||
|
||||
|
||||
@ -139,20 +166,71 @@ def test():
|
||||
"now in dublin",
|
||||
]
|
||||
for s in test_strings:
|
||||
print("{} -> {}".format(s, parse(s)))
|
||||
print("{} -> {}".format(s, tokenize(s)))
|
||||
|
||||
|
||||
def pretty_print_dict(obj):
    """Print ``obj`` as indented, syntax-highlighted JSON on stdout.

    :param obj: the structure to display (e.g. the dict built by ``resolve_query``).
    """
    # ``resolve_query`` stores timedelta and timezone results verbatim; those are
    # not JSON-serialisable, so fall back to ``str()`` for display purposes
    # instead of raising TypeError.
    formatted_json = json.dumps(obj, indent=2, default=str)
    colorful_json = highlight(formatted_json, lexers.JsonLexer(), formatters.TerminalFormatter())
    print(colorful_json)
|
||||
|
||||
|
||||
def dt_pretty(dt):
    """Describe ``dt`` as a dict of pre-rendered string/number representations.

    :param dt: an object exposing ``strftime`` (a datetime).
    :return: dict with ISO-8601, locale-dependent, timezone, emoji and
        epoch-based views of ``dt``; keys are inserted in the order listed below.
    """
    return {
        "iso8601_full": dt.strftime(ISO_FORMAT),
        "iso8601_date": dt.strftime('%Y-%m-%d'),
        "iso8601_time": dt.strftime('%H:%M:%S'),
        # Locale-dependent renderings.
        "locale_dt": dt.strftime("%c"),
        "locale_day_of_week": dt.strftime("%A"),
        "locale_day_of_week_short": dt.strftime("%a"),
        "day_of_week_number": dt.strftime("%w"),
        "locale_month": dt.strftime("%B"),
        "locale_month_short": dt.strftime("%b"),
        # Timezone name and UTC offset (empty strings for naive datetimes).
        "tz_name": dt.strftime("%Z"),
        "tz_offset": dt.strftime("%z"),
        "hh:mm": dt.strftime("%H:%M"),
        "locale_time": dt.strftime("%X"),
        "locale_date": dt.strftime("%x"),
        "emoji_time": time_to_emoji(dt),
        "unix_s": get_s_since_epoch(dt),
        "unix_ms": get_ms_since_epoch(dt),
    }
|
||||
|
||||
|
||||
def resolve_query_type(query):
    """Tokenize ``query``, falling back to "now" when no handler solves it.

    :param query: the raw user query.
    :return: the solutions from ``tokenize``; if there are none, a single
        ``["now", (local_now,), QUERY_TYPE_DT]`` entry.
    """
    solutions = tokenize(query)
    if solutions:
        return solutions
    # Nothing matched: answer with the current local time.
    return [["now", (get_local_now(),), QUERY_TYPE_DT]]
|
||||
|
||||
|
||||
def resolve_query(query):
    """Solve ``query`` and package every solution into a display-ready dict.

    :param query: the raw user query.
    :return: ``{"query": query, "solutions": [...]}`` where each solution holds
        the handler name, its query type and a type-specific payload.
    """
    rendered = []
    for handler, results, query_type in resolve_query_type(query):
        element = {"handler": handler, "query_type": query_type}
        if query_type == QUERY_TYPE_DT:
            element["dt"] = dt_pretty(results[0])
        elif query_type == QUERY_TYPE_DT_TR:
            # NOTE(review): this expects two datetimes in ``results``, while
            # ``tokenize`` wraps each handler result in a 1-tuple - confirm.
            element["src_dt"] = dt_pretty(results[0])
            element["dst_dt"] = dt_pretty(results[1])
        elif query_type == QUERY_TYPE_TZ:
            element["tz"] = results[0]
        elif query_type == QUERY_TYPE_TD:
            element["timedelta"] = results[0]
        rendered.append(element)
    return {"query": query, "solutions": rendered}
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Take the query from the command line; fall back to a sample query when
    # the script is run without arguments.
    query = ' '.join(sys.argv[1:]) or "now in sofia"
    result = resolve_query(query)
    pretty_print_dict(result)
|
||||
|
@ -213,7 +213,7 @@ def parse_query(query):
|
||||
"""
|
||||
Parses the user query to the datetime, tz/loc parts
|
||||
"""
|
||||
query = ' '.join(query)
|
||||
# query = ' '.join(query)
|
||||
query = query.strip()
|
||||
if not query:
|
||||
logger.critical("Use a query like <datetime-like> ['to' <timezone or location>]")
|
||||
@ -246,6 +246,32 @@ def serialize_location(location):
|
||||
}
|
||||
|
||||
|
||||
def find_from_offset(query):
    """Interpret ``query`` as a UTC offset and look up the matching timezone.

    Accepted shapes (``query`` is expected to be lower-cased already):
    ``+H``, ``-HH``, ``+HH:MM``, ``+HHMM`` and the same after a ``gmt`` / ``utc``
    prefix, e.g. ``gmt+2``, ``utc-03:30``.

    :param query: normalised timezone query.
    :return: whatever ``tzinfo_from_offset`` returns for the normalised
        ``+HHMM`` / ``-HHMM`` string, or ``(None, [])`` when ``query`` is not an
        offset expression.
    """
    for universal_alias in ("gmt", "utc", "+", "-"):
        if not query.startswith(universal_alias):
            continue
        splitted_query = query.split(universal_alias)
        if len(splitted_query) != 2:
            continue
        offset = splitted_query[1].strip()

        # Work out the sign separately from the digits; formatting a negative
        # hour with "{:02d}" would otherwise produce strings like "+-200".
        if universal_alias in ("+", "-"):
            sign = universal_alias
        elif offset[:1] in ("+", "-"):
            sign, offset = offset[0], offset[1:].strip()
        else:
            sign = "+"

        if ':' in offset:
            splitted_offset = offset.split(':')
            if len(splitted_offset) != 2:
                continue
            hhs, mms = splitted_offset
        elif len(offset) == 4 and offset.isdigit():
            # Compact HHMM form, e.g. "0530".
            hhs, mms = offset[:2], offset[2:]
        else:
            hhs, mms = offset, "00"

        try:
            normalized = "{}{:02d}{:02d}".format(sign, int(hhs), int(mms))
        except ValueError:
            # Non-numeric remainder: not an offset under this alias.
            continue
        try:
            return tzinfo_from_offset(normalized)
        except Exception:
            # Best effort, as before: a failed lookup just means "no match".
            continue
    return None, []
|
||||
|
||||
|
||||
def resolve_timezone(query):
|
||||
if not query:
|
||||
query = "utc"
|
||||
@ -254,10 +280,7 @@ def resolve_timezone(query):
|
||||
normal_query = query.lower().strip()
|
||||
found_from_iana_tz = NORMALIZED_TZ_DICT.get(normal_query, "")
|
||||
found_from_abbr_tzs = list(NORMALIZED_TZ_ABBR.get(normal_query, set()))
|
||||
found_from_offset_tz = None
|
||||
try:
|
||||
found_from_offset_tz = tzinfo_from_offset(normal_query)
|
||||
except: ...
|
||||
found_from_offset_tz, offset_tzs = find_from_offset(normal_query)
|
||||
normal_tz = found_from_iana_tz
|
||||
if not normal_tz:
|
||||
if found_from_abbr_tzs:
|
||||
@ -304,7 +327,7 @@ def resolve_timezone(query):
|
||||
"normal_query": normal_query,
|
||||
"found_from_iana_tz": found_from_iana_tz,
|
||||
"found_from_abbr_tzs": found_from_abbr_tzs,
|
||||
"found_from_offset_tzs": found_from_offset_tz,
|
||||
"found_from_offset_tzs": offset_tzs,
|
||||
"local_location": local_location,
|
||||
"remote_location": remote_location,
|
||||
"search_pytz": normal_tz,
|
||||
@ -335,8 +358,7 @@ def solve_query(human_dt, human_tz_loc):
|
||||
|
||||
def format_result(result, fmt):
|
||||
if result is None:
|
||||
logger.critical("Could not solve query")
|
||||
exit(1)
|
||||
logger.error("Could not solve query")
|
||||
logger.debug("Format: {}".format(fmt))
|
||||
format_result = result.strftime(fmt)
|
||||
logger.debug("Formated result: {} -> {}".format(result, format_result))
|
||||
@ -346,8 +368,9 @@ def format_result(result, fmt):
|
||||
def query_to_format_result(query, fmt=DEFAULT_FORMAT):
    """Parse and solve ``query``, optionally formatting the outcome.

    :param query: the user query string.
    :param fmt: format passed on to ``format_result``; pass a falsy value
        (e.g. ``None``) to get the unformatted solved result instead.
    :return: the formatted result when ``fmt`` is truthy, otherwise the raw
        result of ``solve_query``.
    """
    human_dt, human_tz_loc = parse_query(query)
    result = solve_query(human_dt, human_tz_loc)
    if fmt:
        return format_result(result, fmt)
    return result
|
||||
|
||||
|
||||
def main(args):
|
||||
@ -442,8 +465,10 @@ def tzinfo_from_offset(offset: str) -> pytz.timezone:
|
||||
tznames = TZ_OFFSETS.get(offset, [])
|
||||
for tzname in tznames:
|
||||
if tzname.startswith('Etc/GMT'):
|
||||
return pytz.timezone(tzname)
|
||||
return pytz.timezone(tznames[0])
|
||||
return pytz.timezone(tzname), tznames
|
||||
if tznames:
|
||||
return pytz.timezone(tznames[0]), tznames
|
||||
return None, []
|
||||
|
||||
|
||||
def dateparser_parse_dt(s: str):
|
||||
@ -467,7 +492,7 @@ def get_utcnow(tzaware: bool = True):
|
||||
|
||||
def get_local_now(tzaware: bool = True):
    """Return the current time, tagged with the local UTC-offset timezone by default.

    :param tzaware: when true, attach the timezone found for the local offset;
        when false, return a naive datetime.
    """
    if tzaware:
        # tzinfo_from_offset returns (timezone, matching_names); only the
        # timezone object is a valid ``tzinfo``.
        local_tz, _ = tzinfo_from_offset(get_local_tz_offset())
        return datetime.now().replace(tzinfo=local_tz)
    # NOTE(review): the naive branch returns UTC wall-clock time rather than
    # local time - confirm this is intended.
    return datetime.utcnow()
|
||||
|
||||
|
||||
@ -546,4 +571,4 @@ def epoch_to_dt(seconds):
|
||||
def time_to_emoji(dt):
    """Return a clock-face emoji character derived from ``dt``.

    :param dt: value accepted by ``get_s_since_epoch``.
    :return: a single character in the clock-face emoji range starting at
        code point 128336 (U+1F550).
    """
    # Reduce the epoch seconds to an index in 0..23.
    slot = int((get_s_since_epoch(dt) / 900 - 3) / 2 % 24)
    # Even/odd slots select between the two 12-glyph runs of clock faces
    # (the second run starts 12 code points after the first).
    face, second_run = divmod(slot, 2)
    return chr(128336 + face + second_run * 12)
|
||||
|
Loading…
Reference in New Issue
Block a user