general solution based tokenizer
This commit is contained in:
parent
559bd39e9a
commit
abbba47bb5
@ -1,10 +1,12 @@
|
|||||||
|
import json
|
||||||
import re
|
import re
|
||||||
import sys
|
from datetime import datetime
|
||||||
from datetime import timedelta, datetime
|
|
||||||
from pprint import pprint, pformat
|
|
||||||
|
|
||||||
|
from pygments import highlight, lexers, formatters
|
||||||
|
|
||||||
|
from tww import ISO_FORMAT, time_to_emoji
|
||||||
from tww import resolve_timezone, dateparser_parse_dt, get_utcnow, get_s_since_epoch, get_ms_since_epoch, \
|
from tww import resolve_timezone, dateparser_parse_dt, get_utcnow, get_s_since_epoch, get_ms_since_epoch, \
|
||||||
dt_tz_translation, DEFAULT_FORMAT, get_local_now
|
dt_tz_translation, DEFAULT_FORMAT, get_local_now, query_to_format_result
|
||||||
|
|
||||||
r_generic = re.compile('(.*)', flags=re.IGNORECASE)
|
r_generic = re.compile('(.*)', flags=re.IGNORECASE)
|
||||||
r_time_in_epoch_s_now = re.compile('(?:time since epoch|seconds since epoch)', flags=re.IGNORECASE)
|
r_time_in_epoch_s_now = re.compile('(?:time since epoch|seconds since epoch)', flags=re.IGNORECASE)
|
||||||
@ -13,10 +15,13 @@ r_time_in_epoch_s3 = re.compile('(?:seconds)?\s*since\s*(.*)', flags=re.IGNORECA
|
|||||||
r_time_in_epoch_ms_now = re.compile('(?:milliseconds since epoch)', flags=re.IGNORECASE)
|
r_time_in_epoch_ms_now = re.compile('(?:milliseconds since epoch)', flags=re.IGNORECASE)
|
||||||
r_time_in_epoch_ms2 = re.compile('(.*)?\s*(?:in|to)\s*(?:ms|milliseconds|miliseconds)', flags=re.IGNORECASE)
|
r_time_in_epoch_ms2 = re.compile('(.*)?\s*(?:in|to)\s*(?:ms|milliseconds|miliseconds)', flags=re.IGNORECASE)
|
||||||
r_time_in_epoch_ms3 = re.compile('(?:ms|milliseconds|miliseconds)?\s*since\s*(.*)', flags=re.IGNORECASE)
|
r_time_in_epoch_ms3 = re.compile('(?:ms|milliseconds|miliseconds)?\s*since\s*(.*)', flags=re.IGNORECASE)
|
||||||
|
r_time_in = re.compile('(?:time)?\s*in\s*(.*)', flags=re.IGNORECASE)
|
||||||
r_time_since = re.compile('(?:time)?\s*since\s*(.*)', flags=re.IGNORECASE)
|
r_time_since = re.compile('(?:time)?\s*since\s*(.*)', flags=re.IGNORECASE)
|
||||||
r_time_until = re.compile('(?:time)?\s*until\s*(.*)', flags=re.IGNORECASE)
|
r_time_until = re.compile('(?:time)?\s*until\s*(.*)', flags=re.IGNORECASE)
|
||||||
r_time_between = re.compile('(?:time)?\s*between\s*(.*)\s*and\s*(.*)', flags=re.IGNORECASE)
|
r_time_between = re.compile('(?:time)?\s*between\s*(.*)\s*and\s*(.*)', flags=re.IGNORECASE)
|
||||||
r_timezone_translation = re.compile('(.*)?\s(?:in|to)\s(.*)', flags=re.IGNORECASE)
|
r_timezone_translation = re.compile('(.*)?\s(?:in|to)\s(.*)', flags=re.IGNORECASE)
|
||||||
|
r_timezone = re.compile('(.*)?\s(?:timezone|timezones|tz)', flags=re.IGNORECASE)
|
||||||
|
r_timezone_2 = re.compile('(?:timezone in|timezones in|tz in|timezone|timezones|tz)\s(.*)?', flags=re.IGNORECASE)
|
||||||
|
|
||||||
|
|
||||||
def handler_time_s(dt_s: str) -> int:
|
def handler_time_s(dt_s: str) -> int:
|
||||||
@ -74,23 +79,44 @@ def handler_timezone_translation(dt_s: str, timezone_like_s: str) -> dict:
|
|||||||
|
|
||||||
|
|
||||||
def handler_generic_parser(dt_s: str) -> datetime:
|
def handler_generic_parser(dt_s: str) -> datetime:
|
||||||
|
return query_to_format_result(dt_s, None)
|
||||||
|
|
||||||
|
|
||||||
|
def handler_dateparser(dt_s: str) -> datetime:
|
||||||
return dateparser_parse_dt(dt_s)
|
return dateparser_parse_dt(dt_s)
|
||||||
|
|
||||||
|
|
||||||
|
def handler_time_in_parser(dt_s: str) -> datetime:
    """Resolve a "time in <place>" query to the current time at that place.

    The captured text is rewritten as ``"now to <dt_s>"`` and handed to
    ``query_to_format_result``.

    ``None`` is passed as the format on purpose: with its default format the
    helper returns an already formatted string, while this handler is
    registered as a datetime-producing handler whose result is later fed to
    ``dt_pretty`` (which calls ``strftime`` on it).

    :param dt_s: text captured after "in", e.g. ``"sofia"``.
    :return: the unformatted result of ``query_to_format_result``
        (presumably a ``datetime`` -- confirm against ``solve_query``).
    """
    return query_to_format_result("now to {}".format(dt_s), None)
|
||||||
|
|
||||||
|
|
||||||
|
def handler_timezone(timezone_s: str):
    """Look up the timezone described by ``timezone_s``.

    Thin adapter that forwards the single captured regex group to
    ``resolve_timezone`` and returns its result unchanged.

    :param timezone_s: timezone-like text captured from the query.
    """
    resolved = resolve_timezone(timezone_s)
    return resolved
|
||||||
|
|
||||||
|
|
||||||
|
QUERY_TYPE_DT_TR = "datetime_translation"
|
||||||
|
QUERY_TYPE_DT = "datetime_details"
|
||||||
|
QUERY_TYPE_TZ = "timezone"
|
||||||
|
QUERY_TYPE_TD = "timedelta"
|
||||||
|
|
||||||
regex_handlers = [
|
regex_handlers = [
|
||||||
(r_time_in_epoch_s_now, handler_time_s_now_local),
|
(r_time_in_epoch_s_now, handler_time_s_now_local, QUERY_TYPE_DT),
|
||||||
(r_time_in_epoch_s_now, handler_time_s_now_utc),
|
(r_time_in_epoch_s_now, handler_time_s_now_utc, QUERY_TYPE_DT),
|
||||||
(r_time_in_epoch_s2, handler_time_s),
|
(r_time_in_epoch_s2, handler_time_s, QUERY_TYPE_DT),
|
||||||
(r_time_in_epoch_s3, handler_time_s),
|
(r_time_in_epoch_s3, handler_time_s, QUERY_TYPE_DT),
|
||||||
(r_time_in_epoch_ms_now, handler_time_ms_now_local),
|
(r_time_in_epoch_ms_now, handler_time_ms_now_local, QUERY_TYPE_DT),
|
||||||
(r_time_in_epoch_ms_now, handler_time_ms_now_utc),
|
(r_time_in_epoch_ms_now, handler_time_ms_now_utc, QUERY_TYPE_DT),
|
||||||
(r_time_in_epoch_ms2, handler_time_ms),
|
(r_time_in_epoch_ms2, handler_time_ms, QUERY_TYPE_DT),
|
||||||
(r_time_in_epoch_ms3, handler_time_ms),
|
(r_time_in_epoch_ms3, handler_time_ms, QUERY_TYPE_DT),
|
||||||
(r_timezone_translation, handler_timezone_translation),
|
(r_timezone_translation, handler_timezone_translation, QUERY_TYPE_DT_TR),
|
||||||
(r_time_since, handler_time_since),
|
(r_time_since, handler_time_since, QUERY_TYPE_TD),
|
||||||
(r_time_until, handler_time_until),
|
(r_time_until, handler_time_until, QUERY_TYPE_TD),
|
||||||
(r_time_between, handler_time_diff),
|
(r_time_between, handler_time_diff, QUERY_TYPE_TD),
|
||||||
(r_generic, handler_generic_parser),
|
(r_time_in, handler_time_in_parser, QUERY_TYPE_DT),
|
||||||
|
(r_timezone, handler_timezone, QUERY_TYPE_TZ),
|
||||||
|
(r_timezone_2, handler_timezone, QUERY_TYPE_TZ),
|
||||||
|
(r_generic, handler_dateparser, QUERY_TYPE_DT),
|
||||||
|
(r_generic, handler_generic_parser, QUERY_TYPE_DT),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
@ -104,16 +130,17 @@ def try_regex(r, s):
|
|||||||
return groups
|
return groups
|
||||||
|
|
||||||
|
|
||||||
def parse(s):
|
def tokenize(s):
|
||||||
solutions = []
|
solutions = []
|
||||||
for r, h in regex_handlers:
|
for r, h, t in regex_handlers:
|
||||||
g = try_regex(r, s)
|
g = try_regex(r, s)
|
||||||
if g is not None:
|
if g is not None:
|
||||||
try:
|
try:
|
||||||
result = h(*g)
|
result = h(*g)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
result = None
|
continue
|
||||||
solutions.append((h.__name__, result))
|
if result is not None:
|
||||||
|
solutions.append((h.__name__, (result, ), t))
|
||||||
return solutions
|
return solutions
|
||||||
|
|
||||||
|
|
||||||
@ -139,20 +166,71 @@ def test():
|
|||||||
"now in dublin",
|
"now in dublin",
|
||||||
]
|
]
|
||||||
for s in test_strings:
|
for s in test_strings:
|
||||||
print("{} -> {}".format(s, parse(s)))
|
print("{} -> {}".format(s, tokenize(s)))
|
||||||
|
|
||||||
|
|
||||||
|
def pretty_print_dict(obj):
    """Print ``obj`` to stdout as indented, syntax-highlighted JSON.

    ``default=str`` is a deliberate choice for this display-only helper:
    values that JSON cannot encode natively (for example ``timedelta`` or
    timezone objects placed in a result dict) are rendered through ``str``
    instead of making ``json.dumps`` raise ``TypeError``.

    :param obj: JSON-like structure to display.
    """
    formatted_json = json.dumps(obj, indent=2, default=str)
    colorful_json = highlight(formatted_json, lexers.JsonLexer(), formatters.TerminalFormatter())
    print(colorful_json)
|
||||||
|
|
||||||
|
|
||||||
|
def dt_pretty(dt):
    """Describe ``dt`` as a flat dict of display-ready fields.

    Most entries are ``dt.strftime`` renderings; the last three come from
    ``time_to_emoji``, ``get_s_since_epoch`` and ``get_ms_since_epoch``.

    :param dt: object exposing ``strftime`` (a ``datetime`` in practice).
    :return: dict whose keys are inserted in the order listed below.
    """
    # (output key, strftime pattern) pairs, kept in output order.
    strftime_fields = (
        ("iso8601_full", ISO_FORMAT),
        ("iso8601_date", '%Y-%m-%d'),
        ("iso8601_time", '%H:%M:%S'),
        ("locale_dt", "%c"),
        ("locale_day_of_week", "%A"),
        ("locale_day_of_week_short", "%a"),
        ("day_of_week_number", "%w"),
        ("locale_month", "%B"),
        ("locale_month_short", "%b"),
        ("tz_name", "%Z"),
        ("tz_offset", "%z"),
        ("hh:mm", "%H:%M"),
        ("locale_time", "%X"),
        ("locale_date", "%x"),
    )
    described = {key: dt.strftime(pattern) for key, pattern in strftime_fields}

    # Fields that are computed by helpers rather than by strftime.
    described["emoji_time"] = time_to_emoji(dt)
    described["unix_s"] = get_s_since_epoch(dt)
    described["unix_ms"] = get_ms_since_epoch(dt)
    return described
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_query_type(query):
    """Tokenize ``query``, falling back to the local "now" when nothing matched.

    :param query: raw user query string.
    :return: the non-empty list returned by ``tokenize``; otherwise a
        single-entry list ``[["now", (<local now>,), QUERY_TYPE_DT]]``.
    """
    candidates = tokenize(query)
    if candidates:
        return candidates

    # No handler produced a result: answer with the current local time.
    return [["now", (get_local_now(),), QUERY_TYPE_DT]]
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_query(query):
    """Resolve ``query`` into a dict describing every candidate solution.

    Each solution from ``resolve_query_type`` is a
    ``(handler_name, results, query_type)`` triple; it becomes one element of
    ``"solutions"`` whose extra keys depend on the query type.

    :param query: raw user query string.
    :return: ``{"query": query, "solutions": [...]}``.
    """
    rendered = []
    for handler, results, query_type in resolve_query_type(query):
        entry = {"handler": handler, "query_type": query_type}

        if query_type == QUERY_TYPE_DT:
            entry["dt"] = dt_pretty(results[0])
        elif query_type == QUERY_TYPE_DT_TR:
            # NOTE(review): tokenize() wraps each handler result in a 1-tuple,
            # so results[1] looks like it would raise IndexError here --
            # confirm what handler_timezone_translation actually returns.
            entry["src_dt"] = dt_pretty(results[0])
            entry["dst_dt"] = dt_pretty(results[1])
        elif query_type == QUERY_TYPE_TZ:
            entry["tz"] = results[0]
        elif query_type == QUERY_TYPE_TD:
            entry["timedelta"] = results[0]

        rendered.append(entry)

    return {"query": query, "solutions": rendered}
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
query = "2020-02-07 11:25:58+0000 in seconds" #' '.join(sys.argv[1:])
|
query = "now in sofia"
|
||||||
results = parse(query)
|
# query = ' '.join(sys.argv[1:])
|
||||||
for handler, result in results:
|
result = resolve_query(query)
|
||||||
if type(result) is datetime:
|
pretty_print_dict(result)
|
||||||
print(" {} -> {}".format(handler, result.strftime(DEFAULT_FORMAT)))
|
|
||||||
elif type(result) is timedelta:
|
|
||||||
print(" {} -> {}".format(handler, result))
|
|
||||||
elif type(result) is dict:
|
|
||||||
print(" {} -> {}".format(handler, pformat(result)))
|
|
||||||
elif type(result) is None:
|
|
||||||
print(" {} -> Couldn't solve query".format(handler))
|
|
||||||
else:
|
|
||||||
print(" {} -> {}".format(handler, result))
|
|
||||||
|
@ -213,7 +213,7 @@ def parse_query(query):
|
|||||||
"""
|
"""
|
||||||
Parses the user query to the datetime, tz/loc parts
|
Parses the user query to the datetime, tz/loc parts
|
||||||
"""
|
"""
|
||||||
query = ' '.join(query)
|
# query = ' '.join(query)
|
||||||
query = query.strip()
|
query = query.strip()
|
||||||
if not query:
|
if not query:
|
||||||
logger.critical("Use a query like <datetime-like> ['to' <timezone or location>]")
|
logger.critical("Use a query like <datetime-like> ['to' <timezone or location>]")
|
||||||
@ -246,6 +246,32 @@ def serialize_location(location):
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def find_from_offset(query):
    """Interpret ``query`` as a UTC offset and look up a matching timezone.

    Accepted shapes (``query`` is expected to be lower-cased already):

    * ``+H``, ``-HH``, ``+HH:MM``, ``-HHMM``
    * the same, prefixed with ``gmt`` or ``utc`` (``utc+2``, ``gmt-05:30``);
      a missing sign after the prefix means ``+``.

    The offset is normalised to ``<sign>HHMM`` and passed to
    ``tzinfo_from_offset``.

    :param query: normalised query string.
    :return: whatever ``tzinfo_from_offset`` returns for the normalised
        offset, or ``(None, [])`` when ``query`` is not an offset or the
        lookup fails.
    """
    for prefix in ("gmt", "utc", "+", "-"):
        if not query.startswith(prefix):
            continue

        remainder = query[len(prefix):].strip()
        if prefix in ("+", "-"):
            sign, digits = prefix, remainder
        elif remainder[:1] in ("+", "-"):
            # "utc-2": the sign follows the alias and must be kept, not
            # folded into the hour number (which used to yield "+-200").
            sign, digits = remainder[0], remainder[1:].strip()
        else:
            sign, digits = "+", remainder

        if ":" in digits:
            pieces = digits.split(":")
            if len(pieces) != 2:
                continue
            hhs, mms = pieces
        elif len(digits) > 2:
            # Compact form such as "0200" or "530": last two digits are minutes.
            hhs, mms = digits[:-2], digits[-2:]
        else:
            hhs, mms = digits, "00"

        hhs, mms = hhs.strip(), mms.strip()
        if not (hhs.isdigit() and mms.isdigit()):
            continue
        try:
            hours, minutes = int(hhs), int(mms)
        except ValueError:
            continue

        if prefix in ("gmt", "utc") and hours == 0 and minutes == 0:
            # Keep the historical "+0000" spelling for e.g. "utc-0".
            sign = "+"

        try:
            return tzinfo_from_offset("{}{:02d}{:02d}".format(sign, hours, minutes))
        except Exception:
            # Best-effort lookup: an unresolvable offset is "not found".
            continue

    return None, []
|
||||||
|
|
||||||
|
|
||||||
def resolve_timezone(query):
|
def resolve_timezone(query):
|
||||||
if not query:
|
if not query:
|
||||||
query = "utc"
|
query = "utc"
|
||||||
@ -254,10 +280,7 @@ def resolve_timezone(query):
|
|||||||
normal_query = query.lower().strip()
|
normal_query = query.lower().strip()
|
||||||
found_from_iana_tz = NORMALIZED_TZ_DICT.get(normal_query, "")
|
found_from_iana_tz = NORMALIZED_TZ_DICT.get(normal_query, "")
|
||||||
found_from_abbr_tzs = list(NORMALIZED_TZ_ABBR.get(normal_query, set()))
|
found_from_abbr_tzs = list(NORMALIZED_TZ_ABBR.get(normal_query, set()))
|
||||||
found_from_offset_tz = None
|
found_from_offset_tz, offset_tzs = find_from_offset(normal_query)
|
||||||
try:
|
|
||||||
found_from_offset_tz = tzinfo_from_offset(normal_query)
|
|
||||||
except: ...
|
|
||||||
normal_tz = found_from_iana_tz
|
normal_tz = found_from_iana_tz
|
||||||
if not normal_tz:
|
if not normal_tz:
|
||||||
if found_from_abbr_tzs:
|
if found_from_abbr_tzs:
|
||||||
@ -304,7 +327,7 @@ def resolve_timezone(query):
|
|||||||
"normal_query": normal_query,
|
"normal_query": normal_query,
|
||||||
"found_from_iana_tz": found_from_iana_tz,
|
"found_from_iana_tz": found_from_iana_tz,
|
||||||
"found_from_abbr_tzs": found_from_abbr_tzs,
|
"found_from_abbr_tzs": found_from_abbr_tzs,
|
||||||
"found_from_offset_tzs": found_from_offset_tz,
|
"found_from_offset_tzs": offset_tzs,
|
||||||
"local_location": local_location,
|
"local_location": local_location,
|
||||||
"remote_location": remote_location,
|
"remote_location": remote_location,
|
||||||
"search_pytz": normal_tz,
|
"search_pytz": normal_tz,
|
||||||
@ -335,8 +358,7 @@ def solve_query(human_dt, human_tz_loc):
|
|||||||
|
|
||||||
def format_result(result, fmt):
|
def format_result(result, fmt):
|
||||||
if result is None:
|
if result is None:
|
||||||
logger.critical("Could not solve query")
|
logger.error("Could not solve query")
|
||||||
exit(1)
|
|
||||||
logger.debug("Format: {}".format(fmt))
|
logger.debug("Format: {}".format(fmt))
|
||||||
format_result = result.strftime(fmt)
|
format_result = result.strftime(fmt)
|
||||||
logger.debug("Formated result: {} -> {}".format(result, format_result))
|
logger.debug("Formated result: {} -> {}".format(result, format_result))
|
||||||
@ -346,8 +368,9 @@ def format_result(result, fmt):
|
|||||||
def query_to_format_result(query, fmt=DEFAULT_FORMAT):
|
def query_to_format_result(query, fmt=DEFAULT_FORMAT):
|
||||||
human_dt, human_tz_loc = parse_query(query)
|
human_dt, human_tz_loc = parse_query(query)
|
||||||
result = solve_query(human_dt, human_tz_loc)
|
result = solve_query(human_dt, human_tz_loc)
|
||||||
formated_result = format_result(result, fmt)
|
if fmt:
|
||||||
return formated_result
|
return format_result(result, fmt)
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
def main(args):
|
def main(args):
|
||||||
@ -442,8 +465,10 @@ def tzinfo_from_offset(offset: str) -> pytz.timezone:
|
|||||||
tznames = TZ_OFFSETS.get(offset, [])
|
tznames = TZ_OFFSETS.get(offset, [])
|
||||||
for tzname in tznames:
|
for tzname in tznames:
|
||||||
if tzname.startswith('Etc/GMT'):
|
if tzname.startswith('Etc/GMT'):
|
||||||
return pytz.timezone(tzname)
|
return pytz.timezone(tzname), tznames
|
||||||
return pytz.timezone(tznames[0])
|
if tznames:
|
||||||
|
return pytz.timezone(tznames[0]), tznames
|
||||||
|
return None, []
|
||||||
|
|
||||||
|
|
||||||
def dateparser_parse_dt(s: str):
|
def dateparser_parse_dt(s: str):
|
||||||
@ -467,7 +492,7 @@ def get_utcnow(tzaware: bool = True):
|
|||||||
|
|
||||||
def get_local_now(tzaware: bool = True):
|
def get_local_now(tzaware: bool = True):
|
||||||
if tzaware:
|
if tzaware:
|
||||||
return datetime.now().replace(tzinfo=tzinfo_from_offset(get_local_tz_offset()))
|
return datetime.now().replace(tzinfo=tzinfo_from_offset(get_local_tz_offset())[0])
|
||||||
return datetime.utcnow()
|
return datetime.utcnow()
|
||||||
|
|
||||||
|
|
||||||
@ -546,4 +571,4 @@ def epoch_to_dt(seconds):
|
|||||||
def time_to_emoji(dt):
|
def time_to_emoji(dt):
|
||||||
seconds = get_s_since_epoch(dt)
|
seconds = get_s_since_epoch(dt)
|
||||||
a = int((seconds / 900 - 3) / 2 % 24)
|
a = int((seconds / 900 - 3) / 2 % 24)
|
||||||
return chr(128336 + a // 2 + a % 2 * 12)
|
return chr(128336 + a // 2 + a % 2 * 12)
|
||||||
|
Loading…
Reference in New Issue
Block a user