tokenizer
This commit is contained in:
parent
3a39204d26
commit
3109540c08
@ -11,7 +11,8 @@ from tww import parse_query, solve_query, TZ_OFFSETS
|
|||||||
|
|
||||||
def time_ago(date=None, diff=None):
|
def time_ago(date=None, diff=None):
|
||||||
"""
|
"""
|
||||||
Get a datetime object, timedelta object or a int() Epoch timestamp and return a
|
Get a datetime object, timedelta object or a int() Epoch timestamp and
|
||||||
|
return a
|
||||||
pretty string like 'an hour ago', 'Yesterday', '3 months ago',
|
pretty string like 'an hour ago', 'Yesterday', '3 months ago',
|
||||||
'just now', etc
|
'just now', etc
|
||||||
Modified from: http://stackoverflow.com/a/1551394/141084
|
Modified from: http://stackoverflow.com/a/1551394/141084
|
||||||
@ -56,19 +57,25 @@ def time_ago(date=None, diff=None):
|
|||||||
minutes = second_diff % 3600 // 60
|
minutes = second_diff % 3600 // 60
|
||||||
if second_diff < 86400:
|
if second_diff < 86400:
|
||||||
hrs_diff = second_diff // 3600
|
hrs_diff = second_diff // 3600
|
||||||
return str("{}{:02d}:{:02d}:{:02d}".format(sign, hrs_diff, minutes, seconds))
|
return str("{}{:02d}:{:02d}:{:02d}".format(sign, hrs_diff, minutes,
|
||||||
|
seconds))
|
||||||
seconds = second_diff % 60
|
seconds = second_diff % 60
|
||||||
minutes = second_diff % 3600 // 60
|
minutes = second_diff % 3600 // 60
|
||||||
hours = second_diff // 3600
|
hours = second_diff // 3600
|
||||||
if day_diff < 365:
|
if day_diff < 365:
|
||||||
if day_diff < 30:
|
if day_diff < 30:
|
||||||
return str("{}{}d{:02d}:{:02d}:{:02d}".format(sign, day_diff, hours, minutes, seconds))
|
return str("{}{}d{:02d}:{:02d}:{:02d}".format(sign, day_diff, hours,
|
||||||
|
minutes, seconds))
|
||||||
months, days = day_diff // 30, day_diff % 30
|
months, days = day_diff // 30, day_diff % 30
|
||||||
return str("{}{}m{}d{:02d}:{:02d}:{:02d}".format(sign, months, days, hours, minutes, seconds))
|
return str(
|
||||||
|
"{}{}m{}d{:02d}:{:02d}:{:02d}".format(sign, months, days, hours,
|
||||||
|
minutes, seconds))
|
||||||
years = day_diff // 365
|
years = day_diff // 365
|
||||||
days = day_diff % 365
|
days = day_diff % 365
|
||||||
months, days = days // 30, days % 30
|
months, days = days // 30, days % 30
|
||||||
return str("{}{}y{}m{}d{:02d}:{:02d}:{:02d}".format(sign, years, months, days, hours, minutes, seconds))
|
return str(
|
||||||
|
"{}{}y{}m{}d{:02d}:{:02d}:{:02d}".format(sign, years, months, days,
|
||||||
|
hours, minutes, seconds))
|
||||||
|
|
||||||
|
|
||||||
def query_to_dt(query):
|
def query_to_dt(query):
|
||||||
@ -90,9 +97,13 @@ def tzinfo_from_offset(offset: str) -> pytz.timezone:
|
|||||||
|
|
||||||
|
|
||||||
def dateparser_parse_dt(s: str):
    """
    Parse the free-form date/time string ``s`` with ``parse_dt``.

    Returns None when ``parse_dt`` gives back a falsy result. A result that
    carries no tzinfo gets the UTC timezone attached; its clock fields are
    left untouched.
    """
    result = parse_dt(s)
    if not result:
        return None
    if result.tzinfo is not None:
        return result
    return result.replace(tzinfo=pytz.timezone("utc"))
|
||||||
|
|
||||||
|
|
||||||
@ -102,7 +113,8 @@ def get_utcnow(tzaware: bool = True):
|
|||||||
return datetime.utcnow()
|
return datetime.utcnow()
|
||||||
|
|
||||||
|
|
||||||
def dt_tz_translation(dt: datetime, to_tz_offset: str, from_tz_offset: str = "+00:00") -> datetime:
|
def dt_tz_translation(dt: datetime, to_tz_offset: str,
|
||||||
|
from_tz_offset: str = "+00:00") -> datetime:
|
||||||
if ':' in to_tz_offset:
|
if ':' in to_tz_offset:
|
||||||
to_shh, to_mm = to_tz_offset.split(':')
|
to_shh, to_mm = to_tz_offset.split(':')
|
||||||
else:
|
else:
|
||||||
@ -114,7 +126,8 @@ def dt_tz_translation(dt: datetime, to_tz_offset: str, from_tz_offset: str = "+0
|
|||||||
tzinfo = tzinfo_from_offset(to_tz_offset)
|
tzinfo = tzinfo_from_offset(to_tz_offset)
|
||||||
if dt.tzinfo:
|
if dt.tzinfo:
|
||||||
return dt.astimezone(tzinfo)
|
return dt.astimezone(tzinfo)
|
||||||
r_dt = dt + timedelta(hours=int(to_shh), minutes=int(to_mm)) - timedelta(hours=int(from_shh), minutes=int(from_mm))
|
r_dt = dt + timedelta(hours=int(to_shh), minutes=int(to_mm)) - timedelta(
|
||||||
|
hours=int(from_shh), minutes=int(from_mm))
|
||||||
r_dt = tzinfo.localize(r_dt)
|
r_dt = tzinfo.localize(r_dt)
|
||||||
return r_dt
|
return r_dt
|
||||||
|
|
||||||
@ -131,11 +144,13 @@ def get_local_tzname_iana():
|
|||||||
|
|
||||||
|
|
||||||
def get_local_tz_offset():
    """
    Return the local timezone's offset as rendered by
    ``format_offset_from_timedelta``.
    """
    # NOTE(review): `_std_offset` is a private attribute of the tzlocal
    # object and presumably holds the standard (non-DST) offset - confirm
    # that ignoring DST is intended here.
    local_tz = datetime.now(tzlocal()).tzinfo
    return format_offset_from_timedelta(local_tz._std_offset)
|
||||||
|
|
||||||
|
|
||||||
def tzname_to_tz_offset(tzname_iana: str):
    """
    Return the current UTC offset of the timezone named ``tzname_iana`` as
    rendered by ``format_offset_from_timedelta``.
    """
    zone = pytz.timezone(tzname_iana)
    # get_utcnow(False) presumably yields a naive UTC datetime, which is what
    # a pytz zone's utcoffset() expects - TODO confirm.
    now = get_utcnow(False)
    return format_offset_from_timedelta(zone.utcoffset(now))
|
||||||
|
|
||||||
|
|
||||||
def get_dt_tz_offset(dt: datetime) -> timedelta:
|
def get_dt_tz_offset(dt: datetime) -> timedelta:
|
||||||
@ -148,20 +163,28 @@ def get_dt_tz_offset(dt: datetime) -> timedelta:
|
|||||||
return timedelta(0)
|
return timedelta(0)
|
||||||
|
|
||||||
|
|
||||||
def get_us_since_epoch(dt: datetime):
    """
    Return the number of microseconds since the Unix epoch for ``dt``.

    For a naive datetime this is simply ``dt.timestamp()`` in microseconds.
    For an aware datetime the value returned by ``get_dt_tz_offset(dt)``
    (presumably the datetime's UTC offset - confirm against that helper) is
    added on top, so the result follows the wall-clock reading of ``dt``.

    :param dt: naive or timezone-aware datetime
    :return: integer microsecond count
    """
    # round() rather than int(): the float product can land a hair below the
    # exact integer and truncation would then drop a microsecond.
    utc_us = round(dt.timestamp() * 1e6)
    if dt.tzinfo is None:
        return utc_us
    # The offset must be expressed in the same unit as utc_us. Use
    # total_seconds() because timedelta.seconds is never negative (a -5h
    # offset reports days=-1, seconds=68400).
    offset_us = round(get_dt_tz_offset(dt).total_seconds() * 1e6)
    return utc_us + offset_us
|
||||||
|
|
||||||
|
|
||||||
|
def get_ms_since_epoch(dt):
    """
    Return ``get_us_since_epoch(dt)`` scaled down to whole milliseconds,
    truncated toward zero.
    """
    microseconds = get_us_since_epoch(dt)
    return int(microseconds / 1e3)
|
||||||
|
|
||||||
|
|
||||||
|
def get_s_since_epoch(dt):
    """
    Return ``get_us_since_epoch(dt)`` scaled down to whole seconds,
    truncated toward zero.
    """
    microseconds = get_us_since_epoch(dt)
    return int(microseconds / 1e6)
|
||||||
|
|
||||||
|
|
||||||
def epoch_to_dt(seconds):
    """
    Convert a count of seconds since the Unix epoch into a naive datetime
    expressed in the machine's local timezone.
    """
    local_dt = datetime.fromtimestamp(seconds)
    return local_dt
|
||||||
|
|
||||||
|
|
||||||
def time_to_emoji(dt):
    """
    Map ``dt`` to a single clock-face character.

    The second count from ``get_s_since_epoch`` is reduced to one of 24
    half-hour slots; even slots select a code point in the first run of
    twelve starting at U+1F550, odd slots one in the following run of twelve.
    """
    quarter_hours = get_s_since_epoch(dt) / 900
    slot = int((quarter_hours - 3) / 2 % 24)
    hour_index, is_half = divmod(slot, 2)
    return chr(128336 + hour_index + is_half * 12)
|
||||||
|
|
||||||
@ -187,7 +210,7 @@ if __name__ == "__main__":
|
|||||||
)
|
)
|
||||||
))
|
))
|
||||||
|
|
||||||
print(get_seconds_since_epoch(
|
print(get_s_since_epoch(
|
||||||
dateparser_parse_dt("2019-12-11 15:53:40+0000")
|
dateparser_parse_dt("2019-12-11 15:53:40+0000")
|
||||||
))
|
))
|
||||||
|
|
||||||
@ -204,21 +227,30 @@ if __name__ == "__main__":
|
|||||||
"Now in New York" # Location:City
|
"Now in New York" # Location:City
|
||||||
"Now in Bulgaria" # Location:Country
|
"Now in Bulgaria" # Location:Country
|
||||||
"Now in USA" # Location:Country - multiple timezones
|
"Now in USA" # Location:Country - multiple timezones
|
||||||
"Now in CET" # Timezone:Abbreviation - https://en.wikipedia.org/wiki/List_of_time_zone_abbreviations
|
"Now in CET" # Timezone:Abbreviation -
|
||||||
"Now in Europe/Zurich" # Timezone:IANA - tz db, https://en.wikipedia.org/wiki/List_of_tz_database_time_zones, timezone database
|
# https://en.wikipedia.org/wiki/List_of_time_zone_abbreviations
|
||||||
"Now in Alfa" # Timezone:Military - https://en.wikipedia.org/wiki/List_of_military_time_zones
|
"Now in Europe/Zurich" # Timezone:IANA - tz db,
|
||||||
"Now in +02:00" # Timezone:Offset - https://en.wikipedia.org/wiki/List_of_UTC_time_offsets
|
# https://en.wikipedia.org/wiki/List_of_tz_database_time_zones, timezone
|
||||||
|
# database
|
||||||
|
"Now in Alfa" # Timezone:Military -
|
||||||
|
# https://en.wikipedia.org/wiki/List_of_military_time_zones
|
||||||
|
"Now in +02:00" # Timezone:Offset -
|
||||||
|
# https://en.wikipedia.org/wiki/List_of_UTC_time_offsets
|
||||||
|
|
||||||
"<TIME> IN <WHEREVER> TO <WHEREVER>"
|
"<TIME> IN <WHEREVER> TO <WHEREVER>"
|
||||||
"=================================="
|
"=================================="
|
||||||
"Now in Bulgaria to UTC" # == "Now in UTC"
|
"Now in Bulgaria to UTC" # == "Now in UTC"
|
||||||
"2019-12-12 03:14:15+03:00 to New York" # Something in +03:00 to Location:City
|
"2019-12-12 03:14:15+03:00 to New York" # Something in +03:00 to
|
||||||
|
# Location:City
|
||||||
|
|
||||||
"<TIMEDELTA>"
|
"<TIMEDELTA>"
|
||||||
"================="
|
"================="
|
||||||
"[Time] since <TIME>" # now-<TIME> -> timedelta
|
"[Time] since <TIME>" # now-<TIME> ->
|
||||||
"[Time] until <TIME>" # <TIME>-now -> timedelta
|
# timedelta
|
||||||
"[Time] between 27-01-1992 and 09 May 1997" # == end_dt - start_dt -> timedelta
|
"[Time] until <TIME>" # <TIME>-now ->
|
||||||
|
# timedelta
|
||||||
|
"[Time] between 27-01-1992 and 09 May 1997" # == end_dt - start_dt ->
|
||||||
|
# timedelta
|
||||||
|
|
||||||
"<TIME CALCULATION>"
|
"<TIME CALCULATION>"
|
||||||
"================="
|
"================="
|
||||||
@ -227,7 +259,6 @@ if __name__ == "__main__":
|
|||||||
"12-12-2019 + 2 weeks" # == dt + timedelta -> datetime
|
"12-12-2019 + 2 weeks" # == dt + timedelta -> datetime
|
||||||
"05:23 - 150 minutes" # == dt - timedelta -> datetime
|
"05:23 - 150 minutes" # == dt - timedelta -> datetime
|
||||||
|
|
||||||
|
|
||||||
# strptime: str -> datetime
|
# strptime: str -> datetime
|
||||||
# strftime: datetime -> str
|
# strftime: datetime -> str
|
||||||
|
|
||||||
@ -248,32 +279,54 @@ if __name__ == "__main__":
|
|||||||
|
|
||||||
# https://docs.python.org/3/library/datetime.html
|
# https://docs.python.org/3/library/datetime.html
|
||||||
#
|
#
|
||||||
# %a Weekday as locale’s abbreviated name. "Sun, Mon, …, Sat (en_US); So, Mo, …, Sa (de_DE)
|
# %a Weekday as locale’s abbreviated name.
|
||||||
# %A Weekday as locale’s full name. Sunday, Monday, …, Saturday (en_US); Sonntag, Montag, …, Samstag (de_DE)
|
# "Sun, Mon, …, Sat (en_US); So, Mo, …, Sa (de_DE)
|
||||||
# %w Weekday as a decimal number, where 0 is Sunday and 6 is Saturday. 0, 1, …, 6
|
# %A Weekday as locale’s full name.
|
||||||
# %d Day of the month as a zero-padded decimal number. 01, 02, …, 31
|
# Sunday, Monday, …, Saturday (en_US); Sonntag, Montag, …, Samstag (de_DE)
|
||||||
# %b Month as locale’s abbreviated name. Jan, Feb, …, Dec (en_US); Jan, Feb, …, Dez (de_DE)
|
# %w Weekday as a decimal number, where 0 is Sunday and 6 is
|
||||||
# %B Month as locale’s full name. January, February, …, December (en_US); Januar, Februar, …, Dezember (de_DE)
|
# Saturday. 0, 1, …, 6
|
||||||
# %m Month as a zero-padded decimal number. 01, 02, …, 12
|
# %d Day of the month as a zero-padded decimal number.
|
||||||
# %y Year without century as a zero-padded decimal number. 00, 01, …, 99
|
# 01, 02, …, 31
|
||||||
# %Y Year with century as a decimal number. 0001, 0002, …, 2013, 2014, …, 9998, 9999
|
# %b Month as locale’s abbreviated name.
|
||||||
# %H Hour (24-hour clock) as a zero-padded decimal number. 00, 01, …, 23
|
# Jan, Feb, …, Dec (en_US); Jan, Feb, …, Dez (de_DE)
|
||||||
# %I Hour (12-hour clock) as a zero-padded decimal number. 01, 02, …, 12
|
# %B Month as locale’s full name.
|
||||||
# %p Locale’s equivalent of either AM or PM. AM, PM (en_US); am, pm (de_DE)
|
# January, February, …, December (en_US); Januar, Februar, …, Dezember (
|
||||||
# %M Minute as a zero-padded decimal number. 00, 01, …, 59
|
# de_DE)
|
||||||
# %S Second as a zero-padded decimal number. 00, 01, …, 59
|
# %m Month as a zero-padded decimal number.
|
||||||
# %f Microsecond as a decimal number, zero-padded on the left. 000000, 000001, …, 999999
|
# 01, 02, …, 12
|
||||||
|
# %y Year without century as a zero-padded decimal number.
|
||||||
|
# 00, 01, …, 99
|
||||||
|
# %Y Year with century as a decimal number.
|
||||||
|
# 0001, 0002, …, 2013, 2014, …, 9998, 9999
|
||||||
|
# %H Hour (24-hour clock) as a zero-padded decimal number.
|
||||||
|
# 00, 01, …, 23
|
||||||
|
# %I Hour (12-hour clock) as a zero-padded decimal number.
|
||||||
|
# 01, 02, …, 12
|
||||||
|
# %p Locale’s equivalent of either AM or PM.
|
||||||
|
# AM, PM (en_US); am, pm (de_DE)
|
||||||
|
# %M Minute as a zero-padded decimal number.
|
||||||
|
# 00, 01, …, 59
|
||||||
|
# %S Second as a zero-padded decimal number.
|
||||||
|
# 00, 01, …, 59
|
||||||
|
# %f Microsecond as a decimal number, zero-padded on the left.
|
||||||
|
# 000000, 000001, …, 999999
|
||||||
# %z UTC offset in the form ±HHMM[SS[.ffffff]]
|
# %z UTC offset in the form ±HHMM[SS[.ffffff]]
|
||||||
# (empty string if the object is naive). (empty), +0000, -0400, +1030, +063415, -030712.345216
|
# (empty string if the object is naive).
|
||||||
# %Z Time zone name (empty string if the object is naive). (empty), UTC, EST, CST
|
# (empty), +0000, -0400, +1030, +063415, -030712.345216
|
||||||
# %j Day of the year as a zero-padded decimal number. 001, 002, …, 366
|
# %Z Time zone name (empty string if the object is naive).
|
||||||
|
# (empty), UTC, EST, CST
|
||||||
|
# %j Day of the year as a zero-padded decimal number.
|
||||||
|
# 001, 002, …, 366
|
||||||
# %U Week number of the year (Sunday as the first day of the week)
|
# %U Week number of the year (Sunday as the first day of the week)
|
||||||
# as a zero padded decimal number. All days in a new year
|
# as a zero padded decimal number. All days in a new year
|
||||||
# preceding the first Sunday are considered to be in week 0. 00, 01, …, 53
|
# preceding the first Sunday are considered to be in week
|
||||||
|
# 0. 00, 01, …, 53
|
||||||
# %W Week number of the year (Monday as the first day of the week)
|
# %W Week number of the year (Monday as the first day of the week)
|
||||||
# as a decimal number. All days in a new year preceding the
|
# as a decimal number. All days in a new year preceding the
|
||||||
# first Monday are considered to be in week 0. 00, 01, …, 53
|
# first Monday are considered to be in week 0.
|
||||||
# %c Locale’s appropriate date and time representation. Tue Aug 16 21:30:00 1988 (en_US); Di 16 Aug 21:30:00 1988 (de_DE)
|
# 00, 01, …, 53
|
||||||
|
# %c Locale’s appropriate date and time representation.
|
||||||
|
# Tue Aug 16 21:30:00 1988 (en_US); Di 16 Aug 21:30:00 1988 (de_DE)
|
||||||
# %x Locale’s appropriate date representation. 08/16/1988 (en_US); 16.08.1988 (de_DE)
|
# %x Locale’s appropriate date representation. 08/16/1988 (en_US); 16.08.1988 (de_DE)
|
||||||
# %X Locale’s appropriate time representation. 21:30:00 (en_US); 21:30:00 (de_DE)
|
# %X Locale’s appropriate time representation. 21:30:00 (en_US); 21:30:00 (de_DE)
|
||||||
# %% A literal '%' character. %
|
# %% A literal '%' character. %
|
||||||
|
95
src/tww/tokenizer.py
Normal file
95
src/tww/tokenizer.py
Normal file
@ -0,0 +1,95 @@
|
|||||||
|
import re
|
||||||
|
from datetime import timedelta
|
||||||
|
|
||||||
|
from src.tww.time_lib import dateparser_parse_dt, get_utcnow, \
|
||||||
|
get_s_since_epoch, \
|
||||||
|
get_ms_since_epoch
|
||||||
|
|
||||||
|
# Query patterns. All are used with match(), i.e. anchored at the start of the
# query, and are case-insensitive. Raw strings keep "\s" a regex escape
# instead of an (invalid) Python string escape.

# "<time> in|to epoch|seconds[ since epoch]" -> captures <time>
r_time_in_epoch_s = re.compile(
    r'\s*(.*)\s*(?:in|to)\s*(?:epoch|seconds since epoch|seconds)\s*',
    flags=re.IGNORECASE)
# "seconds since <time>" -> captures <time>.
# The unit word is mandatory: a bare "since <time>" is a time-delta query and
# must fall through to r_time_since instead of being read as epoch seconds.
r_time_in_epoch_s2 = re.compile(r'\s*seconds\s*since\s*(.*)\s*',
                                flags=re.IGNORECASE)
# "<time> in|to ms|milliseconds" -> captures <time>
# ("miliseconds" is accepted on purpose as a common misspelling)
r_time_in_epoch_ms = re.compile(
    r'\s*(.*)\s*(?:in|to)\s*(?:ms|milliseconds|miliseconds)\s*',
    flags=re.IGNORECASE)
# "ms|milliseconds since <time>" -> captures <time>; unit mandatory as above
r_time_in_epoch_ms2 = re.compile(
    r'\s*(?:ms|milliseconds|miliseconds)\s*since\s*(.*)\s*',
    flags=re.IGNORECASE)
# "[time] since <time>" -> captures <time>
r_time_since = re.compile(r'\s*(?:time)?\s*since\s*(.*)\s*',
                          flags=re.IGNORECASE)
# "[time] until <time>" -> captures <time>
r_time_until = re.compile(r'\s*(?:time)?\s*until\s*(.*)\s*',
                          flags=re.IGNORECASE)
# "[time] between <start> and <end>" -> captures <start>, <end>
r_time_between = re.compile(r'\s*(?:time)?\s*between\s*(.*)\s*and\s*(.*)\s*',
                            flags=re.IGNORECASE)
|
||||||
|
|
||||||
|
# Sample queries fed to parse() when this module is run as a script.
test_strings = [
    # inputs that should not match any pattern
    None,
    "",
    "s",
    " ",
    # time-delta style queries
    "Time since 2019-05-12",
    "Since yesterday",
    "time between yesterday and tomorrow",
    "time until 25 december",
    "time sinc",
    # epoch conversions
    "now in milliseconds",
    "seconds since epoch",
    "1992-01-27 to epoch",
    "milliseconds since 1992-01-27",
]
|
||||||
|
|
||||||
|
|
||||||
|
def handler_time_s(dt_s: str) -> int:
    """
    Parse ``dt_s`` with ``dateparser_parse_dt`` and return what
    ``get_s_since_epoch`` reports for the parsed value.
    """
    parsed = dateparser_parse_dt(dt_s)
    return get_s_since_epoch(parsed)
|
||||||
|
|
||||||
|
|
||||||
|
def handler_time_ms(dt_s: str) -> int:
    """
    Parse ``dt_s`` with ``dateparser_parse_dt`` and return what
    ``get_ms_since_epoch`` reports for the parsed value.
    """
    parsed = dateparser_parse_dt(dt_s)
    return get_ms_since_epoch(parsed)
|
||||||
|
|
||||||
|
|
||||||
|
def handler_time_diff(start_dt: str, end_dt: str) -> timedelta:
    """
    Parse both strings with ``dateparser_parse_dt`` and return
    ``end - start``.
    """
    end = dateparser_parse_dt(end_dt)
    start = dateparser_parse_dt(start_dt)
    return end - start
|
||||||
|
|
||||||
|
|
||||||
|
def handler_time_since(start_dt_s: str) -> timedelta:
    """
    Return the difference between the current ``get_utcnow()`` value and the
    moment described by ``start_dt_s``.
    """
    # handler_time_diff takes strings, so "now" is passed in its str() form.
    now_s = str(get_utcnow())
    return handler_time_diff(start_dt_s, now_s)
|
||||||
|
|
||||||
|
|
||||||
|
def handler_time_until(end_dt_s: str) -> timedelta:
    """
    Return the difference between the moment described by ``end_dt_s`` and
    the current ``get_utcnow()`` value.
    """
    # handler_time_diff takes strings, so "now" is passed in its str() form.
    now_s = str(get_utcnow())
    return handler_time_diff(now_s, end_dt_s)
|
||||||
|
|
||||||
|
|
||||||
|
# (pattern, handler) pairs. parse() walks this list top to bottom, so the
# order of the entries decides which handler gets a query first. Each handler
# receives the pattern's captured groups as positional arguments.
regex_handlers = [
    # epoch conversions
    (r_time_in_epoch_s, handler_time_s),
    (r_time_in_epoch_s2, handler_time_s),
    (r_time_in_epoch_ms, handler_time_ms),
    (r_time_in_epoch_ms2, handler_time_ms),
    # time deltas
    (r_time_since, handler_time_since),
    (r_time_until, handler_time_until),
    (r_time_between, handler_time_diff),
]
|
||||||
|
|
||||||
|
|
||||||
|
def try_regex(r, s):
    """
    Match ``s`` against ``r`` from the start of the string.

    :param r: compiled pattern, or a pattern string accepted by ``re.match``
    :param s: candidate text; a value that cannot be matched at all
        (e.g. None) counts as "no match"
    :return: tuple of the captured groups on a match, otherwise None
    """
    try:
        m = re.match(r, s)
    except (TypeError, re.error):
        # TypeError: s is not a string (the sample inputs include None).
        # re.error: r is a pattern string that does not compile.
        # Anything else (KeyboardInterrupt included) is left to propagate.
        return None
    if m is None:
        return None
    return m.groups()
|
||||||
|
|
||||||
|
|
||||||
|
def parse(s):
    """
    Run the query ``s`` through ``regex_handlers`` in order.

    The first pattern that matches AND whose handler completes without error
    decides the result.

    :param s: query text (non-string values simply match nothing)
    :return: the handler's return value, or None when no pattern matched or
        every matching handler failed
    """
    for pattern, handler in regex_handlers:
        groups = try_regex(pattern, s)
        if groups is None:
            continue
        try:
            return handler(*groups)
        except Exception:
            # Best effort by design: a handler that cannot cope with the
            # captured text just hands over to the next pattern. Catching
            # Exception (not a bare except) keeps Ctrl-C and SystemExit
            # working.
            continue
    return None
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Smoke run: print what parse() yields for every sample query.
    for sample in test_strings:
        outcome = parse(sample)
        print("{} -> {}".format(sample, outcome))
|
Loading…
Reference in New Issue
Block a user