tokenizer
This commit is contained in:
parent
3a39204d26
commit
3109540c08
@ -11,7 +11,8 @@ from tww import parse_query, solve_query, TZ_OFFSETS
|
||||
|
||||
def time_ago(date=None, diff=None):
|
||||
"""
|
||||
Get a datetime object, timedelta object or a int() Epoch timestamp and return a
|
||||
Get a datetime object, timedelta object or a int() Epoch timestamp and
|
||||
return a
|
||||
pretty string like 'an hour ago', 'Yesterday', '3 months ago',
|
||||
'just now', etc
|
||||
Modified from: http://stackoverflow.com/a/1551394/141084
|
||||
@ -56,19 +57,25 @@ def time_ago(date=None, diff=None):
|
||||
minutes = second_diff % 3600 // 60
|
||||
if second_diff < 86400:
|
||||
hrs_diff = second_diff // 3600
|
||||
return str("{}{:02d}:{:02d}:{:02d}".format(sign, hrs_diff, minutes, seconds))
|
||||
return str("{}{:02d}:{:02d}:{:02d}".format(sign, hrs_diff, minutes,
|
||||
seconds))
|
||||
seconds = second_diff % 60
|
||||
minutes = second_diff % 3600 // 60
|
||||
hours = second_diff // 3600
|
||||
if day_diff < 365:
|
||||
if day_diff < 30:
|
||||
return str("{}{}d{:02d}:{:02d}:{:02d}".format(sign, day_diff, hours, minutes, seconds))
|
||||
return str("{}{}d{:02d}:{:02d}:{:02d}".format(sign, day_diff, hours,
|
||||
minutes, seconds))
|
||||
months, days = day_diff // 30, day_diff % 30
|
||||
return str("{}{}m{}d{:02d}:{:02d}:{:02d}".format(sign, months, days, hours, minutes, seconds))
|
||||
return str(
|
||||
"{}{}m{}d{:02d}:{:02d}:{:02d}".format(sign, months, days, hours,
|
||||
minutes, seconds))
|
||||
years = day_diff // 365
|
||||
days = day_diff % 365
|
||||
months, days = days // 30, days % 30
|
||||
return str("{}{}y{}m{}d{:02d}:{:02d}:{:02d}".format(sign, years, months, days, hours, minutes, seconds))
|
||||
return str(
|
||||
"{}{}y{}m{}d{:02d}:{:02d}:{:02d}".format(sign, years, months, days,
|
||||
hours, minutes, seconds))
|
||||
|
||||
|
||||
def query_to_dt(query):
|
||||
@ -90,9 +97,13 @@ def tzinfo_from_offset(offset: str) -> pytz.timezone:
|
||||
|
||||
|
||||
def dateparser_parse_dt(s: str):
    """Parse ``s`` with ``parse_dt`` and return a timezone-tagged result.

    Returns ``None`` when ``parse_dt`` yields a falsy value. A result that
    carries no ``tzinfo`` is tagged with pytz's "utc" zone; a result that
    already has a ``tzinfo`` is returned unchanged.
    """
    result = parse_dt(s)
    if not result:
        return None
    if result.tzinfo is not None:
        return result
    # Naive result: attach the zone without shifting the wall-clock fields.
    return result.replace(tzinfo=pytz.timezone("utc"))
|
||||
|
||||
|
||||
@ -102,7 +113,8 @@ def get_utcnow(tzaware: bool = True):
|
||||
return datetime.utcnow()
|
||||
|
||||
|
||||
def dt_tz_translation(dt: datetime, to_tz_offset: str, from_tz_offset: str = "+00:00") -> datetime:
|
||||
def dt_tz_translation(dt: datetime, to_tz_offset: str,
|
||||
from_tz_offset: str = "+00:00") -> datetime:
|
||||
if ':' in to_tz_offset:
|
||||
to_shh, to_mm = to_tz_offset.split(':')
|
||||
else:
|
||||
@ -114,7 +126,8 @@ def dt_tz_translation(dt: datetime, to_tz_offset: str, from_tz_offset: str = "+0
|
||||
tzinfo = tzinfo_from_offset(to_tz_offset)
|
||||
if dt.tzinfo:
|
||||
return dt.astimezone(tzinfo)
|
||||
r_dt = dt + timedelta(hours=int(to_shh), minutes=int(to_mm)) - timedelta(hours=int(from_shh), minutes=int(from_mm))
|
||||
r_dt = dt + timedelta(hours=int(to_shh), minutes=int(to_mm)) - timedelta(
|
||||
hours=int(from_shh), minutes=int(from_mm))
|
||||
r_dt = tzinfo.localize(r_dt)
|
||||
return r_dt
|
||||
|
||||
@ -131,11 +144,13 @@ def get_local_tzname_iana():
|
||||
|
||||
|
||||
def get_local_tz_offset():
    """Return the local timezone offset, formatted by
    ``format_offset_from_timedelta``.
    """
    local_now = datetime.now(tzlocal())
    # NOTE(review): `_std_offset` is a private attribute of the tzinfo object;
    # presumably it is the standard (non-DST) offset - confirm.
    std_offset = local_now.tzinfo._std_offset
    return format_offset_from_timedelta(std_offset)
|
||||
|
||||
|
||||
def tzname_to_tz_offset(tzname_iana: str):
    """Return the formatted UTC offset of the zone named ``tzname_iana``.

    The offset is evaluated by pytz for the instant returned by
    ``get_utcnow(False)`` and rendered with ``format_offset_from_timedelta``.
    """
    zone = pytz.timezone(tzname_iana)
    reference_now = get_utcnow(False)
    offset = zone.utcoffset(reference_now)
    return format_offset_from_timedelta(offset)
|
||||
|
||||
|
||||
def get_dt_tz_offset(dt: datetime) -> timedelta:
|
||||
@ -148,20 +163,28 @@ def get_dt_tz_offset(dt: datetime) -> timedelta:
|
||||
return timedelta(0)
|
||||
|
||||
|
||||
def get_us_since_epoch(dt: datetime):
    """Return microseconds since the Unix epoch for ``dt``.

    For a naive ``dt`` this is ``dt.timestamp()`` scaled to microseconds.
    For an aware ``dt`` the value is additionally shifted by the offset
    returned by ``get_dt_tz_offset(dt)``.

    :param dt: the datetime to convert.
    :return: an ``int`` number of microseconds.
    """
    utc_us = int(dt.timestamp() * 1e6)
    if dt.tzinfo is None:
        return utc_us
    # The offset must be expressed in the same unit as ``utc_us``. Dividing
    # the whole timedelta by one microsecond keeps the sign, whereas
    # ``timedelta.seconds`` is always non-negative and is in seconds.
    offset_us = get_dt_tz_offset(dt) // timedelta(microseconds=1)
    return utc_us + offset_us
|
||||
|
||||
|
||||
def get_ms_since_epoch(dt):
    """Return ``get_us_since_epoch(dt)`` scaled down to whole milliseconds.

    The fractional part is dropped by ``int()`` (truncation toward zero).
    """
    microseconds = get_us_since_epoch(dt)
    return int(microseconds / 1e3)
|
||||
|
||||
|
||||
def get_s_since_epoch(dt):
    """Return ``get_us_since_epoch(dt)`` scaled down to whole seconds.

    The fractional part is dropped by ``int()`` (truncation toward zero).
    """
    microseconds = get_us_since_epoch(dt)
    return int(microseconds / 1e6)
|
||||
|
||||
|
||||
def epoch_to_dt(seconds):
    """Convert a POSIX timestamp in seconds to a ``datetime``.

    No timezone is passed to ``datetime.fromtimestamp``, so the result is a
    naive datetime in the machine's local time.
    """
    converted = datetime.fromtimestamp(seconds)
    return converted
|
||||
|
||||
|
||||
def time_to_emoji(dt):
    """Return a single-character clock emoji chosen from ``dt``.

    The seconds value from ``get_s_since_epoch(dt)`` is reduced to a slot in
    ``0..23``; the returned character is one of the 24 code points starting
    at 128336.
    """
    epoch_seconds = get_s_since_epoch(dt)
    quarter_hours = epoch_seconds / 900
    slot = int((quarter_hours - 3) / 2 % 24)
    # Even slots land in the first 12 code points, odd slots in the next 12.
    code_point = 128336 + slot // 2 + slot % 2 * 12
    return chr(code_point)
|
||||
|
||||
@ -187,7 +210,7 @@ if __name__ == "__main__":
|
||||
)
|
||||
))
|
||||
|
||||
print(get_seconds_since_epoch(
|
||||
print(get_s_since_epoch(
|
||||
dateparser_parse_dt("2019-12-11 15:53:40+0000")
|
||||
))
|
||||
|
||||
@ -204,21 +227,30 @@ if __name__ == "__main__":
|
||||
"Now in New York" # Location:City
|
||||
"Now in Bulgaria" # Location:Country
|
||||
"Now in USA" # Location:Country - multiple timezones
|
||||
"Now in CET" # Timezone:Abbreviation - https://en.wikipedia.org/wiki/List_of_time_zone_abbreviations
|
||||
"Now in Europe/Zurich" # Timezone:IANA - tz db, https://en.wikipedia.org/wiki/List_of_tz_database_time_zones, timezone database
|
||||
"Now in Alfa" # Timezone:Military - https://en.wikipedia.org/wiki/List_of_military_time_zones
|
||||
"Now in +02:00" # Timezone:Offset - https://en.wikipedia.org/wiki/List_of_UTC_time_offsets
|
||||
"Now in CET" # Timezone:Abbreviation -
|
||||
# https://en.wikipedia.org/wiki/List_of_time_zone_abbreviations
|
||||
"Now in Europe/Zurich" # Timezone:IANA - tz db,
|
||||
# https://en.wikipedia.org/wiki/List_of_tz_database_time_zones, timezone
|
||||
# database
|
||||
"Now in Alfa" # Timezone:Military -
|
||||
# https://en.wikipedia.org/wiki/List_of_military_time_zones
|
||||
"Now in +02:00" # Timezone:Offset -
|
||||
# https://en.wikipedia.org/wiki/List_of_UTC_time_offsets
|
||||
|
||||
"<TIME> IN <WHEREVER> TO <WHEREVER>"
|
||||
"=================================="
|
||||
"Now in Bulgaria to UTC" # == "Now in UTC"
|
||||
"2019-12-12 03:14:15+03:00 to New York" # Something in +03:00 to Location:City
|
||||
"2019-12-12 03:14:15+03:00 to New York" # Something in +03:00 to
|
||||
# Location:City
|
||||
|
||||
"<TIMEDELTA>"
|
||||
"================="
|
||||
"[Time] since <TIME>" # now-<TIME> -> timedelta
|
||||
"[Time] until <TIME>" # <TIME>-now -> timedelta
|
||||
"[Time] between 27-01-1992 and 09 May 1997" # == end_dt - start_dt -> timedelta
|
||||
"[Time] since <TIME>" # now-<TIME> ->
|
||||
# timedelta
|
||||
"[Time] until <TIME>" # <TIME>-now ->
|
||||
# timedelta
|
||||
"[Time] between 27-01-1992 and 09 May 1997" # == end_dt - start_dt ->
|
||||
# timedelta
|
||||
|
||||
"<TIME CALCULATION>"
|
||||
"================="
|
||||
@ -227,7 +259,6 @@ if __name__ == "__main__":
|
||||
"12-12-2019 + 2 weeks" # == dt + timedelta -> datetime
|
||||
"05:23 - 150 minutes" # == dt - timedelta -> datetime
|
||||
|
||||
|
||||
# strptime: str -> datetime
|
||||
# strftime: datetime -> str
|
||||
|
||||
@ -248,32 +279,54 @@ if __name__ == "__main__":
|
||||
|
||||
# https://docs.python.org/3/library/datetime.html
|
||||
#
|
||||
# %a Weekday as locale’s abbreviated name. "Sun, Mon, …, Sat (en_US); So, Mo, …, Sa (de_DE)
|
||||
# %A Weekday as locale’s full name. Sunday, Monday, …, Saturday (en_US); Sonntag, Montag, …, Samstag (de_DE)
|
||||
# %w Weekday as a decimal number, where 0 is Sunday and 6 is Saturday. 0, 1, …, 6
|
||||
# %d Day of the month as a zero-padded decimal number. 01, 02, …, 31
|
||||
# %b Month as locale’s abbreviated name. Jan, Feb, …, Dec (en_US); Jan, Feb, …, Dez (de_DE)
|
||||
# %B Month as locale’s full name. January, February, …, December (en_US); Januar, Februar, …, Dezember (de_DE)
|
||||
# %m Month as a zero-padded decimal number. 01, 02, …, 12
|
||||
# %y Year without century as a zero-padded decimal number. 00, 01, …, 99
|
||||
# %Y Year with century as a decimal number. 0001, 0002, …, 2013, 2014, …, 9998, 9999
|
||||
# %H Hour (24-hour clock) as a zero-padded decimal number. 00, 01, …, 23
|
||||
# %I Hour (12-hour clock) as a zero-padded decimal number. 01, 02, …, 12
|
||||
# %p Locale’s equivalent of either AM or PM. AM, PM (en_US); am, pm (de_DE)
|
||||
# %M Minute as a zero-padded decimal number. 00, 01, …, 59
|
||||
# %S Second as a zero-padded decimal number. 00, 01, …, 59
|
||||
# %f Microsecond as a decimal number, zero-padded on the left. 000000, 000001, …, 999999
|
||||
# %a Weekday as locale’s abbreviated name.
|
||||
# Sun, Mon, …, Sat (en_US); So, Mo, …, Sa (de_DE)
|
||||
# %A Weekday as locale’s full name.
|
||||
# Sunday, Monday, …, Saturday (en_US); Sonntag, Montag, …, Samstag (de_DE)
|
||||
# %w Weekday as a decimal number, where 0 is Sunday and 6 is
|
||||
# Saturday. 0, 1, …, 6
|
||||
# %d Day of the month as a zero-padded decimal number.
|
||||
# 01, 02, …, 31
|
||||
# %b Month as locale’s abbreviated name.
|
||||
# Jan, Feb, …, Dec (en_US); Jan, Feb, …, Dez (de_DE)
|
||||
# %B Month as locale’s full name.
|
||||
# January, February, …, December (en_US); Januar, Februar, …, Dezember (
|
||||
# de_DE)
|
||||
# %m Month as a zero-padded decimal number.
|
||||
# 01, 02, …, 12
|
||||
# %y Year without century as a zero-padded decimal number.
|
||||
# 00, 01, …, 99
|
||||
# %Y Year with century as a decimal number.
|
||||
# 0001, 0002, …, 2013, 2014, …, 9998, 9999
|
||||
# %H Hour (24-hour clock) as a zero-padded decimal number.
|
||||
# 00, 01, …, 23
|
||||
# %I Hour (12-hour clock) as a zero-padded decimal number.
|
||||
# 01, 02, …, 12
|
||||
# %p Locale’s equivalent of either AM or PM.
|
||||
# AM, PM (en_US); am, pm (de_DE)
|
||||
# %M Minute as a zero-padded decimal number.
|
||||
# 00, 01, …, 59
|
||||
# %S Second as a zero-padded decimal number.
|
||||
# 00, 01, …, 59
|
||||
# %f Microsecond as a decimal number, zero-padded on the left.
|
||||
# 000000, 000001, …, 999999
|
||||
# %z UTC offset in the form ±HHMM[SS[.ffffff]]
|
||||
# (empty string if the object is naive). (empty), +0000, -0400, +1030, +063415, -030712.345216
|
||||
# %Z Time zone name (empty string if the object is naive). (empty), UTC, EST, CST
|
||||
# %j Day of the year as a zero-padded decimal number. 001, 002, …, 366
|
||||
# (empty string if the object is naive).
|
||||
# (empty), +0000, -0400, +1030, +063415, -030712.345216
|
||||
# %Z Time zone name (empty string if the object is naive).
|
||||
# (empty), UTC, EST, CST
|
||||
# %j Day of the year as a zero-padded decimal number.
|
||||
# 001, 002, …, 366
|
||||
# %U Week number of the year (Sunday as the first day of the week)
|
||||
# as a zero padded decimal number. All days in a new year
|
||||
# preceding the first Sunday are considered to be in week 0. 00, 01, …, 53
|
||||
# preceding the first Sunday are considered to be in week
|
||||
# 0. 00, 01, …, 53
|
||||
# %W Week number of the year (Monday as the first day of the week)
|
||||
# as a decimal number. All days in a new year preceding the
|
||||
# first Monday are considered to be in week 0. 00, 01, …, 53
|
||||
# %c Locale’s appropriate date and time representation. Tue Aug 16 21:30:00 1988 (en_US); Di 16 Aug 21:30:00 1988 (de_DE)
|
||||
# first Monday are considered to be in week 0.
|
||||
# 00, 01, …, 53
|
||||
# %c Locale’s appropriate date and time representation.
|
||||
# Tue Aug 16 21:30:00 1988 (en_US); Di 16 Aug 21:30:00 1988 (de_DE)
|
||||
# %x Locale’s appropriate date representation. 08/16/1988 (en_US); 16.08.1988 (de_DE)
|
||||
# %X Locale’s appropriate time representation. 21:30:00 (en_US); 21:30:00 (de_DE)
|
||||
# %% A literal '%' character. %
|
||||
|
95
src/tww/tokenizer.py
Normal file
95
src/tww/tokenizer.py
Normal file
@ -0,0 +1,95 @@
|
||||
import re
|
||||
from datetime import timedelta
|
||||
|
||||
from src.tww.time_lib import dateparser_parse_dt, get_utcnow, \
|
||||
get_s_since_epoch, \
|
||||
get_ms_since_epoch
|
||||
|
||||
# Query patterns. All are case-insensitive and written as raw strings so that
# ``\s`` reaches the regex engine instead of being treated as a (deprecated)
# string escape.

# "<time> in|to epoch|seconds" -> captures <time>.
r_time_in_epoch_s = re.compile(
    r'\s*(.*)\s*(?:in|to)\s*(?:epoch|seconds since epoch|seconds)\s*',
    flags=re.IGNORECASE)
# "seconds since <time>" -> captures <time>. The unit word is required:
# if it were optional, a bare "since <time>" would match here and never
# reach ``r_time_since``.
r_time_in_epoch_s2 = re.compile(r'\s*seconds\s*since\s*(.*)\s*',
                                flags=re.IGNORECASE)
# "<time> in|to ms|milliseconds" -> captures <time>. The misspelling
# "miliseconds" is accepted on purpose.
r_time_in_epoch_ms = re.compile(
    r'\s*(.*)\s*(?:in|to)\s*(?:ms|milliseconds|miliseconds)\s*',
    flags=re.IGNORECASE)
# "ms|milliseconds since <time>" -> captures <time>. Unit word required for
# the same reason as in ``r_time_in_epoch_s2``.
r_time_in_epoch_ms2 = re.compile(
    r'\s*(?:ms|milliseconds|miliseconds)\s*since\s*(.*)\s*',
    flags=re.IGNORECASE)
# "[time] since <time>" -> captures <time>.
r_time_since = re.compile(r'\s*(?:time)?\s*since\s*(.*)\s*',
                          flags=re.IGNORECASE)
# "[time] until <time>" -> captures <time>.
r_time_until = re.compile(r'\s*(?:time)?\s*until\s*(.*)\s*',
                          flags=re.IGNORECASE)
# "[time] between <start> and <end>" -> captures <start>, <end>.
r_time_between = re.compile(r'\s*(?:time)?\s*between\s*(.*)\s*and\s*(.*)\s*',
                            flags=re.IGNORECASE)
|
||||
|
||||
# Sample inputs fed to ``parse`` when this module is run as a script.
test_strings = [
    # Degenerate inputs.
    None,
    "",
    "s",
    " ",
    # Time-difference queries.
    "Time since 2019-05-12",
    "Since yesterday",
    "time between yesterday and tomorrow",
    "time until 25 december",
    # Truncated keyword.
    "time sinc",
    # Epoch conversions.
    "now in milliseconds",
    "seconds since epoch",
    "1992-01-27 to epoch",
    "milliseconds since 1992-01-27",
]
|
||||
|
||||
|
||||
def handler_time_s(dt_s: str) -> int:
    """Parse ``dt_s`` with ``dateparser_parse_dt`` and pass the result to
    ``get_s_since_epoch``.
    """
    parsed = dateparser_parse_dt(dt_s)
    return get_s_since_epoch(parsed)
|
||||
|
||||
|
||||
def handler_time_ms(dt_s: str) -> int:
    """Parse ``dt_s`` with ``dateparser_parse_dt`` and pass the result to
    ``get_ms_since_epoch``.
    """
    parsed = dateparser_parse_dt(dt_s)
    return get_ms_since_epoch(parsed)
|
||||
|
||||
|
||||
def handler_time_diff(start_dt: str, end_dt: str) -> timedelta:
    """Return ``end_dt - start_dt`` after parsing both strings with
    ``dateparser_parse_dt``.
    """
    # The end is parsed first, matching the operand order of the subtraction.
    end = dateparser_parse_dt(end_dt)
    start = dateparser_parse_dt(start_dt)
    return end - start
|
||||
|
||||
|
||||
def handler_time_since(start_dt_s: str) -> timedelta:
    """Return the time difference from ``start_dt_s`` to now.

    "Now" is ``get_utcnow()`` rendered with ``str`` and handed to
    ``handler_time_diff`` as the end of the interval.
    """
    now_text = str(get_utcnow())
    return handler_time_diff(start_dt_s, now_text)
|
||||
|
||||
|
||||
def handler_time_until(end_dt_s: str) -> timedelta:
    """Return the time difference from now to ``end_dt_s``.

    "Now" is ``get_utcnow()`` rendered with ``str`` and handed to
    ``handler_time_diff`` as the start of the interval.
    """
    now_text = str(get_utcnow())
    return handler_time_diff(now_text, end_dt_s)
|
||||
|
||||
|
||||
# (pattern, handler) pairs consulted by ``parse`` in list order; the groups
# captured by the pattern become the handler's positional arguments.
regex_handlers = [
    # Epoch conversions, in seconds.
    (r_time_in_epoch_s, handler_time_s),
    (r_time_in_epoch_s2, handler_time_s),
    # Epoch conversions, in milliseconds.
    (r_time_in_epoch_ms, handler_time_ms),
    (r_time_in_epoch_ms2, handler_time_ms),
    # Time differences.
    (r_time_since, handler_time_since),
    (r_time_until, handler_time_until),
    (r_time_between, handler_time_diff),
]
|
||||
|
||||
|
||||
def try_regex(r, s):
    """Match ``s`` against ``r`` from the start of the string.

    :param r: a compiled pattern or a pattern string.
    :param s: the text to test; a non-string value such as ``None`` is
        tolerated.
    :return: the tuple of captured groups on a match (empty if the pattern
        has no groups), otherwise ``None``.
    """
    try:
        m = re.match(r, s)
    except (TypeError, re.error):
        # TypeError: ``s`` (or ``r``) is not a string / pattern.
        # re.error: ``r`` is a pattern string that does not compile.
        return None
    if m is None:
        return None
    return m.groups()
|
||||
|
||||
|
||||
def parse(s):
    """Run ``s`` through ``regex_handlers`` and return the first result.

    Each pattern is tried in list order. When one matches, its captured
    groups are passed to the paired handler as positional arguments. A
    handler that raises is skipped and the next pattern is tried.

    :param s: the query text.
    :return: the first handler's return value, or ``None`` when no pattern
        matches or every matching handler fails.
    """
    for r, h in regex_handlers:
        g = try_regex(r, s)
        if g is None:
            continue
        try:
            return h(*g)
        except Exception:
            # Best effort: a failing handler should not stop later patterns.
            # ``Exception`` rather than a bare ``except`` so KeyboardInterrupt
            # and SystemExit still propagate.
            continue
    return None
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Smoke run: print what ``parse`` returns for every sample query.
    for sample in test_strings:
        outcome = parse(sample)
        print("{} -> {}".format(sample, outcome))
|
Loading…
Reference in New Issue
Block a user