diff --git a/src/tww/data/country_codes.csv b/src/tww/data/country_codes.csv new file mode 100644 index 0000000..145f38f --- /dev/null +++ b/src/tww/data/country_codes.csv @@ -0,0 +1,256 @@ +"Afghanistan","AF","AFG","4","33","65" +"Albania","AL","ALB","8","41","20" +"Algeria","DZ","DZA","12","28","3" +"American Samoa","AS","ASM","16","-14.3333","-170" +"Andorra","AD","AND","20","42.5","1.6" +"Angola","AO","AGO","24","-12.5","18.5" +"Anguilla","AI","AIA","660","18.25","-63.1667" +"Antarctica","AQ","ATA","10","-90","0" +"Antigua and Barbuda","AG","ATG","28","17.05","-61.8" +"Argentina","AR","ARG","32","-34","-64" +"Armenia","AM","ARM","51","40","45" +"Aruba","AW","ABW","533","12.5","-69.9667" +"Australia","AU","AUS","36","-27","133" +"Austria","AT","AUT","40","47.3333","13.3333" +"Azerbaijan","AZ","AZE","31","40.5","47.5" +"Bahamas","BS","BHS","44","24.25","-76" +"Bahrain","BH","BHR","48","26","50.55" +"Bangladesh","BD","BGD","50","24","90" +"Barbados","BB","BRB","52","13.1667","-59.5333" +"Belarus","BY","BLR","112","53","28" +"Belgium","BE","BEL","56","50.8333","4" +"Belize","BZ","BLZ","84","17.25","-88.75" +"Benin","BJ","BEN","204","9.5","2.25" +"Bermuda","BM","BMU","60","32.3333","-64.75" +"Bhutan","BT","BTN","64","27.5","90.5" +"Bolivia, Plurinational State of","BO","BOL","68","-17","-65" +"Bolivia","BO","BOL","68","-17","-65" +"Bosnia and Herzegovina","BA","BIH","70","44","18" +"Botswana","BW","BWA","72","-22","24" +"Bouvet Island","BV","BVT","74","-54.4333","3.4" +"Brazil","BR","BRA","76","-10","-55" +"British Indian Ocean Territory","IO","IOT","86","-6","71.5" +"Brunei Darussalam","BN","BRN","96","4.5","114.6667" +"Brunei","BN","BRN","96","4.5","114.6667" +"Bulgaria","BG","BGR","100","43","25" +"Burkina Faso","BF","BFA","854","13","-2" +"Burundi","BI","BDI","108","-3.5","30" +"Cambodia","KH","KHM","116","13","105" +"Cameroon","CM","CMR","120","6","12" +"Canada","CA","CAN","124","60","-95" +"Cape Verde","CV","CPV","132","16","-24" +"Cayman Islands","KY","CYM","136","19.5","-80.5" +"Central African Republic","CF","CAF","140","7","21" +"Chad","TD","TCD","148","15","19" +"Chile","CL","CHL","152","-30","-71" +"China","CN","CHN","156","35","105" +"Christmas Island","CX","CXR","162","-10.5","105.6667" +"Cocos (Keeling) Islands","CC","CCK","166","-12.5","96.8333" +"Colombia","CO","COL","170","4","-72" +"Comoros","KM","COM","174","-12.1667","44.25" +"Congo","CG","COG","178","-1","15" +"Congo, the Democratic Republic of the","CD","COD","180","0","25" +"Cook Islands","CK","COK","184","-21.2333","-159.7667" +"Costa Rica","CR","CRI","188","10","-84" +"Côte d'Ivoire","CI","CIV","384","8","-5" +"Ivory Coast","CI","CIV","384","8","-5" +"Croatia","HR","HRV","191","45.1667","15.5" +"Cuba","CU","CUB","192","21.5","-80" +"Cyprus","CY","CYP","196","35","33" +"Czech Republic","CZ","CZE","203","49.75","15.5" +"Denmark","DK","DNK","208","56","10" +"Djibouti","DJ","DJI","262","11.5","43" +"Dominica","DM","DMA","212","15.4167","-61.3333" +"Dominican Republic","DO","DOM","214","19","-70.6667" +"Ecuador","EC","ECU","218","-2","-77.5" +"Egypt","EG","EGY","818","27","30" +"El Salvador","SV","SLV","222","13.8333","-88.9167" +"Equatorial Guinea","GQ","GNQ","226","2","10" +"Eritrea","ER","ERI","232","15","39" +"Estonia","EE","EST","233","59","26" +"Ethiopia","ET","ETH","231","8","38" +"Falkland Islands (Malvinas)","FK","FLK","238","-51.75","-59" +"Faroe Islands","FO","FRO","234","62","-7" +"Fiji","FJ","FJI","242","-18","175" +"Finland","FI","FIN","246","64","26" +"France","FR","FRA","250","46","2" +"French Guiana","GF","GUF","254","4","-53" +"French Polynesia","PF","PYF","258","-15","-140" +"French Southern Territories","TF","ATF","260","-43","67" +"Gabon","GA","GAB","266","-1","11.75" +"Gambia","GM","GMB","270","13.4667","-16.5667" +"Georgia","GE","GEO","268","42","43.5" +"Germany","DE","DEU","276","51","9" +"Ghana","GH","GHA","288","8","-2" +"Gibraltar","GI","GIB","292","36.1833","-5.3667" +"Greece","GR","GRC","300","39","22" +"Greenland","GL","GRL","304","72","-40" +"Grenada","GD","GRD","308","12.1167","-61.6667" +"Guadeloupe","GP","GLP","312","16.25","-61.5833" +"Guam","GU","GUM","316","13.4667","144.7833" +"Guatemala","GT","GTM","320","15.5","-90.25" +"Guernsey","GG","GGY","831","49.5","-2.56" +"Guinea","GN","GIN","324","11","-10" +"Guinea-Bissau","GW","GNB","624","12","-15" +"Guyana","GY","GUY","328","5","-59" +"Haiti","HT","HTI","332","19","-72.4167" +"Heard Island and McDonald Islands","HM","HMD","334","-53.1","72.5167" +"Holy See (Vatican City State)","VA","VAT","336","41.9","12.45" +"Honduras","HN","HND","340","15","-86.5" +"Hong Kong","HK","HKG","344","22.25","114.1667" +"Hungary","HU","HUN","348","47","20" +"Iceland","IS","ISL","352","65","-18" +"India","IN","IND","356","20","77" +"Indonesia","ID","IDN","360","-5","120" +"Iran, Islamic Republic of","IR","IRN","364","32","53" +"Iraq","IQ","IRQ","368","33","44" +"Ireland","IE","IRL","372","53","-8" +"Isle of Man","IM","IMN","833","54.23","-4.55" +"Israel","IL","ISR","376","31.5","34.75" +"Italy","IT","ITA","380","42.8333","12.8333" +"Jamaica","JM","JAM","388","18.25","-77.5" +"Japan","JP","JPN","392","36","138" +"Jersey","JE","JEY","832","49.21","-2.13" +"Jordan","JO","JOR","400","31","36" +"Kazakhstan","KZ","KAZ","398","48","68" +"Kenya","KE","KEN","404","1","38" +"Kiribati","KI","KIR","296","1.4167","173" +"Korea, Democratic People's Republic of","KP","PRK","408","40","127" +"Korea, Republic of","KR","KOR","410","37","127.5" +"South Korea","KR","KOR","410","37","127.5" +"Kuwait","KW","KWT","414","29.3375","47.6581" +"Kyrgyzstan","KG","KGZ","417","41","75" +"Lao People's Democratic Republic","LA","LAO","418","18","105" +"Latvia","LV","LVA","428","57","25" +"Lebanon","LB","LBN","422","33.8333","35.8333" +"Lesotho","LS","LSO","426","-29.5","28.5" +"Liberia","LR","LBR","430","6.5","-9.5" +"Libyan Arab Jamahiriya","LY","LBY","434","25","17" +"Libya","LY","LBY","434","25","17" +"Liechtenstein","LI","LIE","438","47.1667","9.5333" +"Lithuania","LT","LTU","440","56","24" +"Luxembourg","LU","LUX","442","49.75","6.1667" +"Macao","MO","MAC","446","22.1667","113.55" +"Macedonia, the former Yugoslav Republic of","MK","MKD","807","41.8333","22" +"Madagascar","MG","MDG","450","-20","47" +"Malawi","MW","MWI","454","-13.5","34" +"Malaysia","MY","MYS","458","2.5","112.5" +"Maldives","MV","MDV","462","3.25","73" +"Mali","ML","MLI","466","17","-4" +"Malta","MT","MLT","470","35.8333","14.5833" +"Marshall Islands","MH","MHL","584","9","168" +"Martinique","MQ","MTQ","474","14.6667","-61" +"Mauritania","MR","MRT","478","20","-12" +"Mauritius","MU","MUS","480","-20.2833","57.55" +"Mayotte","YT","MYT","175","-12.8333","45.1667" +"Mexico","MX","MEX","484","23","-102" +"Micronesia, Federated States of","FM","FSM","583","6.9167","158.25" +"Moldova, Republic of","MD","MDA","498","47","29" +"Monaco","MC","MCO","492","43.7333","7.4" +"Mongolia","MN","MNG","496","46","105" +"Montenegro","ME","MNE","499","42","19" +"Montserrat","MS","MSR","500","16.75","-62.2" +"Morocco","MA","MAR","504","32","-5" +"Mozambique","MZ","MOZ","508","-18.25","35" +"Myanmar","MM","MMR","104","22","98" +"Burma","MM","MMR","104","22","98" +"Namibia","NA","NAM","516","-22","17" +"Nauru","NR","NRU","520","-0.5333","166.9167" +"Nepal","NP","NPL","524","28","84" +"Netherlands","NL","NLD","528","52.5","5.75" +"Netherlands Antilles","AN","ANT","530","12.25","-68.75" +"New Caledonia","NC","NCL","540","-21.5","165.5" +"New Zealand","NZ","NZL","554","-41","174" +"Nicaragua","NI","NIC","558","13","-85" +"Niger","NE","NER","562","16","8" +"Nigeria","NG","NGA","566","10","8" +"Niue","NU","NIU","570","-19.0333","-169.8667" +"Norfolk Island","NF","NFK","574","-29.0333","167.95" +"Northern Mariana Islands","MP","MNP","580","15.2","145.75" +"Norway","NO","NOR","578","62","10" +"Oman","OM","OMN","512","21","57" +"Pakistan","PK","PAK","586","30","70" +"Palau","PW","PLW","585","7.5","134.5" +"Palestinian Territory, Occupied","PS","PSE","275","32","35.25" +"Panama","PA","PAN","591","9","-80" +"Papua New Guinea","PG","PNG","598","-6","147" +"Paraguay","PY","PRY","600","-23","-58" +"Peru","PE","PER","604","-10","-76" +"Philippines","PH","PHL","608","13","122" +"Pitcairn","PN","PCN","612","-24.7","-127.4" +"Poland","PL","POL","616","52","20" +"Portugal","PT","PRT","620","39.5","-8" +"Puerto Rico","PR","PRI","630","18.25","-66.5" +"Qatar","QA","QAT","634","25.5","51.25" +"Réunion","RE","REU","638","-21.1","55.6" +"Romania","RO","ROU","642","46","25" +"Russian Federation","RU","RUS","643","60","100" +"Russia","RU","RUS","643","60","100" +"Rwanda","RW","RWA","646","-2","30" +"Saint Helena, Ascension and Tristan da Cunha","SH","SHN","654","-15.9333","-5.7" +"Saint Kitts and Nevis","KN","KNA","659","17.3333","-62.75" +"Saint Lucia","LC","LCA","662","13.8833","-61.1333" +"Saint Pierre and Miquelon","PM","SPM","666","46.8333","-56.3333" +"Saint Vincent and the Grenadines","VC","VCT","670","13.25","-61.2" +"Saint Vincent & the Grenadines","VC","VCT","670","13.25","-61.2" +"St. Vincent and the Grenadines","VC","VCT","670","13.25","-61.2" +"Samoa","WS","WSM","882","-13.5833","-172.3333" +"San Marino","SM","SMR","674","43.7667","12.4167" +"Sao Tome and Principe","ST","STP","678","1","7" +"Saudi Arabia","SA","SAU","682","25","45" +"Senegal","SN","SEN","686","14","-14" +"Serbia","RS","SRB","688","44","21" +"Seychelles","SC","SYC","690","-4.5833","55.6667" +"Sierra Leone","SL","SLE","694","8.5","-11.5" +"Singapore","SG","SGP","702","1.3667","103.8" +"Slovakia","SK","SVK","703","48.6667","19.5" +"Slovenia","SI","SVN","705","46","15" +"Solomon Islands","SB","SLB","90","-8","159" +"Somalia","SO","SOM","706","10","49" +"South Africa","ZA","ZAF","710","-29","24" +"South Georgia and the South Sandwich Islands","GS","SGS","239","-54.5","-37" +"Spain","ES","ESP","724","40","-4" +"Sri Lanka","LK","LKA","144","7","81" +"Sudan","SD","SDN","736","15","30" +"Suriname","SR","SUR","740","4","-56" +"Svalbard and Jan Mayen","SJ","SJM","744","78","20" +"Swaziland","SZ","SWZ","748","-26.5","31.5" +"Sweden","SE","SWE","752","62","15" +"Switzerland","CH","CHE","756","47","8" +"Syrian Arab Republic","SY","SYR","760","35","38" +"Taiwan, Province of China","TW","TWN","158","23.5","121" +"Taiwan","TW","TWN","158","23.5","121" +"Tajikistan","TJ","TJK","762","39","71" +"Tanzania, United Republic of","TZ","TZA","834","-6","35" +"Thailand","TH","THA","764","15","100" +"Timor-Leste","TL","TLS","626","-8.55","125.5167" +"Togo","TG","TGO","768","8","1.1667" +"Tokelau","TK","TKL","772","-9","-172" +"Tonga","TO","TON","776","-20","-175" +"Trinidad and Tobago","TT","TTO","780","11","-61" +"Trinidad & Tobago","TT","TTO","780","11","-61" +"Tunisia","TN","TUN","788","34","9" +"Turkey","TR","TUR","792","39","35" +"Turkmenistan","TM","TKM","795","40","60" +"Turks and Caicos Islands","TC","TCA","796","21.75","-71.5833" +"Tuvalu","TV","TUV","798","-8","178" +"Uganda","UG","UGA","800","1","32" +"Ukraine","UA","UKR","804","49","32" +"United Arab Emirates","AE","ARE","784","24","54" +"United Kingdom","GB","GBR","826","54","-2" +"United States","US","USA","840","38","-97" +"United States Minor Outlying Islands","UM","UMI","581","19.2833","166.6" +"Uruguay","UY","URY","858","-33","-56" +"Uzbekistan","UZ","UZB","860","41","64" +"Vanuatu","VU","VUT","548","-16","167" +"Venezuela, Bolivarian Republic of","VE","VEN","862","8","-66" +"Venezuela","VE","VEN","862","8","-66" +"Viet Nam","VN","VNM","704","16","106" +"Vietnam","VN","VNM","704","16","106" +"Virgin Islands, British","VG","VGB","92","18.5","-64.5" +"Virgin Islands, U.S.","VI","VIR","850","18.3333","-64.8333" +"Wallis and Futuna","WF","WLF","876","-13.3","-176.2" +"Western Sahara","EH","ESH","732","24.5","-13" +"Yemen","YE","YEM","887","15","48" +"Zambia","ZM","ZMB","894","-15","30" +"Zimbabwe","ZW","ZWE","716","-20","30" \ No newline at end of file diff --git a/src/tww/data/language_codes.csv b/src/tww/data/language_codes.csv new file mode 100644 index 0000000..63dccb5 --- /dev/null +++ b/src/tww/data/language_codes.csv @@ -0,0 +1,184 @@ +"Northwest Caucasian","Abkhaz","аҧсуа бызшәа, аҧсшәа","ab","abk", +"Afro-Asiatic","Afar","Afaraf","aa","aar", +"Indo-European","Afrikaans","Afrikaans","af","afr", +"Niger–Congo","Akan","Akan","ak","aka", +"Indo-European","Albanian","Shqip","sq","sqi","alb" +"Afro-Asiatic","Amharic","አማርኛ","am","amh", +"Afro-Asiatic","Arabic","العربية","ar","ara", +"Indo-European","Aragonese","aragonés","an","arg", +"Indo-European","Armenian","Հայերեն","hy","hye","arm" +"Indo-European","Assamese","অসমীয়া","as","asm", +"Northeast Caucasian","Avaric","авар мацӀ, магӀарул мацӀ","av","ava", +"Indo-European","Avestan","avesta","ae","ave", +"Aymaran","Aymara","aymar aru","ay","aym", +"Turkic","Azerbaijani","azərbaycan dili","az","aze", +"Niger–Congo","Bambara","bamanankan","bm","bam", +"Turkic","Bashkir","башҡорт теле","ba","bak", +"Language isolate","Basque","euskara, euskera","eu","eus","baq" +"Indo-European","Belarusian","беларуская мова","be","bel", +"Indo-European","Bengali, Bangla","বাংলা","bn","ben", +"Indo-European","Bihari","भोजपुरी","bh","bih", +"Creole","Bislama","Bislama","bi","bis", +"Indo-European","Bosnian","bosanski jezik","bs","bos", +"Indo-European","Breton","brezhoneg","br","bre", +"Indo-European","Bulgarian","български език","bg","bul", +"Sino-Tibetan","Burmese","ဗမာစာ","my","mya","bur" +"Indo-European","Catalan","català","ca","cat", +"Austronesian","Chamorro","Chamoru","ch","cha", +"Northeast Caucasian","Chechen","нохчийн мотт","ce","che", +"Niger–Congo","Chichewa, Chewa, Nyanja","chiCheŵa, chinyanja","ny","nya", +"Sino-Tibetan","Chinese","中文 (Zhōngwén), 汉语, 漢語","zh","zho","chi" +"Turkic","Chuvash","чӑваш чӗлхи","cv","chv", +"Indo-European","Cornish","Kernewek","kw","cor", +"Indo-European","Corsican","corsu, lingua corsa","co","cos", +"Algonquian","Cree","ᓀᐦᐃᔭᐍᐏᐣ","cr","cre", +"Indo-European","Croatian","hrvatski jezik","hr","hrv", +"Indo-European","Czech","čeština, český jazyk","cs","ces","cze" +"Indo-European","Danish","dansk","da","dan", +"Indo-European","Divehi, Dhivehi, Maldivian","ދިވެހި","dv","div", +"Indo-European","Dutch","Nederlands, Vlaams","nl","nld","dut" +"Sino-Tibetan","Dzongkha","རྫོང་ཁ","dz","dzo", +"Indo-European","English","English","en","eng", +"Constructed","Esperanto","Esperanto","eo","epo", +"Uralic","Estonian","eesti, eesti keel","et","est", +"Niger–Congo","Ewe","Eʋegbe","ee","ewe", +"Indo-European","Faroese","føroyskt","fo","fao", +"Austronesian","Fijian","vosa Vakaviti","fj","fij", +"Uralic","Finnish","suomi, suomen kieli","fi","fin", +"Indo-European","French","français, langue française","fr","fra","fre" +"Niger–Congo","Fula, Fulah, Pulaar, Pular","Fulfulde, Pulaar, Pular","ff","ful", +"Indo-European","Galician","galego","gl","glg", +"South Caucasian","Georgian","ქართული","ka","kat","geo" +"Indo-European","German","Deutsch","de","deu","ger" +"Indo-European","Greek (modern)","ελληνικά","el","ell","gre" +"Tupian","Guaraní","Avañe'ẽ","gn","grn", +"Indo-European","Gujarati","ગુજરાતી","gu","guj", +"Creole","Haitian, Haitian Creole","Kreyòl ayisyen","ht","hat", +"Afro-Asiatic","Hausa","(Hausa) هَوُسَ","ha","hau", +"Afro-Asiatic","Hebrew (modern)","עברית","he","heb", +"Niger–Congo","Herero","Otjiherero","hz","her", +"Indo-European","Hindi","हिन्दी, हिंदी","hi","hin", +"Austronesian","Hiri Motu","Hiri Motu","ho","hmo", +"Uralic","Hungarian","magyar","hu","hun", +"Constructed","Interlingua","Interlingua","ia","ina", +"Austronesian","Indonesian","Bahasa Indonesia","id","ind", +"Constructed","Interlingue","Originally called Occidental; then Interlingue after WWII","ie","ile", +"Indo-European","Irish","Gaeilge","ga","gle", +"Niger–Congo","Igbo","Asụsụ Igbo","ig","ibo", +"Eskimo–Aleut","Inupiaq","Iñupiaq, Iñupiatun","ik","ipk", +"Constructed","Ido","Ido","io","ido", +"Indo-European","Icelandic","Íslenska","is","isl","ice" +"Indo-European","Italian","Italiano","it","ita", +"Eskimo–Aleut","Inuktitut","ᐃᓄᒃᑎᑐᑦ","iu","iku", +"Japonic","Japanese","日本語 (にほんご)","ja","jpn", +"Austronesian","Javanese","ꦧꦱꦗꦮ, Basa Jawa","jv","jav", +"Eskimo–Aleut","Kalaallisut, Greenlandic","kalaallisut, kalaallit oqaasii","kl","kal", +"Dravidian","Kannada","ಕನ್ನಡ","kn","kan", +"Nilo-Saharan","Kanuri","Kanuri","kr","kau", +"Indo-European","Kashmiri","कश्मीरी, كشميري‎","ks","kas", +"Turkic","Kazakh","қазақ тілі","kk","kaz", +"Austroasiatic","Khmer","ខ្មែរ, ខេមរភាសា, ភាសាខ្មែរ","km","khm", +"Niger–Congo","Kikuyu, Gikuyu","Gĩkũyũ","ki","kik", +"Niger–Congo","Kinyarwanda","Ikinyarwanda","rw","kin", +"Turkic","Kyrgyz","Кыргызча, Кыргыз тили","ky","kir", +"Uralic","Komi","коми кыв","kv","kom", +"Niger–Congo","Kongo","Kikongo","kg","kon", +"Koreanic","Korean","한국어","ko","kor", +"Indo-European","Kurdish","Kurdî, كوردی‎","ku","kur", +"Niger–Congo","Kwanyama, Kuanyama","Kuanyama","kj","kua", +"Indo-European","Latin","latine, lingua latina","la","lat", +"Indo-European","Luxembourgish, Letzeburgesch","Lëtzebuergesch","lb","ltz", +"Niger–Congo","Ganda","Luganda","lg","lug", +"Indo-European","Limburgish, Limburgan, Limburger","Limburgs","li","lim", +"Niger–Congo","Lingala","Lingála","ln","lin", +"Tai–Kadai","Lao","ພາສາລາວ","lo","lao", +"Indo-European","Lithuanian","lietuvių kalba","lt","lit", +"Niger–Congo","Luba-Katanga","Tshiluba","lu","lub", +"Indo-European","Latvian","latviešu valoda","lv","lav", +"Indo-European","Manx","Gaelg, Gailck","gv","glv", +"Indo-European","Macedonian","македонски јазик","mk","mkd","mac" +"Austronesian","Malagasy","fiteny malagasy","mg","mlg", +"Austronesian","Malay","bahasa Melayu, بهاس ملايو‎","ms","msa","may" +"Dravidian","Malayalam","മലയാളം","ml","mal", +"Afro-Asiatic","Maltese","Malti","mt","mlt", +"Austronesian","Māori","te reo Māori","mi","mri","mao" +"Indo-European","Marathi (Marāṭhī)","मराठी","mr","mar", +"Austronesian","Marshallese","Kajin M̧ajeļ","mh","mah", +"Mongolic","Mongolian","Монгол хэл","mn","mon", +"Austronesian","Nauruan","Dorerin Naoero","na","nau", +"Dené–Yeniseian","Navajo, Navaho","Diné bizaad","nv","nav", +"Niger–Congo","Northern Ndebele","isiNdebele","nd","nde", +"Indo-European","Nepali","नेपाली","ne","nep", +"Niger–Congo","Ndonga","Owambo","ng","ndo", +"Indo-European","Norwegian Bokmål","Norsk bokmål","nb","nob", +"Indo-European","Norwegian Nynorsk","Norsk nynorsk","nn","nno", +"Indo-European","Norwegian","Norsk","no","nor", +"Sino-Tibetan","Nuosu","ꆈꌠ꒿ Nuosuhxop","ii","iii", +"Niger–Congo","Southern Ndebele","isiNdebele","nr","nbl", +"Indo-European","Occitan","occitan, lenga d'òc","oc","oci", +"Algonquian","Ojibwe, Ojibwa","ᐊᓂᔑᓈᐯᒧᐎᓐ","oj","oji", +"Indo-European","Old Church Slavonic, Church Slavonic, Old Bulgarian","ѩзыкъ словѣньскъ","cu","chu", +"Afro-Asiatic","Oromo","Afaan Oromoo","om","orm", +"Indo-European","Oriya","ଓଡ଼ିଆ","or","ori", +"Indo-European","Ossetian, Ossetic","ирон æвзаг","os","oss", +"Indo-European","(Eastern) Punjabi","ਪੰਜਾਬੀ","pa","pan", +"Indo-European","Pāli","पाऴि","pi","pli", +"Indo-European","Persian (Farsi)","فارسی","fa","fas","per" +"Indo-European","Polish","język polski, polszczyzna","pl","pol", +"Indo-European","Pashto, Pushto","پښتو","ps","pus", +"Indo-European","Portuguese","Português","pt","por", +"Quechuan","Quechua","Runa Simi, Kichwa","qu","que", +"Indo-European","Romansh","rumantsch grischun","rm","roh", +"Niger–Congo","Kirundi","Ikirundi","rn","run", +"Indo-European","Romanian","Română","ro","ron","rum" +"Indo-European","Russian","Русский","ru","rus", +"Indo-European","Sanskrit (Saṁskṛta)","संस्कृतम्","sa","san", +"Indo-European","Sardinian","sardu","sc","srd", +"Indo-European","Sindhi","सिन्धी, سنڌي، سندھی‎","sd","snd", +"Uralic","Northern Sami","Davvisámegiella","se","sme", +"Austronesian","Samoan","gagana fa'a Samoa","sm","smo", +"Creole","Sango","yângâ tî sängö","sg","sag", +"Indo-European","Serbian","српски језик","sr","srp", +"Indo-European","Scottish Gaelic, Gaelic","Gàidhlig","gd","gla", +"Niger–Congo","Shona","chiShona","sn","sna", +"Indo-European","Sinhalese, Sinhala","සිංහල","si","sin", +"Indo-European","Slovak","slovenčina, slovenský jazyk","sk","slk","slo" +"Indo-European","Slovene","slovenski jezik, slovenščina","sl","slv", +"Afro-Asiatic","Somali","Soomaaliga, af Soomaali","so","som", +"Niger–Congo","Southern Sotho","Sesotho","st","sot", +"Indo-European","Spanish","Español","es","spa", +"Austronesian","Sundanese","Basa Sunda","su","sun", +"Niger–Congo","Swahili","Kiswahili","sw","swa", +"Niger–Congo","Swati","SiSwati","ss","ssw", +"Indo-European","Swedish","svenska","sv","swe", +"Dravidian","Tamil","தமிழ்","ta","tam", +"Dravidian","Telugu","తెలుగు","te","tel", +"Indo-European","Tajik","тоҷикӣ, toçikī, تاجیکی‎","tg","tgk", +"Tai–Kadai","Thai","ไทย","th","tha", +"Afro-Asiatic","Tigrinya","ትግርኛ","ti","tir", +"Sino-Tibetan","Tibetan Standard, Tibetan, Central","བོད་ཡིག","bo","bod","tib" +"Turkic","Turkmen","Türkmen, Түркмен","tk","tuk", +"Austronesian","Tagalog","Wikang Tagalog","tl","tgl", +"Niger–Congo","Tswana","Setswana","tn","tsn", +"Austronesian","Tonga (Tonga Islands)","faka Tonga","to","ton", +"Turkic","Turkish","Türkçe","tr","tur", +"Niger–Congo","Tsonga","Xitsonga","ts","tso", +"Turkic","Tatar","татар теле, tatar tele","tt","tat", +"Niger–Congo","Twi","Twi","tw","twi", +"Austronesian","Tahitian","Reo Tahiti","ty","tah", +"Turkic","Uyghur","ئۇيغۇرچە‎, Uyghurche","ug","uig", +"Indo-European","Ukrainian","Українська","uk","ukr", +"Indo-European","Urdu","اردو","ur","urd", +"Turkic","Uzbek","Oʻzbek, Ўзбек, أۇزبېك‎","uz","uzb", +"Niger–Congo","Venda","Tshivenḓa","ve","ven", +"Austroasiatic","Vietnamese","Tiếng Việt","vi","vie", +"Constructed","Volapük","Volapük","vo","vol", +"Indo-European","Walloon","walon","wa","wln", +"Indo-European","Welsh","Cymraeg","cy","cym","wel" +"Niger–Congo","Wolof","Wollof","wo","wol", +"Indo-European","Western Frisian","Frysk","fy","fry", +"Niger–Congo","Xhosa","isiXhosa","xh","xho", +"Indo-European","Yiddish","ייִדיש","yi","yid", +"Niger–Congo","Yoruba","Yorùbá","yo","yor", +"Tai–Kadai","Zhuang, Chuang","Saɯ cueŋƅ, Saw cuengh","za","zha", +"Niger–Congo","Zulu","isiZulu","zu","zul", \ No newline at end of file diff --git a/src/tww/tokenizer.py b/src/tww/tokenizer.py index 323c185..212ff12 100644 --- a/src/tww/tokenizer.py +++ b/src/tww/tokenizer.py @@ -1,11 +1,16 @@ +import contextlib +import csv import json +import locale +import os import re import sys from datetime import datetime +from fuzzywuzzy import fuzz from pygments import highlight, lexers, formatters -from tww import ISO_FORMAT, time_to_emoji, time_ago +from tww import ISO_FORMAT, time_to_emoji, time_ago, basepath from tww import resolve_timezone, dateparser_parse_dt, get_utcnow, get_s_since_epoch, get_ms_since_epoch, \ dt_tz_translation, get_local_now, query_to_format_result @@ -163,27 +168,150 @@ def test(): def pretty_print_dict(obj): - formatted_json = json.dumps(obj, indent=2) + formatted_json = json.dumps(obj, indent=2, ensure_ascii=False) colorful_json = highlight(formatted_json, lexers.JsonLexer(), formatters.TerminalFormatter()) print(colorful_json) +@contextlib.contextmanager +def setlocale(*args, **kw): + saved = locale.setlocale(locale.LC_ALL) + yield locale.setlocale(*args, **kw) + locale.setlocale(locale.LC_ALL, saved) + + +def find_country_alias(locale_s): + with open(os.path.join(basepath, "data", "country_codes.csv")) as f: + cfile = csv.reader(f) + for row in cfile: + country, alpha2, alpha3 = row[0:3] + country, alpha2, alpha3 = country.lower(), alpha2.lower(), alpha3.lower() + if locale_s in [country, alpha2, alpha3]: + return country, alpha2, alpha3 + fuzz_ratio = fuzz.ratio(locale_s, country) + if fuzz_ratio > 90: + return country, alpha2, alpha3 + return None, None, None + + +def find_language_alias(locale_s): + with open(os.path.join(basepath, "data", "language_codes.csv")) as f: + cfile = csv.reader(f) + for row in cfile: + name, native_name, a2, a3 = row[1:5] + name, native_name, a2, a3 = name.lower(), native_name.lower(), a2.lower(), a3.lower() + if locale_s in [a2, a3, name, native_name]: + return name, native_name, a2, a3 + fuzz_ratio = fuzz.ratio(locale_s, name) + if fuzz_ratio > 90: + return name, native_name, a2, a3 + fuzz_ratio = fuzz.ratio(locale_s, native_name) + if fuzz_ratio > 80: + return name, native_name, a2, a3 + return None, None, None, None + + +def lc_time_to_codes(lc_time): + country_lang, encoding = lc_time.split('.') + country_code, lang_code = country_lang.split('_') + return country_code, lang_code, encoding + + +def get_default_locale(): + default_locale = locale.getlocale() + if type(default_locale) == tuple: + default_locale = "{}.{}".format(*default_locale) + country_code, lang_code, encoding = lc_time_to_codes(default_locale) + return country_code, lang_code, encoding, default_locale + + +def resolve_locale(locale_s): + country_code, lang_code, encoding, default_locale = get_default_locale() + rv = dict( + query=locale_s, + country_code=country_code, + lang_code=lang_code, + encoding=encoding, + lc_time=default_locale, + ) + default_encoding = 'utf-8' + if not locale_s: + return rv + if '.' in locale_s: + country_lang, encoding = locale_s.split('.') + else: + country_lang, encoding = locale_s, default_encoding + if '_' in country_lang: + country_code, lang_code = country_lang.split('_') + if len(country_code) == 2 and len(lang_code) == 2: + try: + lc_time = "{}_{}.{}".format(country_code, lang_code, encoding) + locale.setlocale(locale.LC_TIME, lc_time) + rv["country_code"] = country_code + rv["lang_code"] = lang_code + rv["encoding"] = encoding + rv["lc_time"] = lc_time + return rv + except: + ... + locale_s = locale_s.strip().lower() + country, alpha2, alpha3 = find_country_alias(locale_s) + lang_name, lang_native_name, lang2, lang3 = find_language_alias(locale_s) + if alpha2: + locale_hypotheses = {k: v for k, v in locale.locale_alias.items() if k.startswith(alpha2)} + for k, v in locale_hypotheses.items(): + lower = k.lower() + if 'utf-8' in lower: + rv["lc_time"] = v + break + else: + if locale_hypotheses: + lc_time = locale_hypotheses.get(alpha2) + if lc_time: + country_code, lang_code, encoding = lc_time_to_codes(lc_time) + rv["country_code"] = country_code + rv["lang_code"] = lang_code + rv["encoding"] = encoding + rv["lc_time"] = lc_time + return rv + if lang2: + locale_hypotheses = {k: v for k, v in locale.locale_alias.items() if k.startswith(lang2)} + for k, v in locale_hypotheses.items(): + lower = k.lower() + if 'utf-8' in lower: + rv["lc_time"] = v + break + else: + if locale_hypotheses: + lc_time = locale_hypotheses.get(lang2) + if lc_time: + country_code, lang_code, encoding = lc_time_to_codes(lc_time) + rv["country_code"] = country_code + rv["lang_code"] = lang_code + rv["encoding"] = encoding + rv["lc_time"] = lc_time + return rv + return rv + + def dt_pretty(dt): rv = {} + global custom_locale rv["iso8601_full"] = dt.strftime(ISO_FORMAT) rv["iso8601_date"] = dt.strftime('%Y-%m-%d') rv["iso8601_time"] = dt.strftime('%H:%M:%S') rv["locale_dt"] = dt.strftime("%c") - rv["locale_day_of_week"] = dt.strftime("%A") - rv["locale_day_of_week_short"] = dt.strftime("%a") rv["day_of_week_number"] = dt.strftime("%w") - rv["locale_month"] = dt.strftime("%B") - rv["locale_month_short"] = dt.strftime("%b") - rv["tz_name"] = dt.strftime("%Z") + rv["locale"] = custom_locale + with setlocale(locale.LC_TIME, custom_locale.get("lc_time")): + rv["locale_month"] = dt.strftime("%B") + rv["locale_month_short"] = dt.strftime("%b") + rv["locale_day_of_week_short"] = dt.strftime("%a") + rv["locale_day_of_week"] = dt.strftime("%A") + rv["locale_date"] = dt.strftime("%x") + rv["locale_time"] = dt.strftime("%X") rv["tz_offset"] = dt.strftime("%z") rv["hh:mm"] = dt.strftime("%H:%M") - rv["locale_time"] = dt.strftime("%X") - rv["locale_date"] = dt.strftime("%x") rv["emoji_time"] = time_to_emoji(dt) rv["unix_s"] = get_s_since_epoch(dt) rv["unix_ms"] = get_ms_since_epoch(dt) @@ -218,7 +346,8 @@ def td_totals(td): weeks = seconds // (7 * 24 * 60 * 60) months = seconds // (30 * 24 * 60 * 60) years = seconds // (365 * 24 * 60 * 60) - years, months, weeks, days, hours, minutes, seconds = map(abs, (years, months, weeks, days, hours, minutes, seconds)) + years, months, weeks, days, hours, minutes, seconds = map(abs, + (years, months, weeks, days, hours, minutes, seconds)) return dict( seconds=seconds, minutes=minutes, @@ -262,7 +391,7 @@ def resolve_query_type(query): solutions = tokenize(query) if not solutions: dt = get_local_now() - return [["now", (dt,), QUERY_TYPE_DT]] + return [["now", dt, QUERY_TYPE_DT]] return solutions @@ -292,7 +421,10 @@ def resolve_query(query): if __name__ == "__main__": - # query = "2020-02-11T13:01:05+0100 to sofia" - query = ' '.join(sys.argv[1:]) + query = "" + custom_locale = "български език" + # custom_locale = "" + custom_locale = resolve_locale(custom_locale) + # query = ' '.join(sys.argv[1:]) result = resolve_query(query) pretty_print_dict(result) diff --git a/src/tww/tww.py b/src/tww/tww.py index 5aa191d..9fea040 100644 --- a/src/tww/tww.py +++ b/src/tww/tww.py @@ -205,8 +205,7 @@ def resolve_location_remote(query): write_to_cache(query, location) return location except GeocoderTimedOut: - logger.critical("Timed out resolving location. Try specifying a timezone directly") - exit(1) + logger.error("Timed out resolving location. Try specifying a timezone directly") def parse_query(query): @@ -216,8 +215,7 @@ def parse_query(query): # query = ' '.join(query) query = query.strip() if not query: - logger.critical("Use a query like ['to' ]") - exit(1) + logger.error("Use a query like ['to' ]") to_query = query.split(" to ") logger.debug("to_query: {}".format(to_query)) if len(to_query) == 1: @@ -227,8 +225,7 @@ def parse_query(query): # datetime to timezone human_dt, human_tz_loc = to_query else: - logger.critical("There can be only one 'to' in the query string") - exit(1) + logger.error("There can be only one 'to' in the query string") logger.debug("raw human_dt: {}".format(human_dt)) logger.debug("raw human_tz_loc: {}".format(human_tz_loc)) @@ -552,11 +549,7 @@ def get_dt_tz_offset(dt: datetime) -> timedelta: def get_us_since_epoch(dt: datetime): - utc_seconds = int(dt.timestamp() * 1e6) - if dt.tzinfo is None: - return utc_seconds - local_seconds = get_dt_tz_offset(dt).seconds - return utc_seconds + local_seconds + return int(dt.timestamp() * 1e6) def get_ms_since_epoch(dt): @@ -571,7 +564,16 @@ def epoch_to_dt(seconds): return datetime.fromtimestamp(seconds) +def get_local_s_since_epoch(dt: datetime): + utc_s = int(dt.timestamp()) + if dt.tzinfo is None: + return utc_s + local_s = int(get_dt_tz_offset(dt).seconds) + total_s = utc_s + local_s + return total_s + + def time_to_emoji(dt): - seconds = get_s_since_epoch(dt) + seconds = get_local_s_since_epoch(dt) a = int((seconds / 900 - 3) / 2 % 24) return chr(128336 + a // 2 + a % 2 * 12)