import argparse
import json
import logging
import os

import requests
from bs4 import BeautifulSoup

logging.basicConfig()
logger = logging.getLogger()

URL_BASE = 'https://nimi.li/'
URL_WORD = URL_BASE + '{word}'
URL_WORD_2 = 'https://linku.la/words/{word}'


def setup_logging_level(debug=False):
    log_level = logging.DEBUG if debug else logging.INFO
    logger.setLevel(log_level)
    logger.debug("Debugging enabled")


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('query', nargs='*', default="", help="freeform")
    parser.add_argument('--debug', dest='debug', action='store_true')
    parser.add_argument('--raise-exceptions', dest='raise_exceptions',
                        action='store_true')
    return parser.parse_args()


args = parse_args()
setup_logging_level(args.debug)

DATA_DIR = os.path.join(os.path.dirname(__file__), 'data')
WORDS_DIR = os.path.join(DATA_DIR, 'words')


def get_cache_or_url(url, cache_file):
    """Return the body of url, reading from cache_file when it exists."""
    if os.path.exists(cache_file):
        logger.debug("Getting cached response from {}".format(cache_file))
        with open(cache_file, 'r') as f:
            response_text = f.read()
    else:
        os.makedirs(os.path.dirname(cache_file), exist_ok=True)
        logger.debug("Getting response from {}".format(url))
        response = requests.get(url)
        if response.status_code != 200:
            if args.raise_exceptions:
                raise Exception("Error getting response from {}".format(url))
            logger.error("Error getting response from {}".format(url))
        response_text = response.text
        if response.status_code == 200:
            # Only cache successful responses, so a transient error page
            # doesn't poison the cache.
            with open(cache_file, 'w+') as f:
                f.write(response_text)
    return response_text


def get_word_list():
    """Scrape the word index from nimi.li, caching the result as JSON."""
    words_file = os.path.join(DATA_DIR, 'words.json')
    if os.path.exists(words_file):
        logger.debug("Getting cached response from {}".format(words_file))
        with open(words_file, 'r') as f:
            words = json.load(f)
    else:
        words = set()
        cache_file = os.path.join(DATA_DIR, 'all_words.html')
        response_text = get_cache_or_url(URL_BASE, cache_file)
        soup = BeautifulSoup(response_text, 'html.parser')
        word_tags = soup.select('main > div.grid p.font-pona')
        for word_tag in word_tags:
            # Some entries carry a trailing digit; strip it.
            if word_tag.text[-1] in ['0', '1', '2', '3']:
                final_word = word_tag.text[:-1]
            else:
                final_word = word_tag.text
            words.add(final_word)
        # Sets are not JSON-serializable, so store a sorted list.
        words = sorted(words)
        with open(words_file, 'w+') as f:
            json.dump(words, f, ensure_ascii=False, indent=2)
    return words


def get_word_data(word, url):
    """Fetch one word page from linku.la and extract its pu definitions."""
    cache_file = os.path.join(WORDS_DIR, f'{word}.html')
    response_text = get_cache_or_url(url, cache_file)
    soup = BeautifulSoup(response_text, 'html.parser')
    # Colons in Tailwind class names must be escaped in CSS selectors,
    # hence the doubled backslashes.
    tags = soup.select(
        'body > div > div.px-4.sm\\:px-8.lg\\:px-16.m-auto.max-w-screen-xl '
        '> main > div.grid.sm\\:grid-cols-2.mt-6.gap-6 '
        '> div:nth-child(1) > div')
    pu_defs = [t.text for t in tags]
    pu_definitions = []
    for pu_def in pu_defs:
        # Each definition starts with the part of speech, e.g.
        # "ADJECTIVE good, positive"; the rest is the gloss.
        pu_def_split = pu_def.split()
        pos = pu_def_split[0]
        definition = ' '.join(pu_def_split[1:])
        pu_definitions.append({
            "pos": pos,
            "definition": definition,
        })
    # TODO: doesn't work -- this selector matches nothing on the page.
    tag_ku = soup.select(
        'body > div > div > main > div > div:nth-child(1) '
        '> div.p-6.pt-0.flex.flex-col.gap-3 > div:nth-child(2) > span')
    ku_defs = [t.text for t in tag_ku]
    return {
        'pu_definitions': pu_definitions,
    }


def get_words_data(word_list):
    """Fetch definition data for every word, caching the result as JSON."""
    words_data_file = os.path.join(DATA_DIR, 'words_data.json')
    if os.path.exists(words_data_file):
        logger.debug("Getting cached response from {}".format(words_data_file))
        with open(words_data_file, 'r') as f:
            words_data = json.load(f)
    else:
        words_data = {}
        for word in word_list:
            url = URL_WORD_2.format(word=word)
            word_data = get_word_data(word, url)
            words_data[word] = word_data
        with open(words_data_file, 'w+') as f:
            json.dump(words_data, f, ensure_ascii=False, indent=2)
    return words_data
def main():
    for folder in [WORDS_DIR]:
        os.makedirs(folder, exist_ok=True)
    word_list = get_word_list()
    print(word_list)
    words_data = get_words_data(word_list)
    # Collect every word whose pu entry lists an ADJECTIVE sense.
    adjectives = []
    for word, data in words_data.items():
        for pu_def in data.get('pu_definitions', []):
            if pu_def.get('pos') == 'ADJECTIVE':
                adjectives.append(word)
    print('" | "'.join(adjectives))


if __name__ == "__main__":
    main()
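# Example usage (a sketch; the file name "scrape_nimi.py" is an assumption,
# not part of the source):
#
#   python scrape_nimi.py --debug
#   python scrape_nimi.py --raise-exceptions
#
# The first run fetches https://nimi.li/ and one linku.la page per word,
# writing HTML caches under data/words/ and parsed JSON under data/;
# subsequent runs read only from those caches. Delete the data/ directory
# to force a fresh scrape.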