# toki_pona/linku_api/lipamanka_data/parse_lipamanka.py

import json
from bs4 import BeautifulSoup

def create_dictionary(html_file):
    """Parse a saved HTML page into a mapping of words to definitions.

    Scanning starts at the element whose id is ``the-dictionary`` and walks
    its following siblings. Each ``<h3>`` opens a new entry keyed by the
    heading's stripped text; each ``<p>`` is appended to the entry opened
    most recently. Paragraphs that appear before the first heading, or after
    a heading whose text is empty, are ignored.

    Args:
        html_file: Path to the HTML file to parse.

    Returns:
        A dict mapping each heading text to the concatenation of its
        paragraphs' stripped text, every paragraph followed by one space
        (so a non-empty definition ends with a trailing space). A heading
        with no paragraphs maps to the empty string. If the same heading
        text occurs twice, the later occurrence replaces the earlier one.

    Raises:
        AttributeError: If the page has no element with id
            ``the-dictionary``.
        OSError: If the file cannot be opened.
    """
    # Decode explicitly instead of relying on the platform's locale default.
    # NOTE(review): assumes the saved page is UTF-8 -- confirm.
    with open(html_file, 'r', encoding='utf-8') as f:
        soup = BeautifulSoup(f, 'html.parser')

    anchor = soup.find(id="the-dictionary")
    if anchor is None:
        # Same exception type the unguarded attribute access used to raise,
        # but with a message that names the actual problem.
        raise AttributeError(
            f"no element with id 'the-dictionary' found in {html_file!r}"
        )

    # Collect the pieces per word and join once at the end rather than
    # growing each definition string with repeated concatenation.
    pieces_by_word = {}
    current_word = None

    for element in anchor.next_siblings:
        if element.name == 'h3':
            current_word = element.text.strip()
            pieces_by_word[current_word] = []
        elif element.name == 'p' and current_word:
            pieces_by_word[current_word].append(element.text.strip() + " ")

    return {word: "".join(pieces) for word, pieces in pieces_by_word.items()}

# Script section: parse the saved HTML page, then store the result as JSON.
html_file_path = "linku_api/lipamanka_data/page.html"
result_dict = create_dictionary(html_file_path)

# Serialise to text first, then write it out in a single call.
with open("linku_api/lipamanka_data.json", "w+") as f:
    f.write(json.dumps(result_dict, indent=2))
updates with lipamanka and words def 2024-06-17 12:17:29 +02:00
			`import json`
			`from bs4 import BeautifulSoup`

			`def create_dictionary(html_file):`
			`"""Parses the HTML file and creates a dictionary of word definitions."""`

			`with open(html_file, 'r') as f:`
			`soup = BeautifulSoup(f, 'html.parser')`

			`word_definitions = {}`
			`current_word = None`

			`for element in soup.find(id="the-dictionary").next_siblings:`
			`if element.name == 'h3':`
			`current_word = element.text.strip()`
			`word_definitions[current_word] = ""`
			`elif element.name == 'p' and current_word:`
			`word_definitions[current_word] += element.text.strip() + " "`

			`return word_definitions`

			`# Example usage:`
			`html_file_path = "linku_api/lipamanka_data/page.html"`
			`result_dict = create_dictionary(html_file_path)`

			`with open("linku_api/lipamanka_data.json", "w+") as f:`
			`json.dump(result_dict, f, indent=2)`