28 lines
853 B
Python
28 lines
853 B
Python
|
import json
|
||
|
from bs4 import BeautifulSoup
|
||
|
|
||
|
def create_dictionary(html_file):
    """Parse an HTML page and build a dictionary of word definitions.

    The element with id ``the-dictionary`` marks where the entries begin.
    Among the siblings that follow it, every ``<h3>`` starts a new word and
    the text of every ``<p>`` is added to the most recent word's definition.

    Args:
        html_file: Path to the HTML file to parse. The file is read as UTF-8.

    Returns:
        A dict mapping each heading's stripped text to its definition: the
        stripped text of each of its paragraphs, every one followed by a
        single space (so a non-empty definition ends with a space). A heading
        without paragraphs maps to an empty string.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
        AttributeError: If the page has no element with id
            ``the-dictionary`` (``find`` returns ``None``).
    """
    # Pin the encoding: without it, open() uses the platform locale, which
    # makes the parsed text differ (or fail to decode) between machines.
    with open(html_file, 'r', encoding='utf-8') as f:
        soup = BeautifulSoup(f, 'html.parser')

    # Collect paragraphs per word and join once at the end instead of
    # growing each definition string with repeated concatenation.
    paragraphs_by_word = {}
    current_word = None

    for element in soup.find(id="the-dictionary").next_siblings:
        if element.name == 'h3':
            current_word = element.text.strip()
            # A repeated heading starts its definition over.
            paragraphs_by_word[current_word] = []
        elif element.name == 'p' and current_word:
            # Paragraphs before the first heading, or under a heading whose
            # text is empty, are skipped.
            paragraphs_by_word[current_word].append(element.text.strip())

    # Each paragraph keeps its trailing space so the output format is
    # unchanged for existing consumers of the generated data.
    return {
        word: "".join(paragraph + " " for paragraph in paragraphs)
        for word, paragraphs in paragraphs_by_word.items()
    }
|
||
|
|
||
|
# Example usage: parse the saved page and store the definitions as JSON.
html_file_path = "linku_api/lipamanka_data/page.html"
result_dict = create_dictionary(html_file_path)

# Serialise the whole mapping first, then write the finished text in one call.
with open("linku_api/lipamanka_data.json", "w+") as f:
    f.write(json.dumps(result_dict, indent=2))
|