96 lines
2.9 KiB
Python
96 lines
2.9 KiB
Python
import re
|
|
from wordpropertyconversion import word_property_conversion
|
|
from parsel import Selector
|
|
|
|
from models import Session, WordData
|
|
|
|
|
|
def renderer_word(txt) -> dict:
    """Pull the headword and its rank label out of a dictionary page.

    Args:
        txt: HTML source of the page.

    Returns:
        A dict with two keys:
        ``"spelling"``   - text of the first ``.keyword`` element,
        ``"importance"`` - text of the first ``<span class="via rank">``.
        Either value is ``None`` when the page has no matching element.
    """
    page = Selector(txt)
    return {
        "spelling": page.css(".keyword::text").get(),
        "importance": page.xpath("//span[@class='via rank']/text()").get(),
    }
|
|
|
|
|
|
def renderer_meaningslist(txt) -> list:
    """Collect the parsed meanings found on a dictionary page.

    Args:
        txt: HTML source of the page.

    Returns:
        A list of the dicts produced by ``renderer_meaning``, one per list
        item that starts with a part-of-speech abbreviation.  Items that
        ``renderer_meaning`` rejects (returns ``None`` for) are skipped.
        An empty list is returned when the page has no ``.trans-container``
        element at all.
    """
    sel = Selector(txt)
    # Remove the synonyms section before querying so its list items are
    # not collected together with the definitions.
    sel.css("#synonyms").remove()

    containers = sel.css(".trans-container")
    if not containers:
        return []

    # NOTE(review): this XPath starts with "//", so it is evaluated against
    # the whole document rather than only inside containers[0].  If the
    # intent is to read just the first container, a relative path is needed;
    # left unchanged because the page structure is not visible here.
    lines = containers[0].xpath("//div/ul/li/text()").getall()

    # Single pass: parse every line and keep only the successful results.
    return [
        parsed
        for parsed in map(renderer_meaning, lines)
        if parsed is not None
    ]
|
|
|
|
|
|
def renderer_meaning(text):
    """Split one definition line into its part of speech and its meaning.

    A line is accepted only when it begins with one to eight lowercase
    ASCII letters followed by a dot (for example ``"n."`` or ``"adj."``).

    Args:
        text: A single definition line.

    Returns:
        ``None`` when the line does not start with such an abbreviation.
        Otherwise a dict with
        ``"word_property"`` - the abbreviation passed through
        ``word_property_conversion``, and
        ``"meaning"``       - the rest of the line after the abbreviation,
        with leading whitespace removed.
    """
    prefix = re.match(r"[a-z]{1,8}\.", text)
    if prefix is None:
        return None

    # Cut the line at the end of the prefix that was actually matched in
    # ``text``.  Measuring the converted value instead would slice at the
    # wrong offset whenever the conversion changes the string's length.
    meaning = text[prefix.end():].lstrip()

    return {
        "word_property": word_property_conversion(prefix.group()),
        "meaning": meaning,
    }
|
|
|
|
|
|
def has_value_to_render(text):
    """Return True when the page contains no ``.error-typo`` element.

    Args:
        text: HTML source of the page.
    """
    typo_nodes = Selector(text).css(".error-typo")
    return len(typo_nodes) == 0
|
|
|
|
|
|
def testcase1():
    """Manual check: feed ``renderer_meaning`` a line with no abbreviation.

    Loads the stored page for the word "ob" and then prints the result of
    ``renderer_meaning`` for a hard-coded sample line.
    """
    with Session() as session:
        # Other rows tried here before: the first row in the table, "the".
        record = session.query(WordData).filter_by(word="ob").first()
        # The page is read but not used by the check below; earlier
        # experiments passed it to the renderer functions instead.
        text = record.html

        # Another sample tried earlier was a bracketed inflection line
        # (past tense / past participle / present participle forms).
        sample = "linux下的桌面环境"
        print(renderer_meaning(sample))
|
|
|
|
|
|
def testcase3():
    """Manual check: print the parsed meanings of the stored "search" page."""
    with Session() as session:
        record = session.query(WordData).filter_by(word="search").one()
        print(renderer_meaningslist(record.html))
|
|
|
|
|
|
def testcase4():
    """Manual check: a word whose importance is expected to be None.

    Prints the ``renderer_word`` result for the stored "john" page and the
    type of its ``"importance"`` value.
    """
    with Session() as session:
        record = session.query(WordData).filter_by(word="john").one()
        summary = renderer_word(record.html)
        print(summary)
        print(type(summary["importance"]))
|
|
|
|
|
|
def testcase2():
    """Manual check: run ``renderer_meaningslist`` on a page without meanings.

    The inline fragment has a keyword heading but no ``.trans-container``
    element, so the call exercises the early empty-list return.  The result
    is discarded; the check only confirms the call completes.
    """
    fragment = """
    <div id="results-contents" class="results-content"><div class="trans-wrapper" id="phrsListTab"><h2 class="wordbook-js"><span class="keyword">hentai</span></h2></div><div id="wordArticle" class="trans-wrapper trans-tab"><h3><span class="tabs"></span></h3><div id="wordArticleToggle"></div></div></div>
    """
    renderer_meaningslist(fragment)
|
|
|
|
|
|
# When executed as a script, run the testcase4 manual check.
if __name__ == "__main__":
    testcase4()