osdict_project/spider/renderercontents.py

96 lines
2.9 KiB
Python

import re
from wordpropertyconversion import word_property_conversion
from parsel import Selector
from models import Session, WordData
def renderer_word(txt) -> dict:
    """Extract the headword fields from a dictionary result page.

    Args:
        txt: HTML text of the page to parse.

    Returns:
        A dict with two keys:
          * "spelling"   -- text of the first element matching ``.keyword``.
          * "importance" -- text of the first ``span`` whose class attribute
            is exactly ``via rank``.
        Either value is None when the page has no matching node.
    """
    page = Selector(txt)
    return {
        "spelling": page.css(".keyword::text").get(),
        "importance": page.xpath("//span[@class='via rank']/text()").get(),
    }
def renderer_meaningslist(txt) -> list:
    """Collect the parsed meanings listed in the first translation container.

    Args:
        txt: HTML text of the page to parse.

    Returns:
        A list of dicts as produced by ``renderer_meaning`` (one per ``li``
        entry that starts with a word-property abbreviation). Entries that
        ``renderer_meaning`` rejects (returns None for) are skipped. An empty
        list is returned when the page has no ``.trans-container`` element.
    """
    sel = Selector(txt)
    # The synonyms section is removed up front so its list items can never be
    # mistaken for meanings.
    sel.css("#synonyms").remove()

    containers = sel.css(".trans-container")
    if not containers:
        return []

    # Use a relative axis: an XPath starting with "//" is evaluated against
    # the whole document even when called on a sub-selector, which would make
    # the "[0]" scoping meaningless. "descendant-or-self::div" keeps the
    # original "div/ul/li" shape but restricts it to the first container.
    items = containers[0].xpath(
        "descendant-or-self::div/ul/li/text()"
    ).getall()

    parsed = (renderer_meaning(item) for item in items)
    return [meaning for meaning in parsed if meaning is not None]
def renderer_meaning(text):
    """Split one meaning line into its word property and its meaning text.

    A line is expected to start with a lowercase abbreviation of 1-8 letters
    followed by a dot (e.g. ``"n. something"``).

    Args:
        text: The raw text of a single meaning entry.

    Returns:
        None when ``text`` does not start with such an abbreviation;
        otherwise a dict with:
          * "word_property" -- the abbreviation passed through
            ``word_property_conversion`` (presumably a normalised label;
            see that helper for the exact mapping).
          * "meaning" -- the rest of the line after the abbreviation, with
            leading whitespace removed.
    """
    match = re.match(r"[a-z]{1,8}\.", text)
    if match is None:
        return None

    word_property = word_property_conversion(match.group())

    # Cut at the end of the abbreviation as it appears in *text*. The
    # converted label may have a different length than the abbreviation, so
    # its length must not be used as the slice offset.
    meaning = text[match.end():].lstrip()

    return {
        "word_property": word_property,
        "meaning": meaning,
    }
def has_value_to_render(text):
    """Return True when the page has no element with class ``error-typo``.

    Args:
        text: HTML text of the page to inspect.

    Returns:
        True if no ``.error-typo`` node is found, False otherwise.
    """
    typo_nodes = Selector(text).css(".error-typo")
    return not typo_nodes
def testcase1():
    """Manual check: print what ``renderer_meaning`` yields for a sample line.

    Also loads the stored page for the word "ob" from the database; the
    loaded HTML is currently not fed into any renderer.
    """
    with Session() as session:
        record = session.query(WordData).filter_by(word="ob").first()
        # Attribute access is kept so a missing row still fails loudly here.
        text = record.html
        # Sample line that does not begin with a word-property abbreviation.
        sample = "linux下的桌面环境"
        print(renderer_meaning(sample))
def testcase3():
    """Manual check: print the parsed meanings of the stored page for "search"."""
    with Session() as session:
        record = session.query(WordData).filter_by(word="search").one()
        print(renderer_meaningslist(record.html))
def testcase4():
    """test word importance None"""
    with Session() as session:
        record = session.query(WordData).filter_by(word="john").one()
        word = renderer_word(record.html)
        print(word)
        # Show the runtime type of the importance field for this word.
        print(type(word["importance"]))
def testcase2():
    """Manual check: run ``renderer_meaningslist`` on a page that has no
    ``.trans-container`` element. The result is discarded."""
    page_html = """
<div id="results-contents" class="results-content"><div class="trans-wrapper" id="phrsListTab"><h2 class="wordbook-js"><span class="keyword">hentai</span></h2></div><div id="wordArticle" class="trans-wrapper trans-tab"><h3><span class="tabs"></span></h3><div id="wordArticleToggle"></div></div></div>
"""
    renderer_meaningslist(page_html)
# Script entry point: runs the manual check selected below.
if __name__ == "__main__":
    testcase4()