init
This commit is contained in:
96
spider/renderercontents.py
Normal file
96
spider/renderercontents.py
Normal file
@ -0,0 +1,96 @@
|
||||
import re
|
||||
from wordpropertyconversion import word_property_conversion
|
||||
from parsel import Selector
|
||||
|
||||
from models import Session, WordData
|
||||
|
||||
|
||||
def renderer_word(txt) -> dict:
    """Extract the headword and its rank label from a result page.

    Args:
        txt: HTML markup of the page, parsed with ``parsel.Selector``.

    Returns:
        A dict with two keys: ``"spelling"`` holds the text of the first
        element matching ``.keyword``, and ``"importance"`` holds the text
        of the first ``<span>`` whose class attribute is exactly
        ``"via rank"``. Either value is ``None`` when nothing matches.
    """
    page = Selector(txt)
    return {
        "spelling": page.css(".keyword::text").get(),
        "importance": page.xpath("//span[@class='via rank']/text()").get(),
    }
|
||||
|
||||
|
||||
def renderer_meaningslist(txt) -> list:
    """Collect the parsed meanings listed on a result page.

    Args:
        txt: HTML markup of the page, parsed with ``parsel.Selector``.

    Returns:
        A list of the dicts produced by ``renderer_meaning``, one for each
        ``<li>`` text it could parse (entries for which it returned
        ``None`` are left out). The list is empty when the page contains
        no ``.trans-container`` element.
    """
    sel = Selector(txt)
    # Remove the "#synonyms" subtree before querying; presumably this keeps
    # its list items out of the XPath result below.
    sel.css("#synonyms").remove()

    containers = sel.css(".trans-container")
    if not containers:
        return []

    # NOTE(review): an XPath that starts with "//" is evaluated against the
    # whole document even when called on a sub-selector, so this is not
    # limited to the first container. Kept as-is to preserve the current
    # output; confirm whether a relative path (".//") was intended.
    item_texts = containers[0].xpath("//div/ul/li/text()").getall()

    # Parse every entry once and keep only those that were recognised.
    parsed = (renderer_meaning(item) for item in item_texts)
    return [meaning for meaning in parsed if meaning is not None]
|
||||
|
||||
|
||||
def renderer_meaning(text):
    """Split one meaning line into its word-property tag and definition.

    A parsable line begins with an abbreviation made of one to eight
    lowercase ASCII letters followed by a period (for example ``"n."``);
    whatever follows is treated as the definition.

    Args:
        text: The raw text of a single meaning entry.

    Returns:
        ``None`` when ``text`` does not start with such an abbreviation.
        Otherwise a dict with ``"word_property"`` (the abbreviation passed
        through ``word_property_conversion``) and ``"meaning"`` (the rest
        of the line with leading whitespace removed).
    """
    matched = re.match(r"[a-z]{1,8}\.", text)
    if matched is None:
        return None

    # Cut the line where the matched abbreviation ends. The converted value
    # may differ in length from the matched text, so it must not be used to
    # work out the slice offset.
    meaning = text[matched.end():].lstrip()

    return {
        "word_property": word_property_conversion(matched.group()),
        "meaning": meaning,
    }
|
||||
|
||||
|
||||
def has_value_to_render(text):
    """Report whether the page is free of any ``.error-typo`` element.

    Args:
        text: HTML markup of the page, parsed with ``parsel.Selector``.

    Returns:
        ``True`` when no element matches the ``.error-typo`` CSS selector,
        ``False`` otherwise.
    """
    typo_markers = Selector(text).css(".error-typo")
    return not typo_markers
|
||||
|
||||
|
||||
def testcase1():
    """Manual check: print ``renderer_meaning`` for a line with no prefix.

    The stored row for the word "ob" is loaded and its ``html`` attribute
    is read, but the value is not used by the check itself.
    """
    with Session() as db:
        record = db.query(WordData).filter_by(word="ob").first()
        text = record.html  # read only; not used further
        sample = "linux下的桌面环境"
        print(renderer_meaning(sample))
|
||||
|
||||
|
||||
def testcase3():
    """Manual check: print the meanings parsed from the row for "search"."""
    with Session() as db:
        record = db.query(WordData).filter_by(word="search").one()
        print(renderer_meaningslist(record.html))
|
||||
|
||||
|
||||
def testcase4():
    """Manual check: a word whose importance is expected to be None.

    Prints the dict returned by ``renderer_word`` for the stored row of
    "john", followed by the type of its ``"importance"`` value.
    """
    with Session() as db:
        record = db.query(WordData).filter_by(word="john").one()
        word = renderer_word(record.html)
        print(word)
        print(type(word["importance"]))
|
||||
|
||||
|
||||
def testcase2():
    """Manual check: feed ``renderer_meaningslist`` a page without meanings.

    The inline markup has a ``.keyword`` element but no
    ``.trans-container`` element. The return value is discarded, so this
    only shows that the call completes.
    """
    html = """
<div id="results-contents" class="results-content"><div class="trans-wrapper" id="phrsListTab"><h2 class="wordbook-js"><span class="keyword">hentai</span></h2></div><div id="wordArticle" class="trans-wrapper trans-tab"><h3><span class="tabs"></span></h3><div id="wordArticleToggle"></div></div></div>
"""
    renderer_meaningslist(html)
|
||||
|
||||
|
||||
# Running this module directly executes the manual check in testcase4.
if __name__ == "__main__":
    testcase4()
|
Reference in New Issue
Block a user