"""Extract a word's spelling, importance and meanings from dictionary-page HTML."""

import re

from parsel import Selector

from models import Session, WordData
from wordpropertyconversion import word_property_conversion

# A meaning line starts with a short lowercase abbreviation followed by a dot,
# e.g. "n." or "adj."; lines without such a prefix are not meanings.
_WORD_PROPERTY_RE = re.compile(r"[a-z]{1,8}\.")


def renderer_word(txt: str) -> dict:
    """Return the spelling and importance found in the page markup.

    Args:
        txt: Markup of a dictionary page.

    Returns:
        A dict with two keys:
        ``"spelling"`` - text of the first ``.keyword`` element, and
        ``"importance"`` - text of the first ``<span class="via rank">``.
        Either value is ``None`` when the element is not present.
    """
    sel = Selector(txt)
    return {
        "spelling": sel.css(".keyword::text").get(),
        "importance": sel.xpath("//span[@class='via rank']/text()").get(),
    }


def renderer_meaningslist(txt: str) -> list:
    """Return the parsed meanings listed in the first ``.trans-container``.

    Args:
        txt: Markup of a dictionary page.

    Returns:
        A list of ``{"word_property": ..., "meaning": ...}`` dicts, one per
        list item that ``renderer_meaning`` recognises. The list is empty when
        the page has no ``.trans-container`` or none of its items parse.
    """
    sel = Selector(txt)
    # Drop the #synonyms subtree before looking for meanings (presumably its
    # list items look like meaning lines - verify against real pages).
    sel.css("#synonyms").remove()

    containers = sel.css(".trans-container")
    if not containers:
        return []

    # An XPath starting with "//" is absolute even on a nested selector and
    # would scan the whole document. Anchor the same div/ul/li pattern to the
    # first container (the container itself or any div inside it).
    lines = containers[0].xpath(
        "descendant-or-self::div/ul/li/text()"
    ).getall()

    parsed = (renderer_meaning(line) for line in lines)
    return [entry for entry in parsed if entry is not None]


def renderer_meaning(text: str):
    """Split one meaning line into its word property and its meaning text.

    Args:
        text: A single line such as ``"n. something"``.

    Returns:
        ``{"word_property": ..., "meaning": ...}`` where the word property has
        been passed through ``word_property_conversion``, or ``None`` when the
        line does not start with a lowercase abbreviation followed by a dot.
    """
    match = _WORD_PROPERTY_RE.match(text)
    if match is None:
        return None

    word_property = word_property_conversion(match.group())
    # Slice from the end of the *matched* prefix: the converted property may
    # have a different length than the abbreviation found in the text.
    # lstrip() removes whatever whitespace separates prefix and meaning.
    meaning = text[match.end():].lstrip()
    return {
        "word_property": word_property,
        "meaning": meaning,
    }


def has_value_to_render(text: str) -> bool:
    """Return True when the page contains no ``.error-typo`` element."""
    return not Selector(text).css(".error-typo")


def testcase1():
    """Print the result of parsing a line that has no word-property prefix."""
    with Session() as session:
        data = session.query(WordData).filter_by(word="ob").first()
        # Not used below; kept from the original scratch test, whose renderer
        # calls on this markup are currently disabled.
        text = data.html  # noqa: F841
        astr = "linux下的桌面环境"
        ans = renderer_meaning(astr)
        print(ans)


def testcase3():
    """Print the meanings parsed from the stored page for "search"."""
    with Session() as session:
        data = session.query(WordData).filter_by(word="search").one()
        text = data.html
        ans = renderer_meaningslist(text)
        print(ans)


def testcase4():
    "test word importance None"
    with Session() as session:
        data = session.query(WordData).filter_by(word="john").one()
        text = data.html
        ans = renderer_word(text)
        print(ans)
        print(type(ans["importance"]))


def testcase2():
    """Run the meanings parser on a snippet that has no meanings container."""
    txt = """

hentai

"""
    renderer_meaningslist(txt)


if __name__ == "__main__":
    testcase4()