47 lines
1.8 KiB
Python
47 lines
1.8 KiB
Python
import logging
|
|
|
|
from models import Session, WordData, Word
|
|
from serializers import WordAddSerializer, MeaningAddSerializer
|
|
from renderercontents import renderer_word, renderer_meaningslist, has_value_to_render
|
|
|
|
|
|
def create_word_meaning(text, session):
    """Render one word page and store the word together with its meanings.

    The word is rendered from ``text`` with ``renderer_word`` and validated
    through ``WordAddSerializer``. Nothing is stored when that validation
    fails or when a ``Word`` with the same spelling is already present in
    ``session``. Otherwise the word is saved, and every meaning produced by
    ``renderer_meaningslist`` is validated and saved through
    ``MeaningAddSerializer``. Meanings that fail validation are logged and
    skipped instead of being saved.

    Args:
        text: Page content to render (the script entry point passes
            ``WordData.html``).
        session: Database session handed to the serializers and used for the
            duplicate-spelling lookup. This function does not call
            ``commit`` on it.

    Returns:
        None.

    Raises:
        Exception: Anything raised by ``renderer_meaningslist`` is logged and
            re-raised unchanged.
    """
    word_dict = renderer_word(text)
    word_serializer = WordAddSerializer(word_dict, session)

    # The exception type raised by the serializer is not visible from this
    # file, so the broad catch of the original is kept: log and treat as
    # invalid.
    try:
        word_is_valid = word_serializer.is_valid(True)
    except Exception as exc:
        logging.error("msg: %s data: %s", exc, word_dict)
        word_is_valid = False
    if not word_is_valid:
        return  # validation failed

    same_spelling = session.query(Word).filter_by(spelling=word_dict['spelling'])
    if same_spelling.count() > 0:
        return  # word already stored

    word = word_serializer.save()
    if word.spelling is None:
        # Only reported; processing of the meanings continues regardless.
        logging.error(
            "word spelling is null word_dict: %s\n%s", word_dict, text
        )

    try:
        meaning_list = renderer_meaningslist(text)
    except Exception as exc:
        logging.error("msg: %s word: %s\n%s", exc, word.spelling, text)
        raise  # bare raise keeps the original traceback intact

    for meaning in meaning_list:
        meaning_serializer = MeaningAddSerializer(meaning, session, word)
        try:
            meaning_is_valid = meaning_serializer.is_valid(True)
        except Exception as exc:
            logging.error(
                "msg: %s word: %s data: %s", exc, word.spelling, meaning
            )
            meaning_is_valid = False
        if not meaning_is_valid:
            # Previously the validation result was ignored and save() ran
            # anyway; skip invalid meanings, mirroring the word path above.
            continue
        meaning_serializer.save()
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: log to spider.log, then feed the HTML of every
    # WordData row flagged has_retrieve=True through create_word_meaning.
    logging.basicConfig(filename="spider.log")
    with Session() as session:
        retrieved_rows = session.query(WordData).filter_by(has_retrieve=True)
        for row in retrieved_rows:
            html = row.html
            if not has_value_to_render(html):
                continue  # nothing renderable on this page
            create_word_meaning(html, session)
        # NOTE(review): indentation was lost in the copy this was restored
        # from; a single commit after the loop is assumed -- confirm it was
        # not meant to run once per row.
        session.commit()
|