osdict_project/spider/action_serialize.py

47 lines
1.8 KiB
Python

import logging
from models import Session, WordData, Word
from serializers import WordAddSerializer, MeaningAddSerializer
from renderercontents import renderer_word, renderer_meaningslist, has_value_to_render
def create_word_meaning(text, session):
    """Parse one scraped page and persist its word together with its meanings.

    Args:
        text: Page content handed to ``renderer_word`` and
            ``renderer_meaningslist``.
        session: Database session passed to both serializers and used for
            the duplicate-spelling lookup.

    Returns:
        None. Returns early, without saving anything, when the word fails
        validation or a ``Word`` with the same spelling already exists.

    Raises:
        Exception: Anything raised by ``renderer_meaningslist`` is logged
            and re-raised unchanged.
    """
    word_dict = renderer_word(text)
    word_serializer = WordAddSerializer(word_dict, session)

    # NOTE(review): the positional ``True`` presumably asks is_valid() to
    # raise on failure -- confirm against the serializers module.
    flag = False
    try:
        flag = word_serializer.is_valid(True)
    except Exception as e:
        logging.error("msg: {} data: {}".format(e, word_dict))
    if not flag:
        return  # fail validation

    if session.query(Word).filter_by(spelling=word_dict['spelling']).count() > 0:
        return  # repeat

    word = word_serializer.save()
    if word.spelling is None:  # word is null
        # Only reported; processing of the meanings continues as before.
        logging.error("word spelling is null word_dict: {}\n{}".format(word_dict, text))

    try:
        meaning_list = renderer_meaningslist(text)
    except Exception as e:
        logging.error("msg: {} word: {}\n{}".format(e, word.spelling, text))
        raise  # bare raise keeps the original traceback intact

    for meaning in meaning_list:
        meaning_serializer = MeaningAddSerializer(meaning, session, word)
        flag = False
        try:
            flag = meaning_serializer.is_valid(True)
        except Exception as e:
            logging.error("msg: {} word: {} data: {}".format(e, word.spelling, meaning))
        if not flag:
            # Mirror the word path above: a meaning that failed validation
            # must not be saved. Previously the result was ignored and
            # save() ran unconditionally.
            continue
        meaning_serializer.save()
if __name__ == "__main__":
    # Script entry point: walk every WordData row flagged has_retrieve=True
    # and store the word/meanings parsed from its html.
    logging.basicConfig(filename="spider.log")
    with Session() as session:
        retrieved_rows = session.query(WordData).filter_by(has_retrieve=True)
        for row in retrieved_rows:
            html = row.html
            if not has_value_to_render(html):
                continue  # nothing renderable in this row
            create_word_meaning(html, session)
        # NOTE(review): the source lost its indentation; a single commit
        # after the loop is assumed here -- confirm against the original.
        session.commit()