import logging
from typing import Callable, Optional, Type

from fake_useragent import UserAgent
import httpx

from validator import Validator
import httpchoices

logger = logging.getLogger(__name__)


class NoValidator(Validator):
    """Validator that accepts any data unconditionally."""

    def is_valid(self, raise_error):
        """Return ``True`` regardless of the data; ``raise_error`` is ignored."""
        return True


class Spider:
    """Fetch a URL, parse the response body, and validate the parsed data.

    Subclasses are expected to set:

    * ``parser`` -- a callable taking the response text and returning the
      parsed data. It is looked up on the class (not the instance) so that a
      plain function assigned as a class attribute is called without ``self``.
    * ``validator_class`` -- a ``Validator`` subclass instantiated with the
      parsed data; defaults to ``NoValidator``.
    """

    validator_class: Type[Validator] = NoValidator
    parser: Optional[Callable[[str], dict]] = None

    def __init__(
        self,
        url,
        method="GET",
        request_data: Optional[dict] = None,
        params: Optional[dict] = None,
    ) -> None:
        """Store the request description; no network I/O happens here.

        Args:
            url: Target URL of the request.
            method: HTTP method name, checked via
                ``httpchoices.HttpMethod.is_valid``.
            request_data: Forwarded to httpx as the ``data`` keyword.
            params: Forwarded to httpx as the ``params`` keyword.
        """
        self.useragent = UserAgent()
        self.headers = {"User-Agent": self.useragent.random}
        self.__data = {}
        self.__html = ""
        self.url = url
        self.method = method
        self.has_verified = False
        # NOTE(review): presumably raises on an unsupported method because the
        # second argument is True -- confirm against httpchoices.
        httpchoices.HttpMethod.is_valid(method, True)
        self.__request_parameters = {
            "data": request_data,
            "params": params,
        }

    def get_parser(self):
        """Return the parser callable defined on the class.

        Raises:
            AssertionError: If no parser has been configured.
        """
        # Fetch from the class so a plain function is returned unbound.
        parser = self.__class__.parser
        # Raised explicitly (rather than via ``assert``) so the check still
        # runs when Python is started with -O.
        if self.parser is None or parser is None:
            raise AssertionError(
                f"{self.__class__.__name__}.parser must be set to a callable"
            )
        return parser

    async def __get_html(self) -> str:
        """Return the response text, fetching it on first use.

        A non-empty body is cached. If the server answers with an error
        status, the failure is logged and the (still empty) cache is
        returned, so a later call will retry the request.
        """
        if self.__html:
            return self.__html

        async with httpx.AsyncClient() as client:
            try:
                response = await client.request(
                    self.method,
                    self.url,
                    headers=self.headers,
                    **self.__request_parameters,
                )
                response.raise_for_status()
            except httpx.HTTPStatusError as exc:
                # Best-effort: keep returning "" but leave a trace of why.
                logger.warning(
                    "Request %s %s failed: %s", self.method, self.url, exc
                )
            else:
                self.__html = response.text
        return self.__html

    async def __get_data(self) -> dict:
        """Return the parsed data, parsing the HTML if nothing is cached yet.

        The cache counts as empty while it equals ``{}`` or ``[]``, so an
        empty parse result is re-computed on the next call.
        """
        if self.__data in ({}, []):
            html = await self.__get_html()
            self.__data = self.get_parser()(html)
        return self.__data

    async def is_valid(self, raise_exception=False) -> bool:
        """Validate the parsed data with ``validator_class``.

        Args:
            raise_exception: Passed through to the validator's ``is_valid``.

        Returns:
            The validator's verdict. A truthy verdict also sets
            ``has_verified`` to ``True``.
        """
        data = await self.__get_data()
        validator = self.validator_class(data)
        verdict = validator.is_valid(raise_exception)
        if verdict:
            self.has_verified = True
        return verdict

    async def data(self) -> dict:
        """Return the parsed data, validating it first if not yet verified.

        Validation is requested with ``raise_exception=True``; its return
        value is not inspected, so the data is returned whenever the
        validator does not raise.
        """
        if not self.has_verified:
            await self.is_valid(True)
        return self.__data