"""Extraction of classic (crafting-table) recipes from a parsed wiki page."""

from collections import Counter
import re

from bs4 import BeautifulSoup, Tag
from craft import CraftComponent, CraftItem, CraftRecipe
from craft_storage import CraftStorage
from dto import ParsedItem, ParsedRecipeInput, SpriteData
from loguru import logger

# Prefix prepended to every href found inside an inventory slot.
_WIKI_BASE_URL = 'https://ru.minecraft.wiki'

# Upper bound on distinct component names kept per recipe (value carried over
# from the original implementation).
_MAX_RECIPE_COMPONENTS = 50

# Two pixel offsets (optionally negative) in a row, e.g. "-16px -32px".
_SHIFT_PATTERN = re.compile(r'(-?\d+)px\s+(-?\d+)px')


def extract_crafts(soup: BeautifulSoup, storage: CraftStorage) -> set[str]:
    """Parse every visible crafting-table recipe in *soup* and save it to *storage*.

    For each recipe container the output item, the input items, the recipe and
    its components are parsed.  Recipes whose signature is rejected by
    ``storage.try_add_recipe_signature`` are not stored again.

    Returns:
        The source links found in the input slots of every successfully parsed
        recipe (links are collected even when the recipe itself turns out to
        be a duplicate).  Empty when the page has no crafting containers.
    """
    craft_containers = soup.find_all("div", class_="mcui-Crafting_Table-container")
    if not craft_containers:
        logger.info('Классических рецептов не найдено')
        return set()

    src_links: set[str] = set()
    for container in craft_containers:
        if check_is_hidden(container):
            continue

        output_item = parse_craft_item(container)
        if output_item is None:
            continue

        input_kit = parse_craft_components_and_recipe(
            container, output_item.item.name, output_item.amount
        )
        if input_kit is None:
            continue

        src_links.update(input_kit.source_links)

        already_exists = not storage.try_add_recipe_signature(
            output_item.item.name, input_kit.components, 'classic'
        )
        if already_exists:
            continue

        storage.add_item(output_item.item)
        for input_item in input_kit.items:
            storage.add_item(input_item)

        # Components are created with a placeholder id; bind them to the real
        # recipe id only after the recipe has been stored.
        recipe_id = storage.add_recipe(input_kit.recipe)
        for component in input_kit.components:
            component.recipe_id = recipe_id
            storage.add_component(component)

        logger.info(f'Добавлен рецепт крафта для предмета {output_item.item.name}')

    return src_links


def check_is_hidden(container: Tag) -> bool:
    """Return True when *container* sits inside a ``<table class="collapsed">``."""
    if container.find_parent('table', class_='collapsed'):
        logger.warning(f'{container} Не будет исследован. Причина: помечен как скрытый')
        return True
    return False


def parse_craft_item(container) -> ParsedItem | None:
    """Parse the output slot of a crafting container.

    Returns:
        The produced item together with the amount shown in the slot
        (1 when no stack size is displayed), or ``None`` when the output
        slot or its sprite data cannot be found.
    """
    output_span = container.find('span', class_='mcui-output')
    if output_span is None:
        logger.error(f'ошибка для \n{container}\n: не найдено ячейки с результатом!')
        return None

    output_slot = output_span.find('span', class_='invslot-item')
    if output_slot is None:
        logger.error(f'ошибка для \n{output_span}\n: не найден текстовый контейнер!')
        return None

    sprite = _extract_sprite_data(output_slot)
    if sprite is None:
        return None

    item = CraftItem(sprite.title, sprite.img_url, sprite.shift)
    # Use the stack size parsed from the slot instead of a hard-coded 1.
    return ParsedItem(item=item, amount=sprite.amount)


def parse_craft_components_and_recipe(
    container: Tag, output_item_name: str, output_count: int = 1
) -> ParsedRecipeInput | None:
    """Parse the input grid of a crafting container.

    Every non-empty slot yields one ``CraftItem``; slots are then grouped by
    item name into ``CraftComponent`` entries carrying the per-name count.

    Args:
        container: The crafting container element.
        output_item_name: Name of the item the recipe produces.
        output_count: How many output items the recipe produces.

    Returns:
        The parsed items, recipe, components and source links, or ``None``
        when the grid is missing, holds no usable slots, or a slot cannot be
        parsed.
    """
    input_span = container.find('span', class_='mcui-input')
    if input_span is None:
        logger.error(f'ошибка для \n{container}\n: не найдено рецепта крафта')
        return None

    slots = input_span.find_all('span', class_='invslot-item')
    if not slots:
        logger.error(f'Ошибка для \n{input_span}\n: не найдено айтемов в рецепте')
        return None

    craft_items = []
    src_links = []
    for slot in slots:
        if not slot.contents:
            logger.warning('Пустая ячейка, скипаю')
            continue

        sprite = _extract_sprite_data(slot)
        if sprite is None:
            return None

        if sprite.source_link is not None:
            src_links.append(sprite.source_link)
        craft_items.append(CraftItem(sprite.title, sprite.img_url, sprite.shift))

    if not craft_items:
        # Every slot was empty: a recipe without ingredients is not a recipe.
        logger.error(f'Ошибка для \n{input_span}\n: не найдено айтемов в рецепте')
        return None

    name_counts = Counter(item.name for item in craft_items)
    recipe = CraftRecipe(output_item_name, output_count, 'Верстак')
    # WARN: -1 is a placeholder recipe id; it is replaced when the recipe is saved.
    craft_components = [
        CraftComponent(-1, name, count)
        for name, count in name_counts.most_common(_MAX_RECIPE_COMPONENTS)
    ]

    return ParsedRecipeInput(
        items=craft_items,
        recipe=recipe,
        components=craft_components,
        source_links=src_links,
    )


def _extract_sprite_data(slot: Tag) -> SpriteData | None:
    """Read slot data from a sprite span, falling back to an ``<img>`` tag.

    Logs an error and returns ``None`` when both strategies fail.
    """
    sprite = extract_data_from_sprite_span(slot)
    if sprite is not None:
        return sprite

    sprite = extract_data_from_sprite_img(slot)
    if sprite is None:
        logger.error(
            f'ошибка для \n{slot}\n: не удалось извлечь картинку и заголовок ни одним из способов'
        )
    return sprite


def _extract_stack_size(slot: Tag, item_title: str) -> int:
    """Return the stack size shown in *slot*, or 1 when absent or unparsable."""
    amount_container = slot.find('span', class_='invslot-stacksize')
    if amount_container is None:
        return 1

    logger.info(f'Для объекта {item_title} найдено количество : {amount_container.text}')
    try:
        return int(amount_container.text)
    except ValueError:
        # A malformed amount must not abort parsing of the whole page.
        logger.warning(
            f'Для объекта {item_title} не удалось разобрать количество '
            f'"{amount_container.text}", использую 1'
        )
        return 1


def extract_data_from_sprite_img(input_span_name_container) -> SpriteData | None:
    """Build slot data from the first ``<img>`` inside the slot.

    The title is taken from ``alt`` and the image URL from ``src``; the sprite
    shift is always ``(0, 0)`` and no source link is recorded.

    Returns:
        The slot data, or ``None`` when there is no ``<img>`` or it lacks
        ``alt``/``src``.
    """
    input_item_img = input_span_name_container.find('img')
    if input_item_img is None:
        return None

    title = input_item_img.get('alt')
    if title is None:
        logger.error(f'ошибка для \n{input_span_name_container}\n: не найдено описание картинки!')
        return None

    url = input_item_img.get('src')
    if url is None:
        logger.error(f'ошибка для \n{input_span_name_container}\n: не найден url картинки!')
        return None

    return SpriteData(
        amount=_extract_stack_size(input_span_name_container, title),
        title=title,
        shift=(0, 0),
        img_url=url,
        source_link=None,
    )


def extract_data_from_sprite_span(input_span_name_container) -> SpriteData | None:
    """Build slot data from a ``<span class="sprite">`` inside the slot.

    Returns:
        The slot data (title, stack size, sprite class name, sprite shift and
        optional source link), or ``None`` when the sprite span is absent or
        any mandatory piece is missing.
    """
    input_item_sprite_span = input_span_name_container.find('span', class_='sprite')
    # The sprite span is optional; callers fall back to the <img> strategy.
    if input_item_sprite_span is None:
        return None

    input_item_title = input_item_sprite_span.get('title')
    if input_item_title is None:
        logger.error(f'ошибка для \n{input_span_name_container}\n: не найдено заголовка предмета')
        return None

    output_amount = _extract_stack_size(input_span_name_container, input_item_title)
    item_source_link = get_item_source_link(input_span_name_container)

    input_item_img_url = extract_img_classname(input_item_sprite_span)
    if input_item_img_url is None:
        # Log the span that was inspected; the class-name result is None here.
        logger.error(f'Ошибка для \n{input_item_sprite_span}\n: не найден файл спрайта')
        return None

    input_item_img_shift = extract_img_shift(input_item_sprite_span)
    if input_item_img_shift is None:
        logger.error(f'Ошибка для \n{input_item_sprite_span}\n: не найдено смещения для спрайта')
        return None

    return SpriteData(
        amount=output_amount,
        title=input_item_title,
        shift=input_item_img_shift,
        img_url=input_item_img_url,
        source_link=item_source_link,
    )


def extract_img_shift(output_item_sprite_span) -> tuple[int, int] | None:
    """Return the ``(x, y)`` pixel offsets found in the element's ``style``.

    The first occurrence of two consecutive ``<int>px`` values is used.
    Returns ``None`` when the element is ``None``, has no ``style`` attribute,
    or the style contains no such pair.
    """
    if output_item_sprite_span is None:
        return None

    css_value = output_item_sprite_span.get('style')
    if css_value is None:
        return None

    match = _SHIFT_PATTERN.search(css_value)
    if match is None:
        return None
    return (int(match.group(1)), int(match.group(2)))


def extract_img_classname(sprite_span) -> str | None:
    """Return the first class of *sprite_span* that ends with ``-sprite``.

    Example class attribute: ``sprite industrialcraft-2-inv-sprite``.
    Returns ``None`` when the element is ``None`` or no class matches.
    """
    if sprite_span is None:
        return None

    classes = sprite_span.get('class', [])
    if isinstance(classes, str):
        # A plain string would otherwise be iterated character by character.
        classes = classes.split()

    for cls in classes:
        if cls.endswith('-sprite'):
            return cls
    return None


def get_item_source_link(input_span_name_container) -> str | None:
    """Return the wiki URL of the first ``<a href>`` inside the slot.

    The href is appended to the wiki base URL as-is.  Returns ``None`` when
    the slot has no link or the link has no ``href``.
    """
    link_container = input_span_name_container.find('a')
    if link_container is None:
        return None

    link = link_container.get('href')
    if link is None:
        return None

    return f'{_WIKI_BASE_URL}{link}'