"""Extract machine-craft recipes from a parsed Minecraft wiki page.

The entry point is :func:`extract_crafts`; every other function is a parsing
helper that turns one piece of the ``craft-gui`` markup into a DTO.
"""

import re

from bs4 import BeautifulSoup
from loguru import logger

from craft import CraftComponent, CraftItem, CraftRecipe
from craft_storage import CraftStorage
from dto import ParsedItem, ParsedRecipeInput, SpriteData

# Matches the trailing "//&7Модификация:/..." part of a machine tooltip.
_MODIFICATION_SUFFIX_RE = re.compile(r"\s*//&7Модификация:/.*$")
# Two integers (optionally negative), each followed by 'px'.
_SPRITE_SHIFT_RE = re.compile(r'(-?\d+)px\s+(-?\d+)px')


def extract_crafts(soup: BeautifulSoup, storage: CraftStorage) -> set[str]:
    """Parse every visible ``div.craft-gui`` block and store its recipes.

    For each output item of each container the recipe inputs are parsed and,
    unless ``storage.try_add_recipe_signature`` rejects the recipe as a
    duplicate, the items, the recipe and its components are added to
    *storage*.

    Args:
        soup: Parsed page to search for ``craft-gui`` containers.
        storage: Destination for items, recipes and components.

    Returns:
        A set of source links.
        NOTE(review): nothing in this function ever adds to the set, so it is
        currently always empty - confirm whether links should be collected.
    """
    craft_containers = soup.find_all("div", class_="craft-gui")
    if not craft_containers:
        logger.info('Машинных рецептов не найдено')
        return set()

    src_links: set[str] = set()
    for container in craft_containers:
        if check_is_hidden(container):
            continue

        output_items = parse_craft_items(container)
        if output_items is None:
            # parse_craft_items signals "no usable output cell" with None;
            # iterating it directly would raise TypeError.
            logger.error(f'ошибка для: \n{container}\n не удалось получить продукт машинного рецепта')
            continue

        for output_item in output_items:
            # There may be several recipes per output (the furnace case).
            recipes_list = parse_craft_components_and_recipe(
                container, output_item.item.name, output_item.amount
            )
            for recipe_input in recipes_list:
                _store_recipe(storage, output_item, recipe_input)
    return src_links


def _store_recipe(storage: CraftStorage, output_item: ParsedItem,
                  recipe_input: ParsedRecipeInput) -> None:
    """Add one parsed recipe to *storage* unless its signature is a duplicate."""
    is_new = storage.try_add_recipe_signature(
        output_item.item.name,
        recipe_input.components,
        recipe_input.recipe.craft_type,
    )
    if not is_new:
        return  # duplicate recipe

    storage.add_item(output_item.item)
    for input_item in recipe_input.items:
        storage.add_item(input_item)

    recipe_id = storage.add_recipe(recipe_input.recipe)
    for component in recipe_input.components:
        component.recipe_id = recipe_id
        storage.add_component(component)

    # A recipe may end up with no components (e.g. every input was skipped),
    # so do not index the list unconditionally.
    components = recipe_input.components
    first_ingredient = components[0].input_item if components else None
    logger.info(f'Добавлен рецепт машинного крафта для {output_item.item.name} '
                f'(ингредиент: {first_ingredient})')


def check_is_hidden(container: BeautifulSoup) -> bool:
    """Return True when *container* sits inside a ``table.collapsed`` element.

    A warning is logged for every container that is skipped for this reason.
    """
    if container.find_parent('table', class_='collapsed'):
        logger.warning(f'{container} Не будет исследован. Причина: помечен как скрытый')
        return True
    return False


def parse_craft_items(container: BeautifulSoup) -> list[ParsedItem] | None:
    """Parse the output items found in the ``gt-output`` cell of *container*.

    Returns:
        One ``ParsedItem`` per ``invslot-item`` span that could be parsed, or
        ``None`` when the output cell or its item spans are missing.
    """
    output_span = container.find('span', class_='gt-output')
    if output_span is None:
        logger.error(f'ошибка для \n{container}\n: не найдено ячейки с результатом!')
        return None

    name_containers = output_span.find_all('span', class_='invslot-item')
    if not name_containers:
        logger.error(f'ошибка для \n{output_span}\n: не найдено ни одного текстового контейнера!')
        return None

    result = []
    for name_container in name_containers:
        # Prefer the sprite span; fall back to a plain <img>.
        data = extract_data_from_sprite_span(name_container)
        if data is None:
            data = extract_data_from_sprite_img(name_container)
        if data is None:
            logger.error(f'ошибка для \n{name_container}\n: не удалось извлечь картинку и заголовок ни одним из способов')
            continue
        # Image-based data always carries amount=1, so data.amount covers
        # both extraction paths.
        item = CraftItem(data.title, data.img_url, data.shift)
        result.append(ParsedItem(item=item, amount=data.amount))
    return result


def _extract_sprite_data(item_span) -> SpriteData | None:
    """Read item data from a sprite span, falling back to an ``<img>`` tag."""
    return extract_data_from_sprite_span(item_span) or extract_data_from_sprite_img(item_span)


def parse_craft_components_and_recipe(container, output_item_name: str,
                                      output_item_count: int) -> list[ParsedRecipeInput]:
    """Parse the ``gt-input`` cell of *container* into recipe inputs.

    When the recipe type contains 'Печь', every item in the first ``invslot``
    becomes a separate single-ingredient recipe. Otherwise one recipe is
    built from all ``invslot-item`` spans, skipping items named 'Энергия'.

    Returns:
        The parsed recipes; an empty list when the input cell or the recipe
        type cannot be determined.
    """
    input_span = container.find('span', class_='gt-input')
    if not input_span:
        logger.error(f'Ошибка: нет блока gt-input в {container}')
        return []

    recipe_info = extract_recipe_type(container)
    if not recipe_info:
        return []

    recipes = []
    if 'Печь' in recipe_info:
        # Furnace: each item in the left slot is its own recipe.
        left_cell = input_span.find('span', class_='invslot')
        if left_cell:
            for item_span in left_cell.find_all('span', class_='invslot-item'):
                data = _extract_sprite_data(item_span)
                if not data:
                    continue
                item = CraftItem(data.title, data.img_url, data.shift)
                recipes.append(ParsedRecipeInput(
                    items=[item],
                    recipe=CraftRecipe(output_item_name, output_item_count, recipe_info),
                    components=[CraftComponent(-1, item.name, data.amount)],
                    source_links=[],
                ))
    else:
        # Other machines: a single recipe containing every ingredient.
        items = []
        components = []
        for item_span in input_span.find_all('span', class_='invslot-item'):
            data = _extract_sprite_data(item_span)
            if not data:
                continue
            item = CraftItem(data.title, data.img_url, data.shift)
            if item.name == 'Энергия':
                continue  # skip ic2 stuff
            items.append(item)
            components.append(CraftComponent(-1, item.name, data.amount))
        recipes.append(ParsedRecipeInput(
            items=items,
            recipe=CraftRecipe(output_item_name, output_item_count, recipe_info),
            components=components,
            source_links=[],
        ))
    return recipes


def extract_recipe_type(container) -> str | None:
    """Return the cleaned recipe type taken from the ``minetip`` span.

    Returns ``None`` (after logging an error) when the span or its
    ``data-minetip-text`` attribute is missing.
    """
    processor_container = container.find('span', class_='minetip')
    if processor_container is None:
        logger.error(f'Ошибка для {container}: не найден тип машинного рецепта')
        return None

    raw_type = processor_container.get('data-minetip-text')
    if raw_type is None:
        # Without this guard the cleaner would fail on None.replace(...).
        logger.error(f'Ошибка для {container}: не найден тип машинного рецепта')
        return None
    return clean_recipe_type_str(raw_type)


def clean_recipe_type_str(input: str) -> str:  # noqa: A002 - name kept for callers
    """Remove every '&3' marker and the trailing modification suffix.

    The result is stripped of surrounding whitespace.
    """
    without_markers = input.replace('&3', '')
    return _MODIFICATION_SUFFIX_RE.sub("", without_markers).strip()


def extract_data_from_sprite_img(input_span_name_container) -> SpriteData | None:
    """Build ``SpriteData`` from the first ``<img>`` inside the container.

    The title comes from ``alt`` and the URL from ``src``; amount is always 1
    and the shift is ``(0, 0)``. Returns ``None`` when there is no image, or
    (after logging) when either attribute is missing.
    """
    input_item_img = input_span_name_container.find('img')
    if input_item_img is None:
        return None

    title = input_item_img.get('alt')
    if title is None:
        logger.error(f'ошибка для \n{input_span_name_container}\n: не найдено описание картинки!')
        return None

    url = input_item_img.get('src')
    if url is None:
        logger.error(f'ошибка для \n{input_span_name_container}\n: не найден url картинки!')
        return None

    return SpriteData(
        amount=1,
        title=title,
        shift=(0, 0),
        img_url=url,
        source_link=None,
    )


def extract_data_from_sprite_span(input_span_name_container) -> SpriteData | None:
    """Build ``SpriteData`` from the ``span.sprite`` inside the container.

    Returns ``None`` when there is no sprite span, or (after logging) when
    the title, the sprite class name or the pixel shift cannot be read.

    Raises:
        ValueError: if the ``invslot-stacksize`` text is not an integer.
    """
    sprite_span = input_span_name_container.find('span', class_='sprite')
    # The sprite container may legitimately be absent.
    if sprite_span is None:
        return None

    input_item_title = sprite_span.get('title')
    if input_item_title is None:
        logger.error(f'ошибка для \n{input_span_name_container}\n: не найдено заголовка предмета')
        return None

    output_amount = 1
    amount_container = input_span_name_container.find('span', class_='invslot-stacksize')
    if amount_container is not None:
        logger.info(f'Для объекта {input_item_title} найдено количество : {amount_container.text}')
        output_amount = int(amount_container.text)

    item_source_link = get_item_source_link(input_span_name_container)

    input_item_img_url = extract_img_classname(sprite_span)
    if input_item_img_url is None:
        # Log the span that was inspected; the class name itself is None here.
        logger.error(f'Ошибка для \n{sprite_span}\n: не найден файл спрайта')
        return None

    input_item_img_shift = extract_img_shift(sprite_span)
    if input_item_img_shift is None:
        logger.error(f'Ошибка для \n{sprite_span}\n: не найдено смещения для спрайта')
        return None

    return SpriteData(
        title=input_item_title,
        shift=input_item_img_shift,
        img_url=input_item_img_url,
        source_link=item_source_link,
        amount=output_amount,
    )


def extract_img_shift(output_item_sprite_span):
    """Return the ``(x, y)`` pixel pair found in the span's ``style`` attribute.

    Returns ``None`` when the span is ``None``, has no ``style`` attribute, or
    the style contains no ``<int>px <int>px`` pair.
    """
    if output_item_sprite_span is None:
        return None
    css_value = output_item_sprite_span.get('style')
    if css_value is None:
        return None

    match = _SPRITE_SHIFT_RE.search(css_value)
    if match is None:
        return None  # pattern not found
    return (int(match.group(1)), int(match.group(2)))


def extract_img_classname(sprite_span):
    """Return the first CSS class of the span that ends with '-sprite'.

    For ``class="sprite industrialcraft-2-inv-sprite"`` this yields
    ``'industrialcraft-2-inv-sprite'``. Returns ``None`` when the span is
    ``None`` or no class matches.
    """
    if sprite_span is None:
        return None
    classes = sprite_span.get('class', [])
    return next((cls for cls in classes if cls.endswith('-sprite')), None)


def get_item_source_link(input_span_name_container) -> str | None:
    """Return the absolute wiki URL of the first ``<a>`` in the container.

    Returns ``None`` when there is no link element or it has no ``href``.
    """
    link_container = input_span_name_container.find('a')
    if link_container is None:
        return None
    link = link_container.get('href')
    if link is None:
        return None
    return f'https://ru.minecraft.wiki{link}'