Files
CraftCalc/craft_parser/device_extractor.py
2026-01-19 22:17:38 +07:00

262 lines
10 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import re
from bs4 import BeautifulSoup
from craft import CraftComponent, CraftItem, CraftRecipe
from craft_storage import CraftStorage
from dto import ParsedItem, ParsedRecipeInput, SpriteData
from loguru import logger
def extract_crafts(soup: BeautifulSoup, storage: CraftStorage) -> set[str]:
    """Collect machine-craft recipes from a parsed page and save them.

    Every ``div.craft-gui`` container that is not reported as hidden by
    ``check_is_hidden`` is parsed into an output item and a list of recipes.
    Each recipe whose signature ``storage`` accepts as new is stored together
    with its output item, its input items and its components.

    Args:
        soup: Parsed page that is searched for ``craft-gui`` containers.
        storage: Receives items, recipes and components, and is asked to
            reject recipe signatures it has already seen.

    Returns:
        A set of source links. Nothing in this function adds to it, so it is
        currently always empty.
    """
    craft_containers = soup.find_all("div", class_="craft-gui")
    if not craft_containers:
        logger.info('Машинных рецептов не найдено')
        return set()

    src_links: set[str] = set()
    for container in craft_containers:
        if check_is_hidden(container):
            continue

        output_item = parse_craft_item(container)
        if output_item is None:
            # parse_craft_item has already logged the reason; without an
            # output item there is nothing to attach recipes to.
            continue

        # A container may yield several recipes (the furnace case).
        recipes_list = parse_craft_components_and_recipe(
            container, output_item.item.name, output_item.amount
        )
        for recipe_input in recipes_list:
            is_new = storage.try_add_recipe_signature(
                output_item.item.name,
                recipe_input.components,
                recipe_input.recipe.craft_type,
            )
            if not is_new:
                continue  # duplicate recipe

            storage.add_item(output_item.item)
            for input_item in recipe_input.items:
                storage.add_item(input_item)

            recipe_id = storage.add_recipe(recipe_input.recipe)
            for component in recipe_input.components:
                component.recipe_id = recipe_id
                storage.add_component(component)

            # A recipe can end up with no components (every input skipped),
            # so do not index into an empty list just to build the log line.
            components = recipe_input.components
            first_ingredient = components[0].input_item if components else None
            logger.info(f'Добавлен рецепт машинного крафта для {output_item.item.name} '
                        f'(ингредиент: {first_ingredient})')
    return src_links
def check_is_hidden(container: BeautifulSoup) -> bool:
    """Tell whether ``container`` sits inside a ``table`` with class ``collapsed``.

    A warning is logged when it does, because the caller then skips the
    container.

    Args:
        container: Element whose ancestors are inspected.

    Returns:
        True when a ``table.collapsed`` ancestor is found, False otherwise.
    """
    collapsed_table = container.find_parent('table', class_='collapsed')
    if not collapsed_table:
        return False
    logger.warning(f'{container} Не будет исследован. Причина: помечен как скрытый')
    return True
def parse_craft_item(container) -> ParsedItem|None:
    """Parse the output slot of a craft container.

    The item is read from the ``span.invslot-item`` inside ``span.gt-output``.
    Sprite-span extraction is tried first and supplies the amount; if it
    yields nothing, image extraction is used and the amount is set to 1.

    Args:
        container: Craft container element to read the output slot from.

    Returns:
        The parsed item with its amount, or None (after logging an error)
        when the slot or its item data cannot be found.
    """
    result_cell = container.find('span', class_='gt-output')
    if result_cell is None:
        logger.error(f'ошибка для \n{container}\n: не найдено ячейки с результатом!')
        return None

    slot = result_cell.find('span', class_='invslot-item')
    if slot is None:
        logger.error(f'ошибка для \n{result_cell}\n: не найден текстовый контейнер!')
        return None

    sprite = extract_data_from_sprite_span(slot)
    if sprite is None:
        # No sprite span: fall back to a plain <img>, which has no amount.
        sprite = extract_data_from_sprite_img(slot)
        if sprite is None:
            logger.error(f'ошибка для \n{slot}\n: не удалось извлечь картинку и заголовок ни одним из способов')
            return None
        amount = 1
    else:
        amount = sprite.amount

    item = CraftItem(sprite.title, sprite.img_url, sprite.shift)
    return ParsedItem(item=item, amount=amount)
def parse_craft_components_and_recipe(container, output_item_name: str, output_item_count: int) -> list[ParsedRecipeInput]:
    """Build the recipe inputs described by a craft container.

    When the recipe type contains 'Печь' (furnace), every item in the first
    ``invslot`` of the input block becomes its own single-ingredient recipe.
    For any other type, all input items form one recipe; items named
    'Энергия' are left out of it.

    Args:
        container: Craft container element holding ``span.gt-input``.
        output_item_name: Name of the produced item, passed to each recipe.
        output_item_count: Produced amount, passed to each recipe.

    Returns:
        The parsed recipes. Empty when the input block or the recipe type is
        missing, or when a furnace container has no readable ingredients.
    """
    input_span = container.find('span', class_='gt-input')
    if not input_span:
        logger.error(f'Ошибка: нет блока gt-input в {container}')
        return []

    recipe_info = extract_recipe_type(container)
    if not recipe_info:
        return []

    def read_slot(slot_item):
        # Prefer sprite-span data; fall back to a plain image.
        return extract_data_from_sprite_span(slot_item) or extract_data_from_sprite_img(slot_item)

    recipes = []

    if 'Печь' not in recipe_info:
        # Other machines: a single recipe with every ingredient.
        items, components = [], []
        for item_span in input_span.find_all('span', class_='invslot-item'):
            data = read_slot(item_span)
            if not data:
                continue
            item = CraftItem(data.title, data.img_url, data.shift)
            if item.name == 'Энергия':
                continue  # skip ic2 stuff
            items.append(item)
            components.append(CraftComponent(-1, item.name, data.amount))
        recipes.append(ParsedRecipeInput(
            items=items,
            recipe=CraftRecipe(output_item_name, output_item_count, recipe_info),
            components=components,
            source_links=[],
        ))
        return recipes

    # Furnace: each item in the left slot is a separate recipe.
    left_cell = input_span.find('span', class_='invslot')
    if not left_cell:
        return recipes
    for item_span in left_cell.find_all('span', class_='invslot-item'):
        data = read_slot(item_span)
        if not data:
            continue
        item = CraftItem(data.title, data.img_url, data.shift)
        recipe = CraftRecipe(output_item_name, output_item_count, recipe_info)
        component = CraftComponent(-1, item.name, data.amount)
        recipes.append(ParsedRecipeInput(
            items=[item],
            recipe=recipe,
            components=[component],
            source_links=[],
        ))
    return recipes
def extract_recipe_type(container) -> str|None:
    """Read the recipe type from the container's ``span.minetip``.

    The raw value comes from the ``data-minetip-text`` attribute and is
    passed through ``clean_recipe_type_str``.

    Args:
        container: Craft container element to search.

    Returns:
        The cleaned recipe type, or None (after logging an error) when the
        ``minetip`` span or its ``data-minetip-text`` attribute is missing.
    """
    processor_container = container.find('span', class_='minetip')
    if processor_container is None:
        logger.error(f'Ошибка для {container}: не найден тип машинного рецепта')
        return None

    raw_type = processor_container.get('data-minetip-text')
    if raw_type is None:
        # Without this guard the cleaner would fail calling .replace on None.
        logger.error(f'Ошибка для {processor_container}: не найден атрибут data-minetip-text')
        return None
    return clean_recipe_type_str(raw_type)
def clean_recipe_type_str(input: str):
    """Strip markup from a raw recipe-type string.

    Every '&3' marker (presumably a formatting code) is removed, then
    everything from '//&7Модификация:/' to the end of the text is cut off,
    and surrounding whitespace is trimmed.

    Args:
        input: Raw recipe-type text.

    Returns:
        The cleaned recipe-type text.
    """
    without_marker = input.replace('&3', '')
    return re.sub(r"\s*//&7Модификация:/.*$", "", without_marker).strip()
def extract_data_from_sprite_img(input_span_name_container) -> SpriteData|None:
    """Read item data from an ``<img>`` inside the slot container.

    The title comes from the image's ``alt`` attribute and the image URL from
    ``src``. The shift is fixed at ``(0, 0)`` and no source link is set.

    Args:
        input_span_name_container: Slot element expected to contain an image.

    Returns:
        The extracted data, or None when there is no ``<img>`` (silently) or
        when ``alt`` or ``src`` is missing (after logging an error).
    """
    img_tag = input_span_name_container.find('img')
    if img_tag is None:
        return None

    alt_text = img_tag.get('alt')
    if alt_text is None:
        logger.error(f'ошибка для \n{input_span_name_container}\n: не найдено описание картинки!')
        return None

    src = img_tag.get('src')
    if src is None:
        logger.error(f'ошибка для \n{input_span_name_container}\n: не найден url картинки!')
        return None

    return SpriteData(title=alt_text, shift=(0, 0), img_url=src, source_link=None)
def extract_data_from_sprite_span(input_span_name_container) -> SpriteData|None:
    """Read item data from a ``span.sprite`` inside the slot container.

    The title comes from the sprite span's ``title`` attribute, the image
    identifier from its ``*-sprite`` CSS class, and the shift from its inline
    style. The amount is read from ``span.invslot-stacksize`` when present
    and defaults to 1 otherwise.

    Args:
        input_span_name_container: Slot element expected to contain a sprite.

    Returns:
        The extracted data, or None when there is no sprite span (silently)
        or when the title, amount, sprite class or shift cannot be read
        (after logging an error).
    """
    input_item_sprite_span = input_span_name_container.find('span', class_='sprite')
    # The sprite container may legitimately be absent, so this is not an error.
    if input_item_sprite_span is None:
        return None

    input_item_title = input_item_sprite_span.get('title')
    if input_item_title is None:
        logger.error(f'ошибка для \n{input_span_name_container}\n: не найдено заголовка предмета')
        return None

    output_amount = 1
    amount_container = input_span_name_container.find('span', class_='invslot-stacksize')
    if amount_container is not None:
        logger.info(f'Для объекта {input_item_title} найдено количество : {amount_container.text}')
        try:
            output_amount = int(amount_container.text)
        except ValueError:
            # Treat a non-numeric stack size like the other malformed-markup
            # cases in this function instead of aborting the whole parse.
            logger.error(f'Ошибка для \n{amount_container}\n: количество не является целым числом')
            return None

    item_source_link = get_item_source_link(input_span_name_container)

    input_item_img_url = extract_img_classname(input_item_sprite_span)
    if input_item_img_url is None:
        # Log the span that was inspected; the URL itself is always None here.
        logger.error(f'Ошибка для \n{input_item_sprite_span}\n: не найден файл спрайта')
        return None

    input_item_img_shift = extract_img_shift(input_item_sprite_span)
    if input_item_img_shift is None:
        logger.error(f'Ошибка для \n{input_item_sprite_span}\n: не найдено смещения для спрайта')
        return None

    return SpriteData(
        title=input_item_title,
        shift=input_item_img_shift,
        img_url=input_item_img_url,
        source_link=item_source_link,
        amount=output_amount,
    )
def extract_img_shift(output_item_sprite_span):
    """Parse the sprite offset from the element's inline ``style``.

    The first occurrence of two integer pixel values separated by whitespace
    (for example ``-16px -32px``) is taken as the offset.

    Args:
        output_item_sprite_span: Element exposing ``get('style')``, or None.

    Returns:
        An ``(x, y)`` tuple of ints, or None when the element is None, has no
        ``style`` attribute, or the style has no matching pixel pair.
    """
    style = None if output_item_sprite_span is None else output_item_sprite_span.get('style')
    if style is None:
        return None

    # Two numbers (optionally negative), each followed by 'px'.
    found = re.search(r'(-?\d+)px\s+(-?\d+)px', style)
    if found is None:
        return None

    shift_x, shift_y = (int(group) for group in found.groups())
    return (shift_x, shift_y)
def extract_img_classname(sprite_span):
    """Return the first CSS class of the element that ends with ``-sprite``.

    For a class list such as ``sprite industrialcraft-2-inv-sprite`` the
    result is ``industrialcraft-2-inv-sprite``.

    Args:
        sprite_span: Element exposing ``get('class', default)``, or None.

    Returns:
        The matching class name, or None when the element is None or no
        class ends with ``-sprite``.
    """
    if sprite_span is None:
        return None
    class_names = sprite_span.get('class', [])
    return next((name for name in class_names if name.endswith('-sprite')), None)
def get_item_source_link(input_span_name_container) -> str|None:
link_container = input_span_name_container.find('a')
if(link_container is None):
return None
link = link_container.get('href')
if(link is None):
return None
return f'https://ru.minecraft.wiki{link}'