262 lines
10 KiB
Python
262 lines
10 KiB
Python
import re
|
||
from bs4 import BeautifulSoup
|
||
|
||
from craft import CraftComponent, CraftItem, CraftRecipe
|
||
from craft_storage import CraftStorage
|
||
from dto import ParsedItem, ParsedRecipeInput, SpriteData
|
||
from loguru import logger
|
||
|
||
|
||
def extract_crafts(soup: BeautifulSoup, storage: CraftStorage) -> set[str]:
    """Register every visible machine-craft recipe found in ``soup``.

    Each ``<div class="craft-gui">`` is parsed into an output item and one or
    more recipes. A recipe is written to ``storage`` (items, recipe,
    components) only when ``storage.try_add_recipe_signature`` accepts it.

    Containers that are hidden (see ``check_is_hidden``) or whose output slot
    cannot be parsed are skipped.

    Args:
        soup: Parsed page to scan for craft GUIs.
        storage: Destination for items, recipes and components.

    Returns:
        A set of source links. NOTE(review): nothing in this function adds to
        the set, so it is currently always empty.
    """
    craft_containers = soup.find_all("div", class_="craft-gui")
    if not craft_containers:
        logger.info('Машинных рецептов не найдено')
        return set()

    src_links: set[str] = set()

    for container in craft_containers:
        if check_is_hidden(container):
            continue

        output_item = parse_craft_item(container)
        if output_item is None:
            # parse_craft_item already logged the reason; without an output
            # item there is nothing to attach a recipe to.
            continue

        # One container may yield several recipes (e.g. a furnace).
        recipes_list = parse_craft_components_and_recipe(
            container, output_item.item.name, output_item.amount
        )

        for recipe_input in recipes_list:
            if not storage.try_add_recipe_signature(
                output_item.item.name,
                recipe_input.components,
                recipe_input.recipe.craft_type,
            ):
                continue  # duplicate recipe

            storage.add_item(output_item.item)
            for input_item in recipe_input.items:
                storage.add_item(input_item)

            recipe_id = storage.add_recipe(recipe_input.recipe)
            for component in recipe_input.components:
                # Components are created with a placeholder recipe id and get
                # the real one only after the recipe has been stored.
                component.recipe_id = recipe_id
                storage.add_component(component)

            # A recipe can end up with no components (all inputs filtered
            # out), so indexing [0] unconditionally would raise IndexError.
            first_ingredient = (
                recipe_input.components[0].input_item
                if recipe_input.components
                else None
            )
            logger.info(f'Добавлен рецепт машинного крафта для {output_item.item.name} '
                        f'(ингредиент: {first_ingredient})')

    return src_links
|
||
|
||
def check_is_hidden(container: BeautifulSoup) -> bool:
    """Report whether ``container`` lies inside a ``<table class="collapsed">``.

    A warning that includes the container is logged when it does.
    """
    collapsed_ancestor = container.find_parent('table', class_='collapsed')
    if not collapsed_ancestor:
        return False

    logger.warning(f'{container} Не будет исследован. Причина: помечен как скрытый')
    return True
|
||
|
||
def parse_craft_item(container) -> ParsedItem|None:
    """Read the output item of a craft GUI.

    Looks up the ``gt-output`` span, then its ``invslot-item`` span, and
    extracts sprite data from it: first via ``extract_data_from_sprite_span``,
    then, if that yields nothing, via ``extract_data_from_sprite_img``.

    Returns:
        A ``ParsedItem`` holding the item and its amount (the amount is fixed
        at 1 when the image fallback is used), or ``None`` after logging an
        error when any step fails.
    """
    result_slot = container.find('span', class_='gt-output')
    if result_slot is None:
        logger.error(f'ошибка для \n{container}\n: не найдено ячейки с результатом!')
        return None

    name_holder = result_slot.find('span', class_='invslot-item')
    if name_holder is None:
        logger.error(f'ошибка для \n{result_slot}\n: не найден текстовый контейнер!')
        return None

    sprite = extract_data_from_sprite_span(name_holder)
    if sprite is None:
        # No sprite span: fall back to a plain <img>.
        sprite = extract_data_from_sprite_img(name_holder)
        if sprite is None:
            logger.error(f'ошибка для \n{name_holder}\n: не удалось извлечь картинку и заголовок ни одним из способов')
            return None
        amount = 1
    else:
        amount = sprite.amount

    return ParsedItem(
        item=CraftItem(sprite.title, sprite.img_url, sprite.shift),
        amount=amount,
    )
|
||
|
||
def parse_craft_components_and_recipe(container, output_item_name: str, output_item_count: int) -> list[ParsedRecipeInput]:
    """Build the recipe inputs described by a craft GUI container.

    When the recipe type contains ``'Печь'`` (furnace), every item in the
    first ``invslot`` span of the input block becomes its own single-component
    recipe. For any other type, all input items form one recipe; items named
    ``'Энергия'`` are left out of it.

    Args:
        container: The craft GUI element.
        output_item_name: Name of the produced item, passed to ``CraftRecipe``.
        output_item_count: Produced amount, passed to ``CraftRecipe``.

    Returns:
        A list of ``ParsedRecipeInput``; empty when the ``gt-input`` block or
        the recipe type is missing.
    """
    input_span = container.find('span', class_='gt-input')
    if not input_span:
        logger.error(f'Ошибка: нет блока gt-input в {container}')
        return []

    recipe_info = extract_recipe_type(container)
    if not recipe_info:
        return []

    def read_slot(slot):
        # Prefer sprite-span data, fall back to a plain <img>.
        return extract_data_from_sprite_span(slot) or extract_data_from_sprite_img(slot)

    if 'Печь' in recipe_info:
        # Furnace: each item in the first slot is a separate recipe.
        first_slot = input_span.find('span', class_='invslot')
        if not first_slot:
            return []

        furnace_recipes = []
        for slot in first_slot.find_all('span', class_='invslot-item'):
            data = read_slot(slot)
            if not data:
                continue
            item = CraftItem(data.title, data.img_url, data.shift)
            recipe = CraftRecipe(output_item_name, output_item_count, recipe_info)
            # -1 is a placeholder recipe id.
            component = CraftComponent(-1, item.name, data.amount)
            furnace_recipes.append(ParsedRecipeInput(
                items=[item],
                recipe=recipe,
                components=[component],
                source_links=[],
            ))
        return furnace_recipes

    # Other machines: a single recipe with all ingredients.
    items = []
    components = []
    for slot in input_span.find_all('span', class_='invslot-item'):
        data = read_slot(slot)
        if not data:
            continue
        item = CraftItem(data.title, data.img_url, data.shift)
        if item.name == 'Энергия':
            # Skip IC2 energy pseudo-items.
            continue
        items.append(item)
        components.append(CraftComponent(-1, item.name, data.amount))

    recipe = CraftRecipe(output_item_name, output_item_count, recipe_info)
    return [ParsedRecipeInput(
        items=items,
        recipe=recipe,
        components=components,
        source_links=[],
    )]
|
||
|
||
|
||
|
||
def extract_recipe_type(container) -> str|None:
    """Return the cleaned machine/recipe type of a craft GUI container.

    The type is read from the ``data-minetip-text`` attribute of the first
    ``<span class="minetip">`` and passed through ``clean_recipe_type_str``.

    Returns:
        The cleaned type string, or ``None`` (after logging an error) when the
        span or its ``data-minetip-text`` attribute is missing.
    """
    processor_container = container.find('span', class_='minetip')
    raw_type = None
    if processor_container is not None:
        raw_type = processor_container.get('data-minetip-text')

    # A minetip span without the attribute is treated like a missing span;
    # passing None on to the cleaner would raise AttributeError.
    if raw_type is None:
        logger.error(f'Ошибка для {container}: не найден тип машинного рецепта')
        return None

    return clean_recipe_type_str(raw_type)
|
||
|
||
def clean_recipe_type_str(input: str):
|
||
pattern = r"\s*//&7Модификация:/.*$"
|
||
|
||
input = input.replace('&3','')
|
||
input = re.sub(pattern, "", input).strip()
|
||
return input
|
||
|
||
|
||
def extract_data_from_sprite_img(input_span_name_container) -> SpriteData|None:
    """Build sprite data from a plain ``<img>`` inside an inventory slot.

    The image's ``alt`` becomes the title and its ``src`` the image URL; the
    shift is fixed at ``(0, 0)`` and no source link is set.

    Returns:
        A ``SpriteData``, or ``None`` when there is no ``<img>`` (silently) or
        when ``alt`` / ``src`` is missing (after logging an error).
    """
    img_tag = input_span_name_container.find('img')
    if img_tag is None:
        return None

    alt_text = img_tag.get('alt')
    src = img_tag.get('src')

    if alt_text is None:
        logger.error(f'ошибка для \n{input_span_name_container}\n: не найдено описание картинки!')
        return None

    if src is None:
        logger.error(f'ошибка для \n{input_span_name_container}\n: не найден url картинки!')
        return None

    return SpriteData(title=alt_text, shift=(0, 0), img_url=src, source_link=None)
|
||
|
||
def extract_data_from_sprite_span(input_span_name_container) -> SpriteData|None:
    """Build sprite data from a ``<span class="sprite">`` inside an inventory slot.

    Reads the title from the sprite span, the stack size from an optional
    ``invslot-stacksize`` span (default 1), the source link via
    ``get_item_source_link``, the sprite-sheet class via
    ``extract_img_classname`` and the offset via ``extract_img_shift``.

    Returns:
        A ``SpriteData``, or ``None`` when the slot has no sprite span
        (silently) or when the title, sprite class or offset cannot be
        determined (after logging an error).
    """
    input_item_sprite_span = input_span_name_container.find('span', class_='sprite')

    # The slot may hold no sprite span at all.
    if input_item_sprite_span is None:
        return None

    input_item_title = input_item_sprite_span.get('title')
    if input_item_title is None:
        logger.error(f'ошибка для \n{input_span_name_container}\n: не найдено заголовка предмета')
        return None

    output_amount = 1
    amount_container = input_span_name_container.find('span', class_='invslot-stacksize')
    if amount_container is not None:
        logger.info(f'Для объекта {input_item_title} найдено количество : {amount_container.text}')
        try:
            output_amount = int(amount_container.text)
        except ValueError:
            # Non-numeric stack-size text should not abort the whole parse;
            # keep the default amount of 1.
            logger.warning(
                f'Для объекта {input_item_title} не удалось разобрать количество '
                f'"{amount_container.text}", используется 1'
            )

    item_source_link = get_item_source_link(input_span_name_container)

    input_item_img_url = extract_img_classname(input_item_sprite_span)
    if input_item_img_url is None:
        # Log the span that was inspected; the class-name variable is always
        # None on this path.
        logger.error(f'Ошибка для \n{input_item_sprite_span}\n: не найден файл спрайта')
        return None

    input_item_img_shift = extract_img_shift(input_item_sprite_span)
    if input_item_img_shift is None:
        logger.error(f'Ошибка для \n{input_item_sprite_span}\n: не найдено смещения для спрайта')
        return None

    return SpriteData(
        title=input_item_title,
        shift=input_item_img_shift,
        img_url=input_item_img_url,
        source_link=item_source_link,
        amount=output_amount,
    )
|
||
|
||
def extract_img_shift(output_item_sprite_span):
    """Extract the ``(x, y)`` pixel offset from a sprite span's ``style``.

    Finds the first pair of integers (optionally negative), each followed by
    ``px`` and separated by whitespace.

    Returns:
        A tuple ``(x, y)`` of ints, or ``None`` when the span is ``None``, has
        no ``style`` attribute, or the style holds no such pair.
    """
    style = None if output_item_sprite_span is None else output_item_sprite_span.get('style')
    if style is None:
        return None

    # Two integers (sign allowed), each immediately followed by 'px'.
    found = re.search(r'(-?\d+)px\s+(-?\d+)px', style)
    if found is None:
        return None

    x_offset, y_offset = (int(group) for group in found.groups())
    return (x_offset, y_offset)
|
||
|
||
def extract_img_classname(sprite_span):
|
||
if(sprite_span is None):
|
||
return None
|
||
'sprite industrialcraft-2-inv-sprite'
|
||
classes = sprite_span.get('class', [])
|
||
for cls in classes:
|
||
if cls.endswith('-sprite'):
|
||
return cls
|
||
return None # Если не найдено
|
||
|
||
def get_item_source_link(input_span_name_container) -> str|None:
|
||
link_container = input_span_name_container.find('a')
|
||
if(link_container is None):
|
||
return None
|
||
link = link_container.get('href')
|
||
if(link is None):
|
||
return None
|
||
return f'https://ru.minecraft.wiki{link}'
|