232 lines
9.4 KiB
Python
232 lines
9.4 KiB
Python
from collections import Counter
|
||
import re
|
||
from bs4 import BeautifulSoup
|
||
from craft import CraftComponent, CraftItem, CraftRecipe
|
||
from craft_storage import CraftStorage
|
||
from dto import ParsedItem, ParsedRecipeInput, SpriteData
|
||
from loguru import logger
|
||
|
||
def extract_crafts(soup: BeautifulSoup, storage: CraftStorage) -> set[str]:
    """Scan a page for crafting-table recipes and register them in storage.

    Every ``div`` with class ``mcui-Crafting_Table-container`` is treated as
    one recipe. Containers that are hidden, or whose output/input cannot be
    parsed, are skipped.

    Args:
        soup: Parsed page to search.
        storage: Receives the items, recipes and components that are found.

    Returns:
        The source links gathered from recipe inputs. Links are collected
        before the duplicate check, so they are returned even for recipes
        that were already stored.
    """
    containers = soup.find_all("div", class_="mcui-Crafting_Table-container")
    if not containers:
        logger.info('Классических рецептов не найдено')
        return set()

    collected_links: set[str] = set()

    for container in containers:
        if check_is_hidden(container):
            continue

        result = parse_craft_item(container)
        if result is None:
            continue

        parsed_input = parse_craft_components_and_recipe(container, result.item.name, result.amount)
        if parsed_input is None:
            continue

        collected_links.update(parsed_input.source_links)

        # A falsy answer means this recipe signature is already known.
        is_new = storage.try_add_recipe_signature(result.item.name, parsed_input.components, 'classic')
        if not is_new:
            continue

        storage.add_item(result.item)
        for ingredient in parsed_input.items:
            storage.add_item(ingredient)

        # Components are linked to the recipe only once storage has issued its id.
        recipe_id = storage.add_recipe(parsed_input.recipe)
        for component in parsed_input.components:
            component.recipe_id = recipe_id
            storage.add_component(component)
        logger.info(f'Добавлен рецепт крафта для предмета {result.item.name}')

    return collected_links
|
||
|
||
def check_is_hidden(container : BeautifulSoup) -> bool:
    """Tell whether the container sits inside a ``table`` with class ``collapsed``.

    A warning is logged for every container reported as hidden.
    """
    collapsed_table = container.find_parent('table', class_='collapsed')
    if not collapsed_table:
        return False

    logger.warning(f'{container} Не будет исследован. Причина: помечен как скрытый')
    return True
|
||
|
||
def parse_craft_item(container) -> ParsedItem|None:
    """Parse the output slot of a crafting container.

    The output cell is the ``span.mcui-output`` element; the item itself is
    read from the ``span.invslot-item`` inside it, first as a sprite span and,
    failing that, as a plain ``<img>``.

    Args:
        container: Element holding one crafting recipe.

    Returns:
        A ``ParsedItem`` with the crafted item and the amount reported by the
        sprite data, or ``None`` (after logging an error) when any required
        element is missing.
    """
    output_span = container.find('span', class_='mcui-output')
    if output_span is None:
        logger.error(f'ошибка для \n{container}\n: не найдено ячейки с результатом!')
        return None

    slot = output_span.find('span', class_='invslot-item')
    if slot is None:
        logger.error(f'ошибка для \n{output_span}\n: не найден текстовый контейнер!')
        return None

    # Prefer the sprite span; fall back to a plain image when there is none.
    sprite = extract_data_from_sprite_span(slot)
    if sprite is None:
        sprite = extract_data_from_sprite_img(slot)
        if sprite is None:
            logger.error(f'ошибка для \n{slot}\n: не удалось извлечь картинку и заголовок ни одним из способов')
            return None

    item = CraftItem(sprite.title, sprite.img_url, sprite.shift)
    # Use the amount carried by the sprite data instead of a constant 1, so
    # that a parsed stack size is not thrown away.
    return ParsedItem(item=item, amount=sprite.amount)
|
||
|
||
def parse_craft_components_and_recipe(container, output_item_name: str, output_count = 1) -> ParsedRecipeInput|None:
    """Parse the input grid of a crafting container.

    Every ``span.invslot-item`` inside ``span.mcui-input`` is read as one
    ingredient, first as a sprite span and, failing that, as a plain
    ``<img>``. Ingredients are then grouped by name into components whose
    count is the number of slots holding that name.

    Args:
        container: Element holding one crafting recipe.
        output_item_name: Name of the crafted item, stored on the recipe.
        output_count: Amount of the crafted item, stored on the recipe.

    Returns:
        A ``ParsedRecipeInput`` with the ingredient items (one per slot), the
        recipe, the grouped components and the source links that were found,
        or ``None`` (after logging an error) when the grid is missing, empty,
        or contains a slot that cannot be read.
    """
    input_span = container.find('span', class_='mcui-input')
    if input_span is None:
        logger.error(f'ошибка для \n{container}\n: не найдено рецепта крафта')
        return None

    slot_containers = input_span.find_all('span', class_='invslot-item')
    if not slot_containers:
        logger.error(f'Ошибка для \n{input_span}\n: не найдено айтемов в рецепте')
        return None

    craft_items = []
    src_links = []

    for slot in slot_containers:
        # Prefer the sprite span; fall back to a plain image when there is none.
        sprite = extract_data_from_sprite_span(slot)
        if sprite is None:
            sprite = extract_data_from_sprite_img(slot)
            if sprite is None:
                logger.error(f'ошибка для \n{slot}\n: не удалось извлечь картинку и заголовок ни одним из способов')
                return None

        if sprite.source_link is not None:
            src_links.append(sprite.source_link)

        craft_items.append(CraftItem(sprite.title, sprite.img_url, sprite.shift))

    name_counts = Counter(item.name for item in craft_items)
    recipe = CraftRecipe(output_item_name, output_count, 'Верстак')

    # WARN: -1 is only a placeholder; the id will change when the recipe is saved.
    # No cap on most_common(): every distinct ingredient must become a component.
    craft_components = [
        CraftComponent(-1, name, count)
        for name, count in name_counts.most_common()
    ]

    return ParsedRecipeInput(
        items=craft_items,
        recipe=recipe,
        components=craft_components,
        source_links=src_links,
    )
|
||
|
||
def extract_data_from_sprite_img(input_span_name_container) -> SpriteData|None:
    """Build sprite data from a plain ``<img>`` inside the slot container.

    The title comes from the image's ``alt`` attribute and the image URL from
    its ``src``. The amount is fixed at 1, the shift at ``(0, 0)`` and no
    source link is set.

    Returns:
        ``SpriteData``, or ``None`` when there is no ``<img>``. A missing
        ``alt`` or ``src`` also yields ``None``, after logging an error.
    """
    img = input_span_name_container.find('img')
    if img is None:
        return None

    alt_text = img.get('alt')
    if alt_text is None:
        logger.error(f'ошибка для \n{input_span_name_container}\n: не найдено описание картинки!')
        return None

    src = img.get('src')
    if src is None:
        logger.error(f'ошибка для \n{input_span_name_container}\n: не найден url картинки!')
        return None

    return SpriteData(amount=1, title=alt_text, shift=(0, 0), img_url=src, source_link=None)
|
||
|
||
def extract_data_from_sprite_span(input_span_name_container) -> SpriteData|None:
    """Build sprite data from a ``span.sprite`` inside the slot container.

    Reads the item title from the sprite span's ``title`` attribute, the
    amount from an optional ``span.invslot-stacksize`` (default 1), the source
    link from the slot's anchor, and the sprite class name and pixel shift
    from the sprite span itself.

    Returns:
        ``SpriteData``, or ``None`` when the slot has no sprite span. A
        missing title, non-integer amount, missing sprite class or missing
        shift also yields ``None``, after logging an error.
    """
    sprite_span = input_span_name_container.find('span', class_='sprite')

    # The sprite container may legitimately be absent.
    if sprite_span is None:
        return None

    title = sprite_span.get('title')
    if title is None:
        logger.error(f'ошибка для \n{input_span_name_container}\n: не найдено заголовка предмета')
        return None

    amount = 1
    amount_container = input_span_name_container.find('span', class_='invslot-stacksize')
    if amount_container is not None:
        logger.info(f'Для объекта {title} найдено количество : {amount_container.text}')
        try:
            amount = int(amount_container.text)
        except ValueError:
            # Fail like every other branch here (log + None) instead of
            # letting one malformed stack size abort the whole scrape.
            logger.error(f'Ошибка для \n{amount_container}\n: количество не является целым числом')
            return None

    source_link = get_item_source_link(input_span_name_container)

    img_url = extract_img_classname(sprite_span)
    if img_url is None:
        # Log the span that was inspected; the lookup result is always None here.
        logger.error(f'Ошибка для \n{sprite_span}\n: не найден файл спрайта')
        return None

    shift = extract_img_shift(sprite_span)
    if shift is None:
        logger.error(f'Ошибка для \n{sprite_span}\n: не найдено смещения для спрайта')
        return None

    return SpriteData(
        amount=amount,
        title=title,
        shift=shift,
        img_url=img_url,
        source_link=source_link,
    )
|
||
|
||
def extract_img_shift(output_item_sprite_span):
    """Read the sprite offset from the element's inline ``style`` attribute.

    Returns:
        ``(x, y)`` as ints, taken from the first ``<int>px <int>px`` pair in
        the style text, or ``None`` when the element is ``None``, has no
        ``style`` attribute, or the style contains no such pair.
    """
    style = None if output_item_sprite_span is None else output_item_sprite_span.get('style')
    if style is None:
        return None

    # Two integers (each optionally negative), each immediately followed by 'px'.
    found = re.search(r'(-?\d+)px\s+(-?\d+)px', style)
    if found is None:
        return None

    x_text, y_text = found.groups()
    return (int(x_text), int(y_text))
|
||
|
||
def extract_img_classname(sprite_span):
    """Return the first class of the element whose name ends with ``-sprite``.

    Example: for the classes ``sprite industrialcraft-2-inv-sprite`` the
    result is ``industrialcraft-2-inv-sprite``.

    Args:
        sprite_span: Element exposing ``get('class', default)``, or ``None``.

    Returns:
        The matching class name, or ``None`` when the element is ``None``,
        has no classes, or none of them ends with ``-sprite``.
    """
    if sprite_span is None:
        return None

    classes = sprite_span.get('class', []) or []
    # The class attribute may arrive as one raw string instead of a list;
    # split it so it is not scanned character by character.
    if isinstance(classes, str):
        classes = classes.split()

    return next((cls for cls in classes if cls.endswith('-sprite')), None)
|
||
|
||
def get_item_source_link(input_span_name_container) -> str|None:
|
||
link_container = input_span_name_container.find('a')
|
||
if(link_container is None):
|
||
return None
|
||
link = link_container.get('href')
|
||
if(link is None):
|
||
return None
|
||
return f'https://ru.minecraft.wiki{link}'
|