import os
from urllib.parse import urljoin
import requests
from bs4 import BeautifulSoup
from .regex_tools import get_game_name, check_has_filetype, parse_languages
[docs]
def get_html_page(
    url,
    cache=False,
    cache_filename="index.html",
    timeout=30,
):
    """Get an HTML page as a soup

    When caching is enabled and the cache file already exists, the page is
    parsed from that file and no request is made. Otherwise the page is
    downloaded and, if caching is enabled, its raw bytes are written to the
    cache file before parsing.

    Args:
        url (string): URL
        cache (bool): If True, will save the page as a cache and read it
            back on later calls. Defaults to False
        cache_filename (string): Filename to cache file to. Defaults to "index.html"
        timeout (float): Timeout in seconds, passed straight to
            requests.get. Defaults to 30

    Returns:
        bs4.BeautifulSoup: The parsed page

    Raises:
        requests.HTTPError: If the server answers with an error status
    """

    # Cache hit: parse the stored bytes, no network access needed
    if cache and os.path.exists(cache_filename):
        with open(cache_filename, mode="rb") as f:
            return BeautifulSoup(f.read(), "html.parser")

    r = requests.get(url, timeout=timeout)

    # Fail before caching/parsing, so an error page never ends up in
    # the cache file and gets reused on every later call
    r.raise_for_status()
    content = r.content

    if cache:
        with open(cache_filename, mode="wb") as f:
            f.write(content)

    return BeautifulSoup(content, "html.parser")
[docs]
def get_game_dict(
    general_config,
    regex_config,
    nxbrew_url,
):
    """Build a dictionary of games from the NXBrew game index

    Gets the index page, walks every list entry inside the index
    container, and records the names, link and file type flags for
    each one.

    Args:
        general_config (dict): General configuration
        regex_config (dict): Regex configuration
        nxbrew_url (string): NXBrew URL

    Returns:
        dict: One info dictionary per game, keyed by the game's URL

    Raises:
        ValueError: If two entries in the index share the same URL
    """

    index_url = urljoin(nxbrew_url, "Index/game-index/games/")

    # NOTE(review): cache is not switched on here, so cache_filename
    # looks unused for this call - confirm whether that is intended
    index_page = get_html_page(
        index_url,
        cache_filename="game_index.html",
    )
    index_div = index_page.find("div", {"id": "easyindex-index"})

    # The short name lookup takes both filetype lists together
    filetype_variations = (
        regex_config["nsp_variations"] + regex_config["xci_variations"]
    )

    # Output key -> regex config key, in the order they get checked
    flag_sources = (
        ("has_nsp", "nsp_variations"),
        ("has_xci", "xci_variations"),
        ("has_update", "update_variations"),
        ("has_dlc", "dlc_variations"),
    )

    games = {}
    for entry in index_div.find_all("li"):
        full_title = entry.text

        # Forbidden titles never make it into the dictionary
        if full_title in general_config["forbidden_titles"]:
            continue

        title = get_game_name(full_title, nsp_xci_variations=filetype_variations)
        link = entry.find("a").get("href")

        if link in games:
            raise ValueError(f"Duplicate URLs found: {link}")

        # Whatever is left once the short name is removed is what gets
        # searched for the NSP/XCI/update/DLC markers
        leftover = full_title.replace(title, "")

        info = {
            "long_name": full_title,
            "short_name": title,
            "url": link,
        }
        for flag, config_key in flag_sources:
            info[flag] = check_has_filetype(leftover, regex_config[config_key])

        games[link] = info

    return games
[docs]
def get_languages(soup, lang_dict):
    """Parse languages from a soup

    Looks through the <strong> tags for one whose text mentions
    "language" (case-insensitive), takes the text of the element right
    after it, and hands that text to parse_languages.

    Args:
        soup (bs4.BeautifulSoup): soup object to find languages in
        lang_dict (dict): Dictionary of languages

    Returns:
        Whatever parse_languages returns for the language text

    Raises:
        ValueError: If no usable language entry is found in the soup
    """

    lang_str = None
    for strong in soup.find_all("strong"):
        if "language" not in strong.text.lower():
            continue

        # A label with nothing after it carries no language text
        sibling = strong.next_sibling
        if sibling is None:
            continue

        # Keep overwriting, so the last matching tag wins
        lang_str = sibling.text

    # Fail with a clear message, rather than an unbound local further down
    if lang_str is None:
        raise ValueError("Could not find any language information in the page")

    return parse_languages(
        lang_str,
        lang_dict=lang_dict,
    )
[docs]
def get_thumb_url(soup):
    """Get the thumbnail URL out of a soup

    The URL is read from the "content" attribute of the first <meta>
    tag whose "property" is "og:image".

    Args:
        soup (bs4.BeautifulSoup): soup object to find the thumbnail in

    Returns:
        The value of that tag's "content" attribute
    """
    return soup.find("meta", {"property": "og:image"})["content"]