Source code for nxbrew_dl.util.html_tools

import os
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

from .regex_tools import get_game_name, check_has_filetype, parse_languages



[docs]
def get_html_page(
    url,
    cache=False,
    cache_filename="index.html",
):
    """Get an HTML page as a soup

    Args:
        url (string): URL
        cache (bool): If True, the page is read from ``cache_filename`` when
            that file exists; otherwise it is downloaded and saved there.
            Defaults to False
        cache_filename (string): Filename to cache file to. Only used when
            ``cache`` is True. Defaults to "index.html"

    Returns:
        bs4.BeautifulSoup: The page content parsed with "html.parser"
    """

    if cache and os.path.exists(cache_filename):
        # Cache hit: no network access needed
        with open(cache_filename, mode="rb") as f:
            content = f.read()
    else:
        r = requests.get(url)
        content = r.content

        # Only persist successful responses, so that a transient error page
        # is not served back from the cache on every later call
        if cache and r.ok:
            with open(cache_filename, mode="wb") as f:
                f.write(content)

    return BeautifulSoup(content, "html.parser")




[docs]
def get_game_dict(
    general_config,
    regex_config,
    nxbrew_url,
):
    """Download the game index, and parse relevant info out of it

    Args:
        general_config (dict): General configuration. The "forbidden_titles"
            entry is used to skip index entries by their full name
        regex_config (dict): Regex configuration. The "nsp_variations",
            "xci_variations", "update_variations" and "dlc_variations"
            entries are used
        nxbrew_url (string): NXBrew URL

    Returns:
        dict: Keyed by game URL. Each value is a dict with the keys
            "long_name", "short_name", "url", "has_nsp", "has_xci",
            "has_update" and "has_dlc"

    Raises:
        ValueError: If the index container cannot be found in the page, or
            if two index entries share the same URL
    """

    game_dict = {}

    index_url = urljoin(nxbrew_url, "Index/game-index/games/")

    # Load in the HTML
    game_html = get_html_page(
        index_url,
        cache_filename="game_index.html",
    )
    index = game_html.find("div", {"id": "easyindex-index"})

    # find() returns None when the div is missing; fail with a clear message
    # rather than an AttributeError further down
    if index is None:
        raise ValueError(f"Could not find the game index at: {index_url}")

    nsp_xci_variations = regex_config["nsp_variations"] + regex_config["xci_variations"]
    for item in index.find_all("li"):

        # Get the long name, the short name, and the URL
        long_name = item.text

        # If there are any forbidden titles, skip them here
        if long_name in general_config["forbidden_titles"]:
            continue

        # An entry without a link has no URL to key on, so skip it
        link = item.find("a")
        if link is None:
            continue

        short_name = get_game_name(long_name, nsp_xci_variations=nsp_xci_variations)
        game_url = link.get("href")

        if game_url in game_dict:
            raise ValueError(f"Duplicate URLs found: {game_url}")

        # Pull out whether NSP/XCI, and whether it has updates/DLCs. These
        # are looked for in whatever is left once the short name is removed
        remaining_name = long_name.replace(short_name, "")
        has_nsp = check_has_filetype(remaining_name, regex_config["nsp_variations"])
        has_xci = check_has_filetype(remaining_name, regex_config["xci_variations"])
        has_update = check_has_filetype(
            remaining_name, regex_config["update_variations"]
        )
        has_dlc = check_has_filetype(remaining_name, regex_config["dlc_variations"])

        game_dict[game_url] = {
            "long_name": long_name,
            "short_name": short_name,
            "url": game_url,
            "has_nsp": has_nsp,
            "has_xci": has_xci,
            "has_update": has_update,
            "has_dlc": has_dlc,
        }

    return game_dict




[docs]
def get_languages(soup, lang_dict):
    """Parse languages from a soup

    The first <strong> tag whose text contains "language" (case-insensitive)
    and which has a following sibling is used; the sibling's text is passed
    to parse_languages.

    Args:
        soup (bs4.BeautifulSoup): soup object to find languages in
        lang_dict (dict): Dictionary of languages

    Returns:
        The result of parse_languages for the matched text, or None if no
        suitable <strong> tag is found
    """

    for strong_tag in soup.find_all("strong"):
        if "language" not in strong_tag.text.lower():
            continue

        # The language list lives in the node right after the <strong> tag.
        # If there is no such node, keep looking rather than crash
        sibling = strong_tag.next_sibling
        if sibling is None:
            continue

        return parse_languages(
            sibling.text,
            lang_dict=lang_dict,
        )

    return None




[docs]
def get_thumb_url(soup):
    """Parse thumbnail URL from a soup

    The URL is read from the "content" attribute of the page's
    <meta property="og:image"> tag.

    Args:
        soup (bs4.BeautifulSoup): soup object to find the thumbnail URL in

    Returns:
        The thumbnail URL, or None if the page has no og:image meta tag or
        the tag has no "content" attribute
    """

    img = soup.find("meta", {"property": "og:image"})

    # find() returns None when the tag is absent
    if img is None:
        return None

    return img.get("content")