File download_npm.py of Package audiobookshelf

#!/usr/bin/python3

"""
This module downloads npm packages for open build service. It reads a package-lock.json file,
downloads the tarballs for the packages, and creates a node_modules.tar.gz file containing the
extracted contents of the tarballs.
"""

import base64
import hashlib

import json
import os
import sys
import tarfile
import tempfile
import urllib.parse
import urllib.request

from dataclasses import dataclass


@dataclass
class PackageData:
    """
    Data for running process_package
    """

    # Destination path of the package inside node_modules
    # (the key of the entry in package-lock.json's "packages" map).
    package_dir: str
    # URL of the package tarball (the "resolved" field of the lockfile entry).
    archive_url: str
    # Hash algorithm name parsed from the "integrity" field (e.g. "sha512"),
    # or None when the lockfile entry has no integrity data.
    hash_method: str | None
    # Raw digest bytes base64-decoded from the "integrity" field,
    # or None when the lockfile entry has no integrity data.
    expected_hash: bytes | None
    # Directory where downloaded tarballs are cached between packages/runs.
    cache_dir: str
    # Root directory the tarball contents are extracted into.
    extract_dir: str
    # When True, skip checksum verification entirely.
    skip_validation: bool
    # When True, only warn (instead of aborting) if integrity data is missing.
    ignore_missing_integrity: bool
    # Number of additional download attempts allowed after a failure.
    retries: int


def process_package(data: PackageData) -> None:
    """
    Downloads an npm package, validates the signature, and extracts it to the node_modules folder.

    Aborts the whole process (sys.exit(-1)) when the download keeps failing,
    when the checksum does not match, or when integrity data is missing and
    data.ignore_missing_integrity is False.
    """
    print(f"Downloading package {data.archive_url}...", end="")

    # Mirror the URL path inside the cache directory so two packages can
    # never collide on the same cached file name.
    archive_path = os.path.join(
        data.cache_dir, urllib.parse.urlsplit(data.archive_url).path.lstrip("/")
    )
    os.makedirs(os.path.dirname(archive_path), exist_ok=True)

    if os.path.exists(archive_path):
        print(" Using cached archive.")
    else:
        # BUG FIX: the original loop decremented data.retries *and*
        # incremented a separate counter (halving the retry budget), and
        # once the budget was exhausted the `while True` loop retried
        # forever. Attempt at most retries + 1 downloads and abort cleanly
        # on persistent failure (for/else: else runs only without break).
        for attempt in range(data.retries + 1):
            try:
                urllib.request.urlretrieve(data.archive_url, archive_path)
                print(" Downloaded.")
                break
            except Exception as e:  # any download failure triggers a retry
                print(
                    f"\n  ERROR: Failed to download package from {data.archive_url}: {e}"
                )
                # Drop any partially written file so a later run cannot
                # mistake it for a valid cached archive.
                if os.path.exists(archive_path):
                    os.remove(archive_path)
                if attempt < data.retries:
                    print(" Download failed, retrying...", end="")
        else:
            sys.exit(-1)

    if data.skip_validation:
        print(" Skipping validation.")
    elif data.hash_method and data.expected_hash:
        print(f" Verifying {data.hash_method} checksum...", end="")
        archive_hash = hashlib.new(data.hash_method)
        # Hash in chunks to keep memory usage flat for large tarballs.
        with open(archive_path, "rb") as archive_handle:
            while chunk := archive_handle.read(8192):
                archive_hash.update(chunk)

        if archive_hash.digest() != data.expected_hash:
            print("\n  ERROR: Checksum mismatch!")
            print(f" Expected:   {data.expected_hash.hex()}")
            print(f" Calculated: {archive_hash.hexdigest()}")
            sys.exit(-1)
        else:
            print(" Succeeded.")
    elif data.ignore_missing_integrity:
        print("WARNING: No integrity field found, skipping checksum verification.")
    else:
        print("ERROR: No integrity field found.")
        sys.exit(-1)

    with tarfile.open(archive_path) as archive:
        combined_dir = os.path.join(data.extract_dir, data.package_dir)
        # Intentionally fails if the directory already exists: two lockfile
        # entries must never extract into the same location.
        os.makedirs(combined_dir)
        print(" Extracting... ", end="")
        # filter_npm_package keeps only entries under "package/" and strips
        # that prefix from their names.
        archive.extractall(combined_dir, filter=filter_npm_package)
        print("Done.")


def create_archive(in_dir: str, out_name: str, out_dir: str) -> None:
    """
    Saves the node_modules folder to a tarball.

    Every top-level entry of in_dir is added to a gzipped tar named
    out_name inside out_dir, using the entry's bare name as its arcname.
    """
    target = os.path.join(out_dir, out_name)
    print(f"Created archive {target}...", end="")
    with tarfile.open(target, "w:gz") as bundle:
        for entry in os.listdir(in_dir):
            bundle.add(os.path.join(in_dir, entry), entry)
    print(" Done.")


def filter_npm_package(member: tarfile.TarInfo, path: str) -> tarfile.TarInfo | None:
    """
    Filters out non-package files from the npm tarball and
    removes the leading 'package/' from the paths.
    """
    del path
    if member.name.startswith("package/"):
        return member.replace(name=member.name[8:])
    return None


def process(lock_url: str, archive_name: str, out_dir: str, num_retries: int):
    """
    Main function for processing a package-lock.json file and creating a
    node_modules tarball.

    Fetches the lockfile from lock_url, downloads/verifies/extracts every
    entry that has a "resolved" URL, then bundles the result as archive_name
    in out_dir.
    """
    with tempfile.TemporaryDirectory() as workdir:
        modules_root = os.path.join(workdir, "node_modules")
        download_cache = os.path.join(workdir, "cache")
        os.mkdir(modules_root)
        os.mkdir(download_cache)

        with urllib.request.urlopen(lock_url) as response:
            lock_packages = json.load(response)["packages"]

        for target_dir, info in lock_packages.items():
            # Entries without a "resolved" URL (e.g. the root package)
            # have nothing to download.
            if "resolved" not in info:
                continue

            # The integrity field looks like "<algorithm>-<base64 digest>".
            if "integrity" in info:
                algorithm, digest_b64 = info["integrity"].split("-")[:2]
                method = algorithm.casefold()
                digest = base64.b64decode(digest_b64)
            else:
                method = None
                digest = None

            process_package(
                PackageData(
                    archive_url=info["resolved"],
                    package_dir=target_dir,
                    hash_method=method,
                    expected_hash=digest,
                    cache_dir=download_cache,
                    extract_dir=modules_root,
                    skip_validation=False,
                    ignore_missing_integrity=False,
                    retries=num_retries,
                )
            )
        create_archive(modules_root, archive_name, out_dir)


if __name__ == "__main__":
    # (lockfile URL, output archive) pairs for the server and client builds.
    jobs = (
        (
            "https://raw.githubusercontent.com/advplyr/audiobookshelf/refs/tags/v2.33.0/package-lock.json",
            "server_node_modules.tar.gz",
        ),
        (
            "https://raw.githubusercontent.com/advplyr/audiobookshelf/refs/tags/v2.33.0/client/package-lock.json",
            "client_node_modules.tar.gz",
        ),
    )
    for lock_url, archive_name in jobs:
        process(lock_url, archive_name, ".", 3)
openSUSE Build Service is sponsored by