File download_npm.py of Package audioboookshelf
#!/usr/bin/python3
"""
This module downloads npm packages for the Open Build Service. It reads a package-lock.json file,
downloads the tarball of every package listed in it, and creates a node_modules.tar.gz file
containing the extracted contents of those tarballs.
"""
import base64
import hashlib
import json
import os
import sys
import tarfile
import tempfile
import urllib.parse
import urllib.request
from dataclasses import dataclass
@dataclass
class PackageData:
"""
Data for running process_package
"""
package_dir: str
archive_url: str
hash_method: str | None
expected_hash: bytes | None
cache_dir: str
extract_dir: str
skip_validation: bool
ignore_missing_integrity: bool
retries: int
def _download_with_retries(url: str, destination: str, retries: int) -> None:
    """
    Fetches url into destination, allowing up to `retries` additional attempts

    Exits the process with status -1 once every attempt has failed.
    """
    attempts = max(retries, 0) + 1
    for attempt in range(1, attempts + 1):
        try:
            urllib.request.urlretrieve(url, destination)
        except Exception as e:  # deliberately broad: any failure should trigger a retry
            print(f"\n ERROR: Failed to download package from {url}: {e}")
            # A partial file must not survive, otherwise a later run would
            # treat it as a valid cached archive.
            try:
                os.remove(destination)
            except FileNotFoundError:
                pass
            if attempt < attempts:
                print(" Download failed, retrying...", end="")
        else:
            print(" Downloaded.")
            return
    print(f" ERROR: Giving up on {url} after {attempts} attempt(s).")
    sys.exit(-1)


def process_package(data: PackageData) -> None:
    """
    Downloads an npm package, validates its checksum, and extracts it to the node_modules folder

    The tarball is stored below data.cache_dir at the path component of
    data.archive_url and is reused when a file already exists there. A failed
    download is retried at most data.retries times; data itself is not modified.

    The process exits with status -1 when the download keeps failing, when the
    checksum does not match, or when no hash data is available and
    data.ignore_missing_integrity is false.
    """
    print(f"Downloading package {data.archive_url}...", end="")
    archive_path = os.path.join(
        data.cache_dir, urllib.parse.urlsplit(data.archive_url).path.lstrip("/")
    )
    os.makedirs(os.path.dirname(archive_path), exist_ok=True)
    if os.path.exists(archive_path):
        print(" Using cached archive.")
    else:
        _download_with_retries(data.archive_url, archive_path, data.retries)

    if data.skip_validation:
        print(" Skipping validation.")
    elif data.hash_method and data.expected_hash:
        print(f" Verifying {data.hash_method} checksum...", end="")
        archive_hash = hashlib.new(data.hash_method)
        # Hash in fixed-size chunks so large tarballs are never fully in memory.
        with open(archive_path, "rb") as archive_handle:
            while chunk := archive_handle.read(8192):
                archive_hash.update(chunk)
        if archive_hash.digest() != data.expected_hash:
            print("\n ERROR: Checksum mismatch!")
            print(f" Expected: {data.expected_hash.hex()}")
            print(f" Calculated: {archive_hash.hexdigest()}")
            sys.exit(-1)
        print(" Succeeded.")
    elif data.ignore_missing_integrity:
        print("WARNING: No integrity field found, skipping checksum verification.")
    else:
        print("ERROR: No integrity field found.")
        sys.exit(-1)

    with tarfile.open(archive_path) as archive:
        combined_dir = os.path.join(data.extract_dir, data.package_dir)
        # The directory may already exist, e.g. when a nested package was
        # extracted below it first.
        os.makedirs(combined_dir, exist_ok=True)
        print(" Extracting... ", end="")
        archive.extractall(combined_dir, filter=filter_npm_package)
        print("Done.")
def create_archive(in_dir: str, out_name: str, out_dir: str) -> None:
    """
    Saves the node_modules folder to a tarball

    Every entry directly inside in_dir is added under its own name, so the
    in_dir prefix itself does not appear in the archive. The gzip-compressed
    result is written to out_dir/out_name.
    """
    output_archive = os.path.join(out_dir, out_name)
    print(f"Creating archive {output_archive}...", end="")
    with tarfile.open(output_archive, "w:gz") as tar:
        # os.listdir returns entries in arbitrary order; sort them so the
        # member order does not vary between runs.
        for entry in sorted(os.listdir(in_dir)):
            tar.add(os.path.join(in_dir, entry), arcname=entry)
    print(" Done.")
def filter_npm_package(member: tarfile.TarInfo, path: str) -> tarfile.TarInfo | None:
"""
Filters out non-package files from the npm tarball and
removes the leading 'package/' from the paths.
"""
del path
if member.name.startswith("package/"):
return member.replace(name=member.name[8:])
return None
def process(lock_url: str, archive_name: str, out_dir: str, num_retries: int):
    """
    Builds a node_modules tarball from the package-lock.json found at lock_url

    The lock file is fetched, every entry of its "packages" mapping that has a
    "resolved" URL is handed to process_package, and the resulting tree is
    packed into out_dir/archive_name. All intermediate files live in a
    temporary directory that is removed afterwards.
    """
    with tempfile.TemporaryDirectory() as workspace:
        modules_root = os.path.join(workspace, "node_modules")
        os.mkdir(modules_root)
        download_cache = os.path.join(workspace, "cache")
        os.mkdir(download_cache)

        with urllib.request.urlopen(lock_url) as response:
            lock_packages = json.load(response)["packages"]

        for rel_dir, entry in lock_packages.items():
            # Entries without a download URL are skipped.
            if "resolved" not in entry:
                continue
            tarball_url = entry["resolved"]

            # The integrity value is split on "-": the first piece names the
            # hash algorithm, the second is the base64-encoded digest.
            if "integrity" in entry:
                pieces = entry["integrity"].split("-")
                algorithm = pieces[0].casefold()
                digest = base64.b64decode(pieces[1])
            else:
                algorithm = None
                digest = None

            process_package(
                PackageData(
                    package_dir=rel_dir,
                    archive_url=tarball_url,
                    hash_method=algorithm,
                    expected_hash=digest,
                    cache_dir=download_cache,
                    extract_dir=modules_root,
                    skip_validation=False,
                    ignore_missing_integrity=False,
                    retries=num_retries,
                )
            )

        create_archive(modules_root, archive_name, out_dir)
if __name__ == "__main__":
    # One (lock file URL, output tarball) pair per npm project: the server
    # first, then the client.
    for lock_file_url, tarball_name in (
        (
            "https://raw.githubusercontent.com/advplyr/audiobookshelf/refs/tags/v2.33.0/package-lock.json",
            "server_node_modules.tar.gz",
        ),
        (
            "https://raw.githubusercontent.com/advplyr/audiobookshelf/refs/tags/v2.33.0/client/package-lock.json",
            "client_node_modules.tar.gz",
        ),
    ):
        process(lock_file_url, tarball_name, ".", 3)