File kernel-source-component.py of Package kernel-source-component

#!/usr/bin/python3
#
# Extract a tarball and changelog corresponding to an independently buildable
# userspace component from kernel source git
#
# (C) 2023 by Tony Jones <tonyj@suse.de>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
# See http://www.gnu.org/licenses/gpl-2.0.html for full license text.

import argparse
import giturlparse
import glob
import os
import pygit2
import re
import sys
import subprocess
import tempfile
import whatthepatch
import time

from datetime import datetime, timezone
from wcmatch import glob

scratch_env = "SCRATCH_AREA"
scratch_size_gb = 5
scratch_size_bytes = scratch_size_gb * (2**30)
component_files = ""
patterns = []


def vprint(*pargs, **kwargs):
    """Print like print(), but only when verbose output was requested.

    All arguments are forwarded to print() unchanged.  Nothing is emitted
    unless the parsed command line options (global ``args``) have
    ``verbose`` set.
    """
    if not args.verbose:
        return

    print(*pargs, **kwargs)


def warning(*pargs, **kwargs):
    """Write a "Warning: "-prefixed message to stderr unless in quiet mode.

    The arguments are forwarded to print(); the message is suppressed when
    the parsed command line options (global ``args``) have ``quiet`` set.
    """
    if args.quiet:
        return

    stream = sys.stderr
    print("Warning: ", end="", file=stream)
    print(*pargs, file=stream, **kwargs)


def error(*pargs, **kwargs):
    """Write an "Error: "-prefixed message to stderr.

    The arguments are forwarded to print(), so ``sep`` and ``end`` behave
    as they do there.  The message is always emitted.
    """
    stream = sys.stderr
    print("Error: ", end="", file=stream)
    print(*pargs, file=stream, **kwargs)


def cl_write(*pargs, **kwargs):
    """Print to the changelog scratch file instead of stdout.

    Takes the same arguments as print(); the output goes to the global
    ``clfile``.
    """
    print(*pargs, **kwargs, file=clfile)


def shortsha(commit):
    """Return the first ten characters of the string form of ``commit``."""
    text = str(commit)
    return text[:10]


def read_obsinfo():
    """Locate and parse the single ``*.obsinfo`` file in the current directory.

    Every line of the file is split at its first ":" into a key and a value,
    both stripped of surrounding whitespace.  The file must provide the keys
    "name" and "commit", its base name must equal the "name" value, and
    ``<name>.spec`` and ``<name>.changes`` must exist next to it.

    Returns:
        The parsed key/value dict, or None (after reporting an error) when
        the file cannot be found, read or validated.
    """
    names = glob.glob("*.obsinfo")

    if not names:
        error("Unable to determine component name")
        return None
    if len(names) > 1:
        error("Too many obsinfo files")
        return None

    obsinfof = names[0]
    obsinfo = {}

    try:
        with open(obsinfof) as file:
            for line in file:
                key, _, value = line.partition(":")
                obsinfo[key.strip()] = value.strip()
    except (OSError, ValueError):
        # ValueError covers undecodable file content (UnicodeDecodeError)
        error(f"Error parsing obsinfo file '{obsinfof}'")
        return None

    # Check the required keys first: "name" is dereferenced right below
    for required in ["name", "commit"]:
        if required not in obsinfo:
            error(f"Error name '{required}' missing from obsinfo '{obsinfof}'")
            return None

    name = obsinfo["name"]
    pathbase = re.sub(r"\.obsinfo$", "", obsinfof)

    if pathbase != name:
        error(f"Filename/name: mismatch for obsinfo '{obsinfof}' 'name:{name}'")
        return None

    for suffix in ["spec", "changes"]:
        if not os.path.isfile(name + "." + suffix):
            error(f"Unable to verify {suffix} file for component {name}")
            return None

    return obsinfo


def write_obsinfo(outdir, obsinfo):
    """Write ``<outdir>/<name>.obsinfo`` from the given obsinfo dict.

    The keys "name", "version", "mtime" and "commit" are written, in that
    order, one ``key: value`` pair per line.

    Returns:
        True on success; False (after reporting an error) when a key is
        missing or the file cannot be written.
    """
    keys = ["name", "version", "mtime", "commit"]

    for key in keys:
        if key not in obsinfo:
            error(f"Error name '{key}' missing from obsinfo")
            return False

    name = obsinfo["name"]
    file = f"{outdir}/{name}.obsinfo"

    try:
        # the context manager guarantees the data is flushed and the handle closed
        with open(file, "w") as newp:
            for key in keys:
                print(f"{key}: {obsinfo[key]}", file=newp)
    except OSError:
        error(f"Unable to open new obsinfo file '{file}' for writing")
        return False

    return True


def update_changelog(outdir, obsinfo, newclp):
    """Create ``<outdir>/<name>.changes`` with the new entries prepended.

    The output consists of everything in ``newclp`` (rewound to its start)
    followed by the existing ``<name>.changes`` from the current directory.

    Args:
        outdir: directory receiving the new changes file.
        obsinfo: dict providing the component "name".
        newclp: seekable text file object holding the new changelog entries.

    Returns:
        True on success; False (after reporting an error) when either file
        cannot be opened.
    """
    name = obsinfo["name"]
    file = f"{name}.changes"
    newfile = f"{outdir}/{file}"

    # open() raises instead of returning a false value, so failures are
    # detected through OSError
    try:
        oldp = open(file, "r")
    except OSError:
        error(f"Unable to open old changes file '{file}' for reading")
        return False

    with oldp:
        try:
            newp = open(newfile, "w")
        except OSError:
            error(f"Unable to open new changes file '{newfile}' for writing")
            return False

        with newp:
            newclp.seek(0)
            for fp in (newclp, oldp):
                for line in fp:
                    newp.write(line)

    return True


def generate_interdiff(path, filea, fileb):
    """Return the paths touched by the interdiff of two versions of a patch.

    Runs ``interdiff`` on the two git blobs identified by ``filea.id`` and
    ``fileb.id`` inside the repository at ``path`` and parses the result
    with whatthepatch.

    Returns:
        A list of paths (leading "a/" or "b/" removed) taken from the
        interdiff output, or None when the command fails; each failure
        increments the global ``interdiff_fail`` counter.
    """
    global interdiff_fail
    import shlex

    cmd = (
        f"cd {shlex.quote(path)} && "
        f"interdiff <(git cat-file -p {filea.id}) <(git cat-file -p {fileb.id}) "
        "2> /dev/null"
    )

    # "<(...)" process substitution is a bash feature, so run bash explicitly
    # instead of relying on what /bin/sh happens to be
    try:
        proc = subprocess.run(
            ["/bin/bash", "-c", cmd],
            stdout=subprocess.PIPE,
            encoding="utf8",
            errors="replace",  # non-UTF-8 patch content must not abort the run
        )
    except OSError:
        interdiff_fail += 1
        return None

    if proc.returncode != 0:
        interdiff_fail += 1
        return None

    paths = []
    for diff in whatthepatch.parse_patch(proc.stdout):
        # skip entries without a usable header rather than crashing on them
        if not diff or not diff.header or not diff.header.new_path:
            continue
        paths.append(re.sub("^[ab]/", "", diff.header.new_path))

    return paths


def git_checkout(gitrepo, gitbranch):
    """Clone ``gitrepo`` into a temporary directory in a scratch area.

    The scratch area is the first existing, writable directory among the
    value of the environment variable named by ``scratch_env``, "/abuild"
    and "/tmp"; it must have at least ``scratch_size_bytes`` available.
    When ``gitbranch`` is set it is passed to ``git clone`` via ``-b``.

    Returns:
        The tempfile.TemporaryDirectory holding the clone, or None (after
        reporting an error) when no scratch area qualifies or the clone
        fails.
    """
    candidates = [os.environ.get(scratch_env), "/abuild", "/tmp"]
    scratch_area = next(
        (
            candidate
            for candidate in candidates
            if candidate
            and os.path.isdir(candidate)
            and os.access(candidate, os.W_OK)
        ),
        None,
    )

    if not scratch_area:
        error(
            f"Unable to locate suitable scratch area.  Set environment variable {scratch_env} to a directory with at least {scratch_size_gb}GB available"
        )
        return None

    fsinfo = os.statvfs(scratch_area)

    if not (fsinfo and fsinfo.f_frsize * fsinfo.f_bavail >= scratch_size_bytes):
        error(
            f"Scratch area '{scratch_area}' requires >= {scratch_size_gb}GB available space"
        )
        return None

    tmpdirctx = tempfile.TemporaryDirectory(dir=scratch_area)

    branch_args = [f"-b{gitbranch}"] if gitbranch else []
    cmd = ["git", "clone", *branch_args, gitrepo, tmpdirctx.name]
    msg = f"{gitrepo}:{gitbranch}" if gitbranch else f"{gitrepo}"

    vprint(f"Cloning git tree from {msg} to {tmpdirctx.name}")

    if subprocess.run(cmd).returncode != 0:
        error("git clone failed")
        return None

    vprint("Git clone completed")

    return tmpdirctx


def get_kernel_version(path):
    """Read the kernel version from ``<path>/Makefile``.

    The Makefile is scanned for ``VERSION``, ``PATCHLEVEL`` and ``SUBLEVEL``
    assignments up to and including the ``EXTRAVERSION`` line.
    EXTRAVERSION itself is not part of the returned string.

    Returns:
        "VERSION.PATCHLEVEL.SUBLEVEL", or "VERSION.PATCHLEVEL" when SUBLEVEL
        is unset or empty; None when VERSION or PATCHLEVEL is missing or
        the Makefile cannot be read (an error is reported in that case).
    """
    makefile = path + "/Makefile"
    wanted = ("VERSION", "PATCHLEVEL", "SUBLEVEL")
    found = {}

    try:
        with open(makefile) as file:
            for line in file:
                name, _, value = line.partition("=")
                name = name.strip()
                value = value.strip()

                if value and name in wanted:
                    found[name] = value

                # EXTRAVERSION ends the scan; its value should always be
                # present even if ""
                if name == "EXTRAVERSION":
                    break
    except (OSError, ValueError):
        # ValueError covers undecodable file content (UnicodeDecodeError)
        error(f"Unable to parse config {makefile}")
        return None

    version = found.get("VERSION")
    patchlevel = found.get("PATCHLEVEL")
    sublevel = found.get("SUBLEVEL")

    if not (version and patchlevel):
        return None

    # never emit a literal "None" component for a missing SUBLEVEL
    if sublevel:
        return f"{version}.{patchlevel}.{sublevel}"

    return f"{version}.{patchlevel}"


def get_patch_changelog(data):
    """Extract the subject and references tags from a patch's text.

    Lines of the form ``Tag: value`` (a capitalised, letters-only tag) are
    examined; the values of "Subject" (with "[PATCH] " removed) and
    "References" are collected.  The whole text is scanned: the original
    ten-line cut-off compared a string with an integer and never took
    effect, so it has been removed rather than silently changing which
    patches produce changelog entries.

    Returns:
        A ``(subject, references)`` tuple as soon as both values are known
        and non-empty, otherwise None.
    """
    tag_re = re.compile(r"^([A-Z][a-z]+):(.*)$")
    subject = None
    references = None

    for line in data.splitlines():
        res = tag_re.search(line)

        if not res:
            continue

        tag, value = res.groups()
        value = value.lstrip()

        if tag == "Subject":
            subject = value.replace("[PATCH] ", "")
        elif tag == "References":
            references = value

        if subject and references:
            return subject, references

    return None


def read_config():
    """Load ``_patterns`` and ``_component_files`` from the current directory.

    ``_patterns`` provides one entry per line for the global ``patterns``
    list.  Every space-separated item of every line in ``_component_files``
    is appended, preceded by a single space, to the global
    ``component_files`` string.

    Returns:
        True on success; False (after reporting an error) when either file
        cannot be read.
    """
    global patterns, component_files

    componentfilesf = "_component_files"
    patternsf = "_patterns"

    try:
        with open(patternsf, "r") as file:
            patterns = [line.rstrip() for line in file]
    except (OSError, ValueError):
        # ValueError covers undecodable file content (UnicodeDecodeError)
        error(f"Unable to open pattern file {patternsf}")
        return False

    try:
        with open(componentfilesf) as file:
            lines = [line.rstrip() for line in file]
    except (OSError, ValueError):
        error(f"Unable to open component file {componentfilesf}")
        return False

    component_files += "".join(
        " " + item for line in lines for item in line.split(" ")
    )

    return True


def sequence_patches(path):
    cmd = f"cd {path} && scripts/sequence-patch.sh --rapid"
    proc = subprocess.Popen([cmd], stdout=subprocess.PIPE, shell=True)
    path = None

    while True:
        line = proc.stdout.readline()
        if not line:
            break
        match line.split():
            case [b"Creating", b"tree", b"in", _path]:
                path = _path.decode()

    returncode = proc.wait()

    if not path or returncode != 0:
        error("sequence_patch failed")
        return None
    else:
        print(f"Expanded kernel tree to: {path}")

    return path


def generate_tarfile(expanded_tree, outdir, obsinfo):
    """Create ``<outdir>/<name>-<version>.tar.gz`` from the expanded tree.

    ``tar`` runs inside ``expanded_tree`` on the entries listed in the
    global ``component_files``; a ``--transform`` expression prefixes every
    member name with ``./<name>-<version>/``.

    Returns:
        True when the tar command exits with status 0, otherwise False.
    """
    import shlex

    tarversion = obsinfo["version"]
    name = obsinfo["name"]
    tarname = f"{name}-{tarversion}"
    tarpath = f"{outdir}/{tarname}.tar.gz"

    # raw string: "\/" is not a valid Python escape sequence
    transform = rf"s/^/.\/{tarname}\//"

    # component_files stays unquoted on purpose: the shell splits it into
    # the individual entries handed to tar
    cmd = (
        f"cd {shlex.quote(expanded_tree)} && "
        f"tar --transform {shlex.quote(transform)} "
        f"-czf {shlex.quote(tarpath)} {component_files}"
    )
    proc = subprocess.run(["/bin/bash", "-c", cmd])

    return proc.returncode == 0


def format_commit_message(message, lines=None):
    """Write a commit message to the changelog as a bullet item.

    The first line is prefixed with "- " and all following lines with two
    spaces.  A message whose first line already starts with "-" is written
    unchanged.

    Args:
        message: the text to format.
        lines: when set (and non-zero), at most this many lines are written.
    """
    existing_fmt = False

    for lineno, line in enumerate(message.splitlines()):
        if lines and lineno == lines:
            break

        # startswith() also copes with an empty first line, where indexing
        # line[0] would raise IndexError
        if lineno == 0 and line.startswith("-"):
            existing_fmt = True

        if existing_fmt:
            cl_write(line)
        else:
            cl_write(f"- {line}" if lineno == 0 else f"  {line}")


#
#
# PROGRAM START

# Command line interface.  --outdir is mandatory; every other option has a
# default.
parser = argparse.ArgumentParser(
    description="Extract tarball and changelog from kernel source git"
)
parser.add_argument(
    "--outdir",
    help="osc service parameter for internal use only "
    "(determines where generated files go before "
    "collection",
    dest="outdir",
    required=True,
)
parser.add_argument(
    "--verbose", "-v", help="Enable verbose output", dest="verbose", action="store_true"
)
parser.add_argument(
    "--quiet", "-q", help="Enable quiet mode", dest="quiet", action="store_true"
)
parser.add_argument(
    "--repo",
    "-r",
    help="kernel source repository",
    dest="gitrepo",
    default="kerncvs.suse.de:/home/git/kernel-source.git",
)
parser.add_argument(
    "--branch", "-b", dest="gitbranch", help="kernel source branch name"
)
parser.add_argument(
    "--force",
    "-f",
    action="store_true",
    dest="force",
    # a real boolean: the string "False" would be truthy
    default=False,
    help="Force generation of changelog/tarball even when no changes are detected",
)

args = parser.parse_args()

# The output directory must already exist, be writable and differ from the
# current directory.
if not os.path.isdir(args.outdir) or not os.access(args.outdir, os.W_OK):
    error(f"Specified directory '{args.outdir}' does not exist or is not writable")
    sys.exit(1)

outdir = os.path.abspath(args.outdir)

if outdir == os.getcwd():
    error(f"Specified directory '{args.outdir}' is pwd")
    sys.exit(1)

obsinfo = read_obsinfo()

if not obsinfo:
    error("Unable to read obsinfo file")
    sys.exit(1)

# offset is seeded from the ".git<N>." part of the previously recorded
# version and incremented for every new commit found later.  It always
# starts defined so that an unexpected version string cannot cause a
# NameError further down.
offset = 0

if "version" in obsinfo:
    version_match = re.match(r"^.*\.git([0-9]+)\.[0-9a-z]*$", obsinfo["version"])
    if version_match:
        offset = int(version_match.group(1))
    else:
        warning(
            f"Unable to determine git offset from version '{obsinfo['version']}' [setting gitoffset = 0]"
        )
else:
    warning("commit: but no prior version: found in obsinfo [setting gitoffset = 0]")

if not read_config():
    error("Unable to read config")
    sys.exit(1)

p = giturlparse.parse(args.gitrepo)
if not p:
    error(f"Unable to parse repository {args.gitrepo}")
    sys.exit(1)

# A repository given as a local directory is used in place; anything else
# is cloned into a scratch area first.
if os.path.isdir(p.href):
    if args.gitbranch:
        error(
            "Branch option invalid if specified repository is directory.  Please manually checkout required branch and rerun"
        )
        sys.exit(1)

    unexpanded_git_tree = args.gitrepo
else:
    gitdir = git_checkout(args.gitrepo, args.gitbranch)
    if not gitdir:
        error("Unable to checkout kernel source tree from git")
        sys.exit(1)

    unexpanded_git_tree = gitdir.name

# New changelog entries are collected in this scratch file through
# cl_write() and merged into the .changes file at the end.
clfile = tempfile.TemporaryFile(mode="w+")

repo = pygit2.Repository(unexpanded_git_tree)

total_changes = 0
head = repo.head.target

newobsinfo = {}
newobsinfo["name"] = obsinfo["name"]
newobsinfo["commit"] = head

interdiff_fail = 0
whatthepatch_fail = 0

# Verb used in the changelog for each kind of patch file change
change_modes = {
    pygit2.GIT_DELTA_ADDED: "Add",
    pygit2.GIT_DELTA_DELETED: "Delete",
    pygit2.GIT_DELTA_MODIFIED: "Refresh",
}

for commit in repo.walk(head, pygit2.GIT_SORT_NONE):

    # Stop at the commit recorded in the obsinfo file
    if str(commit.id) == obsinfo["commit"]:
        break

    offset += 1

    # Only commits with exactly one parent are inspected; all others are
    # merely counted in offset
    if len(commit.parents) != 1:
        continue

    changelog = []

    diff = commit.parents[0].tree.diff_to_tree(commit.tree)

    for delta in diff.deltas:
        status = delta.status

        if status == pygit2.GIT_DELTA_ADDED:
            file1 = delta.new_file
        elif status == pygit2.GIT_DELTA_DELETED:
            file1 = delta.old_file
        elif status == pygit2.GIT_DELTA_MODIFIED:
            file1 = delta.new_file
            file2 = delta.old_file
        else:
            continue

        if status == pygit2.GIT_DELTA_ADDED or status == pygit2.GIT_DELTA_DELETED:
            # An added or removed patch is relevant when any file it
            # touches matches the component patterns
            if glob.globmatch(file1.path, "patches.*/**", flags=glob.GLOBSTAR):
                filematched = False
                # errors="replace": a non-UTF-8 patch must not abort the run
                blob = repo.revparse_single(str(file1.id)).data.decode(
                    "utf-8", errors="replace"
                )

                for patch in whatthepatch.parse_patch(blob):
                    if not patch or not patch.header:
                        whatthepatch_fail += 1
                        continue

                    if glob.globmatch(
                        patch.header.new_path, patterns, flags=glob.GLOBSTAR
                    ):
                        filematched = True
                        break

                if filematched:
                    res = get_patch_changelog(blob)
                    if res:
                        subject, references = res
                        changelog.append((status, subject, references, file1.path))

        elif status == pygit2.GIT_DELTA_MODIFIED:
            # A refreshed patch is relevant when the interdiff between its
            # old and new version touches a matching file
            new = glob.globmatch(file1.path, "patches.*/**", flags=glob.GLOBSTAR)
            old = glob.globmatch(file2.path, "patches.*/**", flags=glob.GLOBSTAR)

            filematched = False
            if new or old:
                paths = generate_interdiff(unexpanded_git_tree, file1, file2)
                if paths:
                    for path in paths:
                        if glob.globmatch(path, patterns, flags=glob.GLOBSTAR):
                            filematched = True
                            break

            if filematched:
                blob = repo.revparse_single(str(file1.id)).data.decode(
                    "utf-8", errors="replace"
                )
                res = get_patch_changelog(blob)
                if res:
                    subject, references = res
                    changelog.append((status, subject, references, file1.path))

    num_changes = len(changelog)

    total_changes += num_changes

    if num_changes > 0:
        cl_write(
            "-------------------------------------------------------------------"
        )
        stime = datetime.fromtimestamp(commit.commit_time, timezone.utc).strftime(
            "%a %b %d %H:%M:%S %Z %Y"
        )
        cl_write(f"{stime} - {commit.author}\n")

        if num_changes == 1:
            # most typical case, one change (patch addition, deletion) or
            # refresh of patch: just use the original git commit message
            format_commit_message(commit.message)
        else:
            format_commit_message(commit.message, 6)

            cl_write("")
            cl_write("Relevant changes:")
            for change_status, subject, references, patch_file in changelog:
                if subject:
                    # subject is a single line, so one entry is written
                    # per change
                    mode = change_modes.get(change_status, "")
                    format_commit_message(f"{mode} patch: {patch_file}\n{subject}")

                if references:
                    cl_write(f"  ({references})")

        cl_write(f"\n  [kernel-source commit {shortsha(commit.id)}]\n")

if interdiff_fail > 0:
    warning(f"{interdiff_fail} patches could not be processed by interdiff")

if whatthepatch_fail > 0:
    warning(f"{whatthepatch_fail} patches could not be processed by whatthepatch")

# Identity comparison on purpose: only a real boolean True (set by the
# store_true action) forces generation, never a merely truthy value.
if total_changes > 0 or args.force is True:
    print(f"New changes: {total_changes}")

    expanded_git_tree = sequence_patches(unexpanded_git_tree)
    if not expanded_git_tree:
        error("Unable to expand git tree")
        sys.exit(1)

    kernversion = get_kernel_version(expanded_git_tree)

    if not kernversion:
        error("Unable to determine kernel version")
        sys.exit(1)

    newobsinfo["version"] = f"{kernversion}.git{offset}.{shortsha(head)}"
    newobsinfo["mtime"] = int(time.time())

    if not generate_tarfile(expanded_git_tree, outdir, newobsinfo):
        error("Unable to generate tarfile")
        sys.exit(1)

    # Do not report success when one of the output files was not written
    if not update_changelog(outdir, obsinfo, clfile):
        error("Unable to update changes file")
        sys.exit(1)

    if not write_obsinfo(outdir, newobsinfo):
        error("Unable to write obsinfo file")
        sys.exit(1)

    print(f"Updated revision to {str(head)}")

else:
    print("No new changes")
openSUSE Build Service is sponsored by