File collect_licenses.pl of Package licenses

#! /usr/bin/perl -wT
#
# collect_licenses.pl -- a script to copy license files from the system into a tar ball.
#
######################################################################
#
# Copyright 2007 Novell Inc., jw@suse.de
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
# 
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
# 
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
#####################################################################
# 2007-01-16, jw, V1.0 -- initial draft.
# 2007-01-30, jw, V1.1 -- rpm scanner added.
# 2007-01-30, jw, V1.2 -- eula added to $collect.

use Data::Dumper;
use Digest::MD5;
use Cwd;
use strict;

$ENV{PATH} = '/bin:/usr/bin';
delete $ENV{ENV};

my $version = '1.2';
my $verbose = 1;

my $tarball = 'licenses.tar.gz';
my $dir = 'licenses';
my $logfile = 'licenses/meta/log.txt';

my $blacklist_re = '(/usr/share/doc/licenses/|/usr/share/gnump3d/|\.png$|\.jpg$|\.svg$)';

my $collect_cmd_fmt = q{find '%s' -type f | egrep -i '%s'};
my $collect = q{/usr/share:(license|copying|copyright|eula)};

my $cpio = '/usr/bin/cpio --quiet';
my $rpm = '/bin/rpm';

my $extract = 0;
my $pack = 0;
my $run_collect = 0;
my $add = 0;
my $keep_unpacked = 0;
my $rpmdir = undef;
my $tmpdir = $ENV{LICENSES_TMP_DIR} || "/tmp/licenses-$$/%s";
my $noexec = 0;

usage() unless @ARGV;

while (defined(my $arg = shift))
  {
    if    ($arg =~ m{^-v})   { $verbose++; }
    elsif ($arg =~ m{^-q})   { $verbose = 0; }
    elsif ($arg =~ m{^--$})  { last; }
    elsif ($arg =~ m{^-a})   { $add++; }
    elsif ($arg =~ m{^-c})   { $run_collect++; $collect = shift if $ARGV[0] and $ARGV[0] !~ m{^-}}
    elsif ($arg =~ m{^-x})   { $extract++; }
    elsif ($arg =~ m{^-r})   { $rpmdir = shift; }
    elsif ($arg =~ m{^-p})   { $pack++; }
    elsif ($arg =~ m{^-t})   { $pack++; $extract++; }
    elsif ($arg =~ m{^-k})   { $keep_unpacked++; }

    elsif ($arg =~ m{^-})    { usage("unknown option '$arg'"); }
    else { unshift @ARGV, $arg; last }
  }

$keep_unpacked++ unless $pack;
my ($prefix_string,$infix_pattern,$collect_cmd) = split_prefix_infix($collect);

if ($extract && -f $tarball)
  {
    my $cmd = "tar zxvf '$tarball'";
    system $cmd and die "'$cmd' failed: $! $@\n";
  }

mkdir_pf($logfile);
open L, ">>", $logfile;
print L "$0 extract=$extract add=$add run_collect=$run_collect pack=$pack started " . scalar(localtime) . "\n";
print L "collect=$collect\n" if $run_collect;
print L "rpmdir=$rpmdir\n" if defined $rpmdir;
print L "$tarball extracted.\n" if $extract  && -f $tarball;

if (defined($rpmdir))
  {
    $rpmdir = $1 if $rpmdir =~ m{^(.*)$};	# untaint.
    if (-d $rpmdir)
      {
        open FINDPKG, "find $rpmdir -name \*.rpm|" or die "open find $rpmdir failed: $! $@\n";
	while (defined(my $pkg = <FINDPKG>))
	  {
	    $pkg = $1 if $pkg =~ m{(\S+)};
	    die "find returned non-file: $pkg\n" unless -f $pkg;
	    print " $pkg\n" if $verbose;
            collect_from_rpm($pkg);
	  }
	close FINDPKG;
      }
    else
      {
        collect_from_rpm($rpmdir);
      }
  }
else
  {
    collect_licenses('/');
  }

if ($pack && -d $dir)
  {
    rename $tarball, "$tarball." . time() if -f $tarball;
    my $cmd = "tar zcvf '$tarball' '$dir'";
    system $cmd and die "'$cmd' failed: $! $@\n";
    print L "$tarball packed.\n";
  }

print L "$0 done " . scalar(localtime) . "\n";
close L;
rm_rf($dir) unless $keep_unpacked;

exit 0;
############################################################################

##
## this version of mkdir_pf handles relative and absolute paths.
##
sub mkdir_pf
{
  my ($path) = @_;

  my @dirs = split "/", $path;
  pop @dirs;                    # nuke trailing filename
  $path = ($dirs[0] eq '') ? '/' : '';
  for my $d (@dirs)
    { 
      $path .= "$d/";
      mkdir $path, 0777 or die "mkdir $path failed: $!" unless -d $path;
    }
  return 1;     # success, be mkdir compatible
}

## rm_rf -- recursive file tree delete
##
## fn is a predicate, that receives a file path name as parameter.
## If fn returns zero for a file object, it is excluded from removal.
## Directories are removed unconditionally, if we can empty them first. 
##
## Directories are traversed in reverse alphabetical order;
## thus dotfiles usually still exist while fn is called for other files.
## fn defaults to a true value.
## Adds write perm on the dir, if unlink/rmdir fails.
##
## Caution: No taint checks here.
##
## Does not follow symlinks for opendir; this could carry us out of the tree.
## Tries unlink after rmdir, just in case it is a weird directory symlik.
##
## rm_rf fails on very deep directory structures. It should 
## - chdir downward (remembering inode numbers), 
## - clear one level, 
## - cd(..), check if 
##   - inode matches, clear one level 
##   - or, if it does not match redo from start.
## - done.
##
sub rm_rf
{
  my ($path, $fn, $comment) = @_;

  $comment ||='';

  if (!-l $path and opendir DIR, $path)
    {
      my @e = grep { !/^(\.|\.\.)$/ } readdir DIR;
      closedir DIR;
      rm_rf("$path/$_") for reverse sort @e;
      $path = $1 if $path =~ m{^(.*)$};
      print "rmdir $path\n" if $verbose > 2;
      unless (unlink $path or rmdir $path)
        {
          my $dir; $dir = $1 if $path =~ m{^(.*/).};
          $dir = '.' unless defined $dir;
          chmod 0777, $dir;
          return if unlink $path or rmdir $path;
system "/bin/ls -la $path";
          warn "rm_rf: rmdir($path) failed: $!\n";
        }
    }
  else
    {
      if (!$fn or &$fn($path))
        {
          $path = $1 if $path =~ m{^(.*)$};     # UNTAINT. brute
          print "unlink $path\n" if $verbose > 2;
          unless (unlink $path)
            {
              my $dir; $dir = $1 if $path =~ m{^(.*/).};
              $dir = '.' unless defined $dir;
              chmod 0777, $dir;
              return if unlink $path;

              warn "rm_rf: unlink($path) failed: $!\n";
            }
        }
      else
        {
          print "rm_rf: skip $path, $comment\n" if $verbose > 2;
        }
    }
}


sub rpm_header
{
  my ($file, $name) = @_;
  my $cmd = "$rpm -qp --qf '%{$name}' '$file'";
  open RPM, "$cmd|" or die "cannot run $cmd: $!";
  my $r = join '', <RPM>;
  close RPM or die "failed to run $cmd: $!";
  die "$cmd: failed\n" unless length $r;
  return $r;
}

sub rpm_filelist
{
  my ($file, $name) = @_;
  my $cmd = "$rpm -qpl '$file'";
  open RPM, "$cmd|" or die "cannot run $cmd: $!";
  my @r = <RPM>;
  chomp @r;
  close RPM or die "failed to run $cmd: $!";
  die "$cmd: failed\n" unless scalar @r;
  return @r;
}


# extract an RPM file, optionally limited to certain files.
#
sub unrpm
{
  my ($pkg, $dir, $files) = @_;
  my $pattern = '';
  if ($files)
    {
      # we put leading dots in front of the file name list.
      # This is how rpm2cpio exports the names.
      # And we escape '?' and '*' because cpio reads shell globbing 
      # patterns, not files.

      $pattern = " '." . join("' '.", @$files) . "'" if $files;
      $pattern =~ s{([\*\?])}{\\$1}g;
      $pattern = $1 if $pattern =~ m{^(.*)$};
    }
  run_cmd("rpm2cpio '$pkg' | (cd $dir && $cpio -uidm$pattern)");
}

sub collect_from_rpm
{
  my ($pkg) = @_;
  my $name = rpm_header($pkg, 'name');
  $name = $1 if $name =~ m{^([^/]+)$};	# untaint, no slashes please.
  my @l = rpm_filelist($pkg);
  @l = grep { /$infix_pattern/i } grep { /^\Q$prefix_string\E/ } @l;
  return unless scalar @l;

  my $root = sprintf $tmpdir, $name;	# where created files go.
  mkdir_pf("$root/.");
  unrpm($pkg, $root, \@l);
  collect_licenses($root, $name);
  rm_rf($root);
}

sub run_cmd
{
  my ($cmd) = @_;
  print "\n$cmd\n" if $verbose > 1 or $noexec;
  return if $noexec;
  system "$cmd" and die "$cmd failed: $@ $!";
}


sub usage
{
  my ($msg) = @_;
  $msg .= "\n" if $msg and $msg !~ m{\n$};
  $msg = "Error: " . $msg if $msg;

  print qq{
license_collector.pl version $version
$msg
Usage:

$0 [options] [files ...]

Valid options are:
 
 -v              Be more verbose. Default: $verbose
 -q              Be quiet.
 
 -a              Add files. 
 -c [prefix:infix]	 
                 Add files selected by pattern. 
                 A case sensitive prefix string and a case insensitive 
		 infix regexp-pattern can be specified seperated by a colon.
		 Defaults to "$collect". 
 -r rpmdir	 Add files found in the rpm's in rpmdir.
 -l logfile      Append to logfile. Defaults to $logfile .
		 
 -t              First do -x, last do -p.
 -x              Extract '$tarball' into ./$dir .
 -p              Pack '$tarball' from ./$dir .

Examples:

Update from the current system:
  $0 -t -c

Make a fresh start for SLES10-SP1:
  $0 -p -c -r /mounts/dist/install/SLP/SLES-10-SP1-Beta2/i386

add one file from one rpm:
  $0 -t -r foobar.rpm -a /usr/lib/foobar/doc/Readme.txt

};

  exit 0;
}

sub split_prefix_infix
{
  my ($colon_sep) = @_;
  $colon_sep =~ m{^([^:]+):(.*)$} or
    die "split_prefix_infix: pattern seperator colon(:) not found\n";
  my ($pre, $in) = ($1, $2);
  $pre =~ s{^/+}{};
  my $cmd = sprintf $collect_cmd_fmt, $pre, $in;
  return ("/$pre", $in, $cmd);
}

sub collect_licenses
{
  my ($root, $pkg_name) = @_;

  if ($run_collect)
    {
      print L "running comand in $root: $collect_cmd\n";
      my $cwd = getcwd;
      chdir($root) or die "collect_licenses: chdir($root) failed: $!";
      open C, "$collect_cmd|" or die "'$collect_cmd' failed: $! $@\n";

      $cwd = $1 if $cwd =~ m{^(.*)$};	# untaint
      chdir($cwd) or warn "collect_licenses: chdir($cwd) failed: $!";

      while (defined (my $line = <C>))
	{
	  chomp $line;
	  my $file = ($line =~ m{^(.*)$})[0];	# force untaint
	  $file = "/$file" unless $file =~ m{^/};
	  next if $file =~ m{$blacklist_re};

	  if (open FILE, "<", "$root$file")
	    {
	      my $md5 = Digest::MD5->new;
	      $md5->addfile(*FILE);
	      close(FILE);
	      my $digest = $md5->hexdigest;
	      my $dest = "$dir/md5/$digest";
	      unless (-f $dest)
		{
		  mkdir_pf($dest);
		  system "cp '$root$file' '$dest'" and die "cannot copy $root$file";
		  print L "added $digest $file\n";
		  print "added $file\n" if $verbose;
		}
	      else
		{
		  print L " have $digest $file\n";
		}

	      if ($pkg_name)
	        {
#		  my $lname = ($file =~ m{([^/]+)$})[0];
		  my $lname = $file;
		  $lname =~ s{^/+}{};
		  $lname =~ s{/+}{,}g;

		  $lname = "$dir/pkg/$pkg_name/$lname";
		  unless (-e $lname)
		    {
		      my $rel = $dest;
		      $rel =~ s{^[^/]+/}{../../};
		      mkdir_pf($lname);
		      symlink($rel, $lname);
		    }
		}
	    }
	  else
	    {
	      warn "cannot read $root$file: $!\n";
	    }

	}
      close C;
    }
  else
    {
      warn "nothing done. try -c ?\n";
    }
}