File update_pkg.pl of Package jw-env

#! /usr/bin/perl -w
#
# (C) 2009-2010, jw@suse.de, opensuse.org Novell Inc, 
# Distribute under GPLv2 or GPLv3.
#
# update_pkg.sh -- shell script to update rpm-based from source.
#
# look into a spec file, parse the URL in %Source* for its containing directory,
# browse that directoy with w3m, retrieve the latest file matching the old pattern.
# run bznew if needed, delete old tar balls.
# update version number in spec file.
#
# Dependencies: wget, sed.
#
# 2009-04-19, jw, V0.1 -- initial draft
# 2009-04-20, jw, V0.2 -- no more w3m -dump, this had abbreviated names. 
#                         We prefer html parsing from wget -O -
# 2009-04-21, jw, V0.3 -- Added sort_fn_version for better version sorting.
#                         Waiting for ENTER before changing things.
# 2009-05-05, jw, V0.4 -- allow %version as well as %{version}
# 2009-09-01, jw, V0.5 -- more %macro expansion. %version needs fix.
#                         hardcoded tar in $suf  needs fix.
# 2009-12-05, jw, V0.6 -- prepend URL, if source has no prefix.
# 2009-12-15, jw, V0.7 -- expand_defines() now does both %{def} and %def\b
# 2010-01-13, jw, V0.8 -- cheating with complex defines: cpan_name
# 2010-04-26, jw, V0.9 -- added --no-check-certificate, for allioth
# 2010-08-25, jw, V0.10 -- ZIP support
# 2010-11-01, jw, V0.11 -- support Source: http://sf.net/projects/%name/
#                          files/%name-%version.tar.bz2
# 2010-12-21, jw, V0.12 -- support {urldir} reporting relative urls.
#                          seen in http://search.cpan.org/CPAN/authors/id/
#                          R/RE/RETOH/CMS 
# 2011-04-22, jw, V0.13 -- html parser regexp now avoids pathological slow cases.
# 2011-06-20, jw, V0.14 -- no more bznew.
# 2011-09-18, jw, V0.15 -- better ignore .sig,.md5,.sha256
#
# FIXME: If URL is in specfile, it should first look there, and try to find
#        source tar balls there, before opening the source urls as a
#        direcory.

use strict;
use Data::Dumper;

my $attic = 'ATTIC';
my $version = '0.15';

my $specfile = shift;
unless (defined $specfile)
 {
   opendir DIR, '.' or die "opendir . : !$\n";
   my @s = sort grep { /\.spec$/ } readdir DIR;
   closedir DIR;
   die "no *.spec file here??\n" unless @s;
   if (scalar @s > 1)
     { 
       use Cwd;
       my $dir = getcwd;
       $dir =~ s{.*/}{};
       $s[0] = "$dir.spec" if -f "$dir.spec";
     }
   $specfile = $s[0];
 }

print "using $specfile\n";

my $pkg_version;
my @sources;
my %defines;
open IN, "<", $specfile or die "open($specfile) failed: $!\n";
while (defined (my $line = <IN>))
  {
    chomp $line;
    $defines{$1} = $2 if $line =~ m{^%define\s+(\w+)\s+(.*)};
    $line = expand_defines($line);
    push @sources, { specname => $1 } if $line =~ m{^source.*?\s([\S]+)}i;
    $defines{lc $1} = $2 if $line =~ m{^([A-Z]\w+):\s+(\S+)};
    $pkg_version = $1 if $line =~ m{^Version:\s+(\S+)};
  }
close IN;

print Dumper \%defines;

die "no \%Source URLs?\n" unless @sources;

for my $s (@sources)
  {
    if ($s->{specname} =~ m{^((ftp|http)[\S]+)}i)
      {
        $s->{urlpat} = $s->{specname};
      }
    elsif ($defines{url})
      {
        $s->{urlpat} = $defines{url} . '/' . $s->{specname};
      }

    ($s->{urldir},$s->{filepat}) = ($1,$2) if $s->{urlpat} =~ m{^(.*)/(.*?)$};
    if (($s->{filepat} =~ m{(.*-)(%\{?version\}?|\d[\.\w_]*)(\.tar.*?)$}) or
        ($s->{filepat} =~ m{(.*-)(%\{?version\}?|\d[\.\w_]*)(\.zip|\.ZIP)$}) or
        ($s->{filepat} =~ m{(.*_)(%\{?version\}?|\d[\.\w]*)(\.tar.*?)$}))
      {
        my ($pre,$suf) = ($1,$3);
	if ($suf =~ m{^(\.tar\.)}i)
	  {
	    $suf = '\.[Tt][\w\.]*?';
	  }
	else
	  {
	    $suf = "\Q$suf\E";
	  }
        $s->{filepat} = "\Q$pre\E(\\d[\\.\\w_]*?)$suf";
      }
    my $cmd = "wget --no-check-certificate -O - '$s->{urldir}'";
    my @all = `$cmd`;
    my %vmap;
    for my $line (@all)
      {
        # html text may or may not have newlines.
        while ($line =~ m{=[\s"]*($s->{filepat})["\s>]}g)
	  {
	    my ($candidate,$key) = ($1,$2);
	    next if $candidate =~ m{\.(sig|sha256|md5)$}i;
	    $vmap{$key} = { file => $candidate };
	  }

        # http://sf.net/projects/%name/files uses this syntax:
	# " href="http://sourceforge.net/projects/vtcl/files/vtcl/1.6.1.a1/vtcl-1.6.1a1.tar.gz/download" title="/vtcl/1.6.1.a1/vtcl-1.6.1a1.tar.gz:  released on 2007-02-28">vtcl-1.6.1a1.tar.gz</a>
	#
	## CAUTION: 
	## while ($line =~ m{href\s*=[\s"]*([^\s"]+)(.*?)>($s->{filepat})</a>}g)
	## is horribly slow with 700kb text from http://www.cpan.org/modules/by-module/WWW/
	while ($line =~ m{href\s*=[\s"]*([^\s"]+)([^>]*?)>($s->{filepat})</a>}g)
	  {
	    my ($url,$skip,$candidate,$key) = ($1,$2,$3,$4);
	    next if $candidate =~ m{\.(sig|sha256|md5)$}i;
	    # sometimes .*? does not match the smallest possible distance.
	    $url = $1 if $skip =~ m{.*href\s*=[\s"]*([^\s"]+)};
	    printf "$candidate  %d\n", pos $line;
	    $vmap{$key} = { url => $url, file => $candidate };
	  }
      }
    die "$cmd did not find any $s->{filepat}\n" unless keys %vmap;
    my @vmap = sort { &sort_fn_version } keys %vmap;
    $s->{latest} = $vmap{$vmap[0]};
    $s->{version} = $1 if $s->{latest}{file} =~ m{$s->{filepat}};

    # ignore downgrades, and existing files.
    if ($s->{version} gt $pkg_version and !-f $s->{latest}{file})
      {
        opendir DIR, '.' or die "opendir . : !$\n";
        @{$s->{unlink}} = grep { /^$s->{filepat}$/ } readdir DIR;
        closedir DIR;
	my $f = $s->{latest}{file};
	print "{latest}" . Dumper $s->{latest};
	if (my $u = $s->{latest}{url})
	  {
	    unless ($u =~ m{://})
	      {
	        # an incomplete URL
		if ($u =~ m{^/})
		  {
		    my $host = $1 if $s->{urldir} =~ m{^(.*://[^/]*)};
		    $u = "$host$u";
		  }
		else
		  {
		    # a relative url even..
		    $u = "$s->{urldir}/$u";
		  }
	      }
	    $cmd = "wget --no-check-certificate -c '$u'";
	  }
	else
	  {
	    $cmd = "wget --no-check-certificate -c '$s->{urldir}/$f'";
	  }
	system $cmd and die "$cmd; failed: $!\n";
	die "$cmd; failed to create $f\n" unless -f $f;
	## no recompression please, source service will fail.
	# if ($f =~ m{\.gz$})
	#   { 
	#     print  "bznew $f\n";
	#     system "bznew $f" and die "bznew $f; failed: $!\n";
	#     $f =~ s{\.gz$}{.bz2};
	#     die "bznew failed to create $f\n" unless -f $f;
	#   }
	##
	$f =~ s{([-_])\Q$s->{version}\E\.}{$1\%\{?version\}?.};
	$s->{urlpat} = "$s->{urldir}/$f";
	$cmd = qq{sed -i $specfile -e 's/\\(for package.*(Version\\s*\\)\Q$pkg_version\E)/\\1\Q$s->{version}\E)/' -e 's/^\\(Version:\\s*\\)\Q$pkg_version\E/\\1\Q$s->{version}\E/'};
	print "Updating $f from $pkg_version to $s->{version}\n";
	print "Press ENTER to continue.\n";
	<STDIN>;

	print "$cmd\n";
	system $cmd and die "$cmd; failed: $!\n";
	unlink @{$s->{unlink}};
      }
    else
      {
        print "Nothing new: found $s->{latest}{file}, have version $pkg_version\n";
      }
  }

exit 0;

##########################
## reverse numeric vector sort.
## accepting any non-numeric or not dotted notation.
##
## Note that this is not the same as used by rpm. 
## rpm would sort 2.0-beta1 after 2.0.
## --------
## see also perldoc Sort::Versions for an alternative

sub sort_fn_version
{
  my ($aa, $bb) = ($a,$b);

  ## 2.0beta1 comes before 2.0, after 1.99
  ## so let us append a string of very high 
  ## characters, if there is no beta.

  $aa .= '~' x 16 unless $aa =~ m{[a-z]}i;
  $bb .= '~' x 16 unless $bb =~ m{[a-z]}i;

  ## now the fun part.
  ## pad the leading digit vector $1 to 8 components.
  ## and append the text suffix $2 after the padding.
  ## a leading '.' is enforced at the text suffix, 
  ## reagardless if it had a leading '-' or nothing.
  ## E.g. 2.0-beta1, 2.0.beta1 and 2.0beta1 are all the same.
  ## 
  ## The first text suffix is right-padded with '~', so that 
  ## 'alpha', 'beta', 'prerelease' and 'rc' sort in their natural language order,
  ## despite the leading 0 padding below.

  $aa =~ s{^([\d\.]*\d)[\.-]*([^\.]*)(.*)$}{$1 . ('.0' x (8-($1 =~ tr/././))) .'.'
.$2.('~'x(12-length($2))).$3}e;
  $bb =~ s{^([\d\.]*\d)[\.-]*([^\.]*)(.*)$}{$1 . ('.0' x (8-($1 =~ tr/././))) .'.'
.$2.('~'x(12-length($2))).$3}e;


  ## Now we can pad all vector elements to 16 positions with leading zeros.

  $aa =~ s{([^\.]+)}{("0"x(16-length($1))).$1}ge;
  $bb =~ s{([^\.]+)}{("0"x(16-length($1))).$1}ge;

  ## ... and now the comparision is a simple fixed 
  ## length string compare (kind of).
  return $bb cmp $aa;
}

sub expand_defines
{
  my ($line) = @_;
  my $limit = 100;
  my $count = 0;

  if (defined $defines{cpan_name})
    {
      # %define cpan_name %( echo %{name} | %{__sed} -e 's,perl-,,' )
      $defines{cpan_name} = $1 if $defines{name} =~ m{^perl-(.*)};
    }

  while ($line =~ m{%{(\w+)}})
    {
      my ($key) = ($1);
      die "expand_defines($key): $limit substitutions reached: '$line'\n" 
        if $count++ >= $limit;
      # the space in there is important, it prevents endless recursion
      $defines{$key} = "%{$key unknown}" unless defined $defines{$key};
      $line =~ s{%{$key}}{$defines{$key}}g;
    }
  while ($line =~ m{.%(\w+)\b})	# not at start of line please
    {
      my ($key) = ($1);
      die "expand_defines($key): $limit substitutions reached: '$line'\n" 
        if $count++ >= $limit;
      # the space in there is important, it prevents endless recursion
      $defines{$key} = "%{$key unknown}" unless defined $defines{$key};
      $line =~ s{%$key\b}{$defines{$key}}g;
    }
  print "$line\n" if $count;
  return $line;
}
openSUSE Build Service is sponsored by