File libreoffice-hyphen-gen-spec of Package libreoffice-hyphen

#!/usr/bin/perl -w

# This script generates a spec file from a spec template (*.in) and a dictionary data file

use strict;
use File::Copy;

# the original command line arguments, separated by single spaces
my $args = "@ARGV";

# the dictionary sources are counted from a non-zero value
my $sources_counter = 1000;
# FIXME: a global variable is needed to pass the data to the sort function
my $pdata_sort;

############################################################
# loading data
############################################################

# Finish the data of one locale and store them in the global data.
#
#   $pdata  ... reference to the global data hash
#   $locale ... locale of the dictionary, e.g. "en_US"
#   $pld    ... reference to the data collected for the locale
#
# Missing 'pack_suffix' and 'prov_lang' default tags are derived from the
# locale. Dies when the default 'license' tag is missing. All licenses
# (conditional and default) are registered in $pdata->{'license'}.
sub save_locale_data($$$)
{
    my ($pdata, $locale, $pld) = @_;

    # the language code is the locale without the "_..." part
    (my $lang_code = $locale) =~ s/_.*//;

    # the package suffix and the provided language default to the language code
    foreach my $tag ('pack_suffix', 'prov_lang') {
        next if (defined $pld->{'default'}{$tag});
        $pld->{'default'}{$tag} = $lang_code;
    }

    # required tags:
    #   - source is not required because one source tarball might provide more dictionaries
    #   - version is not checked at the moment
    #   - license must be defined
    die "Error: license tag is not defined for the locale $locale, line $.\n"
        unless (defined $pld->{'default'}{'license'});

    # finally save the locale data
    $pdata->{'dict'}{$locale} = $pld;

    # the optional condition is processed before the default
    my @conditions = ('default');
    unshift @conditions, $pld->{'condition_tag'} if (defined $pld->{'condition_tag'});

    # update the list of used licenses (for the source package)
    foreach my $condition_tag (@conditions) {
        next unless (defined $pld->{$condition_tag}{'license'});
        $pdata->{'license'}{$_} = 1 foreach (@{$pld->{$condition_tag}{'license'}});
    }
}

# Make sure that the hash for the given condition exists in the locale data.
#
#   $pld           ... reference to the locale data
#   $locale        ... locale of the dictionary (not used here)
#   $condition_tag ... 'default' or the condition line
#
# Only one non-default condition is allowed for one dictionary; the function
# dies when a second, different one is requested.
sub add_condition_tag($$$)
{
    my ($pld, $locale, $condition_tag) = @_;

    if ($condition_tag eq 'default') {
        # create the default hash only when it does not exist yet
        $pld->{'default'} = {} unless (defined $pld->{'default'});
        return;
    }

    unless (defined $pld->{'condition_tag'}) {
        # the first conditional tag; remember the condition and start with an empty hash
        $pld->{'condition_tag'} = $condition_tag;
        %{ $pld->{$condition_tag} } = ();
        return;
    }

    die "Error: Only one condition is supported for one dictionary, line $.\n"
        if ($pld->{'condition_tag'} ne $condition_tag);
}

# Store a single-value tag for the given condition.
#
#   $pld           ... reference to the locale data
#   $locale        ... locale of the dictionary
#   $condition_tag ... 'default' or the condition line
#   $tag, $value   ... tag name and its value
#
# Dies when the tag has already been defined for the condition.
sub save_locale_data_string($$$$$)
{
    my ($pld, $locale, $condition_tag, $tag, $value) = @_;

    add_condition_tag($pld, $locale, $condition_tag);

    if (defined $pld->{$condition_tag}{$tag}) {
        die "Error: Tag $tag defined twice, line $.\n";
    }

    $pld->{$condition_tag}{$tag} = $value;
}

# Append the items of a comma separated value to a list tag.
#
#   $pld           ... reference to the locale data
#   $locale        ... locale of the dictionary
#   $condition_tag ... 'default' or the condition line
#   $tag, $value   ... tag name and the comma separated list
#
# Unknown items of the 'types' tag are reported on STDERR but still stored.
sub add_locale_data_list($$$$$)
{
    my ($pld, $locale, $condition_tag, $tag, $value) = @_;

    add_condition_tag($pld, $locale, $condition_tag);

    my $check_type = ($tag eq 'types');

    foreach my $item (split(/,\s*/, $value)) {
        print STDERR "Warning: unknown type $item on line $.\n"
            if ($check_type && $item !~ m/^(myspell|hunspell|hyphen|thesaurus)$/);

        # push creates the list when the first item is added
        push @{$pld->{$condition_tag}{$tag}}, $item;
    }
}

# Parse the dictionary data file and fill the global data structure.
#
#   $pdata     ... reference to the global data hash that is filled
#   $data_file ... path to the data file
#
# The file consists of "tag: value" lines grouped into sections that start
# with the "locale:" tag. Lines can be wrapped into %if/%else/%endif; either
# the whole dictionary is conditional (%if precedes the locale tag) or some
# tags get a conditional and a default value (%if ... %else ... %endif).
# Text after '#' is a comment. Dies on any syntax error.
sub read_data($$)
{
    my ($pdata, $data_file) = @_;

    my $locale = undef;
    # pointer to locale data
    my $pld = undef;
    # %if line that has been read but not yet bound to a dictionary or to a tag
    my $condition = undef;
    # %if line that wraps the whole current dictionary
    my $condition_dict = undef;
    # key under which the tags are currently stored
    my $condition_tag = 'default';
    # set between %else and %endif
    my $condition_else = undef;
    # set after the %endif that closes a whole dictionary
    my $condition_dict_was = undef;

    # a lexical handle is used; the bareword DATA is the special __DATA__ handle
    open(my $data_fh, '<', $data_file) or die "Can't open '$data_file': $!\n";

    while (my $line = <$data_fh>) {
        chomp $line;
        # ignore comments
        $line =~ s/#.*$//;

        # skip empty lines, including the whitespace left in front of a removed comment
        next if ($line =~ m/^\s*$/);

        # %if
        if ($line =~ m/^(\%if.*)$/) {
            die "Error: FIXME: %if with more levels are not supported, line $.\n" if (defined $condition || defined $condition_else || $condition_tag ne 'default');
            $condition = "$1";
            next;
        }

        # %else
        if ($line =~ m/^\%else\s*$/) {
            die "Error: No %if defined for %else at line $.\n" unless ($condition_tag ne 'default');
            $condition_tag = 'default';
            $condition_else = 1;
            next;
        }

        # %endif
        if ($line =~ m/^\%endif\s*$/) {
            die "Error: Define the default doctionary using %else, line $.\n" if ($condition_tag ne 'default');
            if (defined $condition_else) {
                $condition_else = undef;
            } elsif (defined $condition_dict) {
                $condition_dict = undef;
                # the next line must define another dictionary (=> the locale: tag must follow)
                $condition_dict_was = 1;
            } else {
                die "Error: %endif does not match %if at line $.\n";
            }
            next;
        }

        # tags
        if ($line =~ m/^(\w+):\s*(.*)$/) {
            # support uppercase tag names
            my $tag = lc($1);
            my $value = $2;
            # remove blank characters from the end of the value
            $value =~ s/\s*$//;

            # test the length so that the value "0" is accepted
            die "Error: no value defined for the tag $tag, line $.\n" unless (length $value);

            if ($tag eq 'locale') {
                die "Error: locale $value already defined earlier, line $.\n" if (defined ($pdata->{'dict'}{$value}));
                # a section for new language starts => save the old data
                save_locale_data($pdata, $locale, $pld) if defined ($locale);
                $locale = $value;
                % {$pdata->{'dict'}{$locale}} = ();
                $pld = \% {$pdata->{'dict'}{$locale}};
                if (defined $condition) {
                    # the pending %if wraps the whole dictionary
                    $condition_dict = $condition;
                    $pld->{'contition_dict'} = $condition;
                    $condition = undef;
                }
                # good, this was the right place to put the %endif for the whole dictionary
                $condition_dict_was = undef;
                next;
            }

            if (defined $condition_dict_was) {
                die "Error: %endif on wrong place, line $.\n" .
                    "       You either need to put %if-%endif are the whole dictionary or you must\n" .
                    "       use :%if-%else-%endif to define a specific dictionary for a specific\n" .
                    "       distribution, line\n";
            }

            if (defined $condition) {
                # the pending %if applies to the following tags
                $condition_tag = $condition;
                $condition = undef;
            }

            # tags with a single value
            if ($tag eq 'encoding' ||
                $tag eq 'language' ||
                $tag eq 'pack_suffix' ||
                $tag eq 'prov_lang' ||
                $tag eq 'source' ||
                $tag eq 'url' ||
                $tag eq 'version') {
                save_locale_data_string($pld, $locale, $condition_tag, $tag, $value);
                next;
            }

            # tags with a comma separated list of values
            if ($tag eq 'dependency' ||
                $tag eq 'license' ||
                $tag eq 'types' ||
                $tag eq 'prep') {
                add_locale_data_list($pld, $locale, $condition_tag, $tag, $value);
                next;
            }

            # unknown tags fall through to the syntax error below
        }

        die "Synrax error in $data_file, line $.\n";
    }

    # save the last section; the handle is still open here so $. stays valid
    save_locale_data($pdata, $locale, $pld) if defined ($locale);
    close($data_fh);
}

############################################################
# writing sections
############################################################

##################################
# conditional lines
# Return the %prep hacks defined for the given condition, one per line.
#
#   $pld           ... reference to the locale data
#   $locale        ... locale of the dictionary (not used here)
#   $condition_tag ... 'default' or the condition line
#
# Returns an empty string when no hack is defined.
sub write_conditional_prep_hack($$$)
{
    my ($pld, $locale, $condition_tag) = @_;

    my $out = "";
    $out .= "$_\n" for (@{$pld->{$condition_tag}{'prep'}});

    return $out;
}

# Return the "%define <locale>_version <version>" line for the given condition.
#
#   $pld           ... reference to the locale data
#   $locale        ... locale of the dictionary
#   $condition_tag ... 'default' or the condition line
#
# Returns an empty string when the version is not defined for the condition.
sub write_conditional_version_definition($$$)
{
    my ($pld, $locale, $condition_tag) = @_;

    my $version = $pld->{$condition_tag}{'version'};
    return "" unless (defined $version);

    return "\%define ${locale}_version $version\n";
}

# Return the "%{S:<number>} \" line that refers to the source of the given condition.
#
#   $pld           ... reference to the locale data
#   $locale        ... locale of the dictionary (not used here)
#   $condition_tag ... 'default' or the condition line
#
# Returns an empty string when no source number is assigned for the condition.
sub write_conditional_sources_unpack($$$)
{
    my ($pld, $locale, $condition_tag) = @_;
    my $out = "";

    $out .= "    \%\{S:$pld->{$condition_tag}{'source_number'}\} \\\n" if (defined $pld->{$condition_tag}{'source_number'});

    # return the text explicitly; without this the result would be the value
    # of the last evaluated expression
    return $out;
}

# Return the shell "case" branch that sets the encoding for the locale.
#
#   $pld           ... reference to the locale data
#   $locale        ... locale of the dictionary
#   $condition_tag ... 'default' or the condition line
#
# Returns an empty string when the encoding is not defined for the condition.
sub write_conditional_encoding($$$)
{
    my ($pld, $locale, $condition_tag) = @_;

    my $encoding = $pld->{$condition_tag}{'encoding'};

    # the line is indented by one tab
    return defined $encoding
        ? "\t\"${locale}\") coding=\"$encoding\" ;;\n"
        : "";
}


##############################
# simple sections

# Return the "DO NOT EDIT" banner for the generated spec file.
# The banner mentions the command line stored in the global $args;
# the argument of the function is not used. There is no trailing newline.
sub write_do_not_edit_section($)
{
    my @banner = (
        "###################################################################",
        "## DO NOT EDIT THIS SPEC FILE",
        "## Generated by:",
        "## perl libreoffice-hyphen-gen-spec $args",
        "###################################################################",
    );

    return join("\n", @banner);
}

# Return the sorted list of all licenses used by the dictionaries,
# separated by ", ". The licenses are the keys of $pdata->{'license'}.
sub write_all_licenses_section($)
{
    my ($pdata) = @_;

    return join(", ", sort(keys %{$pdata->{'license'}}));
}



##############################
# all langs sections

# Return the "Provides:" line with the language provided by the dictionary.
#
#   $pdata  ... reference to the global data hash
#   $locale ... locale of the dictionary
sub write_provides_section($$)
{
    my ($pdata, $locale) = @_;
    my $pld = \%{ $pdata->{'dict'}{$locale} };

    # the same language is provided for both product names
    my $lang = $pld->{'default'}{'prov_lang'};

    return "Provides:       locale(libreoffice:$lang) locale(OpenOffice_org:$lang)\n";
}

# Return the "Source<number>:" lines for the sources of the dictionary.
#
#   $pdata  ... reference to the global data hash
#   $locale ... locale of the dictionary
#
# Each defined source gets the next number from the global $sources_counter;
# the number is remembered in the 'source_number' tag of the same condition.
sub write_sources_section($$)
{
    my ($pdata, $locale) = @_;
    my $pld = \%{ $pdata->{'dict'}{$locale} };
    my $out = "";

    # the optional condition is processed before the default
    my @conditions = ('default');
    unshift @conditions, $pld->{'condition_tag'} if (defined $pld->{'condition_tag'});

    foreach my $condition_tag (@conditions) {
        next unless (defined $pld->{$condition_tag}{'source'});

        my $number = $sources_counter++;
        $pld->{$condition_tag}{'source_number'} = $number;
        $out .= "Source${number}:     $pld->{$condition_tag}{'source'}\n";
    }

    return $out;
}

# Return the shell "case" branch that maps the locale to the package suffix.
#
#   $pdata  ... reference to the global data hash
#   $locale ... locale of the dictionary
#
# Returns an empty string when the default package suffix is not defined.
sub write_lang_name_section($$)
{
    my ($pdata, $locale) = @_;
    my $pld = \%{ $pdata->{'dict'}{$locale} };

    my $suffix = $pld->{'default'}{'pack_suffix'};
    return "" unless (defined $suffix);

    # the line is indented by one tab
    return "\t\"${locale}\") lang_name=\"$suffix\" ;;\n";
}

# Return the encoding lines for the dictionary, wrapped into the condition
# when there is one.
#
#   $pdata  ... reference to the global data hash
#   $locale ... locale of the dictionary
#
# Returns an empty string when no encoding is defined at all.
sub write_encoding_section($$)
{
    my ($pdata, $locale) = @_;
    my $pld = \%{ $pdata->{'dict'}{$locale} };
    my $out = "";

    my $condition_tag = $pld->{'condition_tag'};

    # the encoding might be defined for the default, for the condition, or for both
    my $has_encoding = (defined $pld->{'default'}{'encoding'}) ||
                       (defined $condition_tag && defined $pld->{$condition_tag}{'encoding'});

    if ($has_encoding) {
        $out .= write_conditional_lines($pld, $locale, \&write_conditional_encoding, 1);
    }

    return $out;
}

##############################
# optional langs sections

# Return the %package and %description sections of the dictionary subpackage.
#
#   $pdata  ... reference to the global data hash
#   $locale ... locale of the dictionary
#
# The version is defined via the %define lines when the dictionary has
# a condition; otherwise the default version is written directly.
sub write_metadata_section($$)
{
    my ($pdata, $locale) = @_;
    my $pld = \%{ $pdata->{'dict'}{$locale} };

    my $condition_tag = $pld->{'condition_tag'};

    # optional definitions of the <locale>_version macro
    my $out = "";
    $out .= write_conditional_lines($pld, $locale, \&write_conditional_version_definition, 0);

    # package header
    $out .= "\%package $pld->{'default'}{'pack_suffix'}\n";
    $out .= "License:        " . join (", ", @{$pld->{'default'}{'license'}}) . "\n";
    $out .= (defined $condition_tag)
        ? "Version:        %${locale}_version\n"
        : "Version:        $pld->{'default'}{'version'}\n";
    $out .= "Release:        1\n" .
            "Group:          Productivity/Office/Dictionary\n";
    $out .= "Summary:        $pld->{'default'}{'language'} Thesaurus Dictionary for OpenOffice.org\n";
    $out .= "Provides:       OpenOffice_org:/usr/lib/ooo-2.0/share/dict/ooo/th_${locale}_v2.dat\n";
    $out .= "Provides:       locale(libreoffice:$pld->{'default'}{'prov_lang'}) locale(OpenOffice_org:$pld->{'default'}{'prov_lang'})\n";

    # extra dependencies are complete spec lines
    if (defined $pld->{'default'}{'dependency'}) {
        $out .= join("", map { "$_\n" } @{$pld->{'default'}{'dependency'}});
    }

    # package description
    $out .= "\n";
    $out .= "%description $pld->{'default'}{'pack_suffix'}\n";
    $out .= "The $pld->{'default'}{'language'} thesaurus dictionary that can be used to look up for\n";
    $out .= "synonyms and related words in the OpenOffice.org office suite.\n";

    return $out;
}

# Return the lines that refer to the sources of the dictionary, wrapped into
# the condition when there is one.
#
#   $pdata  ... reference to the global data hash
#   $locale ... locale of the dictionary
sub write_sources_unpack_section($$)
{
    my ($pdata, $locale) = @_;

    my $pld = \%{ $pdata->{'dict'}{$locale} };
    my $writer = \&write_conditional_sources_unpack;

    # the default lines are always written
    return write_conditional_lines($pld, $locale, $writer, 1);
}

# dict-specific hacks in the %prep section
# Return the dictionary-specific hacks for the %prep section, wrapped into
# the condition when there is one.
#
#   $pdata  ... reference to the global data hash
#   $locale ... locale of the dictionary
#
# Returns an empty string when no hack is defined at all.
sub write_prep_hacks_section($$)
{
    my ($pdata, $locale) = @_;
    my $pld = \%{ $pdata->{'dict'}{$locale} };
    my $out = "";

    my $condition_tag = $pld->{'condition_tag'};

    # the hacks might be defined for the default, for the condition, or for both
    my $has_hacks = (defined $pld->{'default'}{'prep'}) ||
                    (defined $condition_tag && defined $pld->{$condition_tag}{'prep'});

    if ($has_hacks) {
        $out .= write_conditional_lines($pld, $locale, \&write_conditional_prep_hack, 1);
    }

    return $out;
}

# Return the "%files" line of the dictionary subpackage; the file list is
# read from "<locale>.list".
#
#   $pdata  ... reference to the global data hash
#   $locale ... locale of the dictionary
sub write_files_section($$)
{
    my ($pdata, $locale) = @_;
    my $pld = \%{ $pdata->{'dict'}{$locale} };

    my $suffix = $pld->{'default'}{'pack_suffix'};

    return "\%files -f ${locale}.list $suffix\n";
}

########################################################
# universal writing functions

# Comparison function for sort(); it orders the locales ($a, $b) by the
# default package suffix. The data are taken from the global $pdata_sort.
sub sort_dictionaries()
{
    my $dicts = $pdata_sort->{'dict'};

    my $left  = $dicts->{$a}->{'default'}{'pack_suffix'};
    my $right = $dicts->{$b}->{'default'}{'pack_suffix'};

    return $left cmp $right;
}

# the text is different on different distributions
# Write lines that might differ on different distributions.
#
#   $pld                     ... reference to the locale data
#   $locale                  ... locale of the dictionary
#   $write_conditional_lines ... reference to a function that is called with
#                                ($pld, $locale, $condition_tag) and returns
#                                the text for the given condition
#   $write_default           ... when true, the default text is written even
#                                when the dictionary has no condition
#
# When the dictionary has a condition and there is something to write, the
# result is "<condition>", the conditional text, "%else", the default text
# and "%endif". Otherwise it is the default text (if $write_default is true)
# or an empty string.
sub write_conditional_lines($$$$)
{
    my ($pld, $locale, $write_conditional_lines, $write_default ) = @_;
    my $out = "";

    my $condition_tag = $pld->{'condition_tag'};

    my $conditional_lines = "";
    $conditional_lines = $write_conditional_lines->($pld, $locale, $condition_tag) if (defined $condition_tag);
    # the default text is generated only once and reused below
    my $default_lines = $write_conditional_lines->($pld, $locale, 'default');

    # write the %if, %else, %endif lines only when the condition is defined and something to write is defined
    my $write_condition = undef;
    $write_condition = 1 if ($condition_tag && ($conditional_lines || $default_lines));

    if ($write_condition) {
        $out .= "$condition_tag\n";
        $out .= $conditional_lines;
        $out .= "\%else\n";
    }

    if ($write_condition || $write_default) {
        $out .= $default_lines;
    }

    if ($write_condition) {
        $out .= "\%endif\n";
    }

    return $out;
}

# the text is repeated for each locale
# it is used only for selected distributions
# entire dictionary is disabled on some distributions
# Write a section for each locale; the locales are sorted by the package name.
# The section of a dictionary that is packaged only under a condition is
# wrapped into that condition and "%endif".
#
#   $pdata         ... reference to the global data hash
#   $write_section ... reference to a function that is called with
#                      ($pdata, $locale) and returns the text for the locale
#   $delimiter     ... text added after the section of each locale
sub write_locale_sections_optional($$$)
{
    my ($pdata, $write_section, $delimiter) = @_;
    my $out = "";

    # the sort function reads the data from the global variable
    $pdata_sort = $pdata;
    my @locales = sort sort_dictionaries keys %{$pdata->{'dict'}};

    foreach my $locale (@locales) {
        my $dict = $pdata->{'dict'}{$locale};

        $out .= "$dict->{'contition_dict'}\n" if (defined $dict->{'contition_dict'});
        $out .= $write_section->($pdata, $locale);
        $out .= "\%endif\n" if (defined $dict->{'contition_dict'});
        $out .= "$delimiter";
    }

    return $out;
}

# the text is repeated for each locale
# it is used on all distributions
# Write a section for each locale; the locales are sorted by the package name.
# The text is used on all distributions, so no condition is written.
#
#   $pdata         ... reference to the global data hash
#   $write_section ... reference to a function that is called with
#                      ($pdata, $locale) and returns the text for the locale
sub write_locale_sections($$)
{
    my ($pdata, $write_section) = @_;

    # the sort function reads the data from the global variable
    $pdata_sort = $pdata;

    my $out = "";
    for my $locale (sort sort_dictionaries keys %{$pdata->{'dict'}}) {
        my $section = $write_section->($pdata, $locale);
        $out .= $section;
    }

    return $out;
}

# the text is not repeated for each locale
# Write a section that is not repeated for each locale.
#
#   $pdata         ... reference to the global data hash
#   $write_section ... reference to a function that is called with ($pdata)
#                      and returns the text of the section
sub write_simple_section($$)
{
    my ($pdata, $write_section) = @_;

    return $write_section->($pdata);
}

# Generate the spec file from the template.
#
#   $pdata         ... reference to the global data hash
#   $spec_template ... path to the template; the name must end with ".in"
#
# The spec file name is the template name without the ".in" suffix. Each
# @PLACEHOLDER@ in the template is replaced (once per line) with the
# generated text. Dies when the template name has no ".in" suffix or when
# a file can't be opened or written.
sub write_spec($$)
{
    my ($pdata, $spec_template) = @_;

    # the sources must be generated before the sources_unpack lines because
    # write_sources_section assigns the source numbers that are used there
    my $all_licenses = write_simple_section ($pdata, \&write_all_licenses_section);
    my $do_not_edit = write_simple_section ($pdata, \&write_do_not_edit_section);
    my $provides = write_locale_sections ($pdata, \&write_provides_section);
    my $sources = write_locale_sections ($pdata, \&write_sources_section);
#    my $metadata = write_locale_sections_optional ($pdata, \&write_metadata_section, "\n\n\n");
    my $sources_unpack = write_locale_sections_optional ($pdata, \&write_sources_unpack_section, "");
    my $prep_hacks = write_locale_sections_optional ($pdata, \&write_prep_hacks_section, "");
    my $lang_name = write_locale_sections ($pdata, \&write_lang_name_section);
    my $encoding = write_locale_sections ($pdata, \&write_encoding_section);
    my $files = write_locale_sections_optional ($pdata, \&write_files_section, "\n");

    # remove the ".in" suffix; the dot is escaped to match only a real dot
    my $spec = $spec_template;
    $spec =~ s/\.in$//;

    # without the suffix the output would overwrite the template that is being read
    die "Error: The spec template name must end with '.in': $spec_template\n" if ($spec eq $spec_template);

    print "Generating $spec...\n";

    open(my $template_fh, '<', $spec_template) or die "Can't open '$spec_template': $!\n";
    open(my $spec_fh, '>', $spec) or die "Can't open '$spec' for writing: $!\n";

    while (my $line = <$template_fh>) {
        $line =~ s/\@ALL_LICENSES\@/$all_licenses/;
        $line =~ s/\@DO_NOT_EDIT_COMMENT\@/$do_not_edit/;
        $line =~ s/\@PROVIDES\@/$provides/;
        $line =~ s/\@SOURCES\@/$sources/;
#        $line =~ s/\@METADATA\@/$metadata/;
        $line =~ s/\@SOURCES_UNPACK\@/$sources_unpack/;
        $line =~ s/\@PREP_HACKS\@/$prep_hacks/;
        $line =~ s/\@LANG_NAME\@/$lang_name/;
        $line =~ s/\@ENCODING\@/$encoding/;
        $line =~ s/\@FILES\@/$files/;

        print {$spec_fh} $line;
    }

    close($template_fh);
    # buffered write errors show up when the file is closed
    close($spec_fh) or die "Can't write '$spec': $!\n";
}

############################################################
# main stuff
############################################################

# Print a short help text to the standard output.
# The tool name matches the one written into the generated spec file header.
sub usage()
{
    print "This tool generates the help spec files\n\n" .

          "Usage:\n".
          "\tlibreoffice-hyphen-gen-spec [--help] spec_template.in data_file\n\n";
}

# info about data structure
# it is a hash, keys introduce perl-like structure items:
#	'dict'		... hash; key is the primary locale for the given dictionary, e.g. "en_US"
#			    the value is a hash that stores information about each dictionary using
#			    tags; the known tags are:
#	    'language'		hash(*) of string; language name, e.g Catalan, German, Norwegian Bokmaal
#	    'pack_suffix'	hash(*) of string; package name is normally defined by the language name (lowercase,
#				underscore instead of space); you might define a non-standard one
#				using this tag, e.g norsk-bokmaal for Norwegian Bokmaal
#	    'prov_lang'		hash(*) of string; most dictionaries provides the language defined by the locale;
#				you might force the full locale using this tag; just mention
#				the full locale here again, e.g. de_DE vs de_AT
#	    'version'		hash(*) of string; package version, e.g. 2.1.5
#	    'source'		hash(*) of string; source file name, e.g. catalan.oxt
#	    'source_number'	hash(*) of integers; number of the source file, e.g. 1000, 1001, 1002
#	    'license'		hash(*) of array of strings; comma separated list of dictionary licenses, e.g. GPLv2, LGPLv2.1
#	    'types'		hash(*) of array of strings; comma separated list of included dictionary types; possible values are:
#				myspell, hunspell, hyphen, thesaurus
#	    'url'		hash(*) of string; url where the dictionary sources was taken from, e.g.
#				http://extensions.services.openoffice.org/project/dict-catalan
#	    'encoding'		hash(*) of string; documentation files in non-UTF-8 encoding should be recoded; the original
#				encoding is guessed from the locale; you might define another encoding
#				here, e.g. ISO-8859-1 
#	    'dependency'	hash(*) of array of strings; extra dependency, e.g. Conflicts:      myspell-german-old
#	    'prep'		hash(*) of array of strings; extra hacks for the %prep section
#
#	    		    the above tags are taken from the data file; the hash also support some extra keys:
#	    'contition_dict'    string; defines a condition under which the dictionary is packaged, e.g. %if %suse_version <= 1120
#	'license'	... hash; key is the license used for a dictionary, e.g. LGPLv2; it is used to generate
#				licenses for the source package

#
# hash(*) of ...   - key is the condition under which the tag is defined; it is usually used to define a special tag
#		     for another distro; the key 'default' defines tags for the default distro, ...

my %data;
my $spec_template;
my $data_file;
my $help;

# process the command line: the first file is the spec template,
# the second one is the data file
foreach my $arg (@ARGV) {
    if ($arg eq '-h' || $arg eq '--help') {
        usage();
        exit 0;
    }

    # every other argument must be an existing file
    die "Error: The file does not exist: $arg\n" unless (-f $arg);

    if (! defined $spec_template) {
        $spec_template = $arg;
        next;
    }

    if (! defined $data_file) {
        $data_file = $arg;
        next;
    }

    die "Error: Too many arguments!\n";
}

defined $spec_template or die "Error: Spec file template is not defined, try --help";
defined $data_file or die "Error: Data file is not defined, try --help";

# load the dictionary definitions and generate the spec file
read_data(\%data, $data_file);
write_spec(\%data, $spec_template);
openSUSE Build Service is sponsored by