File libreoffice-hyphen-gen-spec of Package libreoffice-hyphen
#!/usr/bin/perl -w
# This script generates a spec file from a spec template (*.in) and a data file describing the dictionaries
use strict;
use File::Copy;
# command-line arguments joined into one string; it is echoed in the
# generated "DO NOT EDIT" header of the spec file
my $args = join ' ', @ARGV;
# start to count dictionary sources from a non-zero value
my $sources_counter = 1000;
# FIXME: we need a global variable to pass the data to the sort function
my $pdata_sort = undef;
############################################################
# loading data
############################################################
# Finalize the data collected for one locale and register it in the global
# data structure.
#
# Arguments:
#   $pdata  - reference to the global data hash
#   $locale - locale of the dictionary, e.g. "en_US"
#   $pld    - reference to the hash with the data of this locale
#
# The default 'pack_suffix' and 'prov_lang' fall back to the language part
# of the locale. Dies when the default 'license' tag is missing. The locale
# data are stored in $pdata->{'dict'}{$locale} and every license found is
# recorded as a key of $pdata->{'license'}.
sub save_locale_data($$$)
{
    my ($pdata, $locale, $pld) = @_;

    # the optional condition (if any) is processed before 'default'
    my @conditions = ('default');
    unshift @conditions, $pld->{'condition_tag'} if (defined $pld->{'condition_tag'});

    # both the package suffix and the provided language default to the
    # language code taken from the locale
    foreach my $tag ('pack_suffix', 'prov_lang') {
        next if (defined $pld->{'default'}{$tag});
        my $lang = $locale;
        $lang =~ s/_.*//;
        $pld->{'default'}{$tag} = $lang;
    }

    # required tags:
    #   source is not required because one source tarball might provide
    #   more dictionaries
    #   license is required
    #   the check for the version tag is currently disabled
    defined $pld->{'default'}{'license'}
        or die "Error: license tag is not defined for the locale $locale, line $.\n";

    # finally save the locale data
    $pdata->{'dict'}{$locale} = $pld;

    # update the list of used licenses (for the source package)
    foreach my $condition_tag (@conditions) {
        my $licenses = $pld->{$condition_tag}{'license'};
        next unless (defined $licenses);
        foreach my $license (@{$licenses}) {
            $pdata->{'license'}{$license} = 1;
        }
    }
}
# Make sure that the hash for the given condition exists in the locale data.
#
# Arguments:
#   $pld           - reference to the hash with the data of one locale
#   $locale        - locale of the dictionary (not used here)
#   $condition_tag - 'default' or the condition line the tags belong to
#
# A non-default condition is remembered in $pld->{'condition_tag'}; dies
# when a second, different condition is used for the same dictionary.
sub add_condition_tag($$$)
{
    my ($pld, $locale, $condition_tag) = @_;

    if ($condition_tag eq 'default') {
        # create the default section on first use
        % {$pld->{'default'}} = () unless (defined $pld->{'default'});
    } elsif (! defined $pld->{'condition_tag'}) {
        # the first conditional tag of this dictionary
        $pld->{'condition_tag'} = $condition_tag;
        % {$pld->{$condition_tag}} = ();
    } elsif ($pld->{'condition_tag'} ne $condition_tag) {
        die "Error: Only one condition is supported for one dictionary, line $.\n";
    }
}
# Store a single-value tag in the locale data.
#
# Arguments:
#   $pld           - reference to the hash with the data of one locale
#   $locale        - locale of the dictionary
#   $condition_tag - 'default' or the condition line the tag belongs to
#   $tag           - tag name
#   $value         - tag value
#
# Dies when the tag has already been defined for the given condition.
sub save_locale_data_string($$$$$)
{
    my ($pld, $locale, $condition_tag, $tag, $value) = @_;

    add_condition_tag($pld, $locale, $condition_tag);

    my $section = \% {$pld->{$condition_tag}};
    defined $section->{$tag}
        and die "Error: Tag $tag defined twice, line $.\n";
    $section->{$tag} = $value;
}
# Append the items of a comma separated value to a list tag.
#
# Arguments:
#   $pld           - reference to the hash with the data of one locale
#   $locale        - locale of the dictionary
#   $condition_tag - 'default' or the condition line the tag belongs to
#   $tag           - tag name
#   $value         - comma separated list of items
#
# Prints a warning to STDERR for unknown items of the 'types' tag; such
# items are stored anyway.
sub add_locale_data_list($$$$$)
{
    my ($pld, $locale, $condition_tag, $tag, $value) = @_;

    add_condition_tag($pld, $locale, $condition_tag);

    my $is_types_tag = ($tag eq 'types');
    foreach my $item ( split(/,\s*/, $value) ) {
        print STDERR "Warning: unknown type $item on line $.\n"
            if ( $is_types_tag && $item !~ m/^(myspell|hunspell|hyphen|thesaurus)$/ );
        # push creates the list when the first item is added
        push @ {$pld->{$condition_tag}{$tag}}, $item;
    }
}
# Parse the dictionary data file and fill the global data structure.
#
# Arguments:
#   $pdata     - reference to the global data hash; the dictionaries are
#                stored in $pdata->{'dict'}, the licenses in $pdata->{'license'}
#   $data_file - path to the data file
#
# The file consists of comments (#...), blank lines, %if/%else/%endif lines
# and "tag: value" lines; the 'locale' tag starts a new dictionary.
# Dies when the file cannot be opened or on any syntax error.
sub read_data($$)
{
    my ($pdata, $data_file) = @_;
    my $locale = undef;
    # pointer to locale data
    my $pld = undef;
    # %if line that has been read but not yet bound to a dictionary or a tag
    my $condition = undef;
    # %if line that guards the whole current dictionary
    my $condition_dict = undef;
    # section where the tags are stored ('default' or the %if line)
    my $condition_tag = 'default';
    # set between %else and %endif
    my $condition_else = undef;
    # set after the %endif that closed a whole dictionary
    my $condition_dict_was = undef;

    # lexical handle and 3-argument open; the bareword DATA is Perl's
    # special handle name and the 2-argument form interprets the file name
    open(my $data_fh, '<', $data_file) or die "Can't open '$data_file': $!\n";
    while ( my $line = <$data_fh> ) {
        chomp $line;
        # ignore comments
        $line =~ s/#.*$//;
        # empty line (or a line that included only blanks and a comment)
        if ( $line =~ m/^\s*$/ ) {
            next;
        }
        # %if
        if ( $line =~ m/^(\%if.*)$/ ) {
            die "Error: FIXME: %if with more levels are not supported, line $.\n" if (defined $condition || defined $condition_else || $condition_tag ne 'default');
            $condition = "$1";
            next;
        }
        # %else
        if ( $line =~ m/^\%else\s*$/ ) {
            die "Error: No %if defined for %else at line $.\n" unless ($condition_tag ne 'default');
            $condition_tag = 'default';
            $condition_else = 1;
            next;
        }
        # %endif
        if ( $line =~ m/^\%endif\s*$/ ) {
            die "Error: Define the default doctionary using %else, line $.\n" if ($condition_tag ne 'default');
            if (defined $condition_else) {
                $condition_else = undef;
            } elsif (defined $condition_dict) {
                $condition_dict = undef;
                # the next line must define another dictionary (=> the locale: tag must follow)
                $condition_dict_was = 1;
            } else {
                die "Error: %endif does not match %if at line $.\n";
            }
            next;
        }
        # tags
        if ( $line =~ m/^(\w+):\s*(.*)$/ ) {
            # support uppercase tag names
            my $tag = lc($1);
            my $value = $2;
            # remove blank characters from the end of the value
            $value =~ s/\s*$//;
            # check the length so that the value "0" is accepted
            die "Error: no value defined for the tag $tag, line $.\n" unless (length $value);
            if ( $tag eq 'locale' ) {
                die "Error: locale $value already defined earlier, line $.\n" if ( defined ($pdata->{'dict'}{$value}) );
                # a section for new language starts => save the old data
                save_locale_data($pdata, $locale, $pld) if defined ($locale);
                $locale = $value;
                % {$pdata->{'dict'}{$locale}} = ();
                $pld = \% {$pdata->{'dict'}{$locale}};
                if (defined $condition) {
                    # the pending %if guards the whole dictionary
                    $condition_dict = $condition;
                    $pld->{'contition_dict'} = $condition;
                    $condition = undef;
                }
                # good, this was the right place to put the %endif for the whole dictionary
                $condition_dict_was = undef;
                next;
            }
            if (defined $condition_dict_was) {
                die "Error: %endif on wrong place, line $.\n" .
                    " You either need to put %if-%endif are the whole dictionary or you must\n" .
                    " use :%if-%else-%endif to define a specific dictionary for a specific\n" .
                    " distribution, line\n";
            }
            if (defined $condition) {
                # the pending %if guards only the following tags
                $condition_tag = $condition;
                $condition = undef;
            }
            if ( $tag eq 'encoding' ||
                 $tag eq 'language' ||
                 $tag eq 'pack_suffix' ||
                 $tag eq 'prov_lang' ||
                 $tag eq 'source' ||
                 $tag eq 'url' ||
                 $tag eq 'version' ) {
                save_locale_data_string($pld, $locale, $condition_tag, $tag, $value);
                next;
            }
            if ( $tag eq 'dependency' ||
                 $tag eq 'license' ||
                 $tag eq 'types' ||
                 $tag eq 'prep' ) {
                add_locale_data_list($pld, $locale, $condition_tag, $tag, $value);
                next;
            }
        }
        die "Synrax error in $data_file, line $.\n";
    }
    # save the last dictionary; done before close so that $. is still valid
    save_locale_data($pdata, $locale, $pld) if defined ($locale);
    close($data_fh);
}
############################################################
# writing sections
############################################################
##################################
# conditional lines
# Return the extra %prep lines defined by the 'prep' tag for the given
# condition, one line per item; returns an empty string when there are none.
#
# Arguments:
#   $pld           - reference to the hash with the data of one locale
#   $locale        - locale of the dictionary (not used here)
#   $condition_tag - 'default' or the condition line
sub write_conditional_prep_hack($$$)
{
    my ($pld, $locale, $condition_tag) = @_;
    my @lines = ();
    foreach my $hack (@{$pld->{$condition_tag}{'prep'}}) {
        push @lines, "$hack\n";
    }
    return join("", @lines);
}
# Return the "%define <locale>_version <version>" line for the given
# condition, or an empty string when no version is defined there.
#
# Arguments:
#   $pld           - reference to the hash with the data of one locale
#   $locale        - locale of the dictionary
#   $condition_tag - 'default' or the condition line
sub write_conditional_version_definition($$$)
{
    my ($pld, $locale, $condition_tag) = @_;
    my $version = $pld->{$condition_tag}{'version'};
    return "" unless (defined $version);
    return "\%define ${locale}_version ${version}\n";
}
# Return the line that refers to the numbered source (" %{S:<number>} \")
# for the given condition, or an empty string when no source number has
# been assigned there.
#
# Arguments:
#   $pld           - reference to the hash with the data of one locale
#   $locale        - locale of the dictionary (not used here)
#   $condition_tag - 'default' or the condition line
sub write_conditional_sources_unpack($$$)
{
    my ($pld, $locale, $condition_tag) = @_;
    my $out = "";
    $out .= " \%\{S:$pld->{$condition_tag}{'source_number'}\} \\\n" if (defined $pld->{$condition_tag}{'source_number'});
    # return the result explicitly instead of relying on the value of the
    # last evaluated statement
    return $out;
}
# Return the shell "case" branch that sets the encoding for the locale, or
# an empty string when no encoding is defined for the given condition.
#
# Arguments:
#   $pld           - reference to the hash with the data of one locale
#   $locale        - locale of the dictionary
#   $condition_tag - 'default' or the condition line
sub write_conditional_encoding($$$)
{
    my ($pld, $locale, $condition_tag) = @_;
    my $encoding = $pld->{$condition_tag}{'encoding'};
    return (defined $encoding) ? " \"${locale}\") coding=\"${encoding}\" ;;\n" : "";
}
##############################
# simple sections
# Return the "DO NOT EDIT" header comment for the generated spec file.
# It mentions the command line arguments stored in the global $args.
# The single parameter is not used. The result has no trailing newline.
sub write_do_not_edit_section($)
{
    my @header = (
        "###################################################################",
        "## DO NOT EDIT THIS SPEC FILE",
        "## Generated by:",
        "## perl libreoffice-hyphen-gen-spec $args",
        "###################################################################",
    );
    return join("\n", @header);
}
# Return the sorted, comma separated list of all licenses recorded in
# $pdata->{'license'}.
#
# Arguments:
#   $pdata - reference to the global data hash
sub write_all_licenses_section($)
{
    my ($pdata) = @_;
    return join(", ", sort (keys %{$pdata->{'license'}}));
}
##############################
# all langs sections
# Return the "Provides:" line with the language provided by the dictionary
# of the given locale (the default 'prov_lang' tag).
#
# Arguments:
#   $pdata  - reference to the global data hash
#   $locale - locale of the dictionary
sub write_provides_section($$)
{
    my ($pdata, $locale) = @_;
    my $lang = $pdata->{'dict'}{$locale}{'default'}{'prov_lang'};
    return "Provides: locale(libreoffice:${lang}) locale(OpenOffice_org:${lang})\n";
}
# Return the "Source<number>: <file>" lines for the dictionary of the given
# locale and remember the assigned numbers in the 'source_number' tag.
# The numbers are taken from the global counter $sources_counter; the
# conditional source (if any) is numbered before the default one.
#
# Arguments:
#   $pdata  - reference to the global data hash
#   $locale - locale of the dictionary
sub write_sources_section($$)
{
    my ($pdata, $locale) = @_;
    my $pld = \% {$pdata->{'dict'}{$locale}};

    my @conditions = ('default');
    unshift @conditions, $pld->{'condition_tag'} if (defined $pld->{'condition_tag'});

    my @lines = ();
    foreach my $condition_tag (@conditions) {
        my $section = \% {$pld->{$condition_tag}};
        next unless (defined $section->{'source'});
        $section->{'source_number'} = $sources_counter++;
        push @lines, "Source$section->{'source_number'}: $section->{'source'}\n";
    }
    return join("", @lines);
}
# Return the shell "case" branch that maps the locale to the package
# suffix, or an empty string when the default 'pack_suffix' is not defined.
#
# Arguments:
#   $pdata  - reference to the global data hash
#   $locale - locale of the dictionary
sub write_lang_name_section($$)
{
    my ($pdata, $locale) = @_;
    my $pack_suffix = $pdata->{'dict'}{$locale}{'default'}{'pack_suffix'};
    return "" unless (defined $pack_suffix);
    return " \"${locale}\") lang_name=\"${pack_suffix}\" ;;\n";
}
# Return the encoding lines for the dictionary of the given locale,
# wrapped into the condition lines when the dictionary has a condition.
# Returns an empty string when no encoding is defined at all.
#
# Arguments:
#   $pdata  - reference to the global data hash
#   $locale - locale of the dictionary
sub write_encoding_section($$)
{
    my ($pdata, $locale) = @_;
    my $pld = \% {$pdata->{'dict'}{$locale}};
    my $condition_tag = $pld->{'condition_tag'};

    my $has_encoding = (defined $pld->{'default'}{'encoding'}) ||
                       (defined $condition_tag && defined $pld->{$condition_tag}{'encoding'});
    return "" unless ($has_encoding);

    my $out = "";
    $out .= write_conditional_lines($pld, $locale, \&write_conditional_encoding, 1);
    return $out;
}
##############################
# optional langs sections
# Return the package metadata (%package and %description sections) for the
# dictionary of the given locale.
#
# Arguments:
#   $pdata  - reference to the global data hash
#   $locale - locale of the dictionary
#
# The version is taken from the %<locale>_version macro when the dictionary
# has a condition; otherwise the default 'version' tag is used directly.
sub write_metadata_section($$)
{
    my ($pdata, $locale) = @_;
    my $pld = \% {$pdata->{'dict'}{$locale}};
    my $condition_tag = $pld->{'condition_tag'};

    # the version macro definitions come first
    my $out = "";
    $out .= write_conditional_lines($pld, $locale, \&write_conditional_version_definition, 0);

    my $default = \% {$pld->{'default'}};

    $out .= "\%package $default->{'pack_suffix'}\n";
    $out .= "License: " . join (", ", @{$default->{'license'}}) . "\n";
    $out .= (defined $condition_tag)
        ? "Version: %${locale}_version\n"
        : "Version: $default->{'version'}\n";
    $out .= join("",
        "Release: 1\n",
        "Group: Productivity/Office/Dictionary\n",
        "Summary: $default->{'language'} Thesaurus Dictionary for OpenOffice.org\n",
        "Provides: OpenOffice_org:/usr/lib/ooo-2.0/share/dict/ooo/th_${locale}_v2.dat\n",
        "Provides: locale(libreoffice:$default->{'prov_lang'}) locale(OpenOffice_org:$default->{'prov_lang'})\n");

    # extra dependencies are written as they are, one per line
    if (defined $default->{'dependency'}) {
        $out .= join("", map { "$_\n" } @{$default->{'dependency'}});
    }

    $out .= join("",
        "\n",
        "%description $default->{'pack_suffix'}\n",
        "The $default->{'language'} thesaurus dictionary that can be used to look up for\n",
        "synonyms and related words in the OpenOffice.org office suite.\n");
    return $out;
}
# Return the lines that refer to the numbered sources of the dictionary of
# the given locale, wrapped into the condition lines when needed.
#
# Arguments:
#   $pdata  - reference to the global data hash
#   $locale - locale of the dictionary
sub write_sources_unpack_section($$)
{
    my ($pdata, $locale) = @_;
    my $pld = \% {$pdata->{'dict'}{$locale}};
    my $write_line = \&write_conditional_sources_unpack;
    return write_conditional_lines($pld, $locale, $write_line, 1);
}
# dict-specific hacks in the %prep section
# Return the dictionary specific hacks for the %prep section, wrapped into
# the condition lines when the dictionary has a condition. Returns an empty
# string when no 'prep' tag is defined at all.
#
# Arguments:
#   $pdata  - reference to the global data hash
#   $locale - locale of the dictionary
sub write_prep_hacks_section($$)
{
    my ($pdata, $locale) = @_;
    my $pld = \% {$pdata->{'dict'}{$locale}};
    my $condition_tag = $pld->{'condition_tag'};

    my $has_prep = (defined $pld->{'default'}{'prep'}) ||
                   (defined $condition_tag && defined $pld->{$condition_tag}{'prep'});
    return "" unless ($has_prep);

    my $out = "";
    $out .= write_conditional_lines($pld, $locale, \&write_conditional_prep_hack, 1);
    return $out;
}
# Return the "%files" line for the dictionary of the given locale; the file
# list is named after the locale, the package after the default
# 'pack_suffix' tag.
#
# Arguments:
#   $pdata  - reference to the global data hash
#   $locale - locale of the dictionary
sub write_files_section($$)
{
    my ($pdata, $locale) = @_;
    my $pack_suffix = $pdata->{'dict'}{$locale}{'default'}{'pack_suffix'};
    return "\%files -f ${locale}.list ${pack_suffix}\n";
}
########################################################
# universal writing functions
# Comparison function for sort(): orders the locales in $a and $b by the
# package suffix of their dictionaries. The data are taken from the global
# $pdata_sort, which must be set before sorting.
sub sort_dictionaries()
{
    my $dict = $pdata_sort->{'dict'};
    my $left_suffix = $dict->{$a}->{'default'}{'pack_suffix'};
    my $right_suffix = $dict->{$b}->{'default'}{'pack_suffix'};
    # sort the dictionaries by the package name
    $left_suffix cmp $right_suffix;
}
# the text is different on different distributions
# Write lines that might differ between distributions.
#
# Arguments:
#   $pld                     - reference to the hash with the data of one locale
#   $locale                  - locale of the dictionary
#   $write_conditional_lines - code reference called as
#                              ($pld, $locale, $condition_tag); it returns the
#                              lines for the given condition
#   $write_default           - when true, the default lines are written even
#                              when the dictionary has no condition
#
# Returns the text
#     <condition line>
#     <conditional lines>
#     %else
#     <default lines>
#     %endif
# when the dictionary has a condition and there is something to write.
# Otherwise returns only the default lines (when $write_default is true) or
# an empty string.
#
# The callback is called at most once per condition; the default lines are
# computed once and reused.
sub write_conditional_lines($$$$)
{
    my ($pld, $locale, $write_conditional_lines, $write_default ) = @_;
    my $condition_tag = $pld->{'condition_tag'};

    my $conditional_lines = "";
    $conditional_lines = $write_conditional_lines->($pld, $locale, $condition_tag) if (defined $condition_tag);
    my $default_lines = $write_conditional_lines->($pld, $locale, 'default');

    # write the %if, %else, %endif lines only when the condition is defined
    # and something to write is defined
    my $write_condition = ($condition_tag && ($conditional_lines || $default_lines));

    my $out = "";
    if ($write_condition) {
        $out .= "$condition_tag\n";
        $out .= $conditional_lines;
        $out .= "\%else\n";
        $out .= $default_lines;
        $out .= "\%endif\n";
    } elsif ($write_default) {
        $out .= $default_lines;
    }
    return $out;
}
# the text is repeated for each locale
# it is used only for selected distributions
# entire dictionary is disabled on some distributions
# Write a section that is repeated for each locale and that is wrapped into
# the condition of the whole dictionary ('contition_dict') when the
# dictionary is packaged only on some distributions.
#
# Arguments:
#   $pdata         - reference to the global data hash
#   $write_section - code reference called as ($pdata, $locale); it returns
#                    the text for one locale
#   $delimiter     - text appended after each locale
#
# The locales are processed in the order given by sort_dictionaries().
sub write_locale_sections_optional($$$)
{
    my ($pdata, $write_section, $delimiter) = @_;
    # the sort function reads the data from the global variable
    $pdata_sort = $pdata;

    my @chunks = ();
    foreach my $locale (sort sort_dictionaries keys %{$pdata->{'dict'}}) {
        my $dict_condition = $pdata->{'dict'}{$locale}->{'contition_dict'};
        my $section = $write_section->($pdata, $locale);
        if (defined $dict_condition) {
            push @chunks, "$dict_condition\n", $section, "\%endif\n";
        } else {
            push @chunks, $section;
        }
        push @chunks, "$delimiter";
    }
    return join("", @chunks);
}
# the text is repeated for each locale
# it is used on all distributions
# Write a section that is repeated for each locale and that is used on all
# distributions.
#
# Arguments:
#   $pdata         - reference to the global data hash
#   $write_section - code reference called as ($pdata, $locale); it returns
#                    the text for one locale
#
# The locales are processed in the order given by sort_dictionaries().
sub write_locale_sections($$)
{
    my ($pdata, $write_section) = @_;
    # the sort function reads the data from the global variable
    $pdata_sort = $pdata;

    my @sections = ();
    foreach my $locale (sort sort_dictionaries keys %{$pdata->{'dict'}}) {
        my $section = $write_section->($pdata, $locale);
        push @sections, $section;
    }
    return join("", @sections);
}
# the text is not repeated for each locale
# Write a section that is not repeated for each locale.
#
# Arguments:
#   $pdata         - reference to the global data hash
#   $write_section - code reference called as ($pdata); its result is returned
sub write_simple_section($$)
{
    my ($pdata, $write_section) = @_;
    return $write_section->($pdata);
}
# Generate the spec file from the template.
#
# Arguments:
#   $pdata         - reference to the global data hash
#   $spec_template - path to the template; the name must end with ".in",
#                    the spec file name is the template name without it
#
# The first occurrence of each @PLACEHOLDER@ on a template line is replaced
# by the generated text. Dies when the template name does not end with
# ".in" (the template would be overwritten otherwise) and when a file
# cannot be opened or written.
sub write_spec($$)
{
    my ($pdata, $spec_template) = @_;

    # NOTE: the sources section must be generated before the sources unpack
    # section because it assigns the source numbers
    my $all_licenses = write_simple_section ($pdata, \&write_all_licenses_section);
    my $do_not_edit = write_simple_section ($pdata, \&write_do_not_edit_section);
    my $provides = write_locale_sections ($pdata, \&write_provides_section);
    my $sources = write_locale_sections ($pdata, \&write_sources_section);
    # the metadata section is currently disabled:
    # my $metadata = write_locale_sections_optional ($pdata, \&write_metadata_section, "\n\n\n");
    my $sources_unpack = write_locale_sections_optional ($pdata, \&write_sources_unpack_section, "");
    my $prep_hacks = write_locale_sections_optional ($pdata, \&write_prep_hacks_section, "");
    my $lang_name = write_locale_sections ($pdata, \&write_lang_name_section);
    my $encoding = write_locale_sections ($pdata, \&write_encoding_section);
    my $files = write_locale_sections_optional ($pdata, \&write_files_section, "\n");

    # placeholders in the order in which they are replaced on each line
    my @substitutions = (
        [ 'ALL_LICENSES', $all_licenses ],
        [ 'DO_NOT_EDIT_COMMENT', $do_not_edit ],
        [ 'PROVIDES', $provides ],
        [ 'SOURCES', $sources ],
        # [ 'METADATA', $metadata ],
        [ 'SOURCES_UNPACK', $sources_unpack ],
        [ 'PREP_HACKS', $prep_hacks ],
        [ 'LANG_NAME', $lang_name ],
        [ 'ENCODING', $encoding ],
        [ 'FILES', $files ],
    );

    # strip the literal ".in" suffix; the dot must be escaped, otherwise
    # any character followed by "in" would be removed
    my $spec = $spec_template;
    $spec =~ s/\.in$//;
    # never write the output over the template itself
    die "Error: The spec template name must end with '.in': $spec_template\n" if ($spec eq $spec_template);

    print "Generating $spec...\n";
    open(my $template_fh, '<', $spec_template) or die "Can't open '$spec_template': $!\n";
    open(my $spec_fh, '>', $spec) or die "Can't open '$spec' for writing: $!\n";
    while ( my $line = <$template_fh> ) {
        foreach my $substitution (@substitutions) {
            my ($placeholder, $text) = @{$substitution};
            $line =~ s/\@$placeholder\@/$text/;
        }
        print $spec_fh $line;
    }
    close($template_fh);
    # buffered write errors are reported by close
    close($spec_fh) or die "Can't write '$spec': $!\n";
}
############################################################
# main stuff
############################################################
# Print a short help for this tool to the currently selected output handle.
# The tool name matches the one written into the "DO NOT EDIT" header of
# the generated spec file.
sub usage()
{
    print "This tool generates the spec file from a spec template and a data file\n\n" .
          "Usage:\n".
          "\tlibreoffice-hyphen-gen-spec [--help] spec_template.in data_file\n\n";
}
# info about data structure
# it is a hash, keys introduce perl-like structure items:
# 'dict' ... hash; key is the primary locale for the given dictionary, e.g. "en_US"
# the value is a hash that stores information about each dictionary using
# tags; the known tags are:
# 'language' hash(*) of string; language name, e.g Catalan, German, Norwegian Bokmaal
# 'pack_suffix' hash(*) of string; package name is normally defined by the language name (lowercase,
# underscore instead of space); you might define a non-standard one
# using this tag, e.g. norsk-bokmaal for Norwegian Bokmaal
# 'prov_lang' hash(*) of string; most dictionaries provide the language defined by the locale;
# you might force the full locale using this tag; just mention
# the full locale here again, e.g. de_DE vs de_AT
# 'version' hash(*) of string; package version, e.g. 2.1.5
# 'source' hash(*) of string; source file name, e.g. catalan.oxt
# 'source_number' hash(*) of integers; number of the source file, e.g. 1000, 1001, 1002
# 'license' hash(*) of array of strings; comma separated list of dictionary licenses, e.g. GPLv2, LGPLv2.1
# 'types' hash(*) of array of strings; comma separated list of included dictionary types; possible values are:
# myspell, hunspell, hyphen, thesaurus
# 'url' hash(*) of string; url where the dictionary sources were taken from, e.g.
# http://extensions.services.openoffice.org/project/dict-catalan
# 'encoding' hash(*) of string; documentation files in non-UTF-8 encoding should be recoded; the original
# encoding is guessed from the locale; you might define another encoding
# here, e.g. ISO-8859-1
# 'dependency' hash(*) of array of strings; extra dependency, e.g. Conflicts: myspell-german-old
# 'prep' hash(*) of array of strings; extra hacks for the %prep section
#
# the above tags are taken from the data file; the hash also support some extra keys:
# 'contition_dict' string; defines a condition under which the dictionary is packaged, e.g. %if %suse_version <= 1120
# 'license' ... hash; key is the license used for a dictionary, e.g. LGPLv2; it is used to generate
# licenses for the source package
#
# hash(*) of ... - key is the condition under which the tag is defined; it is usually used to define a special tag
# for another distro; the key 'default' defines tags for the default distro, ...
# the global data structure filled by read_data() and used by write_spec()
my %data;
# positional arguments: the spec template comes first, the data file second
my $spec_template;
my $data_file;
my $help;
foreach my $arg (@ARGV) {
    # the help is printed immediately and nothing else is done
    if ($arg eq '--help' || $arg eq '-h') {
        usage();
        exit 0;
    }
    # any other argument must be an existing file
    die "Error: The file does not exist: $arg\n" unless (-f $arg);
    die "Error: Too many arguments!\n" if (defined $spec_template && defined $data_file);
    if (defined $spec_template) {
        $data_file = $arg;
    } else {
        $spec_template = $arg;
    }
}
defined $spec_template or die "Error: Spec file template is not defined, try --help";
defined $data_file or die "Error: Data file is not defined, try --help";
read_data(\%data, $data_file);
write_spec(\%data, $spec_template);