A new user interface for you! Read more...

File tesseract.spec of Package tesseract

# This is a rough draft that may only work on Fedora Core 6.
# Andrew Ziem, 25 May 2007
# Hacked to add the new languages as separate language packs.
# Ray Smith, 16 July 2007


# Preamble of the base package: identity, upstream sources, local patches
# and the build-time dependencies.
Name:           tesseract
Version:        2.03
# Version string of the language data tarballs (Source1 to Source6); it is
# tracked separately from the engine Version above.
%define LanguagePacksVersion 2.00
Release:        1%{?dist}
Summary:        Open source OCR Engine developed by HP Labs - now improved by Google

Group:          Applications/Multimedia
# SPDX identifier; the bare name "Apache License" does not say which version
# of the license applies.
License:        Apache-2.0
URL:            http://code.google.com/p/tesseract-ocr/
# Source0 is the engine itself, the remaining sources are one data tarball
# per supported language.
Source0:        http://tesseract-ocr.googlecode.com/files/tesseract-%{version}.tar.gz
Source1:        http://tesseract-ocr.googlecode.com/files/tesseract-%{LanguagePacksVersion}.eng.tar.gz
Source2:        http://tesseract-ocr.googlecode.com/files/tesseract-%{LanguagePacksVersion}.fra.tar.gz
Source3:        http://tesseract-ocr.googlecode.com/files/tesseract-%{LanguagePacksVersion}.ita.tar.gz
Source4:        http://tesseract-ocr.googlecode.com/files/tesseract-%{LanguagePacksVersion}.deu.tar.gz
Source5:        http://tesseract-ocr.googlecode.com/files/tesseract-%{LanguagePacksVersion}.spa.tar.gz
Source6:        http://tesseract-ocr.googlecode.com/files/tesseract-%{LanguagePacksVersion}.nld.tar.gz
# Local patches, applied in numeric order during the prep stage.
Patch0:         %{name}-%{version}-missing-includes.patch
Patch1:         %{name}-%{version}-remove-java.patch
Patch2:         %{name}-%{version}-patch.diff
BuildRoot:      %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n)

BuildRequires:  gcc-c++
BuildRequires:  libtiff-devel
BuildRequires:  java-devel

%package devel
Summary: Development files for tesseract
Group: Development/Libraries
# Depend on the exact build of the base package (version and release) so the
# headers and libraries cannot be combined with a different engine build.
Requires: %{name} = %{version}-%{release}


%description
The Tesseract OCR engine was one of the top 3 engines in the 1995 UNLV
Accuracy test. Since then it has had little work done on it, but it is
probably one of the most accurate open source OCR engines available. The
source code will read a binary, grey or color image and output text. A tiff
reader is built in that will read uncompressed TIFF images, or libtiff can
be added to read compressed images.

%description devel
tesseract libraries and includes

%prep
# Unpack the engine tarball (Source0) and change into its source tree.
%setup -q
# Apply the local patches in order. The explicit -P form is used because
# current rpm releases no longer accept the numbered patch macro spelling.
%patch -P 0 -p1
%patch -P 1 -p1
%patch -P 2 -p1
# Unpack every language data tarball into the source tree. The paths come
# from the Source tags, so each file name is declared in one place only.
for langpack in %{SOURCE1} %{SOURCE2} %{SOURCE3} %{SOURCE4} %{SOURCE5} %{SOURCE6}; do
    tar xzvf "$langpack"
done


%build
# Build with empty compiler flags instead of whatever the environment supplies.
export CFLAGS= CXXFLAGS=
# gcc 4.1 should be able to build this now, so the former gcc34 / g++34
# overrides for CC and CXX stay disabled.
# The configure macro is not used; the script is invoked directly and only
# the installation directories are passed in.
./configure \
    --bindir=%{_bindir} \
    --datadir=%{_datadir} \
    --libdir=%{_libdir} \
    --includedir=%{_includedir}
make %{?_smp_mflags}


%install
# Start from an empty staging tree.
rm -rf %{buildroot}
# Set the loader path to the first directory under /usr/lib* whose path
# contains "jli" (empty if there is none).
export LD_LIBRARY_PATH=$(find /usr/lib* -type d | grep jli | head -n 1)
# Install into the staging tree rather than the live filesystem.
make DESTDIR=%{buildroot} install


%clean
# Discard the staging tree after the packages have been written.
rm -rf %{buildroot}


%files
# Contents of the base package: documentation, the programs installed into
# the binary directory, and the configuration directories below tessdata.
%defattr(-,root,root,-)
%doc AUTHORS COPYING ChangeLog README
%{_bindir}/cntraining
%{_bindir}/mftraining
%{_bindir}/tesseract
%{_bindir}/unicharset_extractor
%{_bindir}/wordlist2dawg
# Only the tessdata directory itself is owned here, not everything in it;
# the per-language data files are listed by the language subpackages.
%dir %{_datadir}/tessdata
%{_datadir}/tessdata/configs
%{_datadir}/tessdata/confsets
%{_datadir}/tessdata/tessconfigs

%files devel
# Contents of the devel subpackage: the tesseract include directory and
# every file in the library directory whose name starts with libtesseract.
%defattr(-,root,root,-)
%{_includedir}/tesseract/
%{_libdir}/libtesseract*

%package eng
# English recognition data, shipped as a separate subpackage.
Group:          Applications/Multimedia
Summary:        English language pack for tesseract
# The data files are only useful together with the engine, so depend on the
# matching build of the base package.
Requires:       %{name} = %{version}-%{release}
%description eng
The %{name}-eng package contains the data files required to recognize English.

%files eng
%defattr(-,root,root,-)
# The tessdata directory entry is shared with the base package file list.
%dir %{_datadir}/tessdata
%{_datadir}/tessdata/eng.DangAmbigs
%{_datadir}/tessdata/eng.freq-dawg
%{_datadir}/tessdata/eng.inttemp
%{_datadir}/tessdata/eng.normproto
%{_datadir}/tessdata/eng.pffmtable
%{_datadir}/tessdata/eng.unicharset
%{_datadir}/tessdata/eng.user-words
%{_datadir}/tessdata/eng.word-dawg

%package fra
# French recognition data, shipped as a separate subpackage.
Group:          Applications/Multimedia
Summary:        French language pack for tesseract
# The data files are only useful together with the engine, so depend on the
# matching build of the base package.
Requires:       %{name} = %{version}-%{release}
%description fra
The %{name}-fra package contains the data files required to recognize French.

%files fra
%defattr(-,root,root,-)
# The tessdata directory entry is shared with the base package file list.
%dir %{_datadir}/tessdata
%{_datadir}/tessdata/fra.DangAmbigs
%{_datadir}/tessdata/fra.freq-dawg
%{_datadir}/tessdata/fra.inttemp
%{_datadir}/tessdata/fra.normproto
%{_datadir}/tessdata/fra.pffmtable
%{_datadir}/tessdata/fra.unicharset
%{_datadir}/tessdata/fra.user-words
%{_datadir}/tessdata/fra.word-dawg

%package ita
# Italian recognition data, shipped as a separate subpackage.
Group:          Applications/Multimedia
Summary:        Italian language pack for tesseract
# The data files are only useful together with the engine, so depend on the
# matching build of the base package.
Requires:       %{name} = %{version}-%{release}
%description ita
The %{name}-ita package contains the data files required to recognize Italian.

%files ita
%defattr(-,root,root,-)
# The tessdata directory entry is shared with the base package file list.
%dir %{_datadir}/tessdata
%{_datadir}/tessdata/ita.DangAmbigs
%{_datadir}/tessdata/ita.freq-dawg
%{_datadir}/tessdata/ita.inttemp
%{_datadir}/tessdata/ita.normproto
%{_datadir}/tessdata/ita.pffmtable
%{_datadir}/tessdata/ita.unicharset
%{_datadir}/tessdata/ita.user-words
%{_datadir}/tessdata/ita.word-dawg

%package deu
# German recognition data, shipped as a separate subpackage.
Group:          Applications/Multimedia
Summary:        German language pack for tesseract
# The data files are only useful together with the engine, so depend on the
# matching build of the base package.
Requires:       %{name} = %{version}-%{release}
%description deu
The %{name}-deu package contains the data files required to recognize German.

%files deu
%defattr(-,root,root,-)
# The tessdata directory entry is shared with the base package file list.
%dir %{_datadir}/tessdata
%{_datadir}/tessdata/deu.DangAmbigs
%{_datadir}/tessdata/deu.freq-dawg
%{_datadir}/tessdata/deu.inttemp
%{_datadir}/tessdata/deu.normproto
%{_datadir}/tessdata/deu.pffmtable
%{_datadir}/tessdata/deu.unicharset
%{_datadir}/tessdata/deu.user-words
%{_datadir}/tessdata/deu.word-dawg

%package spa
# Spanish recognition data, shipped as a separate subpackage.
Group:          Applications/Multimedia
Summary:        Spanish language pack for tesseract
# The data files are only useful together with the engine, so depend on the
# matching build of the base package.
Requires:       %{name} = %{version}-%{release}
%description spa
The %{name}-spa package contains the data files required to recognize Spanish.

%files spa
%defattr(-,root,root,-)
# The tessdata directory entry is shared with the base package file list.
%dir %{_datadir}/tessdata
%{_datadir}/tessdata/spa.DangAmbigs
%{_datadir}/tessdata/spa.freq-dawg
%{_datadir}/tessdata/spa.inttemp
%{_datadir}/tessdata/spa.normproto
%{_datadir}/tessdata/spa.pffmtable
%{_datadir}/tessdata/spa.unicharset
%{_datadir}/tessdata/spa.user-words
%{_datadir}/tessdata/spa.word-dawg

%package nld
# Dutch recognition data, shipped as a separate subpackage.
Group:          Applications/Multimedia
Summary:        Dutch language pack for tesseract
# The data files are only useful together with the engine, so depend on the
# matching build of the base package.
Requires:       %{name} = %{version}-%{release}
%description nld
The %{name}-nld package contains the data files required to recognize Dutch.

%files nld
%defattr(-,root,root,-)
# The tessdata directory entry is shared with the base package file list.
%dir %{_datadir}/tessdata
%{_datadir}/tessdata/nld.DangAmbigs
%{_datadir}/tessdata/nld.freq-dawg
%{_datadir}/tessdata/nld.inttemp
%{_datadir}/tessdata/nld.normproto
%{_datadir}/tessdata/nld.pffmtable
%{_datadir}/tessdata/nld.unicharset
%{_datadir}/tessdata/nld.user-words
%{_datadir}/tessdata/nld.word-dawg


%changelog