# File python-ocrmypdf.spec of Package python-ocrmypdf

#
# spec file for package python-ocrmypdf
#
# Copyright (c) 2021 SUSE LLC
#
# All modifications and additions to the file contributed by third parties
# remain the property of their copyright owners, unless otherwise agreed
# upon. The license for this file, and modifications and additions to the
# file, is the same license as for the pristine package itself (unless the
# license for the pristine package is not an Open Source License, in which
# case the license is the MIT License). An "Open Source License" is a
# license that conforms to the Open Source Definition (Version 1.9)
# published by the Open Source Initiative.

# Please submit bugfixes or comments via https://bugs.opensuse.org/
#

# Flavor selection: define the pythons macro as python3 only and set skip_python2.
%define pythons python3
%define skip_python2 1
# Fallback: python_module is defined here only when it is not already defined.
%{?!python_module:%define python_module() python-%{**} python3-%{**}}
# Upstream source name; used to build the module paths in the prep section.
%define srcname ocrmypdf
# Package identity.
Name:           python-ocrmypdf
Version:        12.7.2
Release:        0
Summary:        OCRmyPDF adds a searchable OCR text layer to scanned PDF files
Group:          Productivity/Publishing/PDF

# Also provide the unprefixed project names.
Provides:       OCRmyPDF
Provides:       ocrmypdf

# Main code: MPL2.0;
# Completion files, hocrtransform.py: MIT;
# _unicodefun.py: BSD;
# Test files: CC-BY-SA and Public Domain
License:        MPL-2.0
URL:            https://github.com/jbarlow83/OCRmyPDF
Source:         https://files.pythonhosted.org/packages/source/o/ocrmypdf/ocrmypdf-%{version}.tar.gz
# Build-time requirements. Every package is listed exactly once; entries that
# were needed by both the build and the test group live in the build group.
BuildRequires:  python-rpm-macros
BuildRequires:  liblua5_3-5
BuildRequires:  leptonica-devel
BuildRequires:  %{python_module setuptools}
BuildRequires:  %{python_module cffi >= 1.9.1}
BuildRequires:  %{python_module setuptools_scm}
BuildRequires:  %{python_module setuptools_scm_git_archive}
# SECTION test requirements
BuildRequires:  %{python_module coloredlogs >= 14.0}
BuildRequires:  %{python_module img2pdf >= 0.3.0}
BuildRequires:  %{python_module pdfminer.six >= 20191110}
BuildRequires:  %{python_module pikepdf >= 2.10.0}
BuildRequires:  %{python_module Pillow >= 8.2.0}
BuildRequires:  %{python_module pluggy >= 0.13.0}
BuildRequires:  %{python_module pytest >= 6.0.0}
BuildRequires:  %{python_module pytest-xdist >= 2.2.0}
BuildRequires:  %{python_module pytest-cov >= 2.8.1}
BuildRequires:  %{python_module reportlab >= 3.5.66}
BuildRequires:  %{python_module tqdm >= 4}
BuildRequires:  %{python_module importlib-metadata >= 4}
BuildRequires:  %{python_module importlib-resources >= 5}
BuildRequires:  qpdf >= 8.1.0
BuildRequires:  tesseract-ocr >= 4.0.0
BuildRequires:  icc-profiles
BuildRequires:  liblept5
BuildRequires:  libjpeg-devel
BuildRequires:  libxml2
BuildRequires:  python3-pip
BuildRequires:  zlib-devel
BuildRequires:  tesseract-ocr-traineddata-eng >= 4.0.0
BuildRequires:  tesseract-ocr-traineddata-deu >= 4.0.0
BuildRequires:  tesseract-ocr-traineddata-orientation_and_script_detection >= 3.03
BuildRequires:  ghostscript >= 9.15
BuildRequires:  pngquant > 2.0.0
BuildRequires:  unpaper >= 6.1
BuildRequires:  %{python_module PyPDF2 >= 1.25.1}
###Needed? BuildRequires:  python3-pytest-runner
BuildRequires:  %{python_module ruffus >= 2.6.3}
# /SECTION
BuildRequires:  fdupes
# Runtime dependencies: Python modules first, then non-Python packages.
Requires:       python-cffi >= 1.9.1
Requires:       python-coloredlogs >= 14.0
Requires:       python-img2pdf >= 0.3.0
Requires:       python-pdfminer.six >= 20191110
Requires:       python-pikepdf >= 2.10.0
Requires:       python-Pillow >= 8.2.0
Requires:       python-pluggy >= 0.13.0
Requires:       python-reportlab >= 3.5.66
Requires:       python-setuptools
Requires:       python-tqdm >= 4
Requires:       tesseract-ocr >= 4.1.1
Requires:       tesseract-ocr-traineddata-eng >= 4.1.0
Requires:       tesseract-ocr-traineddata-deu >= 4.1.0a
Requires:       unpaper
# The post and postun scriptlets register/unregister the ocrmypdf command via
# the alternatives macros, so update-alternatives must be present when they run.
Requires(post): update-alternatives
Requires(postun): update-alternatives

# Optional extras, declared as weak dependencies.
# Exact version pins use the single "=" comparison operator.
Suggests:       python-sphinx
Suggests:       python-sphinx_rtd_theme
Suggests:       python-sphinx-issues
Suggests:       python-PyMuPDF = 1.13.4
Suggests:       python-pytest >= 6.0.0
Suggests:       python-pytest-xdist >= 2.2.0
Suggests:       python-pytest-cov >= 2.11.1
Suggests:       python-python-xmp-toolkit = 2.0.1
Suggests:       python-watchdog >= 1.0.2
Suggests:       python-Flask >= 1
BuildArch:      noarch

# The OCRmyPDF and ocrmypdf Provides are declared once, next to the package
# header tags near the top of this file; they are not repeated here.
%python_subpackages

%description
OCRmyPDF adds an OCR text layer to scanned PDF files, allowing them to be searched.

Main features:
 -  Generates a searchable PDF/A file from a regular PDF
 -  Places OCR text accurately below the image to ease copy / paste
 -  Keeps the exact resolution of the original embedded images
 -  When possible, inserts OCR information as a "lossless" operation without rendering vector information
 -  Keeps file size about the same
 -  If requested deskews and/or cleans the image before performing OCR
 -  Validates input and output files
 -  Provides debug mode to enable easy verification of the OCR results
 -  Processes pages in parallel when more than one CPU core is available
 -  Uses Tesseract OCR engine
 -  Supports the 39 languages recognized by Tesseract
 -  Battle-tested on thousands of PDFs, a test suite and continuous integration

%prep
%setup -q -n ocrmypdf-%{version}

# Cleanup shebang and executable bits.
# For every module one or two levels below the source package directory:
# delete a "#!/usr/bin/env python" shebang on line 1, keep the file's original
# modification time, and clear the executable bit.
for f in src/%{srcname}/*.py src/%{srcname}/*/*.py; do
    # A glob that matches nothing is left as the literal pattern; skip it.
    [ -f "$f" ] || continue
    # Rewrite through a temporary file; abort instead of continuing with a
    # half-written module if any step fails.
    sed -e '1{\@^#!/usr/bin/env python@d}' "$f" > "$f.new" &&
    touch -r "$f" "$f.new" &&
    mv "$f.new" "$f" || exit 1
    chmod -x "$f"
done

%build
# Build step, delegated entirely to the python_build macro.
%python_build

%install
# Install step, delegated to the python_install macro.
%python_install
# NOTE(review): python_clone with -a presumably prepares the ocrmypdf
# executable in bindir for the alternatives handling used by the post/postun
# scriptlets and the files list — confirm against the python-rpm-macros docs.
%python_clone -a %{buildroot}%{_bindir}/ocrmypdf
# Run the fdupes macro over the sitelib directory inside the buildroot.
%python_expand %fdupes %{buildroot}%{$python_sitelib}

%check
# NOTE(review): the pytest macro call on the next line is commented out and
# the unittest runner macro is used instead, although pytest and its plugins
# are still declared as build requirements — confirm this is intentional.
#%#pytest
%pyunittest -v

%post
# After installation: set up the ocrmypdf alternative.
%python_install_alternative ocrmypdf

%postun
# After removal: drop the ocrmypdf alternative again.
%python_uninstall_alternative ocrmypdf

%files %{python_files}
# Documentation and license text taken from the source tree.
%doc README.md
%license LICENSE
# The ocrmypdf executable in bindir, listed through the alternatives macro.
%python_alternative %{_bindir}/ocrmypdf
# Everything installed below the Python sitelib directory.
%{python_sitelib}/*

%changelog