# File python-ocrmypdf.spec of Package python-ocrmypdf
#
# spec file for package python-ocrmypdf
#
# Copyright (c) 2021 SUSE LLC
#
# All modifications and additions to the file contributed by third parties
# remain the property of their copyright owners, unless otherwise agreed
# upon. The license for this file, and modifications and additions to the
# file, is the same license as for the pristine package itself (unless the
# license for the pristine package is not an Open Source License, in which
# case the license is the MIT License). An "Open Source License" is a
# license that conforms to the Open Source Definition (Version 1.9)
# published by the Open Source Initiative.
# Please submit bugfixes or comments via https://bugs.opensuse.org/
#
# Restrict the build flavors to python3 and set the skip_python2 flag.
%define pythons python3
%define skip_python2 1
# Fallback definition of python_module, expanded only when that macro is not
# already defined: it maps a module name to its python- and python3- packages.
%{?!python_module:%define python_module() python-%{**} python3-%{**}}
# Upstream module name; used in the prep section to locate files under src/.
%define srcname ocrmypdf
# Package identity, classification and upstream source location.
Name: python-ocrmypdf
Version: 12.7.2
Release: 0
Summary: OCRmyPDF adds a searchable OCR text layer to scanned PDF files
Group: Productivity/Publishing/PDF
# Additional capability names provided by this package.
Provides: OCRmyPDF
Provides: ocrmypdf
# Main code: MPL2.0;
# Completion files, hocrtransform.py: MIT;
# _unicodefun.py: BSD;
# Test files: CC-BY-SA and Public Domain
License: MPL-2.0
URL: https://github.com/jbarlow83/OCRmyPDF
Source: https://files.pythonhosted.org/packages/source/o/ocrmypdf/ocrmypdf-%{version}.tar.gz
# Build-time dependencies. Each package is listed once; entries that were
# repeated between the build and test groups have been merged.
BuildRequires: python-rpm-macros
BuildRequires: liblua5_3-5
BuildRequires: leptonica-devel
BuildRequires: %{python_module setuptools}
BuildRequires: %{python_module cffi >= 1.9.1}
BuildRequires: %{python_module setuptools_scm}
BuildRequires: %{python_module setuptools_scm_git_archive}
# SECTION test requirements
BuildRequires: %{python_module coloredlogs >= 14.0}
BuildRequires: %{python_module img2pdf >= 0.3.0}
BuildRequires: %{python_module pdfminer.six >= 20191110}
BuildRequires: %{python_module pikepdf >= 2.10.0}
BuildRequires: %{python_module Pillow >= 8.2.0}
BuildRequires: %{python_module pluggy >= 0.13.0}
BuildRequires: %{python_module pytest >= 6.0.0}
BuildRequires: %{python_module pytest-xdist >= 2.2.0}
BuildRequires: %{python_module pytest-cov >= 2.8.1}
BuildRequires: %{python_module reportlab >= 3.5.66}
BuildRequires: %{python_module tqdm >= 4}
BuildRequires: %{python_module importlib-metadata >= 4}
BuildRequires: %{python_module importlib-resources >= 5}
BuildRequires: qpdf >= 8.1.0
BuildRequires: tesseract-ocr >= 4.0.0
BuildRequires: icc-profiles
BuildRequires: liblept5
BuildRequires: libjpeg-devel
BuildRequires: libxml2
BuildRequires: python3-pip
BuildRequires: zlib-devel
BuildRequires: tesseract-ocr-traineddata-eng >= 4.0.0
BuildRequires: tesseract-ocr-traineddata-deu >= 4.0.0
BuildRequires: tesseract-ocr-traineddata-orientation_and_script_detection >= 3.03
BuildRequires: ghostscript >= 9.15
BuildRequires: pngquant > 2.0.0
BuildRequires: unpaper >= 6.1
BuildRequires: %{python_module PyPDF2 >= 1.25.1}
###Needed? BuildRequires: python3-pytest-runner
BuildRequires: %{python_module ruffus >= 2.6.3}
# /SECTION
# fdupes is invoked in the install section.
BuildRequires: fdupes
# Runtime dependencies of the installed package.
# NOTE(review): ghostscript and qpdf appear only as BuildRequires in this
# file; confirm whether they are also needed at run time.
Requires: python-cffi >= 1.9.1
Requires: python-coloredlogs >= 14.0
Requires: python-img2pdf >= 0.3.0
Requires: python-pdfminer.six >= 20191110
Requires: python-pikepdf >= 2.10.0
Requires: python-Pillow >= 8.2.0
Requires: python-pluggy >= 0.13.0
Requires: python-reportlab >= 3.5.66
Requires: python-setuptools
Requires: python-tqdm >= 4
Requires: tesseract-ocr >= 4.1.1
Requires: tesseract-ocr-traineddata-eng >= 4.1.0
# NOTE(review): the "4.1.0a" version differs from the eng requirement above;
# confirm the trailing "a" is intended.
Requires: tesseract-ocr-traineddata-deu >= 4.1.0a
Requires: unpaper
# The post and postun scriptlets of this package call the alternatives
# macros, so update-alternatives has to be available when they run.
Requires(post): update-alternatives
Requires(postun): update-alternatives
# Optional packages (documentation, testing and extra features).
Suggests: python-sphinx
Suggests: python-sphinx_rtd_theme
Suggests: python-sphinx-issues
Suggests: python-PyMuPDF = 1.13.4
Suggests: python-pytest >= 6.0.0
Suggests: python-pytest-xdist >= 2.2.0
Suggests: python-pytest-cov >= 2.11.1
Suggests: python-python-xmp-toolkit = 2.0.1
Suggests: python-watchdog >= 1.0.2
Suggests: python-Flask >= 1
BuildArch: noarch
%python_subpackages
%description
OCRmyPDF adds an OCR text layer to scanned PDF files, allowing them to be searched.
Main features:
- Generates a searchable PDF/A file from a regular PDF
- Places OCR text accurately below the image to ease copy / paste
- Keeps the exact resolution of the original embedded images
- When possible, inserts OCR information as a "lossless" operation without rendering vector information
- Keeps file size about the same
- If requested deskews and/or cleans the image before performing OCR
- Validates input and output files
- Provides debug mode to enable easy verification of the OCR results
- Processes pages in parallel when more than one CPU core is available
- Uses Tesseract OCR engine
- Supports the 39 languages recognized by Tesseract
- Battle-tested on thousands of PDFs, a test suite and continuous integration
%prep
%setup -q -n ocrmypdf-%{version}
# Cleanup shebang and executable bits.
# Every Python file one and two levels below the module directory has a
# leading "#!/usr/bin/env python" line removed. The file is rewritten through
# a temporary copy that takes over the original modification time.
for f in src/%{srcname}/*.py src/%{srcname}/*/*.py; do
  # Separate commands rather than an AND chain: a failure in the middle of an
  # AND list is not caught by "sh -e" (which rpmbuild normally uses for build
  # scripts), so the chain would silently continue with an unprocessed file.
  sed -e '1{\@^#!/usr/bin/env python@d}' "$f" > "$f.new"
  touch -r "$f" "$f.new"
  mv "$f.new" "$f"
  chmod -x "$f"
done
%build
# The whole build step is delegated to the python_build macro
# (presumably supplied by python-rpm-macros, which is a BuildRequires).
%python_build
%install
# Installation is delegated to the python_install macro.
%python_install
# Prepare the ocrmypdf executable in bindir for the alternatives handling
# used by the post/postun scriptlets and the files list.
# NOTE(review): exact per-flavor naming is defined by the macro, not visible here.
%python_clone -a %{buildroot}%{_bindir}/ocrmypdf
# Run fdupes on the sitelib directory inside the buildroot for each flavor.
%python_expand %fdupes %{buildroot}%{$python_sitelib}
%check
# The pytest invocation on the next line is disabled; the unittest macro is
# run in verbose mode instead.
#%#pytest
# NOTE(review): the BuildRequires list pytest, pytest-xdist and pytest-cov as
# test requirements; confirm that unittest discovery actually finds and runs
# the upstream tests.
%pyunittest -v
# Scriptlets: the ocrmypdf alternative is set up after installation and
# removed again after uninstallation, via the alternatives macros.
%post
%python_install_alternative ocrmypdf
%postun
%python_uninstall_alternative ocrmypdf
%files %{python_files}
# Documentation and license text taken from the unpacked source tree.
%doc README.md
%license LICENSE
# The ocrmypdf executable is listed through the alternatives macro.
%python_alternative %{_bindir}/ocrmypdf
# Everything installed below the sitelib directory.
%{python_sitelib}/*
%changelog