File OCRmyPDF.spec of Package OCRmyPDF

#
# spec file for package OCRmyPDF
#
# Copyright (c) 2020 SUSE LLC
#
# All modifications and additions to the file contributed by third parties
# remain the property of their copyright owners, unless otherwise agreed
# upon. The license for this file, and modifications and additions to the
# file, is the same license as for the pristine package itself (unless the
# license for the pristine package is not an Open Source License, in which
# case the license is the MIT License). An "Open Source License" is a
# license that conforms to the Open Source Definition (Version 1.9)
# published by the Open Source Initiative.

# Please submit bugfixes or comments via https://bugs.opensuse.org/
#

# Name of the Python package directory in sitelib; the installed command
# in bindir carries the same name.
%define modname ocrmypdf
Name:           OCRmyPDF
Version:        16.6.1
Release:        0
Summary:        Add an OCR text layer to scanned PDF files
# NOTE(review): recent upstream releases appear to be published under
# MPL-2.0 -- confirm this tag against the LICENSE file in the tarball.
License:        GPL-3.0-or-later
Group:          Productivity/Publishing/PDF
# Project home page rather than the git clone address.
URL:            https://github.com/ocrmypdf/OCRmyPDF
Source0:        https://github.com/ocrmypdf/OCRmyPDF/archive/refs/tags/v%{version}.tar.gz#/%{name}-%{version}.tar.gz
# Build tooling
BuildRequires:  python-rpm-macros
BuildRequires:  fdupes
BuildRequires:  %{python_module setuptools}
BuildRequires:  %{python_module pip}
BuildRequires:  %{python_module wheel}
BuildRequires:  %{python_module hatch_vcs}
# SECTION test requirements
BuildRequires:  %{python_module hypothesis}
BuildRequires:  %{python_module pytest}
# /SECTION
# Python modules needed by the package
BuildRequires:  %{python_module img2pdf >= 0.4.4}
BuildRequires:  %{python_module pikepdf >= 8}
BuildRequires:  %{python_module Pillow >= 9.0.1}
BuildRequires:  %{python_module reportlab >= 3.6.8}
BuildRequires:  %{python_module pdfminer.six}
BuildRequires:  %{python_module coloredlogs >= 14.0}
BuildRequires:  %{python_module tqdm >= 4}
BuildRequires:  %{python_module pytesseract}
BuildRequires:  %{python_module rich >= 13}
BuildRequires:  %{python_module deprecation >= 2.1.0}
BuildRequires:  %{python_module packaging >= 20}
BuildRequires:  %{python_module pluggy >= 0.13.0}
BuildRequires:  %{python_module importlib-resources >= 5}
BuildRequires:  %{python_module typing-extensions >= 4}
BuildRequires:  ghostscript
# External programs that OCRmyPDF runs at run time; without them the
# installed command cannot process any document.
Requires:       ghostscript
Requires:       tesseract-ocr
# Python run-time dependencies. hypothesis is deliberately absent here:
# it is a testing library and is only needed at build time.
# NOTE(review): the version floors below may lag behind what upstream
# declares for this release -- verify against its pyproject.toml.
Requires:       python-img2pdf >= 0.4.4
Requires:       python-pikepdf >= 8
Requires:       python-Pillow >= 9.0.1
Requires:       python-reportlab >= 3.6.8
Requires:       python-pdfminer.six
Requires:       python-coloredlogs >= 14.0
Requires:       python-tqdm >= 4
Requires:       python-pytesseract
Requires:       python-rich >= 13
Requires:       python-deprecation >= 2.1.0
Requires:       python-packaging >= 20
Requires:       python-pluggy >= 0.13.0
Requires:       python-importlib-resources >= 5
Requires:       python-typing-extensions >= 4
# The command in bindir is switched between flavors via update-alternatives.
Requires(post):    update-alternatives
Requires(postun):  update-alternatives
BuildArch:      noarch
%python_subpackages

%description
OCRmyPDF adds an OCR text layer to scanned PDF files, allowing them to be searched.

Main features:
 -  Generates a searchable PDF/A file from a regular PDF
 -  Places OCR text accurately below the image to ease copy / paste
 -  Keeps the exact resolution of the original embedded images
 -  When possible, inserts OCR information as a "lossless" operation
    without rendering vector information
 -  Keeps file size about the same
 -  If requested, deskews and/or cleans the image before performing OCR
 -  Validates input and output files
 -  Provides debug mode to enable easy verification of the OCR results
 -  Processes pages in parallel when more than one CPU core is available
 -  Uses Tesseract OCR engine
 -  Supports the 39 languages recognized by Tesseract
 -  Battle-tested on thousands of PDFs, with a test suite and continuous integration

%prep
# Unpack the Source0 tarball and change into the extracted source tree.
%autosetup

%build
# Build the wheel from the unpacked sources; the install step consumes it.
%pyproject_wheel

%install
# Install the previously built wheel into the buildroot.
%pyproject_install
# Prepare the command in bindir for update-alternatives handling.
%python_clone -a %{buildroot}%{_bindir}/%{modname}
# Per-flavor cleanup of the installed module tree: remove the env-python3
# interpreter line from library modules, clear stray executable bits, ship
# the font file from the source tree, then hardlink duplicate files.
%{python_expand # fixup install
moddir=%{buildroot}%{$python_sitelib}/%{modname}
for src in __main__.py _validation.py pdfinfo/__init__.py pdfinfo/info.py; do
	sed -i "s|#!/usr/bin/env python3||g" "$moddir/$src"
done
for src in __main__.py hocrtransform/__init__.py; do
	chmod -x "$moddir/$src"
done
install -m 0644 src/ocrmypdf/data/pdf.ttf "$moddir/data/"
%fdupes %{buildroot}%{$python_sitelib}/%{modname}
}

%check
# NOTE(review): no test command is run in this section even though pytest
# is listed as a build requirement -- confirm whether that is intentional.

%post
# Register this flavor's copy of the command with update-alternatives.
%python_install_alternative %{modname}

%postun
# Remove the update-alternatives entry again once the package is gone.
%python_uninstall_alternative %{modname}

%files %{python_files}
# One file list is generated per Python flavor.
%license LICENSE
%doc README.md
# Command-line entry point, managed through update-alternatives.
%python_alternative %{_bindir}/%{modname}
# The glob owns the module directory recursively (bytecode caches included)
# plus any sibling entry in sitelib whose name starts with the module name,
# such as the installed metadata directory. A separate __pycache__ entry
# would only repeat files already covered and trigger "File listed twice".
%{python_sitelib}/%{modname}*

%changelog
openSUSE Build Service is sponsored by