# File OCRmyPDF.spec of Package OCRmyPDF
#
# spec file for package OCRmyPDF
#
# Copyright (c) 2020 SUSE LLC
#
# All modifications and additions to the file contributed by third parties
# remain the property of their copyright owners, unless otherwise agreed
# upon. The license for this file, and modifications and additions to the
# file, is the same license as for the pristine package itself (unless the
# license for the pristine package is not an Open Source License, in which
# case the license is the MIT License). An "Open Source License" is a
# license that conforms to the Open Source Definition (Version 1.9)
# published by the Open Source Initiative.
# Please submit bugfixes or comments via https://bugs.opensuse.org/
#
# Importable module name; also the name of the installed command.
%define modname ocrmypdf
Name:           OCRmyPDF
Version:        16.6.1
Release:        0
Summary:        Add an OCR text layer to scanned PDF files
# NOTE(review): confirm this tag against the LICENSE file shipped in the
# tarball; upstream may have relicensed since this spec was first written.
License:        GPL-3.0-or-later
Group:          Productivity/Publishing/PDF
# Project home page (not the git clone endpoint).
URL:            https://github.com/ocrmypdf/OCRmyPDF
Source0:        https://github.com/ocrmypdf/OCRmyPDF/archive/refs/tags/v%{version}.tar.gz#/%{name}-%{version}.tar.gz
# Build tooling
BuildRequires:  %{python_module hatch_vcs}
BuildRequires:  %{python_module pip}
BuildRequires:  %{python_module setuptools}
BuildRequires:  %{python_module wheel}
BuildRequires:  fdupes
BuildRequires:  python-rpm-macros
# SECTION test requirements
BuildRequires:  %{python_module hypothesis}
BuildRequires:  %{python_module pytest}
# /SECTION
# Runtime dependencies, also installed at build time
BuildRequires:  %{python_module Pillow >= 9.0.1}
BuildRequires:  %{python_module coloredlogs >= 14.0}
BuildRequires:  %{python_module deprecation >= 2.1.0}
BuildRequires:  %{python_module img2pdf >= 0.4.4}
BuildRequires:  %{python_module importlib-resources >= 5}
BuildRequires:  %{python_module packaging >= 20}
BuildRequires:  %{python_module pdfminer.six}
BuildRequires:  %{python_module pikepdf >= 8}
BuildRequires:  %{python_module pluggy >= 0.13.0}
BuildRequires:  %{python_module pytesseract}
BuildRequires:  %{python_module reportlab >= 3.6.8}
BuildRequires:  %{python_module rich >= 13}
BuildRequires:  %{python_module tqdm >= 4}
BuildRequires:  %{python_module typing-extensions >= 4}
BuildRequires:  ghostscript
# External programs the tool drives at run time.
# NOTE(review): tesseract-ocr is the expected openSUSE package name - confirm.
Requires:       ghostscript
Requires:       tesseract-ocr
# Python runtime dependencies. hypothesis is deliberately absent: it is a
# testing library and only needed in the build environment.
Requires:       python-Pillow >= 9.0.1
Requires:       python-coloredlogs >= 14.0
Requires:       python-deprecation >= 2.1.0
Requires:       python-img2pdf >= 0.4.4
Requires:       python-importlib-resources >= 5
Requires:       python-packaging >= 20
Requires:       python-pdfminer.six
Requires:       python-pikepdf >= 8
Requires:       python-pluggy >= 0.13.0
Requires:       python-pytesseract
Requires:       python-reportlab >= 3.6.8
Requires:       python-rich >= 13
Requires:       python-tqdm >= 4
Requires:       python-typing-extensions >= 4
# The command is installed through the alternatives system (see scriptlets).
Requires(post): update-alternatives
Requires(postun): update-alternatives
BuildArch:      noarch
%python_subpackages
%description
OCRmyPDF adds an OCR text layer to scanned PDF files, allowing them to be
searched.

Main features:
- Generates a searchable PDF/A file from a regular PDF
- Places OCR text accurately below the image to ease copy / paste
- Keeps the exact resolution of the original embedded images
- When possible, inserts OCR information as a "lossless" operation without
  rendering vector information
- Keeps file size about the same
- If requested deskews and/or cleans the image before performing OCR
- Validates input and output files
- Provides debug mode to enable easy verification of the OCR results
- Processes pages in parallel when more than one CPU core is available
- Uses Tesseract OCR engine
- Supports the 39 languages recognized by Tesseract
- Battle-tested on thousands of PDFs, a test suite and continuous integration

%prep
%autosetup

%build
# The package version is derived from VCS metadata (hatch_vcs is a build
# dependency), but the source is a tag archive rather than a git checkout.
# Pin the version explicitly so the wheel is always built as the spec version.
export SETUPTOOLS_SCM_PRETEND_VERSION=%{version}
%pyproject_wheel

%install
%pyproject_install
%python_clone -a %{buildroot}%{_bindir}/%{modname}
# Per-flavor cleanup of the installed module tree, in this order:
#  1. blank out env-style shebangs in modules that are imported, not executed
#  2. drop the executable bit from library modules
#  3. copy the bundled font next to the other package data
#  4. hardlink duplicate files
%{python_expand # fixup install
moddir=%{buildroot}%{$python_sitelib}/%{modname}
for src in __main__.py _validation.py pdfinfo/__init__.py pdfinfo/info.py; do
  sed -i "s|#!/usr/bin/env python3||g" $moddir/$src
done
chmod -x $moddir/__main__.py $moddir/hocrtransform/__init__.py
install -m 0644 src/ocrmypdf/data/pdf.ttf $moddir/data/
%fdupes %{buildroot}%{$python_sitelib}/%{modname}
}
%check
# Intentionally empty: no test command is run during the build, even though
# pytest and hypothesis are pulled in as build dependencies.
# NOTE(review): consider enabling the upstream test suite here.
%post
# Register this flavor's ocrmypdf command as an alternative after install.
%python_install_alternative %{modname}
%postun
# Drop the alternative again once the package is removed.
%python_uninstall_alternative %{modname}
%files %{python_files}
%license LICENSE
%doc README.md
# Command-line entry point, managed through the alternatives system.
%python_alternative %{_bindir}/%{modname}
# The module directory is packaged recursively (bytecode caches included), so
# no separate cache entry is needed. The dist-info metadata is listed on its
# own line instead of relying on a catch-all glob.
%{python_sitelib}/%{modname}
%{python_sitelib}/%{modname}-*-info
%changelog