openSUSE Build Service

File project.diff of Package OCRmyPDF

--- OCRmyPDF.changes.orig
+++ OCRmyPDF.changes
@@ -1,4 +1,19 @@
 -------------------------------------------------------------------
+Wed Mar  4 09:26:51 UTC 2020 - Luigi Baldoni <aloisio@gmx.com>
+
+- Update to version 9.6.1
+- Spec cleanup
+- Removed group tag
+
+-------------------------------------------------------------------
+Wed Feb 12 20:52:12 UTC 2020 - Martin Hauke <mardnh@gmx.de>
+
+- Update to version 9.6.0
+- Run spec-cleaner
+- Use python rpm-macros
+- Use github source URL
+
+-------------------------------------------------------------------
 Tue Dec 24 08:40:12 UTC 2019 - Adriaan Struys <amjsfw@gmail.com>
 
 - update to version 9.2.0 
--- OCRmyPDF.spec.orig
+++ OCRmyPDF.spec
@@ -1,7 +1,8 @@
 #
 # spec file for package OCRmyPDF
 #
-# Copyright (c) 2017 SUSE LINUX GmbH, Nuernberg, Germany.
+# Copyright (c) 2020 SUSE LLC
+# Copyright (c) 2013 Sören Plönnigs <soeren@ploennigs.net>
 #
 # All modifications and additions to the file contributed by third parties
 # remain the property of their copyright owners, unless otherwise agreed
@@ -12,81 +13,92 @@
 # license that conforms to the Open Source Definition (Version 1.9)
 # published by the Open Source Initiative.
 
-# Please submit bugfixes or comments via http://bugs.opensuse.org/
+# Please submit bugfixes or comments via https://bugs.opensuse.org/
 #
 
 
-%define modname ocrmypdf
 Name:           OCRmyPDF
-Version:        9.2.0
+Version:        9.6.1
 Release:        0
 Summary:        Add an OCR text layer to scanned PDF files
-License:        GPL-3.0
-Group:          Productivity/Publishing/PDF
-Url:            https://github.com/jbarlow83/OCRmyPDF
-Source0:        %{name}-%{version}.tar.gz
+License:        GPL-3.0-or-later
+URL:            https://github.com/jbarlow83/OCRmyPDF
+Source:         https://github.com/jbarlow83/OCRmyPDF/archive/v%{version}.tar.gz#/ocrmypdf-%{version}.tar.gz
 BuildRequires:  fdupes
-BuildRequires:  ghostscript >= 9.15
-BuildRequires:  libjpeg-devel
-BuildRequires:  python3-Pillow >= 3.1.1
-BuildRequires:  python3-PyPDF2 >= 1.25.1
 BuildRequires:  python3-cffi >= 1.9.1
-BuildRequires:  python3-img2pdf >= 0.2
+BuildRequires:  python3-pytest
 BuildRequires:  python3-pytest-runner
-BuildRequires:  python3-ruffus >= 2.6.3
-BuildRequires:  python3-setuptools_scm >= 1.8.0
+BuildRequires:  python3-setuptools
 BuildRequires:  python3-setuptools_scm_git_archive
+# SECTION test requirements
+BuildRequires:  ghostscript >= 9.15
+BuildRequires:  python3-img2pdf >= 0.3.0
+BuildRequires:  python3-pdfminer.six
+BuildRequires:  python3-pikepdf >= 1.8.1
+BuildRequires:  python3-pytest-helpers-namespace
+BuildRequires:  python3-reportlab >= 3.3.0
+BuildRequires:  python3-tqdm
 BuildRequires:  qpdf >= 5.1.1
 BuildRequires:  tesseract-ocr >= 3.03
+BuildRequires:  tesseract-ocr-traineddata-english
+BuildRequires:  tesseract-ocr-traineddata-orientation_and_script_detection >= 3.03
 BuildRequires:  unpaper >= 6.1
+# /SECTION
 Requires:       ghostscript >= 9.15
-Requires:       python3-Pillow >= 3.1.1
-Requires:       python3-PyPDF2 >= 1.25.1
 Requires:       python3-cffi >= 1.9.1
-Requires:       python3-img2pdf >= 0.2
-Requires:       python3-reportlab >= 3.2.0
-Requires:       python3-ruffus >= 2.6.3
+Requires:       python3-img2pdf >= 0.3.0
+Requires:       python3-pdfminer.six
+Requires:       python3-pikepdf >= 1.8.1
+Requires:       python3-reportlab >= 3.3.0
+Requires:       python3-tqdm
 Requires:       qpdf >= 5.1.1
 Requires:       tesseract-ocr >= 3.03
+Requires:       tesseract-ocr-traineddata-english
 Requires:       tesseract-ocr-traineddata-orientation_and_script_detection >= 3.03
 Requires:       unpaper >= 6.1
-BuildRoot:      %{_tmppath}/%{name}-%{version}-build
 BuildArch:      noarch
 
 %description
-OCRmyPDF adds an OCR text layer to scanned PDF files, allowing them to be searched.
+OCRmyPDF adds an OCR text layer to scanned PDF files, allowing them
+to be searched.
 
 Main features:
- -  Generates a searchable PDF/A file from a regular PDF
- -  Places OCR text accurately below the image to ease copy / paste
- -  Keeps the exact resolution of the original embedded images
- -  When possible, inserts OCR information as a "lossless" operation without rendering vector information
- -  Keeps file size about the same
- -  If requested deskews and/or cleans the image before performing OCR
- -  Validates input and output files
- -  Provides debug mode to enable easy verification of the OCR results
- -  Processes pages in parallel when more than one CPU core is available
- -  Uses Tesseract OCR engine
- -  Supports the 39 languages recognized by Tesseract
- -  Battle-tested on thousands of PDFs, a test suite and continuous integration
+  * Generates a searchable PDF/A file from a regular PDF
+  * Places OCR text accurately below the image to ease copy / paste
+  * Keeps the exact resolution of the original embedded images
+  * When possible, inserts OCR information as a lossless operation
+    without rendering vector information
+  * Keeps file size about the same
+  * If requested, deskews and/or cleans the image before performing OCR
+  * Validates input and output files
+  * Provides debug mode to enable easy verification of the OCR results
+  * Processes pages in parallel when more than one CPU core is
+    available
+  * Battle-tested on thousands of PDFs, a test suite and continuous
+    integration
 
 %prep
 %setup -q
+find src/ -name "*.py" -exec sed -i -e '1{/^#!\//d;}' {} \;
 
 %build
-CFLAGS="%{optflags}" python3 setup.py build
+%python3_build
 
 %install
-python3 setup.py install --prefix=%{_prefix} --root=%{buildroot}
-# chmod ugo+x  %{buildroot}%{python3_sitelib}/%{modname}/{hocrtransform,leptonica,main,pageinfo,pdfa}.py
+%python3_install
 %fdupes %{buildroot}%{python3_sitelib}
-chmod ugo+x %{buildroot}%{python3_sitelib}/ocrmypdf/*.py
-chmod ugo+x %{buildroot}%{python3_sitelib}/ocrmypdf/exec/*.py
+
+%check
+# generate tesseract profile file
+tesseract --list-langs
+# run test suite (exclude tests that are currently known to be broken)
+export PYTHONPATH=%{buildroot}%{python3_sitelib}
+python3 -m pytest -v -k 'not (test_links or test_redo_ocr or test_blank_input_pdf or test_tesseract_config_valid or test_pagesize_consistency or test_user_words_ocr)'
 
 %files
-%defattr(-,root,root)
-%doc README.md LICENSE
-%{_bindir}/%{modname}
-%{python3_sitelib}/*
+%license LICENSE
+%doc README.md
+%{_bindir}/ocrmypdf
+%{python3_sitelib}/ocrmypdf*
 
 %changelog