# File python-datasets.spec of Package python-datasets

#
# spec file for package python-datasets
#
# Copyright (c) 2024 SUSE LLC
#
# All modifications and additions to the file contributed by third parties
# remain the property of their copyright owners, unless otherwise agreed
# upon. The license for this file, and modifications and additions to the
# file, is the same license as for the pristine package itself (unless the
# license for the pristine package is not an Open Source License, in which
# case the license is the MIT License). An "Open Source License" is a
# license that conforms to the Open Source Definition (Version 1.9)
# published by the Open Source Initiative.

# Please submit bugfixes or comments via https://bugs.opensuse.org/
#


# Conditional macro: expands to nothing where sle15_python_module_pythons is
# not defined.
%{?sle15_python_module_pythons}
Name:           python-datasets
Version:        3.6.0
Release:        0
Summary:        HuggingFace community-driven open-source library of datasets
License:        Apache-2.0
URL:            https://github.com/huggingface/datasets
Source:         https://files.pythonhosted.org/packages/source/d/datasets/datasets-%{version}.tar.gz
# Build tooling (presumably what the wheel build and install steps need).
BuildRequires:  python-rpm-macros
BuildRequires:  %{python_module pip}
BuildRequires:  %{python_module setuptools}
BuildRequires:  %{python_module wheel}
# SECTION test requirements
# Every package in the runtime Requires list also appears here with the same
# version constraint; the remaining entries are only needed by the test suite.
BuildRequires:  %{python_module aiohttp}
BuildRequires:  %{python_module absl-py}
BuildRequires:  %{python_module decorator}
BuildRequires:  %{python_module dill >= 0.3.0}
BuildRequires:  %{python_module filelock}
BuildRequires:  %{python_module fsspec >= 2023.1.0}
BuildRequires:  %{python_module huggingface-hub >= 0.24.0}
BuildRequires:  %{python_module multiprocess <= 0.71.0}
BuildRequires:  %{python_module numpy >= 1.17}
BuildRequires:  %{python_module packaging}
BuildRequires:  %{python_module pandas}
BuildRequires:  %{python_module pyarrow >= 15.0.0}
BuildRequires:  %{python_module Pillow}
BuildRequires:  %{python_module PyYAML >= 5.1}
BuildRequires:  %{python_module requests >= 2.32.2}
BuildRequires:  %{python_module SQLAlchemy}
BuildRequires:  %{python_module torch}
BuildRequires:  %{python_module tqdm >= 4.66.3}
BuildRequires:  %{python_module xxhash}
BuildRequires:  %{python_module zstandard}
BuildRequires:  %{python_module SoundFile}
BuildRequires:  %{python_module pytest}
BuildRequires:  %{python_module pytest-datadir}
BuildRequires:  %{python_module sympy}
# /SECTION
# Used in the install section to deduplicate files in site-packages.
BuildRequires:  fdupes
# Hard runtime dependencies of the installed package. Keep the version
# constraints in sync with the matching BuildRequires in the test section.
Requires:       python-aiohttp
Requires:       python-dill >= 0.3.0
Requires:       python-filelock
Requires:       python-fsspec >= 2023.1.0
Requires:       python-huggingface-hub >= 0.24.0
Requires:       python-multiprocess <= 0.71.0
Requires:       python-numpy >= 1.17
Requires:       python-packaging
Requires:       python-pandas
Requires:       python-pyarrow >= 15.0.0
Requires:       python-PyYAML >= 5.1
Requires:       python-requests >= 2.32.2
Requires:       python-tqdm >= 4.66.3
Requires:       python-xxhash
# Optional (weak) dependencies, sorted by package name.
# This list previously repeated the same entries several times; it now holds
# exactly one line per distinct dependency, so the resulting package metadata
# is unchanged.
# NOTE(review): tensorflow, torch and transformers still carry mutually
# exclusive constraints (an exact pin next to a newer lower bound). They are
# kept as they were; decide which set of constraints is actually wanted.
Suggests:       python-absl-py
Suggests:       python-decorator
Suggests:       python-elasticsearch < 8.0.0
Suggests:       python-faiss-cpu >= 1.8.0.post1
Suggests:       python-jax >= 0.3.14
Suggests:       python-jaxlib >= 0.3.14
Suggests:       python-joblib < 1.3.0
Suggests:       python-joblibspark
Suggests:       python-librosa
Suggests:       python-lz4
Suggests:       python-moto
Suggests:       python-Pillow >= 9.4.0
Suggests:       python-polars >= 0.20.0
Suggests:       python-protobuf < 4.0.0
Suggests:       python-py7zr
Suggests:       python-pytest
Suggests:       python-pytest-datadir
Suggests:       python-pytest-xdist
Suggests:       python-rarfile >= 4.0
Suggests:       python-ruff >= 0.3.0
Suggests:       python-s3fs
Suggests:       python-s3fs >= 2021.11.1
Suggests:       python-soundfile >= 0.12.1
Suggests:       python-soxr >= 0.4.0
Suggests:       python-sqlalchemy
Suggests:       python-tensorflow == 2.12.0
Suggests:       python-tensorflow >= 2.6.0
Suggests:       python-tensorflow >= 2.16.0
Suggests:       python-tiktoken
Suggests:       python-torch
Suggests:       python-torch == 2.0.1
Suggests:       python-torch >= 2.0.0
Suggests:       python-torchdata
Suggests:       python-transformers
Suggests:       python-transformers == 4.30.1
Suggests:       python-transformers >= 4.42.0
Suggests:       python-zstandard
# The package is declared architecture-independent.
BuildArch:      noarch

# update-alternatives has to be installed when the post and postun scriptlets
# run, because both of them manage the datasets-cli alternative.
Requires(post):    update-alternatives
Requires(postun):  update-alternatives

# NOTE(review): presumably generates the per-flavor subpackages from the
# preamble above -- confirm against the python-rpm-macros documentation.
%python_subpackages

%description
HuggingFace community-driven open-source library of datasets.

%prep
%autosetup -p1 -n datasets-%{version}
# For each listed file, delete line 1 when it is an "env python" shebang;
# every other line is left untouched.
for pyfile in src/datasets/commands/datasets_cli.py src/datasets/utils/_filelock.py; do
  sed -i -e '1{/#!.*env python/d;}' "${pyfile}"
done

%build
# The whole build step is delegated to the pyproject_wheel macro.
%pyproject_wheel

%install
# Install the result of the build step into the buildroot.
%pyproject_install
# Prepare the datasets-cli executable for the alternatives handling that the
# post/postun scriptlets and the files list rely on (presumably by creating
# per-flavor copies -- confirm against python-rpm-macros).
%python_clone -a %{buildroot}%{_bindir}/datasets-cli
# Run fdupes over the site-packages directory of every expanded flavor.
%python_expand %fdupes %{buildroot}%{$python_sitelib}

%check
# The test run below leaves out whole files with --ignore and single tests
# with -k. Reasons, as far as they are recorded:
#
# NOTE: the Apache Spark tests are disabled since we don't have a
# python-pyspark package. That package depends on py4j, which only supports
# up to Python 3.5.
#  * tests/packaged_modules/test_spark.py
# NOTE: tests that require the undefined "shared_datadir" fixture are disabled.
# NOTE(review): pytest-datadir is in BuildRequires; re-check whether these
# two files can be enabled again.
#  * tests/features/test_audio.py
#  * tests/features/test_image.py
# NOTE: tests that require an Internet connection are disabled.
#  * tests/io/test_json.py
#  * tests/test_file_utils.py
#  * tests/test_inspect.py
#  * tests/test_load.py
#  * tests/test_upstream_hub.py
#  * tests/packaged_modules/test_cache.py
#  * tests/test_hub.py
#  * tests/test_iterable_dataset.py
#  * tests/packaged_modules/test_folder_based_builder.py
#  * tests/commands/test_test.py
#  * tests/io/test_parquet.py
#  * tests/test_offline_util.py
# NOTE: these tests are disabled as they require AWS S3 access.
#  * tests/test_arrow_dataset.py
# FIXME: skip test_iterable_dataset_persists_epoch_in_torch_workers for now as
# it got stuck on a wait, possibly due to the lack of a GPU.
# NOTE: tests/test_fingerprint.py is disabled as it involves NVIDIA CUDA.
# NOTE: tests/test_distributed.py is disabled as coroutines are prohibited.
# NOTE: skip test_tensor_webdataset until this bugfix is included:
# https://github.com/python/cpython/issues/125631
# TODO: no reason is recorded for ignoring tests/test_data_files.py and
# tests/fixtures/hub.py, or for deselecting
# test_dataset_save_to_disk_and_load_from_disk_round_trip_with_large_list.
%pytest --ignore tests/test_offline_util.py --ignore tests/test_distributed.py --ignore tests/test_fingerprint.py --ignore tests/test_data_files.py --ignore tests/test_arrow_dataset.py --ignore tests/io/test_parquet.py --ignore tests/commands/test_test.py --ignore tests/packaged_modules/test_spark.py --ignore tests/packaged_modules/test_folder_based_builder.py --ignore tests/features/test_audio.py --ignore tests/features/test_image.py --ignore tests/io/test_json.py --ignore tests/fixtures/hub.py --ignore tests/test_file_utils.py --ignore tests/test_inspect.py --ignore tests/test_load.py --ignore tests/test_upstream_hub.py --ignore tests/packaged_modules/test_cache.py --ignore tests/test_hub.py --ignore tests/test_iterable_dataset.py -k "not (test_iterable_dataset_persists_epoch_in_torch_workers or test_dataset_save_to_disk_and_load_from_disk_round_trip_with_large_list or test_tensor_webdataset)"

%post
# Register the datasets-cli alternative once the package is installed.
%python_install_alternative datasets-cli

%postun
# Drop the datasets-cli alternative again when the package goes away.
%python_uninstall_alternative datasets-cli

%files %{python_files}
# Upstream documentation and license text.
%doc AUTHORS README.md
%license LICENSE
# Command-line entry point, handled through the alternatives system.
%python_alternative %{_bindir}/datasets-cli
# The importable package and its dist-info metadata for this version.
%{python_sitelib}/datasets
%{python_sitelib}/datasets-%{version}.dist-info

%changelog
# openSUSE Build Service is sponsored by