diff options
Diffstat (limited to 'sci-ml/datasets/datasets-4.2.0.ebuild')
| -rw-r--r-- | sci-ml/datasets/datasets-4.2.0.ebuild | 113 |
1 files changed, 113 insertions, 0 deletions
# Copyright 2023-2026 Gentoo Authors
# Distributed under the terms of the GNU General Public License v2

EAPI=8

DISTUTILS_USE_PEP517=setuptools
PYTHON_COMPAT=( python3_{10..14} )
DISTUTILS_SINGLE_IMPL=1
inherit distutils-r1

DESCRIPTION="Access and share datasets for Audio, Computer Vision, and NLP tasks"
HOMEPAGE="https://pypi.org/project/datasets/"
SRC_URI="https://github.com/huggingface/${PN}/archive/refs/tags/${PV}.tar.gz
	-> ${P}.gh.tar.gz"

LICENSE="Apache-2.0"
SLOT="0"
KEYWORDS="~amd64 ~arm64"
IUSE="torch vision"
# Running the test suite requires both optional feature flags.
# NOTE(review): "test" is not listed in IUSE above; it is expected to be
# added by distutils_enable_tests further down -- confirm against the eclass.
REQUIRED_USE="test? ( torch vision )"

# Runtime dependencies. This is a single-impl package, so packages that are
# themselves single-impl use PYTHON_SINGLE_USEDEP directly, while the rest are
# wrapped in python_gen_cond_dep and use PYTHON_USEDEP.
RDEPEND="
	sci-ml/huggingface_hub[${PYTHON_SINGLE_USEDEP}]
	$(python_gen_cond_dep '
		dev-python/dill[${PYTHON_USEDEP}]
		dev-python/filelock[${PYTHON_USEDEP}]
		dev-python/fsspec[${PYTHON_USEDEP}]
		dev-python/multiprocess[${PYTHON_USEDEP}]
		dev-python/numpy[${PYTHON_USEDEP}]
		dev-python/packaging[${PYTHON_USEDEP}]
		dev-python/pandas[${PYTHON_USEDEP}]
		dev-python/pyarrow[${PYTHON_USEDEP},parquet,snappy]
		dev-python/pyyaml[${PYTHON_USEDEP}]
		dev-python/requests[${PYTHON_USEDEP}]
		dev-python/tqdm[${PYTHON_USEDEP}]
		dev-python/xxhash[${PYTHON_USEDEP}]
		vision? (
			dev-python/pillow[${PYTHON_USEDEP}]
		)
	')
	torch? (
		sci-ml/caffe2[${PYTHON_SINGLE_USEDEP},numpy]
		sci-ml/pytorch[${PYTHON_SINGLE_USEDEP}]
	)
"
DEPEND="${RDEPEND}"
# Test dependencies noted as missing (none of them appear in BDEPEND below):
#	joblib
#	joblibspark
#	faiss-cpu
#	jax
#	jaxlib
#	polars
#	pyav
#	pyspark
#	py7zr
#	s3fs
#	tensorflow
#	tiktoken
#	torchdata
#	transformers
BDEPEND="test? (
	sci-ml/torchvision[${PYTHON_SINGLE_USEDEP}]
	$(python_gen_cond_dep '
		dev-python/absl-py[${PYTHON_USEDEP}]
		dev-python/decorator[${PYTHON_USEDEP}]
		dev-python/elasticsearch[${PYTHON_USEDEP}]
		dev-python/h5py[${PYTHON_USEDEP}]
		dev-python/lz4[${PYTHON_USEDEP}]
		dev-python/moto[${PYTHON_USEDEP}]
		dev-python/protobuf:=[${PYTHON_USEDEP}]
		dev-python/pytest-datadir[${PYTHON_USEDEP}]
		dev-python/pytest-xdist[${PYTHON_USEDEP}]
		dev-python/soundfile[${PYTHON_USEDEP}]
		dev-python/sqlalchemy[${PYTHON_USEDEP}]
		dev-python/zstandard[${PYTHON_USEDEP}]
	')
)"

# Declared before distutils_enable_tests so the eclass can see the plugin list.
EPYTEST_PLUGINS=( pytest-datadir )

distutils_enable_tests pytest

# Patch the unpacked sources before the eclass' own prepare phase runs.
src_prepare() {
	# Delete every line containing "log(pickler" from the dill helper module.
	# NOTE(review): the motivation is not recorded here -- presumably an
	# incompatibility with the packaged dill; confirm before dropping.
	sed -i \
		-e "/log(pickler/d" \
		src/datasets/utils/_dill.py \
		|| die
	distutils-r1_src_prepare
}

# Run the test suite, limited to tests carrying the "unit" marker, with one
# test module ignored and a fixed set of tests/modules deselected.
python_test() {
	# Test modules skipped entirely (not collected).
	local EPYTEST_IGNORE=(
		tests/packaged_modules/test_spark.py
	)

	# Individual tests and whole modules deselected from the run.
	local EPYTEST_DESELECT=(
		tests/io/test_parquet.py::test_parquet_read_geoparquet
		tests/packaged_modules/test_folder_based_builder.py::test_data_files_with_different_levels_no_metadata
		tests/packaged_modules/test_folder_based_builder.py::test_data_files_with_one_label_no_metadata
		tests/test_arrow_dataset.py::BaseDatasetTest::test_filter_caching_on_disk
		tests/test_arrow_dataset.py::BaseDatasetTest::test_map_caching_on_disk
		tests/test_arrow_dataset.py::BaseDatasetTest::test_map_caching_partial_remap_on_disk
		tests/test_arrow_dataset.py::BaseDatasetTest::test_map_caching_reuses_cache_with_different_num_proc_on_disk
		tests/features/test_audio.py
		tests/test_data_files.py
		tests/test_fingerprint.py
		tests/test_hub.py::test_delete_from_hub
		tests/test_load.py
	)

	epytest -m 'unit'
}
