summaryrefslogtreecommitdiff
path: root/sci-ml/sentencepiece/sentencepiece-0.2.0-r3.ebuild
blob: 2d749006435e2d909dd938f37468260c7febc4d8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
# Copyright 2025 Gentoo Authors
# Distributed under the terms of the GNU General Public License v2

EAPI=8

# The Python bindings are built through the PEP 517 setuptools backend and
# contain a compiled extension (DISTUTILS_EXT=1).
DISTUTILS_USE_PEP517=setuptools
DISTUTILS_EXT=1
PYTHON_COMPAT=( python3_{10..14} )
# cmake drives the C++ build, distutils-r1 the Python module; dot-a provides
# lto-guarantee-fat and strip-lto-bytecode, used in src_configure and
# src_install.
inherit cmake distutils-r1 dot-a

DESCRIPTION="Text tokenizer for Neural Network-based text generation"
HOMEPAGE="https://github.com/google/sentencepiece"
# Fetch the upstream v${PV} tag tarball and save it under a name that
# carries the package name and version (${P}.tar.gz).
SRC_URI="https://github.com/google/${PN}/archive/refs/tags/v${PV}.tar.gz
	-> ${P}.tar.gz"

LICENSE="Apache-2.0"
SLOT="0"
KEYWORDS="~amd64"

# Runtime dependencies. abseil-cpp and protobuf use the := slot operator;
# the last entry blocks sci-ml/pysentencepiece from being installed
# alongside this package.
# NOTE(review): src_compile sets PKG_CONFIG_PATH for the Python build, which
# suggests pkg-config is invoked there, yet no virtual/pkgconfig build
# dependency is declared in this file -- confirm whether one is needed.
RDEPEND="
	dev-cpp/abseil-cpp:=
	dev-libs/protobuf:=
	dev-util/google-perftools
	!sci-ml/pysentencepiece
"
# dev-libs/darts is needed at build time only: src_prepare rewrites the
# bundled third_party/darts_clone/darts.h includes to plain darts.h.
DEPEND="${RDEPEND}
	dev-libs/darts
"

# Upstream documentation files to install with the package.
DOCS=(
	README.md
	doc/api.md
	doc/experiments.md
	doc/normalization.md
	doc/options.md
	doc/special_symbols.md
)

# Prepare the source tree: point the darts.h includes at the unbundled
# header, apply the Gentoo patches, run both eclasses' prepare steps and
# generate the pkg-config file that the Python build picks up.
src_prepare() {
	# Sources that include the bundled third_party/darts_clone/darts.h.
	local darts_users=(
		src/model_interface.h
		src/normalizer.h
		src/normalizer.cc
		src/unigram_model.h
		src/builder.cc
	)
	sed -i -e "s:third_party/darts_clone/darts.h:darts.h:" \
		"${darts_users[@]}" || die

	local fixes=(
		"${FILESDIR}/${P}-gcc15.patch"
		"${FILESDIR}/${P}-cmake.patch"
		"${FILESDIR}/${P}-nostrip.patch"
	)
	eapply "${fixes[@]}"

	cmake_src_prepare
	distutils-r1_src_prepare

	# Fill in the .pc template so that it points at the in-tree headers
	# (${S}/src) and at the CMake build output (${BUILD_DIR}/src).
	# BUILD_DIR is expanded right here, so this must stay after the eclass
	# prepare calls (presumably cmake_src_prepare is what sets it -- verify).
	local pc_subst=(
		-e 's|@libprotobuf_lite@|protobuf-lite|'
		-e "s|@includedir_for_pc_file@|${S}/src|"
		-e "s|@libdir_for_pc_file@|${BUILD_DIR}/src|"
	)
	sed "${pc_subst[@]}" "${PN}.pc.in" > "python/${PN}.pc" || die
}

# Configure the CMake build, selecting the "package" provider for both
# abseil and protobuf.
src_configure() {
	# dot-a.eclass helper; paired with strip-lto-bytecode in src_install.
	lto-guarantee-fat

	local mycmakeargs=()
	mycmakeargs+=( -DSPM_ABSL_PROVIDER=package )
	mycmakeargs+=( -DSPM_PROTOBUF_PROVIDER=package )

	cmake_src_configure
}

# Build the C++ library with CMake first, then build the Python bindings
# from the python/ subdirectory.
src_compile() {
	cmake_src_compile
	# Abort if python/ is missing; otherwise the distutils build would run
	# from the wrong directory and PKG_CONFIG_PATH=. would point elsewhere.
	cd python || die
	# "." is python/, where src_prepare generated ${PN}.pc.
	PKG_CONFIG_PATH=. distutils-r1_src_compile
}

# Run the Python test phase with the dynamic loader pointed at the CMake
# build output directory (${BUILD_DIR}/src).
src_test() {
	# Exported for the duration of this function, so it reaches the test
	# processes started by distutils-r1_src_test.
	local -x LD_LIBRARY_PATH="${BUILD_DIR}/src"
	distutils-r1_src_test
}

# Run upstream's Python test script with the current interpreter
# (${EPYTHON}) from inside the python/ subdirectory.
python_test() {
	# The test script path below is relative to python/, so fail early if
	# the directory change does not succeed.
	cd python || die
	"${EPYTHON}" test/sentencepiece_test.py || die "Tests failed with ${EPYTHON}"
}

# Install the CMake-built files, then the Python module, and finally run
# strip-lto-bytecode (the dot-a.eclass counterpart of the lto-guarantee-fat
# call in src_configure) over the installed image.
src_install() {
	cmake_src_install
	distutils-r1_src_install
	# Kept last so that it runs after both install steps.
	strip-lto-bytecode
}