r-quanteda: fix build

This commit is contained in:
Pekka Ristola 2024-04-05 11:48:51 +03:00
parent b1269735cf
commit e11b9e121d
No known key found for this signature in database
GPG key ID: 2C20BE716E05213E
5 changed files with 27 additions and 149 deletions

View file

@ -2,10 +2,10 @@
# Contributor: Guoyi Zhang <guoyizhang at malacology dot net>
_pkgname=quanteda
_pkgver=3.3.1
_pkgver=4.0.0
pkgname=r-${_pkgname,,}
pkgver=${_pkgver//-/.}
pkgrel=3
pkgrel=0
pkgdesc="Quantitative Analysis of Textual Data"
arch=(x86_64)
url="https://cran.r-project.org/package=$_pkgname"
@ -14,9 +14,9 @@ depends=(
onetbb
r-fastmatch
r-jsonlite
r-lifecycle
r-magrittr
r-rcpp
r-rcppparallel
r-snowballc
r-stopwords
r-stringi
@ -38,43 +38,26 @@ checkdepends=(
r-topicmodels
)
optdepends=(
r-dplyr
r-formatr
r-ggplot2
r-knitr
r-lda
r-lsa
r-purrr
r-quanteda.textmodels
r-quanteda.textplots
r-quanteda.textstats
r-rcolorbrewer
r-rlang
r-rmarkdown
r-slam
r-spacyr
r-spelling
r-stm
r-testthat
r-text2vec
r-tibble
r-tidytext
r-tm
r-tokenizers
r-topicmodels
r-xtable
)
source=("https://cran.r-project.org/src/contrib/${_pkgname}_${_pkgver}.tar.gz"
"std-atomic.patch")
md5sums=('b34e0169f0ac07848ecf17dc32585cd9'
'09a300caa20e8b24350b8999388a42a2')
b2sums=('eea198adf7214955560d2a39c83355925a8282ae814fcb4a7bc332d4c5a791e1d4ddc4af34ec4082b5304fc98b07df09e6be60a5036bb75e20790b8715246a30'
'aa84229d25909d3c4054775df808fe9b0d1a76c33bc04dc477d22c801f8888985030e5f9cbcbc5a5897ee4a19a60eabbcb854ebfcc0b424bf5d6624680fbb60e')
"skip-tests.patch")
md5sums=('b6116adb5b3a35f38a20869f55564626'
'fd46ba9ed33ccf9a5ae1e79737fd5c21')
b2sums=('dd76426a99f09beef483af2a4b592d2b59350afb62089f85e98e0c0bc29c937f386721725d392e7db7ed18affd46a59eb4b0d3ce8a2bc8744dc8c8df461c2ec1'
'a8123128d9c00cf99f8d3d059886c2c3a5a175deca05f8281f3db2e0b01f418e7e5cfaf7fe0f8f0fe4c5dd7db262b07c15062b5267a65b74d7b80719740b10e8')
prepare() {
# Switch from tbb::atomic to std::atomic to allow building with newer tbb.
# Modified from https://github.com/quanteda/quanteda/pull/2296
# Skip failing test
patch -Np1 -i std-atomic.patch
patch -Np1 -i skip-tests.patch
}
build() {

View file

@ -7,7 +7,10 @@ sys.path.append(os.path.normpath(f'{__file__}/../../../lilac-extensions'))
from lilac_r_utils import r_pre_build
def pre_build():
r_pre_build(_G)
r_pre_build(
_G,
expect_systemrequirements = "Intel TBB: tbb-devel (Fedora, CentOS, RHEL), libtbb-dev (Debian, Ubuntu, etc) or tbb (Mac).",
)
def post_build():
git_pkgbuild_commit()

View file

@ -5,9 +5,9 @@ maintainers:
repo_depends:
- r-fastmatch
- r-jsonlite
- r-lifecycle
- r-magrittr
- r-rcpp
- r-rcppparallel
- r-snowballc
- r-stopwords
- r-stringi

View file

@ -0,0 +1,12 @@
diff --git a/quanteda/tests/testthat/test-tokens-word4.R b/quanteda/tests/testthat/test-tokens-word4.R
index 3ee220a..ef1d644 100644
--- a/quanteda/tests/testthat/test-tokens-word4.R
+++ b/quanteda/tests/testthat/test-tokens-word4.R
@@ -105,6 +105,7 @@ test_that("tokens works as expected for what = character", {
})
test_that("tokens works with unusual hiragana #554", {
+ skip("fails")
skip_on_cran()
skip_on_os("windows")
skip_on_os("mac")

View file

@ -1,120 +0,0 @@
diff --git a/quanteda/inst/include/lib.h b/quanteda/inst/include/lib.h
index eb6b09f..68aff1b 100644
--- a/quanteda/inst/include/lib.h
+++ b/quanteda/inst/include/lib.h
@@ -32,21 +32,16 @@ namespace quanteda{
typedef std::vector<unsigned int> Text;
typedef std::vector<Text> Texts;
+ typedef std::atomic<int> IntParam;
+ typedef std::atomic<unsigned int> UintParam;
+ typedef std::atomic<long> LongParam;
+ typedef std::atomic<double> DoubleParam;
#if QUANTEDA_USE_TBB
- typedef tbb::atomic<int> IntParam;
- typedef tbb::atomic<unsigned int> UintParam;
- typedef tbb::atomic<long> LongParam;
- typedef tbb::atomic<double> DoubleParam;
typedef tbb::concurrent_vector<int> IntParams;
typedef tbb::concurrent_vector<long> LongParams;
typedef tbb::concurrent_vector<double> DoubleParams;
typedef tbb::concurrent_vector<std::string> StringParams;
- typedef tbb::spin_mutex Mutex;
#else
- typedef int IntParam;
- typedef unsigned int UintParam;
- typedef long LongParam;
- typedef double DoubleParam;
typedef std::vector<int> IntParams;
typedef std::vector<long> LongParams;
typedef std::vector<double> DoubleParams;
@@ -77,9 +72,9 @@ namespace quanteda{
};
#if QUANTEDA_USE_TBB
- typedef tbb::atomic<unsigned int> IdNgram;
- typedef tbb::concurrent_unordered_multimap<Ngram, UintParam, hash_ngram, equal_ngram> MultiMapNgrams;
- typedef tbb::concurrent_unordered_map<Ngram, UintParam, hash_ngram, equal_ngram> MapNgrams;
+ typedef std::atomic<unsigned int> IdNgram;
+ typedef tbb::concurrent_unordered_multimap<Ngram, unsigned int, hash_ngram, equal_ngram> MultiMapNgrams;
+ typedef tbb::concurrent_unordered_map<Ngram, unsigned int, hash_ngram, equal_ngram> MapNgrams;
typedef tbb::concurrent_unordered_set<Ngram, hash_ngram, equal_ngram> SetNgrams;
typedef tbb::concurrent_vector<Ngram> VecNgrams;
typedef tbb::concurrent_unordered_set<unsigned int> SetUnigrams;
@@ -217,7 +212,7 @@ namespace quanteda{
std::vector<unsigned int> ids = Rcpp::as< std::vector<unsigned int> >(ids_);
std::vector<std::size_t> spans(patterns.size());
for (size_t g = 0; g < std::min(patterns.size(), ids.size()); g++) {
- map.insert(std::pair<Ngram, IdNgram>(patterns[g], ids[g]));
+ map.insert(std::pair<Ngram, unsigned int>(patterns[g], ids[g]));
spans[g] = patterns[g].size();
}
diff --git a/quanteda/inst/include/skipgram.h b/quanteda/inst/include/skipgram.h
index 8b14e77..723a4b7 100644
--- a/quanteda/inst/include/skipgram.h
+++ b/quanteda/inst/include/skipgram.h
@@ -9,11 +9,7 @@ inline unsigned int ngram_id(const Ngram &ngram,
auto it1 = map_ngram.find(ngram);
if (it1 != map_ngram.end()) return it1->second;
-#if QUANTEDA_USE_TBB
- auto it2 = map_ngram.insert(std::pair<Ngram, unsigned int>(ngram, id_ngram.fetch_and_increment()));
-#else
- auto it2 = map_ngram.insert(std::pair<Ngram, unsigned int>(ngram, id_ngram++));
-#endif
+ auto it2 = map_ngram.insert(std::pair<Ngram, unsigned int>(ngram, id_ngram.fetch_add(1, std::memory_order_relaxed)));
return it2.first->second;
}
diff --git a/quanteda/src/index.cpp b/quanteda/src/index.cpp
index 178d7ae..9dd8c4a 100644
--- a/quanteda/src/index.cpp
+++ b/quanteda/src/index.cpp
@@ -107,7 +107,7 @@ DataFrame qatd_cpp_index(const List &texts_,
std::vector<Matches> temp(texts.size());
//dev::start_timer("Search keywords", timer);
- UintParam n_match = 0;
+ UintParam n_match(0);
#if QUANTEDA_USE_TBB
index_mt index_mt(texts, temp, spans, map_pats, n_match);
parallelFor(0, texts.size(), index_mt);
diff --git a/quanteda/src/tokens_chunk.cpp b/quanteda/src/tokens_chunk.cpp
index 5dd92e4..b048bde 100644
--- a/quanteda/src/tokens_chunk.cpp
+++ b/quanteda/src/tokens_chunk.cpp
@@ -62,7 +62,7 @@ List qatd_cpp_tokens_chunk(const List &texts_,
Texts texts = Rcpp::as<Texts>(texts_);
Types types = Rcpp::as< Types >(types_);
- UintParam count = 0;
+ UintParam count(0);
// dev::Timer timer;
std::vector<Texts> temp(texts.size());
diff --git a/quanteda/src/tokens_lookup.cpp b/quanteda/src/tokens_lookup.cpp
index c9323af..0f251e6 100644
--- a/quanteda/src/tokens_lookup.cpp
+++ b/quanteda/src/tokens_lookup.cpp
@@ -167,7 +167,7 @@ List qatd_cpp_tokens_lookup(const List &texts_,
Texts texts = Rcpp::as<Texts>(texts_);
Types types = Rcpp::as<Types>(types_);
- unsigned int id_max(0);
+ unsigned int id_max = 0;
if (nomatch == 2) {
id_max = keys_.size() > 0 ? Rcpp::max(keys_) : 0;
} else {
diff --git a/quanteda/tests/testthat/test-tokens-word4.R b/quanteda/tests/testthat/test-tokens-word4.R
index 1798c16..28306f3 100644
--- a/quanteda/tests/testthat/test-tokens-word4.R
+++ b/quanteda/tests/testthat/test-tokens-word4.R
@@ -125,6 +125,7 @@ test_that("tokens works as expected for what = character", {
})
test_that("tokens works with unusual hiragana #554", {
+ skip("fails")
skip_on_cran()
skip_on_os("windows")
skip_on_os("mac")