r-quanteda: fix build

2025-03-10 12:02:42 +00:00 · 2024-04-05 11:48:51 +03:00 · 2024-04-05 11:48:51 +03:00 · e11b9e121d
commit e11b9e121d
parent b1269735cf
5 changed files with 27 additions and 149 deletions
--- a/BioArchLinux/r-quanteda/PKGBUILD
+++ b/BioArchLinux/r-quanteda/PKGBUILD
@@ -2,10 +2,10 @@
 # Contributor: Guoyi Zhang <guoyizhang at malacology dot net>

 _pkgname=quanteda
-_pkgver=3.3.1
+_pkgver=4.0.0
 pkgname=r-${_pkgname,,}
 pkgver=${_pkgver//-/.}
-pkgrel=3
+pkgrel=0
 pkgdesc="Quantitative Analysis of Textual Data"
 arch=(x86_64)
 url="https://cran.r-project.org/package=$_pkgname"
@@ -14,9 +14,9 @@ depends=(
  onetbb
  r-fastmatch
  r-jsonlite
+  r-lifecycle
  r-magrittr
  r-rcpp
-  r-rcppparallel
  r-snowballc
  r-stopwords
  r-stringi
@@ -38,43 +38,26 @@ checkdepends=(
  r-topicmodels
 )
 optdepends=(
-  r-dplyr
  r-formatr
-  r-ggplot2
  r-knitr
-  r-lda
  r-lsa
-  r-purrr
-  r-quanteda.textmodels
-  r-quanteda.textplots
-  r-quanteda.textstats
-  r-rcolorbrewer
+  r-rlang
  r-rmarkdown
  r-slam
-  r-spacyr
  r-spelling
-  r-stm
  r-testthat
-  r-text2vec
-  r-tibble
-  r-tidytext
  r-tm
-  r-tokenizers
-  r-topicmodels
-  r-xtable
 )
 source=("https://cran.r-project.org/src/contrib/${_pkgname}_${_pkgver}.tar.gz"
-        "std-atomic.patch")
-md5sums=('b34e0169f0ac07848ecf17dc32585cd9'
-         '09a300caa20e8b24350b8999388a42a2')
-b2sums=('eea198adf7214955560d2a39c83355925a8282ae814fcb4a7bc332d4c5a791e1d4ddc4af34ec4082b5304fc98b07df09e6be60a5036bb75e20790b8715246a30'
-        'aa84229d25909d3c4054775df808fe9b0d1a76c33bc04dc477d22c801f8888985030e5f9cbcbc5a5897ee4a19a60eabbcb854ebfcc0b424bf5d6624680fbb60e')
+        "skip-tests.patch")
+md5sums=('b6116adb5b3a35f38a20869f55564626'
+         'fd46ba9ed33ccf9a5ae1e79737fd5c21')
+b2sums=('dd76426a99f09beef483af2a4b592d2b59350afb62089f85e98e0c0bc29c937f386721725d392e7db7ed18affd46a59eb4b0d3ce8a2bc8744dc8c8df461c2ec1'
+        'a8123128d9c00cf99f8d3d059886c2c3a5a175deca05f8281f3db2e0b01f418e7e5cfaf7fe0f8f0fe4c5dd7db262b07c15062b5267a65b74d7b80719740b10e8')

 prepare() {
-  # Switch from tbb::atomic to std::atomic to allow building with newer tbb.
-  # Modified from https://github.com/quanteda/quanteda/pull/2296
  # Skip failing test
-  patch -Np1 -i std-atomic.patch
+  patch -Np1 -i skip-tests.patch
 }

 build() {
--- a/BioArchLinux/r-quanteda/lilac.py
+++ b/BioArchLinux/r-quanteda/lilac.py
@@ -7,7 +7,10 @@ sys.path.append(os.path.normpath(f'{__file__}/../../../lilac-extensions'))
 from lilac_r_utils import r_pre_build

 def pre_build():
-    r_pre_build(_G)
+    r_pre_build(
+        _G,
+        expect_systemrequirements = "Intel TBB: tbb-devel (Fedora, CentOS, RHEL), libtbb-dev (Debian, Ubuntu, etc) or tbb (Mac).",
+    )

 def post_build():
    git_pkgbuild_commit()
--- a/BioArchLinux/r-quanteda/lilac.yaml
+++ b/BioArchLinux/r-quanteda/lilac.yaml
@@ -5,9 +5,9 @@ maintainers:
 repo_depends:
 - r-fastmatch
 - r-jsonlite
+- r-lifecycle
 - r-magrittr
 - r-rcpp
- r-rcppparallel
 - r-snowballc
 - r-stopwords
 - r-stringi
--- a/BioArchLinux/r-quanteda/skip-tests.patch
+++ b/BioArchLinux/r-quanteda/skip-tests.patch
@@ -0,0 +1,12 @@
+diff --git a/quanteda/tests/testthat/test-tokens-word4.R b/quanteda/tests/testthat/test-tokens-word4.R
+index 3ee220a..ef1d644 100644
+--- a/quanteda/tests/testthat/test-tokens-word4.R
+++ b/quanteda/tests/testthat/test-tokens-word4.R
+@@ -105,6 +105,7 @@ test_that("tokens works as expected for what = character", {
+ })
+ 
+ test_that("tokens works with unusual hiragana #554", {
+    skip("fails")
+     skip_on_cran()
+     skip_on_os("windows")
+     skip_on_os("mac")
--- a/BioArchLinux/r-quanteda/std-atomic.patch
+++ b/BioArchLinux/r-quanteda/std-atomic.patch
@@ -1,120 +0,0 @@
-diff --git a/quanteda/inst/include/lib.h b/quanteda/inst/include/lib.h
-index eb6b09f..68aff1b 100644
---- a/quanteda/inst/include/lib.h
-+++ b/quanteda/inst/include/lib.h
-@@ -32,21 +32,16 @@ namespace quanteda{
-     typedef std::vector<unsigned int> Text;
-     typedef std::vector<Text> Texts;
-     
-+    typedef std::atomic<int> IntParam;
-+    typedef std::atomic<unsigned int> UintParam;
-+    typedef std::atomic<long> LongParam;
-+    typedef std::atomic<double> DoubleParam;
- #if QUANTEDA_USE_TBB
-    typedef tbb::atomic<int> IntParam;
-    typedef tbb::atomic<unsigned int> UintParam;
-    typedef tbb::atomic<long> LongParam;
-    typedef tbb::atomic<double> DoubleParam;
-     typedef tbb::concurrent_vector<int> IntParams;
-     typedef tbb::concurrent_vector<long> LongParams;
-     typedef tbb::concurrent_vector<double> DoubleParams;
-     typedef tbb::concurrent_vector<std::string> StringParams;
-    typedef tbb::spin_mutex Mutex;
- #else
-    typedef int IntParam;
-    typedef unsigned int UintParam;
-    typedef long LongParam;
-    typedef double DoubleParam;
-     typedef std::vector<int> IntParams;
-     typedef std::vector<long> LongParams;
-     typedef std::vector<double> DoubleParams;
-@@ -77,9 +72,9 @@ namespace quanteda{
-     };
-     
- #if QUANTEDA_USE_TBB
-    typedef tbb::atomic<unsigned int> IdNgram;
-    typedef tbb::concurrent_unordered_multimap<Ngram, UintParam, hash_ngram, equal_ngram> MultiMapNgrams;
-    typedef tbb::concurrent_unordered_map<Ngram, UintParam, hash_ngram, equal_ngram> MapNgrams;
-+    typedef std::atomic<unsigned int> IdNgram;
-+    typedef tbb::concurrent_unordered_multimap<Ngram, unsigned int, hash_ngram, equal_ngram> MultiMapNgrams;
-+    typedef tbb::concurrent_unordered_map<Ngram, unsigned int, hash_ngram, equal_ngram> MapNgrams;
-     typedef tbb::concurrent_unordered_set<Ngram, hash_ngram, equal_ngram> SetNgrams;
-     typedef tbb::concurrent_vector<Ngram> VecNgrams;
-     typedef tbb::concurrent_unordered_set<unsigned int> SetUnigrams;
-@@ -217,7 +212,7 @@ namespace quanteda{
-         std::vector<unsigned int> ids = Rcpp::as< std::vector<unsigned int> >(ids_);
-         std::vector<std::size_t> spans(patterns.size());
-         for (size_t g = 0; g < std::min(patterns.size(), ids.size()); g++) {
-            map.insert(std::pair<Ngram, IdNgram>(patterns[g], ids[g]));
-+            map.insert(std::pair<Ngram, unsigned int>(patterns[g], ids[g]));
-             spans[g] = patterns[g].size();
-         }
- 
-diff --git a/quanteda/inst/include/skipgram.h b/quanteda/inst/include/skipgram.h
-index 8b14e77..723a4b7 100644
---- a/quanteda/inst/include/skipgram.h
-+++ b/quanteda/inst/include/skipgram.h
-@@ -9,11 +9,7 @@ inline unsigned int ngram_id(const Ngram &ngram,
-     
-     auto it1 = map_ngram.find(ngram);
-     if (it1 != map_ngram.end()) return it1->second;
-#if QUANTEDA_USE_TBB    
-    auto it2 = map_ngram.insert(std::pair<Ngram, unsigned int>(ngram, id_ngram.fetch_and_increment()));
-#else
-    auto it2 = map_ngram.insert(std::pair<Ngram, unsigned int>(ngram, id_ngram++));
-#endif
-+    auto it2 = map_ngram.insert(std::pair<Ngram, unsigned int>(ngram, id_ngram.fetch_add(1, std::memory_order_relaxed)));
-     return it2.first->second;
-     
- }
-diff --git a/quanteda/src/index.cpp b/quanteda/src/index.cpp
-index 178d7ae..9dd8c4a 100644
--- a/quanteda/src/index.cpp
-+++ b/quanteda/src/index.cpp
-@@ -107,7 +107,7 @@ DataFrame qatd_cpp_index(const List &texts_,
-     std::vector<Matches> temp(texts.size());
-     
-     //dev::start_timer("Search keywords", timer);
-    UintParam n_match = 0;
-+    UintParam n_match(0);
- #if QUANTEDA_USE_TBB
-     index_mt index_mt(texts, temp, spans, map_pats, n_match);
-     parallelFor(0, texts.size(), index_mt);
-diff --git a/quanteda/src/tokens_chunk.cpp b/quanteda/src/tokens_chunk.cpp
-index 5dd92e4..b048bde 100644
--- a/quanteda/src/tokens_chunk.cpp
-+++ b/quanteda/src/tokens_chunk.cpp
-@@ -62,7 +62,7 @@ List qatd_cpp_tokens_chunk(const List &texts_,
-     
-     Texts texts = Rcpp::as<Texts>(texts_);
-     Types types = Rcpp::as< Types >(types_);
-    UintParam count = 0;
-+    UintParam count(0);
-     // dev::Timer timer;
-     std::vector<Texts> temp(texts.size());
-     
-diff --git a/quanteda/src/tokens_lookup.cpp b/quanteda/src/tokens_lookup.cpp
-index c9323af..0f251e6 100644
--- a/quanteda/src/tokens_lookup.cpp
-+++ b/quanteda/src/tokens_lookup.cpp
-@@ -167,7 +167,7 @@ List qatd_cpp_tokens_lookup(const List &texts_,
-     
-     Texts texts = Rcpp::as<Texts>(texts_);
-     Types types = Rcpp::as<Types>(types_);
-    unsigned int id_max(0);
-+    unsigned int id_max = 0;
-     if (nomatch == 2) {
-         id_max = keys_.size() > 0 ? Rcpp::max(keys_) : 0;
-     } else {
-diff --git a/quanteda/tests/testthat/test-tokens-word4.R b/quanteda/tests/testthat/test-tokens-word4.R
-index 1798c16..28306f3 100644
---- a/quanteda/tests/testthat/test-tokens-word4.R
-+++ b/quanteda/tests/testthat/test-tokens-word4.R
-@@ -125,6 +125,7 @@ test_that("tokens works as expected for what = character", {
- })
- 
- test_that("tokens works with unusual hiragana #554", {
-+    skip("fails")
-     skip_on_cran()
-     skip_on_os("windows")
-     skip_on_os("mac")