add new scripts

This commit is contained in:
sukanka 2022-11-06 01:05:08 +08:00
parent 1be7fc92e0
commit 614b2b7075
4 changed files with 505 additions and 0 deletions

356
updater/depends_updater.py Executable file
View file

@ -0,0 +1,356 @@
#!/usr/bin/python
'''
Update the `depends` and `optdepends` arrays in the PKGBUILD of every R package listed in a file such as `pkgname.txt`.
'''
import requests
from re import findall
from packaging import version
import configparser
import logging
from lilac2 import api as lilac
import argparse
import os
import yaml
from typing import Optional
# R package names that are never turned into `r-*` dependencies; they are
# subtracted from the parsed Depends/Imports/LinkingTo names.
# NOTE(review): this looks like the set of packages bundled with R itself
# (plus "R") -- confirm before adding unrelated names.
EXCLUDED_PKGS = {
    "base", "boot", "class", "cluster", "codetools", "compiler",
    "datasets", "foreign", "graphics", "grDevices", "grid", "KernSmooth",
    "lattice", "MASS", "Matrix", "methods", "mgcv", "nlme",
    "nnet", "parallel", "rpart", "spatial", "splines", "stats",
    "stats4", "survival", "tcltk", "tools", "utils",
    "R",
}
class PkgInfo:
    '''
    Dependency information of one R package.

    The instance is built from the `PKGBUILD` in the current working
    directory and from the PACKAGES index of CRAN / Bioconductor.
    Constructing it performs all the reading and downloading; afterwards
    call `update_pkgbuild()` and `update_yaml()` to write the result back.
    '''

    def __init__(self, pkgname=None, depends=None, optdepends=None,
                 cran_meta_mirror="https://cran.r-project.org",
                 bioc_meta_mirror="https://bioconductor.org",
                 bioc_versions=None,
                 bioc_min_version="3.0",):
        '''
        pkgname: name of the package, style in CRAN and Bioconductor, e.g. "Rcpp".
            Replaced by `_pkgname` of the PKGBUILD when that line exists.
        depends: depends of the package, style in PKGBUILD, e.g. "r-base".
            Always replaced by the value lilac reads from the PKGBUILD.
        optdepends: optdepends of the package, style in PKGBUILD,
            e.g. "r-rmarkdown: for vignettes". Always replaced by the value
            lilac reads from the PKGBUILD.
        cran_meta_mirror: remote mirror of CRAN used to download the PACKAGES file.
        bioc_meta_mirror: remote mirror of Bioconductor used to download the PACKAGES files.
        bioc_versions: Bioconductor versions to search. When empty or None
            they are downloaded from `{bioc_meta_mirror}/bioc_version`.
        bioc_min_version: minimum version of Bioconductor we want to support.

        Raises RuntimeError when the metadata cannot be downloaded or the
        package is found neither in CRAN nor in Bioconductor.
        '''
        self.pkgname = pkgname
        self.depends = depends
        self.optdepends = optdepends
        self.pkgver = None

        self.new_depends = []
        self.new_optdepends = []

        # Copy so that a caller-owned list is never shared with the instance.
        self.bioc_versions = list(bioc_versions) if bioc_versions else []
        self.cran_meta_mirror = cran_meta_mirror
        self.bioc_meta_mirror = bioc_meta_mirror
        self.bioc_min_version = bioc_min_version

        self.depends_changed = False
        self.optdepends_changed = False

        if not self.bioc_versions:
            self.set_bioc_versions()
        self.parse_pkgbuild()
        desc = self.get_desc()
        if desc is None:
            # Fail with a clear message instead of a TypeError further down.
            raise RuntimeError(
                f"Package {self.pkgname} not found in CRAN or Bioconductor")
        self.update_info(desc)
        self.merge_depends()

    def set_bioc_versions(self) -> None:
        '''
        Download the comma separated list of Bioconductor versions from
        `{bioc_meta_mirror}/bioc_version` and store it in `self.bioc_versions`.

        Raises RuntimeError when the request does not succeed.
        '''
        version_page = requests.get(
            f"{self.bioc_meta_mirror}/bioc_version")
        if version_page.status_code != requests.codes.ok:
            raise RuntimeError(
                f"Failed to get Bioconductor versions due to: {version_page.status_code}: {version_page.reason}")
        # Ignore empty fields (e.g. caused by a trailing comma or newline).
        self.bioc_versions = [
            version.parse(item.strip())
            for item in version_page.text.split(',')
            if item.strip()
        ]

    def __str__(self) -> str:
        return f"""
Pkgname: {self.pkgname}
Pkgver: {self.pkgver}
Depends: {self.depends}
Optdepends: {self.optdepends}
new_depends: {self.new_depends}
new_optdepends: {self.new_optdepends}
"""

    def parse_pkgbuild(self) -> None:
        '''
        Read `_pkgname` from the PKGBUILD in the current directory and ask
        lilac for its depends and optdepends; store all three on `self`.
        '''
        with open('PKGBUILD', 'r') as f:
            for line in f:
                if line.startswith('_pkgname'):
                    # `_pkgname=Foo`, `_pkgname='Foo'` and `_pkgname="Foo"`
                    self.pkgname = line.split(
                        '=')[-1].strip().strip("'").strip('"')
                    break
        self.depends = lilac.obtain_depends()
        self.optdepends = lilac.obtain_optdepends()

    @staticmethod
    def _find_desc(descs, pkgname):
        '''
        Return the record of `descs` whose first line is exactly
        `Package: <pkgname>`, or None. An exact comparison is required
        because a prefix match would take `RcppArmadillo` for `Rcpp`.
        '''
        header = f'Package: {pkgname}'
        for desc in descs:
            first_line = desc.lstrip().split('\n', 1)[0].strip()
            if first_line == header:
                return desc
        return None

    def get_desc(self) -> Optional[str]:
        '''
        Look the package up in the PACKAGES index of CRAN first and then of
        every Bioconductor version not older than `bioc_min_version`.

        Returns the matching record, or None when the package is not found.
        Raises RuntimeError when the CRAN index cannot be downloaded; a
        failing Bioconductor index is only logged.
        '''
        pkgname = self.pkgname
        cran_url = f"{self.cran_meta_mirror}/src/contrib/PACKAGES"
        # try cran first
        r_cran = requests.get(cran_url)
        if r_cran.status_code != requests.codes.ok:
            raise RuntimeError(
                f"Failed to get CRAN descriptions due to: {r_cran.status_code}: {r_cran.reason}")
        # Records of a PACKAGES file are separated by one empty line.
        self.cran_descs = r_cran.text.split('\n\n')
        desc = self._find_desc(self.cran_descs, pkgname)
        if desc is not None:
            logging.info(f"Found {pkgname} in CRAN")
            return desc

        # try bioconductor
        min_version = version.parse(self.bioc_min_version)
        for ver in self.bioc_versions:
            if ver < min_version:
                continue
            for p in ['bioc', 'data/annotation', 'data/experiment']:
                url = f"{self.bioc_meta_mirror}/packages/{ver}/{p}/src/contrib/PACKAGES"
                response = requests.get(url)
                if response.status_code != requests.codes.ok:
                    logging.error(
                        f'Failed to get Bioconductor descriptions for version: {ver}, {p}, due to: {response.status_code}: {response.reason}')
                    continue
                desc = self._find_desc(response.text.split('\n\n'), pkgname)
                if desc is not None:
                    logging.info(
                        f"Found {pkgname} in Bioconductor {ver}: {p}")
                    return desc
        return None

    @staticmethod
    def _field_names(section, *fields):
        '''
        Collect the bare package names of the comma separated `fields` of
        `section`, dropping version constraints such as `(>= 1.0)` and
        empty items.
        '''
        names = []
        for field in fields:
            value = section.get(field)
            if value:
                names.extend(item.split('(')[0].strip()
                             for item in value.split(','))
        return [name for name in names if name]

    def update_info(self, desc) -> None:
        '''
        Obtain the version, the new depends and the new optdepends from the
        PACKAGES record `desc`, and write them to `self`.
        '''
        # Interpolation is disabled so that a literal '%' in a field is harmless.
        config = configparser.ConfigParser(interpolation=None)
        # A fake section header lets configparser read the `Key: value` record.
        config.read_string('[pkg]\n' + desc)
        pkg = config['pkg']
        self.pkgver = pkg.get('version')

        # depends
        r_deps = set(self._field_names(
            pkg, 'depends', 'imports', 'linkingto')) - EXCLUDED_PKGS
        self.new_depends += [f"r-{_.lower()}" for _ in r_deps]
        self.new_depends.sort()
        if 'r' in self.new_depends:
            self.new_depends.remove('r')

        # opt depends; a set drops names listed in both Suggests and Enhances
        r_optdeps = set(self._field_names(pkg, 'suggests', 'enhances'))
        self.new_optdepends += [f"r-{_.lower()}" for _ in r_optdeps]
        self.new_optdepends.sort()

    def merge_depends(self):
        '''
        Merge old `depends` and `optdepends` into the new ones.

        Non `r-*` entries of the old depends are kept in front of the new
        depends, and the descriptions of old optdepends are carried over.
        Sets `depends_changed` / `optdepends_changed` when the names differ.
        '''
        old_depends = self.depends or []
        system_reqs = sorted(x for x in old_depends if not x.startswith('r-'))
        self.new_depends = system_reqs + self.new_depends
        if sorted(self.new_depends) != sorted(old_depends):
            self.depends_changed = True

        if isinstance(self.optdepends, dict):
            # Compare names first, then keep the explanation of each optdepend.
            if sorted(self.new_optdepends) != sorted(self.optdepends):
                self.optdepends_changed = True
            merged = []
            for name in self.new_optdepends:
                explanation = self.optdepends.get(name)
                merged.append(
                    f"{name}: {explanation}" if explanation else name)
            self.new_optdepends = merged
        else:
            if sorted(self.new_optdepends) != sorted(self.optdepends or []):
                self.optdepends_changed = True

    @staticmethod
    def _find_array(lines, name):
        '''
        Locate the bash array `name=( ... )` in `lines`.

        Returns `(start, end)`, the indices of the line that opens the array
        and of the first line from there on containing `)`, or None when the
        array is missing or never closed. A `)` inside a quoted entry is not
        recognised and ends the array early.
        '''
        start = None
        for i, line in enumerate(lines):
            if start is None:
                if not line.strip().startswith(f"{name}="):
                    continue
                start = i
            if ')' in line:
                return start, i
        return None

    @staticmethod
    def _format_array(name, entries):
        '''
        Render `entries` as a multi-line bash array assignment ending with a
        newline. Entries are shell-quoted when needed, which is required for
        optdepends such as `r-foo: some text`.
        '''
        import shlex

        rendered = [f"{name}=("]
        rendered.extend(' ' + shlex.quote(entry) for entry in entries)
        rendered.append(')')
        return '\n'.join(rendered) + '\n'

    def update_pkgbuild(self):
        '''
        Write the new depends / optdepends to PKGBUILD if they changed.

        An array that cannot be located is skipped with a warning; nothing
        is written when the two arrays overlap.
        '''
        if not self.depends_changed and not self.optdepends_changed:
            return
        with open("PKGBUILD", "r") as f:
            lines = f.readlines()

        depends_interval = self._find_array(lines, "depends")
        optdepends_interval = self._find_array(lines, "optdepends")
        if depends_interval and optdepends_interval and not (
                depends_interval[1] < optdepends_interval[0]
                or optdepends_interval[1] < depends_interval[0]):
            logging.error(
                "depends and optdepends overlap, please fix it manually")
            return

        jobs = (
            ("depends", self.depends_changed,
             depends_interval, self.new_depends),
            ("optdepends", self.optdepends_changed,
             optdepends_interval, self.new_optdepends),
        )
        for name, changed, interval, entries in jobs:
            if not changed:
                continue
            if interval is None:
                logging.warning(
                    f"No `{name}` array found in PKGBUILD of {self.pkgname}, skipped")
                continue
            start, end = interval
            # Blank the old lines instead of deleting them so that the
            # indices of the other array stay valid.
            for i in range(start, end):
                lines[i] = ''
            lines[end] = self._format_array(name, entries)

        logging.info(f"Writing new PKGBUILD for {self.pkgname}")
        with open("PKGBUILD", "w") as f:
            f.writelines(lines)

    def update_yaml(self, yaml_file='lilac.yaml'):
        '''
        Update the `repo_depends` part of `yaml_file`.

        The new value is made of the old non `r-*` entries that are still in
        `new_depends`, followed by all `r-*` entries of `new_depends`. The
        key is left untouched when that list is empty.
        '''
        with open(yaml_file, "r") as f:
            # NOTE(review): yaml.safe_load would likely be enough for this file.
            docs = yaml.load(f, Loader=yaml.FullLoader) or {}
        old_depends = docs.get('repo_depends', [])
        # only keep non-r depends also in new_depends
        non_r_depends = sorted(
            x for x in old_depends
            if not x.startswith('r-') and x in self.new_depends)
        r_new_depends = sorted(
            x for x in self.new_depends if x.startswith('r-'))
        new_deps = non_r_depends + r_new_depends
        if new_deps:
            docs['repo_depends'] = new_deps
        with open(yaml_file, 'w') as f:
            yaml.dump(docs, f, sort_keys=False)
def update_depends_by_file(file, bioarch_path="BioArchLinux", bioc_min_ver="3.0", cran_meta_mirror="https://cran.r-project.org",
                           bioc_meta_mirror="https://bioconductor.org",):
    '''
    Update depends of packages listed in `file`, one package name per line, CRAN style(e.g. `Rcpp`) and pkgname style (`r-rcpp`) are both supported.
    Blank lines are ignored.

    file: file containing package names
    bioarch_path: path to BioArchLinux
    bioc_min_ver: minimum version of Bioconductor to be supported, generally not needed to be changed
    cran_meta_mirror: mirror of CRAN metadata, recommended to be changed to a local https mirror.
    bioc_meta_mirror: mirror of Bioconductor metadata, recommended to be changed to a local https mirror.
    '''
    with open(file, "r") as f:
        pkgnames = [line.strip() for line in f]
    pkgnames = [name for name in pkgnames if name]

    # The style is decided for the whole file: as soon as one name looks
    # like `r-xxx` every line is taken as a pkgname, otherwise every line
    # is a _pkgname (may have upper letters) and is converted.
    pkgname_style = any(name.startswith("r-") for name in pkgnames)

    current_dir = os.getcwd()
    for pkgname in pkgnames:
        if not pkgname_style:
            pkgname = 'r-' + pkgname.lower()
        # PkgInfo works on the PKGBUILD / lilac.yaml of the current directory.
        os.chdir(f"{bioarch_path}/{pkgname}")
        try:
            pkginfo = PkgInfo(bioc_min_version=bioc_min_ver,
                              bioc_meta_mirror=bioc_meta_mirror, cran_meta_mirror=cran_meta_mirror)
            pkginfo.update_pkgbuild()
            pkginfo.update_yaml()
        finally:
            # Always go back, also when a package fails, so that a relative
            # `bioarch_path` and the caller's cwd stay valid.
            os.chdir(current_dir)
# Command line entry point: update every package listed in `--file`.
if __name__ == '__main__':
    # Without a configured handler the INFO messages of this script are dropped.
    logging.basicConfig(level=logging.INFO)
    parser = argparse.ArgumentParser(
        prog='depends updater for BioArchLinux',
        description='update the depends of R packages from CRAN and Bioconductor automatically',
    )
    parser.add_argument(
        '--file', help='The file that contains the pkgname to be updated, one pkgname per line')
    parser.add_argument(
        '--bioarch_path', help='The path of BioArchLinux repo', default="BioArchLinux")
    parser.add_argument(
        '--bioc_min_ver', help="The minimum version of Bioconductor supported, must be greater than 3.0", default="3.0")
    parser.add_argument(
        '--cran_meta_mirror', help="The mirror of CRAN metadata, recommended to be changed to a local https mirror. Only http(s) is supported", default="https://cran.r-project.org")
    parser.add_argument(
        '--bioc_meta_mirror', help="The mirror of Bioconductor metadata, recommended to be changed to a local https mirror. Only http(s) is supported", default="https://bioconductor.org")

    args = parser.parse_args()

    if args.file:
        update_depends_by_file(args.file, args.bioarch_path, args.bioc_min_ver,
                               cran_meta_mirror=args.cran_meta_mirror, bioc_meta_mirror=args.bioc_meta_mirror)
    else:
        # Nothing to do without a package list.
        parser.print_help()

5
updater/readme.md Normal file
View file

@ -0,0 +1,5 @@
# Updater
Scripts used to help update the PKGBUILD and `lilac.yaml` automatically.
For usage, run a script with the `-h` argument.

144
updater/sync_meta_data.py Executable file
View file

@ -0,0 +1,144 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
import requests
import re
from pathlib import Path
from packaging import version
import logging
from distutils.dir_util import copy_tree
import os
import datetime
from dateutil.parser import parse as parsedate
import argparse
class Downloader:
    '''
    Mirror the PACKAGES metadata files of Bioconductor and CRAN to a local
    directory.
    '''

    def __init__(self, bioc_mirror="https://bioconductor.org", cran_mirror="https://cran.r-project.org", bioc_min_ver="3.0") -> None:
        '''
        bioc_mirror: remote Bioconductor mirror, default https://bioconductor.org
        cran_mirror: remote CRAN mirror, default https://cran.r-project.org
        bioc_min_ver: minimum version of Bioconductor to download, default 3.0

        The list of Bioconductor versions is downloaded immediately.
        '''
        self.bioc_mirror = bioc_mirror
        self.cran_mirror = cran_mirror
        self.bioc_min_ver = bioc_min_ver
        self.bioc_versions = []
        self.set_bioc_versions()

    @staticmethod
    def _release_versions(page_text):
        '''
        Extract the version strings of all `/packages/<major>.<minor>/`
        links in `page_text`, add 1.7 down to 1.0, and return them without
        duplicates, newest first.
        '''
        found = set(re.findall(r"/packages/(\d+\.\d+)/", page_text))
        # mannually add 1.7 to 1.0 to the list.
        found.update(f"1.{i}" for i in range(8))
        # Numeric sort: "3.10" has to come before "3.9".
        return sorted(
            found,
            key=lambda ver: tuple(int(part) for part in ver.split('.')),
            reverse=True)

    @staticmethod
    def _path_prefix(path):
        '''
        Turn `path` into a prefix that can be concatenated with a relative
        file name: empty stays empty, otherwise exactly one trailing `/`.
        '''
        if not path:
            return ''
        return path if path.endswith('/') else path + '/'

    def set_bioc_versions(self):
        '''
        obtain and set all available Bioconductor versions from remote mirror.

        Raises RuntimeError when the release page cannot be downloaded.
        '''
        version_page = requests.get(
            f"{self.bioc_mirror}/about/release-announcements/#release-versions/")
        if version_page.status_code != requests.codes.ok:
            raise RuntimeError(
                f"Failed to get Bioconductor versions due to: {version_page.status_code}: {version_page.reason}")
        self.bioc_versions = [
            version.parse(ver)
            for ver in self._release_versions(version_page.text)
        ]

    def download_package_meta(self, path='bioc'):
        '''
        Download package metadata from Bioconductor and CRAN.

        path: path to save metadata, default 'bioc' under current directory.

        Only Bioconductor versions not older than `self.bioc_min_ver` are
        downloaded. Files whose local copy is not older than the remote one
        are skipped; failed downloads are logged and do not raise.
        '''
        min_ver = version.parse(self.bioc_min_ver)
        path = self._path_prefix(path)

        # BIOC
        # bioc_versions is ordered newest first.
        latestver = self.bioc_versions[0]
        for p in ['bioc', 'data/annotation', 'data/experiment']:
            for ver in self.bioc_versions:
                if ver < min_ver:
                    continue
                logging.info(f"Downloading Bioconductor {ver} {p}...")
                contrib_dir = path+f'packages/{ver}/{p}/src/contrib/'
                Path(contrib_dir).mkdir(parents=True, exist_ok=True)
                url = f"{self.bioc_mirror}/packages/{ver}/{p}/src/contrib/PACKAGES"
                dstFile = contrib_dir+'PACKAGES'
                if not remote_is_newer(url, dstFile):
                    # Older releases are taken to be up to date as well, so
                    # the remaining versions of this repository are skipped.
                    logging.info(
                        f"Local Package List for Bioconductor below {ver}: {p} is newer than remote, skip.")
                    break
                meta = requests.get(url)
                if meta.status_code != requests.codes.ok:
                    logging.error(
                        f"failed to download Package List for Bioconductor {ver}: {p} due to {meta.status_code}: {meta.reason}")
                else:
                    with open(dstFile, 'w') as f:
                        f.write(meta.text)

        # Provide the newest version under the name `release` too. The
        # directory is missing when the newest version is below min_ver.
        # NOTE(review): distutils is deprecated (PEP 632) and not part of
        # Python 3.12+; shutil.copytree(..., dirs_exist_ok=True) replaces
        # copy_tree on Python 3.8+.
        latest_dir = path+f'packages/{latestver}'
        if os.path.isdir(latest_dir):
            copy_tree(latest_dir, path+f'packages/release')

        bioc_ver_file = path+'bioc_version'
        with open(bioc_ver_file, 'w') as f:
            f.write(','.join(str(ver) for ver in self.bioc_versions))

        # CRAN
        logging.info("Downloading CRAN metadata...")
        url = f"{self.cran_mirror}/src/contrib/PACKAGES"
        dstFile = path+f'src/contrib/PACKAGES'
        if remote_is_newer(url, dstFile):
            meta = requests.get(url)
            if meta.status_code != requests.codes.ok:
                logging.error(
                    f"failed to download Package List for CRAN due to {meta.status_code}: {meta.reason}")
            else:
                Path(path+f'src/contrib/').mkdir(parents=True, exist_ok=True)
                with open(dstFile, 'w') as f:
                    f.write(meta.text)
        else:  # skip if local is newer
            logging.info(
                "Local Package List for CRAN is newer than remote, skip.")
def remote_is_newer(url, dstFile) -> bool:
    '''
    Tell whether the file at `url` is newer than the local file `dstFile`.

    A missing local file counts as outdated (True). When the HEAD response
    carries no `Last-Modified` header the answer is False. Otherwise the
    header is compared with the modification time of `dstFile`.
    '''
    if os.path.exists(dstFile):
        response = requests.head(url)
        last_modified = response.headers.get('last-modified')
        if last_modified:
            remote_stamp = parsedate(last_modified)
            local_mtime = os.path.getmtime(dstFile)
            local_stamp = datetime.datetime.fromtimestamp(local_mtime)
            # astimezone() makes the naive local time comparable with the
            # parsed header value.
            return remote_stamp > local_stamp.astimezone()
        return False
    return True
# Command line entry point: sync the metadata to a local path.
if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    parser = argparse.ArgumentParser(
        prog='R package metadata sync tool',
        description='Sync metadata of R packages from CRAN and Bioconductor to a local path',
    )
    parser.add_argument(
        '--path', help='The path to store the metadata files. '
        "if not given the environment variable BIO_META_PATH will be read, if it's not set, the default (bioc) will be used.",
        default=None)
    parser.add_argument(
        '--bioc_min_ver', help="The minimum version of Bioconductor supported, must be greater than 3.0", default="3.0")
    parser.add_argument(
        '--cran_meta_mirror', help="The remote mirror of CRAN metadata, only http(s) is supported", default="https://cran.r-project.org")
    parser.add_argument(
        '--bioc_meta_mirror', help="The remote mirror of Bioconductor metadata, only http(s) is supported", default="https://bioconductor.org")
    args = parser.parse_args()

    # Precedence as described in the help text: --path, then BIO_META_PATH,
    # then the default 'bioc'.
    download_path = args.path or os.getenv('BIO_META_PATH', 'bioc')
    # Build the downloader only after parsing, so the options take effect.
    d = Downloader(bioc_mirror=args.bioc_meta_mirror,
                   cran_mirror=args.cran_meta_mirror,
                   bioc_min_ver=args.bioc_min_ver)
    d.download_package_meta(download_path)