mirror of
https://github.com/BioArchLinux/bioarchlinux-tools.git
synced 2025-03-09 22:53:31 +00:00
add new scripts
This commit is contained in:
parent
1be7fc92e0
commit
614b2b7075
4 changed files with 505 additions and 0 deletions
356
updater/depends_updater.py
Executable file
356
updater/depends_updater.py
Executable file
|
@ -0,0 +1,356 @@
|
|||
#!/usr/bin/python
|
||||
'''
|
||||
Update the `depends` and `optdepends` part of an R package PKGBUILD listed in `pkgname.txt`
|
||||
'''
|
||||
import requests
|
||||
from re import findall
|
||||
from packaging import version
|
||||
import configparser
|
||||
import logging
|
||||
from lilac2 import api as lilac
|
||||
import argparse
|
||||
import os
|
||||
import yaml
|
||||
from typing import Optional
|
||||
|
||||
EXCLUDED_PKGS = {
|
||||
"base",
|
||||
"boot",
|
||||
"class",
|
||||
"cluster",
|
||||
"codetools",
|
||||
"compiler",
|
||||
"datasets",
|
||||
"foreign",
|
||||
"graphics",
|
||||
"grDevices",
|
||||
"grid",
|
||||
"KernSmooth",
|
||||
"lattice",
|
||||
"MASS",
|
||||
"Matrix",
|
||||
"methods",
|
||||
"mgcv",
|
||||
"nlme",
|
||||
"nnet",
|
||||
"parallel",
|
||||
"rpart",
|
||||
"spatial",
|
||||
"splines",
|
||||
"stats",
|
||||
"stats4",
|
||||
"survival",
|
||||
"tcltk",
|
||||
"tools",
|
||||
"utils",
|
||||
"R"
|
||||
}
|
||||
|
||||
|
||||
class PkgInfo:
|
||||
def __init__(self, pkgname=None, depends=None, optdepends=None,
|
||||
cran_meta_mirror="https://cran.r-project.org",
|
||||
bioc_meta_mirror="https://bioconductor.org",
|
||||
bioc_versions=[],
|
||||
bioc_min_version="3.0",):
|
||||
'''
|
||||
pkgname: name of the package, style in CRAN and Bioconductor, e.g. "Rcpp",
|
||||
depends: depends of the package, style in PKGBUILD, e.g. "r-base". Updated automatically if not provided.
|
||||
optdepends: optdepends of the package, style in PKGBUILD, e.g. "r-rmarkdown: for vignettes". Updated automatically if not provided.
|
||||
cran_meta_mirror: remote mirror of CRAN use to download PACKAGES file, default to "https://cran.r-project.org"
|
||||
bioc_meta_mirror: remote mirror of Bioconductor use to download PACKAGES file, default to "https://bioconductor.org"
|
||||
bioc_versions: list of Bioconductor versions to be supported, default to empty list. Updated automatically if not provided.
|
||||
bioc_min_version: minimum version of Bioconductor we want to support, default to "3.0".
|
||||
'''
|
||||
self.pkgname = pkgname
|
||||
self.depends = depends
|
||||
self.optdepends = optdepends
|
||||
|
||||
self.pkgver = None
|
||||
self.new_depends = []
|
||||
self.new_optdepends = []
|
||||
|
||||
self.bioc_versions = bioc_versions
|
||||
self.cran_meta_mirror = cran_meta_mirror
|
||||
self.bioc_meta_mirror = bioc_meta_mirror
|
||||
self.bioc_min_version = bioc_min_version
|
||||
|
||||
self.depends_changed = False
|
||||
self.optdepends_changed = False
|
||||
|
||||
if self.bioc_verions == []:
|
||||
self.set_bioc_versions()
|
||||
self.parse_pkgbuild()
|
||||
desc = self.get_desc()
|
||||
self.update_info(desc)
|
||||
self.merge_depends()
|
||||
|
||||
def set_bioc_versions(self) -> None:
|
||||
'''
|
||||
get all Bioconductor versions
|
||||
'''
|
||||
version_page = requests.get(
|
||||
f"{self.bioc_meta_mirror}/bioc_version")
|
||||
if version_page.status_code != requests.codes.ok:
|
||||
raise RuntimeError(
|
||||
f"Failed to get Bioconductor versions due to: {version_page.status_code}: {version_page.reason}")
|
||||
z = version_page.text.split(',')
|
||||
|
||||
self.bioc_versions = list(map(lambda x: version.parse(x), z))
|
||||
|
||||
def __str__(self) -> str:
|
||||
return f"""
|
||||
Pkgname: {self.pkgname}
|
||||
Pkgver: {self.pkgver}
|
||||
Depends: {self.depends}
|
||||
Optdepends: {self.optdepends}
|
||||
new_depends: {self.new_depends}
|
||||
new_optdepends: {self.new_optdepends}
|
||||
"""
|
||||
|
||||
def parse_pkgbuild(self) -> None:
|
||||
'''
|
||||
use lilac to get _pkgname and depends and optdepends of PKGBUILD, set the value to self
|
||||
'''
|
||||
with open('PKGBUILD', 'r') as f:
|
||||
lines = f.readlines()
|
||||
for line in lines:
|
||||
if line.startswith('_pkgname'):
|
||||
self.pkgname = line.split(
|
||||
'=')[-1].strip().strip("'").strip('"')
|
||||
break
|
||||
depends = lilac.obtain_depends()
|
||||
optdepends = lilac.obtain_optdepends()
|
||||
self.depends = depends
|
||||
self.optdepends = optdepends
|
||||
|
||||
def get_desc(self) -> Optional[str]:
|
||||
'''
|
||||
get new depends from CRAN or Bioconductor
|
||||
'''
|
||||
pkgname = self.pkgname
|
||||
CRAN_URL = f"{self.cran_mirror}/src/contrib/PACKAGES"
|
||||
|
||||
# try cran first
|
||||
r_cran = requests.get(CRAN_URL)
|
||||
if r_cran.status_code == requests.codes.ok:
|
||||
self.cran_descs = r_cran.text.split('\n\n')
|
||||
for desc in self.cran_descs:
|
||||
if desc.startswith(f'Package: {pkgname}'):
|
||||
logging.info(f"Found {pkgname} in CRAN")
|
||||
return desc
|
||||
else:
|
||||
raise RuntimeError(
|
||||
f"Failed to get CRAN descriptions due to: {r_cran.status_code}: {r_cran.reason}")
|
||||
|
||||
# try bioconductor
|
||||
for ver in self.bioc_versions:
|
||||
if ver < version.parse(self.bioc_min_version):
|
||||
continue
|
||||
for p in ['bioc', 'data/annotation', 'data/experiment']:
|
||||
url = f"{self.bioc_meta_mirror}/packages/{ver}/{p}/src/contrib/PACKAGES"
|
||||
bioconductor_descs = requests.get(url)
|
||||
if bioconductor_descs.status_code == requests.codes.ok:
|
||||
bioconductor_descs = bioconductor_descs.text.split('\n\n')
|
||||
for desc in bioconductor_descs:
|
||||
if desc.startswith(f'Package: {pkgname}'):
|
||||
logging.info(
|
||||
f"Found {pkgname} in Bioconductor {ver}: {p}")
|
||||
return desc
|
||||
else:
|
||||
logging.error(
|
||||
f'Failed to get Bioconductor descriptions for version: {ver}, {p}, due to: {bioconductor_descs.status_code}: {bioconductor_descs.reason}')
|
||||
continue
|
||||
|
||||
def update_info(self, desc) -> None:
|
||||
'''
|
||||
obtain new depends and optdepends from `desc`, and write them to `self`
|
||||
'''
|
||||
config = configparser.ConfigParser()
|
||||
config.read_string('[pkg\n]'+desc)
|
||||
self.pkgver = config['pkg'].get('version')
|
||||
r_deps = []
|
||||
r_optdeps = []
|
||||
|
||||
# depends
|
||||
dep_depends = config['pkg'].get('depends')
|
||||
if dep_depends:
|
||||
r_deps.extend(dep_depends.split(','))
|
||||
dep_imports = config['pkg'].get('imports')
|
||||
if dep_imports:
|
||||
r_deps.extend(dep_imports.split(','))
|
||||
dep_linkingto = config['pkg'].get('linkingto')
|
||||
if dep_linkingto:
|
||||
r_deps.extend(dep_linkingto.split(','))
|
||||
|
||||
r_deps = [_.split('(')[0].strip() for _ in r_deps]
|
||||
r_deps = list(set(r_deps) - EXCLUDED_PKGS)
|
||||
|
||||
if '' in r_deps:
|
||||
r_deps.remove('')
|
||||
|
||||
self.new_depends += [f"r-{_.lower()}" for _ in r_deps]
|
||||
self.new_depends.sort()
|
||||
if 'r' in self.new_depends:
|
||||
self.new_depends.remove('r')
|
||||
|
||||
# opt depends
|
||||
dep_optdepends = config['pkg'].get('suggests')
|
||||
if dep_optdepends:
|
||||
r_optdeps.extend(dep_optdepends.split(','))
|
||||
dep_enhances = config['pkg'].get('enhances')
|
||||
if dep_enhances:
|
||||
r_optdeps.extend(dep_enhances.split(','))
|
||||
|
||||
r_optdeps = [_.split('(')[0].strip() for _ in r_optdeps]
|
||||
if '' in r_optdeps:
|
||||
r_optdeps.remove('')
|
||||
|
||||
self.new_optdepends += [f"r-{_.lower()}" for _ in r_optdeps]
|
||||
self.new_optdepends.sort()
|
||||
|
||||
def merge_depends(self):
|
||||
'''
|
||||
Merge old `depends` and `optdepends` in to the new ones
|
||||
'''
|
||||
system_reqs = [x for x in self.depends if not x.startswith('r-')]
|
||||
system_reqs.sort()
|
||||
self.new_depends = system_reqs+self.new_depends
|
||||
|
||||
if sorted(self.new_depends) != sorted(self.depends):
|
||||
self.depends_changed = True
|
||||
|
||||
# keep explanation of optdepends
|
||||
if type(self.optdepends) == 'dict':
|
||||
if sorted(self.new_optdepends) != sorted(self.optdepends.keys()):
|
||||
self.optdepends_changed = True
|
||||
for i in range(len(self.new_optdepends)):
|
||||
val = self.optdepends.get(self.new_optdepends[i])
|
||||
if val:
|
||||
self.new_optdepends[i] += ': '+val
|
||||
else:
|
||||
if sorted(self.new_optdepends) != sorted(self.optdepends):
|
||||
self.optdepends_changed = True
|
||||
|
||||
def update_pkgbuild(self):
|
||||
'''
|
||||
write new depends to PKGBUILD if depends change
|
||||
'''
|
||||
if not self.depends_changed and not self.optdepends_changed:
|
||||
return
|
||||
|
||||
with open("PKGBUILD", "r") as f:
|
||||
lines = f.readlines()
|
||||
|
||||
depends_interval = [-1, -1]
|
||||
optdepends_interval = [-1, -1]
|
||||
for i, line in enumerate(lines):
|
||||
if line.strip().startswith("depends"):
|
||||
depends_interval[0] = i
|
||||
elif line.strip().startswith("optdepends"):
|
||||
optdepends_interval[0] = i
|
||||
|
||||
if depends_interval[0] > -1:
|
||||
if ')' in line:
|
||||
# end depends
|
||||
depends_interval[1] = i
|
||||
if optdepends_interval[0] > -1:
|
||||
if ')' in line:
|
||||
# end optdepends
|
||||
depends_interval[1] = i
|
||||
if not (depends_interval[1] < optdepends_interval[0] or optdepends_interval[1] < depends_interval[0]):
|
||||
logging.error(
|
||||
"depends and optdepends overlap, please fix it manually")
|
||||
return
|
||||
|
||||
if self.depends_changed:
|
||||
for i in range(depends_interval[0], depends_interval[1]):
|
||||
lines[i] = ''
|
||||
lines[depends_interval[1]] = '\n'.join(
|
||||
['depends=(', '\n'.join([' ' + _ for _ in self.new_depends]), ')'])
|
||||
if self.optdepends_changed:
|
||||
for i in range(optdepends_interval[0], optdepends_interval[1]):
|
||||
lines[i] = ''
|
||||
lines[optdepends_interval[1]] = '\n'.join(
|
||||
['optdepends=(', '\n'.join([' ' + _ for _ in self.new_optdepends]), ')'])
|
||||
|
||||
logging.info(f"Writing new PKGBUILD for {self.pkgname}")
|
||||
with open("PKGBUILD", "w") as f:
|
||||
f.writelines(lines)
|
||||
|
||||
def update_yaml(self, yaml_file='lilac.yaml'):
|
||||
'''
|
||||
update the `repo_depends` part of pkg
|
||||
'''
|
||||
with open(yaml_file, "r") as f:
|
||||
docs = yaml.load(f, Loader=yaml.FullLoader)
|
||||
old_depends = docs.get('repo_depends', [])
|
||||
non_r_depends = [x for x in old_depends if not x.startswith('r-')]
|
||||
# only keep non-r depends also in new_depends
|
||||
non_r_depends = [x for x in non_r_depends if x in self.new_depends]
|
||||
non_r_depends.sort()
|
||||
r_new_depends = [x for x in self.new_depends if x.startswith('r-')]
|
||||
r_new_depends.sort()
|
||||
new_deps = non_r_depends+r_new_depends
|
||||
if new_deps:
|
||||
docs['repo_depends'] = new_deps
|
||||
with open(yaml_file, 'w') as f:
|
||||
yaml.dump(docs, f, sort_keys=False)
|
||||
|
||||
|
||||
def update_depends_by_file(file, bioarch_path="BioArchLinux", bioc_min_ver="3.0", cran_meta_mirror="https://cran.r-project.org",
|
||||
bioc_meta_mirror="https://bioconductor.org",):
|
||||
'''
|
||||
Update depends of packages listed in `file`, one package name per line, CRAN style(e.g. `Rcpp`) and pkgname style (`r-rcpp`) are both supported.
|
||||
file: file containing package names
|
||||
bioarch_path: path to BioArchLinux
|
||||
bioc_min_ver: minimum version of Bioconductor to be supported, generally not needed to be changed
|
||||
cran_meta_mirror: mirror of CRAN metadata, recommended to be changed to a local https mirror.
|
||||
bioc_meta_mirror: mirror of Bioconductor metadata, recommended to be changed to a local https mirror.
|
||||
'''
|
||||
current_dir = os.getcwd()
|
||||
# where the name are _pkgname (May have upper letters) or pkgname (r-xxx)
|
||||
case = "_pkgname"
|
||||
with open(file, "r") as f:
|
||||
for pkgname in f:
|
||||
pkgname = pkgname.strip()
|
||||
if pkgname.startswith("r-"):
|
||||
case = "pkgname"
|
||||
break
|
||||
with open(file, "r") as f:
|
||||
for pkgname in f:
|
||||
pkgname = pkgname.strip()
|
||||
if case == '_pkgname':
|
||||
pkgname = 'r-'+pkgname.lower()
|
||||
os.chdir(f"{bioarch_path}/{pkgname}")
|
||||
pkginfo = PkgInfo(bioc_min_version=bioc_min_ver,
|
||||
bioc_meta_mirror=bioc_meta_mirror, cran_meta_mirror=cran_meta_mirror)
|
||||
pkginfo.update_pkgbuild()
|
||||
pkginfo.update_yaml()
|
||||
os.chdir(current_dir)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
parser = argparse.ArgumentParser(
|
||||
prog='depends updater for BioArchLinux',
|
||||
description='update the depends of R packages from CRAN and Bioconductor automatically',
|
||||
epilog='Text at the bottom of help'
|
||||
)
|
||||
parser.add_argument(
|
||||
'--file', help='The file that contains the pkgname to be archived, one pkgname per line')
|
||||
parser.add_argument(
|
||||
'--bioarch_path', help='The path of BioArchLinux repo', default="BioArchLinux")
|
||||
parser.add_argument(
|
||||
'--bioc_min_ver', help="The minimum version of Bioconductor supported, must be greater than 3.0", default="3.0")
|
||||
parser.add_argument(
|
||||
'--cran_meta_mirror', help="The mirror of CRAN metadata, recommended to be changed to a local https mirror. Only http(s) is supported", default="https://cran.r-project.org")
|
||||
parser.add_argument(
|
||||
'--bioc_meta_mirror', help="The mirror of Bioconductor metadata, recommended to be changed to a local https mirror. Only http(s) is supported", default="https://bioconductor.org")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.file:
|
||||
update_depends_by_file(args.file, args.bioarch_path, args.bioc_min_ver,
|
||||
cran_meta_mirror=args.cran_meta_mirror, bioc_meta_mirror=args.bioc_meta_mirror)
|
||||
else:
|
||||
parser.print_help()
|
5
updater/readme.md
Normal file
5
updater/readme.md
Normal file
|
@ -0,0 +1,5 @@
|
|||
# Updater
|
||||
|
||||
Scripts used to help update the PKGBUILD and `lilac.yaml` automatically.
|
||||
|
||||
For usage, run with argument `-h`.
|
144
updater/sync_meta_data.py
Executable file
144
updater/sync_meta_data.py
Executable file
|
@ -0,0 +1,144 @@
|
|||
#!/usr/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
import requests
|
||||
import re
|
||||
from pathlib import Path
|
||||
from packaging import version
|
||||
import logging
|
||||
from distutils.dir_util import copy_tree
|
||||
import os
|
||||
import datetime
|
||||
from dateutil.parser import parse as parsedate
|
||||
import argparse
|
||||
|
||||
|
||||
class Downloader:
|
||||
def __init__(self, bioc_mirror="https://bioconductor.org", cran_mirror="https://cran.r-project.org", bioc_min_ver="3.0") -> None:
|
||||
'''
|
||||
bioc_mirror: remote Bioconductor mirror, default https://bioconductor.org
|
||||
cran_mirror: remote CRAN mirror, default https://cran.r-project.org
|
||||
bioc_min_ver: minimum version of Bioconductor to download, default 3.0
|
||||
'''
|
||||
self.bioc_mirror = bioc_mirror
|
||||
self.cran_mirror = cran_mirror
|
||||
self.bioc_min_ver = bioc_min_ver
|
||||
self.bioc_versions = []
|
||||
self.set_bioc_versions()
|
||||
|
||||
def set_bioc_versions(self):
|
||||
'''
|
||||
obtain and set all available Bioconductor versions from remote mirror.
|
||||
'''
|
||||
version_page = requests.get(
|
||||
f"{self.bioc_mirror}/about/release-announcements/#release-versions/")
|
||||
if version_page.status_code != requests.codes.ok:
|
||||
raise RuntimeError(
|
||||
f"Failed to get Bioconductor versions due to: {version_page.status_code}: {version_page.reason}")
|
||||
z = re.findall(r"/packages/(\d.\d+)/", version_page.text)
|
||||
|
||||
# mannually add 1.7 to 1.0 to the list.
|
||||
for i in range(7, -1, -1):
|
||||
z.append(f"1.{i}")
|
||||
|
||||
self.bioc_versions = list(map(lambda x: version.parse(x), z))
|
||||
|
||||
def download_package_meta(self, path='bioc'):
|
||||
'''
|
||||
Download package metadata from Bioconductor and CRAN.
|
||||
|
||||
min_ver: minimum version of Bioconductor to download, default 3.0.
|
||||
path: path to save metadata, default 'bioc' under current directory.
|
||||
'''
|
||||
min_ver = self.bioc_min_ver
|
||||
if path and not path.endswith('/'):
|
||||
path = path+'/'
|
||||
else:
|
||||
path = ''
|
||||
|
||||
# BIOC
|
||||
latestver = self.bioc_versions[0]
|
||||
for p in ['bioc', 'data/annotation', 'data/experiment']:
|
||||
for ver in self.bioc_versions:
|
||||
logging.info(f"Downloading Bioconductor {ver} {p}...")
|
||||
if ver >= version.parse(min_ver):
|
||||
Path(
|
||||
path+f'packages/{ver}/{p}/src/contrib/'
|
||||
).mkdir(parents=True, exist_ok=True)
|
||||
url = f"{self.bioc_mirror}/packages/{ver}/{p}/src/contrib/PACKAGES"
|
||||
dstFile = path+f'packages/{ver}/{p}/src/contrib/PACKAGES'
|
||||
if not remote_is_newer(url, dstFile):
|
||||
logging.info(
|
||||
f"Local Package List for Bioconductor below {ver}: {p} is newer than remote, skip.")
|
||||
break
|
||||
meta = requests.get(url)
|
||||
if meta.status_code != requests.codes.ok:
|
||||
logging.error(
|
||||
f"failed to download Package List for Bioconductor {ver}: {p} due to {meta.status_code}: {meta.reason}")
|
||||
else:
|
||||
with open(dstFile, 'w') as f:
|
||||
f.write(meta.text)
|
||||
copy_tree(path+f'packages/{latestver}', path+f'packages/release')
|
||||
|
||||
bioc_ver_file = path+'bioc_version'
|
||||
with open(bioc_ver_file, 'w') as f:
|
||||
f.write(','.join(map(lambda x: str(x), self.bioc_versions)))
|
||||
|
||||
# CRAN
|
||||
logging.info("Downloading CRAN metadata...")
|
||||
url = f"{self.cran_mirror}/src/contrib/PACKAGES"
|
||||
dstFile = path+f'src/contrib/PACKAGES'
|
||||
if remote_is_newer(url, dstFile):
|
||||
meta = requests.get(url)
|
||||
if meta.status_code != requests.codes.ok:
|
||||
logging.error(
|
||||
f"failed to download Package List for CRAN due to {meta.status_code}: {meta.reason}")
|
||||
else:
|
||||
Path(path+f'src/contrib/').mkdir(parents=True, exist_ok=True)
|
||||
with open(dstFile, 'w') as f:
|
||||
f.write(meta.text)
|
||||
else: # skip if local is newer
|
||||
logging.info(
|
||||
"Local Package List for CRAN is newer than remote, skip.")
|
||||
|
||||
|
||||
def remote_is_newer(url, dstFile) -> bool:
|
||||
'''
|
||||
whether the remote file is newer than local file.
|
||||
return True if dstFile does not exist.
|
||||
returns False if remote does not provide `Last-Modified` header.
|
||||
'''
|
||||
if not os.path.exists(dstFile):
|
||||
return True
|
||||
r = requests.head(url)
|
||||
url_time = r.headers.get('last-modified')
|
||||
if not url_time:
|
||||
return False
|
||||
|
||||
url_date = parsedate(url_time)
|
||||
file_time = datetime.datetime.fromtimestamp(
|
||||
os.path.getmtime(dstFile))
|
||||
return url_date > file_time.astimezone()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
d = Downloader()
|
||||
download_path = os.getenv('BIO_META_PATH', 'bioc')
|
||||
parser = argparse.ArgumentParser(
|
||||
prog='R package metadata sync tool',
|
||||
description='Sync metadata of R packages from CRAN and Bioconductor to a local path',
|
||||
)
|
||||
parser.add_argument(
|
||||
'--path', help='The path to store the metadata files. '
|
||||
"if not given the environment variable BIO_META_PATH will be read, if it's not set, the default (bioc) will be used.",
|
||||
default='bioc')
|
||||
parser.add_argument(
|
||||
'--bioc_min_ver', help="The minimum version of Bioconductor supported, must be greater than 3.0", default="3.0")
|
||||
parser.add_argument(
|
||||
'--cran_meta_mirror', help="The remote mirror of CRAN metadata, only http(s) is supported", default="https://cran.r-project.org")
|
||||
parser.add_argument(
|
||||
'--bioc_meta_mirror', help="The remote mirror of Bioconductor metadata, only http(s) is supported", default="https://bioconductor.org")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
d.download_package_meta(download_path)
|
Loading…
Add table
Reference in a new issue