bioarchlinux-tools/updater/depends_updater.py
2023-04-28 22:25:42 +08:00

461 lines
18 KiB
Python
Executable file

#!/usr/bin/python
'''
Update the `depends` and `optdepends` part of an R package PKGBUILD listed in `pkgname.txt`
'''
from packaging import version
import configparser
import logging
from lilac2 import api as lilac
import argparse
import os
import shutil
import tempfile
import filecmp
import yaml
from typing import Optional
import sqlite3
from dbmanager import get_bioc_versions, EXCLUDED_PKGS
from pkg_archiver import archive_pkg_yaml, archive_pkg_pkgbuild, unarchive_cran
import re
import requests
# use | in lilac.yaml
def str_presenter(dumper, data):
"""configures yaml for dumping multiline strings
Ref: https://stackoverflow.com/questions/8640959/how-can-i-control-what-scalar-form-pyyaml-uses-for-my-data"""
if data.count('\n') > 0: # check for multiline string
return dumper.represent_scalar('tag:yaml.org,2002:str', data, style='|')
return dumper.represent_scalar('tag:yaml.org,2002:str', data)
yaml.add_representer(str, str_presenter)
yaml.representer.SafeRepresenter.add_representer(
str, str_presenter) # to use with safe_dum
class PkgInfo:
def __init__(self, pkgname=None, depends=None, optdepends=None,
bioc_meta_mirror="https://bioconductor.org",
bioc_versions=[],
bioc_min_version="3.0",):
'''
pkgname: name of the package, style in CRAN and Bioconductor, e.g. "Rcpp",
depends: depends of the package, style in PKGBUILD, e.g. "r-base". Updated automatically if not provided.
optdepends: optdepends of the package, style in PKGBUILD, e.g. "r-rmarkdown: for vignettes". Updated automatically if not provided.
bioc_mirror: remote mirror of Bioconductor, default to "https://bioconductor.org"
bioc_versions: list of Bioconductor versions to be supported, default to empty list. Updated automatically if not provided.
bioc_min_version: minimum version of Bioconductor we want to support, default to "3.0".
'''
self.pkgname = pkgname
self.depends = depends
self.optdepends = optdepends
self.pkgver = None
self.new_depends = []
self.new_optdepends = []
# newly introduced depends, may be missing in BioArchLinux, need to be added
self.added_depends = [] # named in CRAN style
self.bioc_versions = bioc_versions
self.bioc_meta_mirror = bioc_meta_mirror
self.bioc_min_version = bioc_min_version
# for BIOC pkgs, the latest BIOC version that contains the pkg.
self.bioc_ver = None
self.depends_changed = False
self.optdepends_changed = False
self.desc = None
if self.bioc_versions == []:
self.bioc_versions = get_bioc_versions(self.bioc_meta_mirror)
def build_body(self, conn_cursor):
self.parse_pkgbuild()
desc = self.get_desc(conn_cursor)
if desc:
self.update_info(desc)
self.merge_depends()
def __str__(self) -> str:
return f"""
Pkgname: {self.pkgname}
Pkgver: {self.pkgver}
Depends: {self.depends}
Optdepends: {self.optdepends}
new_depends: {self.new_depends}
new_optdepends: {self.new_optdepends}
"""
def parse_pkgbuild(self) -> None:
'''
use lilac to get _pkgname and depends and optdepends of PKGBUILD, set the value to self
'''
with open('PKGBUILD', 'r') as f:
lines = f.readlines()
for line in lines:
if line.startswith('_pkgname'):
self.pkgname = line.split(
'=')[-1].strip().strip("'").strip('"')
break
depends = lilac.obtain_depends()
optdepends = lilac.obtain_optdepends(parse_dict=False)
self.depends = depends
self.optdepends = optdepends
def get_desc(self, conn_cursor) -> Optional[str]:
'''
Get description of the package from database
conn_cursor: sqlite3 cursor, e.g., `conn = sqlite3.connect('sqlite.db'); conn_cursor = conn.cursor()`
'''
c = conn_cursor
cursor = c.execute(
"SELECT desc,bioc_ver from pkgmeta where name = ?", (self.pkgname,))
descall = cursor.fetchone()
if descall:
desc, self.bioc_ver = descall
self.bioc_ver = version.parse(
self.bioc_ver) if self.bioc_ver else None
self.desc = desc
return desc
else:
return None
def is_archived(self, release_ver) -> bool:
'''
Check if the package is archived in CRAN or BIOC
release_ver: current release of BIOC, should be a Version e.g., "3.16"
'''
if not self.desc: # not in database, archived in CRAN
return True
# not in the latest BIOC version, archived in BIOC
elif self.bioc_ver and self.bioc_ver < release_ver:
return True
return False
def update_info(self, desc) -> None:
'''
obtain new depends and optdepends from `desc`, and write them to `self`
'''
if not desc:
logging.warning(f"Description of {self.pkgname} is empty")
return
logging.debug(f"Updating {self.pkgname} using \n {desc}")
config = configparser.ConfigParser()
config.read_string('[pkg]\n'+desc)
self.pkgver = config['pkg'].get('version')
r_deps = []
r_optdeps = []
# depends
dep_depends = config['pkg'].get('depends')
if dep_depends:
r_deps.extend(dep_depends.split(','))
dep_imports = config['pkg'].get('imports')
if dep_imports:
r_deps.extend(dep_imports.split(','))
dep_linkingto = config['pkg'].get('linkingto')
if dep_linkingto:
r_deps.extend(dep_linkingto.split(','))
r_deps = [_.split('(')[0].strip() for _ in r_deps]
r_deps = list(set(r_deps) - EXCLUDED_PKGS)
if '' in r_deps:
r_deps.remove('')
# now r_deps contains all depends in named CRAN style
self.added_depends = [
x for x in r_deps if f"r-{x.lower()}" not in self.depends]
self.new_depends += [f"r-{_.lower()}" for _ in r_deps]
self.new_depends.sort()
if 'r' in self.new_depends:
self.new_depends.remove('r')
# opt depends
dep_optdepends = config['pkg'].get('suggests')
if dep_optdepends:
r_optdeps.extend(dep_optdepends.split(','))
dep_enhances = config['pkg'].get('enhances')
if dep_enhances:
r_optdeps.extend(dep_enhances.split(','))
r_optdeps = [_.split('(')[0].strip() for _ in r_optdeps]
if '' in r_optdeps:
r_optdeps.remove('')
self.new_optdepends += [f"r-{_.lower()}" for _ in r_optdeps]
self.new_optdepends.sort()
def merge_depends(self):
'''
Merge old `depends` and `optdepends` in to the new ones
'''
system_reqs = [x for x in self.depends if not x.startswith('r-')]
system_reqs.sort()
self.new_depends = system_reqs+self.new_depends
if sorted(self.new_depends) != sorted(self.depends):
self.depends_changed = True
# no optdepends
if not self.optdepends:
self.optdepends_changed = bool(self.new_optdepends)
return
if not self.new_optdepends:
self.optdepends_changed = bool(self.optdepends)
return
system_opt_reqs = [
x for x in self.optdepends if not x.startswith('r-')]
system_opt_reqs.sort()
# keep explanation of optdepends
if any(map(lambda x: ':' in x, self.optdepends)):
opt_dict = lilac.obtain_optdepends(parse_dict=True)
if sorted(self.new_optdepends) != sorted(opt_dict.keys()):
self.optdepends_changed = True
for i in range(len(self.new_optdepends)):
val = opt_dict.get(self.optdepends[i])
if val:
self.new_optdepends[i] += ': '+val
else:
if sorted(self.new_optdepends) != sorted(self.optdepends):
self.optdepends_changed = True
self.new_optdepends = system_opt_reqs+self.new_optdepends
def update_pkgbuild(self) -> list[str]:
'''
write new depends to PKGBUILD if depends change
return the newly added depends which may not be in BioArchLinux Repo.
'''
if not self.depends_changed and not self.optdepends_changed:
return
with open("PKGBUILD", "r") as f:
lines = f.readlines()
depends_interval = [-1, -1]
optdepends_interval = [-1, -1]
for i, line in enumerate(lines):
if line.strip().startswith("depends"):
depends_interval[0] = i
if depends_interval[0] > -1 and depends_interval[1] == -1:
if line.strip().endswith(")"):
# end depends
depends_interval[1] = i
break
if self.depends_changed:
for i in range(depends_interval[0], depends_interval[1]):
lines[i] = ''
lines[depends_interval[1]] = '\n'.join(
['depends=(', '\n'.join([' ' + _ for _ in self.new_depends]), ')\n'])
# new lines for new optdepends
new_optdepends_line = ""
for i, line in enumerate(lines):
if line.strip().startswith("optdepends"):
optdepends_interval[0] = i
if optdepends_interval[0] > -1 and optdepends_interval[1] == -1:
if line.strip().endswith(")"):
# end optdepends
optdepends_interval[1] = i
break
if self.new_optdepends:
new_optdepends_line = '\n'.join(
['optdepends=(', '\n'.join(
[' ' + f"'{x}'" if ":" in x else ' '+x for x in self.new_optdepends]), ')\n'])
if self.optdepends_changed:
# no old, but has new
if optdepends_interval[0] == -1:
# add optdepends
lines.insert(depends_interval[1]+1, new_optdepends_line)
optdepends_interval[0] = depends_interval[1]+1
optdepends_interval[1] = depends_interval[1]+1
# has old,
else:
for i in range(optdepends_interval[0], optdepends_interval[1]):
lines[i] = ''
lines[optdepends_interval[1]] = ""
if self.new_optdepends:
lines[optdepends_interval[1]] = new_optdepends_line
logging.info(f"Writing new PKGBUILD for {self.pkgname}")
with open("PKGBUILD", "w") as f:
f.writelines(lines)
change_arch(self.desc)
return self.added_depends
def get_web_bioc_version(self) -> version.Version:
'''
get bioc version from BIOC website, usually newer than the one in BIOC metadata
'''
if self.bioc_ver is None or self.bioc_ver == max(self.bioc_versions):
return self.bioc_ver
for ver in self.bioc_versions:
if ver > self.bioc_ver:
url = f"{self.bioc_meta_mirror}/packages/{ver}/{self.pkgname}"
r = requests.get(url)
if r.status_code == 200:
if not 'packages-removed-with-bioconductor' in r.text:
return ver
return self.bioc_ver
def update_yaml(self, yaml_file='lilac.yaml'):
'''
update the `repo_depends` part of pkg, repo_depends will be sorted (systemlibs first, then r-pkgs)
'''
with open(yaml_file, "r") as f:
docs = yaml.load(f, Loader=yaml.FullLoader)
old_depends = docs.get('repo_depends', [])
non_r_depends = [x for x in old_depends if not x.startswith('r-')]
# only keep non-r depends also in new_depends
non_r_depends = [x for x in non_r_depends if x in self.new_depends]
non_r_depends.sort()
r_new_depends = [x for x in self.new_depends if x.startswith('r-')]
r_new_depends.sort()
new_deps = non_r_depends+r_new_depends
if new_deps:
docs['repo_depends'] = new_deps
with open(yaml_file, 'w') as f:
stream = yaml.dump(docs, sort_keys=False,
default_flow_style=False, indent=2)
f.write(stream)
def create_temporary_copy(path):
'''
create temporary copy of path, remember to manually delete it.
'''
tmp = tempfile.NamedTemporaryFile(delete=False)
shutil.copy2(path, tmp.name)
return tmp.name
def change_arch(desc: str):
'''
change `arch` to `any` if NeedsCompilation is No, and change it to x86_64 if it is Yes
'''
# change any to x86_64 in PKGBUILD or vice versa
need_compile = re.search("NeedsCompilation: (.*)", desc)
if not need_compile:
return
need_compile = need_compile.group(1)
with open("PKGBUILD", "r") as f:
lines = f.readlines()
started = False
for i, line in enumerate(lines):
if line.strip().startswith("arch"):
started = True
if started:
if need_compile.lower() == 'no':
lines[i] = line.replace('x86_64', 'any')
elif need_compile.lower() == 'yes':
lines[i] = line.replace('any', 'x86_64')
if started and line.strip().endswith(')'):
break
with open("PKGBUILD", "w") as f:
f.writelines(lines)
def update_depends_by_file(file, bioarch_path="BioArchLinux", db="sqlite.db",
auto_archive=False, auto_unarchive=True,
bioc_min_ver="3.0", bioc_meta_mirror="https://bioconductor.org", output_file="added_depends.txt"):
'''
Update depends of packages listed in `file`, one package name per line, CRAN style(e.g. `Rcpp`) and pkgname style (`r-rcpp`) are both supported.
file: file containing package names
bioarch_path: path to BioArchLinux
db: path to the database to be read
auto_archive: whether to archive the package if it is not in CRAN or the latest BIOC
auto_unarchive: whether to unarchive the archived package if it is in CRAN.
bioc_min_ver: minimum version of Bioconductor to be supported.
bioc_meta_mirror: The server used to get all version numbers of BIOC
output_file: file to write the added depends to.
'''
bioc_versions = get_bioc_versions(bioc_meta_mirror)
MAX_BIOC_VERSION = max(bioc_versions)
current_dir = os.getcwd()
# where the name are _pkgname (May have upper letters) or pkgname (r-xxx)
conn = sqlite3.connect(db)
cursor = conn.cursor()
added_deps = []
with open(file, "r") as f:
for pkgname in f:
logging.debug(f"Dealing with {pkgname}")
pkgname = pkgname.strip()
if not pkgname.strip().startswith("r-"):
pkgname = "r-"+pkgname.lower()
os.chdir(f"{bioarch_path}/{pkgname}")
temp_pkgbuild = create_temporary_copy("PKGBUILD")
temp_lilac = create_temporary_copy("lilac.yaml")
pkginfo = PkgInfo(bioc_min_version=bioc_min_ver,
bioc_meta_mirror=bioc_meta_mirror, bioc_versions=bioc_versions)
pkginfo.build_body(cursor)
pkginfo.update_pkgbuild()
pkginfo.update_yaml()
if auto_archive and pkginfo.is_archived(MAX_BIOC_VERSION):
temp_bioc_ver = pkginfo.bioc_ver
if pkginfo.bioc_ver != None and pkginfo.bioc_ver < MAX_BIOC_VERSION:
try:
temp_bioc_ver = pkginfo.get_web_bioc_version()
except Exception as e:
logging.error(
f"Failed to getting web bioc version for {pkgname} due to {e}")
temp_bioc_ver = None
if temp_bioc_ver == None or temp_bioc_ver < MAX_BIOC_VERSION:
archive_pkg_yaml(bioconductor_version=temp_bioc_ver)
archive_pkg_pkgbuild(bioconductor_version=temp_bioc_ver)
# if PKGBUILD changed, bump pkgrel
if auto_unarchive and pkginfo.desc:
unarchive_cran()
if not filecmp.cmp(temp_pkgbuild, "PKGBUILD"):
lilac.update_pkgrel()
logging.info(f"Updating {pkgname}")
else:
# else revert changes to lilac.yaml
shutil.copy2(temp_lilac, "lilac.yaml")
logging.debug(f"No changes to {pkgname}")
os.remove(temp_pkgbuild)
os.remove(temp_lilac)
if pkginfo.added_depends:
added_deps += pkginfo.added_depends
os.chdir(current_dir)
conn.close()
with open(output_file, "w") as f:
f.write('\n'.join(set(added_deps)))
if __name__ == '__main__':
logging.basicConfig(level=logging.INFO)
parser = argparse.ArgumentParser(
description='update the depends of R packages from CRAN and Bioconductor automatically',
formatter_class=argparse.ArgumentDefaultsHelpFormatter
)
parser.add_argument(
'-f', '--file', help='The file that contains the pkgname to be archived, one pkgname per line')
parser.add_argument(
'-p', '--bioarch_path', help='The path of BioArchLinux repo', default="BioArchLinux")
parser.add_argument(
'-db', help="The database file used to query metadata of packages", default="/tmp/dbmanager/sqlite.db")
parser.add_argument(
'--bioc_min_ver', help="The minimum version of Bioconductor supported, must be greater than 3.0", default="3.0")
parser.add_argument(
'--bioc_meta_mirror', help="The server used to get all version numbers of BIOC", default="https://bioconductor.org")
parser.add_argument(
'-o', '--output', help='The file to save newly added depends name', default="added_depends.txt")
parser.add_argument(
'-a', '--auto-archive', help='Automatically archive pkgs that are not in CRAN or the latest BIOC release', action='store_true')
parser.add_argument(
'--no-auto-unarchive', help='DO NOT Automatically unarchive pkgs that are now in CRAN', action='store_false')
args = parser.parse_args()
if args.file:
update_depends_by_file(args.file, args.bioarch_path, args.db,
output_file=args.output,
bioc_min_ver=args.bioc_min_ver, bioc_meta_mirror=args.bioc_meta_mirror,
auto_archive=args.auto_archive, auto_unarchive=args.no_auto_unarchive)
else:
parser.print_help()