Compare commits

..

No commits in common. "main" and "0.0.1" have entirely different histories.
main ... 0.0.1

6 changed files with 95 additions and 1383 deletions

View file

@@ -28,7 +28,7 @@ def search_pkgbuild_for_pkgname(
m = re_package.match(l)
if m:
if m.group(1):
ret.add(m.group(1).strip())
ret.add(m.group(1).strip())
else:
ret.add(pkgbuild.parent.name)
except FileNotFoundError:

File diff suppressed because it is too large Load diff

View file

@@ -12,39 +12,6 @@ import datetime
from dateutil.parser import parse as parsedate
import re
EXCLUDED_PKGS = {
"base",
"boot",
"class",
"cluster",
"codetools",
"compiler",
"datasets",
"foreign",
"graphics",
"grDevices",
"grid",
"KernSmooth",
"lattice",
"MASS",
"Matrix",
"methods",
"mgcv",
"nlme",
"nnet",
"parallel",
"rpart",
"spatial",
"splines",
"stats",
"stats4",
"survival",
"tcltk",
"tools",
"utils",
"R"
}
Base = declarative_base()
@@ -69,9 +36,6 @@ class PkgMeta(Base):
self.bioc_ver = bioc_ver
self.bioc_category = bioc_category
def __repr__(self) -> str:
return f"Name: {self.name},\nDesc: {self.desc},\nRepo: {self.repo},\nVersion: {self.bioc_ver},\nCategory: {self.bioc_category}"
def from_str(data, bioc_ver, bioc_cat):
'''
@@ -106,13 +70,15 @@ def get_bioc_versions(bioc_mirror="https://bioconductor.org") -> list[str]:
return bioc_versions
def get_package_meta(url, mtime=None, compare=False):
def get_package_meta(url, mtime=None,):
'''
get package metadata from Bioconductor and CRAN.
url: the url to be downloaded, e.g. https://bioconductor.org/packages/3.16/bioc/src/contrib/PACKAGES
mtime: the last modified time of the local file. if remote is older than mtime, ignore it.
'''
if compare and not remote_is_newer(url, mtime):
if not remote_is_newer(url, mtime):
logging.info(
f"Local Package List for is newer than remote, skip.")
return None
meta = requests.get(url)
if meta.status_code != requests.codes.ok:
@@ -142,15 +108,6 @@ def remote_is_newer(url, mtime) -> bool:
return url_date > file_time.astimezone()
def remove_all_cran_pkg(engine):
'''
remove all CRAN packages from database.
'''
session = Session(engine)
session.query(PkgMeta).filter_by(repo='CRAN').delete()
session.commit()
def update_DB(engine, min_ver=None, first_run=False, mtime=None,
bioc_mirror="https://bioconductor.org", cran_mirror="https://cran.r-project.org"):
'''
@@ -163,18 +120,29 @@ def update_DB(engine, min_ver=None, first_run=False, mtime=None,
cran_mirror: the CRAN mirror to use.
'''
bioc_vers = get_bioc_versions(bioc_mirror)
bioc_vers.sort()
bioc_vers.sort(reverse=True)
if min_ver:
min_ver = version.parse(min_ver)
else:
if first_run:
min_ver = bioc_vers[0]
min_ver = bioc_vers[-1]
else:
min_ver = bioc_vers[-2]
min_ver = max(min_ver, version.parse("1.8"))
min_ver = bioc_vers[1]
with Session(engine) as session:
# CRAN
url = f"{cran_mirror}/src/contrib/PACKAGES"
logging.info("Downloading CRAN Package List")
f = get_package_meta(url, mtime)
if f:
descs = f.split('\n\n')
pkgmetas = map(lambda x: from_str(x, None, None), descs)
# insert or skip
for pkgmeta in pkgmetas:
add_or_skip(session, PkgMeta, pkgmeta)
# BIOC
for ver in bioc_vers:
if ver < min_ver:
@@ -192,21 +160,7 @@ def update_DB(engine, min_ver=None, first_run=False, mtime=None,
# insert or skip
for pkgmeta in pkgmetas:
add_or_update(session, PkgMeta, pkgmeta)
# CRAN
logging.info("Removing old package list for CRAN")
remove_all_cran_pkg(engine)
url = f"{cran_mirror}/src/contrib/PACKAGES"
logging.info("Downloading CRAN Package List")
f = get_package_meta(url, mtime)
if f:
descs = f.split('\n\n')
pkgmetas = map(lambda x: from_str(x, None, None), descs)
# insert or skip
for pkgmeta in pkgmetas:
# we already deleted all CRAN packages, so we can just add them.
add_or_update(session, PkgMeta, pkgmeta)
add_or_skip(session, PkgMeta, pkgmeta)
def add_or_skip(session, table, pkgmeta):
@@ -220,26 +174,6 @@ def add_or_skip(session, table, pkgmeta):
session.commit()
def add_or_update(session, table, pkgmeta):
def getmd5sum(desc):
return re.search(r"MD5sum: ([a-z0-9]+)\n", desc).group(1)
if not pkgmeta:
return
if session.get(table, pkgmeta.name):
pkg = session.query(table).filter_by(
name=pkgmeta.name).first()
if pkg.repo == 'CRAN' and getmd5sum(pkg.desc) != getmd5sum(pkgmeta.desc) and not (pkg.name in EXCLUDED_PKGS):
logging.warning(
f"Overwritting package: {pkg.name}\n old meta :{pkg}\n new meta: {pkgmeta} \n")
pkg.desc = pkgmeta.desc
pkg.repo = pkgmeta.repo
pkg.bioc_ver = pkgmeta.bioc_ver
pkg.bioc_category = pkgmeta.bioc_category
else:
session.add(pkgmeta)
session.commit()
if __name__ == '__main__':
logging.basicConfig(level=logging.INFO)
logging.getLogger('sqlalchemy').setLevel(logging.ERROR)
@@ -258,8 +192,6 @@ if __name__ == '__main__':
'--bioc_min_ver', help="The minimum version of Bioconductor supported, must be greater than 3.0", default=None)
parser.add_argument('-f',
'--first_run', help="If this is the first run, the database will be created", action='store_true')
parser.add_argument(
'--compare', help="Compare mtime of database and remote, if database is newer, skip remote (This can be buggy)", action='store_true')
args = parser.parse_args()
if not args:

View file

@@ -8,31 +8,44 @@ import logging
from lilac2 import api as lilac
import argparse
import os
import shutil
import tempfile
import filecmp
import yaml
from typing import Optional
import sqlite3
from dbmanager import get_bioc_versions, EXCLUDED_PKGS
from pkg_archiver import archive_pkg_yaml, archive_pkg_pkgbuild, unarchive_cran
import re
import requests
from dbmanager import get_bioc_versions
from pkg_archiver import archive_pkg_yaml, archive_pkg_pkgbuild
# use | in lilac.yaml
def str_presenter(dumper, data):
"""configures yaml for dumping multiline strings
Ref: https://stackoverflow.com/questions/8640959/how-can-i-control-what-scalar-form-pyyaml-uses-for-my-data"""
if data.count('\n') > 0: # check for multiline string
return dumper.represent_scalar('tag:yaml.org,2002:str', data, style='|')
return dumper.represent_scalar('tag:yaml.org,2002:str', data)
yaml.add_representer(str, str_presenter)
yaml.representer.SafeRepresenter.add_representer(
str, str_presenter)  # to use with safe_dump
EXCLUDED_PKGS = {
"base",
"boot",
"class",
"cluster",
"codetools",
"compiler",
"datasets",
"foreign",
"graphics",
"grDevices",
"grid",
"KernSmooth",
"lattice",
"MASS",
"Matrix",
"methods",
"mgcv",
"nlme",
"nnet",
"parallel",
"rpart",
"spatial",
"splines",
"stats",
"stats4",
"survival",
"tcltk",
"tools",
"utils",
"R"
}
class PkgInfo:
@@ -66,7 +79,6 @@ class PkgInfo:
self.depends_changed = False
self.optdepends_changed = False
self.desc = None
if self.bioc_versions == []:
self.bioc_versions = get_bioc_versions(self.bioc_meta_mirror)
@@ -117,20 +129,18 @@ class PkgInfo:
desc, self.bioc_ver = descall
self.bioc_ver = version.parse(
self.bioc_ver) if self.bioc_ver else None
self.desc = desc
return desc
else:
return None
def is_archived(self, release_ver) -> bool:
def is_archived(self) -> bool:
'''
Check if the package is archived in CRAN or BIOC
release_ver: current release of BIOC, should be a Version e.g., "3.16"
'''
if not self.desc: # not in database, archived in CRAN
return True
# not in the latest BIOC version, archived in BIOC
elif self.bioc_ver and self.bioc_ver < release_ver:
elif self.bioc_ver and self.bioc_ver != max(self.bioc_versions):
return True
return False
@@ -205,12 +215,14 @@ class PkgInfo:
if not self.new_optdepends:
self.optdepends_changed = bool(self.optdepends)
return
system_opt_reqs = [
x for x in self.optdepends if not x.startswith('r-')]
system_opt_reqs.sort()
# keep explanation of optdepends
if any(map(lambda x: ':' in x, self.optdepends)):
opt_dict = lilac.obtain_optdepends(parse_dict=True)
self.new_optdepends = [
x+': ' for x in self.new_optdepends if ':' not in x]
opt_dict = {pkg.strip(): desc.strip() for (pkg, desc) in
(item.split(':', 1) for item in self.optdepends)}
if sorted(self.new_optdepends) != sorted(opt_dict.keys()):
self.optdepends_changed = True
for i in range(len(self.new_optdepends)):
@@ -220,7 +232,6 @@ class PkgInfo:
else:
if sorted(self.new_optdepends) != sorted(self.optdepends):
self.optdepends_changed = True
self.new_optdepends = system_opt_reqs+self.new_optdepends
def update_pkgbuild(self) -> list[str]:
'''
@@ -237,12 +248,22 @@ class PkgInfo:
for i, line in enumerate(lines):
if line.strip().startswith("depends"):
depends_interval[0] = i
elif line.strip().startswith("optdepends"):
optdepends_interval[0] = i
if depends_interval[0] > -1 and depends_interval[1] == -1:
if line.strip().endswith(")"):
if ')' in line:
# end depends
depends_interval[1] = i
break
if optdepends_interval[0] > -1 and optdepends_interval[1] == -1:
if ')' in line:
# end optdepends
optdepends_interval[1] = i
if not (depends_interval[1] < optdepends_interval[0] or optdepends_interval[1] < depends_interval[0]):
logging.error(
"depends and optdepends overlap, please fix it manually")
return
if self.depends_changed:
for i in range(depends_interval[0], depends_interval[1]):
lines[i] = ''
@@ -250,20 +271,10 @@ class PkgInfo:
['depends=(', '\n'.join([' ' + _ for _ in self.new_depends]), ')\n'])
# new lines for new optdepends
new_optdepends_line = ""
for i, line in enumerate(lines):
if line.strip().startswith("optdepends"):
optdepends_interval[0] = i
if optdepends_interval[0] > -1 and optdepends_interval[1] == -1:
if line.strip().endswith(")"):
# end optdepends
optdepends_interval[1] = i
break
if self.new_optdepends:
new_optdepends_line = '\n'.join(
['optdepends=(', '\n'.join(
[' ' + f"'{x}'" if ":" in x else ' '+x for x in self.new_optdepends]), ')\n'])
[' ' + _ for _ in self.new_optdepends]), ')\n'])
if self.optdepends_changed:
# no old, but has new
if optdepends_interval[0] == -1:
@@ -273,34 +284,16 @@ class PkgInfo:
optdepends_interval[1] = depends_interval[1]+1
# has old,
else:
for i in range(optdepends_interval[0], optdepends_interval[1]):
lines[i] = ''
lines[optdepends_interval[1]] = ""
if self.new_optdepends:
lines[optdepends_interval[1]] = new_optdepends_line
for i in range(optdepends_interval[0], optdepends_interval[1]):
lines[i] = ''
if self.new_optdepends:
lines[optdepends_interval[1]] = new_optdepends_line
logging.info(f"Writing new PKGBUILD for {self.pkgname}")
with open("PKGBUILD", "w") as f:
f.writelines(lines)
change_arch(self.desc)
return self.added_depends
def get_web_bioc_version(self) -> version.Version:
'''
get bioc version from BIOC website, usually newer than the one in BIOC metadata
'''
if self.bioc_ver is None or self.bioc_ver == max(self.bioc_versions):
return self.bioc_ver
for ver in self.bioc_versions:
if ver > self.bioc_ver:
url = f"{self.bioc_meta_mirror}/packages/{ver}/{self.pkgname}"
r = requests.get(url)
if r.status_code == 200:
if not 'packages-removed-with-bioconductor' in r.text:
return ver
return self.bioc_ver
def update_yaml(self, yaml_file='lilac.yaml'):
'''
update the `repo_depends` part of pkg, repo_depends will be sorted (systemlibs first, then r-pkgs)
@@ -318,48 +311,11 @@ class PkgInfo:
if new_deps:
docs['repo_depends'] = new_deps
with open(yaml_file, 'w') as f:
stream = yaml.dump(docs, sort_keys=False,
default_flow_style=False, indent=2)
f.write(stream)
def create_temporary_copy(path):
'''
create temporary copy of path, remember to manually delete it.
'''
tmp = tempfile.NamedTemporaryFile(delete=False)
shutil.copy2(path, tmp.name)
return tmp.name
def change_arch(desc: str):
'''
change `arch` to `any` if NeedsCompilation is No, and change it to x86_64 if it is Yes
'''
# change any to x86_64 in PKGBUILD or vice versa
need_compile = re.search("NeedsCompilation: (.*)", desc)
if not need_compile:
return
need_compile = need_compile.group(1)
with open("PKGBUILD", "r") as f:
lines = f.readlines()
started = False
for i, line in enumerate(lines):
if line.strip().startswith("arch"):
started = True
if started:
if need_compile.lower() == 'no':
lines[i] = line.replace('x86_64', 'any')
elif need_compile.lower() == 'yes':
lines[i] = line.replace('any', 'x86_64')
if started and line.strip().endswith(')'):
break
with open("PKGBUILD", "w") as f:
f.writelines(lines)
yaml.dump(docs, f, sort_keys=False)
def update_depends_by_file(file, bioarch_path="BioArchLinux", db="sqlite.db",
auto_archive=False, auto_unarchive=True,
auto_archive=False,
bioc_min_ver="3.0", bioc_meta_mirror="https://bioconductor.org", output_file="added_depends.txt"):
'''
Update depends of packages listed in `file`, one package name per line, CRAN style(e.g. `Rcpp`) and pkgname style (`r-rcpp`) are both supported.
@@ -368,13 +324,11 @@ def update_depends_by_file(file, bioarch_path="BioArchLinux", db="sqlite.db",
bioarch_path: path to BioArchLinux
db: path to the database to be read
auto_archive: whether to archive the package if it is not in CRAN or the latest BIOC
auto_unarchive: whether to unarchive the archived package if it is in CRAN.
bioc_min_ver: minimum version of Bioconductor to be supported.
bioc_meta_mirror: The server used to get all version numbers of BIOC
output_file: file to write the added depends to.
'''
bioc_versions = get_bioc_versions(bioc_meta_mirror)
MAX_BIOC_VERSION = max(bioc_versions)
current_dir = os.getcwd()
# where the name are _pkgname (May have upper letters) or pkgname (r-xxx)
conn = sqlite3.connect(db)
@@ -382,43 +336,20 @@ def update_depends_by_file(file, bioarch_path="BioArchLinux", db="sqlite.db",
added_deps = []
with open(file, "r") as f:
for pkgname in f:
logging.debug(f"Dealing with {pkgname}")
pkgname = pkgname.strip()
if not pkgname.strip().startswith("r-"):
pkgname = "r-"+pkgname.lower()
logging.info(f"Updating {pkgname}")
os.chdir(f"{bioarch_path}/{pkgname}")
temp_pkgbuild = create_temporary_copy("PKGBUILD")
temp_lilac = create_temporary_copy("lilac.yaml")
pkginfo = PkgInfo(bioc_min_version=bioc_min_ver,
bioc_meta_mirror=bioc_meta_mirror, bioc_versions=bioc_versions)
pkginfo.build_body(cursor)
pkginfo.update_pkgbuild()
pkginfo.update_yaml()
if auto_archive and pkginfo.is_archived(MAX_BIOC_VERSION):
temp_bioc_ver = pkginfo.bioc_ver
if pkginfo.bioc_ver != None and pkginfo.bioc_ver < MAX_BIOC_VERSION:
try:
temp_bioc_ver = pkginfo.get_web_bioc_version()
except Exception as e:
logging.error(
f"Failed to getting web bioc version for {pkgname} due to {e}")
temp_bioc_ver = None
if temp_bioc_ver == None or temp_bioc_ver < MAX_BIOC_VERSION:
archive_pkg_yaml(bioconductor_version=temp_bioc_ver)
archive_pkg_pkgbuild(bioconductor_version=temp_bioc_ver)
# if PKGBUILD changed, bump pkgrel
if auto_unarchive and pkginfo.desc:
unarchive_cran()
if not filecmp.cmp(temp_pkgbuild, "PKGBUILD"):
lilac.update_pkgrel()
logging.info(f"Updating {pkgname}")
else:
# else revert changes to lilac.yaml
shutil.copy2(temp_lilac, "lilac.yaml")
logging.debug(f"No changes to {pkgname}")
os.remove(temp_pkgbuild)
os.remove(temp_lilac)
if auto_archive and pkginfo.is_archived():
archive_pkg_yaml(bioconductor_version=pkginfo.bioc_ver)
archive_pkg_pkgbuild(bioconductor_version=pkginfo.bioc_ver)
lilac.update_pkgrel()
if pkginfo.added_depends:
added_deps += pkginfo.added_depends
os.chdir(current_dir)
@@ -447,15 +378,11 @@ if __name__ == '__main__':
'-o', '--output', help='The file to save newly added depends name', default="added_depends.txt")
parser.add_argument(
'-a', '--auto-archive', help='Automatically archive pkgs that are not in CRAN or the latest BIOC release', action='store_true')
parser.add_argument(
'--no-auto-unarchive', help='DO NOT Automatically unarchive pkgs that are now in CRAN', action='store_false')
args = parser.parse_args()
if args.file:
update_depends_by_file(args.file, args.bioarch_path, args.db,
output_file=args.output,
bioc_min_ver=args.bioc_min_ver, bioc_meta_mirror=args.bioc_meta_mirror,
auto_archive=args.auto_archive, auto_unarchive=args.no_auto_unarchive)
args.bioc_min_ver, bioc_meta_mirror=args.bioc_meta_mirror, output_file=args.output, auto_archive=args.auto_archive)
else:
parser.print_help()

View file

@@ -7,7 +7,6 @@ import os
import yaml
import argparse
from lilac2.api import update_pkgrel
import re
def archive_pkg_by_file_list(file, bioarch_path="BioArchLinux", biconductor_version=3.15, step=1):
@@ -41,30 +40,21 @@ def archive_pkg_yaml(bioconductor_version=3.15, yaml_file="lilac.yaml"):
with open(yaml_file, "r") as f:
docs = yaml.load(f, Loader=yaml.FullLoader)
url_idx = -1
url = None
for i in range(len(docs['update_on'])):
if "url" in docs['update_on'][i].keys():
url = docs['update_on'][i]['url']
url_idx = i
break
if not url:
return
pkg = url.rstrip('/')
pkg = re.split('/|=', pkg)[-1]
archive_url = None
pkg = url.rstrip('/').split('/')[-1]
# CRAN ARCHIVE
if 'cran.r-project.org' in url:
archive_url = f"https://cran.r-project.org/src/contrib/Archive/{pkg}"
# Bioconductor ARCHIVE
elif 'bioconductor.org' in url:
# https://bioconductor.org/packages/AffyCompatible
# to
# https://bioconductor.org/packages/3.16/AffyCompatible
archive_url = url.replace(
'packages', f"packages/{bioconductor_version}")
if archive_url:
docs['update_on'][url_idx]['url'] = archive_url
archive_url = f"https://bioconductor.org/packages/{bioconductor_version}/{pkg}"
docs['update_on'][url_idx]['url'] = archive_url
with open(yaml_file, 'w') as f:
yaml.dump(docs, f, sort_keys=False)
@@ -80,7 +70,7 @@ def archive_pkg_pkgbuild(bioconductor_version=3.15, _pkgname="_pkgname"):
flag = False
for i in range(len(lines)):
if lines[i].startswith("url=") and '//bioconductor.org' in lines[i] and not re.search("packages/[\d.]+", lines[i]):
if lines[i].startswith("url=") and '//bioconductor.org' in lines[i]:
lines[i] = lines[i].replace(
"packages/", f"packages/{bioconductor_version}/")
changed = True
@@ -94,8 +84,8 @@ def archive_pkg_pkgbuild(bioconductor_version=3.15, _pkgname="_pkgname"):
# to
# https://cran.r-project.org/src/contrib/Archive/${_pkgname}/${_pkgname}_${pkgver}.tar.gz
new_line = lines[i].replace(
"src/contrib", "src/contrib/Archive/${_pkgname}")
elif '//bioconductor.org' in lines[i] and bioconductor_version != None:
"src/contrib", r"src/contrib/Archive/${" + _pkgname + '}')
elif '//bioconductor.org' in lines[i]:
# https://bioconductor.org/packages/release/bioc/src/contrib/${_pkgname}_${_pkgver}.tar.gz
# to
# https://bioconductor.org/packages/3.14/bioc/src/contrib/ABAEnrichment_1.24.0.tar.gz
@@ -111,47 +101,6 @@ def archive_pkg_pkgbuild(bioconductor_version=3.15, _pkgname="_pkgname"):
return changed
def unarchive_cran():
unarchive_cran_pkgbuild()
unarchive_cran_yaml()
def unarchive_cran_pkgbuild():
with open("PKGBUILD", "r") as f:
lines = f.readlines()
for i in range(len(lines)):
if lines[i].startswith("source="):
if "src/contrib/Archive" in lines[i]:
lines[i] = lines[i].replace(
"src/contrib/Archive/${_pkgname}", "src/contrib")
with open("PKGBUILD", "w") as f:
f.writelines(lines)
def unarchive_cran_yaml():
with open("lilac.yaml", "r") as f:
docs = yaml.load(f, Loader=yaml.FullLoader)
url_idx = -1
url = None
for i in range(len(docs['update_on'])):
if "url" in docs['update_on'][i].keys():
url = docs['update_on'][i]['url']
url_idx = i
break
if not url:
return
pkg = url.rstrip('/')
pkg = re.split('/|=', pkg)[-1]
archive_url = None
# CRAN ARCHIVE
if 'cran.r-project.org' in url:
archive_url = f"https://cran.r-project.org/package={pkg}"
if archive_url:
docs['update_on'][url_idx]['url'] = archive_url
with open("lilac.yaml", 'w') as f:
yaml.dump(docs, f, sort_keys=False)
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument(

View file

@@ -12,6 +12,3 @@ For usage, run with argument `-h`.
- [x] `depends_updater` supports archiving PKGs automatically.
- [ ] generate PKGBUILD for missing dependencies `depends_updater`
- [x] merge `sync_meta_data` into `dbmanager`
- [x] merge `pkg_archiver` into `dbmanager`
- [x] support unarchiving CRAN pkgs automatically in `pkg_archiver`
- [ ] correct `arch` for pkgs.