NOTE(review): this patch was recovered from a whitespace-collapsed copy. The
leading whitespace and the blank lines of the context lines in
updater/depends_updater.py are reconstructed - check them against the
pre-image before applying. The blob ids on the `index` lines are the original
ones and do not describe the revised content.

diff --git a/updater/dbmanager.py b/updater/dbmanager.py
new file mode 100755
index 0000000..977aa41
--- /dev/null
+++ b/updater/dbmanager.py
@@ -0,0 +1,159 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+'''
+Build and refresh the SQLite database that stores the metadata of CRAN and
+Bioconductor packages, read from local copies of their PACKAGES files.
+'''
+import argparse
+import logging
+import os
+
+import requests
+from packaging import version
+from sqlalchemy import Column, String, Text, create_engine
+from sqlalchemy.orm import Session, declarative_base
+
+
+Base = declarative_base()
+
+
+class PkgMeta(Base):
+    '''
+    one row of the `pkgmeta` table: the metadata of a single package
+    '''
+    # table name:
+    __tablename__ = 'pkgmeta'
+
+    # table structure:
+    # currently max is 48, r-illuminahumanmethylationepicanno.ilm10b4.hg19, so 50 is OK
+    name = Column(String(50), primary_key=True)
+    desc = Column(Text)
+    repo = Column(String(4))  # CRAN or BIOC
+    bioc_ver = Column(String(8))  # 3.0 to 3.16
+    # bioc, data/annotation, data/experiment
+    bioc_category = Column(String(16))
+
+    def __init__(self, name, desc, repo, bioc_ver, bioc_category) -> None:
+        super().__init__()
+        self.name = name
+        self.desc = desc
+        self.repo = repo
+        self.bioc_ver = bioc_ver
+        self.bioc_category = bioc_category
+
+
+def from_str(data, bioc_ver, bioc_cat):
+    '''
+    build a PkgMeta from one stanza of a PACKAGES file
+
+    `data` is stored as the description unchanged. If `bioc_ver` is truthy the
+    package is recorded as BIOC with that version and `bioc_cat`, otherwise as
+    CRAN. Returns None if `data` has no line starting with `Package:`.
+    '''
+    for line in data.split('\n'):
+        if line.startswith('Package:'):
+            pkgname = line.split(':')[-1].strip()
+            if bioc_ver:
+                return PkgMeta(pkgname, data, 'BIOC', str(bioc_ver), bioc_cat)
+            return PkgMeta(pkgname, data, 'CRAN', None, None)
+    return None
+
+
+def get_bioc_versions(url="https://bio.askk.cc") -> list[str]:
+    '''
+    get all Bioconductor versions
+
+    Raises RuntimeError if the server does not answer with an OK status.
+    '''
+    # without a timeout a stalled server would block the updater forever
+    version_page = requests.get(f"{url}/bioc_version", timeout=30)
+    if version_page.status_code != requests.codes.ok:
+        raise RuntimeError(
+            f"Failed to get Bioconductor versions due to: {version_page.status_code}: {version_page.reason}")
+    # comma separated list; drop surrounding whitespace and empty items such as
+    # the one left by a trailing comma or newline
+    return [v.strip() for v in version_page.text.split(',') if v.strip()]
+
+
+def _update_from_packages_file(session, packages_file, bioc_ver, bioc_cat):
+    '''
+    insert or update every package described in `packages_file`
+    '''
+    with open(packages_file) as f:
+        # stanzas are separated by an empty line
+        descs = f.read().split('\n\n')
+    for desc in descs:
+        add_or_update(session, PkgMeta, from_str(desc, bioc_ver, bioc_cat))
+
+
+def update_DB(engine, path='bioc', min_ver='3.0', verion_file_url="https://bio.askk.cc"):
+    '''
+    fill the database with the packages of Bioconductor (every version that is
+    not older than `min_ver`) and of CRAN
+
+    PACKAGES files are read from `path`; the list of Bioconductor versions is
+    fetched from `verion_file_url`.
+    '''
+    min_bioc_ver = version.parse(min_ver)
+    bioc_vers = sorted(
+        version.parse(v) for v in get_bioc_versions(verion_file_url))
+    with Session(engine) as session:
+        # oldest first, so a later version overwrites the row written by an earlier one
+        for ver in bioc_vers:
+            if ver < min_bioc_ver:
+                continue
+            for cat in ['bioc', 'data/annotation', 'data/experiment']:
+                _update_from_packages_file(
+                    session, f"{path}/packages/{ver}/{cat}/src/contrib/PACKAGES", ver, cat)
+        # CRAN
+        _update_from_packages_file(
+            session, f"{path}/src/contrib/PACKAGES", None, None)
+
+
+def add_or_update(session, table, pkgmeta):
+    '''
+    insert `pkgmeta` into `table`, or overwrite the row with the same name,
+    and commit; does nothing if `pkgmeta` is falsy
+    '''
+    if not pkgmeta:
+        return
+    # session.get already returns the stored row, no second query is needed
+    pkg = session.get(table, pkgmeta.name)
+    if pkg is not None:
+        pkg.desc = pkgmeta.desc
+        pkg.repo = pkgmeta.repo
+        pkg.bioc_ver = pkgmeta.bioc_ver
+        pkg.bioc_category = pkgmeta.bioc_category
+    else:
+        session.add(pkgmeta)
+    session.commit()
+
+
+if __name__ == '__main__':
+    logging.basicConfig(level=logging.INFO)
+    parser = argparse.ArgumentParser(
+        description='Manage the meta info database (only sqlite are supported) of CRAN and Bioconductor packages',
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter
+    )
+    parser.add_argument(
+        '--db', help='Where the database should be placed', default='/tmp/dbmanager/sqlite.db')
+    # NOTE(review): --bioarch_path is parsed but not passed to update_DB, which
+    # reads from its default path='bioc' - confirm what was intended
+    parser.add_argument(
+        '--bioarch_path', help='The path of BioArchLinux repo', default="BioArchLinux")
+    parser.add_argument(
+        '--bioc_min_ver', help="The minimum version of Bioconductor supported, must be greater than 3.0", default="3.0")
+
+    args = parser.parse_args()
+    db_dir = os.path.dirname(args.db)
+    # a bare file name has no directory part and os.makedirs('') would raise
+    if db_dir:
+        os.makedirs(db_dir, exist_ok=True)
+
+    # file based SQLite database; check_same_thread=False is required to use
+    # the connection from more than one thread
+    engine = create_engine(f"sqlite:///{args.db}", echo=True, future=True,
+                           connect_args={"check_same_thread": False})
+
+    Base.metadata.create_all(engine)
+    update_DB(engine=engine, min_ver=args.bioc_min_ver)
diff --git a/updater/depends_updater.py b/updater/depends_updater.py
index c4490c5..61ad062 100755
--- a/updater/depends_updater.py
+++ b/updater/depends_updater.py
@@ -12,6 +12,7 @@ import argparse
 import os
 import yaml
 from typing import Optional
+import sqlite3
 
 EXCLUDED_PKGS = {
     "base",
@@ -124,9 +125,10 @@ class PkgInfo:
         self.depends = depends
         self.optdepends = optdepends
 
-    def get_desc(self) -> Optional[str]:
+    def get_desc_by_file(self) -> Optional[str]:
         '''
         get new depends from CRAN or Bioconductor
+        @deprecated, replaced by get_desc
         '''
         pkgname = self.pkgname
         CRAN_URL = f"{self.cran_meta_mirror}/src/contrib/PACKAGES"
@@ -164,6 +166,19 @@ class PkgInfo:
                         f'Failed to get Bioconductor descriptions for version: {ver}, {p}, due to: {bioconductor_descs.status_code}: {bioconductor_descs.reason}')
                     continue
 
+    def get_desc(self, conn_cursor) -> Optional[str]:
+        '''
+        get the description of this package from the `pkgmeta` table
+
+        `conn_cursor` must provide `execute` accepting `?` placeholders, e.g. a
+        sqlite3 connection or cursor. Returns None if the package is not found.
+        '''
+        cursor = conn_cursor.execute(
+            "SELECT desc from pkgmeta where name = ?", (self.pkgname,))
+        row = cursor.fetchone()
+        # fetchone() returns None when no row matched
+        return row[0] if row is not None else None
+
     def update_info(self, desc) -> None:
         '''
         obtain new depends and optdepends from `desc`, and write them to `self`
@@ -366,20 +381,20 @@ if __name__ == '__main__':
         formatter_class=argparse.ArgumentDefaultsHelpFormatter
     )
     parser.add_argument(
-        '--file', help='The file that contains the pkgname to be archived, one pkgname per line')
+        '-f', '--file', help='The file that contains the pkgname to be archived, one pkgname per line')
     parser.add_argument(
-        '--bioarch_path', help='The path of BioArchLinux repo', default="BioArchLinux")
+        '-p', '--bioarch_path', help='The path of BioArchLinux repo', default="BioArchLinux")
+    parser.add_argument(
+        '-db', '--db', help="The database file used to query metadata of packages", default="/tmp/dbmanager/sqlite.db")
     parser.add_argument(
         '--bioc_min_ver', help="The minimum version of Bioconductor supported, must be greater than 3.0", default="3.0")
     parser.add_argument(
-        '--cran_meta_mirror', help="The mirror of CRAN metadata, recommended to be changed to a local https mirror. Only http(s) is supported", default="https://cran.r-project.org")
-    parser.add_argument(
-        '--bioc_meta_mirror', help="The mirror of Bioconductor metadata, recommended to be changed to a local https mirror. Only http(s) is supported", default="https://bioconductor.org")
+        '--bioc_meta_mirror', help="The server used to get all version numbers of BIOC", default="https://bio.askk.cc")
 
     args = parser.parse_args()
 
     if args.file:
-        update_depends_by_file(args.file, args.bioarch_path, args.bioc_min_ver,
-                               cran_meta_mirror=args.cran_meta_mirror, bioc_meta_mirror=args.bioc_meta_mirror)
+        update_depends_by_file(args.file, args.bioarch_path,
+                               args.bioc_min_ver, bioc_meta_mirror=args.bioc_meta_mirror)
     else:
         parser.print_help()