bioarchlinux-tools/updater/dbmanager.py

127 lines
4.4 KiB
Python
Executable file
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/python
# -*- coding: utf-8 -*-
from sqlalchemy import Column, String, Text, create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import Session
import requests
from packaging import version
import argparse
import logging
import os
Base = declarative_base()
class PkgMeta(Base):
# 表的名字:
__tablename__ = 'pkgmeta'
# 表的结构:
# currently max is 48, r-illuminahumanmethylationepicanno.ilm10b4.hg19, so 50 is OK
name = Column(String(50), primary_key=True)
desc = Column(Text)
repo = Column(String(4)) # CRAN or BIOC
bioc_ver = Column(String(8)) # 3.0 to 3.16
# bioc, data/annotation, data/experiment
bioc_category = Column(String(16))
def __init__(self, name, desc, repo, bioc_ver, bioc_category) -> None:
super().__init__()
self.name = name
self.desc = desc
self.repo = repo
self.bioc_ver = bioc_ver
self.bioc_category = bioc_category
def from_str(data, bioc_ver, bioc_cat):
for line in data.split('\n'):
if line.startswith('Package:'):
pkgname = line.split(':')[-1].strip()
if bioc_ver:
pkgmeta = PkgMeta(
pkgname, data, 'BIOC', str(bioc_ver), bioc_cat)
else:
pkgmeta = PkgMeta(
pkgname, data, 'CRAN', None, None)
return pkgmeta
def get_bioc_versions(url="https://bio.askk.cc") -> list[str]:
'''
get all Bioconductor versions
'''
version_page = requests.get(
f"{url}/bioc_version")
if version_page.status_code != requests.codes.ok:
raise RuntimeError(
f"Failed to get Bioconductor versions due to: {version_page.status_code}: {version_page.reason}")
z = version_page.text.split(',')
return z
def update_DB(engine, path='bioc', min_ver='3.0', verion_file_url="https://bio.askk.cc"):
bioc_vers = get_bioc_versions(verion_file_url)
bioc_vers = [version.parse(v) for v in bioc_vers]
bioc_vers.sort()
with Session(engine) as session:
for ver in bioc_vers:
if ver < version.parse(min_ver):
continue
for cat in ['bioc', 'data/annotation', 'data/experiment']:
with open(f"{path}/packages/{ver}/{cat}/src/contrib/PACKAGES") as f:
descs = f.read().split('\n\n')
pkgmetas = map(
lambda x: from_str(x, ver, cat), descs)
# insert or update
for pkgmeta in pkgmetas:
add_or_update(session, PkgMeta, pkgmeta)
# CRAN
with open(f"{path}/src/contrib/PACKAGES") as f:
descs = f.read().split('\n\n')
pkgmetas = map(
lambda x: from_str(x, None, None), descs)
# insert or update
for pkgmeta in pkgmetas:
add_or_update(session, PkgMeta, pkgmeta)
def add_or_update(session, table, pkgmeta):
if not pkgmeta:
return
if session.get(table, pkgmeta.name):
pkg = session.query(table).filter_by(
name=pkgmeta.name).first()
pkg.desc = pkgmeta.desc
pkg.repo = pkgmeta.repo
pkg.bioc_ver = pkgmeta.bioc_ver
pkg.bioc_category = pkgmeta.bioc_category
else:
session.add(pkgmeta)
session.commit()
if __name__ == '__main__':
logging.basicConfig(level=logging.INFO)
parser = argparse.ArgumentParser(
description='Manage the meta info database (only sqlite are supported) of CRAN and Bioconductor packages',
formatter_class=argparse.ArgumentDefaultsHelpFormatter
)
parser.add_argument(
'--db', help='Where the database should be placed', default='/tmp/dbmanager/sqlite.db')
parser.add_argument(
'--bioarch_path', help='The path of BioArchLinux repo', default="BioArchLinux")
parser.add_argument(
'--bioc_min_ver', help="The minimum version of Bioconductor supported, must be greater than 3.0", default="3.0")
args = parser.parse_args()
db_dir = os.path.dirname(args.db)
os.makedirs(db_dir, exist_ok=True)
# 创建一个 SQLite 的内存数据库,必须加上 check_same_thread=False否则无法在多线程中使用
engine = create_engine(f"sqlite:///{args.db}", echo=True, future=True,
connect_args={"check_same_thread": False})
Base.metadata.create_all(engine)
update_DB(engine=engine, min_ver=args.bioc_min_ver)