mirror of
https://github.com/BioArchLinux/bioarchlinux-tools.git
synced 2025-03-09 22:53:31 +00:00
127 lines
4.4 KiB
Python
Executable file
127 lines
4.4 KiB
Python
Executable file
#!/usr/bin/python
|
||
# -*- coding: utf-8 -*-
|
||
from sqlalchemy import Column, String, Text, create_engine
|
||
from sqlalchemy.ext.declarative import declarative_base
|
||
from sqlalchemy.orm import Session
|
||
import requests
|
||
from packaging import version
|
||
import argparse
|
||
import logging
|
||
import os
|
||
|
||
|
||
# Declarative base class that the ORM models in this module derive from;
# its metadata is what ``create_all`` uses to create the tables.
Base = declarative_base()
|
||
|
||
|
||
class PkgMeta(Base):
    """ORM model for one row of the ``pkgmeta`` table.

    A row stores the package name (primary key), a free-text description,
    the repository the package belongs to and, for Bioconductor packages,
    the Bioconductor version and category.
    """

    # Table name.
    __tablename__ = 'pkgmeta'

    # Table structure.
    # The longest name seen so far is 48 characters
    # (r-illuminahumanmethylationepicanno.ilm10b4.hg19), so 50 is enough.
    name = Column(String(50), primary_key=True)
    desc = Column(Text)
    repo = Column(String(4))  # CRAN or BIOC
    bioc_ver = Column(String(8))  # 3.0 to 3.16
    # One of: bioc, data/annotation, data/experiment
    bioc_category = Column(String(16))

    def __init__(self, name, desc, repo, bioc_ver, bioc_category) -> None:
        """Create a row object from positional column values."""
        super().__init__()
        self.name, self.desc, self.repo = name, desc, repo
        self.bioc_ver, self.bioc_category = bioc_ver, bioc_category
|
||
|
||
|
||
def from_str(data, bioc_ver, bioc_cat):
    """Build a ``PkgMeta`` from the text of one package entry.

    The package name is taken from the first line that starts with
    ``Package:``; the whole of ``data`` is stored as the description.

    Args:
        data: Text of a single package entry.
        bioc_ver: Bioconductor version; a falsy value marks a CRAN package.
        bioc_cat: Bioconductor category, only used when ``bioc_ver`` is truthy.

    Returns:
        A ``PkgMeta`` instance, or ``None`` when no line starts with
        ``Package:``.
    """
    package_lines = (
        entry for entry in data.split('\n') if entry.startswith('Package:')
    )
    header = next(package_lines, None)
    if header is None:
        return None

    # The name is whatever follows the last colon on the header line.
    pkgname = header.rsplit(':', 1)[-1].strip()
    if not bioc_ver:
        return PkgMeta(pkgname, data, 'CRAN', None, None)
    return PkgMeta(pkgname, data, 'BIOC', str(bioc_ver), bioc_cat)
|
||
|
||
|
||
def get_bioc_versions(url="https://bio.askk.cc") -> list[str]:
    """Get all Bioconductor versions.

    Sends a GET request to ``{url}/bioc_version`` and splits the
    comma-separated response body.

    Args:
        url: Base URL of the service that exposes ``/bioc_version``.

    Returns:
        The version strings in the order the server sent them, with
        surrounding whitespace removed and empty entries dropped.

    Raises:
        RuntimeError: If the response status is not OK, or the response
            contains no version at all.
        requests.exceptions.RequestException: If the request itself fails,
            including when the timeout expires.
    """
    # Without an explicit timeout requests waits indefinitely on a server
    # that accepts the connection but never answers.
    version_page = requests.get(f"{url}/bioc_version", timeout=30)
    if version_page.status_code != requests.codes.ok:
        raise RuntimeError(
            f"Failed to get Bioconductor versions due to: {version_page.status_code}: {version_page.reason}")

    # Strip each entry so a trailing newline or "a, b" spacing does not leak
    # into the version strings, and skip blanks left by a trailing comma.
    versions = [
        entry.strip()
        for entry in version_page.text.split(',')
        if entry.strip()
    ]
    if not versions:
        raise RuntimeError(
            f"Failed to get Bioconductor versions due to: empty response from {url}/bioc_version")
    return versions
|
||
|
||
|
||
def update_DB(engine, path='bioc', min_ver='3.0', verion_file_url="https://bio.askk.cc"):
    """Insert or update package metadata from local PACKAGES files.

    For every Bioconductor version that is not older than ``min_ver``
    (oldest first) and each of the categories ``bioc``,
    ``data/annotation`` and ``data/experiment``, the file
    ``{path}/packages/{ver}/{cat}/src/contrib/PACKAGES`` is loaded.
    Afterwards the CRAN file ``{path}/src/contrib/PACKAGES`` is loaded.
    Because rows are keyed by package name, a later file overwrites the
    values stored by an earlier one.

    Args:
        engine: SQLAlchemy engine the session is bound to.
        path: Directory that contains the PACKAGES files.
        min_ver: Oldest Bioconductor version to load.
        verion_file_url: Base URL passed to ``get_bioc_versions``.
    """
    # Parse the lower bound once instead of on every loop iteration.
    oldest_supported = version.parse(min_ver)
    bioc_vers = sorted(
        parsed
        for parsed in map(version.parse, get_bioc_versions(verion_file_url))
        if parsed >= oldest_supported
    )

    with Session(engine) as session:
        for ver in bioc_vers:
            for cat in ('bioc', 'data/annotation', 'data/experiment'):
                _import_packages_file(
                    session,
                    f"{path}/packages/{ver}/{cat}/src/contrib/PACKAGES",
                    ver,
                    cat,
                )
        # CRAN
        _import_packages_file(
            session, f"{path}/src/contrib/PACKAGES", None, None)


def _import_packages_file(session, packages_file, bioc_ver, bioc_cat):
    """Insert or update every package entry found in one PACKAGES file."""
    # Explicit encoding so the result does not depend on the process locale.
    with open(packages_file, encoding="utf-8") as f:
        # Entries are separated by blank lines.
        stanzas = f.read().split('\n\n')
    for stanza in stanzas:
        # from_str returns None for chunks without a 'Package:' line;
        # add_or_update ignores those.
        add_or_update(session, PkgMeta, from_str(stanza, bioc_ver, bioc_cat))
|
||
|
||
|
||
def add_or_update(session, table, pkgmeta):
    """Insert ``pkgmeta``, or update the stored row with the same name.

    Args:
        session: Session used for the lookup, the insert and the commit.
        table: Mapped class whose primary key is the package name.
        pkgmeta: Object carrying the new values; a falsy value is ignored.

    The session is committed after every insert or update.
    """
    if not pkgmeta:
        return

    # Reuse the object returned by the primary-key lookup instead of
    # fetching the same row a second time with a query.
    existing = session.get(table, pkgmeta.name)
    if existing is not None:
        existing.desc = pkgmeta.desc
        existing.repo = pkgmeta.repo
        existing.bioc_ver = pkgmeta.bioc_ver
        existing.bioc_category = pkgmeta.bioc_category
    else:
        session.add(pkgmeta)
    session.commit()
|
||
|
||
|
||
if __name__ == '__main__':
    # Command-line entry point: create the SQLite database if needed and
    # load the package metadata into it.
    logging.basicConfig(level=logging.INFO)
    parser = argparse.ArgumentParser(
        description='Manage the meta info database (only sqlite are supported) of CRAN and Bioconductor packages',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument(
        '--db', help='Where the database should be placed', default='/tmp/dbmanager/sqlite.db')
    parser.add_argument(
        '--bioarch_path', help='The path of BioArchLinux repo', default="BioArchLinux")
    parser.add_argument(
        '--bioc_min_ver', help="The minimum version of Bioconductor supported, must be greater than 3.0", default="3.0")

    args = parser.parse_args()

    # A bare file name such as "sqlite.db" has an empty directory part, and
    # os.makedirs('') raises FileNotFoundError, so only create real paths.
    db_dir = os.path.dirname(args.db)
    if db_dir:
        os.makedirs(db_dir, exist_ok=True)

    # SQLite database stored at args.db. check_same_thread=False is passed
    # so the connection may be used from more than one thread.
    engine = create_engine(f"sqlite:///{args.db}", echo=True, future=True,
                           connect_args={"check_same_thread": False})

    Base.metadata.create_all(engine)
    # NOTE(review): --bioarch_path is parsed but never used; update_DB runs
    # with its default ``path``. Confirm whether it should be passed here.
    update_DB(engine=engine, min_ver=args.bioc_min_ver)
|