mirror of
https://github.com/BioArchLinux/bioarchlinux-tools.git
synced 2025-03-09 22:53:31 +00:00
add dbmanager, use sqlite to store metadata
This commit is contained in:
parent
f979bffa7a
commit
3a3bb1b7f8
2 changed files with 145 additions and 8 deletions
127
updater/dbmanager.py
Executable file
127
updater/dbmanager.py
Executable file
|
@ -0,0 +1,127 @@
|
|||
#!/usr/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
from sqlalchemy import Column, String, Text, create_engine
|
||||
from sqlalchemy.ext.declarative import declarative_base
|
||||
from sqlalchemy.orm import Session
|
||||
import requests
|
||||
from packaging import version
|
||||
import argparse
|
||||
import logging
|
||||
import os
|
||||
|
||||
|
||||
Base = declarative_base()
|
||||
|
||||
|
||||
class PkgMeta(Base):
    '''
    ORM mapping of one row of the `pkgmeta` table: the stored metadata
    of a single CRAN or Bioconductor package.
    '''
    # Name of the table:
    __tablename__ = 'pkgmeta'

    # Structure of the table:
    # currently max is 48, r-illuminahumanmethylationepicanno.ilm10b4.hg19, so 50 is OK
    name = Column(String(50), primary_key=True)
    desc = Column(Text)
    repo = Column(String(4))  # CRAN or BIOC
    bioc_ver = Column(String(8))  # 3.0 to 3.16
    # bioc, data/annotation, data/experiment
    bioc_category = Column(String(16))

    def __init__(self, name, desc, repo, bioc_ver, bioc_category) -> None:
        '''
        Create a row object from positional values.

        The values are forwarded as keyword arguments to the declarative
        base constructor, which assigns each one to the mapped attribute
        of the same name.
        '''
        super().__init__(
            name=name,
            desc=desc,
            repo=repo,
            bioc_ver=bioc_ver,
            bioc_category=bioc_category,
        )
|
||||
|
||||
|
||||
def from_str(data, bioc_ver, bioc_cat):
    '''
    Build a `PkgMeta` from one stanza of a PACKAGES file.

    `data` is the raw text of the stanza; it is stored unchanged as the
    description. When `bioc_ver` is truthy the package is recorded as a
    'BIOC' package with that version (converted with `str`) and category
    `bioc_cat`; otherwise it is recorded as a 'CRAN' package with no
    version and no category.

    Returns None when the stanza has no `Package:` line (for example the
    empty chunk left after splitting a file on blank lines), so callers
    can skip it instead of hitting an unbound variable.
    '''
    field = 'Package:'
    for line in data.split('\n'):
        if not line.startswith(field):
            continue
        # Take everything after the field name rather than after the last
        # colon, so the value itself cannot be truncated.
        pkgname = line[len(field):].strip()
        if bioc_ver:
            return PkgMeta(pkgname, data, 'BIOC', str(bioc_ver), bioc_cat)
        return PkgMeta(pkgname, data, 'CRAN', None, None)
    # No `Package:` field in this stanza.
    return None
|
||||
|
||||
|
||||
def get_bioc_versions(url="https://bio.askk.cc") -> list[str]:
    '''
    get all Bioconductor versions

    Downloads `{url}/bioc_version`, whose body is a comma separated list
    of version numbers, and returns the entries with surrounding
    whitespace removed. Empty entries (empty body, trailing comma) are
    dropped so that they are not handed to a version parser later.

    Raises RuntimeError when the server does not answer with status 200.
    '''
    # Without a timeout `requests` waits indefinitely on an unresponsive
    # server, which would hang the whole updater.
    version_page = requests.get(f"{url}/bioc_version", timeout=30)
    if version_page.status_code != requests.codes.ok:
        raise RuntimeError(
            f"Failed to get Bioconductor versions due to: {version_page.status_code}: {version_page.reason}")
    entries = (raw.strip() for raw in version_page.text.split(','))
    return [entry for entry in entries if entry]
|
||||
|
||||
|
||||
def update_DB(engine, path='bioc', min_ver='3.0', verion_file_url="https://bio.askk.cc"):
    '''
    Load the PACKAGES files found under `path` into the database.

    The Bioconductor versions are fetched from `verion_file_url` and
    processed in ascending order, skipping those lower than `min_ver`.
    For every remaining version the three categories (bioc,
    data/annotation, data/experiment) are read from
    `{path}/packages/{ver}/{cat}/src/contrib/PACKAGES`. Afterwards the
    CRAN index at `{path}/src/contrib/PACKAGES` is read. Each stanza is
    parsed with `from_str` and stored with `add_or_update`.
    '''
    releases = sorted(
        version.parse(raw) for raw in get_bioc_versions(verion_file_url))
    categories = ('bioc', 'data/annotation', 'data/experiment')

    with Session(engine) as session:
        for ver in releases:
            if version.parse(min_ver) > ver:
                continue
            for cat in categories:
                with open(f"{path}/packages/{ver}/{cat}/src/contrib/PACKAGES") as f:
                    stanzas = f.read().split('\n\n')
                # insert or update
                for stanza in stanzas:
                    add_or_update(session, PkgMeta, from_str(stanza, ver, cat))

        # CRAN
        with open(f"{path}/src/contrib/PACKAGES") as f:
            stanzas = f.read().split('\n\n')
        # insert or update
        for stanza in stanzas:
            add_or_update(session, PkgMeta, from_str(stanza, None, None))
|
||||
|
||||
|
||||
def add_or_update(session, table, pkgmeta):
    '''
    Insert `pkgmeta` into `table`, or overwrite the existing row that has
    the same name, then commit.

    A falsy `pkgmeta` (e.g. None from a stanza that could not be parsed)
    is ignored and nothing is committed.
    '''
    if not pkgmeta:
        return
    # `session.get` already returns the mapped object, so it is reused
    # directly instead of running a second query for the same row.
    existing = session.get(table, pkgmeta.name)
    if existing is not None:
        existing.desc = pkgmeta.desc
        existing.repo = pkgmeta.repo
        existing.bioc_ver = pkgmeta.bioc_ver
        existing.bioc_category = pkgmeta.bioc_category
    else:
        session.add(pkgmeta)
    session.commit()
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Command line entry point: create the SQLite file if needed, make
    # sure the table exists and fill it from the PACKAGES files.
    logging.basicConfig(level=logging.INFO)
    parser = argparse.ArgumentParser(
        description='Manage the meta info database (only sqlite are supported) of CRAN and Bioconductor packages',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument(
        '--db', help='Where the database should be placed', default='/tmp/dbmanager/sqlite.db')
    # NOTE(review): --bioarch_path is parsed but not used below - confirm
    # whether it should be forwarded to update_DB.
    parser.add_argument(
        '--bioarch_path', help='The path of BioArchLinux repo', default="BioArchLinux")
    parser.add_argument(
        '--bioc_min_ver', help="The minimum version of Bioconductor supported, must be greater than 3.0", default="3.0")

    args = parser.parse_args()

    # A bare file name such as `--db sqlite.db` has an empty dirname, and
    # os.makedirs('') raises FileNotFoundError, so only create the
    # directory when there is one.
    db_dir = os.path.dirname(args.db)
    if db_dir:
        os.makedirs(db_dir, exist_ok=True)

    # Open the SQLite database file. check_same_thread=False must be
    # passed, otherwise the connection cannot be used from multiple threads.
    engine = create_engine(f"sqlite:///{args.db}", echo=True, future=True,
                           connect_args={"check_same_thread": False})

    Base.metadata.create_all(engine)
    update_DB(engine=engine, min_ver=args.bioc_min_ver)
|
|
@ -12,6 +12,7 @@ import argparse
|
|||
import os
|
||||
import yaml
|
||||
from typing import Optional
|
||||
import sqlite3
|
||||
|
||||
EXCLUDED_PKGS = {
|
||||
"base",
|
||||
|
@ -124,9 +125,10 @@ class PkgInfo:
|
|||
self.depends = depends
|
||||
self.optdepends = optdepends
|
||||
|
||||
def get_desc(self) -> Optional[str]:
|
||||
def get_desc_by_file(self) -> Optional[str]:
|
||||
'''
|
||||
get new depends from CRAN or Bioconductor
|
||||
@depreciated, replaced by get_desc
|
||||
'''
|
||||
pkgname = self.pkgname
|
||||
CRAN_URL = f"{self.cran_meta_mirror}/src/contrib/PACKAGES"
|
||||
|
@ -164,6 +166,14 @@ class PkgInfo:
|
|||
f'Failed to get Bioconductor descriptions for version: {ver}, {p}, due to: {bioconductor_descs.status_code}: {bioconductor_descs.reason}')
|
||||
continue
|
||||
|
||||
def get_desc(self, conn_cursor) -> Optional[str]:
    '''
    Look up the stored description of `self.pkgname` in the `pkgmeta` table.

    `conn_cursor` is the object the query is executed on; its `execute`
    result must provide `fetchone`.

    Returns the `desc` column of the matching row, or None when the
    package is not in the database.
    '''
    row = conn_cursor.execute(
        "SELECT desc from pkgmeta where name = ?", (self.pkgname,)).fetchone()
    # fetchone() yields None when no row matches; indexing it would raise
    # TypeError, so report the missing package as None instead.
    if row is None:
        return None
    return row[0]
|
||||
|
||||
def update_info(self, desc) -> None:
|
||||
'''
|
||||
obtain new depends and optdepends from `desc`, and write them to `self`
|
||||
|
@ -366,20 +376,20 @@ if __name__ == '__main__':
|
|||
formatter_class=argparse.ArgumentDefaultsHelpFormatter
|
||||
)
|
||||
parser.add_argument(
|
||||
'--file', help='The file that contains the pkgname to be archived, one pkgname per line')
|
||||
'-f', '--file', help='The file that contains the pkgname to be archived, one pkgname per line')
|
||||
parser.add_argument(
|
||||
'--bioarch_path', help='The path of BioArchLinux repo', default="BioArchLinux")
|
||||
'-p', '--bioarch_path', help='The path of BioArchLinux repo', default="BioArchLinux")
|
||||
parser.add_argument(
|
||||
'-db', help="The database file used to query metadata of packages", default="/tmp/dbmanager/sqlite.db")
|
||||
parser.add_argument(
|
||||
'--bioc_min_ver', help="The minimum version of Bioconductor supported, must be greater than 3.0", default="3.0")
|
||||
parser.add_argument(
|
||||
'--cran_meta_mirror', help="The mirror of CRAN metadata, recommended to be changed to a local https mirror. Only http(s) is supported", default="https://cran.r-project.org")
|
||||
parser.add_argument(
|
||||
'--bioc_meta_mirror', help="The mirror of Bioconductor metadata, recommended to be changed to a local https mirror. Only http(s) is supported", default="https://bioconductor.org")
|
||||
'--bioc_meta_mirror', help="The server used to get all version numbers of BIOC", default="https://bio.askk.cc")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.file:
|
||||
update_depends_by_file(args.file, args.bioarch_path, args.bioc_min_ver,
|
||||
cran_meta_mirror=args.cran_meta_mirror, bioc_meta_mirror=args.bioc_meta_mirror)
|
||||
update_depends_by_file(args.file, args.bioarch_path,
|
||||
args.bioc_min_ver, bioc_meta_mirror=args.bioc_meta_mirror)
|
||||
else:
|
||||
parser.print_help()
|
||||
|
|
Loading…
Add table
Reference in a new issue