mirror of
https://github.com/BioArchLinux/bioarchlinux-tools.git
synced 2025-03-09 22:53:31 +00:00
144 lines
5.8 KiB
Python
Executable file
144 lines
5.8 KiB
Python
Executable file
#!/usr/bin/python
|
|
# -*- coding: utf-8 -*-
|
|
import requests
|
|
import re
|
|
from pathlib import Path
|
|
from packaging import version
|
|
import logging
|
|
from distutils.dir_util import copy_tree
|
|
import os
|
|
import datetime
|
|
from dateutil.parser import parse as parsedate
|
|
import argparse
|
|
|
|
|
|
class Downloader:
    '''
    Sync R package metadata (`PACKAGES` index files) from a Bioconductor
    mirror and a CRAN mirror into a local directory tree.
    '''

    def __init__(self, bioc_mirror="https://bioconductor.org", cran_mirror="https://cran.r-project.org", bioc_min_ver="3.0") -> None:
        '''
        bioc_mirror: remote Bioconductor mirror, default https://bioconductor.org
        cran_mirror: remote CRAN mirror, default https://cran.r-project.org
        bioc_min_ver: minimum version of Bioconductor to download, default 3.0

        Note: constructing a Downloader performs an HTTP request, because the
        list of Bioconductor versions is fetched immediately.
        '''
        self.bioc_mirror = bioc_mirror
        self.cran_mirror = cran_mirror
        self.bioc_min_ver = bioc_min_ver
        self.bioc_versions = []
        self.set_bioc_versions()

    @staticmethod
    def _find_version_strings(html):
        '''
        Return the Bioconductor version strings linked from `html` as
        `/packages/<major>.<minor>/`, in page order and without duplicates,
        followed by the manually added versions 1.7 down to 1.0.
        '''
        # The dot is escaped so only real "<major>.<minor>" links match;
        # an unescaped dot would also accept strings such as "3/18".
        found = re.findall(r"/packages/(\d+\.\d+)/", html)

        # manually add 1.7 to 1.0 to the list.
        found.extend(f"1.{i}" for i in range(7, -1, -1))

        # De-duplicate while keeping first-seen order. A version listed twice
        # would be seen as "already up to date" on its second visit and stop
        # the download loop before older versions are fetched.
        return list(dict.fromkeys(found))

    @staticmethod
    def _dir_prefix(path):
        '''
        Return `path` with a single trailing '/' so file names can be appended
        to it, or '' (current directory) when `path` is empty or None.
        '''
        if not path:
            return ''
        return path if path.endswith('/') else path + '/'

    def set_bioc_versions(self):
        '''
        obtain and set all available Bioconductor versions from remote mirror.

        Raises RuntimeError if the release announcement page cannot be fetched.
        '''
        version_page = requests.get(
            f"{self.bioc_mirror}/about/release-announcements/#release-versions/")
        if version_page.status_code != requests.codes.ok:
            raise RuntimeError(
                f"Failed to get Bioconductor versions due to: {version_page.status_code}: {version_page.reason}")

        self.bioc_versions = [
            version.parse(v)
            for v in self._find_version_strings(version_page.text)
        ]

    def download_package_meta(self, path='bioc'):
        '''
        Download package metadata from Bioconductor and CRAN.

        path: path to save metadata, default 'bioc' under current directory.
              An empty value means the current directory.

        Only Bioconductor versions >= self.bioc_min_ver are downloaded.
        Download failures are logged and do not raise.
        '''
        min_ver = version.parse(self.bioc_min_ver)
        path = self._dir_prefix(path)

        # BIOC
        # The first entry is treated as the latest release; this relies on the
        # release page listing the newest version first.
        latestver = self.bioc_versions[0]
        for p in ['bioc', 'data/annotation', 'data/experiment']:
            for ver in self.bioc_versions:
                if ver < min_ver:
                    continue
                logging.info(f"Downloading Bioconductor {ver} {p}...")
                contrib_dir = path+f'packages/{ver}/{p}/src/contrib/'
                Path(contrib_dir).mkdir(parents=True, exist_ok=True)
                url = f"{self.bioc_mirror}/packages/{ver}/{p}/src/contrib/PACKAGES"
                dstFile = contrib_dir+'PACKAGES'
                if not remote_is_newer(url, dstFile):
                    logging.info(
                        f"Local Package List for Bioconductor below {ver}: {p} is newer than remote, skip.")
                    # Stop at the first up-to-date version: the remaining
                    # versions of this repository are skipped as well.
                    break
                meta = requests.get(url)
                if meta.status_code != requests.codes.ok:
                    logging.error(
                        f"failed to download Package List for Bioconductor {ver}: {p} due to {meta.status_code}: {meta.reason}")
                else:
                    with open(dstFile, 'w') as f:
                        f.write(meta.text)
        # Expose the latest version under the stable name "release".
        copy_tree(path+f'packages/{latestver}', path+'packages/release')

        # Record every known Bioconductor version as a comma separated list.
        bioc_ver_file = path+'bioc_version'
        with open(bioc_ver_file, 'w') as f:
            f.write(','.join(map(str, self.bioc_versions)))

        # CRAN
        logging.info("Downloading CRAN metadata...")
        url = f"{self.cran_mirror}/src/contrib/PACKAGES"
        dstFile = path+'src/contrib/PACKAGES'
        if remote_is_newer(url, dstFile):
            meta = requests.get(url)
            if meta.status_code != requests.codes.ok:
                logging.error(
                    f"failed to download Package List for CRAN due to {meta.status_code}: {meta.reason}")
            else:
                Path(path+'src/contrib/').mkdir(parents=True, exist_ok=True)
                with open(dstFile, 'w') as f:
                    f.write(meta.text)
        else:  # skip if local is newer
            logging.info(
                "Local Package List for CRAN is newer than remote, skip.")
|
|
|
|
|
|
def remote_is_newer(url, dstFile) -> bool:
    '''
    whether the remote file is newer than local file.

    url: http(s) URL of the remote file.
    dstFile: path of the local copy.

    return True if dstFile does not exist.
    returns False if remote does not provide `Last-Modified` header.
    '''
    if not os.path.exists(dstFile):
        return True

    # requests.head() does not follow redirects unless asked to, whereas the
    # requests.get() used for the actual download does. Follow them here so
    # the header comes from the same resource that would be downloaded.
    r = requests.head(url, allow_redirects=True)
    url_time = r.headers.get('last-modified')
    if not url_time:
        return False

    url_date = parsedate(url_time)
    if url_date.tzinfo is None:
        # HTTP dates are expressed in GMT; make a zone-less value aware so the
        # comparison below cannot raise TypeError (naive vs. aware).
        url_date = url_date.replace(tzinfo=datetime.timezone.utc)

    # Local modification time as an aware datetime.
    file_time = datetime.datetime.fromtimestamp(
        os.path.getmtime(dstFile), tz=datetime.timezone.utc)
    return url_date > file_time
|
|
|
|
|
|
if __name__ == '__main__':
    # Command line entry point: sync Bioconductor and CRAN metadata to a
    # local directory.
    logging.basicConfig(level=logging.INFO)

    parser = argparse.ArgumentParser(
        prog='R package metadata sync tool',
        description='Sync metadata of R packages from CRAN and Bioconductor to a local path',
    )
    parser.add_argument(
        '--path', help='The path to store the metadata files. '
        "if not given the environment variable BIO_META_PATH will be read, if it's not set, the default (bioc) will be used.",
        # No default here: None means "not given", so the environment
        # variable fallback described in the help text can apply.
        default=None)
    parser.add_argument(
        '--bioc_min_ver', help="The minimum version of Bioconductor supported, must be greater than 3.0", default="3.0")
    parser.add_argument(
        '--cran_meta_mirror', help="The remote mirror of CRAN metadata, only http(s) is supported", default="https://cran.r-project.org")
    parser.add_argument(
        '--bioc_meta_mirror', help="The remote mirror of Bioconductor metadata, only http(s) is supported", default="https://bioconductor.org")

    # Parse before creating the Downloader: its constructor contacts the
    # mirror, which should not happen for `--help` or invalid arguments.
    args = parser.parse_args()

    # Precedence: --path, then BIO_META_PATH, then 'bioc'.
    download_path = args.path or os.getenv('BIO_META_PATH', 'bioc')

    d = Downloader(
        bioc_mirror=args.bioc_meta_mirror,
        cran_mirror=args.cran_meta_mirror,
        bioc_min_ver=args.bioc_min_ver,
    )
    d.download_package_meta(download_path)
|