From 3252303573df84ba9c30d5e03658c645798273d8 Mon Sep 17 00:00:00 2001 From: brent s Date: Sun, 15 Apr 2018 11:36:41 -0400 Subject: [PATCH] centos repo mirror script is done --- arch/repoclone.py | 5 +- centos/repoclone/centos.dflts.ini | 144 +++++++++++++++ centos/repoclone/repoclone.py | 296 ++++++++++++++++++++++++++++++ 3 files changed, 444 insertions(+), 1 deletion(-) create mode 100644 centos/repoclone/centos.dflts.ini create mode 100755 centos/repoclone/repoclone.py diff --git a/arch/repoclone.py b/arch/repoclone.py index ab9fcae..f786cc5 100755 --- a/arch/repoclone.py +++ b/arch/repoclone.py @@ -8,6 +8,9 @@ import pprint import subprocess import sys +# TODO: convert .ini to treat [section]s as repositories, with a [DEFAULT] +# section for URL etc. + cfgfile = os.path.join(os.environ['HOME'], '.config', 'optools', @@ -90,7 +93,7 @@ def getDefaults(): 'destination': '/srv/repos/arch', 'mount': '/', 'bwlimit': 0, - 'lockfile': '/var/run/repo-sync.lck', + 'lockfile': '/var/run/repo-sync_arch.lck', 'logfile': '/var/log/repo/arch.log'} realcfg = configparser.ConfigParser(defaults = dflt) if not os.path.isfile(cfgfile): diff --git a/centos/repoclone/centos.dflts.ini b/centos/repoclone/centos.dflts.ini new file mode 100644 index 0000000..096571f --- /dev/null +++ b/centos/repoclone/centos.dflts.ini @@ -0,0 +1,144 @@ +# This is an example ~/.config/optools/repoclone/centos.ini. +# You may need to change some options, but they are all commented so you know +# what to replace. +############################################################################### +# SPECIAL VALUES # +# You may recognize some values as used in yum's repo files +# (i.e. /etc/yum.repos.d/*.repo). THEY ARE NOT THE SAME. You CANNOT and SHOULD +# NOT simply copy-and-paste them in here, as they are constructed/used +# differently. +# That said, the following are special values/variables that are generated +# automatically (*case-sensitive*): +# +# {name}: The name of the repo (i.e. 
what appears in [brackets]). +# It is best that you leave this set in [DEFAULT] the way it +# is. +# +# {cur_arch}: The current hardware architecture of the host running the +# script e.g. x86_64 or i686. +# +# {rel_ver}: The release version. This will substitute for each version +# in the repository's "releases" directive. For example: +# +# destination = /srv/repos/centos/{rel_ver}/os/x86_64 +# releases = 6,7 +# baseuri = mirrors.centos.org/{rel_ver}/os/x86_64 +# +# would clone from the following to the following: +# +# mirrors.centos.org/6/os/x86_64 => +# /srv/repos/centos/6/os/x86_64 +# +# mirrors.centos.org/7/os/x86_64 => +# /srv/repos/centos/7/os/x86_64 +# +# {arch}: Similar to {rel_ver} in that it iterates, but for each item +# in the "arches" directive. +# +# {cur_ver}: *IF* (and *only* if) you are running this script on CentOS +# itself, this will be a "meta-variable" containing the full +# version, major version, minor version, and revision. +# i.e. For CentOS 7.4.1708, {cur_ver} or {cur_ver.full} will +# both replace as "7.4.1708". {cur_ver.maj} will replace as +# "7", {cur_ver.min} will replace as "4", and {cur_ver.rev} will +# replace as "1708". If you use this mechanism and are NOT +# running the script on CentOS, an error will occur. +# +# You can also refer to directives themselves -- see below for an example of +# this. +############################################################################### +# The default section provides... well, defaults. All of these values can be +# overridden in each repository defined. If any of these are not specified, +# either in DEFAULT or in the repo section itself, then an error will occur. +[DEFAULT] + +# The name of the repository. By default, this (repo_name) is the name of the +# section. +# {name} will ALWAYS be the section name and cannot be changed. +repo_name = {name} + +# Whether we should sync this repository or not. 
+# To enable a repository, set this to one of: 1, yes, on, true +# To disable a repository, set this to one of: 0, no, off, false +enabled = 0 + +# The list of architectures to clone, separated by commas (if you have more +# than one). This is iterated over. +arches = i686,{arch} + +# The full path to the "base" of the repository where we'll be rsyncing from. +# As shown, if an "$" is in front of curly brackets, you can use another +# directive in the same section. To specify a directive from another section, +# you would use ${section:directive} (e.g. ${base:repo_name}) +# Note that this has the potential to generate multiple iterations. +# isomirrors_sort.py should offer package repository mirrors as well, so that +# may be handy to find a fast mirror. +# Note that the default probably will not work for you since you need to be +# whitelisted to use it. +baseuri = mirror.centos.org/centos/{rel_ver}/${repo_name}/{arch} + +# Where the clone should go. If you are using iterables, make sure you use them +# here, too, otherwise you'll most likely overwrite parts and end up with a +# totally broken repository! +# The parent directories will be created if necessary (assuming we have proper +# permissions). +destination = ${mount}/centos/{rel_ver}/${repo_name}/{arch} + +# Perform a check before we start to make sure this mountpoint has a device +# mounted at it. If you don't store your repository mirrors at a different +# mountpoint, just set this to "/" (without quotes). +mount = /mnt/repos + +# If set, throttle the transfer speeds down to this number of Kilobytes per +# second (KB/s, *not* kbps!). +# If it's set to 0, don't perform any throttling. +bwlimit = 0 + +# The lockfile for the repository. If this file is present, the clone will +# abort to avoid rsync/file differentiation conflicts. The parent directories +# will be created if necessary (assuming we have proper permissions). +lockfile = /var/run/repo-sync_{name}.lck + +# The logfile for this repository. 
The parent directories will be created if +# necessary (assuming we have proper permissions). +logfile = /var/log/repo/centos.log + +# The releases to clone for. Note that we follow symlinks, so you should *NOT* +# include e.g. both 7 and 7.4.1708. This is also an iterable in the form of a +# comma-separated list (if you have more than one). +releases = 6,{cur_ver.maj} + +# A comma-separated list of paths/patterns on the rsync server to exclude (if +# you have more than one). Leave empty for no excludes (this should be fine if +# your "baseuri"s are explicit enough). +excludes = + +# This would be equivalent to cloning the [base] repository found in +# /etc/yum.repos.d/CentOS-Base.repo +[base] +repo_name = os +enabled = 1 + +# Likewise with [base], but with [updates] instead, etc. +[updates] +enabled = 1 + +[extras] +enabled = 1 + +[centosplus] +enabled = 1 + +# /etc/yum.repos.d/epel.repo (assuming you installed the epel-release package) +# Just like CentOS mirrors, you probably need to change this since they run a +# whitelist. +[epel] +enabled = 1 +baseuri = dl.fedoraproject.org::fedora-{name}0/{rel_ver}/{arch} +destination = ${mount}/centos/{name}/{rel_ver}/{arch} + +# It even works with non-RedHat-supplied repositories, too! As long as they +# offer rsync access. 
# --- Tail of centos/repoclone/centos.dflts.ini from the enclosing patch, kept
# --- verbatim (as comments) so that no content is lost:
#   I *highly* recommend you check IUS out: https://ius.io/
#   [ius]
#   enabled = 0
#   baseuri = dl.iuscommunity.org/{name}/stable/CentOS/{rel_ver}/{arch}
#
# --- diff --git a/centos/repoclone/repoclone.py b/centos/repoclone/repoclone.py
# --- new file mode 100755
#!/usr/bin/env python3
"""Clone CentOS-style package repositories with rsync.

Each section of the configuration file describes one repository.  For every
enabled section the script iterates over the configured architectures and
releases, expands the ``{placeholders}`` in the section's templates, verifies
the environment and then runs ``rsync`` from the remote mirror into the local
destination.
"""

import argparse
import configparser
import copy
import datetime
import os
import platform
import pprint
import re
import socket
import subprocess
import sys
from collections import OrderedDict

# expanduser() honours $HOME when it is set and falls back to the password
# database otherwise, so the script no longer dies with a KeyError when HOME
# is missing from the environment.
cfgfile = os.path.join(os.path.expanduser('~'),
                       '.config',
                       'optools',
                       'repoclone',
                       'centos.ini')


class cur_ver(object):
    """The CentOS version of the host, usable as ``{cur_ver}`` in templates.

    Attributes:
        full: the complete version string, e.g. ``'7.4.1708'``.
        maj:  the major version as an int.
        min:  the minor version as an int (0 when the version has none).
        rev:  the revision as a string ('' when the version has none).

    Args:
        distname: distribution name; detected from the host when omitted.
        version:  version string; detected from the host when omitted.

    Raises:
        ValueError: if the distribution is not CentOS or no version could be
            determined.
    """

    def __init__(self, distname=None, version=None):
        if distname is None or version is None:
            distname, version = self._detect()
        # " Linux" is optional: the original pattern made it mandatory, which
        # rejected a distribution name of plain "CentOS".
        if not re.search(r'^CentOS( Linux)?$', distname.strip(),
                         re.IGNORECASE):
            raise ValueError(('You have specified "{cur_ver}" in your ' +
                              'config, but you are not running this script ' +
                              'on CentOS!'))
        _ver = version.strip().split('.')
        if not _ver[0].isdigit():
            raise ValueError('Could not determine the CentOS version of '
                             'this host.')
        self.full = '.'.join(_ver)
        self.maj = int(_ver[0])
        # Not every version string carries three components.
        self.min = int(_ver[1]) if len(_ver) > 1 else 0
        self.rev = _ver[2] if len(_ver) > 2 else ''

    @staticmethod
    def _detect():
        """Return ``(distribution name, version)`` for the running host."""
        # platform.linux_distribution() was removed in Python 3.8.
        legacy = getattr(platform, 'linux_distribution', None)
        if legacy is not None:
            info = legacy()
            return (info[0], info[1])
        # Fallback: parse a "<name> release <version> ..." line.
        try:
            with open('/etc/centos-release', 'r') as f:
                line = f.readline()
        except OSError:
            return ('', '')
        found = re.search(r'^(.*?)\s+release\s+([0-9][0-9.]*)', line)
        if not found:
            return ('', '')
        return (found.group(1), found.group(2))

    def __str__(self):
        return(self.full)


# Rsync options
opts = [
    '--recursive',  # recurse into directories
    '--times',  # preserve modification times
    '--links',  # copy symlinks as symlinks
    '--hard-links',  # preserve hard links
    '--quiet',  # suppress non-error messages
    '--delete-after',  # receiver deletes after transfer, not during
    '--delay-updates',  # put all updated files into place at end
    '--copy-links',  # transform symlink into referent file/dir
    '--safe-links',  # ignore symlinks that point outside the tree
    #'--max-delete',  # don't delete more than NUM files
    '--delete-excluded',  # also delete excluded files from dest dirs
    ]

# Built-in defaults, also used to generate a first configuration file.
# Every value is a string: configparser stores option values as text, and the
# comma-separated directives are split on ',' later on.
dflts = {'DEFAULT': {'repo_name': '{name}',
                     'enabled': '0',
                     'arches': 'i686,x86_64',
                     'baseuri': ('mirror.centos.org/centos/{rel_ver}/' +
                                 '${repo_name}/{arch}'),
                     'destination': ('${mount}/centos/{rel_ver}/' +
                                     '${repo_name}/{arch}'),
                     'mount': '/mnt/repos',
                     'bwlimit': '0',
                     'lockfile': '/var/run/repo-sync_{name}.lck',
                     'logfile': '/var/log/repo/centos.log',
                     'releases': '6,7',
                     'excludes': ''},
         'base': {'repo_name': 'os',
                  'enabled': '1'},
         'updates': {'enabled': '1'},
         'extras': {'enabled': '1'},
         'centosplus': {'enabled': '1'},
         'epel': {'enabled': '1',
                  'baseuri': ('dl.fedoraproject.org::fedora-{name}0/' +
                              '{rel_ver}/{arch}'),
                  'destination': '${mount}/centos/{name}/{rel_ver}/{arch}'},
         'ius': {'enabled': '0',
                 'baseuri': ('dl.iuscommunity.org/{name}/stable/CentOS/' +
                             '{rel_ver}/{arch}')}}


class MirrorMgr(object):
    """Read the configuration and synchronise every enabled repository.

    The public methods return an empty tuple, exactly as the original
    implementation did.
    """

    # Seconds to wait for the rsync daemon when probing it.
    RSYNC_TIMEOUT = 10
    # Default rsync daemon port.
    RSYNC_PORT = 873
    # Directives holding local filesystem paths.
    _PATH_KEYS = ('mount', 'destination', 'logfile', 'lockfile')

    def __init__(self):
        self.cfg = configparser.ConfigParser(
                    interpolation = configparser.ExtendedInterpolation(),
                    defaults = dflts['DEFAULT'])
        # Values substituted into the "{...}" placeholders of the config.
        self.strvars = {'cur_ver': None,
                        'name': None,
                        'arches': [],
                        'releases': [],
                        'cur_arch': platform.machine(),
                        'rel_ver': None,
                        'arch': None}
        if not os.path.isfile(cfgfile):
            self.gen_cfg()
        self.get_cfg()
        self.chk_cur_ver()
        self.parse_cfg()

    # ------------------------------------------------------------------ #
    # Configuration handling
    # ------------------------------------------------------------------ #
    def get_cfg(self):
        """Read the raw text of the configuration file into ``cfg_in``."""
        with open(cfgfile, 'r') as f:
            self.cfg_in = f.read()
        return ()

    @staticmethod
    def _uses_cur_ver(cfg_text):
        """Tell whether a non-comment line references ``{cur_ver}``.

        Attribute forms such as ``{cur_ver.maj}`` count as well; the original
        check only recognised the bare ``{cur_ver}``.
        """
        for line in cfg_text.splitlines():
            # Skip empty lines/comments.
            if re.search(r'^\s*((#|;).*)?$', line):
                continue
            if re.search(r'\{cur_ver(\.\w+)?\}', line):
                return True
        return False

    def chk_cur_ver(self):
        """Detect the host version, but only if the config asks for it.

        Raises:
            ValueError: if ``{cur_ver}`` is used on a host that is not CentOS.
        """
        if self._uses_cur_ver(self.cfg_in):
            self.strvars['cur_ver'] = cur_ver()
        return ()

    def gen_cfg(self):
        """Write a stripped-down default configuration to ``cfgfile``."""
        cfg = configparser.ConfigParser(
                    interpolation = configparser.ExtendedInterpolation(),
                    defaults = dflts['DEFAULT'])
        for name, values in dflts.items():
            if name != 'DEFAULT':
                cfg[name] = copy.deepcopy(values)
        # The directory does not exist on a first run.
        os.makedirs(os.path.dirname(cfgfile), exist_ok = True)
        # The comment about how it's a stripped down default conf.
        cmnt = ('# This is an autogenerated configuration file for ' +
                'r00t^s\'s OpTools CentOS\n# mirror script.\n# You ' +
                'should reference the fully commented version ' +
                'distributed with the script,\n# "centos.dflts.ini".\n\n')
        with open(cfgfile, 'w') as f:
            f.write(cmnt)
            cfg.write(f)
        print(('A configuration file has been automatically generated for ' +
               'you at {0}. You should review and customize it, because it ' +
               'most likely will not work out of the box.').format(cfgfile))
        return ()

    def parse_cfg(self):
        """Parse the text loaded by :meth:`get_cfg`."""
        self.cfg.read_string(self.cfg_in)
        return ()

    # ------------------------------------------------------------------ #
    # Small pure helpers
    # ------------------------------------------------------------------ #
    @staticmethod
    def _csv(value):
        """Split a comma-separated directive into stripped, non-empty items."""
        return [i.strip() for i in value.split(',') if i.strip()]

    @classmethod
    def _split_host_port(cls, baseuri):
        """Return ``(host, port)`` of the server part of *baseuri*."""
        server = baseuri.split('/')[0]
        server = re.sub(r'::.*$', '', server)    # drop a "::module" suffix
        server = server.rsplit('@', 1)[-1]       # drop a "user@" prefix
        host, _sep, port = server.partition(':')
        # The port has to be an int for the socket layer.
        return (host, int(port) if port else cls.RSYNC_PORT)

    @staticmethod
    def _remote_src(baseuri):
        """Build the rsync source argument for *baseuri*.

        ``host::module/path`` is already complete daemon syntax and must not
        be prefixed with ``rsync://``; everything else becomes an rsync URL.
        """
        if '::' in baseuri.split('/')[0]:
            return baseuri.rstrip('/') + '/.'
        return os.path.join('rsync://{0}'.format(baseuri), '.')

    @staticmethod
    def _filter_errors(errors, rel_ver, arch):
        """Drop the errors that are expected for this release/arch pair."""
        # CentOS 7 main doesn't have an i686, so the failed directory change
        # on the mirror is not worth reporting.  (The original compared the
        # string release against the int 7 and therefore never filtered.)
        if str(rel_ver).split('.')[0] == '7' and arch == 'i686':
            return [e for e in errors
                    if not re.search(r'^rsync: change_dir', e)]
        return list(errors)

    @staticmethod
    def _fmt_error(line):
        """Make one rsync error line a little friendlier."""
        prefix = 'symlink has no referent: '
        if line.startswith(prefix):
            return ('Broken upstream symlink: ' +
                    '{0}').format(line[len(prefix):].replace('"', ''))
        return line

    @staticmethod
    def _interactive():
        """Tell whether stdin is a terminal (i.e. we are not run by cron)."""
        try:
            return os.isatty(sys.stdin.fileno())
        except (AttributeError, ValueError, OSError):
            return False

    def _resolve(self, repo):
        """Expand the templates of section *repo* for the current iteration.

        The templates are always taken fresh from the parsed configuration,
        so each arch/release combination gets its own source and destination.

        Returns:
            dict: the section's options with paths made absolute, the
            ``rsync://`` scheme stripped from ``baseuri``, ``bwlimit`` as a
            float and ``excludes`` as a list.
        """
        resolved = dict(self.cfg[repo])
        for key in self._PATH_KEYS:
            resolved[key] = os.path.abspath(
                                os.path.expanduser(
                                    resolved[key].format(**self.strvars)))
        # "flags" must be passed by keyword: the fourth positional argument
        # of re.sub() is "count".
        resolved['baseuri'] = re.sub(r'^\s*rsync://',
                                     '',
                                     resolved['baseuri'].format(
                                                        **self.strvars),
                                     flags = re.IGNORECASE)
        resolved['bwlimit'] = float(resolved['bwlimit'] or 0)
        excludes = resolved.get('excludes') or ''
        # Tolerate a literal "None" as an empty list.
        if excludes.strip().lower() == 'none':
            excludes = ''
        resolved['excludes'] = self._csv(excludes)
        return resolved

    def _build_cmd(self, resolved):
        """Assemble the rsync command line for a resolved repository."""
        # Copy the list so per-repo options never leak into the module-level
        # defaults.
        self.opts = list(opts)
        if resolved['bwlimit'] > 0.0:
            # limit socket I/O bandwidth
            limit = resolved['bwlimit']
            if limit.is_integer():
                limit = int(limit)
            self.opts.append('--bwlimit=' + str(limit))
        for pattern in resolved['excludes']:
            self.opts.append('--exclude=' + pattern)
        cmd = ['rsync']                 # Set up a cmd list for subprocess
        cmd.extend(self.opts)           # The arguments for rsync
        # The path on the remote mirror
        cmd.append(self._remote_src(resolved['baseuri']))
        # The local destination
        cmd.append(os.path.join(resolved['destination'], '.'))
        return cmd

    # ------------------------------------------------------------------ #
    # Environment checks
    # ------------------------------------------------------------------ #
    @staticmethod
    def _chk_mount(mount):
        """Raise RuntimeError unless ``findmnt`` succeeds for *mount*."""
        mntchk = subprocess.run(['findmnt', mount],
                                stdout = subprocess.DEVNULL,
                                stderr = subprocess.DEVNULL)
        if mntchk.returncode != 0:
            raise RuntimeError(('!! BAILING OUT; {0} isn\'t ' +
                                'mounted !!').format(mount))

    def _chk_rsync(self, host, port):
        """Raise RuntimeError unless a TCP connection to the server works."""
        try:
            with socket.create_connection((host, port),
                                          timeout = self.RSYNC_TIMEOUT):
                pass
        except OSError:
            raise RuntimeError(('Rsync on host {0}:{1} is not ' +
                                'accessible!').format(host, port))

    def _repo_chk(self, repo):
        """Verify the environment for every arch/release pair of *repo*.

        Checks the mount point and the reachability of the rsync server, and
        creates the destination, log and lock directories.

        Raises:
            RuntimeError: if the mount point is not mounted or the rsync
                server cannot be reached.
        """
        seen_mounts = set()
        seen_servers = set()
        # The Business-End(TM)
        for arch in self.strvars['arches']:
            for rel_ver in self.strvars['releases']:
                self.strvars['arch'] = arch
                self.strvars['rel_ver'] = rel_ver
                self.repo = self._resolve(repo)
                # Identical mounts/servers only need to be probed once.
                if self.repo['mount'] not in seen_mounts:
                    self._chk_mount(self.repo['mount'])
                    seen_mounts.add(self.repo['mount'])
                server = self._split_host_port(self.repo['baseuri'])
                if server not in seen_servers:
                    self._chk_rsync(*server)
                    seen_servers.add(server)
                os.makedirs(self.repo['destination'], exist_ok = True)
                for f in ('logfile', 'lockfile'):
                    os.makedirs(os.path.dirname(self.repo[f]),
                                exist_ok = True)
        return ()

    # ------------------------------------------------------------------ #
    # Synchronisation
    # ------------------------------------------------------------------ #
    def sync(self):
        """Synchronise every enabled repository of the configuration."""
        for repo in self.cfg.sections():
            # Skip disabled repos.
            if not self.cfg.getboolean(repo, 'enabled'):
                continue
            self.repo = copy.deepcopy(dict(self.cfg[repo]))
            self.strvars['name'] = repo
            # This should be safe since the only thing that makes sense here
            # is {cur_arch}, which we populate in __init__().
            self.strvars['arches'] = self._csv(
                            self.repo['arches'].format(**self.strvars))
            self.strvars['releases'] = self._csv(
                            self.repo['releases'].format(**self.strvars))
            self._repo_chk(repo)
            for arch in self.strvars['arches']:
                for rel_ver in self.strvars['releases']:
                    self.strvars['arch'] = arch
                    self.strvars['rel_ver'] = rel_ver
                    self._repo_sync(repo)
        return ()

    def _acquire_lock(self, lockfile):
        """Create *lockfile* holding our PID, or exit if it already exists."""
        try:
            # O_EXCL makes "check and create" one atomic step.
            fd = os.open(lockfile,
                         os.O_WRONLY | os.O_CREAT | os.O_EXCL,
                         0o644)
        except FileExistsError:
            try:
                with open(lockfile, 'r') as f:
                    existingpid = f.read().strip()
            except OSError:
                existingpid = 'unknown'
            if self._interactive():
                # Running from shell
                sys.exit(('!! A repo synchronization seems to already be ' +
                          'running (PID: {0}). Quitting. !!').format(
                                                            existingpid))
            sys.exit()  # We're running in cron; stay quiet.
        with os.fdopen(fd, 'w') as f:
            f.write(str(os.getpid()))

    def _repo_sync(self, repo):
        """Run rsync for *repo* with the arch/release now in ``strvars``.

        Errors from rsync are printed when run interactively and appended to
        the log file otherwise.  The lock file is removed even if the
        transfer raises.
        """
        self.repo = self._resolve(repo)
        cmd = self._build_cmd(self.repo)
        os.makedirs(self.repo['destination'], exist_ok = True)
        self._acquire_lock(self.repo['lockfile'])
        try:
            with open(self.repo['logfile'], 'a') as log:
                c = subprocess.run(cmd,
                                   stdout = log,
                                   stderr = subprocess.PIPE)
                # An aware datetime gives the right epoch whatever the local
                # timezone of the host is.
                now = int(datetime.datetime.now(
                                    datetime.timezone.utc).timestamp())
                with open(os.path.join(self.repo['destination'],
                                       'lastsync'), 'w') as f:
                    f.write(str(now) + '\n')
                errors = self._filter_errors(
                            c.stderr.decode('utf-8', 'replace').splitlines(),
                            self.strvars['rel_ver'],
                            self.strvars['arch'])
                # Only report errors at the end of the run if we aren't
                # running in cron. Otherwise, log them.
                if self._interactive() and errors:
                    print('[{0}] We encountered some errors:'.format(repo))
                    for e in errors:
                        print(self._fmt_error(e))
                else:
                    for e in errors:
                        log.write('{0}\n'.format(e))
        finally:
            os.remove(self.repo['lockfile'])
        return ()


def main():
    """Entry point: load the configuration and synchronise."""
    m = MirrorMgr()
    m.sync()


if __name__ == '__main__':
    main()