centos repo mirror script is done

This commit is contained in:
brent s 2018-04-15 11:36:41 -04:00
parent 166f5a9021
commit 3252303573
3 changed files with 444 additions and 1 deletions

View File

@ -8,6 +8,9 @@ import pprint
import subprocess
import sys

# TODO: convert .ini to treat [section]s as repositories, with a [DEFAULT]
# section for URL etc.

cfgfile = os.path.join(os.environ['HOME'],
'.config',
'optools',
@ -90,7 +93,7 @@ def getDefaults():
'destination': '/srv/repos/arch',
'mount': '/',
'bwlimit': 0,
'lockfile': '/var/run/repo-sync.lck',
'lockfile': '/var/run/repo-sync_arch.lck',
'logfile': '/var/log/repo/arch.log'}
realcfg = configparser.ConfigParser(defaults = dflt)
if not os.path.isfile(cfgfile):

View File

@ -0,0 +1,144 @@
# This is an example ~/.config/optools/repoclone/centos.ini.
# You may need to change some options, but they are all commented so you know
# what to replace.
###############################################################################
# SPECIAL VALUES #
# You may recognize some values as used in yum's repo files
# (i.e. /etc/yum.repos.d/*.repo). THEY ARE NOT THE SAME. You CANNOT and SHOULD
# NOT simply copy-and-paste them in here, as they are constructed/used
# differently.
# That said, the following are special values/variables that are generated
# automatically (*case-sensitive*):
#
# {name}: The name of the repo (i.e. what appears in [brackets]).
# It is best that you leave this set in [DEFAULT] the way it
# is.
#
# {cur_arch}: The current hardware architecture of the host running the
# script e.g. x86_64 or i686.
#
# {rel_ver}: The release version. This will substitute for each version
# in the repository's "releases" directive. For example:
#
# destination = /srv/repos/centos/{rel_ver}/os/x86_64
# releases = 6,7
# baseuri = mirrors.centos.org/{rel_ver}/os/x86_64
#
# would clone from the following to the following:
#
# mirrors.centos.org/6/os/x86_64 =>
# /srv/repos/centos/6/os/x86_64
#
# mirrors.centos.org/7/os/x86_64 =>
# /srv/repos/centos/7/os/x86_64
#
# {arch}: Similar to {rel_ver} in that it iterates, but for each item
# in the "arches" directive.
#
# {cur_ver}: *IF* (and *only* if) you are running this script on CentOS
# itself, this will be a "meta-variable" containing the full
# version, major version, minor version, and revision.
# i.e. For CentOS 7.4.1708, {cur_ver} or {cur_ver.full} will
# both replace as "7.4.1708". {cur_ver.maj} will replace as
# "7", {cur_ver.min} will replace as "4", and {cur_ver.rev} will
# replace as "1708". If you use this mechanism and are NOT
# running the script on CentOS, an error will occur.
#
# You can also refer to directives themselves -- see below for an example of
# this.
###############################################################################
# The default section provides... well, defaults. All of these values can be
# overridden in each repository defined. If any of these are not specified,
# either in DEFAULT or in the repo section itself, then an error will occur.
[DEFAULT]

# The name of the repository. By default, this (repo_name) is the name of the
# section.
# {name} will ALWAYS be the section name and cannot be changed.
repo_name = {name}

# Whether we should sync this repository or not.
# To enable a repository, set this to one of: 1, yes, on, true
# To disable a repository, set this to one of: 0, no, off, false
enabled = 0

# The list of architectures to clone, separated by commas (if you have more
# than one). This is iterated over.
arches = i686,{cur_arch}

# The full path to the "base" of the repository where we'll be rsyncing from.
# As shown, if a "$" is in front of curly brackets, you can use another
# directive in the same section. To specify a directive from another section,
# you would use ${section:directive} (e.g. ${base:repo_name})
# Note that this has the potential to generate multiple iterations.
# isomirrors_sort.py should offer package repository mirrors as well, so that
# may be handy to find a fast mirror.
# Note that the default probably will not work for you since you need to be
# whitelisted to use it.
baseuri = mirror.centos.org/centos/{rel_ver}/${repo_name}/{arch}

# Where the clone should go. If you are using iterables, make sure you use them
# here, too, otherwise you'll most likely overwrite parts and end up with a
# totally broken repository!
# The parent directories will be created if necessary (assuming we have proper
# permissions).
destination = ${mount}/centos/{rel_ver}/${repo_name}/{arch}

# Perform a check before we start to make sure this mountpoint has a device
# mounted at it. If you don't store your repository mirrors at a different
# mountpoint, just set this to "/" (without quotes).
mount = /mnt/repos

# If set, throttle the transfer speeds down to this number of Kilobytes per
# second (KB/s, *not* kbps!).
# If it's set to 0, don't perform any throttling.
bwlimit = 0

# The lockfile for the repository. If this file is present, the clone will
# abort to avoid rsync/file differentiation conflicts. The parent directories
# will be created if necessary (assuming we have proper permissions).
lockfile = /var/run/repo-sync_{name}.lck

# The logfile for this repository. The parent directories will be created if
# necessary (assuming we have proper permissions).
logfile = /var/log/repo/centos.log

# The releases to clone for. Note that we follow symlinks, so you should *NOT*
# include e.g. both 7 and 7.4.1708. This is also an iterable in the form of a
# comma-separated list (if you have more than one).
releases = 6,{cur_ver.maj}

# A comma-separated list of paths/patterns on the rsync server to exclude (if
# you have more than one). Leave empty for no excludes (this should be fine if
# your "baseuri"s are explicit enough).
excludes =

# This would be equivalent to cloning the [base] repository found in
# /etc/yum.repos.d/CentOS-Base.repo
[base]
repo_name = os
enabled = 1

# Likewise with [base], but with [updates] instead, etc.
[updates]
enabled = 1

[extras]
enabled = 1

[centosplus]
enabled = 1

# /etc/yum.repos.d/epel.repo (assuming you installed the epel-release package)
# Just like CentOS mirrors, you probably need to change this since they run a
# whitelist.
[epel]
enabled = 1
baseuri = dl.fedoraproject.org::fedora-{name}0/{rel_ver}/{arch}
destination = ${mount}/centos/{name}/{rel_ver}/{arch}

# It even works with non-RedHat-supplied repositories, too! As long as they
# offer rsync access. I *highly* recommend you check IUS out: https://ius.io/
[ius]
enabled = 0
baseuri = dl.iuscommunity.org/{name}/stable/CentOS/{rel_ver}/{arch}

296
centos/repoclone/repoclone.py Executable file
View File

@ -0,0 +1,296 @@
#!/usr/bin/env python3

import argparse
import configparser
import copy
import datetime
import os
import platform
import pprint
import re
import socket
import subprocess
import sys
from collections import OrderedDict

# Path of the per-user configuration file. expanduser('~') honours $HOME when
# it is set and falls back to the password database when it is not, so a
# missing HOME variable does not abort the import with a KeyError.
cfgfile = os.path.join(os.path.expanduser('~'),
                       '.config',
                       'optools',
                       'repoclone',
                       'centos.ini')

class cur_ver(object):
    """The CentOS version of the host running the script.

    Backs the "{cur_ver}" placeholder family in the configuration file.

    Attributes:
        full: The complete version string, e.g. "7.4.1708".
        maj:  The major version as an int, e.g. 7.
        min:  The minor version as an int, e.g. 4 (0 if the host reports none).
        rev:  The revision as a string, e.g. "1708" ("" if the host reports
              none, as with two-part versions such as "6.9").

    Raises:
        ValueError: If the host is not running CentOS.
    """

    # Consulted only when platform.linux_distribution() is unavailable.
    _RELEASE_FILE = '/etc/centos-release'

    def __init__(self):
        _distname, _version = self._distro()
        # The distribution is reported as either "CentOS" or "CentOS Linux",
        # so the " Linux" suffix has to be optional.
        if not re.search(r'^CentOS( Linux)?$', _distname, re.IGNORECASE):
            raise ValueError(('You have specified "{cur_ver}" in your ' +
                              'config, but you are not running this script ' +
                              'on CentOS!'))
        _ver = _version.split('.')
        self.full = '.'.join(_ver)
        self.maj = int(_ver[0])
        # Not every release carries all three components.
        self.min = int(_ver[1]) if len(_ver) > 1 else 0
        self.rev = _ver[2] if len(_ver) > 2 else ''

    @staticmethod
    def _distro():
        """Return (distribution name, version string) for this host."""
        # platform.linux_distribution() is missing from newer Pythons; use it
        # when it is there and parse the release file otherwise.
        _legacy = getattr(platform, 'linux_distribution', None)
        if _legacy is not None:
            _info = _legacy()
            return (_info[0], _info[1])
        try:
            with open(cur_ver._RELEASE_FILE, 'r') as f:
                _line = f.readline()
        except OSError:
            return ('', '')
        # e.g. "CentOS Linux release 7.4.1708 (Core)"
        _match = re.search(r'^(.*?)\s+release\s+([0-9][0-9.]*)', _line)
        if not _match:
            return ('', '')
        return (_match.group(1), _match.group(2).rstrip('.'))

    def __str__(self):
        return self.full

# Rsync options
# Options common to every rsync invocation; MirrorMgr._repo_sync builds each
# rsync command line from this list.
# NOTE(review): both '--links' and '--copy-links' are listed below -- confirm
# which symlink behaviour is actually intended.
opts = [
'--recursive', # recurse into directories
'--times', # preserve modification times
'--links', # copy symlinks as symlinks
'--hard-links', # preserve hard links
'--quiet', # suppress non-error messages
'--delete-after', # receiver deletes after transfer, not during
'--delay-updates', # put all updated files into place at end
'--copy-links', # transform symlink into referent file/dir
'--safe-links', # ignore symlinks that point outside the tree
#'--max-delete', # don't delete more than NUM files
'--delete-excluded', # also delete excluded files from dest dirs
]

# Built-in configuration, used both as the parser's defaults and to generate a
# starter configuration file when none exists. The 'DEFAULT' entry holds the
# options shared by all repositories; every other key is a repository section
# overriding some of them.
# Every value is a string on purpose: ConfigParser only accepts string option
# values, and iterables ("arches", "releases", "excludes") are parsed back as
# comma-separated lists.
dflts = {'DEFAULT': {'repo_name': '{name}',
                     'enabled': '0',
                     'arches': 'i686,x86_64',
                     'baseuri': ('mirror.centos.org/centos/{rel_ver}/' +
                                 '${repo_name}/{arch}'),
                     'destination': ('${mount}/centos/{rel_ver}/' +
                                     '${repo_name}/{arch}'),
                     'mount': '/mnt/repos',
                     'bwlimit': '0',
                     'lockfile': '/var/run/repo-sync_{name}.lck',
                     'logfile': '/var/log/repo/centos.log',
                     'releases': '6,7',
                     'excludes': ''},
         'base': {'repo_name': 'os',
                  'enabled': '1'},
         'updates': {'enabled': '1'},
         'extras': {'enabled': '1'},
         'centosplus': {'enabled': '1'},
         'epel': {'enabled': '1',
                  'baseuri': ('dl.fedoraproject.org::fedora-{name}0/' +
                              '{rel_ver}/{arch}'),
                  'destination': '${mount}/centos/{name}/{rel_ver}/{arch}'},
         'ius': {'enabled': '0',
                 'baseuri': ('dl.iuscommunity.org/{name}/stable/CentOS/' +
                             '{rel_ver}/{arch}')}}

class MirrorMgr(object):
    """Clone the repositories described in the configuration file via rsync.

    Each enabled section of the configuration is one repository. For every
    combination of that repository's "arches" and "releases", the "{...}"
    placeholders in its options are expanded, the environment is checked
    (mountpoint, rsync reachability, target directories) and rsync is run.

    Attributes:
        cfg:     The ConfigParser holding the parsed configuration.
        cfg_in:  The raw text of the configuration file.
        strvars: The values available to "{...}" placeholders.
        repo:    The options of the repository/combination being processed.
        opts:    The rsync options used for the most recent invocation.
    """

    # Port the rsync daemon listens on unless the URI names another one.
    _RSYNC_PORT = 873
    # Seconds to wait for the rsync reachability probe before giving up.
    _CONNECT_TIMEOUT = 30
    # Matches blank lines and full-line "#"/";" comments.
    _BLANK_OR_COMMENT = re.compile(r'^\s*([#;].*)?$')
    # Matches "{cur_ver}" as well as attribute forms such as "{cur_ver.maj}".
    _CUR_VER_REF = re.compile(r'\{cur_ver(\.\w+)?\}')
    # How rsync introduces a dangling upstream symlink on stderr.
    _BROKEN_LINK_PREFIX = 'symlink has no referent: '

    def __init__(self):
        self.cfg = configparser.ConfigParser(
            interpolation=configparser.ExtendedInterpolation(),
            defaults=self._stringify_options(dflts['DEFAULT']))
        self.strvars = {'cur_ver': None,
                        'name': None,
                        'arches': [],
                        'releases': [],
                        'cur_arch': platform.machine(),
                        'rel_ver': None,
                        'arch': None}
        if not os.path.isfile(cfgfile):
            self.gen_cfg()
        self.get_cfg()
        self.chk_cur_ver()
        self.parse_cfg()

    # --- small, side-effect-free helpers ---------------------------------

    @staticmethod
    def _stringify_options(options):
        """Return a copy of *options* whose values are all strings.

        ConfigParser only accepts string values. Lists/tuples become
        comma-separated strings and None becomes the empty string.
        """
        result = {}
        for key, value in options.items():
            if value is None:
                result[key] = ''
            elif isinstance(value, (list, tuple)):
                result[key] = ','.join(str(item) for item in value)
            else:
                result[key] = str(value)
        return result

    @staticmethod
    def _split_csv(value):
        """Split a comma-separated option into its non-empty, stripped items."""
        return [item.strip() for item in value.split(',') if item.strip()]

    @classmethod
    def _uses_cur_ver(cls, text):
        """Return True if a non-comment line of *text* references cur_ver."""
        for line in text.splitlines():
            # Inline comments are not stripped, only blank/comment lines.
            if cls._BLANK_OR_COMMENT.search(line):
                continue
            if cls._CUR_VER_REF.search(line):
                return True
        return False

    @classmethod
    def _rsync_endpoint(cls, baseuri):
        """Return the (host, port) named by a scheme-less rsync *baseuri*."""
        server = baseuri.split('/')[0]
        # "host::module" is the daemon syntax; the module is not the port.
        server = server.split('::')[0]
        host, _, port = server.partition(':')
        return (host, int(port) if port else cls._RSYNC_PORT)

    @staticmethod
    def _rsync_source(baseuri):
        """Return the rsync source argument for a scheme-less *baseuri*."""
        # rsync knows "host::module/path" and "rsync://host[:port]/module/path"
        # and the two must not be mixed, so only the latter gets the scheme.
        if '::' in baseuri.split('/')[0]:
            source = baseuri
        else:
            source = 'rsync://{0}'.format(baseuri)
        return os.path.join(source, '.')

    @classmethod
    def _format_error(cls, line):
        """Return the user-facing form of one line of rsync stderr."""
        if line.startswith(cls._BROKEN_LINK_PREFIX):
            target = line[len(cls._BROKEN_LINK_PREFIX):].replace('"', '')
            return 'Broken upstream symlink: {0}'.format(target)
        return line

    @staticmethod
    def _interactive():
        """Return True when run from a terminal rather than e.g. cron."""
        return os.isatty(sys.stdin.fileno())

    # --- configuration handling ------------------------------------------

    def get_cfg(self):
        """Read the raw configuration file text into self.cfg_in."""
        with open(cfgfile, 'r') as f:
            self.cfg_in = f.read()

    def chk_cur_ver(self):
        """Detect the host's CentOS version if the configuration needs it.

        Raises:
            ValueError: (from cur_ver) if the configuration references
                cur_ver but the host is not running CentOS.
        """
        if self._uses_cur_ver(self.cfg_in):
            self.strvars['cur_ver'] = cur_ver()

    def gen_cfg(self):
        """Write a stripped-down default configuration file to cfgfile."""
        cfg = configparser.ConfigParser(
            interpolation=configparser.ExtendedInterpolation(),
            defaults=self._stringify_options(dflts['DEFAULT']))
        for section, options in dflts.items():
            if section != 'DEFAULT':
                cfg[section] = self._stringify_options(options)
        # The comment about how it's a stripped down default conf.
        cmnt = ('# This is an autogenerated configuration file for ' +
                'r00t^s\'s OpTools CentOS\n# mirror script.\n# You ' +
                'should reference the fully commented version ' +
                'distributed with the script,\n# "centos.dflts.ini".\n\n')
        # On a fresh account the configuration directory does not exist yet.
        os.makedirs(os.path.dirname(cfgfile), exist_ok=True)
        with open(cfgfile, 'w') as f:
            f.write(cmnt)
            cfg.write(f)
        print(('A configuration file has been automatically generated for ' +
               'you at {0}. You should review and customize it, because it ' +
               'most likely will not work out of the box.').format(cfgfile))

    def parse_cfg(self):
        """Parse the raw configuration text into self.cfg."""
        self.cfg.read_string(self.cfg_in)

    # --- synchronization ---------------------------------------------------

    def sync(self):
        """Synchronize every enabled repository, arch by arch and release
        by release."""
        for repo in self.cfg.sections():
            # Skip disabled repos.
            if not self.cfg.getboolean(repo, 'enabled'):
                continue
            section = self.cfg[repo]
            self.strvars['name'] = repo
            # This should be safe since the only thing that makes sense here
            # is {cur_arch}, which we populate in __init__().
            self.strvars['arches'] = self._split_csv(
                section['arches'].format(**self.strvars))
            self.strvars['releases'] = self._split_csv(
                section['releases'].format(**self.strvars))
            for arch in self.strvars['arches']:
                for rel_ver in self.strvars['releases']:
                    self.strvars['arch'] = arch
                    self.strvars['rel_ver'] = rel_ver
                    # Start from the unexpanded options every time; otherwise
                    # the first combination's expansion would be reused for
                    # all the following ones.
                    self.repo = dict(section)
                    self._repo_chk(repo)
                    self._repo_sync(repo)

    def _repo_sync(self, repo):
        """Run rsync for the combination currently described by self.repo.

        Expects _repo_chk() to have expanded self.repo first. Exits the
        process if the repository's lockfile already exists.

        Args:
            repo: The section name, used to label reported errors.
        """
        # Reset the Rsync options. Copy the shared list so that per-repo
        # additions do not pile up from one invocation to the next.
        self.opts = list(opts)
        self.repo['bwlimit'] = float(self.repo['bwlimit'])
        if self.repo['bwlimit'] > 0.0:
            # limit socket I/O bandwidth
            limit = self.repo['bwlimit']
            if limit.is_integer():
                limit = int(limit)
            self.opts.append('--bwlimit=' + str(limit))
        for pattern in self._split_csv(self.repo.get('excludes') or ''):
            self.opts.append('--exclude=' + pattern)
        cmd = ['rsync'] + self.opts  # Set up a cmd list for subprocess
        # The path on the remote mirror
        cmd.append(self._rsync_source(self.repo['baseuri']))
        # The local destination
        cmd.append(os.path.join(self.repo['destination'], '.'))
        lockfile = self.repo['lockfile']
        try:
            # 'x' fails if the file exists, making check-and-create atomic.
            with open(lockfile, 'x') as f:
                f.write(str(os.getpid()))
        except FileExistsError:
            with open(lockfile, 'r') as f:
                existingpid = f.read().strip()
            if self._interactive():
                # Running from shell
                sys.exit(('!! A repo synchronization seems to already be ' +
                          'running (PID: {0}). Quitting. !!').format(
                              existingpid))
            sys.exit()  # We're running in cron, stay silent.
        try:
            with open(self.repo['logfile'], 'a') as log:
                c = subprocess.run(cmd, stdout=log, stderr=subprocess.PIPE)
            # An aware "now" is required; a naive UTC datetime would be
            # interpreted as local time by timestamp().
            now = int(datetime.datetime.now(
                datetime.timezone.utc).timestamp())
            with open(os.path.join(self.repo['destination'],
                                   'lastsync'), 'w') as f:
                f.write(str(now) + '\n')
        finally:
            # Never leave a stale lock behind, even if the run blew up.
            os.remove(lockfile)
        errors = c.stderr.decode('utf-8', errors='replace').splitlines()
        # CentOS 7 main doesn't have an i686.
        # NOTE(review): this used to compare rel_ver against the int 7 (it is
        # a string) and to look for "/6/" in the message, so it never fired;
        # confirm that dropping change_dir errors for 7/i686 is the intent.
        if (str(self.strvars['rel_ver']) == '7' and
                self.strvars['arch'] == 'i686'):
            errors = [e for e in errors
                      if not e.startswith('rsync: change_dir')]
        if not errors:
            return
        # Only report errors at the end of the run if we aren't running in
        # cron. Otherwise, log them.
        if self._interactive():
            print('[{0}] We encountered some errors:'.format(repo))
            for e in errors:
                print(self._format_error(e))
        else:
            with open(self.repo['logfile'], 'a') as log:
                log.writelines('{0}\n'.format(e) for e in errors)

    def _repo_chk(self, repo):
        """Expand and verify the options for the current arch/release.

        Reads the unexpanded options of section *repo*, expands their
        placeholders with self.strvars, stores the results in self.repo and
        checks that the environment is usable.

        Args:
            repo: The section name of the repository being checked.

        Raises:
            RuntimeError: If the mountpoint is not mounted or the rsync
                server cannot be reached.
        """
        def expand(option):
            return self.cfg[repo][option].format(**self.strvars)

        def expand_path(option):
            return os.path.abspath(os.path.expanduser(expand(option)))

        def chkmnt():
            self.repo['mount'] = expand_path('mount')
            mntchk = subprocess.run(['findmnt', self.repo['mount']],
                                    stdout=subprocess.DEVNULL,
                                    stderr=subprocess.DEVNULL)
            if mntchk.returncode != 0:
                raise RuntimeError(('!! BAILING OUT; {0} isn\'t ' +
                                    'mounted !!').format(self.repo['mount']))

        def chkrsync():
            # flags= must be passed by keyword; the fourth positional
            # argument of re.sub() is the replacement count.
            self.repo['baseuri'] = re.sub(r'^\s*rsync://',
                                          '',
                                          expand('baseuri'),
                                          flags=re.IGNORECASE)
            host, port = self._rsync_endpoint(self.repo['baseuri'])
            try:
                # Tries every address family the host resolves to and
                # closes the probe socket again.
                with socket.create_connection((host, port),
                                              timeout=self._CONNECT_TIMEOUT):
                    pass
            except OSError as err:
                raise RuntimeError(('Rsync on host {0}:{1} is not ' +
                                    'accessible!').format(host,
                                                          port)) from err

        def chkdest():
            self.repo['destination'] = expand_path('destination')
            os.makedirs(self.repo['destination'], exist_ok=True)

        def chkdest_files():
            for option in ('logfile', 'lockfile'):
                self.repo[option] = expand_path(option)
                os.makedirs(os.path.dirname(self.repo[option]),
                            exist_ok=True)

        # The Business-End(TM)
        chkmnt()
        chkrsync()
        chkdest()
        chkdest_files()

def main():
    """Load (or generate) the configuration and synchronize all enabled
    repositories."""
    manager = MirrorMgr()
    manager.sync()


# Only run when executed as a script, not when imported.
if __name__ == '__main__':
    main()