time to test.

This commit is contained in:
brent s. 2020-06-16 15:44:50 -04:00
parent 66d1ad7af5
commit 961952ed56
Signed by: bts
GPG Key ID: 8C004C2F93481F6B
9 changed files with 155 additions and 92 deletions

View File

@ -1,6 +1,7 @@
<?xml version="1.0" encoding="UTF-8" ?> <?xml version="1.0" encoding="UTF-8" ?>
<!-- <!--
This is by default placed in ~/.config/repomirror.xml This is by default placed in ~/.config/repomirror.xml
Remember to replace any necessary special characters (https://stackoverflow.com/a/1091953/733214).
--> -->
<mirror xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" <mirror xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns="https://git.square-r00t.net/RepoMirror/" xmlns="https://git.square-r00t.net/RepoMirror/"
@ -71,7 +72,7 @@
<long>delay-updates</long> <long>delay-updates</long>
<long>copy-links</long> <long>copy-links</long>
<long>safe-links</long> <long>safe-links</long>
<long>delete-extended</long> <long>delete-excluded</long>
<long value=".*">exclude</long> <long value=".*">exclude</long>
<rsyncArgs> <rsyncArgs>
These arguments should be sane for most, if not all, rsync-driven repository mirroring. The last one (exclude) may These arguments should be sane for most, if not all, rsync-driven repository mirroring. The last one (exclude) may
@ -86,9 +87,9 @@
<long>archive</long> <long>archive</long>
<long>delete-after</long> <long>delete-after</long>
<!-- <!--
An argument with a value (info=2). An argument with a value (info=progress2).
--> -->
<long value="2">info</long> <long value="progress2">info</long>
<!-- <!--
A "short" option (single hyphen). A "short" option (single hyphen).
--> -->
@ -121,11 +122,6 @@
Required; the *remote* path part of the URI. The leading / is necessary. A trailing one will be assumed. Required; the *remote* path part of the URI. The leading / is necessary. A trailing one will be assumed.
--> -->
<path>/archlinux/</path> <path>/archlinux/</path>
<!--
The speed to cap socket bandwidth at (in KiB). Decimals are okay.
Only valid for rsync; ignored for FTP. If not provided, the default is to not throttle.
-->
<bwlimit>7000</bwlimit>
</upstream> </upstream>
<!-- <!--
Multiple upstreams can be specified. They are tried in order specified and if connection fails or times out, Multiple upstreams can be specified. They are tried in order specified and if connection fails or times out,
@ -153,7 +149,9 @@
<path>/centos/</path> <path>/centos/</path>
</upstream> </upstream>
<dest>/srv/repos/arch/.</dest> <dest>/srv/repos/arch/.</dest>
<lastLocalCheck timeFormat="MICROSECOND_EPOCH">/srv/http/centos.lastcheck</lastLocalCheck> <lastLocalCheck timeFormat="MICROSECOND_EPOCH">/srv/repos/centos/CHECKED</lastLocalCheck>
<lastLocalSync timeFormat="UNIX_EPOCH">/srv/repos/arch/lastsync</lastLocalSync> <lastLocalSync timeFormat="UNIX_EPOCH">/srv/repos/centos/TIME</lastLocalSync>
<lastRemoteUpdate timeFormat="%a %d %b %H:%M:%S UTC %Y">/timestamp.txt</lastRemoteUpdate>
<lastRemoteSync timeFormat="UNIX_EPOCH">/TIME</lastRemoteSync>
</distro> </distro>
</mirror> </mirror>

View File

@ -86,7 +86,7 @@ class Config(object):
'Be sure to configure it appropriately.').format(self.default_xml_path)) 'Be sure to configure it appropriately.').format(self.default_xml_path))
raise ValueError('Config does not exist') raise ValueError('Config does not exist')
else: else:
with open(xml_path, 'rb') as fh: with open(self.xml_path, 'rb') as fh:
self.raw = fh.read() self.raw = fh.read()
self.xml = None self.xml = None
self.xsd = None self.xsd = None

View File

@ -1,12 +1,14 @@
PROTO_DEF_PORTS = {'ftp': 21, PROTO_DEF_PORTS = {'ftp': 21,
'rsync': 873} 'rsync': 873}
RSYNC_DEF_ARGS = ['recursive', RSYNC_DEF_ARGS = ['--recursive',
'times', '--times',
'links', '--links',
'hard-links', '--hard-links',
'delete-after', '--delete-after',
'delay-updates', '--delay-updates',
'copy-links', '--copy-links',
'safe-links', '--safe-links',
'delete-extended', '--delete-excluded',
'exclude=.*'] '--exclude=.*']
# How many days an upstream should have last synced by before it's considered stale.
DAYS_WARN = 2

View File

@ -23,10 +23,13 @@ class BaseFetcher(object):
os.chown(self.dest, **self.owner) os.chown(self.dest, **self.owner)


def check(self):
    """
    Refresh self.timestamps from the upstream's remote timestamp files.

    Every entry of self.filechecks['remote'] that is set is fetched with
    self.fetch_content() (using the entry's .path), decoded as UTF-8, stripped,
    and parsed according to the entry's .fmt. The parsed value is stored in
    self.timestamps under the same key.

    Returns None.
    Raises ValueError if fetched content does not match the expected format.
    """
    for k, v in self.filechecks['remote'].items():
        if not v:
            continue
        tstmp_raw = self.fetch_content(v.path).decode('utf-8').strip()
        if '%s' in v.fmt:
            # datetime.strptime() does not support '%s', so epoch values are converted directly.
            # float() is used instead of int() so fractional epochs ("1592336690.25") parse too;
            # plain integer strings give exactly the same result as before.
            tstmp = datetime.datetime.fromtimestamp(float(tstmp_raw))
        else:
            tstmp = datetime.datetime.strptime(tstmp_raw, v.fmt)
        self.timestamps[k] = tstmp
    _logger.debug('Updated timestamps: {0}'.format(self.timestamps))
    return(None)

View File

@ -32,9 +32,9 @@ class RSync(_base.BaseFetcher):
super().__init__(domain, port, path, dest, owner = owner, filechecks = filechecks, *args, **kwargs) super().__init__(domain, port, path, dest, owner = owner, filechecks = filechecks, *args, **kwargs)
_logger.debug('Instantiated RSync fetcher') _logger.debug('Instantiated RSync fetcher')
if rsync_args: if rsync_args:
self.rsync_args = rsync_args self.rsync_args = rsync_args.args[:]
else: else:
self.rsync_args = constants.RSYNC_DEF_ARGS self.rsync_args = constants.RSYNC_DEF_ARGS[:]
_logger.debug('RSync args given: {0}'.format(self.rsync_args)) _logger.debug('RSync args given: {0}'.format(self.rsync_args))
if log: if log:
# Do I want to do this in subprocess + logging module? Or keep this? # Do I want to do this in subprocess + logging module? Or keep this?
@ -43,7 +43,7 @@ class RSync(_base.BaseFetcher):
_log_path = None _log_path = None
for h in _logger.handlers: for h in _logger.handlers:
if isinstance(h, logging.handlers.RotatingFileHandler): if isinstance(h, logging.handlers.RotatingFileHandler):
_log_path = h.baseFileName _log_path = h.baseFilename
break break
self.rsync_args.extend(['--verbose', self.rsync_args.extend(['--verbose',
'--log-file-format="[RSYNC {0}:{1}]:%l:%f%L"'.format(self.domain, self.port), '--log-file-format="[RSYNC {0}:{1}]:%l:%f%L"'.format(self.domain, self.port),
@ -61,11 +61,12 @@ class RSync(_base.BaseFetcher):
*self.rsync_args, *self.rsync_args,
path, path,
dest] dest]
_logger.debug('Running command: {0}'.format(' '.join(cmd_str)))
cmd = subprocess.run(cmd_str, cmd = subprocess.run(cmd_str,
stdout = subprocess.PIPE, stdout = subprocess.PIPE,
stderr = subprocess.PIPE) stderr = subprocess.PIPE)
stdout = cmd.stdout.read().decode('utf-8').strip() stdout = cmd.stdout.decode('utf-8').strip()
stderr = cmd.stderr.read().decode('utf-8').strip() stderr = cmd.stderr.decode('utf-8').strip()
if stdout != '': if stdout != '':
_logger.debug('STDOUT: {0}'.format(stdout)) _logger.debug('STDOUT: {0}'.format(stdout))
if stderr != '' or cmd.returncode != 0: if stderr != '' or cmd.returncode != 0:
@ -81,11 +82,12 @@ class RSync(_base.BaseFetcher):
*self.rsync_args, *self.rsync_args,
url, url,
tf] tf]
_logger.debug('Running command: {0}'.format(' '.join(cmd_str)))
cmd = subprocess.run(cmd_str, cmd = subprocess.run(cmd_str,
stdout = subprocess.PIPE, stdout = subprocess.PIPE,
stderr = subprocess.PIPE) stderr = subprocess.PIPE)
stdout = cmd.stdout.read().decode('utf-8').strip() stdout = cmd.stdout.decode('utf-8').strip()
stderr = cmd.stderr.read().decode('utf-8').strip() stderr = cmd.stderr.decode('utf-8').strip()
if stdout != '': if stdout != '':
_logger.debug('STDOUT: {0}'.format(stdout)) _logger.debug('STDOUT: {0}'.format(stdout))
if stderr != '' or cmd.returncode != 0: if stderr != '' or cmd.returncode != 0:

View File

@ -12,9 +12,9 @@ except ImportError:
def preplog(logfile = None): def preplog(logfile = None):
if not logfile: if not logfile:
if os.geteuid() == 0: if os.geteuid() == 0:
logfile = '/var/log/repo/main.log' logfile = '/var/log/repo/_main.log'
else: else:
logfile = '~/.cache/repo.log' logfile = '~/.cache/repomirror.log'
# Prep the log file. # Prep the log file.
logfile = os.path.abspath(os.path.expanduser(logfile)) logfile = os.path.abspath(os.path.expanduser(logfile))
os.makedirs(os.path.dirname(logfile), exist_ok = True, mode = 0o0700) os.makedirs(os.path.dirname(logfile), exist_ok = True, mode = 0o0700)

View File

@ -4,10 +4,14 @@ import pwd
import grp import grp
import os import os
import socket import socket
import warnings
##
import psutil
## ##
from . import config from . import config
from . import constants from . import constants
from . import fetcher from . import fetcher
from . import logger




_logger = logging.getLogger() _logger = logging.getLogger()
@ -16,10 +20,10 @@ _logger = logging.getLogger()
def get_owner(owner_xml): def get_owner(owner_xml):
owner = {} owner = {}
user = owner_xml.find('user') user = owner_xml.find('user')
if user: if user is not None:
user = user.text user = user.text
group = owner_xml.find('group') group = owner_xml.find('group')
if group: if group is not None:
group = group.text group = group.text
if user: if user:
user_obj = pwd.getpwnam(user) user_obj = pwd.getpwnam(user)
@ -60,7 +64,7 @@ class Args(object):


class Mount(object): class Mount(object):
def __init__(self, mpchk_xml): def __init__(self, mpchk_xml):
self.path = os.path.abspath(os.path.expanduser(mpchk_xml)) self.path = os.path.abspath(os.path.expanduser(mpchk_xml.text))
self.is_mounted = None self.is_mounted = None
self._check_mount() self._check_mount()


@ -90,21 +94,27 @@ class TimestampFile(object):
_logger.debug('Set timestamp format string to {0}'.format(self.fmt)) _logger.debug('Set timestamp format string to {0}'.format(self.fmt))
self.owner_xml = owner_xml self.owner_xml = owner_xml
self.owner = {} self.owner = {}
if self.owner_xml: if self.owner_xml is not None:
self.owner = get_owner(self.owner_xml) self.owner = get_owner(self.owner_xml)
_logger.debug('Owner set is {0}'.format(self.owner)) _logger.debug('Owner set is {0}'.format(self.owner))
self.path = os.path.abspath(os.path.expanduser(ts_xml.text)) self.path = os.path.abspath(os.path.expanduser(ts_xml.text))
_logger.debug('Path resolved to {0}'.format(self.path)) _logger.debug('Path resolved to {0}'.format(self.path))


def read(self, parentdir = None):
    """
    Read and parse the timestamp stored in this timestamp file.

    parentdir: optional directory. When given, self.path (with its leading '/'
               removed) is resolved beneath it; otherwise self.path is used as-is.

    Returns a datetime.datetime, or None if the resolved path is not an existing file.
    Raises ValueError if the file's contents do not match self.fmt.
    """
    timestamp = None
    if parentdir:
        path = os.path.join(os.path.abspath(os.path.expanduser(parentdir)),
                            self.path.lstrip('/'))
    else:
        path = self.path
    if os.path.isfile(path):
        with open(path, 'r') as fh:
            ts_raw = fh.read().strip()
        if '%s' in self.fmt:
            # datetime.strptime() does not support '%s'; convert the epoch value directly.
            # float() accepts both integer and fractional epoch strings.
            timestamp = datetime.datetime.fromtimestamp(float(ts_raw))
        else:
            timestamp = datetime.datetime.strptime(ts_raw, self.fmt)
        # Report the path that was actually opened; it differs from self.path when parentdir is given.
        _logger.debug('Read timestamp {0} from {1}'.format(str(timestamp), path))
    return(timestamp)


def write(self): def write(self):
@ -136,13 +146,10 @@ class Upstream(object):
self.filechecks = filechecks self.filechecks = filechecks
self.has_new = False self.has_new = False
# These are optional. # These are optional.
for i in ('port', 'bwlimit'): port = self.xml.find('port')
e = self.xml.find(i) if port is not None:
if e: self.port = int(port.text)
setattr(self, i, int(e.text)) else:
else:
setattr(self, i, None)
if not getattr(self, 'port'):
self.port = constants.PROTO_DEF_PORTS[self.sync_type] self.port = constants.PROTO_DEF_PORTS[self.sync_type]
self.available = None self.available = None
if self.sync_type == 'rsync': if self.sync_type == 'rsync':
@ -176,8 +183,8 @@ class Upstream(object):
class Distro(object): class Distro(object):
def __init__(self, distro_xml): def __init__(self, distro_xml):
self.xml = distro_xml self.xml = distro_xml
self.name = distro_xml.attrib['name'] self.name = self.xml.attrib['name']
self.dest = os.path.abspath(os.path.expanduser(distro_xml.find('dest').text)) self.dest = os.path.abspath(os.path.expanduser(self.xml.find('dest').text))
self.mount = Mount(self.xml.find('mountCheck')) self.mount = Mount(self.xml.find('mountCheck'))
self.filechecks = {'local': {'check': None, self.filechecks = {'local': {'check': None,
'sync': None}, 'sync': None},
@ -187,20 +194,21 @@ class Distro(object):
self.rsync_args = None self.rsync_args = None
self.owner = None self.owner = None
self.upstreams = [] self.upstreams = []
self.lockfile = '/var/run/repomirror/{0}.lck'.format(self.name)
# These are optional. # These are optional.
self.owner_xml = self.xml.find('owner') self.owner_xml = self.xml.find('owner')
if self.owner_xml: if self.owner_xml is not None:
self.owner = get_owner(self.owner_xml) self.owner = get_owner(self.owner_xml)
self.rsync_xml = self.xml.find('rsyncArgs') self.rsync_xml = self.xml.find('rsyncArgs')
if self.rsync_xml: if self.rsync_xml is not None:
self.rsync_args = Args(self.rsync_xml) self.rsync_args = Args(self.rsync_xml)
for i in ('Check', 'Sync'): for i in ('Check', 'Sync'):
e = self.xml.find('lastLocal{0}'.format(i)) e = self.xml.find('lastLocal{0}'.format(i))
if e: if e is not None:
self.filechecks['local'][i.lower()] = TimestampFile(e) self.filechecks['local'][i.lower()] = TimestampFile(e)
for i in ('Sync', 'Update'): for i in ('Sync', 'Update'):
e = self.xml.find('lastRemote{0}'.format(i)) e = self.xml.find('lastRemote{0}'.format(i))
if e: if e is not None:
self.filechecks['remote'][i.lower()] = TimestampFile(e) self.filechecks['remote'][i.lower()] = TimestampFile(e)
for u in self.xml.findall('upstream'): for u in self.xml.findall('upstream'):
self.upstreams.append(Upstream(u, self.upstreams.append(Upstream(u,
@ -210,18 +218,69 @@ class Distro(object):
filechecks = self.filechecks)) filechecks = self.filechecks))


def check(self):
    """
    Refresh local timestamps, then flag each available upstream that has newer content.

    The local timestamp files in self.filechecks['local'] are read into
    self.timestamps. For every upstream with .available set, its fetcher's
    check() is run and u.has_new is set to True when either:
      * the fetcher produced no remote timestamps at all, or
      * it has an 'update' timestamp and there is no local timestamp, or the
        newest local timestamp is older than that 'update' timestamp.
    If the fetcher has a 'sync' timestamp more than constants.DAYS_WARN days
    old, a warning is logged and issued through warnings.warn().

    Returns None.
    """
    for k, v in self.filechecks['local'].items():
        if v:
            self.timestamps[k] = v.read()
    _logger.debug('Updated timestamps: {0}'.format(self.timestamps))
    # Sorted ascending, so the last element is the most recent local timestamp.
    local_checks = sorted([i for i in self.timestamps.values() if i])
    for u in self.upstreams:
        if not u.available:
            continue
        u.fetcher.check()
        remote_checks = [i for i in u.fetcher.timestamps.values() if i]
        if not remote_checks:
            # Nothing to compare against; treat the upstream as having new content.
            u.has_new = True
            continue
        update = u.fetcher.timestamps.get('update')
        sync = u.fetcher.timestamps.get('sync')
        if update and (not local_checks or local_checks[-1] < update):
            u.has_new = True
        if sync:
            # NOTE(review): this compares against naive UTC "now"; presumably the remote
            # 'sync' timestamp is also UTC - confirm against how the fetcher parses it.
            td = datetime.datetime.utcnow() - sync
            if td.days > constants.DAYS_WARN:
                # The placeholder was previously written as '{1}}', which made .format() raise
                # ValueError ("Single '}' encountered") instead of emitting this warning.
                _logger.warning(('Upstream {0} has not synced for {1} or more days; this '
                                 'repository may be out of date.').format(u.fetcher.url, constants.DAYS_WARN))
                warnings.warn('Upstream may be out of date')
    return(None)

def sync(self):
self.check()
my_pid = os.getpid()
if os.path.isfile(self.lockfile):
with open(self.lockfile, 'r') as fh:
pid = int(fh.read().strip())
if my_pid == pid: # This logically should not happen, but something might have gone stupid.
_logger.warning('Someone call the Ghostbusters because this machine is haunted.')
return(False)
else:
warnmsg = 'The sync process for {0} is locked with file {1} and PID {2}'.format(self.name,
self.lockfile,
pid)
try:
proc = psutil.Process(pid)
warnmsg += '.'
except (psutil.NoSuchProcess, FileNotFoundError, AttributeError):
proc = None
warnmsg += ' but that PID no longer exists.'
_logger.warning(warnmsg)
if proc:
_logger.warning('PID information: {0}'.format(vars(proc)))
warnings.warn(warnmsg)
return(False)
if not self.mount.is_mounted:
_logger.error(('The mountpoint {0} for distro {1} is not mounted; '
'refusing to sync').format(self.mount.path, self.name))
return(False)
os.makedirs(os.path.dirname(self.lockfile), mode = 0o0755, exist_ok = True)
with open(self.lockfile, 'w') as fh:
fh.write('{0}\n'.format(str(my_pid)))
for u in self.upstreams: for u in self.upstreams:
if not u.available: if not u.available:
continue continue
u.fetcher.check(self.filechecks['local'])
if u.has_new: if u.has_new:
u.sync() u.sync()
if self.filechecks['local']['sync']: if self.filechecks['local']['sync']:
@ -229,11 +288,20 @@ class Distro(object):
break break
if self.filechecks['local']['check']: if self.filechecks['local']['check']:
self.filechecks['local']['check'].write() self.filechecks['local']['check'].write()
return(None) os.remove(self.lockfile)
return(True)




class Sync(object): class Sync(object):
def __init__(self, cfg = None, dummy = False, distro = None, logdir = None, *args, **kwargs): def __init__(self, cfg = None, dummy = False, distro = None, logdir = None, *args, **kwargs):
if logdir:
self.logdir = logdir
else:
self.logdir = os.path.dirname(logger.filehandler.baseFilename)
self._orig_log_old = logger.filehandler.baseFilename
self._orig_log = logger.preplog(os.path.join(self.logdir, '_main.log'))
logger.filehandler.close()
logger.filehandler.baseFilename = self._orig_log
try: try:
_args = dict(locals()) _args = dict(locals())
del(_args['self']) del(_args['self'])
@ -243,16 +311,28 @@ class Sync(object):
self.distro = distro self.distro = distro
else: else:
self.distro = [] self.distro = []
self._distro_objs = [] self.cfg = config.Config(cfg)
self.logdir = logdir except Exception as e:
self.xml = config.Config(cfg)
self._distro_populate()
except Exception:
_logger.error('FATAL ERROR. Stacktrace follows.', exc_info = True) _logger.error('FATAL ERROR. Stacktrace follows.', exc_info = True)

raise e
def _distro_populate(self):
pass


def sync(self):
    """
    Sync the requested distros, or every distro in the config if none were requested.

    When self.distro is non-empty, each name in it is looked up in the config
    XML; names with no matching <distro> element are logged as errors and
    skipped. Otherwise every <distro> element found in the config is synced.
    Before each distro is synced the shared file handler is closed and pointed
    at '<logdir>/<distro name>.log'; afterwards it is pointed back at
    self._orig_log.

    Returns None.
    """
    def _sync_distro(distro_xml):
        # Retarget the shared log handler at this distro's log file, then run its sync.
        logger.filehandler.close()
        logger.filehandler.baseFilename = os.path.join(self.logdir,
                                                       '{0}.log'.format(distro_xml.attrib['name']))
        Distro(distro_xml).sync()

    if self.distro:
        for d in self.distro:
            # xpath() yields a (possibly empty) list of matching elements, never None, so
            # emptiness is what signals "not found" and the element itself is the first item.
            matches = self.cfg.xml.xpath('//distro[@name="{0}"]'.format(d))
            if not matches:
                _logger.error('Could not find specified distro {0}; skipping'.format(d))
                continue
            _sync_distro(matches[0])
    else:
        for e in self.cfg.xml.findall('distro'):
            _sync_distro(e)
    # Point the handler back at the main log file.
    logger.filehandler.close()
    logger.filehandler.baseFilename = self._orig_log
    return(None)

View File

@ -1,22 +0,0 @@
#!/usr/bin/env python3

import os
import shutil
##
import logger
import fetcher

dest = '/tmp/ipxe_ftp'
path = 'ipxe'


def main():
    """Fetch the module-level FTP path into the module-level destination directory."""
    # Remove an existing destination directory before fetching.
    if os.path.isdir(dest):
        shutil.rmtree(dest)
    fetcher.FTP('10.11.12.12', 21, path, dest).fetch()


if __name__ == '__main__':
main()

4
reposync.py Normal file → Executable file
View File

@ -15,7 +15,7 @@ import repomirror
if os.geteuid() == 0: if os.geteuid() == 0:
_def_logdir = '/var/log/repo' _def_logdir = '/var/log/repo'
else: else:
_def_logdir = '~/.cache/logs' _def_logdir = '~/.cache/repologs'




def parseArgs(): def parseArgs():
@ -24,7 +24,7 @@ def parseArgs():
default = '~/.config/repomirror.xml', default = '~/.config/repomirror.xml',
dest = 'cfg', dest = 'cfg',
help = ('The path to the config file. If it does not exist, a bare version will be created. ' help = ('The path to the config file. If it does not exist, a bare version will be created. '
'Default: ~/.config/repomirror.xmlost')) 'Default: ~/.config/repomirror.xml'))
# args.add_argument('-n', '--dry-run', # args.add_argument('-n', '--dry-run',
# action = 'store_true', # action = 'store_true',
# dest = 'dummy', # dest = 'dummy',