time to test.

brent s. 2020-06-16 15:44:50 -04:00
parent 66d1ad7af5
commit 961952ed56
Signed by: bts
GPG Key ID: 8C004C2F93481F6B
9 changed files with 155 additions and 92 deletions

View File

@ -1,6 +1,7 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!--
This is by default placed in ~/.config/repomirror.xml
Remember to replace any necessary special characters (https://stackoverflow.com/a/1091953/733214).
-->
<mirror xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns="https://git.square-r00t.net/RepoMirror/"
@ -71,7 +72,7 @@
<long>delay-updates</long>
<long>copy-links</long>
<long>safe-links</long>
<long>delete-extended</long>
<long>delete-excluded</long>
<long value=".*">exclude</long>
<rsyncArgs>
These arguments should be sane for most, if not all, rsync-driven repository mirroring. The last one (exclude) may
@ -86,9 +87,9 @@
<long>archive</long>
<long>delete-after</long>
<!--
An argument with a value (info=2).
An argument with a value (info=progress2).
-->
<long value="2">info</long>
<long value="progress2">info</long>
<!--
A "short" option (single hyphen).
-->
@ -121,11 +122,6 @@
Required; the *remote* path part of the URI. The leading / is necessary. A trailing one will be assumed.
-->
<path>/archlinux/</path>
<!--
The speed to cap socket bandwidth at (in KiB). Decimals are okay.
Only valid for rsync; ignored for FTP. If not provided, the default is to not throttle.
-->
<bwlimit>7000</bwlimit>
</upstream>
<!--
Multiple upstreams can be specified. They are tried in order specified and if connection fails or times out,
@ -153,7 +149,9 @@
<path>/centos/</path>
</upstream>
<dest>/srv/repos/arch/.</dest>
<lastLocalCheck timeFormat="MICROSECOND_EPOCH">/srv/http/centos.lastcheck</lastLocalCheck>
<lastLocalSync timeFormat="UNIX_EPOCH">/srv/repos/arch/lastsync</lastLocalSync>
<lastLocalCheck timeFormat="MICROSECOND_EPOCH">/srv/repos/centos/CHECKED</lastLocalCheck>
<lastLocalSync timeFormat="UNIX_EPOCH">/srv/repos/centos/TIME</lastLocalSync>
<lastRemoteUpdate timeFormat="%a %d %b %H:%M:%S UTC %Y">/timestamp.txt</lastRemoteUpdate>
<lastRemoteSync timeFormat="UNIX_EPOCH">/TIME</lastRemoteSync>
</distro>
</mirror>
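
A side note on the <long> elements above: each one presumably maps onto a GNU-style long option, with the optional value attribute becoming the "--name=value" form - so <long value="progress2">info</long> turns into --info=progress2 (the spelling rsync actually accepts, hence the change from "2") and <long>delete-excluded</long> into --delete-excluded. A minimal sketch of that mapping, assuming this is roughly what the Args class in main.py does:

from lxml import etree


def long_opt(elem):
    # Hypothetical helper: <long>archive</long> -> '--archive',
    # <long value="progress2">info</long> -> '--info=progress2'.
    val = elem.attrib.get('value')
    if val is not None:
        return('--{0}={1}'.format(elem.text, val))
    return('--{0}'.format(elem.text))


print(long_opt(etree.fromstring('<long value="progress2">info</long>')))  # --info=progress2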

View File

@ -86,7 +86,7 @@ class Config(object):
'Be sure to configure it appropriately.').format(self.default_xml_path))
raise ValueError('Config does not exist')
else:
with open(xml_path, 'rb') as fh:
with open(self.xml_path, 'rb') as fh:
self.raw = fh.read()
self.xml = None
self.xsd = None

View File

@ -1,12 +1,14 @@
PROTO_DEF_PORTS = {'ftp': 21,
'rsync': 873}
RSYNC_DEF_ARGS = ['recursive',
'times',
'links',
'hard-links',
'delete-after',
'delay-updates',
'copy-links',
'safe-links',
'delete-extended',
'exclude=.*']
RSYNC_DEF_ARGS = ['--recursive',
'--times',
'--links',
'--hard-links',
'--delete-after',
'--delay-updates',
'--copy-links',
'--safe-links',
'--delete-excluded',
'--exclude=.*']
# How many days an upstream should have last synced by before it's considered stale.
DAYS_WARN = 2
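
The change here is the leading "--" on every default (plus delete-extended, which is not an rsync option, becoming --delete-excluded). It matters because the list is spliced verbatim into the rsync argv; a bare word like "recursive" would be treated by rsync as a path operand rather than an option. A rough sketch of that usage, with a made-up upstream and destination:

# Abbreviated copy of the defaults above; each entry must already be a
# complete '--option' or '--option=value' string.
RSYNC_DEF_ARGS = ['--recursive', '--times', '--links', '--hard-links',
                  '--delete-after', '--delete-excluded', '--exclude=.*']
cmd_str = ['rsync',
           *RSYNC_DEF_ARGS,
           'rsync://mirror.example.com/archlinux/.',   # hypothetical upstream
           '/srv/repos/arch/.']
# -> rsync --recursive --times ... --exclude=.* rsync://mirror.example.com/archlinux/. /srv/repos/arch/.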

View File

@ -23,10 +23,13 @@ class BaseFetcher(object):
os.chown(self.dest, **self.owner)

def check(self):
for k, v in self.filechecks['remote']:
for k, v in self.filechecks['remote'].items():
if v:
tstmp_raw = self.fetch_content(v.path).decode('utf-8').strip()
tstmp = datetime.datetime.strptime(tstmp_raw, v.fmt)
if '%s' in v.fmt:
tstmp = datetime.datetime.fromtimestamp(int(tstmp_raw))
else:
tstmp = datetime.datetime.strptime(tstmp_raw, v.fmt)
self.timestamps[k] = tstmp
_logger.debug('Updated timestamps: {0}'.format(self.timestamps))
return(None)
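
The new branch special-cases a format string containing "%s": it is not a directive Python's strptime() supports, so a file that just holds a UNIX epoch value is parsed with int() and fromtimestamp() instead. A standalone sketch of the same logic:

import datetime


def parse_stamp(raw, fmt):
    # '%s' means the remote file holds a plain UNIX epoch integer; anything
    # else is treated as a normal strftime()-style format string.
    raw = raw.strip()
    if '%s' in fmt:
        return(datetime.datetime.fromtimestamp(int(raw)))
    return(datetime.datetime.strptime(raw, fmt))


# parse_stamp('1592337890', '%s')
# parse_stamp('Tue 16 Jun 19:44:50 UTC 2020', '%a %d %b %H:%M:%S UTC %Y')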

View File

@ -32,9 +32,9 @@ class RSync(_base.BaseFetcher):
super().__init__(domain, port, path, dest, owner = owner, filechecks = filechecks, *args, **kwargs)
_logger.debug('Instantiated RSync fetcher')
if rsync_args:
self.rsync_args = rsync_args
self.rsync_args = rsync_args.args[:]
else:
self.rsync_args = constants.RSYNC_DEF_ARGS
self.rsync_args = constants.RSYNC_DEF_ARGS[:]
_logger.debug('RSync args given: {0}'.format(self.rsync_args))
if log:
# Do I want to do this in subprocess + logging module? Or keep this?
@ -43,7 +43,7 @@ class RSync(_base.BaseFetcher):
_log_path = None
for h in _logger.handlers:
if isinstance(h, logging.handlers.RotatingFileHandler):
_log_path = h.baseFileName
_log_path = h.baseFilename
break
self.rsync_args.extend(['--verbose',
'--log-file-format="[RSYNC {0}:{1}]:%l:%f%L"'.format(self.domain, self.port),
@ -61,11 +61,12 @@ class RSync(_base.BaseFetcher):
*self.rsync_args,
path,
dest]
_logger.debug('Running command: {0}'.format(' '.join(cmd_str)))
cmd = subprocess.run(cmd_str,
stdout = subprocess.PIPE,
stderr = subprocess.PIPE)
stdout = cmd.stdout.read().decode('utf-8').strip()
stderr = cmd.stderr.read().decode('utf-8').strip()
stdout = cmd.stdout.decode('utf-8').strip()
stderr = cmd.stderr.decode('utf-8').strip()
if stdout != '':
_logger.debug('STDOUT: {0}'.format(stdout))
if stderr != '' or cmd.returncode != 0:
@ -81,11 +82,12 @@ class RSync(_base.BaseFetcher):
*self.rsync_args,
url,
tf]
_logger.debug('Running command: {0}'.format(' '.join(cmd_str)))
cmd = subprocess.run(cmd_str,
stdout = subprocess.PIPE,
stderr = subprocess.PIPE)
stdout = cmd.stdout.read().decode('utf-8').strip()
stderr = cmd.stderr.read().decode('utf-8').strip()
stdout = cmd.stdout.decode('utf-8').strip()
stderr = cmd.stderr.decode('utf-8').strip()
if stdout != '':
_logger.debug('STDOUT: {0}'.format(stdout))
if stderr != '' or cmd.returncode != 0:
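
The stdout/stderr changes in both hunks above are the same fix: subprocess.run() waits for the process and returns a CompletedProcess whose stdout and stderr are already bytes when PIPE is used, so there is no .read() to call (that only exists on the stream objects of a Popen). A minimal reproduction with a harmless stand-in command:

import subprocess

cmd = subprocess.run(['rsync', '--version'],
                     stdout = subprocess.PIPE,
                     stderr = subprocess.PIPE)
stdout = cmd.stdout.decode('utf-8').strip()   # bytes, not a file-like object
stderr = cmd.stderr.decode('utf-8').strip()
if stderr != '' or cmd.returncode != 0:
    print('rsync exited {0}: {1}'.format(cmd.returncode, stderr))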

View File

@ -12,9 +12,9 @@ except ImportError:
def preplog(logfile = None):
if not logfile:
if os.geteuid() == 0:
logfile = '/var/log/repo/main.log'
logfile = '/var/log/repo/_main.log'
else:
logfile = '~/.cache/repo.log'
logfile = '~/.cache/repomirror.log'
# Prep the log file.
logfile = os.path.abspath(os.path.expanduser(logfile))
os.makedirs(os.path.dirname(logfile), exist_ok = True, mode = 0o0700)

View File

@ -4,10 +4,14 @@ import pwd
import grp
import os
import socket
import warnings
##
import psutil
##
from . import config
from . import constants
from . import fetcher
from . import logger


_logger = logging.getLogger()
@ -16,10 +20,10 @@ _logger = logging.getLogger()
def get_owner(owner_xml):
owner = {}
user = owner_xml.find('user')
if user:
if user is not None:
user = user.text
group = owner_xml.find('group')
if group:
if group is not None:
group = group.text
if user:
user_obj = pwd.getpwnam(user)
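
The "if user:" to "if user is not None:" changes in this file are all the same correction: an ElementTree/lxml element with no child elements is falsy, so truth-testing the result of find() can silently skip an element that exists but only carries text. A small illustration with throwaway XML:

import xml.etree.ElementTree as etree   # lxml, which this project uses, behaves the same way

owner_xml = etree.fromstring('<owner><user>root</user></owner>')
user = owner_xml.find('user')
if not user:                  # True(!) - the element has no children, so it is falsy
    print('would have skipped a <user> element that exists')
if user is not None:          # the correct existence test
    print(user.text)          # 'root'
print(owner_xml.find('group') is None)   # True: genuinely missing
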
@ -60,7 +64,7 @@ class Args(object):

class Mount(object):
def __init__(self, mpchk_xml):
self.path = os.path.abspath(os.path.expanduser(mpchk_xml))
self.path = os.path.abspath(os.path.expanduser(mpchk_xml.text))
self.is_mounted = None
self._check_mount()

@ -90,21 +94,27 @@ class TimestampFile(object):
_logger.debug('Set timestamp format string to {0}'.format(self.fmt))
self.owner_xml = owner_xml
self.owner = {}
if self.owner_xml:
if self.owner_xml is not None:
self.owner = get_owner(self.owner_xml)
_logger.debug('Owner set is {0}'.format(self.owner))
self.path = os.path.abspath(os.path.expanduser(ts_xml.text))
_logger.debug('Path resolved to {0}'.format(self.path))

def read(self, parentdir = None):
timestamp = None
if parentdir:
path = os.path.join(os.path.abspath(os.path.expanduser(parentdir)),
self.path.lstrip('/'))
else:
path = self.path
with open(path, 'r') as fh:
timestamp = datetime.datetime.strptime(fh.read().strip(), self.fmt)
_logger.debug('Read timestamp {0} from {1}'.format(str(timestamp), self.path))
if os.path.isfile(path):
with open(path, 'r') as fh:
ts_raw = fh.read().strip()
if '%s' in self.fmt:
timestamp = datetime.datetime.fromtimestamp(int(ts_raw))
else:
timestamp = datetime.datetime.strptime(ts_raw, self.fmt)
_logger.debug('Read timestamp {0} from {1}'.format(str(timestamp), self.path))
return(timestamp)

def write(self):
@ -136,13 +146,10 @@ class Upstream(object):
self.filechecks = filechecks
self.has_new = False
# These are optional.
for i in ('port', 'bwlimit'):
e = self.xml.find(i)
if e:
setattr(self, i, int(e.text))
else:
setattr(self, i, None)
if not getattr(self, 'port'):
port = self.xml.find('port')
if port is not None:
self.port = int(port.text)
else:
self.port = constants.PROTO_DEF_PORTS[self.sync_type]
self.available = None
if self.sync_type == 'rsync':
@ -176,8 +183,8 @@ class Upstream(object):
class Distro(object):
def __init__(self, distro_xml):
self.xml = distro_xml
self.name = distro_xml.attrib['name']
self.dest = os.path.abspath(os.path.expanduser(distro_xml.find('dest').text))
self.name = self.xml.attrib['name']
self.dest = os.path.abspath(os.path.expanduser(self.xml.find('dest').text))
self.mount = Mount(self.xml.find('mountCheck'))
self.filechecks = {'local': {'check': None,
'sync': None},
@ -187,20 +194,21 @@ class Distro(object):
self.rsync_args = None
self.owner = None
self.upstreams = []
self.lockfile = '/var/run/repomirror/{0}.lck'.format(self.name)
# These are optional.
self.owner_xml = self.xml.find('owner')
if self.owner_xml:
if self.owner_xml is not None:
self.owner = get_owner(self.owner_xml)
self.rsync_xml = self.xml.find('rsyncArgs')
if self.rsync_xml:
if self.rsync_xml is not None:
self.rsync_args = Args(self.rsync_xml)
for i in ('Check', 'Sync'):
e = self.xml.find('lastLocal{0}'.format(i))
if e:
if e is not None:
self.filechecks['local'][i.lower()] = TimestampFile(e)
for i in ('Sync', 'Update'):
e = self.xml.find('lastRemote{0}'.format(i))
if e:
if e is not None:
self.filechecks['remote'][i.lower()] = TimestampFile(e)
for u in self.xml.findall('upstream'):
self.upstreams.append(Upstream(u,
@ -210,18 +218,69 @@ class Distro(object):
filechecks = self.filechecks))

def check(self):
for k, v in self.filechecks['local']:
for k, v in self.filechecks['local'].items():
if v:
tstmp = v.read()
self.timestamps[k] = tstmp
_logger.debug('Updated timestamps: {0}'.format(self.timestamps))

def sync(self):
self.check()
local_checks = sorted([i for i in self.timestamps.values() if i])
for u in self.upstreams:
if not u.available:
continue
u.fetcher.check()
remote_checks = sorted([i for i in u.fetcher.timestamps.values() if i])
if not any((local_checks, remote_checks)) or not remote_checks:
u.has_new = True
else:
update = u.fetcher.timestamps.get('update')
sync = u.fetcher.timestamps.get('sync')
if update:
if local_checks and local_checks[-1] < update:
u.has_new = True
elif not local_checks:
u.has_new = True
if sync:
td = datetime.datetime.utcnow() - sync
if td.days > constants.DAYS_WARN:
_logger.warning(('Upstream {0} has not synced for {1} or more days; this '
'repository may be out of date.').format(u.fetcher.url, constants.DAYS_WARN))
warnings.warn('Upstream may be out of date')
return(None)
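
The freshness logic above reduces to two comparisons: an upstream is flagged as having new content when its remote "update" stamp is newer than the newest local timestamp (or when there is nothing local to compare against), and a remote "sync" stamp older than DAYS_WARN days triggers a staleness warning. A compressed sketch with made-up timestamps:

import datetime
import warnings

DAYS_WARN = 2
local_checks = [datetime.datetime(2020, 6, 15, 3, 0)]    # newest local stamp last
update = datetime.datetime(2020, 6, 16, 12, 0)           # upstream lastRemoteUpdate
sync = datetime.datetime(2020, 6, 10, 0, 0)              # upstream lastRemoteSync

has_new = (not local_checks) or (update and local_checks[-1] < update)
if sync and (datetime.datetime.utcnow() - sync).days > DAYS_WARN:
    warnings.warn('Upstream may be out of date')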

def sync(self):
self.check()
my_pid = os.getpid()
if os.path.isfile(self.lockfile):
with open(self.lockfile, 'r') as fh:
pid = int(fh.read().strip())
if my_pid == pid: # This logically should not happen, but something might have gone stupid.
_logger.warning('Someone call the Ghostbusters because this machine is haunted.')
return(False)
else:
warnmsg = 'The sync process for {0} is locked with file {1} and PID {2}'.format(self.name,
self.lockfile,
pid)
try:
proc = psutil.Process(pid)
warnmsg += '.'
except (psutil.NoSuchProcess, FileNotFoundError, AttributeError):
proc = None
warnmsg += ' but that PID no longer exists.'
_logger.warning(warnmsg)
if proc:
_logger.warning('PID information: {0}'.format(vars(proc)))
warnings.warn(warnmsg)
return(False)
if not self.mount.is_mounted:
_logger.error(('The mountpoint {0} for distro {1} is not mounted; '
'refusing to sync').format(self.mount.path, self.name))
return(False)
os.makedirs(os.path.dirname(self.lockfile), mode = 0o0755, exist_ok = True)
with open(self.lockfile, 'w') as fh:
fh.write('{0}\n'.format(str(my_pid)))
for u in self.upstreams:
if not u.available:
continue
u.fetcher.check(self.filechecks['local'])
if u.has_new:
u.sync()
if self.filechecks['local']['sync']:
@ -229,11 +288,20 @@ class Distro(object):
break
if self.filechecks['local']['check']:
self.filechecks['local']['check'].write()
return(None)
os.remove(self.lockfile)
return(True)
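
The new sync() wraps the actual work in a simple PID lockfile: if the lockfile exists, the run warns and bails out, noting whether the recorded PID is still alive (checked via psutil) or is a leftover from a dead process. A minimal standalone sketch of that pattern (the path and messages are illustrative):

import os
import warnings
##
import psutil


def acquire_lock(lockfile = '/var/run/repomirror/example.lck'):   # hypothetical path
    if os.path.isfile(lockfile):
        with open(lockfile, 'r') as fh:
            pid = int(fh.read().strip())
        try:
            psutil.Process(pid)        # raises NoSuchProcess if the PID is gone
            warnings.warn('Sync already running as PID {0}'.format(pid))
        except psutil.NoSuchProcess:
            warnings.warn('Found a stale lockfile for PID {0}'.format(pid))
        return(False)                  # either way, abort and let the operator clean up
    os.makedirs(os.path.dirname(lockfile), mode = 0o0755, exist_ok = True)
    with open(lockfile, 'w') as fh:
        fh.write('{0}\n'.format(os.getpid()))
    return(True)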


class Sync(object):
def __init__(self, cfg = None, dummy = False, distro = None, logdir = None, *args, **kwargs):
if logdir:
self.logdir = logdir
else:
self.logdir = os.path.dirname(logger.filehandler.baseFilename)
self._orig_log_old = logger.filehandler.baseFilename
self._orig_log = logger.preplog(os.path.join(self.logdir, '_main.log'))
logger.filehandler.close()
logger.filehandler.baseFilename = self._orig_log
try:
_args = dict(locals())
del(_args['self'])
@ -243,16 +311,28 @@ class Sync(object):
self.distro = distro
else:
self.distro = []
self._distro_objs = []
self.logdir = logdir
self.xml = config.Config(cfg)
self._distro_populate()
except Exception:
self.cfg = config.Config(cfg)
except Exception as e:
_logger.error('FATAL ERROR. Stacktrace follows.', exc_info = True)

def _distro_populate(self):
pass
raise e

def sync(self):
for d in self._distro_objs:
d.sync()
if self.distro:
for d in self.distro:
e = self.cfg.xml.xpath('//distro[@name="{0}"]'.format(d))
if e is None:
_logger.error('Could not find specified distro {0}; skipping'.format(d))
continue
logger.filehandler.close()
logger.filehandler.baseFilename = os.path.join(self.logdir, '{0}.log'.format(e.attrib['name']))
distro = Distro(e[0])
distro.sync()
else:
for e in self.cfg.xml.findall('distro'):
logger.filehandler.close()
logger.filehandler.baseFilename = os.path.join(self.logdir, '{0}.log'.format(e.attrib['name']))
distro = Distro(e)
distro.sync()
logger.filehandler.close()
logger.filehandler.baseFilename = self._orig_log
return(None)
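
The per-distro log files in Sync.sync() rely on a detail of the logging module: after close(), a FileHandler reopens its stream lazily on the next record, using whatever baseFilename points at by then, so closing the handler and reassigning baseFilename redirects subsequent records to a new file. A small sketch of just that trick (the handler setup is illustrative, not the project's logger module):

import logging
import logging.handlers
import os

filehandler = logging.handlers.RotatingFileHandler('/tmp/_main.log')
_logger = logging.getLogger('demo')
_logger.addHandler(filehandler)
_logger.warning('this record lands in /tmp/_main.log')

filehandler.close()
filehandler.baseFilename = os.path.abspath('/tmp/arch.log')   # must stay an absolute path
_logger.warning('this record lands in /tmp/arch.log')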

View File

@ -1,22 +0,0 @@
#!/usr/bin/env python3

import os
import shutil
##
import logger
import fetcher

dest = '/tmp/ipxe_ftp'
path = 'ipxe'


def main():
if os.path.isdir(dest):
shutil.rmtree(dest)
f = fetcher.FTP('10.11.12.12', 21, path, dest)
f.fetch()


if __name__ == '__main__':
main()

reposync.py Normal file → Executable file
View File

@ -15,7 +15,7 @@ import repomirror
if os.geteuid() == 0:
_def_logdir = '/var/log/repo'
else:
_def_logdir = '~/.cache/logs'
_def_logdir = '~/.cache/repologs'


def parseArgs():
@ -24,7 +24,7 @@ def parseArgs():
default = '~/.config/repomirror.xml',
dest = 'cfg',
help = ('The path to the config file. If it does not exist, a bare version will be created. '
'Default: ~/.config/repomirror.xmlost'))
'Default: ~/.config/repomirror.xml'))
# args.add_argument('-n', '--dry-run',
# action = 'store_true',
# dest = 'dummy',