summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorbrent s <bts@square-r00t.net>2020-06-16 15:44:50 -0400
committerbrent s <bts@square-r00t.net>2020-06-17 01:05:32 -0400
commit961952ed56deeeccdf49f40a517775ca3f622d2e (patch)
treee7394cb87bef407f3ae176a1349dae06f2574709
parent66d1ad7af549bc634e2b12da0b9cd47ec5cff1b2 (diff)
downloadRepoMirror-961952ed56deeeccdf49f40a517775ca3f622d2e.tar.xz
time to test.
-rw-r--r--example.config.xml (renamed from sample.config.xml)18
-rw-r--r--repomirror/config.py2
-rw-r--r--repomirror/constants.py22
-rw-r--r--repomirror/fetcher/_base.py7
-rw-r--r--repomirror/fetcher/rsync.py16
-rw-r--r--repomirror/logger.py4
-rw-r--r--repomirror/sync.py146
-rwxr-xr-xrepomirror/test.py22
-rwxr-xr-x[-rw-r--r--]reposync.py4
9 files changed, 152 insertions, 89 deletions
diff --git a/sample.config.xml b/example.config.xml
index db68d96..73ec7ac 100644
--- a/sample.config.xml
+++ b/example.config.xml
@@ -1,6 +1,7 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!--
This is by default placed in ~/.config/repomirror.xml
+ Remember to replace any necessary special characters (https://stackoverflow.com/a/1091953/733214).
-->
<mirror xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns="https://git.square-r00t.net/RepoMirror/"
@@ -71,7 +72,7 @@
<long>delay-updates</long>
<long>copy-links</long>
<long>safe-links</long>
- <long>delete-extended</long>
+ <long>delete-excluded</long>
<long value=".*">exclude</long>
<rsyncArgs>
These arguments should be sane for most, if not all, rsync-driven repository mirroring. The last one (exclude) may
@@ -86,9 +87,9 @@
<long>archive</long>
<long>delete-after</long>
<!--
- An argument with a value (info=2).
+ An argument with a value (info=progress2).
-->
- <long value="2">info</long>
+ <long value="progress2">info</long>
<!--
A "short" option (single hyphen).
-->
@@ -121,11 +122,6 @@
Required; the *remote* path part of the URI. The leading / is necessary. A trailing one will be assumed.
-->
<path>/archlinux/</path>
- <!--
- The speed to cap socket bandwidth at (in KiB). Decimals are okay.
- Only valid for rsync; ignored for FTP. If not provided, the default is to not throttle.
- -->
- <bwlimit>7000</bwlimit>
</upstream>
<!--
Multiple upstreams can be specified. They are tried in order specified and if connection fails or times out,
@@ -153,7 +149,9 @@
<path>/centos/</path>
</upstream>
<dest>/srv/repos/arch/.</dest>
- <lastLocalCheck timeFormat="MICROSECOND_EPOCH">/srv/http/centos.lastcheck</lastLocalCheck>
- <lastLocalSync timeFormat="UNIX_EPOCH">/srv/repos/arch/lastsync</lastLocalSync>
+ <lastLocalCheck timeFormat="MICROSECOND_EPOCH">/srv/repos/centos/CHECKED</lastLocalCheck>
+ <lastLocalSync timeFormat="UNIX_EPOCH">/srv/repos/centos/TIME</lastLocalSync>
+ <lastRemoteUpdate timeFormat="%a %d %b %H:%M:%S UTC %Y">/timestamp.txt</lastRemoteUpdate>
+ <lastRemoteSync timeFormat="UNIX_EPOCH">/TIME</lastRemoteSync>
</distro>
</mirror>
diff --git a/repomirror/config.py b/repomirror/config.py
index bd31f21..b552b83 100644
--- a/repomirror/config.py
+++ b/repomirror/config.py
@@ -86,7 +86,7 @@ class Config(object):
'Be sure to configure it appropriately.').format(self.default_xml_path))
raise ValueError('Config does not exist')
else:
- with open(xml_path, 'rb') as fh:
+ with open(self.xml_path, 'rb') as fh:
self.raw = fh.read()
self.xml = None
self.xsd = None
diff --git a/repomirror/constants.py b/repomirror/constants.py
index bd025e6..97f4beb 100644
--- a/repomirror/constants.py
+++ b/repomirror/constants.py
@@ -1,12 +1,14 @@
PROTO_DEF_PORTS = {'ftp': 21,
'rsync': 873}
-RSYNC_DEF_ARGS = ['recursive',
- 'times',
- 'links',
- 'hard-links',
- 'delete-after',
- 'delay-updates',
- 'copy-links',
- 'safe-links',
- 'delete-extended',
- 'exclude=.*']
+RSYNC_DEF_ARGS = ['--recursive',
+ '--times',
+ '--links',
+ '--hard-links',
+ '--delete-after',
+ '--delay-updates',
+ '--copy-links',
+ '--safe-links',
+ '--delete-excluded',
+ '--exclude=.*']
+# How many days an upstream should have last synced by before it's considered stale.
+DAYS_WARN = 2
diff --git a/repomirror/fetcher/_base.py b/repomirror/fetcher/_base.py
index b70c553..c4094a1 100644
--- a/repomirror/fetcher/_base.py
+++ b/repomirror/fetcher/_base.py
@@ -23,10 +23,13 @@ class BaseFetcher(object):
os.chown(self.dest, **self.owner)
def check(self):
- for k, v in self.filechecks['remote']:
+ for k, v in self.filechecks['remote'].items():
if v:
tstmp_raw = self.fetch_content(v.path).decode('utf-8').strip()
- tstmp = datetime.datetime.strptime(tstmp_raw, v.fmt)
+ if '%s' in v.fmt:
+ tstmp = datetime.datetime.fromtimestamp(int(tstmp_raw))
+ else:
+ tstmp = datetime.datetime.strptime(tstmp_raw, v.fmt)
self.timestamps[k] = tstmp
_logger.debug('Updated timestamps: {0}'.format(self.timestamps))
return(None)
diff --git a/repomirror/fetcher/rsync.py b/repomirror/fetcher/rsync.py
index b3ae612..3ec3b74 100644
--- a/repomirror/fetcher/rsync.py
+++ b/repomirror/fetcher/rsync.py
@@ -32,9 +32,9 @@ class RSync(_base.BaseFetcher):
super().__init__(domain, port, path, dest, owner = owner, filechecks = filechecks, *args, **kwargs)
_logger.debug('Instantiated RSync fetcher')
if rsync_args:
- self.rsync_args = rsync_args
+ self.rsync_args = rsync_args.args[:]
else:
- self.rsync_args = constants.RSYNC_DEF_ARGS
+ self.rsync_args = constants.RSYNC_DEF_ARGS[:]
_logger.debug('RSync args given: {0}'.format(self.rsync_args))
if log:
# Do I want to do this in subprocess + logging module? Or keep this?
@@ -43,7 +43,7 @@ class RSync(_base.BaseFetcher):
_log_path = None
for h in _logger.handlers:
if isinstance(h, logging.handlers.RotatingFileHandler):
- _log_path = h.baseFileName
+ _log_path = h.baseFilename
break
self.rsync_args.extend(['--verbose',
'--log-file-format="[RSYNC {0}:{1}]:%l:%f%L"'.format(self.domain, self.port),
@@ -61,11 +61,12 @@ class RSync(_base.BaseFetcher):
*self.rsync_args,
path,
dest]
+ _logger.debug('Running command: {0}'.format(' '.join(cmd_str)))
cmd = subprocess.run(cmd_str,
stdout = subprocess.PIPE,
stderr = subprocess.PIPE)
- stdout = cmd.stdout.read().decode('utf-8').strip()
- stderr = cmd.stderr.read().decode('utf-8').strip()
+ stdout = cmd.stdout.decode('utf-8').strip()
+ stderr = cmd.stderr.decode('utf-8').strip()
if stdout != '':
_logger.debug('STDOUT: {0}'.format(stdout))
if stderr != '' or cmd.returncode != 0:
@@ -81,11 +82,12 @@ class RSync(_base.BaseFetcher):
*self.rsync_args,
url,
tf]
+ _logger.debug('Running command: {0}'.format(' '.join(cmd_str)))
cmd = subprocess.run(cmd_str,
stdout = subprocess.PIPE,
stderr = subprocess.PIPE)
- stdout = cmd.stdout.read().decode('utf-8').strip()
- stderr = cmd.stderr.read().decode('utf-8').strip()
+ stdout = cmd.stdout.decode('utf-8').strip()
+ stderr = cmd.stderr.decode('utf-8').strip()
if stdout != '':
_logger.debug('STDOUT: {0}'.format(stdout))
if stderr != '' or cmd.returncode != 0:
diff --git a/repomirror/logger.py b/repomirror/logger.py
index 980c7e3..78a990c 100644
--- a/repomirror/logger.py
+++ b/repomirror/logger.py
@@ -12,9 +12,9 @@ except ImportError:
def preplog(logfile = None):
if not logfile:
if os.geteuid() == 0:
- logfile = '/var/log/repo/main.log'
+ logfile = '/var/log/repo/_main.log'
else:
- logfile = '~/.cache/repo.log'
+ logfile = '~/.cache/repomirror.log'
# Prep the log file.
logfile = os.path.abspath(os.path.expanduser(logfile))
os.makedirs(os.path.dirname(logfile), exist_ok = True, mode = 0o0700)
diff --git a/repomirror/sync.py b/repomirror/sync.py
index c243d15..daf7cf0 100644
--- a/repomirror/sync.py
+++ b/repomirror/sync.py
@@ -4,10 +4,14 @@ import pwd
import grp
import os
import socket
+import warnings
+##
+import psutil
##
from . import config
from . import constants
from . import fetcher
+from . import logger
_logger = logging.getLogger()
@@ -16,10 +20,10 @@ _logger = logging.getLogger()
def get_owner(owner_xml):
owner = {}
user = owner_xml.find('user')
- if user:
+ if user is not None:
user = user.text
group = owner_xml.find('group')
- if group:
+ if group is not None:
group = group.text
if user:
user_obj = pwd.getpwnam(user)
@@ -60,7 +64,7 @@ class Args(object):
class Mount(object):
def __init__(self, mpchk_xml):
- self.path = os.path.abspath(os.path.expanduser(mpchk_xml))
+ self.path = os.path.abspath(os.path.expanduser(mpchk_xml.text))
self.is_mounted = None
self._check_mount()
@@ -90,21 +94,27 @@ class TimestampFile(object):
_logger.debug('Set timestamp format string to {0}'.format(self.fmt))
self.owner_xml = owner_xml
self.owner = {}
- if self.owner_xml:
+ if self.owner_xml is not None:
self.owner = get_owner(self.owner_xml)
_logger.debug('Owner set is {0}'.format(self.owner))
self.path = os.path.abspath(os.path.expanduser(ts_xml.text))
_logger.debug('Path resolved to {0}'.format(self.path))
def read(self, parentdir = None):
+ timestamp = None
if parentdir:
path = os.path.join(os.path.abspath(os.path.expanduser(parentdir)),
self.path.lstrip('/'))
else:
path = self.path
- with open(path, 'r') as fh:
- timestamp = datetime.datetime.strptime(fh.read().strip(), self.fmt)
- _logger.debug('Read timestamp {0} from {1}'.format(str(timestamp), self.path))
+ if os.path.isfile(path):
+ with open(path, 'r') as fh:
+ ts_raw = fh.read().strip()
+ if '%s' in self.fmt:
+ timestamp = datetime.datetime.fromtimestamp(int(ts_raw))
+ else:
+ timestamp = datetime.datetime.strptime(ts_raw, self.fmt)
+ _logger.debug('Read timestamp {0} from {1}'.format(str(timestamp), self.path))
return(timestamp)
def write(self):
@@ -136,13 +146,10 @@ class Upstream(object):
self.filechecks = filechecks
self.has_new = False
# These are optional.
- for i in ('port', 'bwlimit'):
- e = self.xml.find(i)
- if e:
- setattr(self, i, int(e.text))
- else:
- setattr(self, i, None)
- if not getattr(self, 'port'):
+ port = self.xml.find('port')
+ if port is not None:
+ self.port = int(port.text)
+ else:
self.port = constants.PROTO_DEF_PORTS[self.sync_type]
self.available = None
if self.sync_type == 'rsync':
@@ -176,8 +183,8 @@ class Upstream(object):
class Distro(object):
def __init__(self, distro_xml):
self.xml = distro_xml
- self.name = distro_xml.attrib['name']
- self.dest = os.path.abspath(os.path.expanduser(distro_xml.find('dest').text))
+ self.name = self.xml.attrib['name']
+ self.dest = os.path.abspath(os.path.expanduser(self.xml.find('dest').text))
self.mount = Mount(self.xml.find('mountCheck'))
self.filechecks = {'local': {'check': None,
'sync': None},
@@ -187,20 +194,21 @@ class Distro(object):
self.rsync_args = None
self.owner = None
self.upstreams = []
+ self.lockfile = '/var/run/repomirror/{0}.lck'.format(self.name)
# These are optional.
self.owner_xml = self.xml.find('owner')
- if self.owner_xml:
+ if self.owner_xml is not None:
self.owner = get_owner(self.owner_xml)
self.rsync_xml = self.xml.find('rsyncArgs')
- if self.rsync_xml:
+ if self.rsync_xml is not None:
self.rsync_args = Args(self.rsync_xml)
for i in ('Check', 'Sync'):
e = self.xml.find('lastLocal{0}'.format(i))
- if e:
+ if e is not None:
self.filechecks['local'][i.lower()] = TimestampFile(e)
for i in ('Sync', 'Update'):
e = self.xml.find('lastRemote{0}'.format(i))
- if e:
+ if e is not None:
self.filechecks['remote'][i.lower()] = TimestampFile(e)
for u in self.xml.findall('upstream'):
self.upstreams.append(Upstream(u,
@@ -210,18 +218,69 @@ class Distro(object):
filechecks = self.filechecks))
def check(self):
- for k, v in self.filechecks['local']:
+ for k, v in self.filechecks['local'].items():
if v:
tstmp = v.read()
self.timestamps[k] = tstmp
_logger.debug('Updated timestamps: {0}'.format(self.timestamps))
+ local_checks = sorted([i for i in self.timestamps.values() if i])
+ for u in self.upstreams:
+ if not u.available:
+ continue
+ u.fetcher.check()
+ remote_checks = sorted([i for i in u.fetcher.timestamps.values() if i])
+ if not any((local_checks, remote_checks)) or not remote_checks:
+ u.has_new = True
+ else:
+ update = u.fetcher.timestamps.get('update')
+ sync = u.fetcher.timestamps.get('sync')
+ if update:
+ if local_checks and local_checks[-1] < update:
+ u.has_new = True
+ elif not local_checks:
+ u.has_new = True
+ if sync:
+ td = datetime.datetime.utcnow() - sync
+ if td.days > constants.DAYS_WARN:
+ _logger.warning(('Upstream {0} has not synced for {1} or more days; this '
+ 'repository may be out of date.').format(u.fetcher.url, constants.DAYS_WARN))
+ warnings.warn('Upstream may be out of date')
+ return(None)
def sync(self):
self.check()
+ my_pid = os.getpid()
+ if os.path.isfile(self.lockfile):
+ with open(self.lockfile, 'r') as fh:
+ pid = int(fh.read().strip())
+ if my_pid == pid: # This logically should not happen, but something might have gone stupid.
+ _logger.warning('Someone call the Ghostbusters because this machine is haunted.')
+ return(False)
+ else:
+ warnmsg = 'The sync process for {0} is locked with file {1} and PID {2}'.format(self.name,
+ self.lockfile,
+ pid)
+ try:
+ proc = psutil.Process(pid)
+ warnmsg += '.'
+ except (psutil.NoSuchProcess, FileNotFoundError, AttributeError):
+ proc = None
+ warnmsg += ' but that PID no longer exists.'
+ _logger.warning(warnmsg)
+ if proc:
+ _logger.warning('PID information: {0}'.format(vars(proc)))
+ warnings.warn(warnmsg)
+ return(False)
+ if not self.mount.is_mounted:
+ _logger.error(('The mountpoint {0} for distro {1} is not mounted; '
+ 'refusing to sync').format(self.mount.path, self.name))
+ return(False)
+ os.makedirs(os.path.dirname(self.lockfile), mode = 0o0755, exist_ok = True)
+ with open(self.lockfile, 'w') as fh:
+ fh.write('{0}\n'.format(str(my_pid)))
for u in self.upstreams:
if not u.available:
continue
- u.fetcher.check(self.filechecks['local'])
if u.has_new:
u.sync()
if self.filechecks['local']['sync']:
@@ -229,11 +288,20 @@ class Distro(object):
break
if self.filechecks['local']['check']:
self.filechecks['local']['check'].write()
- return(None)
+ os.remove(self.lockfile)
+ return(True)
class Sync(object):
def __init__(self, cfg = None, dummy = False, distro = None, logdir = None, *args, **kwargs):
+ if logdir:
+ self.logdir = logdir
+ else:
+ self.logdir = os.path.dirname(logger.filehandler.baseFilename)
+ self._orig_log_old = logger.filehandler.baseFilename
+ self._orig_log = logger.preplog(os.path.join(self.logdir, '_main.log'))
+ logger.filehandler.close()
+ logger.filehandler.baseFilename = self._orig_log
try:
_args = dict(locals())
del(_args['self'])
@@ -243,16 +311,28 @@ class Sync(object):
self.distro = distro
else:
self.distro = []
- self._distro_objs = []
- self.logdir = logdir
- self.xml = config.Config(cfg)
- self._distro_populate()
- except Exception:
+ self.cfg = config.Config(cfg)
+ except Exception as e:
_logger.error('FATAL ERROR. Stacktrace follows.', exc_info = True)
-
- def _distro_populate(self):
- pass
+ raise e
def sync(self):
- for d in self._distro_objs:
- d.sync()
+ if self.distro:
+ for d in self.distro:
+ e = self.cfg.xml.xpath('//distro[@name="{0}"]'.format(d))
+ if not e:
+ _logger.error('Could not find specified distro {0}; skipping'.format(d))
+ continue
+ logger.filehandler.close()
+ logger.filehandler.baseFilename = os.path.join(self.logdir, '{0}.log'.format(e[0].attrib['name']))
+ distro = Distro(e[0])
+ distro.sync()
+ else:
+ for e in self.cfg.xml.findall('distro'):
+ logger.filehandler.close()
+ logger.filehandler.baseFilename = os.path.join(self.logdir, '{0}.log'.format(e.attrib['name']))
+ distro = Distro(e)
+ distro.sync()
+ logger.filehandler.close()
+ logger.filehandler.baseFilename = self._orig_log
+ return(None)
diff --git a/repomirror/test.py b/repomirror/test.py
deleted file mode 100755
index 82ca827..0000000
--- a/repomirror/test.py
+++ /dev/null
@@ -1,22 +0,0 @@
-#!/usr/bin/env python3
-
-import os
-import shutil
-##
-import logger
-import fetcher
-
-dest = '/tmp/ipxe_ftp'
-path = 'ipxe'
-
-
-def main():
- if os.path.isdir(dest):
- shutil.rmtree(dest)
- f = fetcher.FTP('10.11.12.12', 21, path, dest)
- f.fetch()
-
-
-if __name__ == '__main__':
- main()
-
diff --git a/reposync.py b/reposync.py
index 3b5ccf1..edf3569 100644..100755
--- a/reposync.py
+++ b/reposync.py
@@ -15,7 +15,7 @@ import repomirror
if os.geteuid() == 0:
_def_logdir = '/var/log/repo'
else:
- _def_logdir = '~/.cache/logs'
+ _def_logdir = '~/.cache/repologs'
def parseArgs():
@@ -24,7 +24,7 @@ def parseArgs():
default = '~/.config/repomirror.xml',
dest = 'cfg',
help = ('The path to the config file. If it does not exist, a bare version will be created. '
- 'Default: ~/.config/repomirror.xmlost'))
+ 'Default: ~/.config/repomirror.xml'))
# args.add_argument('-n', '--dry-run',
# action = 'store_true',
# dest = 'dummy',