13 Commits

9 changed files with 167 additions and 14 deletions

README

@@ -17,13 +17,14 @@ Configuration/Deployment:
 See example.config.xml for details on a configuration file, as it's extensively commented and won't be covered in this README.
-You may take special interest in repomirror/utils/find_fastest_upstream/ scripts as they're pre-written to find the fastest (in theory) upstream you can use. Currently only Arch Linux and CentOS have scripts written, but I'll gladly try to add other distros if you open a feature request (see below). You can also run reposync -h (or reposync --help) to see all supported options.
+You may take special interest in repomirror/utils/find_fastest_upstream/ scripts as they're pre-written to find the fastest (in theory) upstream you can use. Currently only Arch Linux and CentOS have scripts written, but I'll gladly try to add other distros if you open a feature request (see below). You can run -h/--help to see the supported options (there aren't many).
 Bugs/Feature Requests:
-Please use my tracker at https://bugs.square-r00t.net/index.php?project=14 or, if you prefer to not create an account,
-simply email me at bts(at)square-r00t[dot]net.
+Please use my tracker at https://bugs.square-r00t.net/index.php?project=14 or, if you prefer to not create an account, simply email me at bts(at)square-r00t[dot]net.
 Sources:
@@ -36,3 +37,7 @@ This project can be found at/cloned from:
 And has a mirrored repository on GitHub (Issues/Wiki/etc. disabled) at:
 * https://github.com/johnnybubonic/repomirror
+It can also be found on PyPi at:
+* https://pypi.org/project/repomirror/

constants.py

@@ -12,4 +12,4 @@ RSYNC_DEF_ARGS = ['--recursive',
                   '--exclude=.*']
 # How many days an upstream should have last synced by before it's considered stale.
 DAYS_WARN = 2
-VERSION = '1.0.1'
+VERSION = '1.0.3'

_base.py

@@ -31,7 +31,7 @@ class BaseFetcher(object):
             else:
                 tstmp = datetime.datetime.strptime(tstmp_raw, v.fmt)
             self.timestamps[k] = tstmp
-        _logger.debug('Updated timestamps: {0}'.format(self.timestamps))
+        _logger.debug('Updated upstream timestamps: {0}'.format(self.timestamps))
         return(None)

     def fetch_content(self, path):

rsync fetcher module

@@ -10,6 +10,7 @@ sys.path.append(os.path.abspath(os.path.join(_cur_dir, '..')))
 import constants
 # import logger
 from . import _base
+from . import rsync_returns

 _logger = logging.getLogger()
@@ -70,9 +71,11 @@ class RSync(_base.BaseFetcher):
         if stdout != '':
             _logger.debug('STDOUT: {0}'.format(stdout))
         if stderr != '' or cmd.returncode != 0:
-            _logger.error('Rsync to {0}:{1} returned exit status {2}'.format(self.domain, self.port, cmd.returncode))
+            rtrn = cmd.returncode
+            err = rsync_returns.returns[rtrn]
+            _logger.error(('Rsync to {0}:{1} returned exit status {2}: {3}').format(self.domain, self.port, rtrn, err))
             _logger.debug('STDERR: {0}'.format(stderr))
-            warnings.warn('Rsync process returned non-zero')
+            warnings.warn('Rsync process returned non-zero {0} ({1}) for {2}'.format(rtrn, err, ' '.join(cmd_str)))
         return(None)

     def fetch_content(self, remote_filepath):
@@ -91,9 +94,11 @@ class RSync(_base.BaseFetcher):
         if stdout != '':
             _logger.debug('STDOUT: {0}'.format(stdout))
         if stderr != '' or cmd.returncode != 0:
-            _logger.error('Rsync to {0}:{1} returned exit status {2}'.format(self.domain, self.port, cmd.returncode))
+            rtrn = cmd.returncode
+            err = rsync_returns.returns[rtrn]
+            _logger.error(('Rsync to {0}:{1} returned exit status {2}: {3}').format(self.domain, self.port, rtrn, err))
             _logger.debug('STDERR: {0}'.format(stderr))
-            warnings.warn('Rsync process returned non-zero')
+            warnings.warn('Rsync process returned non-zero {0} ({1}) for {2}'.format(rtrn, err, ' '.join(cmd_str)))
         with open(tf, 'rb') as fh:
             raw_content = fh.read()
         os.remove(tf)

rsync_returns.py (new file)

@@ -0,0 +1,22 @@
+returns = {0: 'Success',
+           1: 'Syntax or usage error',
+           2: 'Protocol incompatibility',
+           3: 'Errors selecting input/output files, dirs',
+           4: ('Requested action not supported: '
+               'an attempt was made to manipulate 64-bit files on a platform that cannot support them; '
+               'or an option was specified that is supported by the client and not by the server.'),
+           5: 'Error starting client-server protocol',
+           6: 'Daemon unable to append to log-file',
+           10: 'Error in socket I/O',
+           11: 'Error in file I/O',
+           12: 'Error in rsync protocol data stream',
+           13: 'Errors with program diagnostics',
+           14: 'Error in IPC code',
+           20: 'Received SIGUSR1 or SIGINT',
+           21: 'Some error returned by waitpid()',
+           22: 'Error allocating core memory buffers',
+           23: 'Partial transfer due to error',
+           24: 'Partial transfer due to vanished source files',
+           25: 'The --max-delete limit stopped deletions',
+           30: 'Timeout in data send/receive',
+           35: 'Timeout waiting for daemon connection'}
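A minimal sketch (not part of the diff) of the lookup the rsync fetcher above performs against this table. Indexing returns[rtrn] directly, as the fetcher does, raises a KeyError for any exit status not in the table (under subprocess, death by signal even yields a negative returncode); dict.get() with a fallback is the defensive variant:

    from rsync_returns import returns  # import path assumed; adjust to the actual package layout

    rtrn = 23
    err = returns.get(rtrn, 'Undocumented return code')  # .get() avoids a KeyError on unknown codes
    print('Rsync returned exit status {0}: {1}'.format(rtrn, err))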

classes.py

@@ -229,27 +229,40 @@ class Distro(object):
             if v:
                 tstmp = v.read()
                 self.timestamps[k] = tstmp
-        _logger.debug('Updated timestamps: {0}'.format(self.timestamps))
+        _logger.debug('Updated local timestamps: {0}'.format(self.timestamps))
         local_checks = sorted([i for i in self.timestamps.values() if i])
+        if local_checks:
+            _logger.info('Local timestamps: {0}'.format(', '.join([str(t) for t in local_checks])))
         for u in self.upstreams:
             if not u.available:
                 continue
             u.fetcher.check()
             remote_checks = sorted([i for i in u.fetcher.timestamps.values() if i])
+            if remote_checks:
+                _logger.info('Remote timestamps for {0}: {1}'.format(u.domain, ', '.join([str(t)
+                                                                                          for t in remote_checks])))
             if not any((local_checks, remote_checks)) or not remote_checks:
+                _logger.info('There are no reliable timestamp comparisons; syncing.')
                 u.has_new = True
             else:
                 update = u.fetcher.timestamps.get('update')
                 sync = u.fetcher.timestamps.get('sync')
                 if update:
-                    if local_checks and local_checks[-1] < update:
+                    if local_checks and (local_checks[-1] < update):
+                        _logger.info('Newest local timestamp is older than the remote update; syncing.')
                         u.has_new = True
                     elif not local_checks:
+                        _logger.info('No local timestamps; syncing.')
                         u.has_new = True
+                    else:
+                        _logger.info('Local checks are newer than upstream.')
+                else:
+                    _logger.info('No remote update timestamp; syncing.')
+                    u.has_new = True
                 if sync:
                     td = datetime.datetime.utcnow() - sync
                     if td.days > constants.DAYS_WARN:
-                        _logger.warning(('Upstream {0} has not synced for {1}} or more days; this '
+                        _logger.warning(('Upstream {0} has not synced for {1} or more days; this '
                                          'repository may be out of date.').format(u.fetcher.url, constants.DAYS_WARN))
                         warnings.warn('Upstream may be out of date')
         return(None)
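The decision logic in this hunk reduces to: sync when there is no usable remote timestamp, when the newest local timestamp predates the upstream's 'update' stamp, or when there is no local history at all; warn separately when the upstream's own 'sync' stamp is older than DAYS_WARN. A condensed, self-contained illustration with hypothetical timestamps (the real code reads these from the filecheck and fetcher objects):

    import datetime

    DAYS_WARN = 2  # from constants: staleness threshold in days

    # Hypothetical values for illustration only.
    local_checks = sorted([datetime.datetime(2020, 11, 1, 12, 0)])
    update = datetime.datetime(2020, 11, 2, 3, 0)  # upstream's last repository update
    sync = datetime.datetime(2020, 10, 20, 3, 0)   # upstream's last pull from its own upstream

    # Sync if the remote updated after our newest local check, or if we have no local history.
    has_new = (not local_checks) or (local_checks[-1] < update)
    print(has_new)  # True

    # Warn (independently of has_new) if the upstream itself looks stale.
    td = datetime.datetime.utcnow() - sync
    if td.days > DAYS_WARN:
        print('Upstream has not synced for {0} or more days'.format(DAYS_WARN))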
@@ -295,12 +308,17 @@ class Distro(object):
         fh.write('{0}\n'.format(str(my_pid)))
         for u in self.upstreams:
             if not u.available:
+                _logger.debug('Upstream {0} is not available; skipping.'.format(u.domain))
                 continue
             if u.has_new:
+                _logger.info('Initiating syncing upstream {0}.'.format(u.domain))
                 u.sync()
+                _logger.debug('Sync for upstream {0} complete.'.format(u.domain))
                 if self.filechecks['local']['sync']:
                     self.filechecks['local']['sync'].write()
                 break
+            else:
+                _logger.debug('Upstream {0} is not new; not syncing.'.format(u.domain))
         if self.filechecks['local']['check']:
             self.filechecks['local']['check'].write()
         os.remove(self.lockfile)
@@ -338,9 +356,10 @@ class Sync(object):
             if e is None:
                 _logger.error('Could not find specified distro {0}; skipping'.format(d))
                 continue
+            e = e[0]
             logger.filehandler.close()
             logger.filehandler.baseFilename = os.path.join(self.logdir, '{0}.log'.format(e.attrib['name']))
-            distro = Distro(e[0])
+            distro = Distro(e)
             distro.sync()
         else:
             for e in self.cfg.xml.findall('distro'):

__init__.py (new file)

@@ -0,0 +1,2 @@
+from . import constants
+from . import classes

find_fastest_upstream script

@@ -2,7 +2,6 @@
 import argparse
 import csv
-import datetime
 import io
 import re
 ##

EPEL find_fastest_upstream script (new file)

@@ -0,0 +1,101 @@
+#!/usr/bin/env python3
+
+import argparse
+import re
+##
+import classes
+
+
+_proto_re = re.compile(r'^(?P<proto>https?)(?P<uri>.*)')
+
+
+class Ranker(classes.Ranker):
+    # No CSV, JSON, or XML that I could find, unfortunately.
+    # There's apparently? an API to mirrormanager2 but I can't seem to find a public endpoint nor an endpoint that
+    # would return the mirrors.
+    mirrorlist_url = 'https://admin.fedoraproject.org/mirrormanager/mirrors/EPEL'
+    distro_name = 'EPEL'
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.get_mirrors()
+
+    def extract_mirrors(self, preferred_proto = 'rsync'):
+        preferred_proto = preferred_proto.lower()
+        if preferred_proto not in ('rsync', 'ftp'):
+            raise ValueError('Invalid preferred_proto; must be one of rsync or ftp')
+        non_preferred = ('rsync' if preferred_proto == 'ftp' else 'ftp')
+        print(('Fedora (who maintains EPEL) do their mirroring in an extremely weird way.\n'
+               'See https://fedoraproject.org/wiki/Infrastructure/Mirroring and '
+               'https://fedoraproject.org/wiki/Infrastructure/Mirroring/Tiering#Tier_1_Mirrors for which mirrors and '
+               'how to sync.'))
+        return(None)
+        # mirror_section = self.bs.find('h2', string = 'Public active mirrors')
+        # mirror_table = mirror_section.find_next('table')
+        # if mirror_table is None:
+        #     return(None)
+        # # https://stackoverflow.com/a/56835562/733214
+        # headers = [h.text for h in mirror_table.find_all('th')]
+        # rows = [m for m in mirror_table.find_all('tr')][1:]
+        # for row in rows:
+        #     mirror = {}
+        #     do_skip = False
+        #     for idx, cell in enumerate(row.find_all('td')):
+        #         k = headers[idx]
+        #         v = cell.text.strip()
+        #         if k == 'Country' and v != self.my_info['country']:
+        #             do_skip = True
+        #             continue
+        #         if k == 'Categories' and not do_skip:
+        #             # TODO: DO THIS BETTER! Their mirrorlist sucks and is not easily parsed at all.
+        #             # I need to check and try to grab the specific URL that contains "epel".
+        #             if 'EPEL' not in v:
+        #                 do_skip = True
+        #                 continue
+        #             pref_proto = cell.find('a', attrs = {
+        #                 'href': re.compile(r'^{0}://'.format(preferred_proto), re.IGNORECASE)})
+        #             non_pref = cell.find('a', attrs = {
+        #                 'href': re.compile(r'^{0}://'.format(non_preferred), re.IGNORECASE)})
+        #             if pref_proto is not None:
+        #                 v = pref_proto['href']
+        #             elif non_pref is not None:
+        #                 v = non_pref['href']
+        #             else:
+        #                 v = None
+        #             mirror['url'] = v
+        #         # Fedora team can't spell.
+        #         elif k in ('Bandwidth', 'Bandwith'):
+        #             mirror['bw'] = int(v)
+        #     if do_skip:
+        #         continue
+        #     if not mirror['url']:
+        #         continue
+        #     self.raw_mirrors.append(mirror)
+        #     self.mirror_candidates.append(mirror['url'])
+        # return(None)
+
+
+def parseArgs():
+    args = argparse.ArgumentParser(description = 'Generate a list of suitable EPEL upstream mirrors in order of '
+                                                 'speed.')
+    args.add_argument('-x', '--xml',
+                      dest = 'xml',
+                      action = 'store_true',
+                      help = ('If specified, generate a config stub instead of a printed list of URLs'))
+    return(args)
+
+
+def main():
+    args = parseArgs().parse_args()
+    r = Ranker()
+    r.extract_mirrors()
+    r.speedcheck()
+    if args.xml:
+        print(r.gen_xml())
+    else:
+        r.print()
+    return(None)
+
+
+if __name__ == '__main__':
+    main()