14 Commits

Author SHA1 Message Date
a3203ab03a fix reporting zero exit for rsync as non-zero
would give warning output like the following:

/usr/local/lib/repomirror/repomirror/fetcher/rsync.py:78: UserWarning: Rsync process returned non-zero 0 (Success) for rsync --recursive --times --links --hard-links --delete-after --delay-updates --copy-links --safe-links --delete-excluded --no-motd --exclude=.* --verbose --log-file-format="[RSYNC arch.mirror.constant.com:873]:%l:%f%L" --log-file=/var/log/repo/arch.log rsync://arch.mirror.constant.com:873/archlinux/. /srv/repos/arch/.
2020-07-07 13:32:02 -04:00
6d384e71ae version bump 2020-07-07 02:31:01 -04:00
09afe59b91 double curly braces where there should have been one. minor fix, but breaks otherwise. 2020-07-07 02:30:15 -04:00
845cd90ddf fixed apparently. version bump 2020-06-24 01:52:52 -04:00
ac431a873e more logging to try to find cause of refusal to sync. 2020-06-24 01:25:18 -04:00
2ff334f220 still not syncing properly... 2020-06-24 01:15:56 -04:00
c8bf61ea98 whoops 2020-06-24 01:01:58 -04:00
0e16214e45 found a bug in distro selection 2020-06-24 00:59:14 -04:00
0ff9af4c48 way better rsync logging 2020-06-18 13:01:25 -04:00
ac1886e46b better rsync logging 2020-06-18 12:45:08 -04:00
1a5068d77d fucking fedora. 2020-06-18 04:14:18 -04:00
3ece313a6f SOOOO fedora implements ACL bullshit in their mirroring. Rendering this script useless. 2020-06-18 03:42:48 -04:00
061a18e8f4 dang it, don't need a linebreak there. 2020-06-18 00:40:08 -04:00
6d5a382e53 updating README 2020-06-18 00:38:28 -04:00
9 changed files with 188 additions and 16 deletions

11
README
View File

@@ -17,13 +17,14 @@ Configuration/Deployment:
See example.config.xml for details on a configuration file, as it's extensively commented and won't be covered in this README.
You may take special interest in repomirror/utils/find_fastest_upstream/ scripts as they're pre-written to find the fastest (in theory) upstream you can use. Currently only Arch Linux and CentOS have scripts written, but I'll gladly try to add other distros if you open a feature request (see below).
You can also run reposync -h (or reposync --help) to see all supported options.
You may take special interest in repomirror/utils/find_fastest_upstream/ scripts as they're pre-written to find the fastest (in theory) upstream you can use. Currently only Arch Linux and CentOS have scripts written, but I'll gladly try to add other distros if you open a feature request (see below). You can run -h/--help to see the supported options (there aren't many).
Bugs/Feature Requests:
Please use my tracker at https://bugs.square-r00t.net/index.php?project=14 or, if you prefer to not create an account,
simply email me at bts(at)square-r00t[dot]net.
Please use my tracker at https://bugs.square-r00t.net/index.php?project=14 or, if you prefer to not create an account, simply email me at bts(at)square-r00t[dot]net.
Sources:
@@ -36,3 +37,7 @@ This project can be found at/cloned from:
And has a mirrored repository on GitHub (Issues/Wiki/etc. disabled) at:
* https://github.com/johnnybubonic/repomirror
It can also be found on PyPi at:
* https://pypi.org/project/repomirror/

View File

@@ -11,5 +11,6 @@ RSYNC_DEF_ARGS = ['--recursive',
'--delete-excluded',
'--exclude=.*']
# How many days an upstream should have last synced by before it's considered stale.
## TODO: make this part of the upstream config? repo config?
DAYS_WARN = 2
VERSION = '1.0.1'
VERSION = '1.0.4'

View File

@@ -31,7 +31,7 @@ class BaseFetcher(object):
else:
tstmp = datetime.datetime.strptime(tstmp_raw, v.fmt)
self.timestamps[k] = tstmp
_logger.debug('Updated timestamps: {0}'.format(self.timestamps))
_logger.debug('Updated upstream timestamps: {0}'.format(self.timestamps))
return(None)
def fetch_content(self, path):

View File

@@ -10,6 +10,7 @@ sys.path.append(os.path.abspath(os.path.join(_cur_dir, '..')))
import constants
# import logger
from . import _base
from . import rsync_returns
_logger = logging.getLogger()
@@ -70,9 +71,20 @@ class RSync(_base.BaseFetcher):
if stdout != '':
_logger.debug('STDOUT: {0}'.format(stdout))
if stderr != '' or cmd.returncode != 0:
_logger.error('Rsync to {0}:{1} returned exit status {2}'.format(self.domain, self.port, cmd.returncode))
_logger.debug('STDERR: {0}'.format(stderr))
warnings.warn('Rsync process returned non-zero')
rtrn = cmd.returncode
err = rsync_returns.returns[rtrn]
errmsg = 'Rsync to {0}:{1} returned'.format(self.domain, self.port)
debugmsg = 'Rsync command {0} returned'.format(' '.join(cmd_str))
if stderr != '':
errmsg += ' an error message: {0}'.format(stderr)
debugmsg += ' an error message: {0}'.format(stderr)
if rtrn != 0:
errmsg += ' with exit status {0} ({1})'.format(rtrn, err)
debugmsg += ' with exit status {0} ({1})'.format(rtrn, err)
errmsg += '.'
_logger.error(errmsg)
_logger.debug(debugmsg)
warnings.warn(errmsg)
return(None)
def fetch_content(self, remote_filepath):
@@ -91,9 +103,20 @@ class RSync(_base.BaseFetcher):
if stdout != '':
_logger.debug('STDOUT: {0}'.format(stdout))
if stderr != '' or cmd.returncode != 0:
_logger.error('Rsync to {0}:{1} returned exit status {2}'.format(self.domain, self.port, cmd.returncode))
_logger.debug('STDERR: {0}'.format(stderr))
warnings.warn('Rsync process returned non-zero')
rtrn = cmd.returncode
err = rsync_returns.returns[rtrn]
errmsg = 'Rsync to {0}:{1} returned'.format(self.domain, self.port)
debugmsg = 'Rsync command {0} returned'.format(' '.join(cmd_str))
if stderr != '':
errmsg += ' an error message: {0}'.format(stderr)
debugmsg += ' an error message: {0}'.format(stderr)
if rtrn != 0:
errmsg += ' with exit status {0} ({1})'.format(rtrn, err)
debugmsg += ' with exit status {0} ({1})'.format(rtrn, err)
errmsg += '.'
_logger.error(errmsg)
_logger.debug(debugmsg)
warnings.warn(errmsg)
with open(tf, 'rb') as fh:
raw_content = fh.read()
os.remove(tf)

View File

@@ -0,0 +1,22 @@
# Human-readable descriptions for rsync exit codes, as documented in the
# EXIT VALUES section of rsync(1). The rsync fetcher looks codes up here
# to build its error/warning messages after a sync subprocess finishes.
_exit_values = (
    (0, 'Success'),
    (1, 'Syntax or usage error'),
    (2, 'Protocol incompatibility'),
    (3, 'Errors selecting input/output files, dirs'),
    (4, ('Requested action not supported: '
         'an attempt was made to manipulate 64-bit files on a platform that cannot support them; '
         'or an option was specified that is supported by the client and not by the server.')),
    (5, 'Error starting client-server protocol'),
    (6, 'Daemon unable to append to log-file'),
    (10, 'Error in socket I/O'),
    (11, 'Error in file I/O'),
    (12, 'Error in rsync protocol data stream'),
    (13, 'Errors with program diagnostics'),
    (14, 'Error in IPC code'),
    (20, 'Received SIGUSR1 or SIGINT'),
    (21, 'Some error returned by waitpid()'),
    (22, 'Error allocating core memory buffers'),
    (23, 'Partial transfer due to error'),
    (24, 'Partial transfer due to vanished source files'),
    (25, 'The --max-delete limit stopped deletions'),
    (30, 'Timeout in data send/receive'),
    (35, 'Timeout waiting for daemon connection'),
)
returns = dict(_exit_values)

View File

@@ -229,27 +229,40 @@ class Distro(object):
if v:
tstmp = v.read()
self.timestamps[k] = tstmp
_logger.debug('Updated timestamps: {0}'.format(self.timestamps))
_logger.debug('Updated local timestamps: {0}'.format(self.timestamps))
local_checks = sorted([i for i in self.timestamps.values() if i])
if local_checks:
_logger.info('Local timestamps: {0}'.format(', '.join([str(t) for t in local_checks])))
for u in self.upstreams:
if not u.available:
continue
u.fetcher.check()
remote_checks = sorted([i for i in u.fetcher.timestamps.values() if i])
if remote_checks:
_logger.info('Remote timestamps for {0}: {1}'.format(u.domain, ', '.join([str(t)
for t in remote_checks])))
if not any((local_checks, remote_checks)) or not remote_checks:
_logger.info('There are no reliable timestamp comparisons; syncing.')
u.has_new = True
else:
update = u.fetcher.timestamps.get('update')
sync = u.fetcher.timestamps.get('sync')
if update:
if local_checks and local_checks[-1] < update:
if local_checks and (local_checks[-1] < update):
_logger.info('Newest local timestamp is older than the remote update; syncing.')
u.has_new = True
elif not local_checks:
_logger.info('No local timestamps; syncing.')
u.has_new = True
else:
_logger.info('Local checks are newer than upstream.')
else:
_logger.info('No remote update timestamp; syncing.')
u.has_new = True
if sync:
td = datetime.datetime.utcnow() - sync
if td.days > constants.DAYS_WARN:
_logger.warning(('Upstream {0} has not synced for {1}} or more days; this '
_logger.warning(('Upstream {0} has not synced for {1} or more days; this '
'repository may be out of date.').format(u.fetcher.url, constants.DAYS_WARN))
warnings.warn('Upstream may be out of date')
return(None)
@@ -295,12 +308,17 @@ class Distro(object):
fh.write('{0}\n'.format(str(my_pid)))
for u in self.upstreams:
if not u.available:
_logger.debug('Upstream {0} is not available; skipping.'.format(u.domain))
continue
if u.has_new:
_logger.info('Initiating syncing upstream {0}.'.format(u.domain))
u.sync()
_logger.debug('Sync for upstream {0} complete.'.format(u.domain))
if self.filechecks['local']['sync']:
self.filechecks['local']['sync'].write()
break
else:
_logger.debug('Upstream {0} is not new; not syncing.'.format(u.domain))
if self.filechecks['local']['check']:
self.filechecks['local']['check'].write()
os.remove(self.lockfile)
@@ -338,9 +356,10 @@ class Sync(object):
if e is None:
_logger.error('Could not find specified distro {0}; skipping'.format(d))
continue
e = e[0]
logger.filehandler.close()
logger.filehandler.baseFilename = os.path.join(self.logdir, '{0}.log'.format(e.attrib['name']))
distro = Distro(e[0])
distro = Distro(e)
distro.sync()
else:
for e in self.cfg.xml.findall('distro'):

View File

@@ -0,0 +1,2 @@
from . import constants
from . import classes

View File

@@ -2,7 +2,6 @@
import argparse
import csv
import datetime
import io
import re
##

View File

@@ -0,0 +1,101 @@
#!/usr/bin/env python3
import argparse
import re
##
import classes
# Splits a URL into its scheme ('http'/'https') and the rest.
# NOTE(review): not referenced by any active code below; presumably kept for
# the commented-out mirror-table scraper in Ranker.extract_mirrors — confirm.
_proto_re = re.compile(r'^(?P<proto>https?)(?P<uri>.*)')
class Ranker(classes.Ranker):
    """Mirror ranker for EPEL.

    Fedora's mirrormanager does not expose a machine-readable mirror list
    that could be found, so this subclass cannot actually extract candidate
    mirrors; extract_mirrors() only prints a pointer to Fedora's mirroring
    documentation. The commented-out body below is a scaffold for scraping
    the HTML mirror table, kept for when that can be done reliably.
    """
    # No CSV, JSON, or XML that I could find, unfortunately.
    # There's apparently? an API to mirrormanager2 but I can't seem to find a public endpoint nor an endpoint that
    # would return the mirrors.
    mirrorlist_url = 'https://admin.fedoraproject.org/mirrormanager/mirrors/EPEL'
    distro_name = 'EPEL'

    def __init__(self, *args, **kwargs):
        # Initialize the base Ranker, then immediately fetch the mirror list page.
        super().__init__(*args, **kwargs)
        self.get_mirrors()

    def extract_mirrors(self, preferred_proto = 'rsync'):
        """Validate preferred_proto and explain why extraction is unsupported.

        :param preferred_proto: 'rsync' or 'ftp' (case-insensitive); anything
                                else raises ValueError.
        :returns: None. No mirror candidates are ever added; a pointer to
                  Fedora's mirroring documentation is printed instead.
        """
        preferred_proto = preferred_proto.lower()
        if preferred_proto not in ('rsync', 'ftp'):
            raise ValueError('Invalid preferred_proto; must be one of rsync or ftp')
        # Only consumed by the commented-out scraper below.
        non_preferred = ('rsync' if preferred_proto == 'ftp' else 'ftp')
        print(('Fedora (who maintains EPEL) do their mirroring in an extremely weird way.\n'
               'See https://fedoraproject.org/wiki/Infrastructure/Mirroring and '
               'https://fedoraproject.org/wiki/Infrastructure/Mirroring/Tiering#Tier_1_Mirrors for which mirrors and '
               'how to sync.'))
        return(None)
        # Everything below is unreachable scaffolding for parsing the HTML
        # mirror table; kept deliberately for future re-enablement.
        # mirror_section = self.bs.find('h2', string = 'Public active mirrors')
        # mirror_table = mirror_section.find_next('table')
        # if mirror_table is None:
        #     return(None)
        # # https://stackoverflow.com/a/56835562/733214
        # headers = [h.text for h in mirror_table.find_all('th')]
        # rows = [m for m in mirror_table.find_all('tr')][1:]
        # for row in rows:
        #     mirror = {}
        #     do_skip = False
        #     for idx, cell in enumerate(row.find_all('td')):
        #         k = headers[idx]
        #         v = cell.text.strip()
        #         if k == 'Country' and v != self.my_info['country']:
        #             do_skip = True
        #             continue
        #         if k == 'Categories' and not do_skip:
        #             # TODO: DO THIS BETTER! Their mirrorlist sucks and is not easily parsed at all.
        #             # I need to check and try to grab the specific URL that contains "epel".
        #             if 'EPEL' not in v:
        #                 do_skip = True
        #                 continue
        #             pref_proto = cell.find('a', attrs = {
        #                 'href': re.compile(r'^{0}://'.format(preferred_proto), re.IGNORECASE)})
        #             non_pref = cell.find('a', attrs = {
        #                 'href': re.compile(r'^{0}://'.format(non_preferred), re.IGNORECASE)})
        #             if pref_proto is not None:
        #                 v = pref_proto['href']
        #             elif non_pref is not None:
        #                 v = non_pref['href']
        #             else:
        #                 v = None
        #             mirror['url'] = v
        #         # Fedora team can't spell.
        #         elif k in ('Bandwidth', 'Bandwith'):
        #             mirror['bw'] = int(v)
        #     if do_skip:
        #         continue
        #     if not mirror['url']:
        #         continue
        #     self.raw_mirrors.append(mirror)
        #     self.mirror_candidates.append(mirror['url'])
        # return(None)
def parseArgs():
    """Build and return the argparse.ArgumentParser for this script."""
    desc = ('Generate a list of suitable EPEL upstream mirrors in order of '
            'speed.')
    parser = argparse.ArgumentParser(description = desc)
    # -x/--xml switches the output from a plain URL list to a config stub.
    parser.add_argument('-x', '--xml',
                        dest = 'xml',
                        action = 'store_true',
                        help = ('If specified, generate a config stub instead of a printed list of URLs'))
    return parser
def main():
    """Entry point: rank EPEL mirrors and print them as a list or config stub."""
    opts = parseArgs().parse_args()
    ranker = Ranker()
    ranker.extract_mirrors()
    ranker.speedcheck()
    # Default output is a plain printed list; -x/--xml emits a config stub.
    if not opts.xml:
        ranker.print()
    else:
        print(ranker.gen_xml())
    return(None)


if __name__ == '__main__':
    main()