Compare commits
14 Commits
| Author | SHA1 | Date |
|---|---|---|
| | a3203ab03a | |
| | 6d384e71ae | |
| | 09afe59b91 | |
| | 845cd90ddf | |
| | ac431a873e | |
| | 2ff334f220 | |
| | c8bf61ea98 | |
| | 0e16214e45 | |
| | 0ff9af4c48 | |
| | ac1886e46b | |
| | 1a5068d77d | |
| | 3ece313a6f | |
| | 061a18e8f4 | |
| | 6d5a382e53 | |
README (11 changed lines)

@@ -17,13 +17,14 @@ Configuration/Deployment:
 
 See example.config.xml for details on a configuration file, as it's extensively commented and won't be covered in this README.
 
-You may take special interest in repomirror/utils/find_fastest_upstream/ scripts as they're pre-written to find the fastest (in theory) upstream you can use. Currently only Arch Linux and CentOS have scripts written, but I'll gladly try to add other distros if you open a feature request (see below).
+You can also run reposync -h (or reposync --help) to see all supported options.
+
+You may take special interest in repomirror/utils/find_fastest_upstream/ scripts as they're pre-written to find the fastest (in theory) upstream you can use. Currently only Arch Linux and CentOS have scripts written, but I'll gladly try to add other distros if you open a feature request (see below). You can run -h/--help to see the supported options (there aren't many).
 
 Bugs/Feature Requests:
 
-Please use my tracker at https://bugs.square-r00t.net/index.php?project=14 or, if you prefer to not create an account,
-simply email me at bts(at)square-r00t[dot]net.
+Please use my tracker at https://bugs.square-r00t.net/index.php?project=14 or, if you prefer to not create an account, simply email me at bts(at)square-r00t[dot]net.
 
 
 Sources:
 
@@ -36,3 +37,7 @@ This project can be found at/cloned from:
 And has a mirrored repository on GitHub (Issues/Wiki/etc. disabled) at:
 
 * https://github.com/johnnybubonic/repomirror
+
+It can also be found on PyPi at:
+
+* https://pypi.org/project/repomirror/
@@ -11,5 +11,6 @@ RSYNC_DEF_ARGS = ['--recursive',
                   '--delete-excluded',
                   '--exclude=.*']
 # How many days an upstream should have last synced by before it's considered stale.
+## TODO: make this part of the upstream config? repo config?
 DAYS_WARN = 2
-VERSION = '1.0.1'
+VERSION = '1.0.4'
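The new TODO line flags that DAYS_WARN may eventually move into per-upstream or per-repo configuration. For reference, this is roughly how the constant is consumed by Distro.check() further down; a minimal sketch, with a made-up timestamp standing in for a parsed upstream lastsync value:

```python
import datetime

# Mirrors the DAYS_WARN constant above; the timestamp is a made-up stand-in
# for a parsed upstream "lastsync" value.
DAYS_WARN = 2
sync_timestamp = datetime.datetime(2020, 1, 1, 12, 0, 0)

td = datetime.datetime.utcnow() - sync_timestamp
if td.days > DAYS_WARN:
    print('Upstream has not synced for {0} or more days; '
          'this repository may be out of date.'.format(DAYS_WARN))
```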
@@ -31,7 +31,7 @@ class BaseFetcher(object):
             else:
                 tstmp = datetime.datetime.strptime(tstmp_raw, v.fmt)
             self.timestamps[k] = tstmp
-        _logger.debug('Updated timestamps: {0}'.format(self.timestamps))
+        _logger.debug('Updated upstream timestamps: {0}'.format(self.timestamps))
         return(None)
 
     def fetch_content(self, path):
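For context on the line being relabeled: tstmp_raw is the raw contents of an upstream timestamp file, and v.fmt is the strptime format attached to that check. A minimal sketch of the parsing step, with assumed example values since the real ones come from the distro config:

```python
import datetime

# Assumed example values; in the fetcher, the raw string comes from the
# fetched timestamp file and the format from the check definition (v.fmt).
fmt = '%a, %d %b %Y %H:%M:%S %z'
tstmp_raw = 'Wed, 01 Jan 2020 12:00:00 +0000'

tstmp = datetime.datetime.strptime(tstmp_raw, fmt)
print(tstmp.isoformat())  # 2020-01-01T12:00:00+00:00
```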
@@ -10,6 +10,7 @@ sys.path.append(os.path.abspath(os.path.join(_cur_dir, '..')))
 import constants
 # import logger
 from . import _base
+from . import rsync_returns
 
 
 _logger = logging.getLogger()
@@ -70,9 +71,20 @@ class RSync(_base.BaseFetcher):
         if stdout != '':
             _logger.debug('STDOUT: {0}'.format(stdout))
         if stderr != '' or cmd.returncode != 0:
-            _logger.error('Rsync to {0}:{1} returned exit status {2}'.format(self.domain, self.port, cmd.returncode))
-            _logger.debug('STDERR: {0}'.format(stderr))
-            warnings.warn('Rsync process returned non-zero')
+            rtrn = cmd.returncode
+            err = rsync_returns.returns[rtrn]
+            errmsg = 'Rsync to {0}:{1} returned'.format(self.domain, self.port)
+            debugmsg = 'Rsync command {0} returned'.format(' '.join(cmd_str))
+            if stderr != '':
+                errmsg += ' an error message: {0}'.format(stderr)
+                debugmsg += ' an error message: {0}'.format(stderr)
+            if rtrn != 0:
+                errmsg += ' with exit status {0} ({1})'.format(rtrn, err)
+                debugmsg += ' with exit status {0} ({1})'.format(rtrn, err)
+            errmsg += '.'
+            _logger.error(errmsg)
+            _logger.debug(debugmsg)
+            warnings.warn(errmsg)
         return(None)
 
     def fetch_content(self, remote_filepath):
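The rewritten branch translates the exit code through the new rsync_returns table and assembles the operator-facing and debug messages incrementally, so stderr output and a non-zero status each contribute only when present. The same pattern is applied again in fetch_content() below. A standalone sketch of the assembly, with an abbreviated table and made-up host/command values:

```python
import warnings

# Abbreviated stand-in for repomirror/fetcher/rsync_returns.py; the full
# table appears in the new file below.
returns = {0: 'Success',
           23: 'Partial transfer due to error'}


def report(domain, port, cmd_str, returncode, stderr):
    # Append only the parts that apply, exactly as the rewritten branch does.
    rtrn = returncode
    err = returns[rtrn]
    errmsg = 'Rsync to {0}:{1} returned'.format(domain, port)
    debugmsg = 'Rsync command {0} returned'.format(' '.join(cmd_str))
    if stderr != '':
        errmsg += ' an error message: {0}'.format(stderr)
        debugmsg += ' an error message: {0}'.format(stderr)
    if rtrn != 0:
        errmsg += ' with exit status {0} ({1})'.format(rtrn, err)
        debugmsg += ' with exit status {0} ({1})'.format(rtrn, err)
    errmsg += '.'
    warnings.warn(errmsg)
    return(debugmsg)


# Made-up host and command, purely for illustration.
print(report('mirror.example.com', 873, ['rsync', '-a'], 23, 'some files vanished'))
```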
@@ -91,9 +103,20 @@ class RSync(_base.BaseFetcher):
         if stdout != '':
             _logger.debug('STDOUT: {0}'.format(stdout))
         if stderr != '' or cmd.returncode != 0:
-            _logger.error('Rsync to {0}:{1} returned exit status {2}'.format(self.domain, self.port, cmd.returncode))
-            _logger.debug('STDERR: {0}'.format(stderr))
-            warnings.warn('Rsync process returned non-zero')
+            rtrn = cmd.returncode
+            err = rsync_returns.returns[rtrn]
+            errmsg = 'Rsync to {0}:{1} returned'.format(self.domain, self.port)
+            debugmsg = 'Rsync command {0} returned'.format(' '.join(cmd_str))
+            if stderr != '':
+                errmsg += ' an error message: {0}'.format(stderr)
+                debugmsg += ' an error message: {0}'.format(stderr)
+            if rtrn != 0:
+                errmsg += ' with exit status {0} ({1})'.format(rtrn, err)
+                debugmsg += ' with exit status {0} ({1})'.format(rtrn, err)
+            errmsg += '.'
+            _logger.error(errmsg)
+            _logger.debug(debugmsg)
+            warnings.warn(errmsg)
         with open(tf, 'rb') as fh:
             raw_content = fh.read()
         os.remove(tf)
repomirror/fetcher/rsync_returns.py (new file, 22 lines)

@@ -0,0 +1,22 @@
+returns = {0: 'Success',
+           1: 'Syntax or usage error',
+           2: 'Protocol incompatibility',
+           3: 'Errors selecting input/output files, dirs',
+           4: ('Requested action not supported: '
+               'an attempt was made to manipulate 64-bit files on a platform that cannot support them; '
+               'or an option was specified that is supported by the client and not by the server.'),
+           5: 'Error starting client-server protocol',
+           6: 'Daemon unable to append to log-file',
+           10: 'Error in socket I/O',
+           11: 'Error in file I/O',
+           12: 'Error in rsync protocol data stream',
+           13: 'Errors with program diagnostics',
+           14: 'Error in IPC code',
+           20: 'Received SIGUSR1 or SIGINT',
+           21: 'Some error returned by waitpid()',
+           22: 'Error allocating core memory buffers',
+           23: 'Partial transfer due to error',
+           24: 'Partial transfer due to vanished source files',
+           25: 'The --max-delete limit stopped deletions',
+           30: 'Timeout in data send/receive',
+           35: 'Timeout waiting for daemon connection'}
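The table mirrors the EXIT VALUES section of rsync(1). The callers above index it directly (rsync_returns.returns[rtrn]), which would raise KeyError for a code the table doesn't cover; a defensive lookup, sketched here assuming the package is importable, would fall back to a generic label:

```python
from repomirror.fetcher import rsync_returns


def describe(returncode):
    # The fetcher indexes the table directly (returns[rtrn]), which raises
    # KeyError for codes it doesn't list; .get() is a defensive variant.
    return(rsync_returns.returns.get(returncode, 'Unknown rsync exit status'))


print(describe(23))  # Partial transfer due to error
print(describe(99))  # Unknown rsync exit status
```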
@@ -229,27 +229,40 @@ class Distro(object):
             if v:
                 tstmp = v.read()
             self.timestamps[k] = tstmp
-        _logger.debug('Updated timestamps: {0}'.format(self.timestamps))
+        _logger.debug('Updated local timestamps: {0}'.format(self.timestamps))
         local_checks = sorted([i for i in self.timestamps.values() if i])
+        if local_checks:
+            _logger.info('Local timestamps: {0}'.format(', '.join([str(t) for t in local_checks])))
         for u in self.upstreams:
             if not u.available:
                 continue
             u.fetcher.check()
             remote_checks = sorted([i for i in u.fetcher.timestamps.values() if i])
+            if remote_checks:
+                _logger.info('Remote timestamps for {0}: {1}'.format(u.domain, ', '.join([str(t)
+                                                                                          for t in remote_checks])))
             if not any((local_checks, remote_checks)) or not remote_checks:
+                _logger.info('There are no reliable timestamp comparisons; syncing.')
                 u.has_new = True
             else:
                 update = u.fetcher.timestamps.get('update')
                 sync = u.fetcher.timestamps.get('sync')
                 if update:
-                    if local_checks and local_checks[-1] < update:
+                    if local_checks and (local_checks[-1] < update):
+                        _logger.info('Newest local timestamp is older than the remote update; syncing.')
                         u.has_new = True
                     elif not local_checks:
+                        _logger.info('No local timestamps; syncing.')
                         u.has_new = True
+                    else:
+                        _logger.info('Local checks are newer than upstream.')
+                else:
+                    _logger.info('No remote update timestamp; syncing.')
+                    u.has_new = True
                 if sync:
                     td = datetime.datetime.utcnow() - sync
                     if td.days > constants.DAYS_WARN:
-                        _logger.warning(('Upstream {0} has not synced for {1}} or more days; this '
+                        _logger.warning(('Upstream {0} has not synced for {1} or more days; this '
                                          'repository may be out of date.').format(u.fetcher.url, constants.DAYS_WARN))
                         warnings.warn('Upstream may be out of date')
         return(None)
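Alongside the logging (and the fix to the stray brace in the warning's format string), the rewrite also flags an upstream whose remote 'update' timestamp is missing entirely: the new else branch sets has_new. Condensed, the per-upstream decision now reads as follows; a sketch using plain datetimes in place of the fetcher's parsed values:

```python
import datetime


def needs_sync(local_checks, update):
    # local_checks: sorted local timestamps (possibly empty);
    # update: the upstream's 'update' timestamp, or None.
    if not update:
        return(True)  # no remote update timestamp; sync
    if not local_checks:
        return(True)  # no local timestamps; sync
    return(local_checks[-1] < update)  # sync only if the remote is newer


now = datetime.datetime.utcnow()
earlier = now - datetime.timedelta(days = 1)
print(needs_sync([earlier], now))  # True: the remote update is newer
print(needs_sync([now], earlier))  # False: local is already current
```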
@@ -295,12 +308,17 @@ class Distro(object):
             fh.write('{0}\n'.format(str(my_pid)))
         for u in self.upstreams:
             if not u.available:
+                _logger.debug('Upstream {0} is not available; skipping.'.format(u.domain))
                 continue
             if u.has_new:
+                _logger.info('Initiating syncing upstream {0}.'.format(u.domain))
                 u.sync()
+                _logger.debug('Sync for upstream {0} complete.'.format(u.domain))
                 if self.filechecks['local']['sync']:
                     self.filechecks['local']['sync'].write()
                 break
+            else:
+                _logger.debug('Upstream {0} is not new; not syncing.'.format(u.domain))
         if self.filechecks['local']['check']:
             self.filechecks['local']['check'].write()
         os.remove(self.lockfile)
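For context on the surrounding lines: sync() writes its PID to a lockfile before walking the upstreams and removes it at the end, so overlapping runs can detect each other. A minimal sketch of that pidfile pattern, with a hypothetical path in place of the instance's lockfile attribute:

```python
import os

# Hypothetical path, purely for illustration; the real one comes from the
# instance's lockfile attribute.
lockfile = '/tmp/repomirror.example.lck'

my_pid = os.getpid()
with open(lockfile, 'w') as fh:
    fh.write('{0}\n'.format(str(my_pid)))
try:
    pass  # per-upstream sync work happens here
finally:
    os.remove(lockfile)
```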
@@ -338,9 +356,10 @@ class Sync(object):
             if e is None:
                 _logger.error('Could not find specified distro {0}; skipping'.format(d))
                 continue
+            e = e[0]
             logger.filehandler.close()
             logger.filehandler.baseFilename = os.path.join(self.logdir, '{0}.log'.format(e.attrib['name']))
-            distro = Distro(e[0])
+            distro = Distro(e)
             distro.sync()
         else:
             for e in self.cfg.xml.findall('distro'):
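The fix reflects that findall() returns a list of matching elements: previously e was the list itself, so e.attrib['name'] on the log-file line would have failed, and Distro(e[0]) only unpacked it in one of the two places. Unpacking once with e = e[0] makes both uses consistent. A small illustration with xml.etree (the real code operates on the project's parsed config XML; the element names here are made up):

```python
import xml.etree.ElementTree as etree

# Tiny stand-in for the parsed config.
cfg = etree.fromstring('<config><distro name="arch"/><distro name="centos"/></config>')

e = cfg.findall("distro[@name='arch']")  # findall() always returns a list
e = e[0]                                 # unpack once, then use the element
print(e.attrib['name'])                  # arch
```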
@@ -0,0 +1,2 @@
+from . import constants
+from . import classes
@@ -2,7 +2,6 @@
 
 import argparse
 import csv
-import datetime
 import io
 import re
 ##
repomirror/utils/find_fastest_upstream/centos_epel.py (new executable file, 101 lines)

@@ -0,0 +1,101 @@
+#!/usr/bin/env python3
+
+import argparse
+import re
+##
+import classes
+
+
+_proto_re = re.compile(r'^(?P<proto>https?)(?P<uri>.*)')
+
+
+class Ranker(classes.Ranker):
+    # No CSV, JSON, or XML that I could find, unfortunately.
+    # There's apparently? an API to mirrormanager2 but I can't seem to find a public endpoint nor an endpoint that
+    # would return the mirrors.
+    mirrorlist_url = 'https://admin.fedoraproject.org/mirrormanager/mirrors/EPEL'
+    distro_name = 'EPEL'
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.get_mirrors()
+
+    def extract_mirrors(self, preferred_proto = 'rsync'):
+        preferred_proto = preferred_proto.lower()
+        if preferred_proto not in ('rsync', 'ftp'):
+            raise ValueError('Invalid preferred_proto; must be one of rsync or ftp')
+        non_preferred = ('rsync' if preferred_proto == 'ftp' else 'ftp')
+        print(('Fedora (who maintains EPEL) do their mirroring in an extremely weird way.\n'
+               'See https://fedoraproject.org/wiki/Infrastructure/Mirroring and '
+               'https://fedoraproject.org/wiki/Infrastructure/Mirroring/Tiering#Tier_1_Mirrors for which mirrors and '
+               'how to sync.'))
+        return(None)
+        # mirror_section = self.bs.find('h2', string = 'Public active mirrors')
+        # mirror_table = mirror_section.find_next('table')
+        # if mirror_table is None:
+        #     return(None)
+        # # https://stackoverflow.com/a/56835562/733214
+        # headers = [h.text for h in mirror_table.find_all('th')]
+        # rows = [m for m in mirror_table.find_all('tr')][1:]
+        # for row in rows:
+        #     mirror = {}
+        #     do_skip = False
+        #     for idx, cell in enumerate(row.find_all('td')):
+        #         k = headers[idx]
+        #         v = cell.text.strip()
+        #         if k == 'Country' and v != self.my_info['country']:
+        #             do_skip = True
+        #             continue
+        #         if k == 'Categories' and not do_skip:
+        #             # TODO: DO THIS BETTER! Their mirrorlist sucks and is not easily parsed at all.
+        #             # I need to check and try to grab the specific URL that contains "epel".
+        #             if 'EPEL' not in v:
+        #                 do_skip = True
+        #                 continue
+        #             pref_proto = cell.find('a', attrs = {
+        #                 'href': re.compile(r'^{0}://'.format(preferred_proto), re.IGNORECASE)})
+        #             non_pref = cell.find('a', attrs = {
+        #                 'href': re.compile(r'^{0}://'.format(non_preferred), re.IGNORECASE)})
+        #             if pref_proto is not None:
+        #                 v = pref_proto['href']
+        #             elif non_pref is not None:
+        #                 v = non_pref['href']
+        #             else:
+        #                 v = None
+        #             mirror['url'] = v
+        #         # Fedora team can't spell.
+        #         elif k in ('Bandwidth', 'Bandwith'):
+        #             mirror['bw'] = int(v)
+        #     if do_skip:
+        #         continue
+        #     if not mirror['url']:
+        #         continue
+        #     self.raw_mirrors.append(mirror)
+        #     self.mirror_candidates.append(mirror['url'])
+        # return(None)
+
+
+def parseArgs():
+    args = argparse.ArgumentParser(description = 'Generate a list of suitable EPEL upstream mirrors in order of '
+                                                 'speed.')
+    args.add_argument('-x', '--xml',
+                      dest = 'xml',
+                      action = 'store_true',
+                      help = ('If specified, generate a config stub instead of a printed list of URLs'))
+    return(args)
+
+
+def main():
+    args = parseArgs().parse_args()
+    r = Ranker()
+    r.extract_mirrors()
+    r.speedcheck()
+    if args.xml:
+        print(r.gen_xml())
+    else:
+        r.print()
+    return(None)
+
+
+if __name__ == '__main__':
+    main()
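Since extract_mirrors() currently prints a pointer to Fedora's mirroring documentation and returns before the (commented-out) scraping logic, the script is effectively a stub; the rest of the workflow comes from the shared classes.Ranker base. A sketch of driving it as a module, assuming the script's directory (with classes.py alongside) is importable:

```python
# Method names are the ones main() above calls; the import assumes the
# find_fastest_upstream directory is on sys.path.
from centos_epel import Ranker

r = Ranker()         # __init__() calls self.get_mirrors()
r.extract_mirrors()  # currently just prints the pointer to Fedora's docs
r.speedcheck()       # provided by classes.Ranker: rank candidates by speed
print(r.gen_xml())   # or r.print(): config stub vs. a plain list of URLs
```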