13 Commits

9 changed files with 167 additions and 14 deletions

README

@@ -17,13 +17,14 @@ Configuration/Deployment:
 See example.config.xml for details on a configuration file, as it's extensively commented and won't be covered in this README.
-You may take special interest in repomirror/utils/find_fastest_upstream/ scripts as they're pre-written to find the fastest (in theory) upstream you can use. Currently only Arch Linux and CentOS have scripts written, but I'll gladly try to add other distros if you open a feature request (see below). You can also run reposync -h (or reposync --help) to see all supported options.
+You may take special interest in repomirror/utils/find_fastest_upstream/ scripts as they're pre-written to find the fastest (in theory) upstream you can use. Currently only Arch Linux and CentOS have scripts written, but I'll gladly try to add other distros if you open a feature request (see below). You can run -h/--help to see the supported options (there aren't many).
 Bugs/Feature Requests:
-Please use my tracker at https://bugs.square-r00t.net/index.php?project=14 or, if you prefer to not create an account,
-simply email me at bts(at)square-r00t[dot]net.
+Please use my tracker at https://bugs.square-r00t.net/index.php?project=14 or, if you prefer to not create an account, simply email me at bts(at)square-r00t[dot]net.
 Sources:
@@ -36,3 +37,7 @@ This project can be found at/cloned from:
 And has a mirrored repository on GitHub (Issues/Wiki/etc. disabled) at:
 * https://github.com/johnnybubonic/repomirror
+It can also be found on PyPi at:
+* https://pypi.org/project/repomirror/

constants.py

@@ -12,4 +12,4 @@ RSYNC_DEF_ARGS = ['--recursive',
                   '--exclude=.*']
 # How many days an upstream should have last synced by before it's considered stale.
 DAYS_WARN = 2
-VERSION = '1.0.1'
+VERSION = '1.0.3'

_base.py

@@ -31,7 +31,7 @@ class BaseFetcher(object):
             else:
                 tstmp = datetime.datetime.strptime(tstmp_raw, v.fmt)
             self.timestamps[k] = tstmp
-        _logger.debug('Updated timestamps: {0}'.format(self.timestamps))
+        _logger.debug('Updated upstream timestamps: {0}'.format(self.timestamps))
         return(None)

     def fetch_content(self, path):

rsync fetcher module

@@ -10,6 +10,7 @@ sys.path.append(os.path.abspath(os.path.join(_cur_dir, '..')))
 import constants
 # import logger
 from . import _base
+from . import rsync_returns

 _logger = logging.getLogger()
@@ -70,9 +71,11 @@ class RSync(_base.BaseFetcher):
         if stdout != '':
             _logger.debug('STDOUT: {0}'.format(stdout))
         if stderr != '' or cmd.returncode != 0:
-            _logger.error('Rsync to {0}:{1} returned exit status {2}'.format(self.domain, self.port, cmd.returncode))
+            rtrn = cmd.returncode
+            err = rsync_returns.returns[rtrn]
+            _logger.error(('Rsync to {0}:{1} returned exit status {2}: {3}').format(self.domain, self.port, rtrn, err))
             _logger.debug('STDERR: {0}'.format(stderr))
-            warnings.warn('Rsync process returned non-zero')
+            warnings.warn('Rsync process returned non-zero {0} ({1}) for {2}'.format(rtrn, err, ' '.join(cmd_str)))
         return(None)

     def fetch_content(self, remote_filepath):
@@ -91,9 +94,11 @@ class RSync(_base.BaseFetcher):
         if stdout != '':
             _logger.debug('STDOUT: {0}'.format(stdout))
         if stderr != '' or cmd.returncode != 0:
-            _logger.error('Rsync to {0}:{1} returned exit status {2}'.format(self.domain, self.port, cmd.returncode))
+            rtrn = cmd.returncode
+            err = rsync_returns.returns[rtrn]
+            _logger.error(('Rsync to {0}:{1} returned exit status {2}: {3}').format(self.domain, self.port, rtrn, err))
             _logger.debug('STDERR: {0}'.format(stderr))
-            warnings.warn('Rsync process returned non-zero')
+            warnings.warn('Rsync process returned non-zero {0} ({1}) for {2}'.format(rtrn, err, ' '.join(cmd_str)))
         with open(tf, 'rb') as fh:
             raw_content = fh.read()
         os.remove(tf)

rsync_returns.py (new file)

@@ -0,0 +1,22 @@
+returns = {0: 'Success',
+           1: 'Syntax or usage error',
+           2: 'Protocol incompatibility',
+           3: 'Errors selecting input/output files, dirs',
+           4: ('Requested action not supported: '
+               'an attempt was made to manipulate 64-bit files on a platform that cannot support them; '
+               'or an option was specified that is supported by the client and not by the server.'),
+           5: 'Error starting client-server protocol',
+           6: 'Daemon unable to append to log-file',
+           10: 'Error in socket I/O',
+           11: 'Error in file I/O',
+           12: 'Error in rsync protocol data stream',
+           13: 'Errors with program diagnostics',
+           14: 'Error in IPC code',
+           20: 'Received SIGUSR1 or SIGINT',
+           21: 'Some error returned by waitpid()',
+           22: 'Error allocating core memory buffers',
+           23: 'Partial transfer due to error',
+           24: 'Partial transfer due to vanished source files',
+           25: 'The --max-delete limit stopped deletions',
+           30: 'Timeout in data send/receive',
+           35: 'Timeout waiting for daemon connection'}
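A minimal sketch (not part of the diff) of the lookup the rsync fetcher above performs against this table. Indexing returns[rtrn] directly, as the fetcher does, raises a KeyError for any exit status not in the table (under subprocess, death by signal even yields a negative returncode); dict.get() with a fallback is the defensive variant:

    from rsync_returns import returns  # import path assumed; adjust to the actual package layout

    rtrn = 23
    err = returns.get(rtrn, 'Undocumented return code')  # .get() avoids a KeyError on unknown codes
    print('Rsync returned exit status {0}: {1}'.format(rtrn, err))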

classes.py

@@ -229,27 +229,40 @@ class Distro(object):
             if v:
                 tstmp = v.read()
                 self.timestamps[k] = tstmp
-        _logger.debug('Updated timestamps: {0}'.format(self.timestamps))
+        _logger.debug('Updated local timestamps: {0}'.format(self.timestamps))
         local_checks = sorted([i for i in self.timestamps.values() if i])
+        if local_checks:
+            _logger.info('Local timestamps: {0}'.format(', '.join([str(t) for t in local_checks])))
         for u in self.upstreams:
             if not u.available:
                 continue
             u.fetcher.check()
             remote_checks = sorted([i for i in u.fetcher.timestamps.values() if i])
+            if remote_checks:
+                _logger.info('Remote timestamps for {0}: {1}'.format(u.domain, ', '.join([str(t)
+                                                                                          for t in remote_checks])))
             if not any((local_checks, remote_checks)) or not remote_checks:
+                _logger.info('There are no reliable timestamp comparisons; syncing.')
                 u.has_new = True
             else:
                 update = u.fetcher.timestamps.get('update')
                 sync = u.fetcher.timestamps.get('sync')
                 if update:
-                    if local_checks and local_checks[-1] < update:
+                    if local_checks and (local_checks[-1] < update):
+                        _logger.info('Newest local timestamp is older than the remote update; syncing.')
                         u.has_new = True
                     elif not local_checks:
+                        _logger.info('No local timestamps; syncing.')
                         u.has_new = True
+                    else:
+                        _logger.info('Local checks are newer than upstream.')
+                else:
+                    _logger.info('No remote update timestamp; syncing.')
+                    u.has_new = True
                 if sync:
                     td = datetime.datetime.utcnow() - sync
                     if td.days > constants.DAYS_WARN:
-                        _logger.warning(('Upstream {0} has not synced for {1}} or more days; this '
+                        _logger.warning(('Upstream {0} has not synced for {1} or more days; this '
                                          'repository may be out of date.').format(u.fetcher.url, constants.DAYS_WARN))
                         warnings.warn('Upstream may be out of date')
         return(None)
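The decision logic in this hunk reduces to: sync when there is no usable remote timestamp, when the newest local timestamp predates the upstream's 'update' stamp, or when there is no local history at all; warn separately when the upstream's own 'sync' stamp is older than DAYS_WARN. A condensed, self-contained illustration with hypothetical timestamps (the real code reads these from the filecheck and fetcher objects):

    import datetime

    DAYS_WARN = 2  # from constants: staleness threshold in days

    # Hypothetical values for illustration only.
    local_checks = sorted([datetime.datetime(2020, 11, 1, 12, 0)])
    update = datetime.datetime(2020, 11, 2, 3, 0)  # upstream's last repository update
    sync = datetime.datetime(2020, 10, 20, 3, 0)   # upstream's last pull from its own upstream

    # Sync if the remote updated after our newest local check, or if we have no local history.
    has_new = (not local_checks) or (local_checks[-1] < update)
    print(has_new)  # True

    # Warn (independently of has_new) if the upstream itself looks stale.
    td = datetime.datetime.utcnow() - sync
    if td.days > DAYS_WARN:
        print('Upstream has not synced for {0} or more days'.format(DAYS_WARN))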
@@ -295,12 +308,17 @@ class Distro(object):
         fh.write('{0}\n'.format(str(my_pid)))
         for u in self.upstreams:
             if not u.available:
+                _logger.debug('Upstream {0} is not available; skipping.'.format(u.domain))
                 continue
             if u.has_new:
+                _logger.info('Initiating syncing upstream {0}.'.format(u.domain))
                 u.sync()
+                _logger.debug('Sync for upstream {0} complete.'.format(u.domain))
                 if self.filechecks['local']['sync']:
                     self.filechecks['local']['sync'].write()
                 break
+            else:
+                _logger.debug('Upstream {0} is not new; not syncing.'.format(u.domain))
         if self.filechecks['local']['check']:
             self.filechecks['local']['check'].write()
         os.remove(self.lockfile)
@@ -338,9 +356,10 @@ class Sync(object):
             if e is None:
                 _logger.error('Could not find specified distro {0}; skipping'.format(d))
                 continue
+            e = e[0]
             logger.filehandler.close()
             logger.filehandler.baseFilename = os.path.join(self.logdir, '{0}.log'.format(e.attrib['name']))
-            distro = Distro(e[0])
+            distro = Distro(e)
             distro.sync()
         else:
             for e in self.cfg.xml.findall('distro'):

__init__.py (new file)

@@ -0,0 +1,2 @@
+from . import constants
+from . import classes

find_fastest_upstream script

@@ -2,7 +2,6 @@
 import argparse
 import csv
-import datetime
 import io
 import re
 ##

EPEL find_fastest_upstream script (new file)

@@ -0,0 +1,101 @@
+#!/usr/bin/env python3
+
+import argparse
+import re
+##
+import classes
+
+
+_proto_re = re.compile(r'^(?P<proto>https?)(?P<uri>.*)')
+
+
+class Ranker(classes.Ranker):
+    # No CSV, JSON, or XML that I could find, unfortunately.
+    # There's apparently? an API to mirrormanager2 but I can't seem to find a public endpoint nor an endpoint that
+    # would return the mirrors.
+    mirrorlist_url = 'https://admin.fedoraproject.org/mirrormanager/mirrors/EPEL'
+    distro_name = 'EPEL'
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.get_mirrors()
+
+    def extract_mirrors(self, preferred_proto = 'rsync'):
+        preferred_proto = preferred_proto.lower()
+        if preferred_proto not in ('rsync', 'ftp'):
+            raise ValueError('Invalid preferred_proto; must be one of rsync or ftp')
+        non_preferred = ('rsync' if preferred_proto == 'ftp' else 'ftp')
+        print(('Fedora (who maintains EPEL) do their mirroring in an extremely weird way.\n'
+               'See https://fedoraproject.org/wiki/Infrastructure/Mirroring and '
+               'https://fedoraproject.org/wiki/Infrastructure/Mirroring/Tiering#Tier_1_Mirrors for which mirrors and '
+               'how to sync.'))
+        return(None)
+        # mirror_section = self.bs.find('h2', string = 'Public active mirrors')
+        # mirror_table = mirror_section.find_next('table')
+        # if mirror_table is None:
+        #     return(None)
+        # # https://stackoverflow.com/a/56835562/733214
+        # headers = [h.text for h in mirror_table.find_all('th')]
+        # rows = [m for m in mirror_table.find_all('tr')][1:]
+        # for row in rows:
+        #     mirror = {}
+        #     do_skip = False
+        #     for idx, cell in enumerate(row.find_all('td')):
+        #         k = headers[idx]
+        #         v = cell.text.strip()
+        #         if k == 'Country' and v != self.my_info['country']:
+        #             do_skip = True
+        #             continue
+        #         if k == 'Categories' and not do_skip:
+        #             # TODO: DO THIS BETTER! Their mirrorlist sucks and is not easily parsed at all.
+        #             # I need to check and try to grab the specific URL that contains "epel".
+        #             if 'EPEL' not in v:
+        #                 do_skip = True
+        #                 continue
+        #             pref_proto = cell.find('a', attrs = {
+        #                 'href': re.compile(r'^{0}://'.format(preferred_proto), re.IGNORECASE)})
+        #             non_pref = cell.find('a', attrs = {
+        #                 'href': re.compile(r'^{0}://'.format(non_preferred), re.IGNORECASE)})
+        #             if pref_proto is not None:
+        #                 v = pref_proto['href']
+        #             elif non_pref is not None:
+        #                 v = non_pref['href']
+        #             else:
+        #                 v = None
+        #             mirror['url'] = v
+        #         # Fedora team can't spell.
+        #         elif k in ('Bandwidth', 'Bandwith'):
+        #             mirror['bw'] = int(v)
+        #     if do_skip:
+        #         continue
+        #     if not mirror['url']:
+        #         continue
+        #     self.raw_mirrors.append(mirror)
+        #     self.mirror_candidates.append(mirror['url'])
+        # return(None)
+
+
+def parseArgs():
+    args = argparse.ArgumentParser(description = 'Generate a list of suitable EPEL upstream mirrors in order of '
+                                                 'speed.')
+    args.add_argument('-x', '--xml',
+                      dest = 'xml',
+                      action = 'store_true',
+                      help = ('If specified, generate a config stub instead of a printed list of URLs'))
+    return(args)
+
+
+def main():
+    args = parseArgs().parse_args()
+    r = Ranker()
+    r.extract_mirrors()
+    r.speedcheck()
+    if args.xml:
+        print(r.gen_xml())
+    else:
+        r.print()
+    return(None)
+
+
+if __name__ == '__main__':
+    main()