14 Commits

Author SHA1 Message Date
a3203ab03a fix reporting zero exit for rsync as non-zero
would give warning output like the following:

/usr/local/lib/repomirror/repomirror/fetcher/rsync.py:78: UserWarning: Rsync process returned non-zero 0 (Success) for rsync --recursive --times --links --hard-links --delete-after --delay-updates --copy-links --safe-links --delete-excluded --no-motd --exclude=.* --verbose --log-file-format="[RSYNC arch.mirror.constant.com:873]:%l:%f%L" --log-file=/var/log/repo/arch.log rsync://arch.mirror.constant.com:873/archlinux/. /srv/repos/arch/.
2020-07-07 13:32:02 -04:00
6d384e71ae version bump 2020-07-07 02:31:01 -04:00
09afe59b91 double curly braces where there should have been one. minor fix, but breaks otherwise. 2020-07-07 02:30:15 -04:00
845cd90ddf fixed apparently. version bump 2020-06-24 01:52:52 -04:00
ac431a873e more logging to try to find cause of refusal to sync. 2020-06-24 01:25:18 -04:00
2ff334f220 still not syncing properly... 2020-06-24 01:15:56 -04:00
c8bf61ea98 whoops 2020-06-24 01:01:58 -04:00
0e16214e45 found a bug in distro selection 2020-06-24 00:59:14 -04:00
0ff9af4c48 way better rsync logging 2020-06-18 13:01:25 -04:00
ac1886e46b better rsync logging 2020-06-18 12:45:08 -04:00
1a5068d77d fucking fedora. 2020-06-18 04:14:18 -04:00
3ece313a6f SOOOO fedora implements ACL bullshit in their mirroring. Rendering this script useless. 2020-06-18 03:42:48 -04:00
061a18e8f4 dang it, don't need a linebreak there. 2020-06-18 00:40:08 -04:00
6d5a382e53 updating README 2020-06-18 00:38:28 -04:00
9 changed files with 188 additions and 16 deletions

11
README
View File

@@ -17,13 +17,14 @@ Configuration/Deployment:
See example.config.xml for details on a configuration file, as it's extensively commented and won't be covered in this README.
You may take special interest in repomirror/utils/find_fastest_upstream/ scripts as they're pre-written to find the fastest (in theory) upstream you can use. Currently only Arch Linux and CentOS have scripts written, but I'll gladly try to add other distros if you open a feature request (see below).
You can also run reposync -h (or reposync --help) to see all supported options.
You may take special interest in repomirror/utils/find_fastest_upstream/ scripts as they're pre-written to find the fastest (in theory) upstream you can use. Currently only Arch Linux and CentOS have scripts written, but I'll gladly try to add other distros if you open a feature request (see below). You can run -h/--help to see the supported options (there aren't many).
Bugs/Feature Requests:
Please use my tracker at https://bugs.square-r00t.net/index.php?project=14 or, if you prefer to not create an account,
simply email me at bts(at)square-r00t[dot]net.
Please use my tracker at https://bugs.square-r00t.net/index.php?project=14 or, if you prefer to not create an account, simply email me at bts(at)square-r00t[dot]net.
Sources:
@@ -36,3 +37,7 @@ This project can be found at/cloned from:
And has a mirrored repository on GitHub (Issues/Wiki/etc. disabled) at:
* https://github.com/johnnybubonic/repomirror
It can also be found on PyPi at:
* https://pypi.org/project/repomirror/

View File

@@ -11,5 +11,6 @@ RSYNC_DEF_ARGS = ['--recursive',
'--delete-excluded',
'--exclude=.*']
# How many days an upstream should have last synced by before it's considered stale.
## TODO: make this part of the upstream config? repo config?
DAYS_WARN = 2
VERSION = '1.0.1'
VERSION = '1.0.4'

View File

@@ -31,7 +31,7 @@ class BaseFetcher(object):
else:
tstmp = datetime.datetime.strptime(tstmp_raw, v.fmt)
self.timestamps[k] = tstmp
_logger.debug('Updated timestamps: {0}'.format(self.timestamps))
_logger.debug('Updated upstream timestamps: {0}'.format(self.timestamps))
return(None)
def fetch_content(self, path):

View File

@@ -10,6 +10,7 @@ sys.path.append(os.path.abspath(os.path.join(_cur_dir, '..')))
import constants
# import logger
from . import _base
from . import rsync_returns
_logger = logging.getLogger()
@@ -70,9 +71,20 @@ class RSync(_base.BaseFetcher):
if stdout != '':
_logger.debug('STDOUT: {0}'.format(stdout))
if stderr != '' or cmd.returncode != 0:
_logger.error('Rsync to {0}:{1} returned exit status {2}'.format(self.domain, self.port, cmd.returncode))
_logger.debug('STDERR: {0}'.format(stderr))
warnings.warn('Rsync process returned non-zero')
rtrn = cmd.returncode
err = rsync_returns.returns[rtrn]
errmsg = 'Rsync to {0}:{1} returned'.format(self.domain, self.port)
debugmsg = 'Rsync command {0} returned'.format(' '.join(cmd_str))
if stderr != '':
errmsg += ' an error message: {0}'.format(stderr)
debugmsg += ' an error message: {0}'.format(stderr)
if rtrn != 0:
errmsg += ' with exit status {0} ({1})'.format(rtrn, err)
debugmsg += ' with exit status {0} ({1})'.format(rtrn, err)
errmsg += '.'
_logger.error(errmsg)
_logger.debug(debugmsg)
warnings.warn(errmsg)
return(None)
def fetch_content(self, remote_filepath):
@@ -91,9 +103,20 @@ class RSync(_base.BaseFetcher):
if stdout != '':
_logger.debug('STDOUT: {0}'.format(stdout))
if stderr != '' or cmd.returncode != 0:
_logger.error('Rsync to {0}:{1} returned exit status {2}'.format(self.domain, self.port, cmd.returncode))
_logger.debug('STDERR: {0}'.format(stderr))
warnings.warn('Rsync process returned non-zero')
rtrn = cmd.returncode
err = rsync_returns.returns[rtrn]
errmsg = 'Rsync to {0}:{1} returned'.format(self.domain, self.port)
debugmsg = 'Rsync command {0} returned'.format(' '.join(cmd_str))
if stderr != '':
errmsg += ' an error message: {0}'.format(stderr)
debugmsg += ' an error message: {0}'.format(stderr)
if rtrn != 0:
errmsg += ' with exit status {0} ({1})'.format(rtrn, err)
debugmsg += ' with exit status {0} ({1})'.format(rtrn, err)
errmsg += '.'
_logger.error(errmsg)
_logger.debug(debugmsg)
warnings.warn(errmsg)
with open(tf, 'rb') as fh:
raw_content = fh.read()
os.remove(tf)

View File

@@ -0,0 +1,22 @@
# Human-readable descriptions for rsync exit codes, as documented in the
# EXIT VALUES section of rsync(1). The rsync fetcher looks codes up here
# to build its error/warning messages after a sync subprocess finishes.
_exit_values = (
    (0, 'Success'),
    (1, 'Syntax or usage error'),
    (2, 'Protocol incompatibility'),
    (3, 'Errors selecting input/output files, dirs'),
    (4, ('Requested action not supported: '
         'an attempt was made to manipulate 64-bit files on a platform that cannot support them; '
         'or an option was specified that is supported by the client and not by the server.')),
    (5, 'Error starting client-server protocol'),
    (6, 'Daemon unable to append to log-file'),
    (10, 'Error in socket I/O'),
    (11, 'Error in file I/O'),
    (12, 'Error in rsync protocol data stream'),
    (13, 'Errors with program diagnostics'),
    (14, 'Error in IPC code'),
    (20, 'Received SIGUSR1 or SIGINT'),
    (21, 'Some error returned by waitpid()'),
    (22, 'Error allocating core memory buffers'),
    (23, 'Partial transfer due to error'),
    (24, 'Partial transfer due to vanished source files'),
    (25, 'The --max-delete limit stopped deletions'),
    (30, 'Timeout in data send/receive'),
    (35, 'Timeout waiting for daemon connection'),
)
returns = dict(_exit_values)

View File

@@ -229,27 +229,40 @@ class Distro(object):
if v:
tstmp = v.read()
self.timestamps[k] = tstmp
_logger.debug('Updated timestamps: {0}'.format(self.timestamps))
_logger.debug('Updated local timestamps: {0}'.format(self.timestamps))
local_checks = sorted([i for i in self.timestamps.values() if i])
if local_checks:
_logger.info('Local timestamps: {0}'.format(', '.join([str(t) for t in local_checks])))
for u in self.upstreams:
if not u.available:
continue
u.fetcher.check()
remote_checks = sorted([i for i in u.fetcher.timestamps.values() if i])
if remote_checks:
_logger.info('Remote timestamps for {0}: {1}'.format(u.domain, ', '.join([str(t)
for t in remote_checks])))
if not any((local_checks, remote_checks)) or not remote_checks:
_logger.info('There are no reliable timestamp comparisons; syncing.')
u.has_new = True
else:
update = u.fetcher.timestamps.get('update')
sync = u.fetcher.timestamps.get('sync')
if update:
if local_checks and local_checks[-1] < update:
if local_checks and (local_checks[-1] < update):
_logger.info('Newest local timestamp is older than the remote update; syncing.')
u.has_new = True
elif not local_checks:
_logger.info('No local timestamps; syncing.')
u.has_new = True
else:
_logger.info('Local checks are newer than upstream.')
else:
_logger.info('No remote update timestamp; syncing.')
u.has_new = True
if sync:
td = datetime.datetime.utcnow() - sync
if td.days > constants.DAYS_WARN:
_logger.warning(('Upstream {0} has not synced for {1}} or more days; this '
_logger.warning(('Upstream {0} has not synced for {1} or more days; this '
'repository may be out of date.').format(u.fetcher.url, constants.DAYS_WARN))
warnings.warn('Upstream may be out of date')
return(None)
@@ -295,12 +308,17 @@ class Distro(object):
fh.write('{0}\n'.format(str(my_pid)))
for u in self.upstreams:
if not u.available:
_logger.debug('Upstream {0} is not available; skipping.'.format(u.domain))
continue
if u.has_new:
_logger.info('Initiating syncing upstream {0}.'.format(u.domain))
u.sync()
_logger.debug('Sync for upstream {0} complete.'.format(u.domain))
if self.filechecks['local']['sync']:
self.filechecks['local']['sync'].write()
break
else:
_logger.debug('Upstream {0} is not new; not syncing.'.format(u.domain))
if self.filechecks['local']['check']:
self.filechecks['local']['check'].write()
os.remove(self.lockfile)
@@ -338,9 +356,10 @@ class Sync(object):
if e is None:
_logger.error('Could not find specified distro {0}; skipping'.format(d))
continue
e = e[0]
logger.filehandler.close()
logger.filehandler.baseFilename = os.path.join(self.logdir, '{0}.log'.format(e.attrib['name']))
distro = Distro(e[0])
distro = Distro(e)
distro.sync()
else:
for e in self.cfg.xml.findall('distro'):

View File

@@ -0,0 +1,2 @@
from . import constants
from . import classes

View File

@@ -2,7 +2,6 @@
import argparse
import csv
import datetime
import io
import re
##

View File

@@ -0,0 +1,101 @@
#!/usr/bin/env python3
import argparse
import re
##
import classes
# Splits a URL into its scheme ('http'/'https') and the rest.
# NOTE(review): not referenced by any active code below; presumably kept for
# the commented-out mirror-table scraper in Ranker.extract_mirrors — confirm.
_proto_re = re.compile(r'^(?P<proto>https?)(?P<uri>.*)')
class Ranker(classes.Ranker):
    """Mirror ranker for EPEL.

    Fedora's mirrormanager does not expose a machine-readable mirror list
    that could be found, so this subclass cannot actually extract candidate
    mirrors; extract_mirrors() only prints a pointer to Fedora's mirroring
    documentation. The commented-out body below is a scaffold for scraping
    the HTML mirror table, kept for when that can be done reliably.
    """
    # No CSV, JSON, or XML that I could find, unfortunately.
    # There's apparently? an API to mirrormanager2 but I can't seem to find a public endpoint nor an endpoint that
    # would return the mirrors.
    mirrorlist_url = 'https://admin.fedoraproject.org/mirrormanager/mirrors/EPEL'
    distro_name = 'EPEL'

    def __init__(self, *args, **kwargs):
        # Initialize the base Ranker, then immediately fetch the mirror list page.
        super().__init__(*args, **kwargs)
        self.get_mirrors()

    def extract_mirrors(self, preferred_proto = 'rsync'):
        """Validate preferred_proto and explain why extraction is unsupported.

        :param preferred_proto: 'rsync' or 'ftp' (case-insensitive); anything
                                else raises ValueError.
        :returns: None. No mirror candidates are ever added; a pointer to
                  Fedora's mirroring documentation is printed instead.
        """
        preferred_proto = preferred_proto.lower()
        if preferred_proto not in ('rsync', 'ftp'):
            raise ValueError('Invalid preferred_proto; must be one of rsync or ftp')
        # Only consumed by the commented-out scraper below.
        non_preferred = ('rsync' if preferred_proto == 'ftp' else 'ftp')
        print(('Fedora (who maintains EPEL) do their mirroring in an extremely weird way.\n'
               'See https://fedoraproject.org/wiki/Infrastructure/Mirroring and '
               'https://fedoraproject.org/wiki/Infrastructure/Mirroring/Tiering#Tier_1_Mirrors for which mirrors and '
               'how to sync.'))
        return(None)
        # Everything below is unreachable scaffolding for parsing the HTML
        # mirror table; kept deliberately for future re-enablement.
        # mirror_section = self.bs.find('h2', string = 'Public active mirrors')
        # mirror_table = mirror_section.find_next('table')
        # if mirror_table is None:
        #     return(None)
        # # https://stackoverflow.com/a/56835562/733214
        # headers = [h.text for h in mirror_table.find_all('th')]
        # rows = [m for m in mirror_table.find_all('tr')][1:]
        # for row in rows:
        #     mirror = {}
        #     do_skip = False
        #     for idx, cell in enumerate(row.find_all('td')):
        #         k = headers[idx]
        #         v = cell.text.strip()
        #         if k == 'Country' and v != self.my_info['country']:
        #             do_skip = True
        #             continue
        #         if k == 'Categories' and not do_skip:
        #             # TODO: DO THIS BETTER! Their mirrorlist sucks and is not easily parsed at all.
        #             # I need to check and try to grab the specific URL that contains "epel".
        #             if 'EPEL' not in v:
        #                 do_skip = True
        #                 continue
        #             pref_proto = cell.find('a', attrs = {
        #                 'href': re.compile(r'^{0}://'.format(preferred_proto), re.IGNORECASE)})
        #             non_pref = cell.find('a', attrs = {
        #                 'href': re.compile(r'^{0}://'.format(non_preferred), re.IGNORECASE)})
        #             if pref_proto is not None:
        #                 v = pref_proto['href']
        #             elif non_pref is not None:
        #                 v = non_pref['href']
        #             else:
        #                 v = None
        #             mirror['url'] = v
        #         # Fedora team can't spell.
        #         elif k in ('Bandwidth', 'Bandwith'):
        #             mirror['bw'] = int(v)
        #     if do_skip:
        #         continue
        #     if not mirror['url']:
        #         continue
        #     self.raw_mirrors.append(mirror)
        #     self.mirror_candidates.append(mirror['url'])
        # return(None)
def parseArgs():
    """Build and return the argparse.ArgumentParser for this script."""
    desc = ('Generate a list of suitable EPEL upstream mirrors in order of '
            'speed.')
    parser = argparse.ArgumentParser(description = desc)
    # -x/--xml switches the output from a plain URL list to a config stub.
    parser.add_argument('-x', '--xml',
                        dest = 'xml',
                        action = 'store_true',
                        help = ('If specified, generate a config stub instead of a printed list of URLs'))
    return parser
def main():
    """Entry point: rank EPEL mirrors and print them as a list or config stub."""
    opts = parseArgs().parse_args()
    ranker = Ranker()
    ranker.extract_mirrors()
    ranker.speedcheck()
    # Default output is a plain printed list; -x/--xml emits a config stub.
    if not opts.xml:
        ranker.print()
    else:
        print(ranker.gen_xml())
    return(None)


if __name__ == '__main__':
    main()