summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorbrent s <bts@square-r00t.net>2020-06-13 03:46:49 -0400
committerbrent s <bts@square-r00t.net>2020-06-13 03:46:49 -0400
commit552611174365d2fd5ce1e6bb1f5abebfd8a09a10 (patch)
tree3787394acaa174a19435795c9852a1681112188c
parent19fddfaacfd1ec5870f7c9d64c8bf8e463573229 (diff)
downloadRepoMirror-552611174365d2fd5ce1e6bb1f5abebfd8a09a10.tar.xz
adding some stuff for repomirror
-rw-r--r--repomirror/__init__.py18
-rw-r--r--repomirror/config.py174
-rw-r--r--repomirror/constants.py2
-rw-r--r--repomirror/logger.py57
-rw-r--r--reposync.py55
-rw-r--r--sample.config.xml81
6 files changed, 387 insertions, 0 deletions
diff --git a/repomirror/__init__.py b/repomirror/__init__.py
new file mode 100644
index 0000000..e309666
--- /dev/null
+++ b/repomirror/__init__.py
@@ -0,0 +1,18 @@
+from . import logger
+##
+import logging
+##
+from . import config
+from . import constants
+
+
+# Root logger (no name is passed to getLogger); shared by everything in this module.
+_logger = logging.getLogger()
+
+
+class Sync(object):
+ def __init__(self, cfg = None, dummy = False, distro = None, logdir = None, *args, **kwargs):
+ _args = dict(locals())
+ del(_args['self'])
+ _logger.debug('Sync class instantiated with args: {0}'.format(_args))
+ self.cfg = config.Config(cfg)
+
diff --git a/repomirror/config.py b/repomirror/config.py
new file mode 100644
index 0000000..bd31f21
--- /dev/null
+++ b/repomirror/config.py
@@ -0,0 +1,174 @@
+import copy
+import datetime
+import os
+import logging
+import re
+import shutil
+##
+import requests
+import requests.auth
+from lxml import etree
+
+
+# Root logger (no name is passed to getLogger); used for all messages emitted by this module.
+_logger = logging.getLogger()
+
+
+def create_default_cfg():
+ # Create a stripped sample config.
+ ws_re = re.compile(r'^\s*$')
+ cur_dir = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
+ samplexml = os.path.abspath(os.path.join(cur_dir, '..', 'example.config.xml'))
+ with open(samplexml, 'rb') as fh:
+ xml = etree.fromstring(fh.read())
+ # Create a stripped sample config.
+ # First we strip comments (and fix the ensuing whitespace).
+ # etree has a .canonicalize(), but it chokes on a default namespace.
+ # https://bugs.launchpad.net/lxml/+bug/1869455
+ # So everything we do is kind of a hack.
+ # for c in xml.xpath("//comment()"):
+ # parent = c.getparent()
+ # parent.remove(c)
+ xmlstr = etree.tostring(xml, with_comments = False, method = 'c14n', pretty_print = True).decode('utf-8')
+ newstr = []
+ for line in xmlstr.splitlines():
+ r = ws_re.search(line)
+ if not r:
+ newstr.append(line.strip())
+ xml = etree.fromstring(''.join(newstr).encode('utf-8'))
+ # Remove text and attr text.
+ xpathq = "descendant-or-self::*[namespace-uri()!='']"
+ for e in xml.xpath(xpathq):
+ if e.tag == '{{{0}}}mirror'.format(xml.nsmap[None]):
+ continue
+ if e.text is not None and e.text.strip() != '':
+ e.text = ''
+ for k, v in e.attrib.items():
+ if v is not None:
+ e.attrib[k] = ''
+ # Remove multiple children of same type to simplify.
+ for e in xml.xpath(xpathq):
+ if e.tag == '{{{0}}}mirror'.format(xml.nsmap[None]):
+ continue
+ parent = e.getparent()
+ try:
+ for idx, child in enumerate(parent.findall(e.tag)):
+ if idx == 0:
+ continue
+ parent.remove(child)
+ except AttributeError:
+ pass
+ # And add a comment pointing them to the fully commented config.
+ xml.insert(0, etree.Comment(('\n Please reference the fully commented example.config.xml found either '
+ 'at:\n '
+ ' * {0}\n * https://git.square-r00t.net/RepoMirror/tree/'
+ 'example.config.xml\n and then configure this according to those '
+ 'instructions.\n ').format(samplexml)))
+ return(etree.tostring(xml,
+ pretty_print = True,
+ with_comments = True,
+ with_tail = True,
+ encoding = 'UTF-8',
+ xml_declaration = True))
+
+
+class Config(object):
+ default_xsd = 'http://schema.xml.r00t2.io/projects/repomirror.xsd'
+ default_xml_path = '~/.config/repomirror.xml'
+
+ def __init__(self, xml_path, *args, **kwargs):
+ if not xml_path:
+ xml_path = self.default_xml_path
+ self.xml_path = os.path.abspath(os.path.expanduser(xml_path))
+ if not os.path.isfile(self.xml_path):
+ with open(self.xml_path, 'wb') as fh:
+ fh.write(create_default_cfg())
+ _logger.error(('{0} does not exist so a sample configuration file has been created in its place. '
+ 'Be sure to configure it appropriately.').format(self.default_xml_path))
+ raise ValueError('Config does not exist')
+ else:
+ with open(xml_path, 'rb') as fh:
+ self.raw = fh.read()
+ self.xml = None
+ self.xsd = None
+ self.ns_xml = None
+ self.tree = None
+ self.ns_tree = None
+ self.defaults_parser = None
+ self.parse_xml()
+ _logger.info('Instantiated {0}.'.format(type(self).__name__))
+
+ def get_xsd(self):
+ raw_xsd = None
+ base_url = None
+ xsi = self.xml.nsmap.get('xsi', 'http://www.w3.org/2001/XMLSchema-instance')
+ schemaLocation = '{{{0}}}schemaLocation'.format(xsi)
+ schemaURL = self.xml.attrib.get(schemaLocation, self.default_xsd)
+ split_url = schemaURL.split()
+ if len(split_url) == 2: # a properly defined schemaLocation
+ schemaURL = split_url[1]
+ else:
+ schemaURL = split_url[0] # a LAZY schemaLocation
+ if schemaURL.startswith('file://'):
+ schemaURL = re.sub(r'^file://', r'', schemaURL)
+ with open(schemaURL, 'rb') as fh:
+ raw_xsd = fh.read()
+ base_url = os.path.dirname(schemaURL) + '/'
+ else:
+ req = requests.get(schemaURL)
+ if not req.ok:
+ raise RuntimeError('Could not download XSD')
+ raw_xsd = req.content
+ base_url = os.path.split(req.url)[0] + '/' # This makes me feel dirty.
+ self.xsd = etree.XMLSchema(etree.XML(raw_xsd, base_url = base_url))
+ return(None)
+
+ def parse_xml(self):
+ self.parse_raw()
+ self.get_xsd()
+ self.populate_defaults()
+ self.validate()
+ return(None)
+
+ def parse_raw(self, parser = None):
+ self.xml = etree.fromstring(self.raw, parser = parser)
+ self.ns_xml = etree.fromstring(self.raw, parser = parser)
+ self.tree = self.xml.getroottree()
+ self.ns_tree = self.ns_xml.getroottree()
+ self.tree.xinclude()
+ self.ns_tree.xinclude()
+ self.strip_ns()
+ return(None)
+
+ def populate_defaults(self):
+ if not self.xsd:
+ self.get_xsd()
+ if not self.defaults_parser:
+ self.defaults_parser = etree.XMLParser(schema = self.xsd, attribute_defaults = True)
+ self.parse_raw(parser = self.defaults_parser)
+ return(None)
+
+ def remove_defaults(self):
+ self.parse_raw()
+ return(None)
+
+ def strip_ns(self, obj = None):
+ # https://stackoverflow.com/questions/30232031/how-can-i-strip-namespaces-out-of-an-lxml-tree/30233635#30233635
+ xpathq = "descendant-or-self::*[namespace-uri()!='']"
+ if not obj:
+ for x in (self.tree, self.xml):
+ for e in x.xpath(xpathq):
+ e.tag = etree.QName(e).localname
+ elif isinstance(obj, (etree._Element, etree._ElementTree)):
+ obj = copy.deepcopy(obj)
+ for e in obj.xpath(xpathq):
+ e.tag = etree.QName(e).localname
+ return(obj)
+ else:
+ raise ValueError('Did not know how to parse obj parameter')
+ return(None)
+
+ def validate(self):
+ if not self.xsd:
+ self.get_xsd()
+ self.xsd.assertValid(self.ns_tree)
+ return(None)
diff --git a/repomirror/constants.py b/repomirror/constants.py
new file mode 100644
index 0000000..72b97c4
--- /dev/null
+++ b/repomirror/constants.py
@@ -0,0 +1,2 @@
+PROTO_DEF_PORTS = {'ftp': 21,
+ 'rsync': 873}
diff --git a/repomirror/logger.py b/repomirror/logger.py
new file mode 100644
index 0000000..8d8fa53
--- /dev/null
+++ b/repomirror/logger.py
@@ -0,0 +1,57 @@
+import logging
+import logging.handlers
+import os
+try:
+ # https://www.freedesktop.org/software/systemd/python-systemd/journal.html#journalhandler-class
+ from systemd import journal
+ _has_journald = True
+except ImportError:
+ _has_journald = False
+
+
+def preplog(logfile = None):
+ if not logfile:
+ if os.geteuid() == 0:
+ logfile = '/var/log/repo/main.log'
+ else:
+ logfile = '~/.cache/repo.log'
+ # Prep the log file.
+ logfile = os.path.abspath(os.path.expanduser(logfile))
+ os.makedirs(os.path.dirname(logfile), exist_ok = True, mode = 0o0700)
+ if not os.path.isfile(logfile):
+ with open(logfile, 'w') as fh:
+ fh.write('')
+ os.chmod(logfile, 0o0600)
+ return(logfile)
+
+
+# And set up logging.
+_cfg_args = {'handlers': [],
+ 'level': logging.DEBUG}
+if _has_journald:
+ # There were some weird changes somewhere along the line.
+ try:
+ # But it's *probably* this one.
+ h = journal.JournalHandler()
+ except AttributeError:
+ h = journal.JournaldLogHandler()
+ # Systemd includes times, so we don't need to.
+ h.setFormatter(logging.Formatter(style = '{',
+ fmt = ('{name}:{levelname}:{name}:{filename}:'
+ '{funcName}:{lineno}: {message}')))
+ _cfg_args['handlers'].append(h)
+
+filehandler = logging.handlers.RotatingFileHandler(preplog(),
+ encoding = 'utf8',
+ # Disable rotating for now.
+ # maxBytes = 50000000000,
+ # backupCount = 30
+ )
+filehandler.setFormatter(logging.Formatter(style = '{',
+ fmt = ('{asctime}:'
+ '{levelname}:{name}:{filename}:'
+ '{funcName}:{lineno}: {message}')))
+_cfg_args['handlers'].append(filehandler)
+logging.basicConfig(**_cfg_args)
+logger = logging.getLogger('Repo Mirror')
+logger.info('Logging initialized.')
diff --git a/reposync.py b/reposync.py
new file mode 100644
index 0000000..62aac9b
--- /dev/null
+++ b/reposync.py
@@ -0,0 +1,55 @@
+#!/usr/bin/env python3
+
+import argparse
+import os
+import pathlib
+import sys
+##
+##
+# Work out where this script really lives and add that directory to the module search path,
+# so that the "repomirror" package next to it can be imported.
+_cur_file = pathlib.Path(os.path.abspath(os.path.expanduser(__file__))).resolve()
+_cur_path = os.path.dirname(_cur_file)
+sys.path.insert(1, _cur_path)
+import repomirror
+
+
+if os.geteuid() == 0:
+ _def_logdir = '/var/log/repo'
+else:
+ _def_logdir = '~/.cache/logs'
+
+
+def parseArgs():
+ args = argparse.ArgumentParser(description = 'Sync repositories for various distributions to local paths')
+ args.add_argument('-c', '--config',
+ default = '~/.config/repomirror.xml',
+ dest = 'cfg',
+ help = ('The path to the config file. If it does not exist, a bare version will be created. '
+ 'Default: ~/.config/repomirror.xmlost'))
+ args.add_argument('-n', '--dry-run',
+ action = 'store_true',
+ dest = 'dummy',
+ help = ('If specified, do not actually sync anything (other than timestamp files if '
+ 'applicable to determine logic); do not actually sync any repositories'))
+ args.add_argument('-d', '--distro',
+ dest = 'distro',
+ action = 'append',
+ help = ('If specified, only sync the specified distro in the config file (otherwise sync all '
+ 'specified). May be given multiple times'))
+ args.add_argument('-l', '--logdir',
+ default = _def_logdir,
+ dest = 'logdir',
+ help = ('The path to the directory where logs should be written. The actual log files will be '
+ 'named after their respective distro names in the config file. '
+ 'Defailt: {0}'.format(_def_logdir)))
+ return(args)
+
+
+def main():
+ args = parseArgs().parse_args()
+ r = repomirror.Sync()
+
+ return(None)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/sample.config.xml b/sample.config.xml
new file mode 100644
index 0000000..c9ea1e2
--- /dev/null
+++ b/sample.config.xml
@@ -0,0 +1,81 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!--
+ This is by default placed in ~/.config/repomirror.xml
+-->
+<mirror xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xmlns="https://git.square-r00t.net/RepoMirror/"
+ xsi:schemaLocation="https://git.square-r00t.net/RepoMirror/ http://schema.xml.r00t2.io/projects/repomirror.xsd">
+ <distro name="arch">
+ <!--
+ The local path to where the hierarchy/files should be synced to.
+ -->
+ <dest>/srv/repos/arch/.</dest>
+ <!--
+ The local file to update with a timestamp with the last time we checked for updates.
+ -->
+ <lastLocalCheck>/srv/http/arch.lastcheck</lastLocalCheck>
+ <!--
+ The file to update with a timestamp with the last time we synced from our upstream.
+ -->
+ <lastLocalSync>/srv/http/arch.lastsync</lastLocalSync>
+ <!--
+ The path to a file on the upstream(s) that gives a time when it last updated.
+ -->
+ <lastRemoteUpdate>/lastupdate</lastRemoteUpdate>
+ <!--
+ The path to a file on the upstream(s) that gives a time when it last synced from its upstream.
+ -->
+ <lastRemoteSync>/lastsync</lastRemoteSync>
+ <!--
+ The path that must be currently mounted for sync to proceed.
+ -->
+ <mountCheck>/</mountCheck>
+ <!--
+ The speed to cap socket bandwidth at (in KiB). Decimals are okay.
+ -->
+ <bwlimit>7000</bwlimit>
+ <upstream>
+ <!--
+ The following example uses "rsync://arch.mirror.constant.com/archlinux/"
+ (https://www.archlinux.org/mirrors/constant.com/1008/)
+ -->
+ <!--
+ One of:
+ * rsync
+ * ftp
+ -->
+ <syncType>rsync</syncType>
+ <!--
+ ONLY the domain goes here.
+ -->
+ <domain>arch.mirror.constant.com</domain>
+ <!--
+ If not specified, the protocol's default port will be used.
+ -->
+ <port>873</port>
+ <!--
+ The *remote* path part of the URI. The leading / is necessary. A trailing one will be assumed.
+ -->
+ <path>/archlinux/</path>
+ </upstream>
+ <!--
+ Multiple upstreams can be specified. They are tried in order specified and if connection fails or times out,
+ the next one will be tried until no more specified upstreams exist.
+ -->
+ <upstream>
+ <syncType>rsync</syncType>
+ <domain>arch.mirror.square-r00t.net</domain>
+ <path>/arch/</path>
+ </upstream>
+ <upstream>
+ <!--
+ Only passive (PASV) mode for FTP is supported.
+ -->
+ <syncType>ftp</syncType>
+ <domain>sub.domain.tld</domain>
+ <port>21</port>
+ <path>/distros/archlinux/</path>
+ </upstream>
+ </distro>
+ <distro name="centos"/>
+</mirror>