class Sync(object):
    """Top-level object representing one mirror synchronisation run.

    Construction records the received arguments in the debug log and loads
    the configuration through ``config.Config``.
    """

    def __init__(self, cfg = None, dummy = False, distro = None, logdir = None, *args, **kwargs):
        """Log the call arguments and load the configuration.

        :param cfg: Path handed to ``config.Config``; ``None`` is passed
            through unchanged.
        :param dummy: Accepted and logged; not otherwise used here.
        :param distro: Accepted and logged; not otherwise used here.
        :param logdir: Accepted and logged; not otherwise used here.
        :param args: Extra positional arguments; logged only.
        :param kwargs: Extra keyword arguments; logged only.
        """
        # Snapshot of everything the caller supplied (``self`` excluded), in
        # signature order, purely for the debug log.
        received = {
            'cfg': cfg,
            'dummy': dummy,
            'distro': distro,
            'logdir': logdir,
            'args': args,
            'kwargs': kwargs,
        }
        message = 'Sync class instantiated with args: {0}'.format(received)
        _logger.debug(message)
        self.cfg = config.Config(cfg)
class Config(object):
    """Load a repomirror XML configuration, fetch its XSD and validate it.

    Two parsed copies of the document are kept: ``xml``/``tree`` with the
    namespaces stripped from the element tags, and ``ns_xml``/``ns_tree``
    with the namespaces intact (the latter is what gets validated).
    """

    default_xsd = 'http://schema.xml.r00t2.io/projects/repomirror.xsd'
    default_xml_path = '~/.config/repomirror.xml'
    # Seconds to wait on the schema download; without a timeout a stalled
    # server would block construction forever.
    xsd_timeout = 30
    # Selects every element that carries a namespace.
    _ns_xpath = "descendant-or-self::*[namespace-uri()!='']"

    def __init__(self, xml_path, *args, **kwargs):
        """Read the configuration at ``xml_path`` and parse/validate it.

        :param xml_path: Path to the XML configuration. A falsy value selects
            ``default_xml_path``. ``~`` is expanded and the result is made
            absolute.
        :raises ValueError: If the file does not exist. A sample
            configuration is written to that path first.
        """
        if not xml_path:
            xml_path = self.default_xml_path
        self.xml_path = os.path.abspath(os.path.expanduser(xml_path))
        if not os.path.isfile(self.xml_path):
            # The parent directory (e.g. ~/.config) may not exist yet.
            os.makedirs(os.path.dirname(self.xml_path), exist_ok=True)
            with open(self.xml_path, 'wb') as fh:
                fh.write(create_default_cfg())
            # Report the path that was actually written, not the class default.
            _logger.error(('{0} does not exist so a sample configuration file has been created in its place. '
                           'Be sure to configure it appropriately.').format(self.xml_path))
            raise ValueError('Config does not exist')
        # Open the expanded path; the raw argument may still contain '~'.
        with open(self.xml_path, 'rb') as fh:
            self.raw = fh.read()
        self.xml = None
        self.xsd = None
        self.ns_xml = None
        self.tree = None
        self.ns_tree = None
        self.defaults_parser = None
        self.parse_xml()
        _logger.info('Instantiated {0}.'.format(type(self).__name__))

    def get_xsd(self):
        """Locate and load the XSD, storing it in ``self.xsd``.

        The location comes from the root element's ``xsi:schemaLocation``
        attribute, falling back to ``default_xsd``. ``file://`` locations are
        read from disk; anything else is downloaded with ``requests``.

        :raises RuntimeError: If the download does not succeed.
        """
        xsi = self.xml.nsmap.get('xsi', 'http://www.w3.org/2001/XMLSchema-instance')
        attr_name = '{{{0}}}schemaLocation'.format(xsi)
        location = self.xml.attrib.get(attr_name, self.default_xsd)
        # An empty/whitespace-only attribute would otherwise raise IndexError.
        tokens = location.split() or [self.default_xsd]
        if len(tokens) == 2:
            # A properly defined schemaLocation: "<namespace> <location>".
            schema_url = tokens[1]
        else:
            # A lazy schemaLocation holding only the location.
            schema_url = tokens[0]
        prefix = 'file://'
        if schema_url.startswith(prefix):
            schema_path = schema_url[len(prefix):]
            with open(schema_path, 'rb') as fh:
                raw_xsd = fh.read()
            base_url = os.path.dirname(schema_path) + '/'
        else:
            req = requests.get(schema_url, timeout=self.xsd_timeout)
            if not req.ok:
                raise RuntimeError('Could not download XSD')
            raw_xsd = req.content
            # Everything up to the last '/' of the final URL is used as the
            # base for resolving references inside the schema.
            base_url = os.path.split(req.url)[0] + '/'
        self.xsd = etree.XMLSchema(etree.XML(raw_xsd, base_url=base_url))
        return None

    def parse_xml(self):
        """Run the full pipeline: parse, load XSD, apply defaults, validate."""
        self.parse_raw()
        self.get_xsd()
        self.populate_defaults()
        self.validate()
        return None

    def parse_raw(self, parser = None):
        """Parse ``self.raw`` into the stripped and namespaced trees.

        :param parser: Optional lxml parser used for both parses.
        """
        self.xml = etree.fromstring(self.raw, parser=parser)
        self.ns_xml = etree.fromstring(self.raw, parser=parser)
        self.tree = self.xml.getroottree()
        self.ns_tree = self.ns_xml.getroottree()
        self.tree.xinclude()
        self.ns_tree.xinclude()
        # Only xml/tree are stripped; ns_xml/ns_tree keep their namespaces.
        self.strip_ns()
        return None

    def populate_defaults(self):
        """Re-parse using a parser built from the XSD with attribute defaults on."""
        if self.xsd is None:
            self.get_xsd()
        if self.defaults_parser is None:
            self.defaults_parser = etree.XMLParser(schema=self.xsd, attribute_defaults=True)
        self.parse_raw(parser=self.defaults_parser)
        return None

    def remove_defaults(self):
        """Re-parse ``self.raw`` with the default parser."""
        self.parse_raw()
        return None

    def _localize_tags(self, node):
        """Replace each namespaced tag under ``node`` with its local name, in place."""
        for element in node.xpath(self._ns_xpath):
            element.tag = etree.QName(element).localname

    def strip_ns(self, obj = None):
        """Strip namespaces from element tags.

        :param obj: ``None`` strips ``self.tree``/``self.xml`` in place and
            returns ``None``. An lxml element or element tree is deep-copied,
            stripped, and the copy is returned.
        :raises ValueError: If ``obj`` is neither ``None`` nor an lxml
            element/tree.
        """
        # https://stackoverflow.com/questions/30232031/how-can-i-strip-namespaces-out-of-an-lxml-tree/30233635#30233635
        # Compare against None explicitly: an lxml element without children is
        # falsy, so a truthiness test would mistake it for "no argument".
        if obj is None:
            for node in (self.tree, self.xml):
                self._localize_tags(node)
            return None
        if isinstance(obj, (etree._Element, etree._ElementTree)):
            stripped = copy.deepcopy(obj)
            self._localize_tags(stripped)
            return stripped
        raise ValueError('Did not know how to parse obj parameter')

    def validate(self):
        """Assert that the namespaced tree is valid against the XSD.

        Loads the XSD first if it has not been loaded; lets the exception
        raised by ``assertValid`` propagate on failure.
        """
        if self.xsd is None:
            self.get_xsd()
        self.xsd.assertValid(self.ns_tree)
        return None
def preplog(logfile = None):
    """Make sure the log file and its directory exist; return its absolute path.

    :param logfile: Path of the log file. When falsy, ``/var/log/repo/main.log``
        is used if the effective UID is 0, otherwise ``~/.cache/repo.log``.
    :returns: The absolute, ``~``-expanded path of the log file.
    """
    if not logfile:
        # os.geteuid() only exists on POSIX; elsewhere fall back to the
        # per-user location.
        is_root = hasattr(os, 'geteuid') and os.geteuid() == 0
        logfile = '/var/log/repo/main.log' if is_root else '~/.cache/repo.log'
    # Prep the log file.
    logfile = os.path.abspath(os.path.expanduser(logfile))
    os.makedirs(os.path.dirname(logfile), exist_ok=True, mode=0o0700)
    # Create the file (if missing) already restricted to the owner instead of
    # creating it with umask-default permissions and tightening afterwards.
    # O_APPEND without O_TRUNC leaves an existing log untouched.
    fd = os.open(logfile, os.O_WRONLY | os.O_CREAT | os.O_APPEND, 0o0600)
    os.close(fd)
    # Also enforce the mode on a file that already existed.
    os.chmod(logfile, 0o0600)
    return logfile
# Default log directory: system-wide when running with effective UID 0,
# per-user otherwise. os.geteuid() is POSIX-only, so platforms without it
# get the per-user location.
if hasattr(os, 'geteuid') and os.geteuid() == 0:
    _def_logdir = '/var/log/repo'
else:
    _def_logdir = '~/.cache/logs'


def parseArgs():
    """Build the command-line parser for the sync script.

    :returns: The configured ``argparse.ArgumentParser`` (not yet parsed).
        Parsed namespaces carry ``cfg``, ``dummy``, ``distro`` and ``logdir``.
    """
    def_cfg = '~/.config/repomirror.xml'
    args = argparse.ArgumentParser(description = 'Sync repositories for various distributions to local paths')
    args.add_argument('-c', '--config',
                      default = def_cfg,
                      dest = 'cfg',
                      help = ('The path to the config file. If it does not exist, a bare version will be created. '
                              'Default: {0}'.format(def_cfg)))
    args.add_argument('-n', '--dry-run',
                      action = 'store_true',
                      dest = 'dummy',
                      help = ('If specified, do not actually sync anything (other than timestamp files if '
                              'applicable to determine logic); do not actually sync any repositories'))
    args.add_argument('-d', '--distro',
                      dest = 'distro',
                      action = 'append',
                      help = ('If specified, only sync the specified distro in the config file (otherwise sync all '
                              'specified). May be given multiple times'))
    args.add_argument('-l', '--logdir',
                      default = _def_logdir,
                      dest = 'logdir',
                      help = ('The path to the directory where logs should be written. The actual log files will be '
                              'named after their respective distro names in the config file. '
                              'Default: {0}'.format(_def_logdir)))
    return args


def main():
    """Parse the command line and instantiate ``repomirror.Sync`` with it."""
    args = parseArgs().parse_args()
    # The option dests (cfg, dummy, distro, logdir) match Sync's keyword
    # parameters, so forward them instead of discarding the parsed values.
    repomirror.Sync(**vars(args))
    return None