adding some stuff for repomirror
This commit is contained in:
parent
19fddfaacf
commit
5526111743
18
repomirror/__init__.py
Normal file
18
repomirror/__init__.py
Normal file
@ -0,0 +1,18 @@
|
||||
from . import logger
|
||||
##
|
||||
import logging
|
||||
##
|
||||
from . import config
|
||||
from . import constants
|
||||
|
||||
|
||||
_logger = logging.getLogger()
|
||||
|
||||
|
||||
class Sync(object):
    """
    Top-level object for a repository sync run.

    Constructing it logs the arguments it was called with (at debug level) and loads the configuration
    through config.Config.
    """

    def __init__(self, cfg = None, dummy = False, distro = None, logdir = None, *args, **kwargs):
        """
        :param cfg: Handed unchanged to config.Config (a false value lets Config pick its own default).
        :param dummy: Only recorded in the debug log here.
        :param distro: Only recorded in the debug log here.
        :param logdir: Only recorded in the debug log here.
        """
        # This must remain the first statement: locals() is snapshotted while it still holds nothing but
        # the call arguments.
        call_args = locals().copy()
        call_args.pop('self')
        _logger.debug('Sync class instantiated with args: {0}'.format(call_args))
        self.cfg = config.Config(cfg)
|
||||
|
174
repomirror/config.py
Normal file
174
repomirror/config.py
Normal file
@ -0,0 +1,174 @@
|
||||
import copy
|
||||
import datetime
|
||||
import os
|
||||
import logging
|
||||
import re
|
||||
import shutil
|
||||
##
|
||||
import requests
|
||||
import requests.auth
|
||||
from lxml import etree
|
||||
|
||||
|
||||
_logger = logging.getLogger()
|
||||
|
||||
|
||||
def create_default_cfg():
    """
    Produce a stripped-down sample configuration from the bundled example config.

    The file example.config.xml one directory above this module is parsed, its comments are dropped,
    element text and attribute values are blanked, repeated sibling elements are reduced to the first of
    each tag, and a comment pointing at the fully commented example is inserted as the first child of the
    root.

    :return: The serialized document as UTF-8 encoded bytes, including the XML declaration.
    """
    blank_line = re.compile(r'^\s*$')
    namespaced = "descendant-or-self::*[namespace-uri()!='']"
    module_dir = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
    samplexml = os.path.abspath(os.path.join(module_dir, '..', 'example.config.xml'))

    def _is_root(element, doc):
        # The <mirror> element in the default namespace is left untouched by both passes below.
        return(element.tag == '{{{0}}}mirror'.format(doc.nsmap[None]))

    with open(samplexml, 'rb') as fh:
        source_doc = etree.fromstring(fh.read())
    # Comments are stripped by serializing through C14N with comments disabled, then the leftover
    # whitespace is cleaned up by hand.
    # etree has a .canonicalize(), but it chokes on a default namespace:
    # https://bugs.launchpad.net/lxml/+bug/1869455
    # So everything we do here is kind of a hack.
    serialized = etree.tostring(source_doc,
                                with_comments = False,
                                method = 'c14n',
                                pretty_print = True).decode('utf-8')
    compact = ''.join([ln.strip() for ln in serialized.splitlines() if not blank_line.search(ln)])
    xml = etree.fromstring(compact.encode('utf-8'))
    # Pass 1: blank out element text and every attribute value.
    for element in xml.xpath(namespaced):
        if _is_root(element, xml):
            continue
        if element.text is not None and element.text.strip() != '':
            element.text = ''
        for attr_name, attr_value in element.attrib.items():
            if attr_value is not None:
                element.attrib[attr_name] = ''
    # Pass 2: keep only the first child of each tag under a given parent, to simplify the sample.
    for element in xml.xpath(namespaced):
        if _is_root(element, xml):
            continue
        parent = element.getparent()
        try:
            for duplicate in parent.findall(element.tag)[1:]:
                parent.remove(duplicate)
        except AttributeError:
            # getparent() gave None; presumably this element was already detached as a duplicate.
            pass
    # Finally, point the user at the fully commented config.
    pointer = ('\n Please reference the fully commented example.config.xml found either '
               'at:\n '
               ' * {0}\n * https://git.square-r00t.net/RepoMirror/tree/'
               'example.config.xml\n and then configure this according to those '
               'instructions.\n ').format(samplexml)
    xml.insert(0, etree.Comment(pointer))
    rendered = etree.tostring(xml,
                              pretty_print = True,
                              with_comments = True,
                              with_tail = True,
                              encoding = 'UTF-8',
                              xml_declaration = True)
    return(rendered)
|
||||
|
||||
|
||||
class Config(object):
    """
    Loads, XSD-validates and exposes the repomirror XML configuration.

    After construction:
      * raw      - the config file's bytes
      * xml/tree - the parsed root element / tree with namespaces stripped from the tags
      * ns_xml/ns_tree - the same document with namespaces left intact (this is what gets validated)
      * xsd      - the lxml XMLSchema used for validation and for populating attribute defaults
    """
    # Schema used when the document's root carries no xsi:schemaLocation attribute.
    default_xsd = 'http://schema.xml.r00t2.io/projects/repomirror.xsd'
    # Config location used when the caller passes a false value for xml_path.
    default_xml_path = '~/.config/repomirror.xml'

    def __init__(self, xml_path, *args, **kwargs):
        """
        :param xml_path: Path to the config file ("~" is expanded); a false value selects default_xml_path.
        :raises ValueError: If the file did not exist. A sample config is written to that path first.
        """
        if not xml_path:
            xml_path = self.default_xml_path
        self.xml_path = os.path.abspath(os.path.expanduser(xml_path))
        if not os.path.isfile(self.xml_path):
            # Make sure the containing directory is there, otherwise writing the sample would fail
            # before the intended ValueError could be raised.
            os.makedirs(os.path.dirname(self.xml_path), exist_ok = True)
            with open(self.xml_path, 'wb') as fh:
                fh.write(create_default_cfg())
            # Report the path that was actually used, not the class default.
            _logger.error(('{0} does not exist so a sample configuration file has been created in its place. '
                           'Be sure to configure it appropriately.').format(self.xml_path))
            raise ValueError('Config does not exist')
        # Read via the expanded path; open() itself does not expand "~".
        with open(self.xml_path, 'rb') as fh:
            self.raw = fh.read()
        self.xml = None
        self.xsd = None
        self.ns_xml = None
        self.tree = None
        self.ns_tree = None
        self.defaults_parser = None
        self.parse_xml()
        _logger.info('Instantiated {0}.'.format(type(self).__name__))

    def get_xsd(self):
        """
        Locate and load the XSD for the parsed document into self.xsd.

        The location is taken from the root element's xsi:schemaLocation attribute, falling back to
        default_xsd. A "file://" location is read from disk; anything else is fetched with requests.

        :raises RuntimeError: If the schema download does not return an OK response.
        """
        raw_xsd = None
        base_url = None
        xsi = self.xml.nsmap.get('xsi', 'http://www.w3.org/2001/XMLSchema-instance')
        schemaLocation = '{{{0}}}schemaLocation'.format(xsi)
        schemaURL = self.xml.attrib.get(schemaLocation, self.default_xsd)
        split_url = schemaURL.split()
        if len(split_url) == 2:  # a properly defined schemaLocation ("<namespace> <location>")
            schemaURL = split_url[1]
        else:
            schemaURL = split_url[0]  # a LAZY schemaLocation (location only)
        if schemaURL.startswith('file://'):
            schemaURL = re.sub(r'^file://', r'', schemaURL)
            with open(schemaURL, 'rb') as fh:
                raw_xsd = fh.read()
            base_url = os.path.dirname(schemaURL) + '/'
        else:
            # NOTE(review): no timeout is passed to requests.get here.
            req = requests.get(schemaURL)
            if not req.ok:
                raise RuntimeError('Could not download XSD')
            raw_xsd = req.content
            base_url = os.path.split(req.url)[0] + '/'  # This makes me feel dirty.
        # base_url is given to the parser alongside the schema source (the "directory" of the XSD).
        self.xsd = etree.XMLSchema(etree.XML(raw_xsd, base_url = base_url))
        return(None)

    def parse_xml(self):
        """Run the full load sequence: parse, fetch the XSD, re-parse with defaults, validate."""
        self.parse_raw()
        self.get_xsd()
        self.populate_defaults()
        self.validate()
        return(None)

    def parse_raw(self, parser = None):
        """
        Parse self.raw into both the namespace-stripped and the namespaced representations.

        :param parser: Optional lxml parser to use for both parses (None uses lxml's default).
        """
        self.xml = etree.fromstring(self.raw, parser = parser)
        self.ns_xml = etree.fromstring(self.raw, parser = parser)
        self.tree = self.xml.getroottree()
        self.ns_tree = self.ns_xml.getroottree()
        self.tree.xinclude()
        self.ns_tree.xinclude()
        # Only self.tree/self.xml are stripped; the ns_* pair keeps its namespaces for validation.
        self.strip_ns()
        return(None)

    def populate_defaults(self):
        """Re-parse the raw config with a schema-aware parser so XSD attribute defaults are filled in."""
        if self.xsd is None:
            self.get_xsd()
        if self.defaults_parser is None:
            self.defaults_parser = etree.XMLParser(schema = self.xsd, attribute_defaults = True)
        self.parse_raw(parser = self.defaults_parser)
        return(None)

    def remove_defaults(self):
        """Re-parse the raw config with the default parser, i.e. without XSD attribute defaults."""
        self.parse_raw()
        return(None)

    def strip_ns(self, obj = None):
        """
        Replace namespaced tags with their local names.

        :param obj: None to strip self.tree and self.xml in place; or an lxml element/tree, in which
                    case a deep copy is stripped and returned and the argument is left untouched.
        :return: The stripped copy when obj was given, otherwise None.
        :raises ValueError: If obj is neither None nor an lxml element/tree.
        """
        # https://stackoverflow.com/questions/30232031/how-can-i-strip-namespaces-out-of-an-lxml-tree/30233635#30233635
        xpathq = "descendant-or-self::*[namespace-uri()!='']"
        # Compare against None explicitly: an lxml element without children is falsy, so a plain
        # truth test would wrongly treat a leaf element as "no argument given".
        if obj is None:
            for x in (self.tree, self.xml):
                for e in x.xpath(xpathq):
                    e.tag = etree.QName(e).localname
        elif isinstance(obj, (etree._Element, etree._ElementTree)):
            obj = copy.deepcopy(obj)
            for e in obj.xpath(xpathq):
                e.tag = etree.QName(e).localname
            return(obj)
        else:
            raise ValueError('Did not know how to parse obj parameter')
        return(None)

    def validate(self):
        """Validate the namespaced tree against the XSD; lxml raises if the document is invalid."""
        if self.xsd is None:
            self.get_xsd()
        self.xsd.assertValid(self.ns_tree)
        return(None)
|
2
repomirror/constants.py
Normal file
2
repomirror/constants.py
Normal file
@ -0,0 +1,2 @@
|
||||
# Default port number for each sync protocol, keyed by protocol name.
PROTO_DEF_PORTS = dict(ftp = 21, rsync = 873)
|
57
repomirror/logger.py
Normal file
57
repomirror/logger.py
Normal file
@ -0,0 +1,57 @@
|
||||
import logging
|
||||
import logging.handlers
|
||||
import os
|
||||
try:
|
||||
# https://www.freedesktop.org/software/systemd/python-systemd/journal.html#journalhandler-class
|
||||
from systemd import journal
|
||||
_has_journald = True
|
||||
except ImportError:
|
||||
_has_journald = False
|
||||
|
||||
|
||||
def preplog(logfile = None):
    """
    Make sure the log file exists with restrictive permissions and return its absolute path.

    :param logfile: Path to the log file ("~" is expanded). When false, /var/log/repo/main.log is used
                    if the effective UID is 0 and ~/.cache/repo.log otherwise.
    :return: The absolute, user-expanded path of the log file.
    """
    if logfile:
        chosen = logfile
    elif os.geteuid() == 0:
        chosen = '/var/log/repo/main.log'
    else:
        chosen = '~/.cache/repo.log'
    resolved = os.path.abspath(os.path.expanduser(chosen))
    # Directories created here get mode 0700; already existing ones are left alone.
    os.makedirs(os.path.dirname(resolved), exist_ok = True, mode = 0o0700)
    if not os.path.isfile(resolved):
        # "Touch" the file so the chmod below has something to act on.
        with open(resolved, 'w') as fh:
            fh.write('')
    os.chmod(resolved, 0o0600)
    return(resolved)
|
||||
|
||||
|
||||
# And set up logging.
|
||||
# Module-level logging setup: runs once, when this module is first imported.
_cfg_args = {'level': logging.DEBUG,
             'handlers': []}

if _has_journald:
    # The journald handler class has gone by two names; try the more likely one first.
    try:
        h = journal.JournalHandler()
    except AttributeError:
        h = journal.JournaldLogHandler()
    # Systemd includes times, so no {asctime} in this format.
    h.setFormatter(logging.Formatter(fmt = '{name}:{levelname}:{name}:{filename}:{funcName}:{lineno}: {message}',
                                     style = '{'))
    _cfg_args['handlers'].append(h)

# Rotation is disabled for now (no maxBytes/backupCount given), so this acts as a plain file handler.
filehandler = logging.handlers.RotatingFileHandler(preplog(), encoding = 'utf8')
filehandler.setFormatter(logging.Formatter(fmt = '{asctime}:{levelname}:{name}:{filename}:{funcName}:{lineno}: {message}',
                                           style = '{'))
_cfg_args['handlers'].append(filehandler)

# Install the collected handlers on the root logger at DEBUG level.
logging.basicConfig(**_cfg_args)
logger = logging.getLogger('Repo Mirror')
logger.info('Logging initialized.')
|
55
reposync.py
Normal file
55
reposync.py
Normal file
@ -0,0 +1,55 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import pathlib
|
||||
import sys
|
||||
##
|
||||
##
|
||||
_cur_file = pathlib.Path(os.path.abspath(os.path.expanduser(__file__))).resolve()
|
||||
_cur_path = os.path.dirname(_cur_file)
|
||||
sys.path.insert(1, _cur_path)
|
||||
import repomirror
|
||||
|
||||
|
||||
# Default log directory: a system location when the effective UID is 0, otherwise one under the
# user's home.
if os.geteuid() == 0:
    _def_logdir = '/var/log/repo'
else:
    _def_logdir = '~/.cache/logs'


def parseArgs():
    """
    Build the command-line parser for the sync script.

    Options (with the attribute each one sets on the parsed namespace):
      -c/--config   cfg     path to the config file (default ~/.config/repomirror.xml)
      -n/--dry-run  dummy   flag; True when given
      -d/--distro   distro  may be repeated; collected into a list (None when never given)
      -l/--logdir   logdir  directory for log files (default depends on the effective UID)

    :return: The configured argparse.ArgumentParser (not yet parsed).
    """
    args = argparse.ArgumentParser(description = 'Sync repositories for various distributions to local paths')
    args.add_argument('-c', '--config',
                      default = '~/.config/repomirror.xml',
                      dest = 'cfg',
                      help = ('The path to the config file. If it does not exist, a bare version will be created. '
                              'Default: ~/.config/repomirror.xml'))
    args.add_argument('-n', '--dry-run',
                      action = 'store_true',
                      dest = 'dummy',
                      help = ('If specified, do not actually sync anything (other than timestamp files if '
                              'applicable to determine logic); do not actually sync any repositories'))
    args.add_argument('-d', '--distro',
                      dest = 'distro',
                      action = 'append',
                      help = ('If specified, only sync the specified distro in the config file (otherwise sync all '
                              'specified). May be given multiple times'))
    args.add_argument('-l', '--logdir',
                      default = _def_logdir,
                      dest = 'logdir',
                      help = ('The path to the directory where logs should be written. The actual log files will be '
                              'named after their respective distro names in the config file. '
                              'Default: {0}'.format(_def_logdir)))
    return(args)
|
||||
|
||||
|
||||
def main():
    """
    Script entry point: parse the command line and construct the Sync object from it.
    """
    args = parseArgs().parse_args()
    # The parser's dest names (cfg, dummy, distro, logdir) are exactly Sync's keyword parameters, so
    # the parsed namespace can be forwarded as-is instead of being dropped.
    repomirror.Sync(**vars(args))
    return(None)


if __name__ == '__main__':
    main()
|
81
sample.config.xml
Normal file
81
sample.config.xml
Normal file
@ -0,0 +1,81 @@
|
||||
<?xml version="1.0" encoding="UTF-8" ?>
|
||||
<!--
|
||||
This is by default placed in ~/.config/repomirror.xml
|
||||
-->
|
||||
<mirror xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xmlns="https://git.square-r00t.net/RepoMirror/"
|
||||
xsi:schemaLocation="https://git.square-r00t.net/RepoMirror/ http://schema.xml.r00t2.io/projects/repomirror.xsd">
|
||||
<distro name="arch">
|
||||
<!--
|
||||
The local path to where the hierarchy/files should be synced to.
|
||||
-->
|
||||
<dest>/srv/repos/arch/.</dest>
|
||||
<!--
|
||||
The local file to update with a timestamp with the last time we checked for updates.
|
||||
-->
|
||||
<lastLocalCheck>/srv/http/arch.lastcheck</lastLocalCheck>
|
||||
<!--
|
||||
The file to update with a timestamp with the last time we synced from our upstream.
|
||||
-->
|
||||
<lastLocalSync>/srv/http/arch.lastsync</lastLocalSync>
|
||||
<!--
|
||||
The path to a file on the upstream(s) that gives a time when it last updated.
|
||||
-->
|
||||
<lastRemoteUpdate>/lastupdate</lastRemoteUpdate>
|
||||
<!--
|
||||
The path to a file on the upstream(s) that gives a time when it last synced from its upstream.
|
||||
-->
|
||||
<lastRemoteSync>/lastsync</lastRemoteSync>
|
||||
<!--
|
||||
The path that must be currently mounted for sync to proceed.
|
||||
-->
|
||||
<mountCheck>/</mountCheck>
|
||||
<!--
|
||||
The speed to cap socket bandwidth at (in KiB). Decimals are okay.
|
||||
-->
|
||||
<bwlimit>7000</bwlimit>
|
||||
<upstream>
|
||||
<!--
|
||||
The following example uses "rsync://arch.mirror.constant.com/archlinux/"
|
||||
(https://www.archlinux.org/mirrors/constant.com/1008/)
|
||||
-->
|
||||
<!--
|
||||
One of:
|
||||
* rsync
|
||||
* ftp
|
||||
-->
|
||||
<syncType>rsync</syncType>
|
||||
<!--
|
||||
ONLY the domain goes here.
|
||||
-->
|
||||
<domain>arch.mirror.constant.com</domain>
|
||||
<!--
|
||||
If not specified, the protocol's default port will be used.
|
||||
-->
|
||||
<port>873</port>
|
||||
<!--
|
||||
The *remote* path part of the URI. The leading / is necessary. A trailing one will be assumed.
|
||||
-->
|
||||
<path>/archlinux/</path>
|
||||
</upstream>
|
||||
<!--
|
||||
Multiple upstreams can be specified. They are tried in order specified and if connection fails or times out,
|
||||
the next one will be tried until no more specified upstreams exist.
|
||||
-->
|
||||
<upstream>
|
||||
<syncType>rsync</syncType>
|
||||
<domain>arch.mirror.square-r00t.net</domain>
|
||||
<path>/arch/</path>
|
||||
</upstream>
|
||||
<upstream>
|
||||
<!--
|
||||
Only passive (PASV) mode for FTP is supported.
|
||||
-->
|
||||
<syncType>ftp</syncType>
|
||||
<domain>sub.domain.tld</domain>
|
||||
<port>21</port>
|
||||
<path>/distros/archlinux/</path>
|
||||
</upstream>
|
||||
</distro>
|
||||
<distro name="centos"/>
|
||||
</mirror>
|
Loading…
Reference in New Issue
Block a user