moving to its own repo

brent s 2019-06-05 21:47:31 -04:00
commit 08fc183956
9 changed files with 1732 additions and 0 deletions

27
.gitignore vendored Normal file

@@ -0,0 +1,27 @@
# https://git-scm.com/docs/gitignore
# https://help.github.com/articles/ignoring-files
# Example .gitignore files: https://github.com/github/gitignore
*.bak
screenlog*
*.swp
*.lck
*~
.~lock.*
.editix
__pycache__/
*.pyc
*.tar
*.tar.bz2
*.tar.xz
*.tar.gz
*.tgz
*.txz
*.tbz
*.tbz2
*.zip
*.run
*.7z
*.rar
*.sqlite3
*.deb
.idea/

837
backup.py Executable file

@@ -0,0 +1,837 @@
#!/usr/bin/env python3

# TODO: https://borgbackup.readthedocs.io/en/latest/internals/frontends.html
# will they EVER release a public API? for now we'll just use subprocess since
# we import it for various prep stuff anyways.
# TODO: change loglevel of borg itself in subprocess to match the argparse?
# --debug, --info (same as -v/--verbose), --warning, --error, --critical
# TODO: modify config to add repo to cfg for init? or add new operation, "add"

import argparse
import datetime
import json
import getpass
import logging
import logging.handlers
import os
import pwd
import re
# TODO: use borg module directly instead of subprocess?
import subprocess
import sys
import tempfile
# TODO: virtual env?
from lxml import etree # A lot safer and easier to use than the stdlib xml module.
try:
import pymysql # not stdlib; "python-pymysql" in Arch's AUR
has_mysql = True
except ImportError:
has_mysql = False
try:
# https://www.freedesktop.org/software/systemd/python-systemd/journal.html#journalhandler-class
from systemd import journal
has_systemd = True
except ImportError:
has_systemd = False

### LOG LEVEL MAPPINGS ###
loglvls = {'critical': logging.CRITICAL,
'error': logging.ERROR,
'warning': logging.WARNING,
'info': logging.INFO,
'debug': logging.DEBUG}

### DEFAULT NAMESPACE ###
dflt_ns = 'http://git.square-r00t.net/OpTools/tree/storage/backups/borg/'
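# NOTE: lxml expects namespaced tags in Clark notation ('{uri}tag') for
# find()/findall(); Backup.__init__() below builds that prefix via
# '{{{0}}}'.format(dflt_ns), so e.g.:
#   self.ns + 'server' -> '{http://git.square-r00t.net/OpTools/tree/storage/backups/borg/}server'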


### THE GUTS ###
class Backup(object):
def __init__(self, args):
self.args = args
self.ns = '{{{0}}}'.format(dflt_ns)
if self.args['oper'] == 'restore':
self.args['target_dir'] = os.path.abspath(os.path.expanduser(self.args['target_dir']))
os.makedirs(self.args['target_dir'],
exist_ok = True,
mode = 0o700)
self.repos = {}
### LOGGING ###
# Thanks to:
# https://web.archive.org/web/20170726052946/http://www.lexev.org/en/2013/python-logging-every-day/
# https://stackoverflow.com/a/42604392
# https://plumberjack.blogspot.com/2010/10/supporting-alternative-formatting.html
# and user K900_ on r/python for entertaining my very silly question.
self.logger = logging.getLogger(__name__)
self.logger.setLevel(loglvls[self.args['loglevel']])
_logfmt = logging.Formatter(fmt = ('{levelname}:{name}: {message} ({asctime}; {filename}:{lineno})'),
style = '{',
datefmt = '%Y-%m-%d %H:%M:%S')
_journalfmt = logging.Formatter(fmt = '{levelname}:{name}: {message} ({filename}:{lineno})',
style = '{',
datefmt = '%Y-%m-%d %H:%M:%S')
handlers = []
if self.args['disklog']:
os.makedirs(os.path.dirname(self.args['logfile']),
exist_ok = True,
mode = 0o700)
# TODO: make the constraints for rotation in config?
handlers.append(logging.handlers.RotatingFileHandler(self.args['logfile'],
encoding = 'utf8',
maxBytes = 100000,
backupCount = 1))
if self.args['verbose']:
handlers.append(logging.StreamHandler())
if has_systemd:
try:
h = journal.JournalHandler()
except AttributeError:
h = journal.JournaldLogHandler()
h.setFormatter(_journalfmt)
h.setLevel(loglvls[self.args['loglevel']])
self.logger.addHandler(h)
for h in handlers:
h.setFormatter(_logfmt)
h.setLevel(loglvls[self.args['loglevel']])
self.logger.addHandler(h)
### END LOGGING ###
self.logger.debug('BEGIN INITIALIZATION')
### CONFIG ###
if not os.path.isfile(self.args['cfgfile']):
self.logger.error('{0} does not exist'.format(self.args['cfgfile']))
exit(1)
try:
with open(self.args['cfgfile'], 'rb') as f:
self.xml = etree.parse(f)
self.xml.xinclude()
self.cfg = self.xml.getroot()
except etree.XMLSyntaxError:
self.logger.error('{0} is invalid XML'.format(self.args['cfgfile']))
raise ValueError(('{0} does not seem to be valid XML. '
'See sample.config.xml for an example configuration.').format(self.args['cfgfile']))
self.borgbin = self.cfg.attrib.get('borgpath', '/usr/bin/borg')
### CHECK ENVIRONMENT ###
# If we're not attached to a TTY (e.g. running from cron), we want to make sure command failures get logged as warnings.
if os.isatty(sys.stdin.fileno()):
self.cron = False
else:
self.cron = True
self.logger.debug('END INITIALIZATION')
self.buildRepos()

def buildRepos(self):
def getRepo(server, reponames = None):
if not reponames:
reponames = []
repos = []
for repo in server.findall('{0}repo'.format(self.ns)):
if reponames and repo.attrib['name'] not in reponames:
continue
r = {}
for a in repo.attrib:
r[a] = repo.attrib[a]
for e in ('path', 'exclude'):
r[e] = [i.text for i in repo.findall(self.ns + e)]
for prep in repo.findall('{0}prep'.format(self.ns)):
if 'prep' not in r:
r['prep'] = []
if prep.attrib.get('inline', 'true').lower()[0] in ('0', 'f'):
with open(os.path.abspath(os.path.expanduser(prep.text)), 'r') as f:
r['prep'].append(f.read())
else:
r['prep'].append(prep.text)
plugins = repo.find('{0}plugins'.format(self.ns))
if plugins is not None:
r['plugins'] = {}
for plugin in plugins.findall('{0}plugin'.format(self.ns)):
pname = plugin.attrib['name']
r['plugins'][pname] = {'path': plugin.attrib.get('path'),
'params': {}}
for param in plugin.findall('{0}param'.format(self.ns)):
paramname = param.attrib['key']
if param.attrib.get('json', 'false').lower()[0] in ('1', 't'):
r['plugins'][pname]['params'][paramname] = json.loads(param.text)
else:
r['plugins'][pname]['params'][paramname] = param.text
repos.append(r)
return(repos)
self.logger.debug('VARS (before args cleanup): {0}'.format(vars(self)))
self.args['repo'] = [i.strip() for i in self.args['repo'].split(',')]
self.args['server'] = [i.strip() for i in self.args['server'].split(',')]
if 'all' in self.args['repo']:
self.args['repo'] = None
if 'all' in self.args['server']:
self.args['server'] = []
for server in self.cfg.findall('{0}server'.format(self.ns)):
# The server elements are uniquely constrained to the "target" attrib.
# *NO TWO <server> ELEMENTS WITH THE SAME target= SHOULD EXIST.*
self.args['server'].append(server.attrib['target'])
for server in self.cfg.findall('{0}server'.format(self.ns)):
sname = server.attrib['target']
if sname not in self.args['server']:
continue
self.repos[sname] = {}
for x in server.attrib:
if x != 'target':
self.repos[sname][x] = server.attrib[x]
self.repos[sname]['repos'] = getRepo(server, reponames = self.args['repo'])
self.logger.debug('VARS (after args cleanup): {0}'.format(vars(self)))
return()
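# After buildRepos(), self.repos is shaped roughly like this (values assumed,
# mirroring sample.config.xml):
#   {'fq.dn.tld': {'remote': 'true', 'rsh': 'ssh -p 22', 'user': 'root',
#                  'repos': [{'name': 'testrepo', 'password': '...',
#                             'compression': 'lzma,9',
#                             'path': ['/a'], 'exclude': ['/a/b']}]}}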

def createRepo(self):
for server in self.repos:
_env = os.environ.copy()
# https://github.com/borgbackup/borg/issues/2273
# https://borgbackup.readthedocs.io/en/stable/internals/frontends.html
_env['LANG'] = 'en_US.UTF-8'
_env['LC_CTYPE'] = 'en_US.UTF-8'
if self.repos[server]['remote'].lower()[0] in ('1', 't'):
_rsh = self.repos[server].get('rsh')
if _rsh:  # "rsh" is optional in the schema; env values must be strings, not None
_env['BORG_RSH'] = _rsh
_user = self.repos[server].get('user', pwd.getpwuid(os.geteuid()).pw_name)
for repo in self.repos[server]['repos']:
self.logger.info('[{0}]: BEGIN INITIALIZATION'.format(repo['name']))
_loc_env = _env.copy()
if 'password' not in repo:
print('Password not supplied for {0}:{1}.'.format(server, repo['name']))
_loc_env['BORG_PASSPHRASE'] = getpass.getpass('Password (will NOT echo back): ')
else:
_loc_env['BORG_PASSPHRASE'] = repo['password']
_cmd = [self.borgbin,
'--log-json',
'--{0}'.format(self.args['loglevel']),
'init',
'-e', 'repokey']
if self.repos[server]['remote'].lower()[0] in ('1', 't'):
repo_tgt = '{0}@{1}'.format(_user, server)
else:
repo_tgt = os.path.abspath(os.path.expanduser(server))
_cmd.append('{0}:{1}'.format(repo_tgt,
repo['name']))
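# With a remote server, the assembled command looks like, e.g. (user and
# host assumed from the sample config):
#   borg --log-json --info init -e repokey root@fq.dn.tld:testrepo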
self.logger.debug('VARS: {0}'.format(vars(self)))
if not self.args['dryrun']:
_out = subprocess.run(_cmd,
env = _loc_env,
stdout = subprocess.PIPE,
stderr = subprocess.PIPE)
_stdout = _out.stdout.decode('utf-8').strip()
_stderr = _out.stderr.decode('utf-8').strip()
_returncode = _out.returncode
self.logger.debug('[{0}]: (RESULT) {1}'.format(repo['name'], _stdout))
# sigh. borg uses stderr for verbose output.
self.logger.debug('[{0}]: STDERR: ({2})\n{1}'.format(repo['name'],
_stderr,
' '.join(_cmd)))
if _returncode != 0:
self.logger.error(
'[{0}]: FAILED: {1}'.format(repo['name'], ' '.join(_cmd)))
if _stderr != '' and self.cron and _returncode != 0:
self.logger.warning('Command {0} failed: {1}'.format(' '.join(_cmd),
_stderr))
self.logger.info('[{0}]: END INITIALIZATION'.format(repo['name']))
return()

def create(self):
# TODO: support "--strip-components N"?
self.logger.info('START: backup')
for server in self.repos:
_env = os.environ.copy()
if self.repos[server]['remote'].lower()[0] in ('1', 't'):
_rsh = self.repos[server].get('rsh')
if _rsh:  # only set BORG_RSH if an rsh command was configured
_env['BORG_RSH'] = _rsh
_env['LANG'] = 'en_US.UTF-8'
_env['LC_CTYPE'] = 'en_US.UTF-8'
_user = self.repos[server].get('user', pwd.getpwuid(os.geteuid()).pw_name)
for repo in self.repos[server]['repos']:
_loc_env = _env.copy()
if 'password' not in repo:
print('Password not supplied for {0}:{1}.'.format(server, repo['name']))
_loc_env['BORG_PASSPHRASE'] = getpass.getpass('Password (will NOT echo back): ')
else:
_loc_env['BORG_PASSPHRASE'] = repo['password']
self.logger.info('[{0}]: BEGIN BACKUP: {1}'.format(server, repo['name']))
if 'prep' in repo:
tmpdir = os.path.abspath(os.path.expanduser('~/.cache/.optools_backup'))
os.makedirs(tmpdir, exist_ok = True)
os.chmod(tmpdir, mode = 0o0700)
for idx, prep in enumerate(repo['prep']):
exec_tmp = tempfile.mkstemp(prefix = '_optools.backup.',
suffix = '._tmpexc',
text = True,
dir = tmpdir)[1]
os.chmod(exec_tmp, mode = 0o0700)
with open(exec_tmp, 'w') as f:
f.write(prep)
prep_out = subprocess.run([exec_tmp],
stdout = subprocess.PIPE,
stderr = subprocess.PIPE)
if prep_out.returncode != 0:
err = ('Prep job {0} ({1}) for server {2} (repo {3}) '
'returned non-zero').format(idx, exec_tmp, server, repo['name'])
self.logger.warning(err)
self.logger.debug('STDOUT: {0}'.format(prep_out.stdout.decode('utf-8')))
self.logger.debug('STDERR: {0}'.format(prep_out.stderr.decode('utf-8')))
else:
os.remove(exec_tmp)
if 'plugins' in repo:
import importlib
_orig_path = sys.path
for plugin in repo['plugins']:
self.logger.debug('Initializing plugin: {0}'.format(plugin))
if repo['plugins'][plugin]['path']:
sys.path.insert(1, os.path.abspath(os.path.expanduser(repo['plugins'][plugin]['path'])))
optools_tmpmod = importlib.import_module(plugin, package = None)
if not repo['plugins'][plugin]['params']:
optools_tmpmod.Backup()
else:
optools_tmpmod.Backup(**repo['plugins'][plugin]['params'])
del(sys.modules[plugin])
del(optools_tmpmod)
sys.path = _orig_path
self.logger.debug('Finished plugin: {0}'.format(plugin))
# This is where we actually do the thing.
_cmd = [self.borgbin,
'--log-json',
'--{0}'.format(self.args['loglevel']),
'create',
'--stats']
if 'compression' in repo:
_cmd.extend(['--compression', repo['compression']])
if 'exclude' in repo:
for e in repo['exclude']:
_cmd.extend(['--exclude', e])
if self.repos[server]['remote'].lower()[0] in ('1', 't'):
repo_tgt = '{0}@{1}'.format(_user, server)
else:
repo_tgt = os.path.abspath(os.path.expanduser(server))
_cmd.append('{0}:{1}::{2}'.format(repo_tgt,
repo['name'],
self.args['archive']))
for p in repo['path']:
_cmd.append(p)
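# The assembled command resembles, e.g. (names assumed from the sample
# config; the archive name defaults to a timestamp):
#   borg --log-json --info create --stats --compression lzma,9 \
#       --exclude /a/b root@fq.dn.tld:testrepo::2019_06_05.21_47 /a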
self.logger.debug('VARS: {0}'.format(vars()))
# We don't use self.cmdExec() here because we want to explicitly
# pass the env and format the log line differently.
self.logger.debug('[{0}]: Running command: {1}'.format(repo['name'],
' '.join(_cmd)))
if not self.args['dryrun']:
_out = subprocess.run(_cmd,
env = _loc_env,
stdout = subprocess.PIPE,
stderr = subprocess.PIPE)
_stdout = _out.stdout.decode('utf-8').strip()
_stderr = _out.stderr.decode('utf-8').strip()
_returncode = _out.returncode
self.logger.debug('[{0}]: (RESULT) {1}'.format(repo['name'], _stdout))
self.logger.debug('[{0}]: STDERR: ({2})\n{1}'.format(repo['name'],
_stderr,
' '.join(
_cmd)))
if _returncode != 0:
self.logger.error(
'[{0}]: FAILED: {1}'.format(repo['name'], ' '.join(_cmd)))
if _stderr != '' and self.cron and _returncode != 0:
self.logger.warning('Command {0} failed: {1}'.format(' '.join(_cmd),
_stderr))
del (_loc_env['BORG_PASSPHRASE'])
self.logger.info('[{0}]: END BACKUP'.format(repo['name']))
self.logger.info('END: backup')
return()

def restore(self):
# TODO: support "--strip-components N"?
# TODO: support add'l args?
# TODO: Restore() class in plugins?
# https://borgbackup.readthedocs.io/en/stable/usage/extract.html
orig_dir = os.getcwd()
self.logger.info('START: restore')
self.args['target_dir'] = os.path.abspath(os.path.expanduser(self.args['target_dir']))
os.makedirs(self.args['target_dir'], exist_ok = True)
os.chmod(self.args['target_dir'], mode = 0o0700)
for server in self.repos:
_env = os.environ.copy()
if self.repos[server]['remote'].lower()[0] in ('1', 't'):
_rsh = self.repos[server].get('rsh')
if _rsh:  # only set BORG_RSH if an rsh command was configured
_env['BORG_RSH'] = _rsh
_env['LANG'] = 'en_US.UTF-8'
_env['LC_CTYPE'] = 'en_US.UTF-8'
_user = self.repos[server].get('user', pwd.getpwuid(os.geteuid()).pw_name)
server_dir = os.path.join(self.args['target_dir'], server)
for repo in self.repos[server]['repos']:
_loc_env = _env.copy()
if 'password' not in repo:
print('Password not supplied for {0}:{1}.'.format(server, repo['name']))
_loc_env['BORG_PASSPHRASE'] = getpass.getpass('Password (will NOT echo back): ')
else:
_loc_env['BORG_PASSPHRASE'] = repo['password']
if len(self.repos[server]) > 1:
dest_dir = os.path.join(server_dir, repo['name'])
else:
dest_dir = server_dir
os.makedirs(dest_dir, exist_ok = True)
os.chmod(dest_dir, mode = 0o0700)
os.chdir(dest_dir)
self.logger.info('[{0}]: BEGIN RESTORE'.format(repo['name']))
_cmd = [self.borgbin,
'--log-json',
'--{0}'.format(self.args['loglevel']),
'extract']
if self.repos[server]['remote'].lower()[0] in ('1', 't'):
repo_tgt = '{0}@{1}'.format(_user, server)
else:
repo_tgt = os.path.abspath(os.path.expanduser(server))
_cmd.append('{0}:{1}::{2}'.format(repo_tgt,
repo['name'],
self.args['archive']))
if self.args['archive_path']:
_cmd.append(self.args['archive_path'])
self.logger.debug('VARS: {0}'.format(vars(self)))
self.logger.debug('[{0}]: Running command: {1}'.format(repo['name'],
' '.join(_cmd)))
if not self.args['dryrun']:
_out = subprocess.run(_cmd,
env = _loc_env,
stdout = subprocess.PIPE,
stderr = subprocess.PIPE)
_stdout = _out.stdout.decode('utf-8').strip()
_stderr = _out.stderr.decode('utf-8').strip()
_returncode = _out.returncode
self.logger.debug('[{0}]: (RESULT) {1}'.format(repo['name'], _stdout))
self.logger.debug('[{0}]: STDERR: ({2})\n{1}'.format(repo['name'],
_stderr,
' '.join(_cmd)))
if _returncode != 0:
self.logger.error('[{0}]: FAILED: {1}'.format(repo['name'],
' '.join(_cmd)))
if _stderr != '' and self.cron and _returncode != 0:
self.logger.warning('Command {0} failed: {1}'.format(' '.join(_cmd),
_stderr))
self.logger.info('[{0}]: END RESTORE'.format(repo['name']))
os.chdir(orig_dir)
self.logger.info('END: restore')
return()

def listRepos(self):
def objPrinter(d, indent = 0):
for k, v in d.items():
if k == 'name':
continue
if k.lower() in ('password', 'path', 'exclude', 'prep', 'plugins', 'params', 'compression'):
keyname = k.title()
else:
keyname = k
if isinstance(v, list):
for i in v:
print('\033[1m{0}{1}:\033[0m {2}'.format(('\t' * indent),
keyname,
i))
elif isinstance(v, dict):
print('\033[1m{0}{1}:\033[0m'.format(('\t' * indent),
keyname))
objPrinter(v, indent = (indent + 1))
else:
print('\033[1m{0}{1}:\033[0m {2}'.format(('\t' * indent),
keyname,
v))
return()
print('\n\033[1mCurrently configured repositories are:\033[0m\n')
for server in self.repos:
print('\033[1mTarget:\033[0m {0}'.format(server))
print('\033[1mRepositories:\033[0m')
for r in self.repos[server]['repos']:
if not self.args['verbose']:
print('\t\t{0}'.format(r['name']))
else:
print('\t\t\033[1mName:\033[0m {0}'.format(r['name']))
print('\033[1m\t\tDetails:\033[0m')
objPrinter(r, indent = 3)
print()
return()

def printer(self):
# TODO: better alignment. https://stackoverflow.com/a/5676884
_results = self.lister()
timefmt = '%Y-%m-%dT%H:%M:%S.%f'
if not self.args['archive']:
# It's a listing of archives
for server in _results:
print('\033[1mTarget:\033[0m {0}'.format(server))
print('\033[1mRepositories:\033[0m')
# Elsewhere in this script this is a list; in the results structure it's a dict.
for repo in _results[server]:
print('\t\033[1m{0}:\033[0m'.format(repo))
print('\t\t\033[1mSnapshot\t\tTimestamp\033[0m')
for archive in _results[server][repo]:
print('\t\t{0}\t\t{1}'.format(archive['name'],
datetime.datetime.strptime(archive['time'], timefmt)))
print()
else:
# It's a listing inside an archive
if self.args['verbose']:
_archive_fields = ['Mode', 'Owner', 'Size', 'Timestamp', 'Path']
for server in _results:
print('\033[1mTarget:\033[0m {0}'.format(server))
print('\033[1mRepositories:\033[0m')
for repo in _results[server]:
print('\t\033[1m{0}:\033[0m'.format(repo))
print(('\t\t\033[1m'
'{0[0]:<10}\t'
'{0[1]:<10}\t'
'{0[2]:<10}\t'
'{0[3]:<19}\t'
'{0[4]}'
'\033[0m').format(_archive_fields))
for file in _results[server][repo]:
file['mtime'] = datetime.datetime.strptime(file['mtime'], timefmt)
print(('\t\t'
'{mode:<10}\t'
'{user:<10}\t'
'{size:<10}\t'
'{mtime}\t'
'{path}').format(**file))
else:
for server in _results:
print('\033[1mTarget:\033[0m {0}'.format(server))
print('\033[1mRepositories:\033[0m')
for repo in _results[server]:
print('\t\033[1m{0}:\033[0m'.format(repo))
for file in _results[server][repo]:
print(file['path'])
return()

def lister(self):
output = {}
self.logger.debug('START: lister')
for server in self.repos:
output[server] = {}
_env = os.environ.copy()
if self.repos[server]['remote'].lower()[0] in ('1', 't'):
_rsh = self.repos[server].get('rsh')
if _rsh:  # only set BORG_RSH if an rsh command was configured
_env['BORG_RSH'] = _rsh
_env['LANG'] = 'en_US.UTF-8'
_env['LC_CTYPE'] = 'en_US.UTF-8'
_user = self.repos[server].get('user', pwd.getpwuid(os.geteuid()).pw_name)
for repo in self.repos[server]['repos']:
_loc_env = _env.copy()
if 'password' not in repo:
print('Password not supplied for {0}:{1}.'.format(server, repo['name']))
_loc_env['BORG_PASSPHRASE'] = getpass.getpass('Password (will NOT echo back): ')
else:
_loc_env['BORG_PASSPHRASE'] = repo['password']
if self.repos[server]['remote'].lower()[0] in ('1', 't'):
repo_tgt = '{0}@{1}'.format(_user, server)
else:
repo_tgt = os.path.abspath(os.path.expanduser(server))
_cmd = [self.borgbin,
'--log-json',
'--{0}'.format(self.args['loglevel']),
'list',
('--json-lines' if self.args['archive'] else '--json')]
_cmd.append('{0}:{1}{2}'.format(repo_tgt,
repo['name'],
('::{0}'.format(self.args['archive']) if self.args['archive']
else '')))
if not self.args['dryrun']:
_out = subprocess.run(_cmd,
env = _loc_env,
stdout = subprocess.PIPE,
stderr = subprocess.PIPE)
_stdout = '\n'.join([i.strip() for i in _out.stdout.decode('utf-8').splitlines()])
_stderr = _out.stderr.decode('utf-8').strip()
_returncode = _out.returncode
try:
if self.args['archive']:
output[server][repo['name']] = [json.loads(i) for i in _stdout.splitlines()]
else:
output[server][repo['name']] = json.loads(_stdout)['archives']
except json.decoder.JSONDecodeError:
output[server][repo['name']] = []
self.logger.debug('[{0}]: (RESULT) {1}'.format(repo['name'], _stdout))
self.logger.debug('[{0}]: STDERR: ({2}) ({1})'.format(repo['name'],
_stderr,
' '.join(_cmd)))
if _stderr != '' and self.cron and _returncode != 0:
self.logger.warning('Command {0} failed: {1}'.format(' '.join(_cmd),
_stderr))
if not self.args['archive']:
if self.args['numlimit'] > 0:
if self.args['old']:
output[server][repo['name']] = output[server][repo['name']][:self.args['numlimit']]
else:
output[server][repo['name']] = list(
reversed(
output[server][repo['name']]))[:self.args['numlimit']]
if self.args['invert']:
output[server][repo['name']] = reversed(output[server][repo['name']])
self.logger.debug('END: lister')
return(output)
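# Shapes parsed above, per borg's JSON frontend (keys abridged):
#   'borg list --json' (repo listing):
#     {"archives": [{"name": "...", "time": "2019-06-05T21:47:31.000000"}]}
#   'borg list --json-lines' (archive contents), one object per line:
#     {"mode": "-rw-r--r--", "user": "root", "size": 1024,
#      "mtime": "2019-06-05T21:47:31.000000", "path": "/a/file"}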


def printMoarHelp():
_helpstr = ('\n\tNOTE: Sorting only applies to listing archives, NOT the contents!\n\n'
'To keep the output manageable, there are several options that control how results are displayed. '
'Namely, these are:\n\n\t\t'
'-s/--sort [direction]\n\t\t'
'-l/--limit [number]\n\t\t'
'-x/--invert\n\n'
'For example, if you want to list the 5 most recently *taken* snapshots, you would use:\n\n\t\t'
'-l 5\n\n'
'If you want those SAME results SORTED in the reverse order (i.e. the 5 most recently '
'taken snapshots sorted from newest to oldest), then it would be: \n\n\t\t'
'-l 5 -x\n\n'
'Lastly, if you wanted to list the 7 OLDEST TAKEN snapshots in reverse order '
'(that is, sorted from newest to oldest), that\'d be:\n\n\t\t'
'-o -l 7 -x\n')
print(_helpstr)
exit(0)


def parseArgs():
### DEFAULTS ###
_date = datetime.datetime.now().strftime("%Y_%m_%d.%H_%M")
_logfile = '/var/log/borg/{0}'.format(_date)
_cfgfile = os.path.abspath(
os.path.join(os.path.expanduser('~'),
'.config',
'optools',
'backup.xml'))
_defloglvl = 'info'
######
args = argparse.ArgumentParser(description = 'Backups manager',
epilog = ('TIP: this program has context-specific help. '
'e.g. try "%(prog)s list --help"'))
args.add_argument('-c', '--config',
dest = 'cfgfile',
default = _cfgfile,
help = (
'The path to the config file. '
'Default: \033[1m{0}\033[0m'.format(_cfgfile)))
args.add_argument('-Ll', '--loglevel',
dest = 'loglevel',
default = _defloglvl,
choices = list(loglvls.keys()),
help = (
'The level of logging to perform. \033[1mWARNING:\033[0m \033[1mdebug\033[0m will '
'log VERY sensitive information such as passwords! '
'Default: \033[1m{0}\033[0m'.format(_defloglvl)))
args.add_argument('-Ld', '--log-to-disk',
dest = 'disklog',
action = 'store_true',
help = (
'If specified, log to a specific file (-Lf/--logfile) instead of the system logger.'))
args.add_argument('-Lf', '--logfile',
dest = 'logfile',
default = _logfile,
help = (
'The path to the logfile, only used if -Ld/--log-to-disk is specified. '
'Default: \033[1m{0}\033[0m (dynamic)').format(_logfile))
args.add_argument('-v', '--verbose',
dest = 'verbose',
action = 'store_true',
help = ('If specified, log messages will be printed to STDERR in addition to the other '
'configured log system(s), and verbosity for printing functions is increased. '
'\033[1mWARNING:\033[0m This may display VERY sensitive information such as passwords!'))
### ARGS FOR ALL OPERATIONS ###
commonargs = argparse.ArgumentParser(add_help = False)
commonargs.add_argument('-r', '--repo',
dest = 'repo',
default = 'all',
help = ('The repository to perform the operation for. '
'The default is \033[1mall\033[0m, a special value that specifies all known '
'repositories. Can also accept a comma-separated list.'))
commonargs.add_argument('-S', '--server',
dest = 'server',
default = 'all',
help = ('The server to perform the operation for. '
'The default is \033[1mall\033[0m, a special value that specifies all known '
'servers. Can also accept a comma-separated list.'))
fileargs = argparse.ArgumentParser(add_help = False)
fileargs.add_argument('-a', '--archive',
default = _date,
dest = 'archive',
help = ('The name of the archive/snapshot. '
'Default: \033[1m{0}\033[0m (dynamic)').format(_date))
remoteargs = argparse.ArgumentParser(add_help = False)
remoteargs.add_argument('-d', '--dry-run',
dest = 'dryrun',
action = 'store_true',
help = ('Act as if we are performing tasks, but none will actually be executed '
'(useful for testing logging)'))
### OPERATIONS ###
subparsers = args.add_subparsers(help = 'Operation to perform',
dest = 'oper')
backupargs = subparsers.add_parser('backup',
help = 'Perform a backup.',
parents = [commonargs,
remoteargs,
fileargs])
listargs = subparsers.add_parser('list',
help = 'List available backups.',
parents = [commonargs, remoteargs])
listrepoargs = subparsers.add_parser('listrepos',
help = ('List available/configured repositories.'),
parents = [commonargs])
initargs = subparsers.add_parser('init',
help = 'Initialise a repository.',
parents = [commonargs, remoteargs])
rstrargs = subparsers.add_parser('restore',
help = ('Restore ("extract") an archive.'),
parents = [commonargs,
remoteargs,
fileargs])
cvrtargs = subparsers.add_parser('convert',
help = ('Convert the legacy JSON format to the new XML format and quit'))
### OPERATION-SPECIFIC OPTIONS ###
# CREATE ("backup") #
# DISPLAY/OUTPUT ("list") #
listargs.add_argument('-a', '--archive',
dest = 'archive',
default = False,
help = 'If specified, will list the *contents* of the given archive name.')
listargs.add_argument('-l', '--limit',
dest = 'numlimit',
type = int,
default = '5',
help = ('If specified, constrain the output to this number of results for each repo. '
'Default is \033[1m5\033[0m, use 0 for unlimited. See \033[1m-H/--list-help\033[0m'))
listargs.add_argument('-s', '--sort',
dest = 'sortby',
choices = ['newest', 'oldest'],
default = 'oldest',
help = ('The order to sort the results by. See \033[1m-H/--list-help\033[0m. '
'Default: \033[1moldest\033[0m'))
listargs.add_argument('-x', '--invert',
dest = 'invert',
action = 'store_true',
help = 'Invert the order of results. See \033[1m-H/--list-help\033[0m.')
listargs.add_argument('-o', '--old',
dest = 'old',
action = 'store_true',
help = ('Instead of grabbing the latest results, grab the earliest results. This differs '
'from \033[1m-s/--sort\033[0m. See \033[1m-H/--list-help\033[0m.'))
listargs.add_argument('-H', '--list-help',
dest = 'moarhelp',
action = 'store_true',
help = ('Print extended information about how to '
'manage the output of listing and exit.'))
## EXTRACT ("restore")
rstrargs.add_argument('-p', '--path',
dest = 'archive_path',
help = ('If specified, only restore this specific path (and any subpaths).'))
rstrargs.add_argument('-t', '--target',
required = True,
dest = 'target_dir',
help = ('The path to the directory where the restore should be dumped to. It is '
'recommended to not restore to the same directory that the archive is taken from. '
'A subdirectory will be created for each server. '
'If multiple repos (or "all") are provided, subdirectories will be created per '
'repo under their respective server(s).'))
return (args)

def convertConf(cfgfile):
oldcfgfile = re.sub(r'\.xml$', '.json', cfgfile)
try:
with open(oldcfgfile, 'r') as f:
oldcfg = json.load(f)
except json.decoder.JSONDecodeError:
# It's not JSON. It's either already XML or invalid config.
return(cfgfile)
# Switched from JSON to XML, so we need to do some basic conversion.
newfname = re.sub(r'\.json$', '.xml', os.path.basename(cfgfile))
newcfg = os.path.join(os.path.dirname(cfgfile),
newfname)
if os.path.exists(newcfg):
# Do nothing. We don't want to overwrite an existing config
# and we'll assume it's an already-done conversion.
return(newcfg)
print(('It appears that you are still using the legacy JSON format. '
'We will attempt to convert it to the new XML format ({0}) but it may '
'require modifications, especially if you are using any prep functions as those are not '
'converted automatically. See sample.config.xml for an example of this.').format(newcfg))
cfg = etree.Element('borg')
# The old format only supported one server.
server = etree.Element('server')
server.attrib['target'] = oldcfg['config']['host']
server.attrib['remote'] = 'true'
server.attrib['rsh'] = oldcfg['config']['ctx']
server.attrib['user'] = oldcfg['config'].get('user', pwd.getpwuid(os.geteuid()).pw_name)
for r in oldcfg['repos']:
repo = etree.Element('repo')
repo.attrib['name'] = r
repo.attrib['password'] = oldcfg['repos'][r]['password']
for p in oldcfg['repos'][r]['paths']:
path = etree.Element('path')
path.text = p
repo.append(path)
for e in oldcfg['repos'][r].get('excludes', []):
path = etree.Element('exclude')
path.text = e
repo.append(path)
server.append(repo)
cfg.append(server)
# Build the full XML spec.
namespaces = {None: dflt_ns,
'xsi': 'http://www.w3.org/2001/XMLSchema-instance'}
xsi = {('{http://www.w3.org/2001/'
'XMLSchema-instance}schemaLocation'): ('http://git.square-r00t.net/OpTools/plain/'
'storage/backups/borg/config.xsd')}
genname = 'LXML (http://lxml.de/)'
root = etree.Element('borg', nsmap = namespaces, attrib = xsi)
root.append(etree.Comment(('Generated by {0} on {1} from {2} via {3}').format(sys.argv[0],
datetime.datetime.now(),
oldcfgfile,
genname)))
root.append(etree.Comment('THIS FILE CONTAINS SENSITIVE INFORMATION. SHARE/SCRUB WISELY.'))
for x in cfg:
root.append(x)
# Write out the file to disk.
xml = etree.ElementTree(root)
with open(newcfg, 'wb') as f:
xml.write(f,
xml_declaration = True,
encoding = 'utf-8',
pretty_print = True)
# Return the new config's path.
return(newcfg)


def main():
rawargs = parseArgs()
parsedargs = rawargs.parse_args()
args = vars(parsedargs)
args['cfgfile'] = os.path.abspath(os.path.expanduser(args['cfgfile']))
if not args['oper']:
rawargs.print_help()
exit(0)
if 'moarhelp' in args.keys() and args['moarhelp']:
printMoarHelp()
if args['oper'] == 'convert':
convertConf(args['cfgfile'])
return()
else:
if not os.path.isfile(args['cfgfile']):
oldfile = re.sub(r'\.xml$', '.json', args['cfgfile'])
if os.path.isfile(oldfile):
try:
with open(oldfile, 'r') as f:
json.load(f)
args['cfgfile'] = convertConf(args['cfgfile'])
except json.decoder.JSONDecodeError:
# It's not JSON. It's either already XML or invalid config.
pass
if not os.path.isfile(args['cfgfile']):
raise OSError('{0} does not exist'.format(args['cfgfile']))
# The "Do stuff" part
bak = Backup(args)
if args['oper'] == 'list':
bak.printer()
elif args['oper'] == 'listrepos':
bak.listRepos()
elif args['oper'] == 'backup':
bak.create()
elif args['oper'] == 'init':
bak.createRepo()
elif args['oper'] == 'restore':
bak.restore()
return()


if __name__ == '__main__':
main()

127
config.xsd Normal file

@@ -0,0 +1,127 @@
<?xml version="1.0" encoding="UTF-8" ?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
targetNamespace="http://git.square-r00t.net/BorgExtend/tree/"
xmlns="http://git.square-r00t.net/BorgExtend/tree/"
xmlns:borg="http://git.square-r00t.net/BorgExtend/tree/"
elementFormDefault="qualified"
attributeFormDefault="unqualified">

<xs:simpleType name="posixuser">
<xs:restriction base="xs:token">
<xs:pattern value="[a-z_]([a-z0-9_-]{0,31}|[a-z0-9_-]{0,30}$)"/>
</xs:restriction>
</xs:simpleType>
<xs:simpleType name="blocktext">
<xs:restriction base="xs:string">
<xs:whiteSpace value="preserve"/>
</xs:restriction>
</xs:simpleType>

<!-- START ROOT -->
<xs:element name="borg">
<xs:complexType>
<xs:choice>
<!-- START SERVER -->
<!-- This allows multiple backup destinations to be specified. -->
<xs:element name="server" minOccurs="1" maxOccurs="unbounded">
<xs:complexType>
<xs:sequence>
<!-- START REPO -->
<xs:element name="repo" minOccurs="1" maxOccurs="unbounded">
<xs:complexType>
<xs:choice minOccurs="1" maxOccurs="unbounded">
<!-- START PATH -->
<xs:element name="path" minOccurs="1"
maxOccurs="unbounded" type="xs:anyURI"/>
<!-- END PATH -->
<!-- START EXCLUDE -->
<xs:element name="exclude" minOccurs="0"
maxOccurs="unbounded" type="xs:anyURI"/>
<!-- END EXCLUDE -->
<!-- START PREP -->
<!-- This gets messy. We essentially preserve whitespace, allowing
either an inline script to be executed (written to a temp file) or
a path to an external script/command to be specified. -->
<xs:element name="prep" minOccurs="0"
maxOccurs="unbounded">
<xs:complexType>
<xs:simpleContent>
<xs:extension base="borg:blocktext">
<xs:attribute name="inline" type="xs:boolean"
default="0"/>
</xs:extension>
</xs:simpleContent>
</xs:complexType>
</xs:element>
<!-- END PREP -->
<!-- START PLUGIN -->
<xs:element name="plugins" minOccurs="0"
maxOccurs="1">
<xs:complexType>
<xs:sequence>
<xs:element name="plugin" minOccurs="1" maxOccurs="unbounded">
<xs:complexType>
<xs:sequence>
<xs:element name="param" minOccurs="0"
maxOccurs="unbounded">
<xs:complexType>
<xs:simpleContent>
<xs:extension base="borg:blocktext">
<xs:attribute name="key"
type="xs:token"
use="required"/>
<xs:attribute name="json"
type="xs:boolean"
default="0"
use="optional"/>
</xs:extension>
</xs:simpleContent>
</xs:complexType>
</xs:element>
</xs:sequence>
<xs:attribute name="name" type="xs:string" use="required"/>
<xs:attribute name="path" type="xs:anyURI" use="optional"/>
</xs:complexType>
</xs:element>
</xs:sequence>
</xs:complexType>
</xs:element>
<!-- END PLUGIN -->
</xs:choice>
<xs:attribute name="name" type="xs:token" use="required"/>
<!-- Optional. If not specified, the password will
be interactively (and securely) prompted for. -->
<xs:attribute name="password" type="xs:string" use="optional"/>
<xs:attribute name="compression" type="xs:token" use="optional"/>
</xs:complexType>
<xs:unique name="uniquePath">
<xs:selector xpath="borg:path"/>
<xs:field xpath="."/>
</xs:unique>
</xs:element>
<!-- END REPO -->
</xs:sequence>
<!-- "target" should be either a local filesystem path or the remote hostname. -->
<!-- This should *not* contain a path if it's remote. If it does, you set up Borg wrong. -->
<xs:attribute name="target" type="xs:anyURI" use="required"/>
<!-- "remote" is used to determine what type "target" is. -->
<xs:attribute name="remote" type="xs:boolean" use="required"/>
<!-- Only used if "target" is a remote host. -->
<!-- See "BORG_RSH" at https://borgbackup.readthedocs.io/en/stable/usage/general.html -->
<xs:attribute name="rsh" type="xs:string" use="optional"/>
<!-- Only used if "target" is a remote host. -->
<!-- The remote host SSH user. -->
<xs:attribute name="user" type="borg:posixuser" use="optional"/>
</xs:complexType>
</xs:element>
<!-- END SERVER -->
</xs:choice>
<xs:attribute name="borgpath" default="borg" use="optional"/>
</xs:complexType>
<xs:unique name="uniqueServer">
<xs:selector xpath="borg:server"/>
<xs:field xpath="@target"/>
</xs:unique>
</xs:element>
<!-- END ROOT -->
</xs:schema>
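
As a quick sanity check, lxml can exercise this schema directly; a minimal sketch (run from the repo root, file names as committed — note the instance document's namespace must match the schema's targetNamespace for validation to pass):

import sys
from lxml import etree

schema = etree.XMLSchema(etree.parse('config.xsd'))
doc = etree.parse('sample.config.xml')
if not schema.validate(doc):
    print(schema.error_log)  # lists each violation with line numbers
    sys.exit(1)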

97
plugins/ldap.py Normal file

@@ -0,0 +1,97 @@
import os
# TODO: virtual env?
import ldap
import ldif


# Designed for use with OpenLDAP in an OLC configuration.
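# Plugin contract (as driven by backup.py): the module is imported by name
# via importlib, and <module>.Backup(**params) runs the whole job from
# __init__(); params come from the <param> elements in the config.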


class Backup(object):
def __init__(self,
server = 'ldap://sub.domain.tld',
port = 389,
basedn = 'dc=domain,dc=tld',
sasl = False,
starttls = True,
binddn = 'cn=Manager,dc=domain,dc=tld',
password_file = '~/.ldap.pass',
password = None,
outdir = '~/.cache/backup/ldap',
splitldifs = True):
self.server = server
self.port = port
self.basedn = basedn
self.sasl = sasl
self.binddn = binddn
self.outdir = os.path.abspath(os.path.expanduser(outdir))
os.makedirs(self.outdir, exist_ok = True)
os.chmod(self.outdir, mode = 0o0700)
self.splitldifs = splitldifs
self.starttls = starttls
if password_file and not password:
with open(os.path.abspath(os.path.expanduser(password_file)), 'r') as f:
self.password = f.read().strip()
else:
self.password = password
# Human readability, yay.
# A note, SSLv3 is 0x300. But StartTLS can only be done with TLS, not SSL, I *think*?
# PRESUMABLY, now that it's finalized, TLS 1.3 will be 0x304.
# See https://tools.ietf.org/html/rfc5246#appendix-E
self._tlsmap = {'1.0': int(0x301), # 769
'1.1': int(0x302), # 770
'1.2': int(0x303)} # 771
self._minimum_tls_ver = '1.2'
if self.sasl:
self.server = 'ldapi:///'
self.cxn = None
self.connect()
self.dump()
self.close()

def connect(self):
self.cxn = ldap.initialize(self.server)
self.cxn.set_option(ldap.OPT_REFERRALS, 0)
self.cxn.set_option(ldap.OPT_PROTOCOL_VERSION, 3)
if not self.sasl:
if self.starttls:
self.cxn.set_option(ldap.OPT_X_TLS_REQUIRE_CERT, ldap.OPT_X_TLS_NEVER)
self.cxn.set_option(ldap.OPT_X_TLS, ldap.OPT_X_TLS_DEMAND)
self.cxn.set_option(ldap.OPT_X_TLS_DEMAND, True)
self.cxn.set_option(ldap.OPT_X_TLS_PROTOCOL_MIN, self._tlsmap[self._minimum_tls_ver])
if self.sasl:
self.cxn.sasl_external_bind_s()
else:
if self.starttls:
self.cxn.start_tls_s()
self.cxn.bind_s(self.binddn, self.password)
return()

def dump(self):
dumps = {'schema': 'cn=config',
'data': self.basedn}
with open(os.path.join(self.outdir, ('ldap-config.ldif' if self.splitldifs else 'ldap.ldif')), 'w') as f:
l = ldif.LDIFWriter(f)
rslts = self.cxn.search_s(dumps['schema'],
ldap.SCOPE_SUBTREE,
filterstr = '(objectClass=*)',
attrlist = ['*', '+'])
for r in rslts:
l.unparse(r[0], r[1])
if self.splitldifs:
f = open(os.path.join(self.outdir, 'ldap-data.ldif'), 'w')
else:
f = open(os.path.join(self.outdir, 'ldap.ldif'), 'a')
rslts = self.cxn.search_s(dumps['data'],
ldap.SCOPE_SUBTREE,
filterstr = '(objectClass=*)',
attrlist = ['*', '+'])
l = ldif.LDIFWriter(f)
for r in rslts:
l.unparse(r[0], r[1])
f.close()

def close(self):
if self.cxn:
self.cxn.unbind_s()
return()

96
plugins/mysql.py Normal file

@@ -0,0 +1,96 @@
import copy
import os
import re
import subprocess
import warnings

_mysql_ssl_re = re.compile('^ssl-(.*)$')

# TODO: is it possible to do a pure-python dump via PyMySQL?
# TODO: add compression support? Not *that* necessary since borg has its own.
# in fact, it's better to not do it on the dumps directly so borg can diff/delta better.

class Backup(object):
def __init__(self, dbs = None,
cfg = '~/.my.cnf',
cfgsuffix = '',
splitdumps = True,
dumpopts = None,
mysqlbin = 'mysql',
mysqldumpbin = 'mysqldump',
outdir = '~/.cache/backup/mysql'):
# If dbs is None, we dump ALL databases (that the user has access to).
self.dbs = dbs
self.cfgsuffix = cfgsuffix
self.splitdumps = splitdumps
self.mysqlbin = mysqlbin
self.mysqldumpbin = mysqldumpbin
self.outdir = os.path.abspath(os.path.expanduser(outdir))
self.cfg = os.path.abspath(os.path.expanduser(cfg))
os.makedirs(self.outdir, exist_ok = True)
os.chmod(self.outdir, mode = 0o0700)
if not os.path.isfile(self.cfg):
raise OSError(('{0} does not exist!').format(self.cfg))
if not dumpopts:
self.dumpopts = ['--routines',
'--add-drop-database',
'--add-drop-table',
'--allow-keywords',
'--complete-insert',
'--create-options',
'--extended-insert']
else:
self.dumpopts = dumpopts
self.getDBs()
self.dump()

def getDBs(self):
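# 'mysql -BNne <stmt>' = batch (tab-separated) output, skip column headers,
# unbuffered, execute the given statement; yields one database name per line.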
if not self.dbs:
_out = subprocess.run([self.mysqlbin, '-BNne', 'SHOW DATABASES'],
stdout = subprocess.PIPE,
stderr = subprocess.PIPE)
if _out.returncode != 0:
raise RuntimeError(('Could not successfully list databases: '
'{0}').format(_out.stderr.decode('utf-8')))
self.dbs = _out.stdout.decode('utf-8').strip().splitlines()
return()

def dump(self):
if self.splitdumps:
for db in self.dbs:
args = copy.deepcopy(self.dumpopts)
outfile = os.path.join(self.outdir, '{0}.sql'.format(db))
if db in ('information_schema', 'performance_schema'):
args.append('--skip-lock-tables')
elif db == 'mysql':
args.append('--flush-privileges')
cmd = [self.mysqldumpbin,
'--result-file={0}'.format(outfile)]
cmd.extend(args)
cmd.append(db)
out = subprocess.run(cmd,
stdout = subprocess.PIPE,
stderr = subprocess.PIPE)
if out.returncode != 0:
warn = ('Error dumping {0}: {1}').format(db, out.stderr.decode('utf-8').strip())
warnings.warn(warn)
else:
outfile = os.path.join(self.outdir, 'all.databases.sql')
args = copy.deepcopy(self.dumpopts)
args.append('--result-file={0}'.format(outfile))
if 'information_schema' in self.dbs:
args.append('--skip-lock-tables')
if 'mysql' in self.dbs:
args.append('--flush-privileges')
args.append('--databases')
cmd = [self.mysqldumpbin]
cmd.extend(args)
cmd.extend(self.dbs)
out = subprocess.run(cmd,
stdout = subprocess.PIPE,
stderr = subprocess.PIPE)
if out.returncode != 0:
warn = ('Error dumping {0}: {1}').format(','.join(self.dbs),
out.stderr.decode('utf-8').strip())
warnings.warn(warn)
return()

229
plugins/yum_pkgs.py Normal file

@@ -0,0 +1,229 @@
import datetime
import os
import re
import sys
##
from lxml import etree
try:
# Note that currently, even on CentOS/RHEL 7, the yum module is only available for Python 2...
# because reasons or something?
# This may be re-done to allow for a third-party library in the case of python 3 invocation.
import yum
has_yum = True
except ImportError:
# This will get *ugly*. You have been warned. It also uses more system resources and it's INCREDIBLY slow.
# But it's safe.
# Requires yum-utils to be installed.
# It assumes a python 3 environment for the exact above reason.
import subprocess
has_yum = False

# See <optools>:/storage/backups/borg/tools/restore_yum_pkgs.py to use the XML file this generates.


# Detect RH version.
ver_re = re.compile(r'^(centos.*|red\s?hat.*) ([0-9.]+) .*$', re.IGNORECASE)
# distro module isn't stdlib, and platform.linux_distribution() (AND platform.distro()) are both deprecated in 3.7.
# So we get hacky.
with open('/etc/redhat-release', 'r') as f:
rawver = f.read()
distver = [int(i) for i in ver_re.sub(r'\g<2>', rawver.strip()).split('.')]
# NOTE: re.sub()'s fourth positional argument is count, not flags, so IGNORECASE must be passed by keyword.
distname = re.sub('(Linux )?release', '', ver_re.sub(r'\g<1>', rawver.strip()), flags = re.IGNORECASE).strip()
# Regex pattern to get the repo name. We compile it just to speed up the execution.
repo_re = re.compile('^@')
# Python version
pyver = sys.hexversion
py3 = 0x30000f0 # TODO: check the version incompats
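# sys.hexversion packs the version as 0xMMmmppRS (major, minor, micro,
# release level, serial), so 0x30000f0 is "3.0.0 final"; "pyver >= py3"
# therefore just means "running under Python 3 or later".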


class Backup(object):
def __init__(self, explicit_only = True,
include_deps = False,
output = '~/.cache/backup/misc/installed_pkgs.xml'):
self.explicit_only = explicit_only
self.include_deps = include_deps
self.reasons = []
if self.explicit_only:
self.reasons.append('user')
if self.include_deps:
self.reasons.append('dep')
self.output = os.path.abspath(os.path.expanduser(output))
if has_yum:
self.yb = yum.YumBase()
# Make it run silently.
self.yb.preconf.debuglevel = 0
self.yb.preconf.errorlevel = 0
self.pkg_meta = []
# TODO: XSD?
self.pkgs = etree.Element('packages')
self.pkgs.attrib['distro'] = distname
self.pkgs.attrib['version'] = '.'.join([str(i) for i in distver])
self.pkglist = b''
self.getPkgList()
self.buildPkgInfo()
self.write()

def getPkgList(self):
if has_yum:
if not self.explicit_only:
self.pkg_meta = self.yb.rpmdb.returnPackages()
else:
for pkg in self.yb.rpmdb.returnPackages():
reason = pkg.yumdb_info.get('reason')
if reason and reason.lower() in self.reasons:
self.pkg_meta.append(pkg)
else:
pass # We do this in buildPkgInfo().
return()

def buildPkgInfo(self):
if not has_yum:
def repoQuery(nevra, fmtstr):
cmd = ['/usr/bin/repoquery',
'--installed',
'--queryformat', fmtstr,
nevra]
cmd_out = subprocess.run(cmd, stdout = subprocess.PIPE).stdout.decode('utf-8')
return(cmd_out)
_reason = '*'
if self.reasons:
if 'dep' not in self.reasons:
_reason = 'user'
cmd = ['/usr/sbin/yumdb',
'search',
'reason',
_reason]
rawpkgs = subprocess.run(cmd, stdout = subprocess.PIPE).stdout.decode('utf-8')
reason_re = re.compile(r'^(\s+reason\s+=\s+.*|\s*)$')
pkgs = []
for line in rawpkgs.splitlines():
if not reason_re.search(line):
pkgs.append(line.strip())
for pkg_nevra in pkgs:
reponame = repo_re.sub('', repoQuery(pkg_nevra, '%{ui_from_repo}')).strip()
repo = self.pkgs.xpath('repo[@name="{0}"]'.format(reponame))
if repo:
repo = repo[0]
else:
# This is pretty error-prone. Fix/cleanup your systems.
repo = etree.Element('repo')
repo.attrib['name'] = reponame
rawrepo = subprocess.run(['/usr/bin/yum',
'-v',
'repolist',
reponame],
stdout = subprocess.PIPE).stdout.decode('utf-8')
urls = []
mirror = re.search('^Repo-mirrors\s*:', rawrepo, re.M)
repostatus = re.search('^Repo-status\s*:', rawrepo, re.M)
repourl = re.search('^Repo-baseurl\s*:', rawrepo, re.M)
repodesc = re.search('^Repo-name\s*:', rawrepo, re.M)
if mirror:
urls.append(mirror.group(0).split(':', 1)[1].strip())
if repourl:
urls.append(repourl.group(0).split(':', 1)[1].strip())
repo.attrib['urls'] = '>'.join(urls) # https://stackoverflow.com/a/13500078
if repostatus:
repostatus = repostatus.group(0).split(':', 1)[1].strip().lower()
repo.attrib['enabled'] = ('true' if repostatus == 'enabled' else 'false')
else:
repo.attrib['enabled'] = 'false'
if repodesc:
repo.attrib['desc'] = repodesc.group(0).split(':', 1)[1].strip()
else:
repo.attrib['desc'] = '(metadata missing)'
self.pkgs.append(repo)
pkgelem = etree.Element('package')
pkginfo = {'NEVRA': pkg_nevra,
'desc': repoQuery(pkg_nevra, '%{summary}').strip()}
# These are all values with no whitespace so we can easily combine into one call and then split them.
(pkginfo['name'],
pkginfo['release'],
pkginfo['arch'],
pkginfo['version'],
pkginfo['built'],
pkginfo['installed'],
pkginfo['sizerpm'],
pkginfo['sizedisk']) = re.split('\t',
repoQuery(pkg_nevra,
('%{name}\t'
'%{release}\t'
'%{arch}\t'
'%{ver}\t' # version
'%{buildtime}\t' # built
'%{installtime}\t' # installed
'%{packagesize}\t' # sizerpm
'%{installedsize}') # sizedisk
))
for k in ('built', 'installed', 'sizerpm', 'sizedisk'):
pkginfo[k] = int(pkginfo[k])
for k in ('built', 'installed'):
pkginfo[k] = datetime.datetime.fromtimestamp(pkginfo[k])
for k, v in pkginfo.items():
if pyver >= py3:
pkgelem.attrib[k] = str(v)
else:
if isinstance(v, (int, long, datetime.datetime)):
pkgelem.attrib[k] = str(v).encode('utf-8')
elif isinstance(v, str):
pkgelem.attrib[k] = v.decode('utf-8')
else:
pkgelem.attrib[k] = v.encode('utf-8')
repo.append(pkgelem)
else:
for pkg in self.pkg_meta:
reponame = repo_re.sub('', pkg.ui_from_repo)
repo = self.pkgs.xpath('repo[@name="{0}"]'.format(reponame))
if repo:
repo = repo[0]
else:
repo = etree.Element('repo')
repo.attrib['name'] = reponame
try:
repoinfo = self.yb.repos.repos[reponame]
repo.attrib['urls'] = '>'.join(repoinfo.urls) # https://stackoverflow.com/a/13500078
repo.attrib['enabled'] = ('true' if repoinfo in self.yb.repos.listEnabled() else 'false')
repo.attrib['desc'] = repoinfo.name
except KeyError: # Repo is missing
repo.attrib['desc'] = '(metadata missing)'
self.pkgs.append(repo)
pkgelem = etree.Element('package')
pkginfo = {'name': pkg.name,
'desc': pkg.summary,
'version': pkg.ver,
'release': pkg.release,
'arch': pkg.arch,
'built': datetime.datetime.fromtimestamp(pkg.buildtime),
'installed': datetime.datetime.fromtimestamp(pkg.installtime),
'sizerpm': pkg.packagesize,
'sizedisk': pkg.installedsize,
'NEVRA': pkg.nevra}
for k, v in pkginfo.items():
if pyver >= py3:
pkgelem.attrib[k] = str(v)
else:
if isinstance(v, (int, long, datetime.datetime)):
pkgelem.attrib[k] = str(v).encode('utf-8')
elif isinstance(v, str):
pkgelem.attrib[k] = v.decode('utf-8')
else:
pkgelem.attrib[k] = v.encode('utf-8')
repo.append(pkgelem)
self.pkglist = etree.tostring(self.pkgs,
pretty_print = True,
xml_declaration = True,
encoding = 'UTF-8')
return()

def write(self):
outdir = os.path.dirname(self.output)
if pyver >= py3:
os.makedirs(outdir, exist_ok = True)
os.chmod(outdir, mode = 0o0700)
else:
if not os.path.isdir(outdir):
os.makedirs(outdir)
os.chmod(outdir, 0o0700)
with open(self.output, 'wb') as f:
f.write(self.pkglist)
return()

73
sample.config.xml Normal file

@@ -0,0 +1,73 @@
<?xml version="1.0" encoding="UTF-8" ?>
<borg xmlns="http://git.square-r00t.net/BorgExtend/"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://git.square-r00t.net/BorgExtend/plain/config.xsd"
borgpath="/usr/bin/borg">
<!-- You can have multiple server elements, but each one *MUST* have a unique "target" attribute. -->
<!-- "target" = either the local filesystem path (absolute or relative to execution) or the remote host
"remote" = 1/true if "target" is a remote host or 0/false if it's a local filepath
"rsh" = (remote host only) the ssh command to use. The default is given below.
"user" = (remote host only) the ssh user to use. -->
<server target="fq.dn.tld" remote="true" rsh="ssh -p 22" user="root">
<!-- You can (and probably will) have multiple repos for each server. -->
<!-- "name" = the repositoriy name.
"password" = the repository's password for the key. If not specified, you will be prompted
to enter it interactively and securely.
"compression" = see https://borgbackup.readthedocs.io/en/stable/usage/create.html (-C option) -->
<repo name="testrepo" password="SuperSecretPassword" compression="lzma,9">
<!-- Each path entry is a path to back up.
See https://borgbackup.readthedocs.io/en/stable/usage/create.html for examples of globbing, etc. -->
<path>/a</path>
<!-- Each exclude entry should be a subdirectory of a <path> (otherwise it wouldn't match, obviously).
See https://borgbackup.readthedocs.io/en/stable/usage/create.html for examples of globbing etc. -->
<exclude>/a/b</exclude>
<!-- Prep items are executed in non-guaranteed order (but are likely to be performed in order given).
If you require them to be in a specific order, you should use a wrapper script and
use that as a prep item. -->
<!-- "inline" = if true/1, the provided text will be temporarily written to disk, executed, and deleted.
if false/0, the provided text is assumed to be a single-shot command/path to a script
(arguments are not currently supported, but may be in the future). -->
<!-- If using inline especially, take note of and use XML escape characters:
" = &quot;
' = &apos;
< = &lt;
> = &gt;
& = &amp;
and note that whitespace (including leading!) *is* preserved. -->
<!-- It *MUST* return 0 on success. -->
<prep inline="1">#!/bin/bash
# this is block text
</prep>
<prep inline="0">/usr/local/bin/someprep.sh</prep>
<!-- Plugins are direct Python modules, and are alternatives to prep items.
They must:
- be importable from Python's path environment (or a "path" must be provided, either absolute
or relative to *execution*, not to the script's placement in the filesystem)
- contain a class called <module>.Backup() (which will execute all tasks on initialization)
See plugins/ directory for examples and below for example of invocation. -->
<plugins>
<!-- Each plugin item MUST define a "name" attribute. This is the name of the module to import.
"path" = (optional) the directory containing the plugin module; it must end in .py -->
<plugin name="mysql" path="./plugins">
<!-- Param elements are optional. Each param element MUST define a "key" attribute; this is
the name of the parameter. (This should match the corresponding
<module>.Backup().__init__() parameter name.)
If you want a parameter to be provided but with a None value, make it self-enclosed
(e.g. '<param key="someparam"/>').
If you need to serialize pythonic objects (lists, dicts, booleans),
then set the "json" attribute to 1/true and provide the data in minified
JSON format (also referred to as "compressed JSON") - see "tools/minify_json.py -h". -->
<param key="dbs" json="true">["db1","db2"]</param>
<param key="splitdumps" json="true">true</param>
<param key="dumpopts" json="true">["--routines","--add-drop-database","--add-drop-table","--allow-keywords","--complete-insert","--create-options","--extended-insert"]</param>
</plugin>
<plugin name="ldap" path="./plugins">
<param key="server">ldap://my.server.tld</param>
<param key="binddn">cn=Manager,dc=server,dc=tld</param>
<param key="password">SuperSecretPassword</param>
<param key="splitldifs" json="true">false</param>
</plugin>
</plugins>
</repo>
</server>
</borg>
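
As an illustration of how this structure maps to lxml lookups, here is a minimal sketch that parses the sample and prints each configured target/repo pair (note that backup.py itself expects its own default namespace, so this only demonstrates the layout):

from lxml import etree

ns = '{http://git.square-r00t.net/BorgExtend/}'  # this sample's namespace
cfg = etree.parse('sample.config.xml').getroot()
for server in cfg.findall(ns + 'server'):
    for repo in server.findall(ns + 'repo'):
        print(server.attrib['target'], repo.attrib['name'])
# -> fq.dn.tld testrepo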

52
tools/minify_json.py Executable file

@@ -0,0 +1,52 @@
#!/usr/bin/env python3

import argparse
import json
import os
import sys

def minify(json_in):
j = json.loads(json_in)
j = json.dumps(j, indent = None, separators = (',', ':'))
return(j)
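# e.g.: minify('{ "a" : [1, 2] }') -> '{"a":[1,2]}'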

def parseArgs():
args = argparse.ArgumentParser(description = ('Minify ("compress") JSON input'))
args.add_argument('-o', '--output',
default = '-',
help = ('Write the minified JSON out to a file. The default is "-", which instead prints it to '
'STDOUT. If instead you would like to write out to STDERR, use "+" (otherwise provide a '
'path)'))
args.add_argument('json_in',
default = '-',
nargs = '?',
help = ('The JSON input. If "-" (the default), read STDIN; otherwise provide a path to the '
'JSON file'))
return(args)

def main():
args = parseArgs().parse_args()
if args.json_in.strip() == '-':
stdin = sys.stdin.read()
if not stdin:
# argparse.ArgumentError requires an argument object as well as a message.
raise ValueError('You specified to read from STDIN, but STDIN is blank')
else:
args.json_in = stdin
else:
with open(os.path.abspath(os.path.expanduser(args.json_in)), 'r') as f:
args.json_in = f.read()
minified = minify(args.json_in)
if args.output.strip() not in ('-', '+'):
args.output = os.path.abspath(os.path.expanduser(args.output))
if not args.output.endswith('.json'):
args.output += '.json'
with open(args.output, 'w') as f:
f.write(minified + '\n')
elif args.output.strip() == '+':
sys.stderr.write(minified + '\n')
else:
sys.stdout.write(minified + '\n')
return()

if __name__ == '__main__':
main()

194
tools/restore_yum_pkgs.py Executable file

@@ -0,0 +1,194 @@
#!/usr/bin/env python

import argparse # yum install python-argparse on CentOS/RHEL 6.x
import os
import re
import subprocess
import sys
import warnings
##
# The yum API is *suuuper* cantankerous and kind of broken, even.
# Patches welcome, but for now we just use subprocess.
import yum
from lxml import etree # yum install python-lxml


# Detect RH version.
ver_re = re.compile(r'^(centos.*|red\s?hat.*) ([0-9.]+) .*$', re.IGNORECASE)
# distro module isn't stdlib, and platform.linux_distribution() (AND platform.distro()) are both deprecated in 3.7.
# So we get hacky.
with open('/etc/redhat-release', 'r') as f:
rawver = f.read()
distver = [int(i) for i in ver_re.sub(r'\g<2>', rawver.strip()).split('.')]
# NOTE: re.sub()'s fourth positional argument is count, not flags, so IGNORECASE must be passed by keyword.
distname = re.sub('(Linux )?release', '', ver_re.sub(r'\g<1>', rawver.strip()), flags = re.IGNORECASE).strip()
# Regex pattern to get the repo name. We compile it just to speed up the execution.
repo_re = re.compile('^@')
# Python version
pyver = sys.hexversion
py3 = 0x30000f0 # TODO: check the version incompats

if pyver < py3:
import copy


class Reinstaller(object):
def __init__(self, pkglist_path, latest = True):
self.latest = latest
pkglist_file = os.path.abspath(os.path.expanduser(pkglist_path))
with open(pkglist_file, 'rb') as f:
self.pkgs = etree.fromstring(f.read())
if not self.latest:
# Make sure the versions match, otherwise Bad Things(TM) can occur.
if not all(((distname == self.pkgs.attrib['distro']),
('.'.join([str(i) for i in distver]) == self.pkgs.attrib['version']))):
err = ('This package set was created on {0} {1}. '
'The current running OS is {2} {3} and you have set latest = False/None. '
'THIS IS A VERY BAD IDEA.').format(self.pkgs.attrib['distro'],
self.pkgs.attrib['version'],
distname,
'.'.join([str(i) for i in distver]))
raise RuntimeError(err)
# Make it run silently.
self.yb = yum.YumBase()
self.yb.preconf.quiet = 1
self.yb.preconf.debuglevel = 0
self.yb.preconf.errorlevel = 0
self.yb.preconf.assumeyes = 1
self.yb.preconf.rpmverbosity = 'error'

def iterPkgs(self):
for repo in self.pkgs.findall('repo'):
# Base install packages ("anaconda") don't play nicely with this. They should be expected to
# already be installed anyways, and self.latest is irrelevant - downgrading these can cause
# *major* issues.
# And "installed" repo are packages installed manually from RPM.
if self.latest:
if repo.attrib['name'].lower() in ('anaconda', 'installed'):
continue
reponm = repo.attrib['desc']
# This is only needed for the subprocess workaround.
cmd = ['yum', '-q', '-y',
# '--disablerepo=*',
'--enablerepo={0}'.format(repo.attrib['name'])]
pkgs = {'new': [],
'upgrade': [],
'downgrade': []}
for pkg in repo.findall('package'):
pkg_found = False
is_installed = False
if self.latest:
pkgnm = pkg.attrib['name']
else:
pkgnm = pkg.attrib['NEVRA']
pkglist = self.yb.doPackageLists(patterns = [pkgnm], showdups = True)
if pkglist.updates:
for pkgobj in reversed(pkglist.updates):
if pkgobj.repo.name == reponm:
# Haven't gotten this working properly. Patches welcome.
# self.yb.install(po = pkgobj)
# self.yb.resolveDeps()
# self.yb.buildTransaction()
# self.yb.processTransaction()
if self.latest:
pkgs['upgrade'].append(pkgobj.name)
else:
if distver[0] >= 7:
pkgs['upgrade'].append(pkgobj.nevra)
else:
pkgs['upgrade'].append(pkgobj._ui_nevra())
pkg_found = True
is_installed = False
break
if pkglist.installed and not pkg_found:
for pkgobj in reversed(pkglist.installed):
if pkgobj.repo.name == reponm:
if distver[0] >= 7:
nevra = pkgobj.nevra
else:
nevra = pkgobj._ui_nevra()
warn = ('{0} from {1} is already installed; skipping').format(nevra,
repo.attrib['name'])
warnings.warn(warn)
pkg_found = True
is_installed = True
if not all((is_installed, pkg_found)):
if pkglist.available:
for pkgobj in reversed(pkglist.available):
if pkgobj.repo.name == reponm:
# Haven't gotten this working properly. Patches welcome.
# self.yb.install(po = pkgobj)
# self.yb.resolveDeps()
# self.yb.buildTransaction()
# self.yb.processTransaction()
if self.latest:
pkgs['new'].append(pkgobj.name)
else:
if distver[0] >= 7:
pkgs['new'].append(pkgobj.nevra)
else:
pkgs['new'].append(pkgobj._ui_nevra())
is_installed = False
pkg_found = True
break
if not self.latest:
if pkglist.old_available:
for pkgobj in reversed(pkglist.old_available):
if pkgobj.repo.name == reponm:
# Haven't gotten this working properly. Patches welcome.
# self.yb.install(po = pkgobj)
# self.yb.resolveDeps()
# self.yb.buildTransaction()
# self.yb.processTransaction()
if distver[0] >= 7:
pkgs['downgrade'].append(pkgobj.nevra)
else:
pkgs['downgrade'].append(pkgobj._ui_nevra())
pkg_found = True
break
# # This... seems to always fail. Patches welcome.
# # self.yb.processTransaction()
for k in pkgs:
if not pkgs[k]:
continue
if pyver < py3:
_cmd = copy.deepcopy(cmd)
else:
_cmd = cmd.copy()
if k == 'downgrade':
_cmd.append('downgrade')
else:
if self.latest:
_cmd.append('install')
else:
if distver[0] >= 7:
_cmd.append('install-nevra')
else:
_cmd.append('install')
_cmd.extend(pkgs[k])
if pyver >= py3:
subprocess.run(_cmd)
else:
subprocess.call(_cmd)
return()
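
# The fallback subprocess invocations assembled above end up looking like,
# e.g. (repo and package names assumed):
#   yum -q -y --enablerepo=base install bash vim-enhanced
#   yum -q -y --enablerepo=base downgrade bash-4.2.46-30.el7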


def parseArgs():
args = argparse.ArgumentParser(description = ('Reinstall packages from a generated XML package list'))
args.add_argument('-V', '--version',
dest = 'latest',
action = 'store_false',
help = ('If specified, (try to) install the same version as specified in the package list.'))
args.add_argument('pkglist_path',
metavar = 'PKGLIST',
help = ('The path to the generated packages XML file.'))
return(args)

def main():
args = parseArgs().parse_args()
dictargs = vars(args)
r = Reinstaller(**dictargs)
r.iterPkgs()
return()

if __name__ == '__main__':
main()