commit 08fc183956eac6563a1a112379501057ac30702b Author: brent s Date: Wed Jun 5 21:47:31 2019 -0400 moving to its own repo diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ff3b78a --- /dev/null +++ b/.gitignore @@ -0,0 +1,27 @@ +# https://git-scm.com/docs/gitignore +# https://help.github.com/articles/ignoring-files +# Example .gitignore files: https://github.com/github/gitignore +*.bak +screenlog* +*.swp +*.lck +*~ +.~lock.* +.editix +__pycache__/ +*.pyc +*.tar +*.tar.bz2 +*.tar.xz +*.tar.gz +*.tgz +*.txz +*.tbz +*.tbz2 +*.zip +*.run +*.7z +*.rar +*.sqlite3 +*.deb +.idea/ diff --git a/backup.py b/backup.py new file mode 100755 index 0000000..ec90e34 --- /dev/null +++ b/backup.py @@ -0,0 +1,837 @@ +#!/usr/bin/env python3 + +# TODO: https://borgbackup.readthedocs.io/en/latest/internals/frontends.html +# will they EVER release a public API? for now we'll just use subprocess since +# we import it for various prep stuff anyways. +# TODO: change loglevel of borg itself in subprocess to match the argparse? +# --debug, --info (same as -v/--verbose), --warning, --error, --critical +# TODO: modify config to add repo to cfg for init? or add new operation, "add" + +import argparse +import datetime +import json +import getpass +import logging +import logging.handlers +import os +import pwd +import re +# TODO: use borg module directly instead of subprocess? +import subprocess +import sys +import tempfile +# TODO: virtual env? +from lxml import etree # A lot safer and easier to use than the stdlib xml module. 
+try: + import pymysql # not stdlib; "python-pymysql" in Arch's AUR + has_mysql = True +except ImportError: + has_mysql = False +try: + # https://www.freedesktop.org/software/systemd/python-systemd/journal.html#journalhandler-class + from systemd import journal + has_systemd = True +except ImportError: + has_systemd = False + +### LOG LEVEL MAPPINGS ### +loglvls = {'critical': logging.CRITICAL, + 'error': logging.ERROR, + 'warning': logging.WARNING, + 'info': logging.INFO, + 'debug': logging.DEBUG} + +### DEFAULT NAMESPACE ### +dflt_ns = 'http://git.square-r00t.net/OpTools/tree/storage/backups/borg/' + + +### THE GUTS ### +class Backup(object): + def __init__(self, args): + self.args = args + self.ns = '{{{0}}}'.format(dflt_ns) + if self.args['oper'] == 'restore': + self.args['target_dir'] = os.path.abspath(os.path.expanduser(self.args['target_dir'])) + os.makedirs(self.args['target_dir'], + exist_ok = True, + mode = 0o700) + self.repos = {} + ### LOGGING ### + # Thanks to: + # https://web.archive.org/web/20170726052946/http://www.lexev.org/en/2013/python-logging-every-day/ + # https://stackoverflow.com/a/42604392 + # https://plumberjack.blogspot.com/2010/10/supporting-alternative-formatting.html + # and user K900_ on r/python for entertaining my very silly question. + self.logger = logging.getLogger(__name__) + self.logger.setLevel(loglvls[self.args['loglevel']]) + _logfmt = logging.Formatter(fmt = ('{levelname}:{name}: {message} ({asctime}; {filename}:{lineno})'), + style = '{', + datefmt = '%Y-%m-%d %H:%M:%S') + _journalfmt = logging.Formatter(fmt = '{levelname}:{name}: {message} ({filename}:{lineno})', + style = '{', + datefmt = '%Y-%m-%d %H:%M:%S') + handlers = [] + if self.args['disklog']: + os.makedirs(os.path.dirname(self.args['logfile']), + exist_ok = True, + mode = 0o700) + # TODO: make the constraints for rotation in config? 
+ handlers.append(logging.handlers.RotatingFileHandler(self.args['logfile'], + encoding = 'utf8', + maxBytes = 100000, + backupCount = 1)) + if self.args['verbose']: + handlers.append(logging.StreamHandler()) + if has_systemd: + try: + h = journal.JournalHandler() + except AttributeError: + h = journal.JournaldLogHandler() + h.setFormatter(_journalfmt) + h.setLevel(loglvls[self.args['loglevel']]) + self.logger.addHandler(h) + for h in handlers: + h.setFormatter(_logfmt) + h.setLevel(loglvls[self.args['loglevel']]) + self.logger.addHandler(h) + ### END LOGGING ### + self.logger.debug('BEGIN INITIALIZATION') + ### CONFIG ### + if not os.path.isfile(self.args['cfgfile']): + self.logger.error('{0} does not exist'.format(self.args['cfgfile'])) + exit(1) + try: + with open(self.args['cfgfile'], 'rb') as f: + self.xml = etree.parse(f) + self.xml.xinclude() + self.cfg = self.xml.getroot() + except etree.XMLSyntaxError: + self.logger.error('{0} is invalid XML'.format(self.args['cfgfile'])) + raise ValueError(('{0} does not seem to be valid XML. ' + 'See sample.config.xml for an example configuration.').format(self.args['cfgfile'])) + self.borgbin = self.cfg.attrib.get('borgpath', '/usr/bin/borg') + ### CHECK ENVIRONMENT ### + # If we're running from cron, we want to print errors to stdout. 
+ if os.isatty(sys.stdin.fileno()): + self.cron = False + else: + self.cron = True + self.logger.debug('END INITIALIZATION') + self.buildRepos() + + def buildRepos(self): + def getRepo(server, reponames = None): + if not reponames: + reponames = [] + repos = [] + for repo in server.findall('{0}repo'.format(self.ns)): + if reponames and repo.attrib['name'] not in reponames: + continue + r = {} + for a in repo.attrib: + r[a] = repo.attrib[a] + for e in ('path', 'exclude'): + r[e] = [i.text for i in repo.findall(self.ns + e)] + for prep in repo.findall('{0}prep'.format(self.ns)): + if 'prep' not in r: + r['prep'] = [] + if prep.attrib.get('inline', 'true').lower()[0] in ('0', 'f'): + with open(os.path.abspath(os.path.expanduser(prep.text)), 'r') as f: + r['prep'].append(f.read()) + else: + r['prep'].append(prep.text) + plugins = repo.find('{0}plugins'.format(self.ns)) + if plugins is not None: + r['plugins'] = {} + for plugin in plugins.findall('{0}plugin'.format(self.ns)): + pname = plugin.attrib['name'] + r['plugins'][pname] = {'path': plugin.attrib.get('path'), + 'params': {}} + for param in plugin.findall('{0}param'.format(self.ns)): + paramname = param.attrib['key'] + if param.attrib.get('json', 'false').lower()[0] in ('1', 't'): + r['plugins'][pname]['params'][paramname] = json.loads(param.text) + else: + r['plugins'][pname]['params'][paramname] = param.text + repos.append(r) + return(repos) + self.logger.debug('VARS (before args cleanup): {0}'.format(vars(self))) + self.args['repo'] = [i.strip() for i in self.args['repo'].split(',')] + self.args['server'] = [i.strip() for i in self.args['server'].split(',')] + if 'all' in self.args['repo']: + self.args['repo'] = None + if 'all' in self.args['server']: + self.args['server'] = [] + for server in self.cfg.findall('{0}server'.format(self.ns)): + # The server elements are uniquely constrained to the "target" attrib. 
def createRepo(self):
    """Initialise every selected repository with ``borg init -e repokey``.

    Walks ``self.repos`` (server target -> server attributes plus a
    ``'repos'`` list) and runs one ``borg init`` per repository. The
    passphrase comes from the repo's ``password`` attribute, or is prompted
    for interactively when that attribute is absent. Nothing is executed
    when ``self.args['dryrun']`` is set.

    Returns:
        An empty tuple, like the other operations of this class.
    """
    for server in self.repos:
        srv = self.repos[server]
        is_remote = srv['remote'].lower()[0] in ('1', 't')
        _env = os.environ.copy()
        # https://github.com/borgbackup/borg/issues/2273
        # https://borgbackup.readthedocs.io/en/stable/internals/frontends.html
        _env['LANG'] = 'en_US.UTF-8'
        _env['LC_CTYPE'] = 'en_US.UTF-8'
        # Only export BORG_RSH when the server actually carries an "rsh"
        # attribute; indexing it unconditionally raised KeyError otherwise.
        if is_remote and srv.get('rsh') is not None:
            _env['BORG_RSH'] = srv['rsh']
        _user = srv.get('user', pwd.getpwuid(os.geteuid()).pw_name)
        for repo in srv['repos']:
            self.logger.info('[{0}]: BEGIN INITIALIZATION'.format(repo['name']))
            # Per-repo copy so one repo's passphrase never leaks into the next.
            _loc_env = _env.copy()
            if 'password' not in repo:
                print('Password not supplied for {0}:{1}.'.format(server, repo['name']))
                _loc_env['BORG_PASSPHRASE'] = getpass.getpass('Password (will NOT echo back): ')
            else:
                _loc_env['BORG_PASSPHRASE'] = repo['password']
            _cmd = [self.borgbin,
                    '--log-json',
                    '--{0}'.format(self.args['loglevel']),
                    'init',
                    '-e', 'repokey']
            if is_remote:
                repo_tgt = '{0}@{1}'.format(_user, server)
            else:
                repo_tgt = os.path.abspath(os.path.expanduser(server))
            _cmd.append('{0}:{1}'.format(repo_tgt,
                                         repo['name']))
            self.logger.debug('VARS: {0}'.format(vars(self)))
            if not self.args['dryrun']:
                _out = subprocess.run(_cmd,
                                      env = _loc_env,
                                      stdout = subprocess.PIPE,
                                      stderr = subprocess.PIPE)
                _stdout = _out.stdout.decode('utf-8').strip()
                _stderr = _out.stderr.decode('utf-8').strip()
                _returncode = _out.returncode
                self.logger.debug('[{0}]: (RESULT) {1}'.format(repo['name'], _stdout))
                # sigh. borg uses stderr for verbose output.
                self.logger.debug('[{0}]: STDERR: ({2})\n{1}'.format(repo['name'],
                                                                    _stderr,
                                                                    ' '.join(_cmd)))
                if _returncode != 0:
                    self.logger.error(
                            '[{0}]: FAILED: {1}'.format(repo['name'], ' '.join(_cmd)))
                if _stderr != '' and self.cron and _returncode != 0:
                    self.logger.warning('Command {0} failed: {1}'.format(' '.join(_cmd),
                                                                         _stderr))
            self.logger.info('[{0}]: END INITIALIZATION'.format(repo['name']))
    return()
non-zero').format(idx, exec_tmp, server, repo) + self.logger.warning(err) + self.logger.debug('STDOUT: {0}'.format(prep_out.stdout.decode('utf-8'))) + self.logger.debug('STDERR: {0}'.format(prep_out.stderr.decode('utf-8'))) + else: + os.remove(exec_tmp) + if 'plugins' in repo: + import importlib + _orig_path = sys.path + for plugin in repo['plugins']: + self.logger.debug('Initializing plugin: {0}'.format(plugin)) + if repo['plugins'][plugin]['path']: + sys.path.insert(1, os.path.abspath(os.path.expanduser(repo['plugins'][plugin]['path']))) + optools_tmpmod = importlib.import_module(plugin, package = None) + if not repo['plugins'][plugin]['params']: + optools_tmpmod.Backup() + else: + optools_tmpmod.Backup(**repo['plugins'][plugin]['params']) + del(sys.modules[plugin]) + del(optools_tmpmod) + sys.path = _orig_path + self.logger.debug('Finished plugin: {0}'.format(plugin)) + # This is where we actually do the thing. + _cmd = [self.borgbin, + '--log-json', + '--{0}'.format(self.args['loglevel']), + 'create', + '--stats'] + if 'compression' in repo: + _cmd.extend(['--compression', repo['compression']]) + if 'exclude' in repo: + for e in repo['exclude']: + _cmd.extend(['--exclude', e]) + if self.repos[server]['remote'].lower()[0] in ('1', 't'): + repo_tgt = '{0}@{1}'.format(_user, server) + else: + repo_tgt = os.path.abspath(os.path.expanduser(server)) + _cmd.append('{0}:{1}::{2}'.format(repo_tgt, + repo['name'], + self.args['archive'])) + for p in repo['path']: + _cmd.append(p) + self.logger.debug('VARS: {0}'.format(vars())) + # We don't use self.cmdExec() here because we want to explicitly + # pass the env and format the log line differently. 
def restore(self):
    """Extract ``self.args['archive']`` from every selected repository.

    Archives are extracted beneath ``self.args['target_dir']``: one
    subdirectory per server and, when a server has more than one selected
    repository, one further subdirectory per repository. ``borg extract``
    writes into the current directory, so the process changes into each
    destination; the original working directory is always restored.

    Returns:
        An empty tuple, like the other operations of this class.
    """
    # TODO: support "--strip-components N"?
    # TODO: support add'l args?
    # TODO: Restore() class in plugins?
    # https://borgbackup.readthedocs.io/en/stable/usage/extract.html
    orig_dir = os.getcwd()
    self.logger.info('START: restore')
    self.args['target_dir'] = os.path.abspath(os.path.expanduser(self.args['target_dir']))
    os.makedirs(self.args['target_dir'], exist_ok = True)
    os.chmod(self.args['target_dir'], mode = 0o0700)
    try:
        for server in self.repos:
            srv = self.repos[server]
            is_remote = srv['remote'].lower()[0] in ('1', 't')
            _env = os.environ.copy()
            # A None value in env makes subprocess raise TypeError, so only
            # export BORG_RSH when the attribute is really there.
            if is_remote and srv.get('rsh') is not None:
                _env['BORG_RSH'] = srv['rsh']
            _env['LANG'] = 'en_US.UTF-8'
            _env['LC_CTYPE'] = 'en_US.UTF-8'
            _user = srv.get('user', pwd.getpwuid(os.geteuid()).pw_name)
            # os.path.join() throws away everything before an absolute
            # component; strip leading separators so a local target such as
            # "/srv/borg" still lands *inside* target_dir.
            server_dir = os.path.join(self.args['target_dir'], server.lstrip(os.sep))
            # Count the repos, not the keys of the server dict (which also
            # holds attributes such as "remote"/"rsh"/"user").
            multi_repo = len(srv['repos']) > 1
            for repo in srv['repos']:
                _loc_env = _env.copy()
                if 'password' not in repo:
                    print('Password not supplied for {0}:{1}.'.format(server, repo['name']))
                    _loc_env['BORG_PASSPHRASE'] = getpass.getpass('Password (will NOT echo back): ')
                else:
                    _loc_env['BORG_PASSPHRASE'] = repo['password']
                if multi_repo:
                    dest_dir = os.path.join(server_dir, repo['name'])
                else:
                    dest_dir = server_dir
                os.makedirs(dest_dir, exist_ok = True)
                os.chmod(dest_dir, mode = 0o0700)
                os.chdir(dest_dir)
                self.logger.info('[{0}]: BEGIN RESTORE'.format(repo['name']))
                _cmd = [self.borgbin,
                        '--log-json',
                        '--{0}'.format(self.args['loglevel']),
                        'extract']
                if is_remote:
                    repo_tgt = '{0}@{1}'.format(_user, server)
                else:
                    repo_tgt = os.path.abspath(os.path.expanduser(server))
                _cmd.append('{0}:{1}::{2}'.format(repo_tgt,
                                                  repo['name'],
                                                  self.args['archive']))
                if self.args['archive_path']:
                    _cmd.append(self.args['archive_path'])
                self.logger.debug('VARS: {0}'.format(vars(self)))
                self.logger.debug('[{0}]: Running command: {1}'.format(repo['name'],
                                                                       ' '.join(_cmd)))
                if not self.args['dryrun']:
                    _out = subprocess.run(_cmd,
                                          env = _loc_env,
                                          stdout = subprocess.PIPE,
                                          stderr = subprocess.PIPE)
                    _stdout = _out.stdout.decode('utf-8').strip()
                    _stderr = _out.stderr.decode('utf-8').strip()
                    _returncode = _out.returncode
                    self.logger.debug('[{0}]: (RESULT) {1}'.format(repo['name'], _stdout))
                    self.logger.debug('[{0}]: STDERR: ({2})\n{1}'.format(repo['name'],
                                                                        _stderr,
                                                                        ' '.join(_cmd)))
                    if _returncode != 0:
                        self.logger.error('[{0}]: FAILED: {1}'.format(repo['name'],
                                                                      ' '.join(_cmd)))
                    if _stderr != '' and self.cron and _returncode != 0:
                        self.logger.warning('Command {0} failed: {1}'.format(' '.join(_cmd),
                                                                             _stderr))
                self.logger.info('[{0}]: END RESTORE'.format(repo['name']))
    finally:
        # Never leave the caller stranded in a restore directory.
        os.chdir(orig_dir)
    self.logger.info('END: restore')
    return()
def lister(self):
    """Collect ``borg list`` results for every selected repository.

    Without ``self.args['archive']`` the archives of each repository are
    listed (``--json``) and then trimmed/ordered according to
    ``numlimit``, ``old`` and ``invert``. With an archive name, the
    contents of that archive are listed (``--json-lines``) and returned
    untrimmed.

    Returns:
        dict: ``{server: {repo_name: [entry, ...]}}`` where every entry is
        a dict decoded from borg's JSON output. A repository whose output
        cannot be decoded, or that is skipped by a dry run, maps to ``[]``.
    """
    output = {}
    self.logger.debug('START: lister')
    for server in self.repos:
        srv = self.repos[server]
        is_remote = srv['remote'].lower()[0] in ('1', 't')
        output[server] = {}
        _env = os.environ.copy()
        # A None value in env makes subprocess raise TypeError, so only
        # export BORG_RSH when the attribute is really there.
        if is_remote and srv.get('rsh') is not None:
            _env['BORG_RSH'] = srv['rsh']
        _env['LANG'] = 'en_US.UTF-8'
        _env['LC_CTYPE'] = 'en_US.UTF-8'
        _user = srv.get('user', pwd.getpwuid(os.geteuid()).pw_name)
        for repo in srv['repos']:
            # Always create the slot so the trimming below (and callers)
            # cannot hit a KeyError on a dry run.
            entries = []
            _loc_env = _env.copy()
            if 'password' not in repo:
                print('Password not supplied for {0}:{1}.'.format(server, repo['name']))
                _loc_env['BORG_PASSPHRASE'] = getpass.getpass('Password (will NOT echo back): ')
            else:
                _loc_env['BORG_PASSPHRASE'] = repo['password']
            if is_remote:
                repo_tgt = '{0}@{1}'.format(_user, server)
            else:
                repo_tgt = os.path.abspath(os.path.expanduser(server))
            _cmd = [self.borgbin,
                    '--log-json',
                    '--{0}'.format(self.args['loglevel']),
                    'list',
                    ('--json-lines' if self.args['archive'] else '--json')]
            _cmd.append('{0}:{1}{2}'.format(repo_tgt,
                                            repo['name'],
                                            ('::{0}'.format(self.args['archive']) if self.args['archive']
                                             else '')))
            if not self.args['dryrun']:
                _out = subprocess.run(_cmd,
                                      env = _loc_env,
                                      stdout = subprocess.PIPE,
                                      stderr = subprocess.PIPE)
                _stdout = '\n'.join([i.strip() for i in _out.stdout.decode('utf-8').splitlines()])
                _stderr = _out.stderr.decode('utf-8').strip()
                _returncode = _out.returncode
                try:
                    if self.args['archive']:
                        entries = [json.loads(i) for i in _stdout.splitlines()]
                    else:
                        entries = json.loads(_stdout)['archives']
                except json.decoder.JSONDecodeError:
                    entries = []
                # _stdout is already one string; joining it again would put
                # a newline between every single character.
                self.logger.debug('[{0}]: (RESULT) {1}'.format(repo['name'],
                                                              _stdout))
                self.logger.debug('[{0}]: STDERR: ({2}) ({1})'.format(repo['name'],
                                                                      _stderr,
                                                                      ' '.join(_cmd)))
                if _stderr != '' and self.cron and _returncode != 0:
                    self.logger.warning('Command {0} failed: {1}'.format(' '.join(_cmd),
                                                                         _stderr))
            if not self.args['archive']:
                if self.args['numlimit'] > 0:
                    if self.args['old']:
                        entries = entries[:self.args['numlimit']]
                    else:
                        entries = list(reversed(entries))[:self.args['numlimit']]
                if self.args['invert']:
                    # Materialise: a bare reversed() iterator can only be
                    # consumed once and is not a list like every other path.
                    entries = list(reversed(entries))
            output[server][repo['name']] = entries
    self.logger.debug('END: lister')
    return(output)
' + 'Default: \033[1m{0}\033[0m'.format(_defloglvl))) + args.add_argument('-Ld', '--log-to-disk', + dest = 'disklog', + action = 'store_true', + help = ( + 'If specified, log to a specific file (-Lf/--logfile) instead of the system logger.')) + args.add_argument('-Lf', '--logfile', + dest = 'logfile', + default = _logfile, + help = ( + 'The path to the logfile, only used if -Ld/--log-to-disk is specified. ' + 'Default: \033[1m{0}\033[0m (dynamic)').format(_logfile)) + args.add_argument('-v', '--verbose', + dest = 'verbose', + action = 'store_true', + help = ('If specified, log messages will be printed to STDERR in addition to the other ' + 'configured log system(s), and verbosity for printing functions is increased. ' + '\033[1mWARNING:\033[0m This may display VERY sensitive information such as passwords!')) + ### ARGS FOR ALL OPERATIONS ### + commonargs = argparse.ArgumentParser(add_help = False) + commonargs.add_argument('-r', '--repo', + dest = 'repo', + default = 'all', + help = ('The repository to perform the operation for. ' + 'The default is \033[1mall\033[0m, a special value that specifies all known ' + 'repositories. Can also accept a comma-separated list.')) + commonargs.add_argument('-S', '--server', + dest = 'server', + default = 'all', + help = ('The server to perform the operation for. ' + 'The default is \033[1mall\033[0m, a special value that specifies all known ' + 'servers. Can also accept a comma-separated list.')) + fileargs = argparse.ArgumentParser(add_help = False) + fileargs.add_argument('-a', '--archive', + default = _date, + dest = 'archive', + help = ('The name of the archive/snapshot. 
' + 'Default: \033[1m{0}\033[0m (dynamic)').format(_date)) + remoteargs = argparse.ArgumentParser(add_help = False) + remoteargs.add_argument('-d', '--dry-run', + dest = 'dryrun', + action = 'store_true', + help = ('Act as if we are performing tasks, but none will actually be executed ' + '(useful for testing logging)')) + ### OPERATIONS ### + subparsers = args.add_subparsers(help = 'Operation to perform', + dest = 'oper') + backupargs = subparsers.add_parser('backup', + help = 'Perform a backup.', + parents = [commonargs, + remoteargs, + fileargs]) + listargs = subparsers.add_parser('list', + help = 'List available backups.', + parents = [commonargs, remoteargs]) + listrepoargs = subparsers.add_parser('listrepos', + help = ('List availabile/configured repositories.'), + parents = [commonargs]) + initargs = subparsers.add_parser('init', + help = 'Initialise a repository.', + parents = [commonargs, remoteargs]) + rstrargs = subparsers.add_parser('restore', + help = ('Restore ("extract") an archive.'), + parents = [commonargs, + remoteargs, + fileargs]) + cvrtargs = subparsers.add_parser('convert', + help = ('Convert the legacy JSON format to the new XML format and quit')) + ### OPERATION-SPECIFIC OPTIONS ### + # CREATE ("backup") # + # DISPLAY/OUTPUT ("list") # + listargs.add_argument('-a', '--archive', + dest = 'archive', + default = False, + help = 'If specified, will list the *contents* of the given archive name.') + listargs.add_argument('-l', '--limit', + dest = 'numlimit', + type = int, + default = '5', + help = ('If specified, constrain the outout to this number of results each repo. ' + 'Default is \033[1m5\033[0m, use 0 for unlimited. See \033[1m-H/--list-help\033[0m')) + listargs.add_argument('-s', '--sort', + dest = 'sortby', + choices = ['newest', 'oldest'], + default = 'oldest', + help = ('The order to sort the results by. See \033[1m-H/--list-help\033[0m. 
def convertConf(cfgfile):
    """Convert a legacy JSON config into the XML format, if necessary.

    The legacy file is looked up next to ``cfgfile`` with the ``.xml``
    suffix swapped for ``.json``. An existing XML file is never overwritten.

    Args:
        cfgfile: Path of the (XML) configuration file the program wants to use.

    Returns:
        The path of the configuration to use: ``cfgfile`` itself when the
        legacy file does not contain JSON, otherwise the path of the
        existing or newly written XML file.

    Raises:
        OSError: If the legacy ``.json`` file cannot be opened.
        KeyError: If the legacy config lacks an expected key.
    """
    oldcfgfile = re.sub(r'\.xml$', '.json', cfgfile)
    try:
        with open(oldcfgfile, 'r') as f:
            oldcfg = json.load(f)
    except json.decoder.JSONDecodeError:
        # It's not JSON. It's either already XML or invalid config.
        return(cfgfile)
    # Switched from JSON to XML, so we need to do some basic conversion.
    newfname = re.sub(r'\.json$', '.xml', os.path.basename(cfgfile))
    newcfg = os.path.join(os.path.dirname(cfgfile),
                          newfname)
    if os.path.exists(newcfg):
        # Do nothing. We don't want to overwrite an existing config
        # and we'll assume it's an already-done conversion.
        return(newcfg)
    print(('It appears that you are still using the legacy JSON format. '
           'We will attempt to convert it to the new XML format ({0}) but it may '
           'require modifications, especially if you are using any prep functions as those are not '
           'converted automatically. See sample.config.xml for an example of this.').format(newcfg))
    cfg = etree.Element('borg')
    # The old format only supported one server.
    server = etree.Element('server')
    server.attrib['target'] = oldcfg['config']['host']
    server.attrib['remote'] = 'true'
    server.attrib['rsh'] = oldcfg['config']['ctx']
    # getpwuid(), not getpwnam(): we look the user up by numeric euid. The
    # default is evaluated eagerly, so the wrong call broke every conversion.
    server.attrib['user'] = oldcfg['config'].get('user', pwd.getpwuid(os.geteuid()).pw_name)
    for r in oldcfg['repos']:
        repo = etree.Element('repo')
        repo.attrib['name'] = r
        repo.attrib['password'] = oldcfg['repos'][r]['password']
        for p in oldcfg['repos'][r]['paths']:
            path = etree.Element('path')
            path.text = p
            repo.append(path)
        for e in oldcfg['repos'][r].get('excludes', []):
            path = etree.Element('exclude')
            path.text = e
            repo.append(path)
        server.append(repo)
    cfg.append(server)
    # Build the full XML spec.
    namespaces = {None: dflt_ns,
                  'xsi': 'http://www.w3.org/2001/XMLSchema-instance'}
    xsi = {('{http://www.w3.org/2001/'
            'XMLSchema-instance}schemaLocation'): ('http://git.square-r00t.net/OpTools/plain/'
                                                   'storage/backups/borg/config.xsd')}
    genname = 'LXML (http://lxml.de/)'
    root = etree.Element('borg', nsmap = namespaces, attrib = xsi)
    root.append(etree.Comment(('Generated by {0} on {1} from {2} via {3}').format(sys.argv[0],
                                                                                  datetime.datetime.now(),
                                                                                  oldcfgfile,
                                                                                  genname)))
    root.append(etree.Comment('THIS FILE CONTAINS SENSITIVE INFORMATION. SHARE/SCRUB WISELY.'))
    # Appending re-parents the child, so iterate over a snapshot rather than
    # over the element that is being emptied.
    for x in list(cfg):
        root.append(x)
    # Write out the file to disk.
    xml = etree.ElementTree(root)
    with open(newcfg, 'wb') as f:
        xml.write(f,
                  xml_declaration = True,
                  encoding = 'utf-8',
                  pretty_print = True)
    # Return the new config's path.
    return(newcfg)
class Backup(object):
    """Dump an OpenLDAP (OLC) server's config and data trees to LDIF files.

    Instantiating the class performs the whole job: it connects, dumps
    ``cn=config`` and ``basedn`` into ``outdir`` and disconnects again.

    Args:
        server: LDAP URI. Replaced by ``ldapi:///`` when ``sasl`` is true.
        port: Stored on the instance only.
            NOTE(review): never passed to the connection -- confirm whether
            it should be merged into ``server``.
        basedn: Base DN of the data tree to dump.
        sasl: Bind with SASL EXTERNAL over ldapi instead of a simple bind.
        starttls: Issue StartTLS before the simple bind.
        binddn: DN used for the simple bind.
        password_file: File holding the bind password; only read when
            ``password`` is not given.
        password: Bind password; takes precedence over ``password_file``.
        outdir: Directory receiving the LDIF files (created, mode 0700).
        splitldifs: Write ``ldap-config.ldif`` and ``ldap-data.ldif`` instead
            of a single ``ldap.ldif``.
    """

    def __init__(self,
                 server = 'ldap://sub.domain.tld',
                 port = 389,
                 basedn = 'dc=domain,dc=tld',
                 sasl = False,
                 starttls = True,
                 binddn = 'cn=Manager,dc=domain,dc=tld',
                 password_file = '~/.ldap.pass',
                 password = None,
                 outdir = '~/.cache/backup/ldap',
                 splitldifs = True):
        self.server = server
        self.port = port
        self.basedn = basedn
        self.sasl = sasl
        self.binddn = binddn
        self.outdir = os.path.abspath(os.path.expanduser(outdir))
        os.makedirs(self.outdir, exist_ok = True)
        os.chmod(self.outdir, mode = 0o0700)
        self.splitldifs = splitldifs
        self.starttls = starttls
        if password_file and not password:
            with open(os.path.abspath(os.path.expanduser(password_file)), 'r') as f:
                self.password = f.read().strip()
        else:
            self.password = password
        # Human readability, yay.
        # A note, SSLv3 is 0x300. But StartTLS can only be done with TLS, not SSL, I *think*?
        # PRESUMABLY, now that it's finalized, TLS 1.3 will be 0x304.
        # See https://tools.ietf.org/html/rfc5246#appendix-E
        self._tlsmap = {'1.0': int(0x301),  # 769
                        '1.1': int(0x302),  # 770
                        '1.2': int(0x303)}  # 771
        self._minimum_tls_ver = '1.2'
        if self.sasl:
            self.server = 'ldapi:///'
        self.cxn = None
        self.connect()
        # Always release the connection, even when the dump fails midway.
        try:
            self.dump()
        finally:
            self.close()

    def connect(self):
        """Open the connection and bind (SASL EXTERNAL or simple bind)."""
        self.cxn = ldap.initialize(self.server)
        self.cxn.set_option(ldap.OPT_REFERRALS, 0)
        self.cxn.set_option(ldap.OPT_PROTOCOL_VERSION, 3)
        if not self.sasl:
            if self.starttls:
                # NOTE(review): OPT_X_TLS_NEVER disables certificate checks.
                self.cxn.set_option(ldap.OPT_X_TLS_REQUIRE_CERT, ldap.OPT_X_TLS_NEVER)
                self.cxn.set_option(ldap.OPT_X_TLS, ldap.OPT_X_TLS_DEMAND)
                self.cxn.set_option(ldap.OPT_X_TLS_DEMAND, True)
                self.cxn.set_option(ldap.OPT_X_TLS_PROTOCOL_MIN, self._tlsmap[self._minimum_tls_ver])
        if self.sasl:
            self.cxn.sasl_external_bind_s()
        else:
            if self.starttls:
                self.cxn.start_tls_s()
            self.cxn.bind_s(self.binddn, self.password)
        return ()

    def _dump_tree(self, base, path, mode):
        """Write every entry below ``base`` (all attributes) to ``path``."""
        # The context manager closes the file even if the search or the
        # LDIF serialisation raises.
        with open(path, mode) as f:
            writer = ldif.LDIFWriter(f)
            rslts = self.cxn.search_s(base,
                                      ldap.SCOPE_SUBTREE,
                                      filterstr = '(objectClass=*)',
                                      attrlist = ['*', '+'])
            for r in rslts:
                writer.unparse(r[0], r[1])

    def dump(self):
        """Dump the ``cn=config`` tree, then the data tree, as LDIF."""
        if self.splitldifs:
            schema_target = (os.path.join(self.outdir, 'ldap-config.ldif'), 'w')
            data_target = (os.path.join(self.outdir, 'ldap-data.ldif'), 'w')
        else:
            # Single-file mode: the data tree is appended after the config.
            combined = os.path.join(self.outdir, 'ldap.ldif')
            schema_target = (combined, 'w')
            data_target = (combined, 'a')
        self._dump_tree('cn=config', *schema_target)
        self._dump_tree(self.basedn, *data_target)

    def close(self):
        """Unbind the connection if one was opened."""
        if self.cxn:
            self.cxn.unbind_s()
        return ()
class Backup(object):
    """Dump MySQL/MariaDB databases to ``.sql`` files with ``mysqldump``.

    Instantiating the class performs the whole job: the database list is
    resolved (when none was given) and the dump is written to ``outdir``.

    Args:
        dbs: Database names to dump; ``None`` means every database the
            ``mysql`` client lists via ``SHOW DATABASES``.
        cfg: Client option file; it only has to exist.
            NOTE(review): it is never passed to mysql/mysqldump -- confirm
            whether ``--defaults-extra-file`` was intended.
        cfgsuffix: Stored on the instance only (currently unused).
        splitdumps: One ``<db>.sql`` per database instead of a single
            ``all.databases.sql``.
        dumpopts: Extra ``mysqldump`` options; a default set is used when
            empty or ``None``.
        mysqlbin: ``mysql`` client executable.
        mysqldumpbin: ``mysqldump`` executable.
        outdir: Directory receiving the dumps (created, mode 0700).

    Raises:
        OSError: ``cfg`` does not exist.
        RuntimeError: the database list could not be retrieved.
    """

    def __init__(self, dbs = None,
                 cfg = '~/.my.cnf',
                 cfgsuffix = '',
                 splitdumps = True,
                 dumpopts = None,
                 mysqlbin = 'mysql',
                 mysqldumpbin = 'mysqldump',
                 outdir = '~/.cache/backup/mysql'):
        # If dbs is None, we dump ALL databases (that the user has access to).
        self.dbs = dbs
        self.cfgsuffix = cfgsuffix
        self.splitdumps = splitdumps
        self.mysqlbin = mysqlbin
        self.mysqldumpbin = mysqldumpbin
        self.outdir = os.path.abspath(os.path.expanduser(outdir))
        self.cfg = os.path.abspath(os.path.expanduser(cfg))
        os.makedirs(self.outdir, exist_ok = True)
        os.chmod(self.outdir, mode = 0o0700)
        if not os.path.isfile(self.cfg):
            raise OSError(('{0} does not exist!').format(self.cfg))
        if not dumpopts:
            self.dumpopts = ['--routines',
                             '--add-drop-database',
                             '--add-drop-table',
                             '--allow-keywords',
                             '--complete-insert',
                             '--create-options',
                             '--extended-insert']
        else:
            self.dumpopts = dumpopts
        self.getDBs()
        self.dump()

    def getDBs(self):
        """Populate ``self.dbs`` from ``SHOW DATABASES`` when it is empty."""
        if not self.dbs:
            _out = subprocess.run([self.mysqlbin, '-BNne', 'SHOW DATABASES'],
                                  stdout = subprocess.PIPE,
                                  stderr = subprocess.PIPE)
            if _out.returncode != 0:
                raise RuntimeError(('Could not successfully list databases: '
                                    '{0}').format(_out.stderr.decode('utf-8')))
            self.dbs = _out.stdout.decode('utf-8').strip().splitlines()
        return ()

    def _split_cmd(self, db):
        """Return the mysqldump argv that dumps ``db`` into its own file."""
        outfile = os.path.join(self.outdir, '{0}.sql'.format(db))
        cmd = [self.mysqldumpbin, '--result-file={0}'.format(outfile)]
        cmd.extend(self.dumpopts)
        if db in ('information_schema', 'performance_schema'):
            cmd.append('--skip-lock-tables')
        elif db == 'mysql':
            cmd.append('--flush-privileges')
        cmd.append(db)
        return cmd

    def _combined_cmd(self):
        """Return the mysqldump argv that dumps all of ``self.dbs`` at once."""
        outfile = os.path.join(self.outdir, 'all.databases.sql')
        cmd = [self.mysqldumpbin]
        cmd.extend(self.dumpopts)
        cmd.append('--result-file={0}'.format(outfile))
        if 'information_schema' in self.dbs:
            cmd.append('--skip-lock-tables')
        if 'mysql' in self.dbs:
            cmd.append('--flush-privileges')
        # Must be a plain string: a nested list inside argv makes
        # subprocess.run() raise TypeError before mysqldump is even started.
        cmd.append('--databases')
        cmd.extend(self.dbs)
        return cmd

    @staticmethod
    def _run_dump(cmd, label):
        """Run one mysqldump command; failures are reported as warnings."""
        out = subprocess.run(cmd,
                             stdout = subprocess.PIPE,
                             stderr = subprocess.PIPE)
        if out.returncode != 0:
            warn = ('Error dumping {0}: {1}').format(label, out.stderr.decode('utf-8').strip())
            warnings.warn(warn)

    def dump(self):
        """Write the dump(s); a failing database only emits a warning."""
        if self.splitdumps:
            for db in self.dbs:
                self._run_dump(self._split_cmd(db), db)
        else:
            self._run_dump(self._combined_cmd(), ','.join(self.dbs))
        return ()
repo_re = re.compile('^@')
# Python version
pyver = sys.hexversion
py3 = 0x30000f0  # TODO: check the version incompats


class Backup(object):
    """Write an XML inventory of the installed RPM packages, grouped by repo.

    Instantiating the class performs the whole job: collect the packages,
    build the XML tree and write it to ``output``. The ``yum`` Python module
    is used when it could be imported (``has_yum``); otherwise the
    ``yumdb``, ``repoquery`` and ``yum`` command line tools are called.

    Args:
        explicit_only: Adds the install reason ``user`` to the filter.
        include_deps: Adds the install reason ``dep`` to the filter.
        output: Path of the XML file to write.
    """

    # Keys filled from one tab-separated repoquery call, in output order.
    _QUERY_KEYS = ('name', 'release', 'arch', 'version',
                   'built', 'installed', 'sizerpm', 'sizedisk')
    _QUERY_FMT = ('%{name}\t'
                  '%{release}\t'
                  '%{arch}\t'
                  '%{ver}\t'  # version
                  '%{buildtime}\t'  # built
                  '%{installtime}\t'  # installed
                  '%{packagesize}\t'  # sizerpm
                  '%{installedsize}')  # sizedisk

    def __init__(self, explicit_only = True,
                 include_deps = False,
                 output = '~/.cache/backup/misc/installed_pkgs.xml'):
        self.explicit_only = explicit_only
        self.include_deps = include_deps
        self.reasons = []
        if self.explicit_only:
            self.reasons.append('user')
        if self.include_deps:
            self.reasons.append('dep')
        self.output = os.path.abspath(os.path.expanduser(output))
        if has_yum:
            self.yb = yum.YumBase()
            # Make it run silently.
            self.yb.preconf.debuglevel = 0
            self.yb.preconf.errorlevel = 0
            self.pkg_meta = []
        # TODO: XSD?
        self.pkgs = etree.Element('packages')
        self.pkgs.attrib['distro'] = distname
        self.pkgs.attrib['version'] = '.'.join([str(i) for i in distver])
        self.pkglist = b''
        self.getPkgList()
        self.buildPkgInfo()
        self.write()

    def getPkgList(self):
        """Collect the yum package objects to report (yum module only)."""
        if has_yum:
            if not self.explicit_only:
                self.pkg_meta = self.yb.rpmdb.returnPackages()
            else:
                for pkg in self.yb.rpmdb.returnPackages():
                    reason = pkg.yumdb_info.get('reason')
                    if reason and reason.lower() in self.reasons:
                        self.pkg_meta.append(pkg)
        else:
            pass  # We do this in buildPkgInfo().
        return ()

    @staticmethod
    def _repoquery(nevra, fmtstr):
        """Return repoquery's output for one installed package."""
        cmd = ['/usr/bin/repoquery',
               '--installed',
               '--queryformat', fmtstr,
               nevra]
        return subprocess.run(cmd, stdout = subprocess.PIPE).stdout.decode('utf-8')

    @staticmethod
    def _repo_field(rawrepo, field):
        """Return the value of a ``<field> : <value>`` line, or None.

        The value has to be captured explicitly: matching only up to the
        colon and splitting the match on ':' always yields an empty string.
        ``[ \\t]*`` (not ``\\s*``) keeps an empty value from swallowing the
        following line under re.M.
        """
        found = re.search(r'^{0}\s*:[ \t]*(.*)$'.format(re.escape(field)), rawrepo, re.M)
        if not found:
            return None
        return found.group(1).strip()

    @staticmethod
    def _set_attribs(elem, info):
        """Copy ``info`` onto ``elem`` as string attributes (py2 and py3)."""
        for k, v in info.items():
            if pyver >= py3:
                elem.attrib[k] = str(v)
            else:
                if isinstance(v, (int, long, datetime.datetime)):
                    elem.attrib[k] = str(v).encode('utf-8')
                elif isinstance(v, str):
                    elem.attrib[k] = v.decode('utf-8')
                else:
                    elem.attrib[k] = v.encode('utf-8')

    def _find_repo(self, reponame):
        """Return the existing <repo> element named ``reponame`` or None."""
        found = self.pkgs.xpath('repo[@name="{0}"]'.format(reponame))
        return found[0] if found else None

    def _new_repo_from_cli(self, reponame):
        """Create and attach a <repo> element from ``yum -v repolist``."""
        # This is pretty error-prone. Fix/cleanup your systems.
        repo = etree.Element('repo')
        repo.attrib['name'] = reponame
        rawrepo = subprocess.run(['/usr/bin/yum',
                                  '-v',
                                  'repolist',
                                  reponame],
                                 stdout = subprocess.PIPE).stdout.decode('utf-8')
        urls = []
        for field in ('Repo-mirrors', 'Repo-baseurl'):
            value = self._repo_field(rawrepo, field)
            if value is not None:
                urls.append(value)
        repo.attrib['urls'] = '>'.join(urls)  # https://stackoverflow.com/a/13500078
        repostatus = self._repo_field(rawrepo, 'Repo-status')
        if repostatus is not None and repostatus.lower() == 'enabled':
            repo.attrib['enabled'] = 'true'
        else:
            repo.attrib['enabled'] = 'false'
        repodesc = self._repo_field(rawrepo, 'Repo-name')
        repo.attrib['desc'] = repodesc if repodesc is not None else '(metadata missing)'
        self.pkgs.append(repo)
        return repo

    def _new_repo_from_api(self, reponame):
        """Create and attach a <repo> element from the yum module's data."""
        repo = etree.Element('repo')
        repo.attrib['name'] = reponame
        try:
            repoinfo = self.yb.repos.repos[reponame]
            repo.attrib['urls'] = '>'.join(repoinfo.urls)  # https://stackoverflow.com/a/13500078
            repo.attrib['enabled'] = ('true' if repoinfo in self.yb.repos.listEnabled() else 'false')
            repo.attrib['desc'] = repoinfo.name
        except KeyError:  # Repo is missing
            repo.attrib['desc'] = '(metadata missing)'
        self.pkgs.append(repo)
        return repo

    def _build_from_cli(self):
        """Fill ``self.pkgs`` using yumdb/repoquery (no yum module)."""
        _reason = '*'
        if self.reasons:
            if 'dep' not in self.reasons:
                _reason = 'user'
        cmd = ['/usr/sbin/yumdb',
               'search',
               'reason',
               _reason]
        rawpkgs = subprocess.run(cmd, stdout = subprocess.PIPE).stdout.decode('utf-8')
        # Drop the "reason = ..." detail lines and blank lines; what is left
        # are the package NEVRAs.
        reason_re = re.compile(r'^(\s+reason\s+=\s+.*|\s*)$')
        pkgs = [line.strip() for line in rawpkgs.splitlines() if not reason_re.search(line)]
        for pkg_nevra in pkgs:
            reponame = repo_re.sub('', self._repoquery(pkg_nevra, '%{ui_from_repo}')).strip()
            repo = self._find_repo(reponame)
            if repo is None:
                repo = self._new_repo_from_cli(reponame)
            pkgelem = etree.Element('package')
            pkginfo = {'NEVRA': pkg_nevra,
                       'desc': self._repoquery(pkg_nevra, '%{summary}').strip()}
            # These are all values with no whitespace so we can easily combine into one call and then split them.
            values = re.split('\t', self._repoquery(pkg_nevra, self._QUERY_FMT))
            if len(values) != len(self._QUERY_KEYS):
                raise ValueError(('Unexpected repoquery output for {0}: '
                                  '{1!r}').format(pkg_nevra, values))
            for k, v in zip(self._QUERY_KEYS, values):
                pkginfo[k] = v
            for k in ('built', 'installed', 'sizerpm', 'sizedisk'):
                pkginfo[k] = int(pkginfo[k])
            for k in ('built', 'installed'):
                pkginfo[k] = datetime.datetime.fromtimestamp(pkginfo[k])
            self._set_attribs(pkgelem, pkginfo)
            repo.append(pkgelem)

    def _build_from_api(self):
        """Fill ``self.pkgs`` from the package objects in ``self.pkg_meta``."""
        for pkg in self.pkg_meta:
            reponame = repo_re.sub('', pkg.ui_from_repo)
            repo = self._find_repo(reponame)
            if repo is None:
                repo = self._new_repo_from_api(reponame)
            pkgelem = etree.Element('package')
            pkginfo = {'name': pkg.name,
                       'desc': pkg.summary,
                       'version': pkg.ver,
                       'release': pkg.release,
                       'arch': pkg.arch,
                       'built': datetime.datetime.fromtimestamp(pkg.buildtime),
                       'installed': datetime.datetime.fromtimestamp(pkg.installtime),
                       'sizerpm': pkg.packagesize,
                       'sizedisk': pkg.installedsize,
                       'NEVRA': pkg.nevra}
            self._set_attribs(pkgelem, pkginfo)
            repo.append(pkgelem)

    def buildPkgInfo(self):
        """Build the XML tree and serialise it into ``self.pkglist``."""
        if not has_yum:
            self._build_from_cli()
        else:
            self._build_from_api()
        self.pkglist = etree.tostring(self.pkgs,
                                      pretty_print = True,
                                      xml_declaration = True,
                                      encoding = 'UTF-8')
        return ()

    def write(self):
        """Write ``self.pkglist`` to ``self.output`` (directory mode 0700)."""
        outdir = os.path.dirname(self.output)
        if pyver >= py3:
            os.makedirs(outdir, exist_ok = True)
            os.chmod(outdir, mode = 0o0700)
        else:
            if not os.path.isdir(outdir):
                os.makedirs(outdir)
            os.chmod(outdir, 0o0700)
        with open(self.output, 'wb') as f:
            f.write(self.pkglist)
        return ()
def minify(json_in):
    """Return the JSON document ``json_in`` re-serialised without whitespace.

    Args:
        json_in: A string containing one JSON document.

    Raises:
        json.JSONDecodeError: ``json_in`` is not valid JSON.
    """
    j = json.loads(json_in)
    j = json.dumps(j, indent = None, separators = (',', ':'))
    return(j)

def parseArgs():
    """Build and return the (unparsed) argument parser."""
    args = argparse.ArgumentParser(description = ('Minify ("compress") JSON input'))
    args.add_argument('-o', '--output',
                      default = '-',
                      help = ('Write the minified JSON out to a file. The default is "-", which instead prints it to '
                              'STDOUT. If instead you would like to write out to STDERR, use "+" (otherwise provide a '
                              'path)'))
    args.add_argument('json_in',
                      default = '-',
                      nargs = '?',
                      help = ('The JSON input. If "-" (the default), read STDIN; otherwise provide a path to the '
                              'JSON file'))
    return(args)

def main(argv = None, stdin = None):
    """Minify the JSON named on the command line and write the result.

    Args:
        argv: Argument list to parse; ``None`` uses ``sys.argv[1:]``.
        stdin: Text stream read when the input is "-"; ``None`` uses
            ``sys.stdin``.

    Returns:
        An empty tuple (kept for backward compatibility with ``return()``).

    Raises:
        argparse.ArgumentError: STDIN was selected as input but is empty.
    """
    args = parseArgs().parse_args(argv)
    if args.json_in.strip() == '-':
        raw = (sys.stdin if stdin is None else stdin).read()
        if not raw:
            # ArgumentError takes (argument, message); the one-argument form
            # raised a TypeError instead of the intended error.
            raise argparse.ArgumentError(None,
                                         'You specified to read from STDIN, but STDIN is blank')
        args.json_in = raw
    else:
        with open(os.path.abspath(os.path.expanduser(args.json_in)), 'r') as f:
            args.json_in = f.read()
    minified = minify(args.json_in)
    target = args.output.strip()
    if target == '+':
        sys.stderr.write(minified + '\n')
    elif target == '-':
        sys.stdout.write(minified + '\n')
    else:
        args.output = os.path.abspath(os.path.expanduser(args.output))
        if not args.output.endswith('.json'):
            args.output += '.json'
        with open(args.output, 'w') as f:
            f.write(minified + '\n')
    return ()

if __name__ == '__main__':
    main()
with open('/etc/redhat-release', 'r') as f:
    rawver = f.read()
# Numeric components of the release, e.g. [7, 6, 1810].
distver = [int(i) for i in ver_re.sub('\g<2>', rawver.strip()).split('.')]
# NOTE(review): re.IGNORECASE is passed positionally here, which is re.sub()'s
# "count" parameter rather than "flags" -- confirm this is intended.
distname = re.sub('(Linux )?release', '', ver_re.sub('\g<1>', rawver.strip()), re.IGNORECASE).strip()
# Regex pattern to get the repo name. We compile it just to speed up the execution.
repo_re = re.compile('^@')
# Python version
pyver = sys.hexversion
py3 = 0x30000f0  # TODO: check the version incompats

# list.copy() does not exist on python 2, so iterPkgs() uses copy.deepcopy() there.
if pyver < py3:
    import copy


class Reinstaller(object):
    """Reinstall the packages recorded in a generated XML package list.

    The yum module is used to look packages up; the actual installation is
    done by calling the ``yum`` command once per repo and action.
    """

    def __init__(self, pkglist_path, latest = True):
        """Load the package list and prepare a quiet YumBase.

        pkglist_path: path to the packages XML file.
        latest: if true, packages are matched by name; otherwise by NEVRA,
                which requires the list to come from the same distro/version.

        Raises RuntimeError if latest is false and the distro name or
        version recorded in the list differs from the running system.
        """
        self.latest = latest
        pkglist_file = os.path.abspath(os.path.expanduser(pkglist_path))
        with open(pkglist_file, 'rb') as f:
            self.pkgs = etree.fromstring(f.read())
        if not self.latest:
            # Make sure the versions match, otherwise Bad Things(TM) can occur.
            if not all(((distname == self.pkgs.attrib['distro']),
                        ('.'.join([str(i) for i in distver]) == self.pkgs.attrib['version']))):
                err = ('This package set was created on {0} {1}. '
                       'The current running OS is {2} {3} and you have set latest = False/None. '
                       'THIS IS A VERY BAD IDEA.').format(self.pkgs.attrib['distro'],
                                                          self.pkgs.attrib['version'],
                                                          distname,
                                                          '.'.join([str(i) for i in distver]))
                raise RuntimeError(err)
        # Make it run silently.
        self.yb = yum.YumBase()
        self.yb.preconf.quiet = 1
        self.yb.preconf.debuglevel = 0
        self.yb.preconf.errorlevel = 0
        self.yb.preconf.assumeyes = 1
        self.yb.preconf.rpmverbosity = 'error'

    def iterPkgs(self):
        """Sort each repo's packages into new/upgrade/downgrade and run yum.

        For every <repo> element the packages are classified, then one yum
        command per non-empty category is executed. Returns an empty tuple.
        """
        for repo in self.pkgs.findall('repo'):
            # Base install packages ("anaconda") don't play nicely with this. They should be expected to
            # already be installed anyways, and self.latest is irrelevant - downgrading these can cause
            # *major* issues.
            # And "installed" repo are packages installed manually from RPM.
            if self.latest:
                if repo.attrib['name'].lower() in ('anaconda', 'installed'):
                    continue
            # Package objects are matched on pkgobj.repo.name against the
            # "desc" attribute of the <repo> element, not its "name".
            reponm = repo.attrib['desc']
            # This is only needed for the subprocess workaround.
            cmd = ['yum', '-q', '-y',
                   # '--disablerepo=*',
                   '--enablerepo={0}'.format(repo.attrib['name'])]
            # Package specs collected per action for this repo.
            pkgs = {'new': [],
                    'upgrade': [],
                    'downgrade': []}
            for pkg in repo.findall('package'):
                pkg_found = False
                is_installed = False
                if self.latest:
                    pkgnm = pkg.attrib['name']
                else:
                    pkgnm = pkg.attrib['NEVRA']
                pkglist = self.yb.doPackageLists(patterns = [pkgnm], showdups = True)
                if pkglist.updates:
                    for pkgobj in reversed(pkglist.updates):
                        if pkgobj.repo.name == reponm:
                            # Haven't gotten this working properly. Patches welcome.
                            # self.yb.install(po = pkgobj)
                            # self.yb.resolveDeps()
                            # self.yb.buildTransaction()
                            # self.yb.processTransaction()
                            if self.latest:
                                pkgs['upgrade'].append(pkgobj.name)
                            else:
                                if distver[0] >= 7:
                                    pkgs['upgrade'].append(pkgobj.nevra)
                                else:
                                    pkgs['upgrade'].append(pkgobj._ui_nevra())
                            pkg_found = True
                            is_installed = False
                            break
                if pkglist.installed and not pkg_found:
                    # No break here: every installed match from this repo is reported.
                    for pkgobj in reversed(pkglist.installed):
                        if pkgobj.repo.name == reponm:
                            if distver[0] >= 7:
                                nevra = pkgobj.nevra
                            else:
                                nevra = pkgobj._ui_nevra()
                            warn = ('{0} from {1} is already installed; skipping').format(nevra,
                                                                                          repo.attrib['name'])
                            warnings.warn(warn)
                            pkg_found = True
                            is_installed = True
                # Entered unless the package was found as already installed.
                if not all((is_installed, pkg_found)):
                    if pkglist.available:
                        for pkgobj in reversed(pkglist.available):
                            if pkgobj.repo.name == reponm:
                                # Haven't gotten this working properly. Patches welcome.
                                # self.yb.install(po = pkgobj)
                                # self.yb.resolveDeps()
                                # self.yb.buildTransaction()
                                # self.yb.processTransaction()
                                if self.latest:
                                    pkgs['new'].append(pkgobj.name)
                                else:
                                    if distver[0] >= 7:
                                        pkgs['new'].append(pkgobj.nevra)
                                    else:
                                        pkgs['new'].append(pkgobj._ui_nevra())
                                is_installed = False
                                pkg_found = True
                                break
                    if not self.latest:
                        if pkglist.old_available:
                            for pkgobj in reversed(pkglist.old_available):
                                if pkgobj.repo.name == reponm:
                                    # Haven't gotten this working properly. Patches welcome.
                                    # self.yb.install(po = pkgobj)
                                    # self.yb.resolveDeps()
                                    # self.yb.buildTransaction()
                                    # self.yb.processTransaction()
                                    if distver[0] >= 7:
                                        pkgs['downgrade'].append(pkgobj.nevra)
                                    else:
                                        pkgs['downgrade'].append(pkgobj._ui_nevra())
                                    pkg_found = True
                                    break
            # # This... seems to always fail. Patches welcome.
            # # self.yb.processTransaction()
            for k in pkgs:
                if not pkgs[k]:
                    continue
                # Work on a copy so the per-repo base command stays untouched.
                if pyver < py3:
                    _cmd = copy.deepcopy(cmd)
                else:
                    _cmd = cmd.copy()
                if k == 'downgrade':
                    _cmd.append('downgrade')
                else:
                    if self.latest:
                        _cmd.append('install')
                    else:
                        if distver[0] >= 7:
                            _cmd.append('install-nevra')
                        else:
                            _cmd.append('install')
                _cmd.extend(pkgs[k])
                # subprocess.run() does not exist on python 2.
                if pyver >= py3:
                    subprocess.run(_cmd)
                else:
                    subprocess.call(_cmd)
        return()


def parseArgs():
    """Build and return the (unparsed) argument parser."""
    args = argparse.ArgumentParser(description = ('Reinstall packages from a generated XML package list'))
    # Note: -V/--version stores False into "latest"; it does not print a version.
    args.add_argument('-V', '--version',
                      dest = 'latest',
                      action = 'store_false',
                      help = ('If specified, (try to) install the same version as specified in the package list.'))
    args.add_argument('pkglist_path',
                      metavar = 'PKGLIST',
                      help = ('The path to the generated packages XML file.'))
    return(args)

def main():
    """Parse the command line and reinstall the listed packages."""
    args = parseArgs().parse_args()
    # The argument dests (pkglist_path, latest) match Reinstaller's parameters.
    dictargs = vars(args)
    r = Reinstaller(**dictargs)
    r.iterPkgs()
    return()

if __name__ == '__main__':
    main()