diff --git a/centos/list_pkgs.py b/centos/list_pkgs.py index cf23369..c53b65b 100755 --- a/centos/list_pkgs.py +++ b/centos/list_pkgs.py @@ -1,110 +1,185 @@ #!/usr/bin/env python # Supports CentOS 6.9 and up, untested on lower versions. -# Lets you get a list of files for a given package name(s) without installing -# any extra packages (such as yum-utils for repoquery). +# Lets you dump a list of installed packages for backup purposes +# Reference: https://blog.fpmurphy.com/2011/08/programmatically-retrieve-rpm-package-details.html import argparse -import json +import copy +import datetime +import io import re -# For when CentOS/RHEL switch to python 3 by default (if EVER). import sys -pyver = sys.version_info try: - import rpm + import yum except ImportError: - exit('This script only runs on RHEL/CentOS/other RPM-based distros.') + exit('This script only runs on RHEL/CentOS/other yum-based distros.') +# Detect RH version. +ver_re = re.compile('^(centos( linux)? release) ([0-9\.]+) .*$', re.IGNORECASE) +# distro module isn't stdlib, and platform.linux_distribution() (AND platform.distro()) are both deprecated in 3.7. +# So we get hacky. +with open('/etc/redhat-release', 'r') as f: + ver = [int(i) for i in ver_re.sub('\g<3>', f.read().strip()).split('.')] +import pprint -def all_pkgs(): - # Gets a list of all packages. - pkgs = [] - trns = rpm.TransactionSet() - for p in trns.dbMatch(): - pkgs.append(p['name']) - pkgs = list(sorted(set(pkgs))) - return(pkgs) +repo_re = re.compile('^@') -class FileGetter(object): - def __init__(self, symlinks = True, verbose = False, *args, **kwargs): - self.symlinks = symlinks - self.verbose = verbose - self.trns = rpm.TransactionSet() +class PkgIndexer(object): + def __init__(self, **args): + self.pkgs = [] + self.args = args + self.yb = yum.YumBase() + # Make the Yum API shut the heck up. + self.yb.preconf.debuglevel = 0 + self.yb.preconf.errorlevel = 0 + self._pkgs = self._pkglst() + self._build_pkginfo() + if self.args['report'] == 'csv': + self._gen_csv() + elif self.args['report'] == 'json': + self._gen_json() + elif self.args['report'] == 'xml': + self._gen_xml() - def getfiles(self, pkgnm): - files = {} - for pkg in self.trns.dbMatch('name', pkgnm): - # The canonical package name - _pkgnm = pkg.sprintf('%{NAME}') - # Return just a list of files, or a dict of filepath:hash - # if verbose is enabled. - if self.verbose: - files[_pkgnm] = {} + def _pkglst(self): + pkgs = [] + # Get the list of packages + if self.args['reason'] != 'all': + for p in sorted(self.yb.rpmdb.returnPackages()): + if 'reason' not in p.yumdb_info: + continue + reason = getattr(p.yumdb_info, 'reason') + if reason == self.args['reason']: + pkgs.append(p) + else: + pkgs = sorted(self.yb.rpmdb.returnPackages()) + return(pkgs) + + def _build_pkginfo(self): + for p in self._pkgs: + _pkg = {'name': p.name, + 'desc': p.summary, + 'version': p.ver, + 'release': p.release, + 'arch': p.arch, + 'built': datetime.datetime.fromtimestamp(p.buildtime), + 'installed': datetime.datetime.fromtimestamp(p.installtime), + 'repo': repo_re.sub('', p.ui_from_repo), + 'sizerpm': p.packagesize, + 'sizedisk': p.installedsize} + self.pkgs.append(_pkg) + + def _gen_csv(self): + if self.args['plain']: + _fields = ['name'] + else: + _fields = ['name', 'version', 'release', 'arch', 'desc', 'built', + 'installed', 'repo', 'sizerpm', 'sizedisk'] + import csv + if sys.hexversion >= 0x30000f0: + _buf = io.StringIO() + else: + _buf = io.BytesIO() + _csv = csv.writer(_buf, delimiter = self.args['sep_char']) + if self.args['header']: + if self.args['plain']: + _csv.writerow(['Name']) else: - files[_pkgnm] = [] - for f in pkg.fiFromHeader(): - _symlink = (True if re.search('^0+$', f[12]) else False) - if self.verbose: - if _symlink: - if self.symlinks: - files[_pkgnm][f[0]] = '(symbolic link)' - continue - files[_pkgnm][f[0]] = f[12] - else: - # Skip if it is a symlink but they aren't enabled - if _symlink and not self.symlinks: - continue - else: - files[_pkgnm].append(f[0]) - files[_pkgnm].sort() - return(files) + _csv.writerow(['Name', 'Version', 'Release', 'Architecture', 'Description', 'Build Time', + 'Install Time', 'Repository', 'Size (RPM)', 'Size (On-Disk)']) + _csv = csv.DictWriter(_buf, fieldnames = _fields, extrasaction = 'ignore', delimiter = self.args['sep_char']) + for p in self.pkgs: + _csv.writerow(p) + _buf.seek(0, 0) + self.report = _buf.read() + return() + + def _gen_json(self): + import json + self.report = json.dumps(self.pkgs, default = str, indent = 4) + return() + + def _gen_xml(self): + from lxml import etree + _xml = etree.Element('packages') + for p in self.pkgs: + _attrib = copy.deepcopy(p) + for i in ('built', 'installed', 'sizerpm', 'sizedisk'): + _attrib[i] = str(_attrib[i]) + _pkg = etree.Element('package', attrib = _attrib) + _xml.append(_pkg) + #del(_attrib['name']) # I started to make it a more complex, nested structure... is that necessary? + if self.args['header']: + self.report = etree.tostring(_xml, pretty_print = True, xml_declaration = True, encoding = 'UTF-8') + else: + self.report = etree.tostring(_xml, pretty_print = True) + return() + def parseArgs(): - args = argparse.ArgumentParser(description = ( - 'This script allows you get a list of files for a given ' - 'package name(s) without installing any extra packages ' - '(such as yum-utils for repoquery).')) - args.add_argument('-l', '--ignore-symlinks', - dest = 'symlinks', - action = 'store_false', - help = ('If specified, don\'t report files that are ' + - 'symlinks in the RPM')) - args.add_argument('-v', '--verbose', - dest = 'verbose', + args = argparse.ArgumentParser(description = ('This script lets you dump the list of installed packages')) + args.add_argument('-p', '--plain', + dest = 'plain', action = 'store_true', - help = ('If specified, include the hashes of the files')) - args.add_argument('-p', '--package', - dest = 'pkgs', - #nargs = 1, - metavar = 'PKGNAME', - action = 'append', - default = [], - help = ('If specified, restrict the list of ' + - 'packages to check against to only this ' + - 'package. Can be specified multiple times. ' + - 'HIGHLY RECOMMENDED')) + help = 'If specified, only create a list of plain package names (i.e. don\'t include extra ' + 'information)') + args.add_argument('-n', '--no-header', + dest = 'header', + action = 'store_false', + help = 'If specified, do not print column headers/XML headers') + args.add_argument('-s', '--separator', + dest = 'sep_char', + default = ',', + help = 'The separator used to split fields in the output (default: ,) (only used for CSV ' + 'reports)') + rprt = args.add_mutually_exclusive_group() + rprt.add_argument('-c', '--csv', + dest = 'report', + default = 'csv', + action = 'store_const', + const = 'csv', + help = 'Generate CSV output (this is the default). See -n/--no-header, -s/--separator') + rprt.add_argument('-x', '--xml', + dest = 'report', + default = 'csv', + action = 'store_const', + const = 'xml', + help = 'Generate XML output (requires the LXML module: yum install python-lxml)') + rprt.add_argument('-j', '--json', + dest = 'report', + default = 'csv', + action = 'store_const', + const = 'json', + help = 'Generate JSON output') + rsn = args.add_mutually_exclusive_group() + rsn.add_argument('-a', '--all', + dest = 'reason', + default = 'all', + action = 'store_const', + const = 'all', + help = ('Parse/report all packages that are currently installed. ' + 'Conflicts with -u/--user and -d/--dep. ' + 'This is the default')) + rsn.add_argument('-u', '--user', + dest = 'reason', + default = 'all', + action = 'store_const', + const = 'user', + help = ('Parse/report only packages which were explicitly installed. ' + 'Conflicts with -a/--all and -d/--dep')) + rsn.add_argument('-d', '--dep', + dest = 'reason', + default = 'all', + action = 'store_const', + const = 'dep', + help = ('Parse/report only packages which were installed to satisfy a dependency. ' + 'Conflicts with -a/--all and -u/--user')) return(args) def main(): args = vars(parseArgs().parse_args()) - if not args['pkgs']: - prompt_str = ( - 'You have not specified any package names.\nThis means we will ' - 'get file lists for EVERY SINGLE installed package.\nThis is a ' - 'LOT of output and can take a few moments.\nIf this was a ' - 'mistake, you can hit ctrl-c now.\nOtherwise, hit the enter key ' - 'to continue.\n') - sys.stderr.write(prompt_str) - if pyver.major >= 3: - input() - elif pyver.major == 2: - raw_input() - args['pkgs'] = all_pkgs() - gf = FileGetter(**args) - file_rslts = {} - for p in args['pkgs']: - if p not in file_rslts.keys(): - file_rslts[p] = gf.getfiles(p) - print(json.dumps(file_rslts, indent = 4)) + p = PkgIndexer(**args) + print(p.report) return() if __name__ == '__main__':