checking in a ton of population

brent s. 2018-09-23 07:55:58 -04:00
parent c48c752f84
commit 0dd54a604d
12 changed files with 426 additions and 66 deletions

TODO
View File

@@ -1,11 +1,11 @@
 - write classes/functions
 - XML-based config
 -x XML syntax
---- regex btags - case-insensitive? this can be represented in-pattern:
-https://stackoverflow.com/a/9655186/733214
+--- xregex btags - case-insensitive? this can be represented in-pattern:
+xhttps://stackoverflow.com/a/9655186/733214
 -x configuration generator
---- print end result xml config to stderr for easier redirection? or print prompts to stderr and xml to stdout?
+--- xprint end result xml config to stderr for easier redirection? or print prompts to stderr and xml to stdout?
--- XSD for validation
+-- xXSD for validation
 -- Flask app for generating config?
 -- TKinter (or pygame?) GUI?
 --- https://docs.python.org/3/faq/gui.html
@@ -16,12 +16,9 @@

 - locking
 - for docs, 3.x (as of 3.10) was 2.4M.
-- Need ability to write/parse mtree specs (or a similar equivalent) for applying ownerships/permissions to overlay files
+- xNeed ability to write/parse mtree specs (or a similar equivalent) for applying ownerships/permissions to overlay files
+-- parsing is done. writing may? come later.

-- need to package:
-python-hashid (https://psypanda.github.io/hashID/,
-https://github.com/psypanda/hashID,
-https://pypi.org/project/hashID/)

 - package for PyPI:
 # https://packaging.python.org/tutorials/distributing-packages/
@@ -37,7 +34,6 @@ BUGS.SQUARE-R00T.NET bugs/tasks:
 #14: Use os.path.join() for more consistency/pythonicness
 #24: Run as regular user? (pychroot? fakeroot?)
 #34: Build-time support for only building single phase of build
-#36: Allow parsing pkg lists with inline comments
 #39: Fix UEFI
 #40: ISO overlay (to add e.g. memtest86+ to final ISO)
 #43: Support resuming partial tarball downloads (Accept-Ranges: bytes)

View File

@@ -1,3 +1,4 @@
 import jinja2
 import os
 import shutil


View File

@@ -3,6 +3,17 @@ import os
 import psutil
 import gpg.errors


+# This helps translate the input name from the conf to a string compatible with the gpg module.
+_algmaps = {#'cv': 'cv{keysize}',  # DISABLED, can't sign (only encrypt). Currently only 25519
+            'ed': 'ed{keysize}',  # Currently only 25519
+            #'elg': 'elg{keysize}',  # DISABLED, can't sign (only encrypt). 1024, 2048, 4096
+            'nist': 'nistp{keysize}',  # 256, 384, 521
+            'brainpool.1': 'brainpoolP{keysize}r1',  # 256, 384, 512
+            'sec.k1': 'secp{keysize}k1',  # Currently only 256
+            'rsa': 'rsa{keysize}',  # Variable (1024 <> 4096), but we only support 1024, 2048, 4096
+            'dsa': 'dsa{keysize}'}  # Variable (768 <> 3072), but we only support 768, 2048, 3072
+

 # http://files.au.adversary.org/crypto/GPGMEpythonHOWTOen.html
 # https://www.gnupg.org/documentation/manuals/gpgme.pdf
 # Support ECC? https://www.gnupg.org/faq/whats-new-in-2.1.html#ecc
@@ -60,7 +71,7 @@ class GPGHandler(object):
             self._prep_home()
         else:
             self._check_home()
-        self.ctx = self.get_context(home_dir = self.home)
+        self.ctx = self.GetContext(home_dir = self.home)


     def _check_home(self, home = None):
         if not home:
@@ -94,11 +105,12 @@ class GPGHandler(object):
                             'write to')
         return()


-    def get_context(self, **kwargs):
+    def GetContext(self, **kwargs):
         ctx = gpg.Context(**kwargs)
         return(ctx)


-    def kill_stale_agent(self):
+    def KillStaleAgent(self):
+        # Is this even necessary since I switched to the native gpg module instead of the gpgme one?
         _process_list = []
         # TODO: optimize; can I search by proc name?
         for p in psutil.process_iter():
@@ -113,7 +125,13 @@ class GPGHandler(object):
 #        for p in plst:
 #            psutil.Process(p).terminate()


-    def get_sigs(self, data_in):
+    def CreateKey(self, params):  # TODO: explicit params
+        # We can't use self.ctx.create_key; it's a little limiting.
+        # It's a fairly thin wrapper to .op_createkey() (the C GPGME API gpgme_op_createkey) anyways.
+        pass
+
+    def GetSigs(self, data_in):
         key_ids = []
         # Currently as of May 13, 2018 there's no way using the GPGME API to do
         # the equivalent of the CLI's --list-packets.
@@ -131,3 +149,7 @@ class GPGHandler(object):
                 l = [i.strip() for i in line.split(':')]
                 key_ids.append(l[0])
         return(key_ids)
+
+    def CheckSigs(self, keys, sig_data):
+        try:
+            self.ctx.verify(sig_data)
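For reference, the _algmaps table above turns a config algorithm name plus keysize into the identifier string the gpg module expects. A minimal sketch with hypothetical inputs:

    # Illustrative only; 'brainpool.1' and 512 stand in for values parsed from the config.
    algo, keysize = 'brainpool.1', 512
    gpg_algo = _algmaps[algo].format(keysize = keysize)
    # gpg_algo == 'brainpoolP512r1'; likewise 'nist'/384 -> 'nistp384', 'ed'/25519 -> 'ed25519'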

View File

@@ -1,4 +1,4 @@
-#!/usr/bin/env python3.6
+#!/usr/bin/env python3


 # Supported initsys values:
 # systemd
@@ -41,6 +41,7 @@ pkg_mgr_prep = """#!/bin/bash
 pacman -Syy
 pacman-key --init
 pacman-key --populate archlinux
+pacman -S --noconfirm --needed base
 pacman -S --noconfirm --needed base-devel multilib-devel git linux-headers \
        mercurial subversion vala xorg-server-devel
 cd /tmp
@@ -62,8 +63,8 @@ rm apacman*
 # should try to install it.
 #### AUR SUPPORT ####
 packager = {'pre_check': False,
-            'sys_update': ['/usr/bin/aurman', '-S', '-u'],
-            'sync_cmd': ['/usr/bin/aurman', '-S', '-y', '-y'],
+            'sys_update': ['/usr/bin/apacman', '-S', '-u'],
+            'sync_cmd': ['/usr/bin/apacman', '-S', '-y', '-y'],
             'check_cmds': {'versioned': ['/usr/bin/pacman',
                                          '-Q', '-s',
                                          '{PACKAGE}'],
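The '{PACKAGE}' entries in check_cmds are templates; the substitution site isn't in this diff, but presumably resolves along these lines (sketch, hypothetical package name):

    check_cmd = ['/usr/bin/pacman', '-Q', '-s', '{PACKAGE}']
    cmd = [arg.format(PACKAGE = 'memtest86+') for arg in check_cmd]
    # cmd == ['/usr/bin/pacman', '-Q', '-s', 'memtest86+']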

View File

@@ -669,8 +669,17 @@
                 <xs:attribute name="algo" use="optional">
                     <xs:simpleType>
                         <xs:restriction base="xs:string">
+                            <!-- rsa, dsa, and elgamal are "normal". Newer GnuPG supports ECC (yay!), so we have support for those in the XSD (you can get a list with gpg -with-colons -list-config curve | cut -f3 -d":" | tr ';' '\n'). -->
+                            <!-- We test in-code if the host supports it. -->
                             <xs:enumeration value="rsa"/>
                             <xs:enumeration value="dsa"/>
+                            <!-- The following only support encryption. The entire reason we'd be generating a key is to sign files, so we disable them. -->
+                            <!-- <xs:enumeration value="elg"/> -->
+                            <!-- <xs:enumeration value="cv"/> -->
+                            <xs:enumeration value="ed"/>
+                            <xs:enumeration value="nist"/>
+                            <xs:enumeration value="brainpool.1"/>
+                            <xs:enumeration value="sec.k1"/>
                         </xs:restriction>
                     </xs:simpleType>
                 </xs:attribute>
@@ -893,4 +902,4 @@
             </xs:complexType>
         </xs:element>
     <!-- END BDISK -->
 </xs:schema>

View File

@@ -1,4 +1,4 @@
-#!/usr/bin/env python3.6
+#!/usr/bin/env python3


 # Ironically enough, I think building a GUI for this would be *cleaner*.
 # Go figure.

View File

@@ -40,6 +40,8 @@ class Conf(object):


         You can provide any combination of these
         (e.g. "profile={'id': 2, 'name' = 'some_profile'}").
+        Non-greedy matching (meaning ALL attributes specified
+        must match).
         """
         if validate_cfg == 'pre':
             # Validate before attempting any other operations
@@ -57,6 +59,7 @@ class Conf(object):
         if validate_cfg:
             # Validation post-substitution
             self.validate(parsed = False)
+        # TODO: populate checksum{} with hash_algo if explicit


     def get_pki_obj(self, pki, pki_type):
         elem = {}
@@ -272,6 +275,9 @@ class Conf(object):
                 self.cfg['profile'][a] = transform.xml2py(
                                                 self.profile.attrib[a],
                                                 attrib = True)
+        # Small bug in transform.xml2py that we unfortunately can't fix, so we manually fix.
+        if 'id' in self.cfg['profile'] and isinstance(self.cfg['profile']['id'], bool):
+            self.cfg['profile']['id'] = int(self.cfg['profile']['id'])
         return()


     def parse_sources(self):
@@ -323,11 +329,12 @@ class Conf(object):
             xml = etree.fromstring(self.xml_suppl.return_full())
             self.xsd.assertValid(xml)
         if parsed:
-            # TODO: perform further validations that we can't do in XSD.
             # We wait until after it's parsed to evaluate because otherwise we
             # can't use utils.valid().
             # We only bother with stuff that would hinder building, though -
             # e.g. we don't check that profile's UUID is a valid UUID4.
+            # The XSD can catch a lot of stuff, but it's not so hot with things like URI validation,
+            # email validation, etc.
             # URLs
             for url in (self.cfg['uri'], self.cfg['dev']['website']):
                 if not valid.url(url):
@@ -335,25 +342,41 @@ class Conf(object):
             # Emails
             for k in self.cfg['gpg']['keys']:
                 if not valid.email(k['email']):
-                    raise ValueError(
-                        'GPG key {0}: {1} is not a valid email '
-                        'address'.format(k['name'], k['email']))
+                    raise ValueError('GPG key {0}: {1} is not a valid email address'.format(k['name'], k['email']))
             if not valid.email(self.cfg['dev']['email']):
-                raise ValueError('{0} is not a valid email address'.format(
-                    self.cfg['dev']['email']))
+                raise ValueError('{0} is not a valid email address'.format(self.cfg['dev']['email']))
             if self.cfg['pki']:
                 if 'subject' in self.cfg['pki']['ca']:
-                    if not valid.email(
-                            self.cfg['pki']['ca']['subject']['emailAddress']):
-                        raise ValueError('{0} is not a valid email '
-                                         'address'.format(
-                            self.cfg['pki']['ca']['subject']['emailAddress']))
-
-                    if not self.cfg['pki'][x]['subject']:
-                        continue
-                    if not valid.email(
-                            self.cfg['pki'][x]['subject']['emailAddress']):
-                        raise ValueError('{0} is not a valid email '
-                                         'address'.format(
-                            self.cfg['pki'][x]['subject']['email']))
+                    if not valid.email(self.cfg['pki']['ca']['subject']['emailAddress']):
+                        raise ValueError('{0} is not a valid email address'.format(
+                            self.cfg['pki']['ca']['subject']['emailAddress']))
+                for cert in self.cfg['pki']['clients']:
+                    if not cert['subject']:
+                        continue
+                    if not valid.email(cert['subject']['emailAddress']):
+                        raise ValueError('{0} is not a valid email address'.format(cert['subject']['emailAddress']))
+            # Salts/hashes
+            if self.cfg['root']['salt']:
+                if not valid.salt_hash(self.cfg['root']['salt']):
+                    raise ValueError('{0} is not a valid salt'.format(self.cfg['root']['salt']))
+            if self.cfg['root']['hashed']:
+                if not valid.salt_hash_full(self.cfg['root']['salt_hash'], self.cfg['root']['hash_algo']):
+                    raise ValueError('{0} is not a valid hash of type {1}'.format(self.cfg['root']['salt_hash'],
+                                                                                  self.cfg['root']['hash_algo']))
+            for u in self.cfg['users']:
+                if u['salt']:
+                    if not valid.salt_hash(u['salt']):
+                        raise ValueError('{0} is not a valid salt'.format(u['salt']))
+                if u['hashed']:
+                    if not valid.salt_hash_full(u['salt_hash'], u['hash_algo']):
+                        raise ValueError('{0} is not a valid hash of type {1}'.format(u['salt_hash'], u['hash_algo']))
+            # GPG Key IDs
+            if self.cfg['gpg']['keyid']:
+                if not valid.gpgkeyID(self.cfg['gpg']['keyid']):
+                    raise ValueError('{0} is not a valid GPG Key ID/fingerprint'.format(self.cfg['gpg']['keyid']))
+            for s in self.cfg['sources']:
+                if 'sig' in s:
+                    for k in s['sig']['keys']:
+                        if not valid.gpgkeyID(k):
+                            raise ValueError('{0} is not a valid GPG Key ID/fingerprint'.format(k))
         return()
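A sketch of the root/user salt and hash values the new checks are meant to accept, built with Python's own crypt module (illustrative; valid.salt_hash() and valid.salt_hash_full() are added in utils below):

    import crypt
    salt = crypt.mksalt(crypt.METHOD_SHA512)   # e.g. '$6$70dWSNV8PL3RQnWP'
    hashed = crypt.crypt('hunter2', salt)      # a full SHA512-crypt string: '$6$...$...'
    # Expected: valid.salt_hash(salt) is True, and
    # valid.salt_hash_full(hashed, 'sha512') is True (hashid identifies it as SHA-512 Crypt).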

View File

@@ -1,3 +1,67 @@
-import copy
-import importlib
+import hashlib
+import importlib  # needed for the guest-os-specific stuff...
 import os
+from . import utils
+from urllib.parse import urljoin
+
+
+def hashsum_downloader(url, filename = None):
+    # TODO: support "latest" and "regex" flags? or remove from specs (since the tarball can be specified by these)?
+    #       move that to the utils.Download() class?
+    d = utils.Download(url, progress = False)
+    hashes = {os.path.basename(k): v for (v, k) in [line.split() for line in d.fetch().decode('utf-8').splitlines()]}
+    if filename:
+        if filename in hashes:
+            return(hashes[filename])
+        else:
+            raise KeyError('Filename {0} not in the list of hashes'.format(filename))
+    return(hashes)
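To illustrate the dict comprehension above: a coreutils-style checksum file (hash, whitespace, path per line) flattens to {basename: hash} (hypothetical data):

    import os
    _example = ('0123456789abcdef  ./out/foo.tar.xz\n'
                'fedcba9876543210  ./out/bar.txt')
    hashes = {os.path.basename(k): v for (v, k) in [l.split() for l in _example.splitlines()]}
    # hashes == {'foo.tar.xz': '0123456789abcdef', 'bar.txt': 'fedcba9876543210'}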


+class Prepper(object):
+    def __init__(self, dirs, sources, gpg = None):
+        # dirs is a ConfParse.cfg['build']['paths'] dict of dirs
+        self.CreateDirs(dirs)
+        # TODO: set up GPG env here so we can use it to import sig key and verify sources
+        # NOTE: _download() still expects self.cfg (a parsed ConfParse config); that wiring is TODO.
+        for idx, s in enumerate(sources):
+            self._download(idx)
+
+    def CreateDirs(self, dirs):
+        for d in dirs:
+            os.makedirs(d, exist_ok = True)
+        return()
+
+    def _download(self, source_idx):
+        download = True
+        _source = self.cfg['sources'][source_idx]
+        _dest_dir = os.path.join(self.cfg['build']['paths']['cache'], str(source_idx))  # str(); join() won't take an int
+        _tarball = os.path.join(_dest_dir, _source['tarball']['fname'])
+        _remote_dir = urljoin(_source['mirror'], _source['rootpath'])
+        _remote_tarball = urljoin(_remote_dir + '/', _source['tarball']['fname'])
+        def _hash_verify():  # TODO: move to utils.valid()?
+            # Get a checksum.
+            if 'checksum' in _source:
+                if not _source['checksum']['explicit']:
+                    # Grab just this tarball's hash out of the fetched checksum file.
+                    _source['checksum']['value'] = hashsum_downloader(urljoin(_remote_dir + '/',
+                                                                              _source['checksum']['fname']),
+                                                                      filename = _source['tarball']['fname'])
+                if not _source['checksum']['hash_algo']:
+                    _source['checksum']['hash_algo'] = utils.detect().any_hash(_source['checksum']['value'],
+                                                                               normalize = True)[0]
+                _hash = hashlib.new(_source['checksum']['hash_algo'])
+                with open(_tarball, 'rb') as f:
+                    # It's potentially a large file, so we chunk it 64kb at a time.
+                    _hashbuf = f.read(64000)
+                    while len(_hashbuf) > 0:
+                        _hash.update(_hashbuf)
+                        _hashbuf = f.read(64000)
+                if _hash.hexdigest().lower() != _source['checksum']['value'].lower():
+                    return(False)
+            return(True)
+        def _sig_verify(gpg_instance):  # TODO: move to utils.valid()? or just use as part of the bdisk.GPG module?
+            pass
+        if os.path.isfile(_tarball):
+            download = not _hash_verify()  # only re-download if the existing tarball fails verification
+            # TODO: also gate on _sig_verify() once it's implemented
+        if download:
+            d = utils.Download(_remote_tarball)
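The "+ '/'" on the urljoin() calls above is deliberate: without a trailing slash, urljoin() replaces the last path component instead of descending into it. A quick demonstration (hypothetical mirror URL):

    from urllib.parse import urljoin
    urljoin('https://mirror.example.com/iso/latest', 'foo.tar.xz')
    # -> 'https://mirror.example.com/iso/foo.tar.xz' ('latest' gets replaced)
    urljoin('https://mirror.example.com/iso/latest' + '/', 'foo.tar.xz')
    # -> 'https://mirror.example.com/iso/latest/foo.tar.xz'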

View File

@@ -1,4 +1,4 @@
-#!/usr/bin/env python3.6
+#!/usr/bin/env python3


 import argparse
 import confparse
@@ -14,8 +14,10 @@ def parseArgs():
                          epilog = ('https://git.square-r00t.net'))
     return(args)


-def run():
-    pass
+def run(cfg):
+    cfg = confparse.Conf(cfg, validate_cfg = True)
+    cfg.parse_all()



 def run_interactive():
     args = vars(parseArgs().parse_args())

View File

@@ -3,15 +3,188 @@
 import argparse
 import copy
 import datetime
+import grp
+import hashlib
 import os
 import pathlib
+import platform
+import pwd
 import re
+import stat
+from collections import OrderedDict
+try:
+    import pycksum
+    has_cksum = True
+except ImportError:
+    has_cksum = False


 # Parse BSD mtree spec files.
 # On arch, BSD mtree is ported in the AUR as nmtree.
-# TODO: add a generator class as well?
+# TODO: add a generator class as well? (in process)
 # TODO: add a checking function as well?


+# The format used for headers
+_header_strptime_fmt = '%a %b %d %H:%M:%S %Y'
+
+# Supported hash types (for generation). These are globally available always.
+_hashtypes = ['md5', 'sha1', 'sha256', 'sha384', 'sha512']
+# If RIPEMD-160 is supported, we add it (after MD5).
+if 'ripemd160' in hashlib.algorithms_available:
+    _hashtypes.insert(1, 'rmd160')
+
+# Iterated over to determine which type an item is.
+_stype_map = {'block': stat.S_ISBLK,
+              'char': stat.S_ISCHR,
+              'dir': stat.S_ISDIR,
+              'fifo': stat.S_ISFIFO,
+              'file': stat.S_ISREG,
+              'link': stat.S_ISLNK,
+              'socket': stat.S_ISSOCK}
+
+# Regex pattern for cleaning up an octal perm mode into a string representation.
+_octre = re.compile('^0o')

+class MTreeGen(object):
+    def __init__(self, path):
+        self.path = pathlib.PosixPath(os.path.abspath(os.path.expanduser(path)))
+        # These are used to keep a cached copy of the info.
+        self._sysinfo = {'uids': {}, 'gids': {}}
+        self._build_header()
+        # We use this to keep track of where we are exactly in the tree so we can generate a full absolute path at
+        # any moment relative to the tree.
+        self._path_pointer = copy.deepcopy(self.path)


+    def paths_iterator(self):
+        for root, dirs, files in os.walk(self.path):
+            for f in files:
+                _fname = pathlib.PosixPath(root).joinpath(f)  # join to the walk root so nested files resolve
+                _stats = self._get_stats(_fname)
+                if not _stats:
+                    print(('WARNING: {0} either disappeared while we were trying to parse it or '
+                           'it is a broken symlink.').format(_fname))
+                    continue
+                # TODO: get /set line here?
+                item = ' {0} \\\n'.format(f)
+                _type = 'file'  # TODO: stat this more accurately
+                _cksum = self._gen_cksum(_fname)
+                item += ' {0} {1} {2}\\\n'.format(_stats['size'],
+                                                  _stats['time'],
+                                                  ('{0} '.format(_cksum) if _cksum else ''))
+                # TODO: here's where the hashes would get added
+            # TODO: here's where we parse dirs. maybe do that before files?
+            #       remember: mtree specs use ..'s to traverse upwards when done with a dir
+            for d in dirs:
+                _dname = pathlib.PosixPath(root).joinpath(d)  # likewise, relative to the walk root
+                _stats = self._get_stats(_dname)
+                if not _stats:
+                    print(('WARNING: {0} either disappeared while we were trying to parse it or '
+                           'it is a broken symlink.').format(_dname))
+                    continue
+                # TODO: get /set line here?
+        return()


+    def _gen_cksum(self, fpath):
+        if not has_cksum:
+            return(None)
+        if not os.path.isfile(fpath):
+            return(None)
+        # TODO: waiting on https://github.com/sobotklp/pycksum/issues/2 for byte iteration (because large files maybe?)
+        c = pycksum.Cksum()
+        with open(fpath, 'rb') as f:
+            c.add(f)
+        return(c.get_cksum())


+    def _get_stats(self, path):
+        stats = {}
+        try:
+            _st = os.stat(path, follow_symlinks = False)
+        except FileNotFoundError:
+            # Broken symlink? Shouldn't occur since follow_symlinks is False anyways, BUT...
+            return(None)
+        # Ownership
+        stats['uid'] = _st.st_uid
+        stats['gid'] = _st.st_gid
+        if _st.st_uid in self._sysinfo['uids']:
+            stats['uname'] = self._sysinfo['uids'][_st.st_uid]
+        else:
+            _pw = pwd.getpwuid(_st.st_uid).pw_name
+            stats['uname'] = _pw
+            self._sysinfo['uids'][_st.st_uid] = _pw
+        if _st.st_gid in self._sysinfo['gids']:
+            stats['gname'] = self._sysinfo['gids'][_st.st_gid]
+        else:
+            _grp = grp.getgrgid(_st.st_gid).gr_name
+            stats['gname'] = _grp
+            self._sysinfo['gids'][_st.st_gid] = _grp
+        # Type and Mode
+        for t in _stype_map:
+            if _stype_map[t](_st.st_mode):
+                stats['type'] = t
+                # TODO: need a reliable way of parsing this.
+                #       for instance, for /dev/autofs, _st.st_dev = 6 (os.makedev(6) confirms major is 0, minor is 6)
+                #       but netBSD mtree (ported) says it's "0xaeb" (2795? or, as str, "®b" apparently).
+                #       I'm guessing the kernel determines this, but where is it pulling it from/how?
+                #       We can probably do 'format,major,minor' (or, for above, 'linux,0,6').
+                # if t in ('block', 'char'):
+                #     stats['device'] = None
+                # Handle symlinks.
+                if t == 'link':
+                    _target = path
+                    while os.path.islink(_target):
+                        _target = os.path.realpath(_target)
+                    stats['link'] = _target
+                break
+        stats['mode'] = '{0:0>4}'.format(_octre.sub('', str(oct(stat.S_IMODE(_st.st_mode)))))
+        stats['size'] = _st.st_size
+        stats['time'] = str(float(_st.st_mtime))
+        stats['nlink'] = _st.st_nlink
+        # TODO: "flags" keyword? is that meaningful on linux?
+        stats['flags'] = 'none'
+        return(stats)
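The stats['mode'] expression above boils down to "octal permission bits, zero-padded to four digits". Step by step, with an illustrative mode:

    import re
    import stat
    _octre = re.compile('^0o')        # same pattern as the module-level one
    st_mode = 0o100644                # a regular file, rw-r--r--
    m = stat.S_IMODE(st_mode)         # 0o644: permission bits only
    s = _octre.sub('', str(oct(m)))   # '0o644' -> '644'
    '{0:0>4}'.format(s)               # '0644'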



+    def _gen_hashes(self, fpath):
+        hashes = OrderedDict({})
+        if not os.path.isfile(fpath):
+            return(hashes)
+        _hashnums = len(_hashtypes)
+        for idx, h in enumerate(_hashtypes):
+            # Stupid naming inconsistencies.
+            _hashname = (h if h != 'rmd160' else 'ripemd160')
+            _hasher = hashlib.new(_hashname)
+            with open(fpath, 'rb') as f:
+                # Hash 64kb at a time in case it's a huge file. TODO: is this the most ideal chunk size?
+                _hashbuf = f.read(64000)
+                while len(_hashbuf) > 0:
+                    _hasher.update(_hashbuf)
+                    _hashbuf = f.read(64000)
+            hashes[h] = _hasher.hexdigest()
+        return(hashes)
+        # if idx + 1 < _hashnums:
+        #     hashes += ' {0}={1} \\\n'.format(h, _hasher.hexdigest())
+        # else:
+        #     hashes += ' {0}={1}\n'.format(h, _hasher.hexdigest())
+        # return(hashes)


+    def _build_header(self):
+        self.spec = ''
+        _header = OrderedDict({})
+        _header['user'] = pwd.getpwuid(os.geteuid()).pw_name
+        _header['machine'] = platform.node()
+        _header['tree'] = str(self.path)
+        _header['date'] = datetime.datetime.utcnow().strftime(_header_strptime_fmt)
+        for h in _header:
+            self.spec += '#\t{0:>7}: {1}\n'.format(h, _header[h])
+        self.spec += '\n'
+        return()



 class MTreeParse(object):
     def __init__(self, spec):
         if not isinstance(spec, (str, bytes)):
@@ -21,7 +194,6 @@ class MTreeParse(object):
                 spec = spec.decode('utf-8')
             except UnicodeDecodeError:
                 raise ValueError('spec must be a utf-8 encoded set of bytes if using byte mode')
-        self._strptime_fmt = '%a %b %d %H:%M:%S %Y'
         self.orig_spec = copy.deepcopy(spec)  # For referencing in case someone wanted to write it out.
         # We NOW need to handle the escaped linebreaking it does.
         self._specdata = re.sub('\\\\\s+', '', spec).splitlines()
@@ -82,7 +254,7 @@ class MTreeParse(object):
         # They are restored by an "/unset". Since they're global and stateful, they're handled as a class attribute.
         self.settings = copy.deepcopy(self._tplitem)
         self._parse_items()
-        del(self.settings, self._tplitem, self._strptime_fmt)
+        del(self.settings, self._tplitem)


     def _get_header(self):
@@ -96,7 +268,7 @@ class MTreeParse(object):
             header = l[0]
             val = (l[1] if l[1] is not '(null)' else None)
             if header == 'date':
-                val = datetime.datetime.strptime(val, self._strptime_fmt)
+                val = datetime.datetime.strptime(val, _header_strptime_fmt)
             elif header == 'tree':
                 val = pathlib.PosixPath(val)
             self.header[header] = val
@@ -158,6 +330,8 @@ class MTreeParse(object):
             return(out)
         def _unset_parse(unsetline):
             out = {}
+            if unsetline[1] == 'all':
+                return(copy.deepcopy(self._tplitem))
             for i in unsetline:
                 out[i] = self._tplitem[i]
             return(out)
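For reference, the shape of spec MTreeGen is assembling and MTreeParse consumes; the header lines follow the '#\t{0:>7}: {1}' format and the date matches _header_strptime_fmt. An abbreviated, illustrative spec:

    #	   user: root
    #	machine: buildhost
    #	   tree: /tmp/overlay
    #	   date: Sun Sep 23 07:55:58 2018

    /set type=file uid=0 gid=0 mode=0644
        etc/motd \
            size=27 time=1537702558.0
    /unset all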

View File

@@ -11,18 +11,31 @@ class PromptStrings(object):
                 'attribs': {
                     'algo': {
                         'text': 'the subkey\'s encryption type/algorithm',
-                        'choices': ['rsa', 'dsa'],
-                        'default': 'rsa'
+                        # The following can ONLY be used for encryption, not signing: elg, cv
+                        #'choices': ['rsa', 'dsa', 'elg', 'ed', 'cv', 'nistp', 'brainpool.1', 'secp.k1'],
+                        'choices': ['rsa', 'dsa', 'ed', 'nist', 'brainpool.1', 'sec.k1'],
+                        #'default': 'rsa'
+                        'default': 'ed'
                     },
                     'keysize': {
                         'text': 'the subkey\'s key size (in bits)',
                         'choices': {
                             'rsa': ['1024', '2048', '4096'],
-                            'dsa': ['768', '2048', '3072']
+                            'dsa': ['768', '2048', '3072'],
+                            #'elg': ['1024', '2048', '4096'],  # Invalid for signing, etc.
+                            'ed': ['25519'],
+                            #'cv': ['25519'],
+                            'nist': ['256', '384', '521'],
+                            'brainpool.1': ['256', '384', '512'],
+                            'sec.k1': ['256']
                         },
                         'default': {
                             'rsa': '4096',
-                            'dsa': '3072'
+                            'dsa': '3072',
+                            'ed': '25519',
+                            'nist': '521',
+                            'brainpool.1': '512',
+                            'sec.k1': '256'
                         }
                     }
                 },
@@ -113,4 +126,4 @@ class PromptStrings(object):
                        'Email: ')
             }
         }
     }
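How the nested defaults above chain together (sketch; 'prompts' is a stand-in for however PromptStrings exposes this dict):

    # 'prompts' is hypothetical; only the nested dict shape comes from this diff.
    algo = prompts['attribs']['algo']['default']               # 'ed'
    keysize = prompts['attribs']['keysize']['default'][algo]   # '25519'
    # GPG.py's _algmaps then yields 'ed{keysize}'.format(keysize = keysize) == 'ed25519'.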

View File

@@ -1,3 +1,5 @@
+# Yes, this is messy. They don't belong anywhere else, leave me alone.
+
 import _io
 import copy
 import crypt
@@ -14,6 +16,7 @@ import string
 import uuid
 import validators
 import zlib
+import requests
 import lxml.etree
 import lxml.objectify
 from bs4 import BeautifulSoup
@@ -30,7 +33,7 @@ passlib_schemes = ['des_crypt', 'md5_crypt', 'sha256_crypt', 'sha512_crypt']
 # Build various hash digest name lists
 digest_schemes = list(hashlib.algorithms_available)
 # Provided by zlib
-# TODO
+# TODO?
 digest_schemes.append('adler32')
 digest_schemes.append('crc32')


@@ -39,6 +42,54 @@ crypt_map = {'sha512': crypt.METHOD_SHA512,
              'md5': crypt.METHOD_MD5,
              'des': crypt.METHOD_CRYPT}



+class Download(object):
+    def __init__(self, url, progress = True, offset = None, chunksize = 1024):
+        self.cnt_len = None
+        self.head = requests.head(url, allow_redirects = True).headers
+        self.req_headers = {}
+        self.range = False
+        self.url = url
+        self.offset = offset
+        self.chunksize = chunksize
+        self.progress = progress
+        if 'accept-ranges' in self.head:
+            if self.head['accept-ranges'].lower() != 'none':
+                self.range = True
+        if 'content-length' in self.head:
+            try:
+                self.cnt_len = int(self.head['content-length'])
+            except (TypeError, ValueError):
+                pass
+        if self.cnt_len and self.offset and self.range:
+            if not self.offset <= self.cnt_len:
+                raise ValueError(('The offset requested ({0}) is greater than '
+                                  'the content-length value ({1})').format(self.offset, self.cnt_len))
+            self.req_headers['range'] = 'bytes={0}-'.format(self.offset)
+
+    def fetch(self):
+        if not self.progress:
+            self.req = requests.get(self.url, allow_redirects = True, headers = self.req_headers)
+            self.bytes_obj = self.req.content
+        else:
+            self.req = requests.get(self.url, allow_redirects = True, stream = True, headers = self.req_headers)
+            self.bytes_obj = bytes()
+            _bytelen = 0
+            # TODO: better handling for logging instead of print()s?
+            for chunk in self.req.iter_content(chunk_size = self.chunksize):
+                self.bytes_obj += chunk
+                if self.cnt_len:
+                    print('\033[F')
+                    print('{0:.2f}'.format((_bytelen / float(self.head['content-length'])) * 100),
+                          end = '%',
+                          flush = True)
+                    _bytelen += self.chunksize
+                else:
+                    print('.', end = '')
+            print()
+        return(self.bytes_obj)


 class XPathFmt(string.Formatter):
     def get_field(self, field_name, args, kwargs):
         vals = self.get_value(field_name, args, kwargs), field_name
@@ -50,18 +101,19 @@ class detect(object):
     def __init__(self):
         pass


-    def any_hash(self, hash_str):
+    def any_hash(self, hash_str, normalize = False):
         h = hashid.HashID()
         hashes = []
         for i in h.identifyHash(hash_str):
             if i.extended:
                 continue
             x = i.name
-            if x.lower() in ('crc-32', 'ripemd-160', 'sha-1', 'sha-224',
-                             'sha-256', 'sha-384', 'sha-512'):
+            if x.lower() in ('crc-32', 'ripemd-160', 'sha-1', 'sha-224', 'sha-256', 'sha-384', 'sha-512'):
                 # Gorram you, c0re.
                 x = re.sub('-', '', x.lower())
-            _hashes = [h.lower() for h in digest_schemes]
+            _hashes = [h.lower() for h in digest_schemes]  # TODO: move this outside so we don't define it every invoke
+            if normalize:
+                x = re.sub('(-|crypt|\s+)', '', x.lower())
             if x.lower() in sorted(list(set(_hashes))):
                 hashes.append(x)
         return(hashes)
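What normalize=True buys: hashid's display names get collapsed to hashlib-style names so the result can feed hashlib.new(). A sketch (exact results depend on the hashid library):

    d = detect()
    # '5f4dcc3b...' is the MD5 of 'password'; hashid reports it as 'MD5' among others.
    d.any_hash('5f4dcc3b5aa765d61d8327deb882cf99', normalize = True)
    # expected to include 'md5'; a name like 'SHA-512 Crypt' normalizes to 'sha512'.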
@@ -83,8 +135,7 @@ class detect(object):
         return(salt)


     def remote_files(self, url_base, ptrn = None, flags = []):
-        with urlopen(url_base) as u:
-            soup = BeautifulSoup(u.read(), 'lxml')
+        soup = BeautifulSoup(Download(url_base, progress = False).fetch(), 'lxml')  # fetch() performs the GET
         urls = []
         if 'regex' in flags:
             if not isinstance(ptrn, str):
@@ -113,8 +164,7 @@ class detect(object):
         return(urls)


     def gpgkeyID_from_url(self, url):
-        with urlopen(url) as u:
-            data = u.read()
+        data = Download(url, progress = False).fetch()
         g = GPG.GPGHandler()
         key_ids = g.get_sigs(data)
         del(g)
@@ -166,7 +216,7 @@ class detect(object):
         # Get any easy ones out of the way first.
         if name in digest_schemes:
             return(name)
-        # Otherwise grab the first one that matches, in order from the .
+        # Otherwise grab the first one that matches
         _digest_re = re.compile('^{0}$'.format(name.strip()), re.IGNORECASE)
         for h in digest_schemes:
             if _digest_re.search(h):
@@ -774,14 +824,19 @@ class valid(object):
         return(True)


     def salt_hash(self, salthash):
-        _idents = ''.join([i.ident for i in crypt_map if i.ident])
+        _idents = ''.join([i.ident for i in crypt_map.values() if i.ident])
         # noinspection PyStringFormat
-        _regex = re.compile('^(\$[{0}]\$)?[./0-9A-Za-z]{{0,16}}\$?'.format(
-            _idents))
+        _regex = re.compile('^(\$[{0}]\$)?[./0-9A-Za-z]{{0,16}}\$?'.format(_idents))
         if not _regex.search(salthash):
             return(False)
         return(True)

+    def salt_hash_full(self, salthash, hash_type):
+        # detect.any_hash() never touches self, so passing a valid instance works here.
+        h = [re.sub('-', '', i.lower()).split()[0] for i in detect.any_hash(self, salthash, normalize = True)]
+        if hash_type.lower() not in h:
+            return(False)
+        return(True)

     def plugin_name(self, name):
         if len(name) == 0:
             return(False)
@@ -1068,4 +1123,4 @@ class xml_supplicant(object):
         for i in selectors.items():
             if i[1] and i[0] in self.selector_ids:
                 xpath += '[@{0}="{1}"]'.format(*i)
         return(xpath)
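Example usage of the new Download class (hypothetical URL); the offset only takes effect when the server advertises a usable Accept-Ranges, per __init__ above:

    d = Download('https://mirror.example.com/iso/foo.tar.xz', progress = False)
    data = d.fetch()                  # whole file, no progress output
    resume = Download('https://mirror.example.com/iso/foo.tar.xz', progress = False,
                      offset = len(data) // 2)
    rest = resume.fetch()             # remainder only, requested via a 'Range: bytes=N-' header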