adding some mtree parsing stuff we need for overlays
This commit is contained in:
		
							parent
							
								
									82c21f170a
								
							
						
					
					
						commit
						c48c752f84
					
				
							
								
								
									
										222
									
								
								bdisk/mtree.py
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										222
									
								
								bdisk/mtree.py
									
									
									
									
									
										Executable file
									
								
							@ -0,0 +1,222 @@
 | 
				
			|||||||
 | 
					#!/usr/bin/env python3
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import argparse
 | 
				
			||||||
 | 
					import copy
 | 
				
			||||||
 | 
					import datetime
 | 
				
			||||||
 | 
					import os
 | 
				
			||||||
 | 
					import pathlib
 | 
				
			||||||
 | 
					import re
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Parse BSD mtree spec files.
 | 
				
			||||||
 | 
					# On arch, BSD mtree is ported in the AUR as nmtree.
 | 
				
			||||||
 | 
					# TODO: add a generator class as well?
 | 
				
			||||||
 | 
					# TODO: add a checking function as well?
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class MTreeParse(object):
					    """Parse a BSD mtree(8) specification into plain Python data.

					    After construction the instance exposes:

					    * ``orig_spec`` - the spec text as supplied (decoded to ``str`` if bytes were given).
					    * ``header``    - dict built from the leading ``# user:/machine:/tree:/date:`` comment lines.
					    * ``spec``      - ``{'header': <header>, 'paths': {PosixPath: <item dict>}}``.

					    Each item dict starts as a copy of the current ``/set`` defaults and is then
					    overridden by the keywords found on the item's own line.
					    """

					    def __init__(self, spec):
					        """Parse ``spec`` immediately.

					        :param spec: the whole mtree spec, either as ``str`` or as UTF-8 encoded ``bytes``.
					        :raises ValueError: if ``spec`` is neither ``str`` nor ``bytes``, if the bytes are not
					                            valid UTF-8, or if an item declares an unknown ``type``.
					        """
					        if not isinstance(spec, (str, bytes)):
					            raise ValueError('spec must be a raw string of the spec or a bytes object of the string')
					        if isinstance(spec, bytes):
					            try:
					                spec = spec.decode('utf-8')
					            except UnicodeDecodeError:
					                raise ValueError('spec must be a utf-8 encoded set of bytes if using byte mode')
					        self._strptime_fmt = '%a %b %d %H:%M:%S %Y'
					        # Kept for reference in case someone wants to write it back out. Strings are
					        # immutable, so a plain reference is enough (no deepcopy needed).
					        self.orig_spec = spec
					        # mtree wraps long entries with a trailing backslash; fold those continuations
					        # (backslash plus the whitespace/newline that follows) back into a single line.
					        self._specdata = re.sub(r'\\\s+', '', spec).splitlines()
					        self._get_header()
					        self.spec = {'header': self.header,
					                     'paths': {}}
					        # Template for an item.
					        # Default keywords are:
					        # flags, gid, link, mode, nlink, size, time, type, uid
					        self._tplitem = {
					            'type': None,  # ('block', 'char', 'dir', 'fifo', 'file', 'link', 'socket')
					            # checksum of file (if it's a file) (int)
					            # On all *nix platforms, the cksum(1) utility (which is what the mtree spec uses) follows
					            # the POSIX standard CRC (which is NOT CRC-1/CRC-16 nor CRC32!):
					            # http://pubs.opengroup.org/onlinepubs/009695299/utilities/cksum.html
					            # For a python implementation,
					            # https://stackoverflow.com/questions/6835381/python-equivalent-of-unix-cksum-function
					            # See also crcmod (in PyPi).
					            'cksum': None,
					            # "The device number to use for block or char file types." Should be converted to a tuple of one
					            #  of the following:
					            # - (format(str), major(int), minor(int))
					            # - (format(str), major(int), unit(str?), subunit(str?)) (only used on bsdos formats)
					            # - (number(int?), ) ("opaque" number)
					            # Valid formats are, per man page of mtree:
					            # native, 386bsd, 4bsd, bsdos, freebsd, hpux, isc, linux, netbsd, osf1, sco, solaris, sunos,
					            # svr3, svr4, ultrix
					            # NOTE: that conversion is not implemented yet; the raw string is stored.
					            'device': None,
					            # File flags as symbolic name. BSD-specific thing? TODO: testing on BSD system
					            'flags': [],
					            'ignore': False,  # An mtree-internal flag to ignore hierarchy under this item
					            'gid': None,  # The group ID (int)
					            'gname': None,  # The group name (str)
					            'link': None,  # The link target/source, if a link.
					            # The MD5 checksum digest (str? hex?). "md5digest" is a synonym for this, so it's consolidated in
					            # as the same keyword.
					            'md5': None,
					            # The mode (in octal) (we convert it to a python-native int for os.chmod/stat, etc.)
					            # May also be a symbolic value; TODO: map symbolic to octal/int.
					            'mode': None,
					            'nlink': None,  # Number of hard links for this item.
					            'optional': False,  # This item may or may not be present in the compared directory for checking.
					            'rmd160': None,  # The RMD-160 checksum of the file. "rmd160digest" is a synonym.
					            'sha1': None,  # The SHA-1 sum. "sha1digest" is a synonym.
					            'sha256': None,  # SHA-2 256-bit checksum; "sha256digest" is a synonym.
					            'sha384': None,  # SHA-2 384-bit checksum; "sha384digest" is a synonym.
					            'sha512': None,  # SHA-2 512-bit checksum; "sha512digest" is a synonym.
					            'size': None,  # Size of the file in bytes (int).
					            'tags': [],  # mtree-internal tags (comma-separated in the mtree spec).
					            # Time the file was last modified; the epoch value from the spec is converted
					            # with datetime.datetime.fromtimestamp().
					            'time': None,
					            'uid': None,  # File owner UID (int)
					            'uname': None  # File owner username (str)
					            # And lastly, "children" is where the children files/directories go. We don't include it in the template;
					            # it's added programmatically.
					            # 'children': {}
					            }
					        # Global aspects are handled by "/set" directives.
					        # They are restored by an "/unset". Since they're global and stateful while parsing,
					        # they're tracked on the instance for the duration of the parse.
					        self.settings = copy.deepcopy(self._tplitem)
					        self._parse_items()
					        # Parse-time scratch state is not part of the result.
					        del self.settings, self._tplitem, self._strptime_fmt

					    def _get_header(self):
					        """Populate ``self.header`` from the leading ``# key: value`` comment lines.

					        Only the keys user, machine, tree and date are recognised. Scanning stops at the
					        first line that is not such a header line, so a spec without a header simply
					        yields an empty dict. A value of ``(null)`` becomes ``None``; ``date`` is parsed
					        into a ``datetime.datetime`` and ``tree`` into a ``pathlib.PosixPath``.
					        """
					        self.header = {}
					        head_re = re.compile(r'^#\s+(user|machine|tree|date):\s')
					        comment_re = re.compile(r'^\s*#\s*')
					        for line in self._specdata:
					            if not head_re.search(line):
					                # Blank line or anything else: the header (if any) is over.
					                break
					            key, val = (part.strip() for part in comment_re.sub('', line).split(':', 1))
					            # Compare by value; an identity test against a literal never matches here.
					            if val == '(null)':
					                val = None
					            if val is not None:
					                if key == 'date':
					                    val = datetime.datetime.strptime(val, self._strptime_fmt)
					                elif key == 'tree':
					                    val = pathlib.PosixPath(val)
					            self.header[key] = val

					    def _parse_items(self):
					        """Walk the spec body and fill ``self.spec['paths']``.

					        Handles ``/set`` and ``/unset`` directives, ``..`` lines (ascend one directory) and
					        item lines. Paths are tracked relative to a synthetic root of ``/``.

					        :raises ValueError: if a ``type=`` keyword names an unknown file type.
					        """
					        # Per the man page:
					        # "Empty lines and lines whose first non-whitespace character is a hash mark ('#') are ignored."
					        ignore_re = re.compile(r'^(\s*(#.*)?)?$')
					        # Matches a hash keyword with or without its "digest" synonym suffix.
					        hash_re = re.compile(r'^(md5|rmd160|sha1|sha256|sha384|sha512)(digest)?$')
					        # A line holding only ".." moves up one level. (A bare "." must NOT ascend.)
					        parent_re = re.compile(r'^\.\./?$')
					        types = ('block', 'char', 'dir', 'fifo', 'file', 'link', 'socket')
					        int_keys = ('uid', 'gid', 'cksum', 'nlink', 'size')

					        def _kwparse(kwline):
					            """Turn a list of ``key=value`` tokens into a dict of converted values."""
					            out = {}
					            for token in kwline:
					                k, sep, v = token.partition('=')
					                if not sep:
					                    v = None  # Bare keyword such as "optional".
					                if v == 'none':
					                    v = None
					                if k == 'mode':
					                    # TODO: handle symbolic references too (e.g. rwxrwxrwx)
					                    if v is not None and v.isdigit():
					                        v = int(v, 8)  # Octal -> int, directly usable with os.chmod etc.
					                elif k in int_keys:
					                    if v is not None and v.isdigit():
					                        v = int(v)
					                elif k in ('ignore', 'optional'):
					                    v = True  # Booleans are True by their mere presence.
					                elif k in ('flags', 'tags'):
					                    if v:
					                        v = [part.strip() for part in v.split(',')]
					                elif hash_re.search(k):
					                    k = hash_re.sub(r'\g<1>', k)  # Fold "md5digest" into "md5", etc.
					                elif k == 'time':
					                    if v is not None:
					                        v = datetime.datetime.fromtimestamp(float(v))
					                elif k == 'type':
					                    if v not in types:
					                        raise ValueError('{0} not one of: {1}'.format(v, ', '.join(types)))
					                out[k] = v
					            return out

					        def _unset(names):
					            """Restore the named keywords in ``self.settings`` to their template defaults."""
					            for name in names:
					                name = hash_re.sub(r'\g<1>', name)
					                if name in self._tplitem:
					                    # Copy so mutable defaults (lists) are never shared with the template.
					                    self.settings[name] = copy.deepcopy(self._tplitem[name])
					                else:
					                    # Not a template keyword: it can only exist via an earlier /set.
					                    self.settings.pop(name, None)

					        # The Business-End (TM)
					        curpath = pathlib.PosixPath('/')
					        for line in self._specdata:
					            if ignore_re.search(line):
					                continue
					            fields = line.split()
					            if parent_re.search(line.strip()):
					                curpath = curpath.parent
					                continue
					            # Commands. Splitting on whitespace is safe since the man page specifies the
					            # values are not to contain whitespace.
					            if fields[0] == '/set':
					                self.settings.update(_kwparse(fields[1:]))
					                continue
					            if fields[0] == '/unset':
					                # Skip the directive token itself; only the keyword names follow it.
					                _unset(fields[1:])
					                continue
					            # Anything else is an item: current defaults overridden by its own keywords.
					            item = copy.deepcopy(self.settings)
					            item.update(_kwparse(fields[1:]))
					            path = pathlib.PosixPath(os.path.normpath(curpath.joinpath(fields[0])))
					            if item['type'] == 'dir':
					                # Entering a directory: following entries are relative to it.
					                curpath = path
					            self.spec['paths'][path] = item
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def parseArgs():
					    """Build the command-line parser for the mtree CLI.

					    Returns the configured ``argparse.ArgumentParser`` itself (not parsed arguments);
					    the caller is expected to invoke ``parse_args()`` on it.
					    """
					    parser = argparse.ArgumentParser(description='An mtree parser')
					    # TODO: support stdin piping
					    parser.add_argument(
					        'specfile',
					        help='The path to the spec file to parse',
					    )
					    return parser
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Allow to be run as a CLI utility as well.
 | 
				
			||||||
 | 
					def main():
					    """CLI entry point: parse the given spec file and pretty-print the parser object.

					    Reads the path from the ``specfile`` argument, parses it with ``MTreeParse``, dumps
					    the continuation-folded spec lines to ``/tmp/newspec`` and prints the remaining
					    members of the parser object at the width of the current terminal.
					    """
					    import inspect
					    import pprint
					    import shutil

					    args = vars(parseArgs().parse_args())
					    specpath = os.path.abspath(os.path.expanduser(args['specfile']))
					    # The parser treats byte input as UTF-8, so read text the same way instead of
					    # depending on the locale's default encoding.
					    with open(specpath, encoding='utf-8') as f:
					        mt = MTreeParse(f.read())
					    # NOTE(review): this looks like a leftover debugging aid. It writes to a fixed,
					    # predictable path in /tmp on every run - confirm whether it should stay.
					    with open('/tmp/newspec', 'w', encoding='utf-8') as f:
					        f.write('\n'.join(mt._specdata))
					    # Drop the bulky raw text so the dump below shows only the parsed result.
					    del mt.orig_spec
					    del mt._specdata
					    pprint.pprint(inspect.getmembers(mt), width=shutil.get_terminal_size()[0])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Run the CLI only when executed as a script, not when imported as a module.
					if __name__ == '__main__':
					    main()
 | 
				
			||||||
							
								
								
									
										8
									
								
								examples/README
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										8
									
								
								examples/README
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,8 @@
 | 
				
			|||||||
 | 
					This directory contains example files/data that you may see referenced in documentation/code.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					- mtree.spec
 | 
				
			||||||
 | 
					  This file is an example mtree spec sheet that one may use for an overlay. It was generated by the command "mtree -c -K all -p /home/bts".
 | 
				
			||||||
 | 
					  If you're on Arch, a port of mtree can be found in the AUR under the package name "nmtree" (it's maintained by the same author as BDisk!).
 | 
				
			||||||
 | 
					  If you're on Debian or Ubuntu (or forks thereof), you can find it in the "freebsd-buildutils" package. (The executable is called "fmtree").
 | 
				
			||||||
 | 
					  If you're on Gentoo, it's in sys-apps/mtree.
 | 
				
			||||||
 | 
					  If you're on RHEL/CentOS, the "extras" repository has gomtree, which (although written in Go) should be able to produce mtree spec files, though this has not been verified.
 | 
				
			||||||
							
								
								
									
										1191
									
								
								examples/mtree.spec
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1191
									
								
								examples/mtree.spec
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user