optools/net/irc/irssilogparse.py
2018-02-13 00:09:36 -05:00

199 lines
9.1 KiB
Python
Executable File

#!/usr/bin/env python3
import argparse
import os
import re
try:
import magic
has_magic = True
except ImportError:
print(('Warning: you do not have the magic module installed ' +
'(you can install it via "pip3 install --user python-magic"). ' +
'Automatic log decompression will not work.'))
has_magic = False
class logParser(object):
def __init__(self, args, data = None):
# We'll need these accessible across the entire class.
self.args = args
self.data = data
self.has_html = False
# This is a map to determine which module to use to decompress,
# if we should.
self.cmprsn_map = {'text/plain': None, # Plain ol' text
'application/octet-stream': None, # Sometimes the formatting with color gives this
'application/x-bzip2': 'bz2', # Bzip2
'application/x-gzip': 'gzip', # Gzip
'application/x-xz': 'lzma'} # XZ
# I though y'all liked GUIs.
# ANSI, which is interpreted by the shell.
# Only used if args['color'] = True
self.ansi_prefix = '\033['
# irssi to ANSI
self.colormap = {'00': '1;37m', # White
'01': '0;30m', # Black
'02': '0;34m', # Blue
'03': '0;32m', # Green
'04': '1;31m', # Light Red
'05': '0;31m', # Red
'06': '0;35m', # Magenta (translated as Purple)
'07': '0;33m', # Orange (translated as Brown)
'08': '1;33m', # Yellow
'09': '1:32m', # Light Green
'10': '0;36m', # Cyan
'11': '1;36m', # Light Cyan
'12': '1;34m', # Light Blue
'13': '1;35m', # Light Magenta (translated as Light Purple)
'14': '0;37m', # Gray
'15': '1;37'} # Light Gray (translated as White)
# The full, interpreted path.
if 'logfile' in self.args.keys():
self.args['logfile'] = os.path.abspath(os.path.expanduser(self.args['logfile']))
if not self.data:
self.getLog()
else:
self.data = self.data.decode('utf-8').splitlines()
self.decompress = None
if has_magic:
# Determine what decompressor to use, if we need to.
_mime = magic.detect_from_content(self.data).mime_type
self.decompress = self.cmprsn_map[_mime]
if self.args['html']:
try:
import ansi2html
self.has_html = True
except ImportError:
print(('Warning: you have selected HTML output but do not ' +
'have the ansi2html module installed. Rendering HTML ' +
'output is not possible.'))
self.has_html = False
else:
self.has_html = False
def getLog(self):
if not os.path.isfile(self.args['logfile']):
raise FileNotFoundError('{0} does not exist.'.formatself.args['logfile'])
with open(self.args['logfile'], 'rb') as f:
self.data = f.read()
return()
def parseLog(self):
if self.decompress:
import importlib
self.decmp = importlib.import_module(self.decompress)
self.data = self.decmp.decompress(self.data)
if self.args['color']:
_idx = 0
_datalst = self.data.split(b'\n')
for line in _datalst[:]: # not really "lines", per se, but...
# First we strip out some basic formatting at the beginning
# of lines. Status lines are \x049/, chat lines are \x048/.
# \x04g seem to be formatting resets of sort.
line = re.sub('\x04[89]/'.encode('utf-8'),
''.encode('utf-8'),
line)
line = re.sub('\x04g'.encode('utf-8'),
''.encode('utf-8'),
line)
# Formatting resets
line = re.sub('\x04e'.encode('utf-8'),
'\033[0m'.encode('utf-8'),
line)
# Then we substitute bolds in. This is trickier, because
# bolds (\x04c) *alternate*. So does the other? bold, \x02.
for b in ('\x04c'.encode('utf-8'), '\x02'.encode('utf-8')):
_linelst = line.split(b)
_bold = False
_cnt = 0
for i in _linelst[:]:
if _bold:
_linelst[_cnt] = re.sub('^'.encode('utf-8'),
(self.ansi_prefix + '1m').encode('utf-8'),
i)
else:
_linelst[_cnt] = re.sub('^'.encode('utf-8'),
(self.ansi_prefix + '0m').encode('utf-8'),
i)
_cnt += 1
_bold = not _bold
line = b''.join(_linelst)
# Then we handle colors.
_cnt = 0
_linelst = line.split(b'\x03')
for i in _linelst[:]:
_color_idx = re.sub('^([0-9]{2}).*$'.encode('utf-8'),
'\g<1>',
i,
re.MULTILINE).decode('utf-8')
if _color_idx in self.colormap.keys():
_linelst[_cnt] = re.sub('^[0-9]{2}'.encode('utf-8'),
(self.ansi_prefix + self.colormap[_color_idx]).encode('utf-8'),
i)
_cnt += 1
line = b''.join(_linelst)
# Lastly, we fix join/part and other messages.
_cnt = 0
_linelst = line.split(b'\x04;/')
for i in _linelst[:]:
_templine = re.sub('^'.encode('utf-8'),
''.encode('utf-8'),
i,
re.MULTILINE)
_templine = re.sub('-!-'.encode('utf-8'),
'\033[2m-!-'.encode('utf-8'),
_templine)
_linelst[_cnt] = re.sub('\x043/'.encode('utf-8'),
''.encode('utf-8'),
_templine)
_cnt += 1
line = re.sub(b'^\x1b\[0;32m\x1b\[0m\x1b\[0m', b'\033[0m', b''.join(_linelst))
# Lastly we strip out \x04>/
line = re.sub(b'\x04>/', b'', line)
###
_datalst[_idx] = line
_idx += 1
###
self.data = b'\n'.join(_datalst)
if self.args['html']:
try:
import ansi2html
_has_html = True
except ImportError:
print(('Warning: you have selected HTML output but do not ' +
'have the ansi2html module installed. Rendering HTML ' +
'output is not possible.'))
_has_html = False
else:
_has_html = False
if _has_html:
# This... basically sucks. It currently doesn't properly interpret the ANSI.
_html = ansi2html.Ansi2HTMLConverter()
self.data = _html.convert(self.data.decode('utf-8'))
else: # We want plaintext, so strip ALL formatting.
_stripbytes = ['\x04>/', '\x02', '\x043/', '\x048/', '\x049/', '\x04g', '\x04e', '\x04c', '\x04;/']
for b in _stripbytes:
self.data = re.sub(b.encode('utf-8'), ''.encode('utf-8'), self.data)
self.data = re.sub('\\x03[0-9]{2}'.encode('utf-8'), ''.encode('utf-8'), self.data)
return()
def parseArgs():
args = argparse.ArgumentParser()
args.add_argument('-c', '--color',
dest = 'color',
action = 'store_true',
help = ('Print the log with converted colors.'))
args.add_argument('-H', '--html',
dest = 'html',
action = 'store_true',
help = ('Render HTML output.'))
args.add_argument(dest = 'logfile',
metavar = 'path/to/logfile',
help = ('The path to the log file. It can be uncompressed ' +
'or compressed with XZ/LZMA, Gzip, or Bzip2.'))
return(args)
if __name__ == '__main__':
args = vars(parseArgs().parse_args())
l = logParser(args)
l.parseLog()
print(l.data.decode('utf-8'))