#!/usr/bin/env python3 import os import argparse import textwrap import tarfile import io import re import pydoc from urllib.request import urlopen # TODO: non-txt format support? (i.e. PDF, HTML, etc.) # TODO: search function? keyword or regex, display RFC number and title def downloadRFC(destdir, rfcnum): rfcnum = (str(rfcnum)).lower() # In case argparse interprets it as an int or it's entered in uppercase # For when we implement format support: # "plain" = raw text # "html" = html text # "pdf" = pdf text # "x-gzip" is for a gzipped presumably tarball, but we use the "all" keyword for this. filetypes = ['plain', 'html', 'pdf', 'x-gzip'] filext = {} for t in filetypes: filext[t] = t filext['plain'] = 'txt' filext['x-gzip'] = 'txt' if rfcnum == 'all': rfcuri = 'https://www.rfc-editor.org/rfc/tar/RFC-all.tar.gz' rfctype = 'tar' filetype = 'x-gzip' else: rfcuri = 'https://tools.ietf.org/rfc/rfc{0}.txt'.format(rfcnum) rfctype = 'plain' # We'll make use of this later. filetype = 'plain' rfcdir = '{0}/{1}'.format(destdir, filext[filetype]) os.makedirs(rfcdir, exist_ok = True) # And some minor fixes. We don't need .txt extension for plaintype. Remove if someone complains. if filetype == 'plain': rfcpath = '{0}/{1}'.format(rfcdir, rfcnum) elif filetype == 'x-gzip': # We need to handle this a special way, since it's a gzipped tarball. rfcpath = rfcdir else: rfcpath = '{0}/{1}.{2}'.format(rfcdir, rfcnum, filext[filetype]) with urlopen(rfcuri) as rfc: # Is this a single RFC, a release tarball, etc. # Commented out for now until we implement multi-format support, as we have the 'all' keyword to help us. #rfctype = rfc.response.info().get_content_subtype() # Handle the tarball here. if filetype == 'x-gzip': content = io.BytesIO(rfc.read()) tarball = tarfile.open(fileobj = content) for i in tarball.getnames(): filedest = '{0}/{1}'.format(rfcpath, re.sub('^rfc([0-9]+)\.txt$', '\g<1>', i)) if re.match('^rfc[0-9]+\.txt$', i): with tarball.extractfile(i) as e: with open(filedest, 'wb') as f: f.write(e.read()) # We don't need to extract from the tarball, so we can just handle it as a plain read. else: content = rfc.read() with open(rfcpath, 'wb') as f: f.write(content) def pageRFC(rfcnum): rfcnum = (str(rfcnum)).lower() # In case argparse interprets it as an int or it's entered in uppercase with urlopen('https://tools.ietf.org/rfc/rfc{0}.txt'.format(rfcnum)) as rfc: pydoc.pager(rfc.read().decode('utf-8')) def parseArgs(): args = argparse.ArgumentParser(description = 'RFC Downloader/Viewer', epilog = 'TIP: this program has context-specific help. e.g. try "%(prog)s d -h"\nhttps://square-r00t.net/') subparsers = args.add_subparsers(help = 'Operation to perform', dest = 'operation') downloadargs = subparsers.add_parser('d', help = 'Download an RFC/RFCs.') pagerargs = subparsers.add_parser('p', help = 'Print the RFC to the terminal.') # TODO: add -b/--browser? redirect to lynx for html paging? downloadargs.add_argument('-d', '--destination', dest = 'destdir', metavar = 'DESTINATION', default = '/usr/local/share/doc/rfc', help = 'The destination directory to save the RFC to. Will be created if it doesn\'t exist (assuming we have permissions). The default is /usr/local/share/doc/rfc/.') downloadargs.add_argument(dest = 'rfcnum', metavar = 'RFC', default = 'all', help = 'The RFC number. If the special value "all" is used, then ALL of the published RFCs will be fetched.') pagerargs.add_argument(dest = 'rfcnum', metavar = 'RFC', help = 'The RFC number.') return(args) def main(): argsin = parseArgs() args = argsin.parse_args() if args.operation == 'd': downloadRFC(args.destdir, args.rfcnum) elif args.operation == 'p': pageRFC(args.rfcnum) else: argsin.print_help() if __name__ == '__main__': main()