#!/usr/bin/env python3

# https://sysadministrivia.com/news/every-new-beginning

"""Compare podcast episode SHA-256 sums against the GUIDs served in the feeds.

Each feed item's <guid> text is compared with the SHA-256 hex digest of the
episode's media, either fetched live from the enclosure URL (--live) or read
from local copies found under a directory (--directory). With neither
option, the GUIDs of every selected feed are simply printed per episode.
"""

import hashlib
import argparse
import os
import glob
from urllib.request import urlopen
try:
    from lxml import etree
except ImportError:
    import xml.etree.ElementTree as etree

# TODO: GPG verification too

baseurl = 'https://sysadministrivia.com'
feeds = {'itunes': '/feed/itunes.xml',
         'google': '/feed/google.xml',
         'mp3': '/feed/podcast.xml',
         'ogg': '/feed/oggcast.xml'}


def _sha256Stream(stream, chunksize = 4096):
    """Return the SHA-256 hex digest of everything readable from `stream`.

    `stream` only needs a read(size) method that returns bytes and yields
    b'' at end of data, so both HTTP responses and binary files work.
    """
    digest = hashlib.sha256()
    # Read in chunks so large media files are never held in memory at once.
    for chunk in iter(lambda: stream.read(chunksize), b''):
        digest.update(chunk)
    return digest.hexdigest()


def _checkLocal(episodes, locdir):
    """Hash local copies of `episodes` found under `locdir`; warn on mismatch.

    `episodes` maps episode ID to a dict holding at least 'file' and 'guid'.
    Exits the program (SystemExit) if `locdir` is not an existing directory.
    """
    localdir = os.path.abspath(os.path.expanduser(locdir))
    if not os.path.isdir(localdir):
        # SystemExit rather than the site-provided exit(), which is not
        # available when the interpreter runs without the site module.
        raise SystemExit('ERROR: Directory {0} does not exist!'.format(locdir))
    print('Checking local files...')
    for entry in episodes.values():
        # Escape both parts: a directory or file name containing glob
        # metacharacters ("[", "*", "?") must be matched literally.
        pattern = '{0}/**/{1}'.format(glob.escape(localdir),
                                      glob.escape(entry['file']))
        for localfile in glob.iglob(pattern, recursive = True):
            print('Checking {0}...'.format(localfile))
            with open(localfile, 'rb') as f:
                localsum = _sha256Stream(f)
            if localsum != entry['guid']:
                print('WARNING: GUID {0} does not match local hash {1}!'.format(entry['guid'], localsum))
    print('Finished checking local files.')


def getXML(baseurl, feeds, args):
    """Download and parse the XML of every feed named in args.feedlist.

    Returns a dict mapping feed name to the parsed root element. The URL of
    each feed is `baseurl` joined with the path stored in `feeds`.
    """
    print('Fetching feed(s) XML, please wait...')
    xml = {}
    for feed in args.feedlist:
        with urlopen(baseurl + feeds[feed]) as url:
            xml[feed] = etree.fromstring(url.read())
    return xml


def getSums(xml, args):
    """Collect per-episode data from the parsed feeds and run the checks.

    xml:  dict of feed name -> parsed feed root, as returned by getXML().
    args: namespace providing feedlist (list of feed names), livesums
          (bool) and locdir (directory path, or a false value).

    Returns {feed: {episodeID: {'uri', 'guid', 'file'[, 'livesha']}}}, where
    episodeID is the item title up to its first colon. 'livesha' is only
    present when args.livesums is set.
    """
    sums = {}
    if not args.feedlist:
        # "-f" given without values yields an empty list; nothing to check.
        return sums
    for feed in args.feedlist:
        episodes = sums[feed] = {}
        for item in xml[feed].findall('channel/item'):
            epID = item.find('title').text.split(':')[0]
            uri = item.find('enclosure').attrib['url']
            entry = {'uri': uri,
                     'guid': item.find('guid').text,
                     'file': os.path.basename(uri)}
            episodes[epID] = entry
            if args.livesums:
                print('{0}({1}): Fetching/verifying live sum...'.format(epID, feed))
                with urlopen(uri) as url:
                    entry['livesha'] = _sha256Stream(url)
                if entry['livesha'] != entry['guid']:
                    # Index 0 is the GUID, index 1 the live sum.
                    print('\t\tWARNING: GUID {0} does not match live sum {1}!'.format(entry['guid'], entry['livesha']))
    if args.locdir:
        # Local files are compared against the first selected feed only.
        _checkLocal(sums[args.feedlist[0]], args.locdir)
    if not args.locdir and not args.livesums:
        # No verification requested: list each feed's GUID per episode,
        # using the first selected feed as the list of episodes.
        for episode in sums[args.feedlist[0]]:
            print(episode + ':')
            for feed in args.feedlist:
                entry = sums[feed].get(episode)
                # Feeds are not guaranteed to carry identical episode sets.
                guid = entry['guid'] if entry is not None else '(missing from feed)'
                print('\t{0:6}: {1}'.format(feed, guid))
    return sums


def parseArgs():
    """Build and return the argument parser (not the parsed arguments)."""
    args = argparse.ArgumentParser(description = 'Sysadministrivia Verifier',
                                   epilog = 'https://git.square-r00t.net/Podloader')
    args.add_argument('-l', '--live',
                      dest = 'livesums',
                      action = 'store_true',
                      help = 'If specified, calculate the sums live from the site and compare against the GUIDs served. This can take a long time.')
    args.add_argument('-f', '--feed',
                      choices = ['itunes', 'google', 'mp3', 'ogg'],
                      dest = 'feedlist',
                      nargs = '*',
                      default = ['itunes', 'google', 'mp3', 'ogg'],
                      help = 'Which feed(s) to check. The default is all. Multiple can be specified via "-f itunes google" etc.')
    args.add_argument('-d', '--directory',
                      dest = 'locdir',
                      metavar = 'path',
                      default = False,
                      help = 'If specified, a directory where local copies of the episodes exist. (e.g. ~/gPodder/Downloads/Sysadministrivia)')
    return args


def main():
    """Parse the command line, fetch the feeds and run the requested checks."""
    args = parseArgs().parse_args()
    xml = getXML(baseurl, feeds, args)
    getSums(xml, args)


if __name__ == '__main__':
    main()