summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorbrent s <bts@square-r00t.net>2017-04-04 07:54:39 -0400
committerbrent s <bts@square-r00t.net>2017-04-04 07:54:39 -0400
commitd2d405a36a9ca369c575db026f6c6041e60f12b1 (patch)
tree1b3d6f69f6ee9a50fc9955b6eb45f3b0b5026afb
parent9931d64d489dc578a2fe71c8ddeecf4adbe89702 (diff)
downloadPodloader-d2d405a36a9ca369c575db026f6c6041e60f12b1.tar.xz
pretty happy with this now.
-rwxr-xr-xverifyfeed.py105
1 files changed, 105 insertions, 0 deletions
diff --git a/verifyfeed.py b/verifyfeed.py
new file mode 100755
index 0000000..9e4ad41
--- /dev/null
+++ b/verifyfeed.py
@@ -0,0 +1,105 @@
+#!/usr/bin/env python3
+
+# https://sysadministrivia.com/news/every-new-beginning
+
+import hashlib
+import argparse
+import os
+import glob
+from urllib.request import urlopen
+try:
+ from lxml import etree
+except ImportError:
+ import xml.etree.ElementTree as etree
+
# Site root; every feed path below is appended to it verbatim.
baseurl = 'https://sysadministrivia.com'

# Feed name (as accepted by -f/--feed) -> path of that feed's XML under baseurl.
feeds = {
    'itunes': '/feed/itunes.xml',
    'google': '/feed/google.xml',
    'mp3': '/feed/podcast.xml',
    'ogg': '/feed/oggcast.xml',
}
+
def getXML(baseurl, feeds, args):
    """Download and parse the XML of every feed named in args.feedlist.

    baseurl -- URL prefix that each feed path is appended to
    feeds   -- mapping of feed name to the feed's path under baseurl
    args    -- parsed arguments; only args.feedlist (feed names) is read

    Returns a dict mapping each requested feed name to the root element
    produced by etree.fromstring() for that feed's document.
    """
    print('Fetching feed(s) XML, please wait...')
    parsed = {}
    for name in args.feedlist:
        location = baseurl + feeds[name]
        with urlopen(location) as response:
            document = response.read()
        parsed[name] = etree.fromstring(document)
    return parsed
+
def _sha256_of(stream, chunk_size=4096):
    """Return the hex SHA-256 digest of everything readable from stream."""
    digest = hashlib.sha256()
    for chunk in iter(lambda: stream.read(chunk_size), b''):
        digest.update(chunk)
    return digest.hexdigest()


def _check_local_files(episodes, localdir):
    """Hash every local copy of each episode file and warn on GUID mismatch."""
    print('Checking local files...')
    for info in episodes.values():
        # The filename comes from the feed and the directory from the user;
        # escape both so characters such as '[' or '*' are matched literally.
        pattern = os.path.join(glob.escape(localdir), '**',
                               glob.escape(info['file']))
        for localfile in glob.iglob(pattern, recursive=True):
            if not os.path.isfile(localfile):
                # e.g. a directory matched because the URL had no basename.
                continue
            print('Checking {0}...'.format(localfile))
            with open(localfile, 'rb') as f:
                localsha = _sha256_of(f)
            if localsha != info['guid']:
                print('WARNING: GUID {0} does not match local hash {1}!'.format(
                    info['guid'], localsha))
    print('Finished checking local files.')


def getSums(xml, args):
    """Collect each episode's GUID and optionally verify it as a SHA-256 sum.

    xml  -- dict of feed name -> parsed feed root element (see getXML)
    args -- parsed arguments; reads args.feedlist (feed names),
            args.livesums (download each enclosure and hash it) and
            args.locdir (directory holding local episode copies, or falsy)

    For every 'channel/item' of every requested feed, the episode ID (the
    title text before the first ':'), enclosure URL, GUID and file basename
    are recorded.  With args.livesums the enclosure is downloaded and its
    SHA-256 compared against the GUID.  With args.locdir, files under that
    directory whose name matches an episode of the FIRST requested feed are
    hashed and compared against the GUID.  With neither option the GUIDs
    are printed per episode and feed.

    Returns {feed: {episode_id: {'uri', 'guid', 'file'[, 'livesha']}}}.
    Raises SystemExit if args.locdir is given but is not a directory.
    """
    # Validate the directory first: live verification can take a long time
    # and there is no point starting it if the run is going to abort anyway.
    localdir = None
    if args.locdir:
        localdir = os.path.abspath(os.path.expanduser(args.locdir))
        if not os.path.isdir(localdir):
            raise SystemExit(
                'ERROR: Directory {0} does not exist!'.format(args.locdir))

    sums = {}
    for feed in args.feedlist:
        episodes = sums[feed] = {}
        for item in xml[feed].findall('channel/item'):
            epID = item.find('title').text.split(':')[0]
            uri = item.find('enclosure').attrib['url']
            info = episodes[epID] = {'uri': uri,
                                     'guid': item.find('guid').text,
                                     'file': os.path.basename(uri)}
            if args.livesums:
                print('{0}({1}): Fetching/verifying live sum...'.format(epID, feed))
                with urlopen(uri) as url:
                    info['livesha'] = _sha256_of(url)
                if info['livesha'] != info['guid']:
                    print('\t\tWARNING: GUID {0} does not match live sum {1}!'.format(
                        info['guid'], info['livesha']))

    if not args.feedlist:
        # Nothing was requested, so there is no "first feed" to report on.
        return sums

    first = sums[args.feedlist[0]]
    if localdir is not None:
        _check_local_files(first, localdir)
    if not args.locdir and not args.livesums:
        for episode in first:
            print(episode + ':')
            for feed in args.feedlist:
                print('\t{0:6}: {1}'.format(feed,
                                            sums[feed][episode]['guid']))
    return sums
+
def parseArgs():
    """Build the command-line parser for the verifier.

    Returns the configured argparse.ArgumentParser (not parsed arguments);
    callers invoke .parse_args() on it.  The resulting namespace has:

    livesums -- True when -l/--live was given, else False
    feedlist -- list of feed names from -f/--feed (default: all four)
    locdir   -- path given to -d/--directory, or False when omitted
    """
    parser = argparse.ArgumentParser(description = 'Sysadministrivia Verifier',
                                     epilog = 'https://git.square-r00t.net/Podloader')
    parser.add_argument('-l',
                        '--live',
                        dest = 'livesums',
                        action = 'store_true',
                        help = 'If specified, calculate the sums live from the site and compare against the GUIDs served. This can take a long time.')
    # nargs='+' (not '*'): a bare "-f" would otherwise yield an empty feed
    # list, which has no first feed to report on.
    parser.add_argument('-f',
                        '--feed',
                        choices = ['itunes', 'google', 'mp3', 'ogg'],
                        dest = 'feedlist',
                        nargs = '+',
                        default = ['itunes', 'google', 'mp3', 'ogg'],
                        help = 'Which feed(s) to check. The default is all. Multiple can be specified via "-f itunes google" etc.')
    parser.add_argument('-d',
                        '--directory',
                        dest = 'locdir',
                        metavar = 'path',
                        default = False,
                        help = 'If specified, a directory where local copies of the episodes exist. (e.g. ~/gPodder/Downloads/Sysadministrivia)')
    return parser
+
def main():
    """Parse the command line, fetch the requested feeds and verify them."""
    args = parseArgs().parse_args()
    xml = getXML(baseurl, feeds, args)
    # getSums() prints its findings itself; the returned mapping is not
    # needed here.
    getSums(xml, args)


if __name__ == '__main__':
    main()