diff --git a/arch/relchk.py b/arch/relchk.py
index dfbf1f7..8ec78e2 100755
--- a/arch/relchk.py
+++ b/arch/relchk.py
@@ -1,81 +1,293 @@
 #!/usr/bin/env python3
-import configparser
+import datetime
+import json
 import hashlib
 import os
+import pathlib
 import re
-from bs4 import BeautifulSoup
-from urllib.parse import urljoin, urlparse
-from urllib.request import urlopen
-try:
-    import lxml
-    htmlparser = 'lxml'
-except ImportError:
-    htmlparser = 'html.parser'
+import shutil
+##
+import psutil
+import requests
+from lxml import etree
+##
+import arch_mirror_ranking  # /arch/arch_mirror_ranking.py
-cfgpath = os.path.abspath(os.path.expanduser(
-    '~/.config/optools/relchk/arch.ini'))
-cfg = configparser.ConfigParser()
-cfg['arch'] = {'url': 'https://arch.mirror.square-r00t.net/iso/latest/',
-               'path': '/boot/iso/arch.iso',
-               'hashtype': 'sha1',
-               'hashurl': (
-                   'https://arch.mirror.square-r00t.net/iso/latest/sha1sums.txt')
-               }
+
+
+class Updater(object):
+    # Filename pattern for official ISOs; the named groups capture version and arch.
+    _fname_re = re.compile(r'^archlinux-(?P<ver>[0-9]{4}\.[0-9]{2}\.[0-9]{2})-(?P<arch>(i686|x86_64))\.iso$')
+    _def_hash = 'sha1'
+    _allowed_hashes = ('md5', 'sha1')
+    _allowed_arches = ('x86_64', )
+    _date_fmt = '%a, %d %b %Y %H:%M:%S %z'
+    _datever_fmt = '%Y.%m.%d'
+    _arch = 'x86_64'  # Arch Linux proper only offers x86_64.
+    _iso_dir = 'iso/latest'
+    _iso_file = os.path.join(_iso_dir, 'archlinux-{ver}-{arch}.iso')
-if not os.path.isfile(cfgpath):
-    os.makedirs(os.path.dirname(cfgpath), exist_ok = True)
-    with open(cfgpath, 'w') as f:
-        cfg.write(f)
-else:
-    cfg.read(cfgpath)
+
+    def __init__(self,
+                 dest_dir = '/boot/iso',
+                 dest_file = 'arch.iso',
+                 ver_file = '.arch.json',
+                 lock_path = '/tmp/.arch.lck',
+                 feed_url = 'https://archlinux.org/feeds/releases/',
+                 grub_cfg = '/etc/grub.d/40_custom_arch',
+                 # check_gpg = True,  # TODO: GPG sig checking
+                 hash_type = 'sha1'):
+        # if arch.lower() not in self._allowed_arches:
+        #     raise ValueError('arch must be one of: {0}'.format(', '.join(self._allowed_arches)))
+        # else:
+        #     self._arch = arch.lower()
+        if hash_type.lower() not in self._allowed_hashes:
+            raise ValueError('hash_type must be one of: {0}'.format(', '.join(self._allowed_hashes)))
+        else:
+            self.hash_type = hash_type.lower()
+        self.dest_dir = os.path.abspath(os.path.expanduser(dest_dir))
+        self.dest_file = dest_file
+        self.ver_file = ver_file
+        self.feed_url = feed_url
+        self.grub_cfg = grub_cfg
+        self.lckfile = os.path.abspath(os.path.expanduser(lock_path))
+        # From the JSON.
+        self.rel_notes_url = None
+        self.old_date = None
+        self.old_ver = None
+        self.old_hash = None
+        self.mirror_base = None
+        self.country = None
+        # New vals.
+        self.new_date = None
+        self.new_ver = None
+        self.new_hash = None
+        # Instance vars again.
+        self.do_update = False
+        self.force_update = False
+        self.iso_url = None
+        self.ipv4 = True
+        self.ipv6 = False
+        self.dest_iso = os.path.join(self.dest_dir, self.dest_file)
+        self.dest_ver = os.path.join(self.dest_dir, self.ver_file)
+        self._init_vars()
-cfg['arch']['path'] = os.path.abspath(os.path.expanduser(cfg['arch']['path']))
+
+    def _init_vars(self):
+        if self.getRunning():
+            return(None)
+        self.getCountry()
+        self.getNet()
+        self.getCurVer()
+        self.getNewVer()
+        return(None)
-# We need the hashes first. We'll pop them into memory,
-# no need to save locally.
-# Must be in GNU checksum format (i.e. "<hash>  <filename>\n").
-hashes = {}
-if 'hashurl' in cfg['arch']:
-    with urlopen(cfg['arch']['hashurl']) as h:
-        for i in h.read().decode('utf-8').splitlines():
-            line = [x.strip() for x in i.split()]
-            hashes[os.path.basename(line[1])] = line[0]
-chksum = hashlib.new(cfg['arch']['hashtype'])
+    def main(self):
+        if self.getRunning():
+            return(None)
+        self.lock()
+        if (self.do_update or
+                self.force_update or
+                not all((self.old_date,
+                         self.old_ver,
+                         self.old_hash))):
+            self.do_update = True
+            self.findMirror()
+            self.download()
+        self.touchVer()
+        self.unlock()
+        return(None)
-# Now we (try to) get a list of files available for download. We're looking
-# for .iso or .img files. Compressed images not currently supported; TODO.
-exts = re.compile('.*\.(iso|img)$', re.IGNORECASE)
-imgfiles = []
-with urlopen(cfg['arch']['url']) as u:
-    dlsoup = BeautifulSoup(u.read().decode('utf-8'), htmlparser)
-for a in dlsoup.find_all('a'):
-    if a['href']:
-        if exts.search(a['href']):
-            if not urlparse(a['href']).netloc:
-                imgfiles.append(urljoin(cfg['arch']['url'], a['href']))
-            else:
-                imgfiles.append(a['href'])
-if not imgfiles:
-    raise RuntimeError('Could not find any ISO or IMG files at {0}'.format(
-        cfg['arch']['url']))
-# Not foolproof, but will handle standard Arch ISO mirrors just fine.
-imgfiles.sort()
-iso = imgfiles[0]
+
+    def download(self):
+        if self.getRunning():
+            return(None)
+        if not any((self.do_update, self.force_update)):
+            return(None)
+        if not self.iso_url:
+            raise RuntimeError('iso_url attribute must be set first')
+        # Some endpoints dislike the default python-requests User-Agent, so masquerade as curl.
+        req = requests.get(self.iso_url, stream = True, headers = {'User-Agent': 'curl/7.74.0'})
+        if not req.ok:
+            raise RuntimeError('Received non-200/30x {0} for {1}'.format(req.status_code, self.iso_url))
+        with req as uri:
+            with open(self.dest_iso, 'wb') as fh:
+                shutil.copyfileobj(uri.raw, fh)
+        # Hash the ISO in chunks rather than slurping the whole image into memory.
+        hasher = hashlib.new(self.hash_type)
+        with open(self.dest_iso, 'rb') as fh:
+            for chunk in iter(lambda: fh.read(65536), b''):
+                hasher.update(chunk)
+        realhash = hasher.hexdigest().lower()
+        if realhash != self.new_hash:
+            raise RuntimeError('Hash mismatch: {0} (LOCAL), {1} (REMOTE)'.format(realhash, self.new_hash))
+        self.updateVer()
+        return(None)
-# Now we get the existing file (if it exists) and grab the hash (if we have
-# one fetched).
-up2date = False
-if os.path.isfile(cfg['arch']['path']):
-    _fname = os.path.basename(iso)
-    if _fname in hashes:
-        with open(cfg['arch']['path'], 'rb') as f:
-            chksum.update(f.read())
-        if chksum.hexdigest().lower() == hashes[_fname].lower():
-            up2date = True
+
+    def findMirror(self):
+        self.getCountry()
+        if self.mirror_base:
+            return(None)
+        # Try HTTP first, then HTTPS; take the first ranked mirror that answers.
+        for p in ('http', 'https'):
+            m = arch_mirror_ranking.MirrorIdx(country = self.country,
+                                              proto = p,
+                                              is_active = True,
+                                              ipv4 = self.ipv4,
+                                              ipv6 = self.ipv6,
+                                              isos = True,
+                                              statuses = False)
+            for s in m.ranked_servers:
+                try:
+                    req = requests.get(s['url'])
+                    if req.ok:
+                        self.mirror_base = s['url']
+                        break
+                except (OSError, ConnectionRefusedError):
+                    continue
+            if self.mirror_base:
+                break
+        return(None)
-if not up2date:
-    os.makedirs(os.path.dirname(cfg['arch']['path']), exist_ok = True)
-    with open(cfg['arch']['path'], 'wb') as f, urlopen(iso) as u:
-        f.write(u.read())
+
+    def getCountry(self):
+        if self.country:  # The geolocation API only allows a limited number of free requests.
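+            # Already determined this run, or carried over from the version JSON; skip the lookup.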
+            return(None)
+        url = 'https://ipinfo.io/country'
+        req = requests.get(url, headers = {'User-Agent': 'curl/7.74.0'})
+        if not req.ok:
+            raise RuntimeError('Received non-200/30x {0} for {1}'.format(req.status_code, url))
+        self.country = req.content.decode('utf-8').strip().upper()
+        return(None)
+
+    def getCurVer(self):
+        if self.getRunning():
+            return(None)
+        if not os.path.isfile(self.dest_ver):
+            # No state file yet; treat this as a first run and force a download.
+            self.do_update = True
+            self.force_update = True
+            self.old_ver = None
+            return(None)
+        with open(self.dest_ver, 'rb') as fh:
+            ver_info = json.load(fh)
+        self.old_date = datetime.datetime.strptime(ver_info['date'], self._date_fmt)
+        self.old_ver = datetime.datetime.strptime(ver_info['ver'], self._datever_fmt)
+        # May be None if the configured hash type changed since the last run;
+        # the comparison below then forces a refresh.
+        self.old_hash = ver_info.get(self.hash_type)
+        self.country = ver_info.get('country')
+        self.new_hash = self.old_hash
+        self.new_ver = self.old_ver
+        self.new_date = self.old_date
+        # if ver_info.get('arch') != self._arch:
+        #     self.do_update = True
+        #     self.force_update = True
+        try:
+            hasher = hashlib.new(self.hash_type)
+            # Hash in chunks rather than reading the whole ISO into memory.
+            with open(self.dest_iso, 'rb') as fh:
+                for chunk in iter(lambda: fh.read(65536), b''):
+                    hasher.update(chunk)
+            if self.old_hash != hasher.hexdigest().lower():
+                self.do_update = True
+                self.force_update = True
+        except FileNotFoundError:
+            self.do_update = True
+            self.force_update = True
+            return(None)
+        return(None)
+
+    def getNet(self):
+        # Store this host's public address per family, or False if that family is unusable.
+        for k in ('ipv4', 'ipv6'):
+            url = 'https://{0}.clientinfo.square-r00t.net'.format(k)
+            try:
+                req = requests.get(url)
+                setattr(self, k, req.json()['ip'])
+            except OSError:
+                setattr(self, k, False)
+        return(None)
+
+    def getNewVer(self):
+        if self.getRunning():
+            return(None)
+        if not self.mirror_base:
+            self.findMirror()
+        req = requests.get(self.feed_url, headers = {'User-Agent': 'curl/7.74.0'})
+        if not req.ok:
+            raise RuntimeError('Received non-200/30x {0} for {1}'.format(req.status_code, self.feed_url))
+        feed = etree.fromstring(req.content)
+        for item in feed.xpath('//item'):
+            date_xml = item.find('pubDate')
+            ver_xml = item.find('title')
+            notes_xml = item.find('link')
+            date = ver = notes = None
+            if date_xml is not None:
+                date = datetime.datetime.strptime(date_xml.text, self._date_fmt)
+            if ver_xml is not None:
+                ver = ver_xml.text
+            if notes_xml is not None:
+                notes = notes_xml.text
+            # The item title carries the version string (YYYY.MM.DD).
+            new_ver = datetime.datetime.strptime(ver, self._datever_fmt)
+            if (not all((self.old_ver, self.old_date)) or
+                    (new_ver > self.old_ver) or
+                    (self.old_date < date)):
+                self.do_update = True
+                self.new_ver = new_ver
+                self.new_date = date
+                self.rel_notes_url = notes
+            datever = self.new_ver.strftime(self._datever_fmt)
+            # os.path.join() is fine for URLs here; this only runs on POSIX hosts.
+            self.iso_url = os.path.join(self.mirror_base,
+                                        self._iso_file.lstrip('/')).format(ver = datever, arch = self._arch)
+            hash_url = os.path.join(self.mirror_base,
+                                    self._iso_dir,
+                                    '{0}sums.txt'.format(self.hash_type))
+            req = requests.get(hash_url, headers = {'User-Agent': 'curl/7.74.0'})
+            if not req.ok:
+                raise RuntimeError('Received non-200/30x {0} for {1}'.format(req.status_code, hash_url))
+            hash_lines = req.content.decode('utf-8').strip().splitlines()
+            tgt_fname = os.path.basename(self.iso_url)
+            for line in hash_lines:
+                if line.strip().startswith('#'):
+                    continue
+                hash_str, fname = line.split()
+                if fname != tgt_fname:
+                    continue
+                self.new_hash = hash_str.lower()
+                break
+            # The feed is newest-first; only the most recent release matters.
+            break
+        return(None)
+
+    def getRunning(self):
+        # True if another live instance holds the lock; stale locks are cleared.
+        if not os.path.isfile(self.lckfile):
+            return(False)
+        my_pid = os.getpid()
+        with open(self.lckfile, 'r') as fh:
+            pid = int(fh.read().strip())
+        if not psutil.pid_exists(pid):
+            # The recorded PID is gone; remove the stale lockfile.
+            os.remove(self.lckfile)
+            return(False)
+        if pid == my_pid:
+            return(False)
+        return(True)
+
+    def lock(self):
+        with open(self.lckfile, 'w') as fh:
+            fh.write(str(os.getpid()))
+        return(None)
+
+    def touchVer(self):
+        if self.getRunning():
+            return(None)
+        ver_path = pathlib.Path(self.dest_ver)
+        ver_path.touch(exist_ok = True)
+        return(None)
+
+    def unlock(self):
+        if os.path.isfile(self.lckfile):
+            os.remove(self.lckfile)
+        return(None)
+
+    def updateVer(self):
+        if self.getRunning():
+            return(None)
+        d = {'date': self.new_date.strftime(self._date_fmt),
+             'mirror': self.mirror_base,
+             'country': self.country,
+             'notes': self.rel_notes_url,
+             'ver': self.new_ver.strftime(self._datever_fmt),
+             self.hash_type: self.new_hash}
+        j = json.dumps(d, indent = 4)
+        with open(self.dest_ver, 'w') as fh:
+            fh.write(j)
+            fh.write('\n')
+        return(None)
+
+
+if __name__ == '__main__':
+    u = Updater()
+    u.main()
diff --git a/arch/relchk2.py b/arch/relchk2.py
deleted file mode 100755
index 8ec78e2..0000000
--- a/arch/relchk2.py
+++ /dev/null
@@ -1,293 +0,0 @@
-#!/usr/bin/env python3
-
-import datetime
-import json
-import hashlib
-import os
-import pathlib
-import re
-import shutil
-##
-import psutil
-import requests
-from lxml import etree
-##
-import arch_mirror_ranking  # /arch/arch_mirror_ranking.py
-
-
-class Updater(object):
-    _fname_re = re.compile(r'^archlinux-(?P<ver>[0-9]{4}\.[0-9]{2}\.[0-9]{2})-(?P<arch>(i686|x86_64)).iso$')
-    _def_hash = 'sha1'
-    _allowed_hashes = ('md5', 'sha1')
-    _allowed_arches = ('x86_64', )
-    _date_fmt = '%a, %d %b %Y %H:%M:%S %z'
-    _datever_fmt = '%Y.%m.%d'
-    _arch = 'x86_64'  # Arch Linux proper only offers x86_64.
-    _iso_dir = 'iso/latest'
-    _iso_file = os.path.join(_iso_dir, 'archlinux-{ver}-{arch}.iso')
-
-    def __init__(self,
-                 dest_dir = '/boot/iso',
-                 dest_file = 'arch.iso',
-                 ver_file = '.arch.json',
-                 lock_path = '/tmp/.arch.lck',
-                 feed_url = 'https://archlinux.org/feeds/releases/',
-                 grub_cfg = '/etc/grub.d/40_custom_arch',
-                 # check_gpg = True,  # TODO: GPG sig checking
-                 hash_type = 'sha1'):
-        # if arch.lower() not in self._allowed_arches:
-        #     raise ValueError('arch must be one of: {0}'.format(', '.join(self._allowed_arches)))
-        # else:
-        #     self._arch = arch.lower()
-        if hash_type.lower() not in self._allowed_hashes:
-            raise ValueError('hash_type must be one of: {0}'.format(', '.join(self._allowed_hashes)))
-        else:
-            self.hash_type = hash_type.lower()
-        self.dest_dir = os.path.abspath(os.path.expanduser(dest_dir))
-        self.dest_file = dest_file
-        self.ver_file = ver_file
-        self.feed_url = feed_url
-        self.grub_cfg = grub_cfg
-        self.lckfile = os.path.abspath(os.path.expanduser(lock_path))
-        # From the JSON.
-        self.rel_notes_url = None
-        self.old_date = None
-        self.old_ver = None
-        self.old_hash = None
-        self.mirror_base = None
-        self.country = None
-        # New vals.
-        self.new_date = None
-        self.new_ver = None
-        self.new_hash = None
-        # Instance vars again.
- self.do_update = False - self.force_update = False - self.iso_url = None - self.ipv4 = True - self.ipv6 = False - self.dest_iso = os.path.join(self.dest_dir, self.dest_file) - self.dest_ver = os.path.join(self.dest_dir, self.ver_file) - self._init_vars() - - def _init_vars(self): - if self.getRunning(): - return(None) - self.getCountry() - self.getNet() - self.getCurVer() - self.getNewVer() - return(None) - - def main(self): - if self.getRunning(): - return(None) - self.lock() - if self.do_update or \ - self.force_update or not \ - all((self.old_date, - self.old_ver, - self.old_hash)): - self.do_update = True - self.findMirror() - self.download() - self.touchVer() - self.unlock() - return(None) - - def download(self): - if self.getRunning(): - return(None) - if not any((self.do_update, self.force_update)): - return(None) - if not self.iso_url: - raise RuntimeError('iso_url attribute must be set first') - req = requests.get(self.iso_url, stream = True, headers = {'User-Agent': 'curl/7.74.0'}) - if not req.ok: - raise RuntimeError('Received non-200/30x {0} for {1}'.format(req.status_code, self.iso_url)) - with req as uri: - with open(self.dest_iso, 'wb') as fh: - shutil.copyfileobj(uri.raw, fh) - hasher = hashlib.new(self.hash_type) - with open(self.dest_iso, 'rb') as fh: - hasher.update(fh.read()) - realhash = hasher.hexdigest().lower() - if realhash != self.new_hash: - raise RuntimeError('Hash mismatch: {0} (LOCAL), {1} (REMOTE)'.format(realhash, self.new_hash)) - self.updateVer() - return(None) - - def findMirror(self): - self.getCountry() - if self.mirror_base: - return(None) - for p in ('http', 'https'): - m = arch_mirror_ranking.MirrorIdx(country = self.country, - proto = 'http', - is_active = True, - ipv4 = self.ipv4, - ipv6 = self.ipv6, - isos = True, - statuses = False) - for s in m.ranked_servers: - try: - req = requests.get(s['url']) - if req.ok: - self.mirror_base = s['url'] - break - except (OSError, ConnectionRefusedError): - continue - return(None) - - def getCountry(self): - if self.country: # The API has limited number of accesses for free. 
-            return(None)
-        url = 'https://ipinfo.io/country'
-        req = requests.get(url, headers = {'User-Agent': 'curl/7.74.0'})
-        if not req.ok:
-            raise RuntimeError('Received non-200/30x {0} for {1}'.format(req.status_code, url))
-        self.country = req.content.decode('utf-8').strip().upper()
-        return(None)
-
-    def getCurVer(self):
-        if self.getRunning():
-            return(None)
-        if not os.path.isfile(self.dest_ver):
-            self.do_update = True
-            self.force_update = True
-            self.old_ver = 0.00
-            return(None)
-        with open(self.dest_ver, 'rb') as fh:
-            ver_info = json.load(fh)
-        self.old_date = datetime.datetime.strptime(ver_info['date'], self._date_fmt)
-        self.old_ver = datetime.datetime.strptime(ver_info['ver'], self._datever_fmt)
-        self.old_hash = ver_info.get(self.hash_type, self._def_hash)
-        self.country = ver_info.get('country')
-        self.new_hash = self.old_hash
-        self.new_ver = self.old_ver
-        self.new_date = self.old_date
-        # if ver_info.get('arch') != self._arch:
-        #     self.do_update = True
-        #     self.force_update = True
-        try:
-            hasher = hashlib.new(self.hash_type)
-            with open(self.dest_iso, 'rb') as fh:
-                hasher.update(fh.read())
-            if self.old_hash != hasher.hexdigest().lower():
-                self.do_update = True
-                self.force_update = True
-        except FileNotFoundError:
-            self.do_update = True
-            self.force_update = True
-            return(None)
-        return(None)
-
-    def getNet(self):
-        for k in ('ipv4', 'ipv6'):
-            url = 'https://{0}.clientinfo.square-r00t.net'.format(k)
-            try:
-                req = requests.get(url)
-                setattr(self, k, req.json()['ip'])
-            except OSError:
-                setattr(self, k, False)
-        return(None)
-
-    def getNewVer(self):
-        if self.getRunning():
-            return(None)
-        if not self.mirror_base:
-            self.findMirror()
-        req = requests.get(self.feed_url, headers = {'User-Agent': 'curl/7.74.0'})
-        if not req.ok:
-            raise RuntimeError('Received non-200/30x {0} for {1}'.format(req.status_code, self.feed_url))
-        feed = etree.fromstring(req.content)
-        for item in feed.xpath('//item'):
-            date_xml = item.find('pubDate')
-            ver_xml = item.find('title')
-            notes_xml = item.find('link')
-            date = ver = notes = None
-            if date_xml is not None:
-                date = datetime.datetime.strptime(date_xml.text, self._date_fmt)
-            if ver_xml is not None:
-                ver = ver_xml.text
-            if notes_xml is not None:
-                notes = notes_xml.text
-            new_ver = datetime.datetime.strptime(ver, self._datever_fmt)
-            if not all((self.old_ver, self.old_date)) or \
-               (new_ver > self.old_ver) or \
-               (self.old_date < date):
-                self.do_update = True
-                self.new_ver = new_ver
-                self.new_date = date
-                self.rel_notes_url = notes
-            datever = self.new_ver.strftime(self._datever_fmt)
-            self.iso_url = os.path.join(self.mirror_base,
-                                        self._iso_file.lstrip('/')).format(ver = datever, arch = self._arch)
-            hash_url = os.path.join(self.mirror_base,
-                                    self._iso_dir,
-                                    '{0}sums.txt'.format(self.hash_type))
-            req = requests.get(hash_url, headers = {'User-Agent': 'curl/7.74.0'})
-            if not req.ok:
-                raise RuntimeError('Received non-200/30x {0} for {1}'.format(req.status_code, hash_url))
-            hash_lines = req.content.decode('utf-8').strip().splitlines()
-            tgt_fname = os.path.basename(self.iso_url)
-            for line in hash_lines:
-                if line.strip().startswith('#'):
-                    continue
-                hash_str, fname = line.split()
-                if fname != tgt_fname:
-                    continue
-                self.new_hash = hash_str.lower()
-                break
-            break
-        return(None)
-
-    def getRunning(self):
-        if not os.path.isfile(self.lckfile):
-            return(False)
-        my_pid = os.getpid()
-        with open(self.lckfile, 'r') as fh:
-            pid = int(fh.read().strip())
-        if not psutil.pid_exists(pid):
-            os.remove(self.lckfile)
-            return(False)
-        if pid == my_pid:
-            return(False)
-        return(True)
-
-    def lock(self):
-        with open(self.lckfile, 'w') as fh:
-            fh.write(str(os.getpid()))
-        return(None)
-
-    def touchVer(self):
-        if self.getRunning():
-            return(None)
-        ver_path = pathlib.Path(self.dest_ver)
-        ver_path.touch(exist_ok = True)
-        return(None)
-
-    def unlock(self):
-        if os.path.isfile(self.lckfile):
-            os.remove(self.lckfile)
-        return(None)
-
-    def updateVer(self):
-        if self.getRunning():
-            return(None)
-        d = {'date': self.new_date.strftime(self._date_fmt),
-             'mirror': self.mirror_base,
-             'country': self.country,
-             'notes': self.rel_notes_url,
-             'ver': self.new_ver.strftime(self._datever_fmt),
-             self.hash_type: self.new_hash}
-        j = json.dumps(d, indent = 4)
-        with open(self.dest_ver, 'w') as fh:
-            fh.write(j)
-            fh.write('\n')
-        return(None)
-
-
-if __name__ == '__main__':
-    u = Updater()
-    u.main()