changing to new format

root 2021-01-12 04:55:20 -05:00
parent 4663c3cd02
commit 436bc3d083
2 changed files with 280 additions and 361 deletions

@@ -1,81 +1,293 @@
#!/usr/bin/env python3

import configparser
import hashlib
import os
import re
from bs4 import BeautifulSoup
from urllib.parse import urljoin, urlparse
from urllib.request import urlopen
try:
    import lxml
    htmlparser = 'lxml'
except ImportError:
    htmlparser = 'html.parser'


cfgpath = os.path.abspath(os.path.expanduser(
    '~/.config/optools/relchk/arch.ini'))

cfg = configparser.ConfigParser()
cfg['arch'] = {'url': 'https://arch.mirror.square-r00t.net/iso/latest/',
               'path': '/boot/iso/arch.iso',
               'hashtype': 'sha1',
               'hashurl': (
                   'https://arch.mirror.square-r00t.net/iso/latest/sha1sums.txt')
               }

if not os.path.isfile(cfgpath):
    os.makedirs(os.path.dirname(cfgpath), exist_ok = True)
    with open(cfgpath, 'w') as f:
        cfg.write(f)
else:
    cfg.read(cfgpath)

cfg['arch']['path'] = os.path.abspath(os.path.expanduser(cfg['arch']['path']))

# We need the hashes first. We'll pop them into memory,
# no need to save locally.
# Must be in GNU checksum format (i.e. "<HASH>  <FILENAME>\n").
hashes = {}
if 'hashurl' in cfg['arch']:
    with urlopen(cfg['arch']['hashurl']) as h:
        for i in h.read().decode('utf-8').splitlines():
            line = [x.strip() for x in i.split()]
            hashes[os.path.basename(line[1])] = line[0]
chksum = hashlib.new(cfg['arch']['hashtype'])

# Now we (try to) get a list of files available for download. We're looking
# for .iso or .img files. Compressed images not currently supported; TODO.
exts = re.compile('.*\.(iso|img)$', re.IGNORECASE)
imgfiles = []
with urlopen(cfg['arch']['url']) as u:
    dlsoup = BeautifulSoup(u.read().decode('utf-8'), htmlparser)
    for a in dlsoup.find_all('a'):
        if a['href']:
            if exts.search(a['href']):
                if not urlparse(a['href']).netloc:
                    imgfiles.append(urljoin(cfg['arch']['url'], a['href']))
                else:
                    imgfiles.append(a['href'])
if not imgfiles:
    raise RuntimeError('Could not find any ISO or IMG files at {0}'.format(
        cfg['arch']['url']))
# Not foolproof, but will handle standard Arch ISO mirrors just fine.
imgfiles.sort()
iso = imgfiles[0]

# Now we get the existing file (if it exists) and grab the hash (if we have
# one fetched).
up2date = False
if os.path.isfile(cfg['arch']['path']):
    _fname = os.path.basename(iso)
    if _fname in hashes:
        with open(cfg['arch']['path'], 'rb') as f:
            chksum.update(f.read())
        if chksum.hexdigest().lower() == hashes[_fname].lower():
            up2date = True

if not up2date:
    os.makedirs(os.path.dirname(cfg['arch']['path']), exist_ok = True)
    with open(cfg['arch']['path'], 'wb') as f, urlopen(iso) as u:
        f.write(u.read())

@@ -1,293 +0,0 @@
#!/usr/bin/env python3

import datetime
import json
import hashlib
import os
import pathlib
import re
import shutil
##
import psutil
import requests
from lxml import etree
##
import arch_mirror_ranking # <optools>/arch/arch_mirror_ranking.py


class Updater(object):
    _fname_re = re.compile(r'^archlinux-(?P<version>[0-9]{4}\.[0-9]{2}\.[0-9]{2})-(?P<arch>(i686|x86_64))\.iso$')
_def_hash = 'sha1'
_allowed_hashes = ('md5', 'sha1')
_allowed_arches = ('x86_64', )
_date_fmt = '%a, %d %b %Y %H:%M:%S %z'
_datever_fmt = '%Y.%m.%d'
_arch = 'x86_64' # Arch Linux proper only offers x86_64.
_iso_dir = 'iso/latest'
_iso_file = os.path.join(_iso_dir, 'archlinux-{ver}-{arch}.iso')

def __init__(self,
dest_dir = '/boot/iso',
dest_file = 'arch.iso',
ver_file = '.arch.json',
lock_path = '/tmp/.arch.lck',
feed_url = 'https://archlinux.org/feeds/releases/',
grub_cfg = '/etc/grub.d/40_custom_arch',
# check_gpg = True, # TODO: GPG sig checking
hash_type = 'sha1'):
# if arch.lower() not in self._allowed_arches:
# raise ValueError('arch must be one of: {0}'.format(', '.join(self._allowed_arches)))
# else:
# self._arch = arch.lower()
if hash_type.lower() not in self._allowed_hashes:
raise ValueError('hash_type must be one of: {0}'.format(', '.join(self._allowed_hashes)))
else:
self.hash_type = hash_type.lower()
self.dest_dir = os.path.abspath(os.path.expanduser(dest_dir))
self.dest_file = dest_file
self.ver_file = ver_file
self.feed_url = feed_url
self.grub_cfg = grub_cfg
self.lckfile = os.path.abspath(os.path.expanduser(lock_path))
# From the JSON.
self.rel_notes_url = None
self.old_date = None
self.old_ver = None
self.old_hash = None
self.mirror_base = None
self.country = None
# New vals.
self.new_date = None
self.new_ver = None
self.new_hash = None
# Instance vars again.
self.do_update = False
self.force_update = False
self.iso_url = None
self.ipv4 = True
self.ipv6 = False
self.dest_iso = os.path.join(self.dest_dir, self.dest_file)
self.dest_ver = os.path.join(self.dest_dir, self.ver_file)
self._init_vars()

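    # Populate instance state at construction: bail out if another run
    # holds the lock, otherwise look up country, connectivity, and the
    # current and latest release info.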
def _init_vars(self):
if self.getRunning():
return(None)
self.getCountry()
self.getNet()
self.getCurVer()
self.getNewVer()
return(None)

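    # Entry point: take the lock, download a new ISO if the local copy is
    # missing, stale, or unverifiable, then stamp the version file and
    # release the lock.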
def main(self):
if self.getRunning():
return(None)
self.lock()
        if self.do_update or \
                self.force_update or \
                not all((self.old_date,
                         self.old_ver,
                         self.old_hash)):
self.do_update = True
self.findMirror()
self.download()
self.touchVer()
self.unlock()
return(None)

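    # Stream the ISO from the chosen mirror to disk, verify it against the
    # published checksum, and record the new release metadata on success.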
def download(self):
if self.getRunning():
return(None)
if not any((self.do_update, self.force_update)):
return(None)
if not self.iso_url:
raise RuntimeError('iso_url attribute must be set first')
req = requests.get(self.iso_url, stream = True, headers = {'User-Agent': 'curl/7.74.0'})
if not req.ok:
raise RuntimeError('Received non-200/30x {0} for {1}'.format(req.status_code, self.iso_url))
with req as uri:
with open(self.dest_iso, 'wb') as fh:
shutil.copyfileobj(uri.raw, fh)
hasher = hashlib.new(self.hash_type)
with open(self.dest_iso, 'rb') as fh:
hasher.update(fh.read())
realhash = hasher.hexdigest().lower()
if realhash != self.new_hash:
raise RuntimeError('Hash mismatch: {0} (LOCAL), {1} (REMOTE)'.format(realhash, self.new_hash))
self.updateVer()
return(None)

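    # Walk the ranked mirror list for our country (plain HTTP first, then
    # HTTPS) and keep the first server that answers.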
def findMirror(self):
self.getCountry()
if self.mirror_base:
return(None)
        for p in ('http', 'https'):
            m = arch_mirror_ranking.MirrorIdx(country = self.country,
                                              proto = p,
                                              is_active = True,
                                              ipv4 = self.ipv4,
                                              ipv6 = self.ipv6,
                                              isos = True,
                                              statuses = False)
            for s in m.ranked_servers:
                try:
                    req = requests.get(s['url'])
                    if req.ok:
                        self.mirror_base = s['url']
                        break
                except (OSError, ConnectionRefusedError):
                    continue
            if self.mirror_base:
                break
        return(None)

def getCountry(self):
        if self.country:  # Cached; the API only allows a limited number of free requests.
return(None)
url = 'https://ipinfo.io/country'
req = requests.get(url, headers = {'User-Agent': 'curl/7.74.0'})
if not req.ok:
raise RuntimeError('Received non-200/30x {0} for {1}'.format(req.status_code, url))
self.country = req.content.decode('utf-8').strip().upper()
return(None)

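    # Load the locally recorded release info and re-hash the on-disk ISO;
    # a missing stamp file, missing ISO, or hash mismatch forces a fresh
    # download.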
def getCurVer(self):
if self.getRunning():
return(None)
if not os.path.isfile(self.dest_ver):
self.do_update = True
self.force_update = True
            self.old_ver = 0.00  # Falsy sentinel; getNewVer() will treat any feed version as new.
return(None)
with open(self.dest_ver, 'rb') as fh:
ver_info = json.load(fh)
self.old_date = datetime.datetime.strptime(ver_info['date'], self._date_fmt)
self.old_ver = datetime.datetime.strptime(ver_info['ver'], self._datever_fmt)
        self.old_hash = ver_info.get(self.hash_type, ver_info.get(self._def_hash))  # Fall back to the default hash's recorded digest, not the literal type name.
self.country = ver_info.get('country')
self.new_hash = self.old_hash
self.new_ver = self.old_ver
self.new_date = self.old_date
# if ver_info.get('arch') != self._arch:
# self.do_update = True
# self.force_update = True
try:
hasher = hashlib.new(self.hash_type)
with open(self.dest_iso, 'rb') as fh:
hasher.update(fh.read())
if self.old_hash != hasher.hexdigest().lower():
self.do_update = True
self.force_update = True
except FileNotFoundError:
self.do_update = True
self.force_update = True
return(None)
return(None)

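    # Probe IPv4/IPv6 connectivity; each attribute ends up holding the
    # detected address, or False if that address family is unreachable.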
def getNet(self):
for k in ('ipv4', 'ipv6'):
url = 'https://{0}.clientinfo.square-r00t.net'.format(k)
try:
req = requests.get(url)
setattr(self, k, req.json()['ip'])
except OSError:
setattr(self, k, False)
return(None)

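    # Parse the release feed (newest item first), decide whether an update
    # is due, and resolve the ISO and checksum URLs on the chosen mirror.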
def getNewVer(self):
if self.getRunning():
return(None)
if not self.mirror_base:
self.findMirror()
req = requests.get(self.feed_url, headers = {'User-Agent': 'curl/7.74.0'})
if not req.ok:
raise RuntimeError('Received non-200/30x {0} for {1}'.format(req.status_code, self.feed_url))
feed = etree.fromstring(req.content)
for item in feed.xpath('//item'):
date_xml = item.find('pubDate')
ver_xml = item.find('title')
notes_xml = item.find('link')
date = ver = notes = None
if date_xml is not None:
date = datetime.datetime.strptime(date_xml.text, self._date_fmt)
if ver_xml is not None:
ver = ver_xml.text
if notes_xml is not None:
notes = notes_xml.text
new_ver = datetime.datetime.strptime(ver, self._datever_fmt)
if not all((self.old_ver, self.old_date)) or \
(new_ver > self.old_ver) or \
(self.old_date < date):
self.do_update = True
self.new_ver = new_ver
self.new_date = date
self.rel_notes_url = notes
datever = self.new_ver.strftime(self._datever_fmt)
self.iso_url = os.path.join(self.mirror_base,
self._iso_file.lstrip('/')).format(ver = datever, arch = self._arch)
hash_url = os.path.join(self.mirror_base,
self._iso_dir,
'{0}sums.txt'.format(self.hash_type))
req = requests.get(hash_url, headers = {'User-Agent': 'curl/7.74.0'})
if not req.ok:
raise RuntimeError('Received non-200/30x {0} for {1}'.format(req.status_code, hash_url))
hash_lines = req.content.decode('utf-8').strip().splitlines()
tgt_fname = os.path.basename(self.iso_url)
for line in hash_lines:
if line.strip().startswith('#'):
continue
hash_str, fname = line.split()
if fname != tgt_fname:
continue
self.new_hash = hash_str.lower()
break
break
return(None)

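    # Report whether another instance is already running, cleaning up the
    # lockfile if it points at a dead PID.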
def getRunning(self):
if not os.path.isfile(self.lckfile):
return(False)
my_pid = os.getpid()
with open(self.lckfile, 'r') as fh:
pid = int(fh.read().strip())
if not psutil.pid_exists(pid):
os.remove(self.lckfile)
return(False)
if pid == my_pid:
return(False)
return(True)

def lock(self):
with open(self.lckfile, 'w') as fh:
fh.write(str(os.getpid()))
return(None)

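    # Refresh the version file's mtime, presumably so the time of the last
    # check is visible even when nothing new was downloaded.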
def touchVer(self):
if self.getRunning():
return(None)
ver_path = pathlib.Path(self.dest_ver)
ver_path.touch(exist_ok = True)
return(None)

def unlock(self):
if os.path.isfile(self.lckfile):
os.remove(self.lckfile)
return(None)

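    # Persist the new release metadata (date, version, mirror, country,
    # notes URL, checksum) as JSON alongside the ISO.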
def updateVer(self):
if self.getRunning():
return(None)
d = {'date': self.new_date.strftime(self._date_fmt),
'mirror': self.mirror_base,
'country': self.country,
'notes': self.rel_notes_url,
'ver': self.new_ver.strftime(self._datever_fmt),
self.hash_type: self.new_hash}
j = json.dumps(d, indent = 4)
with open(self.dest_ver, 'w') as fh:
fh.write(j)
fh.write('\n')
return(None)


if __name__ == '__main__':
u = Updater()
u.main()