checking in work

parent 2ba79cd801
commit eed480c590
@@ -4,15 +4,8 @@ import logging
##
from . import config
from . import constants
from . import sync


_logger = logging.getLogger()


class Sync(object):
    def __init__(self, cfg = None, dummy = False, distro = None, logdir = None, *args, **kwargs):
        _args = dict(locals())
        del(_args['self'])
        _logger.debug('Sync class instantiated with args: {0}'.format(_args))
        self.cfg = config.Config(cfg)
repomirror/sync.py  (new file, 61 lines)
@@ -0,0 +1,61 @@
import datetime
import logging
import os
##
from . import config


_logger = logging.getLogger()


class Args(object):
    def __init__(self, args_xml):
        self.xml = args_xml
        self.args = []
        self._parse_xml()

    def _parse_xml(self):
        for arg_xml in self.xml.xpath('(short|long)'):
            # Work in progress; the loop body is not filled in yet in this commit.
            pass


class Mount(object):
    def __init__(self, mpchk_xml):
        self.path = os.path.abspath(os.path.expanduser(mpchk_xml))
        self.is_mounted = None
        self._check_mount()

    def _check_mount(self):
        with open('/proc/mounts', 'r') as fh:
            raw = fh.read()
        for line in raw.splitlines():
            l = line.split()
            # The second whitespace-separated field of a /proc/mounts line is the mountpoint.
            mp = l[1]
            if mp == self.path:
                self.is_mounted = True
                return(None)
        self.is_mounted = False
        return(None)


class TimestampFile(object):
    def __init__(self, ts_xml):
        self.fmt = ts_xml.attrib.get('timeFormat', 'UNIX_EPOCH')
        if self.fmt == 'UNIX_EPOCH':
            self.fmt = '%s'
        elif self.fmt == 'MICROSECOND_EPOCH':
            self.fmt = '%s.%f'
        self.path = os.path.abspath(os.path.expanduser(ts_xml.text))


class Upstream(object):
    def __init__(self, upstream_xml):
        pass


class Sync(object):
    def __init__(self, cfg = None, dummy = False, distro = None, logdir = None, *args, **kwargs):
        _args = dict(locals())
        del(_args['self'])
        _logger.debug('Sync class instantiated with args: {0}'.format(_args))
        self.cfg = config.Config(cfg)
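A hypothetical usage sketch (not part of this commit) for the new helper classes above. It assumes the repomirror package and its config module are importable and that this runs on a Linux host with /proc/mounts; the mountpoint and file path are made up for illustration.

import lxml.etree as etree

from repomirror import sync

# TimestampFile picks its strftime format from the timeFormat attribute.
ts_xml = etree.fromstring('<lastLocalCheck timeFormat="MICROSECOND_EPOCH">~/arch.lastcheck</lastLocalCheck>')
ts = sync.TimestampFile(ts_xml)
print(ts.fmt)   # '%s.%f'
print(ts.path)  # absolute, user-expanded path to arch.lastcheck

# Mount resolves the path and scans /proc/mounts for it (Linux only).
mnt = sync.Mount('/mnt/mirror')  # hypothetical mountpoint
print(mnt.is_mounted)            # True only if /mnt/mirror is currently mounted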
@@ -28,8 +28,8 @@
            * https://strftime.org/
        The default is to use a regular UNIX Epoch integer (e.g. June 13, 2020 5:03:53 PM UTC => 1592067833).
        This can be manually specified by the special string "UNIX_EPOCH".
        Optionally, you can use the special string "MICROSECOND_EPOCH", which will specify the above with microseconds.
        e.g. June 13, 2020 5:09:13.995777 PM UTC => 1592068153.995777
        Optionally, you can use the special string "MICROSECOND_EPOCH", which will specify the above with left-padded
        microseconds (e.g. June 13, 2020 5:09:13.995777 PM UTC => 1592068153.995777).
    -->
    <lastLocalCheck timeFormat="MICROSECOND_EPOCH">/srv/http/arch.lastcheck</lastLocalCheck>
    <!--
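A minimal illustration (not part of the commit) of what the two timeFormat values documented in the comment above evaluate to for the example dates it gives, using timezone-aware datetimes:

import datetime

utc = datetime.timezone.utc
epoch_example = datetime.datetime(2020, 6, 13, 17, 3, 53, tzinfo = utc)
micro_example = datetime.datetime(2020, 6, 13, 17, 9, 13, 995777, tzinfo = utc)

# "UNIX_EPOCH" (strftime '%s'): whole seconds since the epoch.
print(int(epoch_example.timestamp()))  # 1592067833
# "MICROSECOND_EPOCH" (strftime '%s.%f'): the same, with left-padded microseconds appended.
print('{0}.{1:06d}'.format(int(micro_example.timestamp()), micro_example.microsecond))  # 1592068153.995777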
@@ -84,6 +84,10 @@
    <!--
        The following example uses "rsync://arch.mirror.constant.com/archlinux/"
        (https://www.archlinux.org/mirrors/constant.com/1008/)
        If you need to find a mirror, you may be interested in the utils/find_fastest_upstream/ scripts. They will
        automatically find (and sort based on connection speed) all mirrors in your country for a given distro.
        They can even generate stubbed configuration files using those upstreams.
        Currently only Arch Linux and CentOS are supported.
    -->
    <!--
        Required; one of:
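The comment above points at the utils/find_fastest_upstream/ helpers. A rough sketch (not in the commit) of what they do when asked for a config stub; it hypothetically assumes the Arch Linux helper is importable as arch from inside that directory, since the actual filename is not shown in this diff:

import arch  # hypothetical module name for the Arch Linux helper shown later in this commit

r = arch.Ranker()     # looks up the local country and fetches the mirror status list
r.extract_mirrors()   # keeps only active, complete, rsync-capable mirrors in that country
r.speedcheck()        # ranks the candidates by TCP connect time
print(r.gen_xml())    # prints <distro>/<upstream> stubs suitable for the config file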
@@ -1,57 +1,56 @@
#!/usr/bin/env python3

import argparse
import datetime
import re
##
import iso3166
##
import classes


_strip_re = re.compile(r'^\s*(?P<num>[0-9.]+).*$')


class Ranker(classes.Ranker):
    mirrorlist_url = 'https://www.archlinux.org/mirrors/status/tier/1/'
    mirrorlist_url = 'https://www.archlinux.org/mirrors/status/tier/1/json/'
    distro_name = 'archlinux'

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.get_mirrors()
        self.mycountry = iso3166.countries_by_alpha2[self.my_info['country']].name

    def extract_mirrors(self):
        # Limit to only successful mirrors.
        mirrors = self.bs.find('table', {'id': 'successful_mirrors'})
        # Ayyy, thanks dude.
        # Modified from https://stackoverflow.com/a/56835562/733214.
        header = mirrors.find('thead').find('tr')
        headers = [h.text if h.text != '' else 'details' for h in header.find_all('th')]
        raw_rows = mirrors.find_all('tr')
        # rows = [{headers[i]: cell.text for i, cell in enumerate(row.find_all('td'))} for row in raw_rows]
        rows = [{headers[i]: cell for i, cell in enumerate(row.find_all('td'))} for row in raw_rows]
        for r in rows:
            for k, v in r.items():
                print(v)
                if k in ('Completion %', 'Mirror Score', 'μ Duration (s)', 'σ Duration (s)'):
                    r[k] = float(_strip_re.sub(r'\g<num>', v.text).strip())
                elif k == 'μ Delay (hh:mm)':
                    # HOO boy. Wish they just did it in seconds.
                    pass
                # elif k == 'Country':
            self.raw_mirrors.append(r)
        # for row in rows:
        #     if not row:
        #         continue
        #     for k in ('Completion %', 'Mirror Score', 'μ Duration (s)', 'σ Duration (s)'):
        #         row[k] = float(_strip_re.sub(r'\g<num>', row[k]).strip())

        for mirror in self.req.json()['urls']:
            if not all((mirror['active'],  # Only successful/active mirrors
                        mirror['isos'],  # Only mirrors with ISOs
                        # Only mirrors that support rsync (Arch mirrors do not support ftp)
                        (mirror['protocol'] == 'rsync'),
                        # Only mirrors in the system's country (May be buggy if both are not ISO-3166-1 Alpha-2)
                        (mirror['country_code'].upper() == self.my_info['country'].upper()),
                        # Only mirrors that are at least 100% complete.
                        (mirror['completion_pct'] >= 1.0))):
                continue
            # Convert the timestamp to python-native.
            mirror['last_sync'] = datetime.datetime.strptime(mirror['last_sync'], '%Y-%m-%dT%H:%M:%SZ')
            self.raw_mirrors.append(mirror)
            self.mirror_candidates.append(mirror['url'])
        return(None)


def parseArgs():
    args = argparse.ArgumentParser(description = 'Generate a list of suitable Arch Linux upstream mirrors in order of '
                                                 'speed')
    args.add_argument('-x', '--xml',
                      dest = 'xml',
                      action = 'store_true',
                      help = ('If specified, generate a config stub instead of a printed list of URLs'))
    return(args)


def main():
    args = parseArgs().parse_args()
    r = Ranker()
    r.extract_mirrors()
    import pprint
    pprint.pprint(r.raw_mirrors)
    r.speedcheck()
    if args.xml:
        print(r.gen_xml())
    else:
        r.print()
    return(None)
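A hypothetical worked example (not part of the commit) of the candidate filter the JSON-based extract_mirrors() above applies, run against a hand-written record using the same keys the script reads from the Arch mirror-status JSON. The field values here are made up for illustration, except the URL, which is the example mirror from the config comments.

sample = {'active': True,
          'isos': True,
          'protocol': 'rsync',
          'country_code': 'us',
          'completion_pct': 1.0,
          'url': 'rsync://arch.mirror.constant.com/archlinux/',
          'last_sync': '2020-06-13T17:09:13Z'}
my_country = 'US'  # stand-in for self.my_info['country']
keep = all((sample['active'],
            sample['isos'],
            (sample['protocol'] == 'rsync'),
            (sample['country_code'].upper() == my_country.upper()),
            (sample['completion_pct'] >= 1.0)))
print(keep)  # True, so this record would be appended to raw_mirrors and mirror_candidates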
@@ -0,0 +1,79 @@
#!/usr/bin/env python3

import argparse
import csv
import datetime
import io
import re
##
import classes


_proto_re = re.compile(r'^(?P<proto>https?)(?P<uri>.*)')


class Ranker(classes.Ranker):
    # https://lists.centos.org/pipermail/centos-mirror/2017-March/010312.html
    mirrorlist_url = 'https://www.centos.org/download/full-mirrorlist.csv'
    distro_name = 'centos'

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.get_mirrors()

    def extract_mirrors(self, preferred_proto = 'rsync'):
        preferred_proto = preferred_proto.lower()
        if preferred_proto not in ('rsync', 'ftp'):
            raise ValueError('Invalid preferred_proto; must be one of rsync or ftp')
        non_preferred = ('rsync' if preferred_proto == 'ftp' else 'ftp')
        c = csv.DictReader(io.StringIO(self.raw_html))
        for row in c:
            if not row['Country'] or row['Country'].strip() == '':
                continue
            # GorRAM it, dudes. States are not countries.
            country = row['Country'].strip()
            region = row['Region'].strip()
            if region == 'US':
                country = region
            if country != self.my_info['country']:
                continue
            for k, v in row.items():
                if v.strip() == '':
                    row[k] = None
            pref_url = row['{0} mirror link'.format(preferred_proto)]
            nonpref_url = row['{0} mirror link'.format(non_preferred)]
            if pref_url:
                url = _proto_re.sub(r'{0}\g<uri>'.format(preferred_proto), pref_url)
            else:
                if not nonpref_url:
                    continue
                url = _proto_re.sub(r'{0}\g<uri>'.format(non_preferred), nonpref_url)
            self.raw_mirrors.append(row)
            self.mirror_candidates.append(url)
        return(None)


def parseArgs():
    args = argparse.ArgumentParser(description = 'Generate a list of suitable CentOS upstream mirrors in order of '
                                                 'speed')
    args.add_argument('-x', '--xml',
                      dest = 'xml',
                      action = 'store_true',
                      help = ('If specified, generate a config stub instead of a printed list of URLs'))
    return(args)


def main():
    args = parseArgs().parse_args()
    r = Ranker()
    r.extract_mirrors()
    r.speedcheck()
    if args.xml:
        print(r.gen_xml())
    else:
        r.print()
    return(None)


if __name__ == '__main__':
    main()
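A small illustration (not part of the commit) of how the _proto_re substitution above rewrites a CSV "mirror link" URL's scheme to the preferred protocol, as extract_mirrors() does; the URL is a made-up example.

import re

_proto_re = re.compile(r'^(?P<proto>https?)(?P<uri>.*)')
pref_url = 'http://mirror.example.com/centos/'  # hypothetical CSV value
print(_proto_re.sub(r'{0}\g<uri>'.format('rsync'), pref_url))
# -> rsync://mirror.example.com/centos/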
@@ -1,22 +1,31 @@
import socket
import time
from urllib import parse as urlparse
##
import requests
from bs4 import BeautifulSoup
from lxml import etree
##
import constants


class Ranker(object):
    mirrorlist_url = None  # This is replaced by subclasses
    distro_name = None

    def __init__(self, parser = 'lxml', *args, **kwargs):
        self.my_info = {}
        self.raw_html = None
        self.parser = parser
        self.bs = None
        self.req = None
        self.get_myinfo()
        # The native collection of mirror information.
        self.raw_mirrors = []
        # The list of URLs only of the above.
        self.mirror_candidates = []
        self.ranked_mirrors = {}
        self.ranked_urls = {}

    def extract_mirrors(self):
        # A dummy func. This should be overridden by subclasses.
@@ -34,6 +43,79 @@ class Ranker(object):
        req = requests.get(self.mirrorlist_url)
        if not req.ok:
            raise RuntimeError('Could not contact information gatherer')
        self.raw_html = req.content.decode('utf-8')
        self.req = req
        self.raw_html = self.req.content.decode('utf-8')
        self.bs = BeautifulSoup(self.raw_html, self.parser)
        return(None)

    def speedcheck(self):
        if not self.mirror_candidates:
            self.extract_mirrors()
        for url in self.mirror_candidates:
            u = urlparse.urlparse(url)
            sock = socket.socket()
            sock.settimeout(7)
            port = u.port
            if not port:
                port = constants.DEF_PORTS[u.scheme.lower()]
            try:
                start = time.perf_counter()
                sock.connect((u.hostname, port))
                conntime = time.perf_counter() - start  # in seconds
                sock.close()
                del(sock)
            except (socket.timeout, socket.error):
                continue
            # Skip the mirror if it has an exact time in the mirrors already.
            # Sure, it's *very* unlikely, but best practice to do this.
            if conntime in self.ranked_mirrors:
                continue
            mirror = {}
            for a in ('path', 'port'):
                mirror[a] = getattr(u, a, None)
            mirror['domain'] = u.hostname.lower()
            mirror['syncType'] = u.scheme.lower()
            if not mirror['port']:
                mirror['port'] = constants.DEF_PORTS[mirror['syncType']]
            if mirror['path'] == '':
                mirror['path'] = '/'
            self.ranked_mirrors[conntime] = mirror
            self.ranked_urls[conntime] = url
        return(None)

    def print(self):
        if not self.ranked_mirrors:
            self.speedcheck()
        print('Mirrors in order of speed:\n')
        for m in sorted(list(self.ranked_urls.keys())):
            print('{0} # ({1} seconds to connect)'.format(self.ranked_urls[m], m))
        return(None)

    def gen_xml(self):
        if not self.distro_name:
            raise ValueError('This class must be subclassed to be useful')
        if not self.ranked_mirrors:
            self.speedcheck()
        s = ('<?xml version="1.0" encoding="UTF-8" ?>'
             '<mirror xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" '
             'xmlns="https://git.square-r00t.net/RepoMirror/" '
             'xsi:schemaLocation="https://git.square-r00t.net/RepoMirror/ '
             'http://schema.xml.r00t2.io/projects/repomirror.xsd">'
             '</mirror>')
        xml = etree.fromstring(s.encode('utf-8'))
        distro = etree.Element('distro')
        distro.attrib['name'] = self.distro_name
        for m in sorted(list(self.ranked_mirrors.keys())):
            mirror = self.ranked_mirrors[m]
            distro.append(etree.Comment(' ({0} seconds to connect) '.format(m)))
            u = etree.SubElement(distro, 'upstream')
            for k, v in mirror.items():
                e = etree.SubElement(u, k)
                e.text = str(v)
        xml.append(distro)
        return(etree.tostring(xml,
                              pretty_print = True,
                              with_comments = True,
                              with_tail = True,
                              encoding = 'UTF-8',
                              xml_declaration = True).decode('utf-8'))
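The ranking above boils down to timing a bare TCP connect. A standalone sketch (not part of the commit) of that measurement for a single host, mirroring speedcheck()'s use of socket and time.perf_counter(); the hostname is a placeholder.

import socket
import time

def connect_time(hostname, port, timeout = 7):
    sock = socket.socket()
    sock.settimeout(timeout)
    try:
        start = time.perf_counter()
        sock.connect((hostname, port))
        return(time.perf_counter() - start)  # seconds, like speedcheck()'s conntime
    except (socket.timeout, socket.error):
        return(None)
    finally:
        sock.close()

print(connect_time('example.com', 443))  # 443 is DEF_PORTS['https'] in constants.py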
@@ -1 +1,5 @@
MYINFO_URL = 'https://ipinfo.io'
DEF_PORTS = {'ftp': 21,
             'http': 80,
             'https': 443,
             'rsync': 873}
@@ -1,20 +0,0 @@
#!/usr/bin/env python3

import requests
from bs4 import BeautifulSoup


country = 'US'
url = 'https://www.archlinux.org/mirrors/status/tier/1/'

req = requests.get(url)
html = req.content.decode('utf-8')
bs = BeautifulSoup(html, 'lxml')

mirrors = bs.find('table', {'id': 'successful_mirrors'})
header = mirrors.find('thead').find('tr')
headers = [h.text if h.text != '' else 'details' for h in header.find_all('th')]

results = [{headers[i]: cell.text for i, cell in enumerate(row.find_all('td'))} for row in mirrors.find_all('tr')]

import pprint
pprint.pprint(results)