repomirror/utils/find_fastest_upstream/classes.py

122 lines
4.4 KiB
Python
Raw Normal View History

2020-06-14 00:53:12 -04:00
import socket
import time
2020-06-14 03:46:29 -04:00
from urllib import parse as urlparse
2020-06-14 00:53:12 -04:00
##
import requests
from bs4 import BeautifulSoup
2020-06-14 03:46:29 -04:00
from lxml import etree
2020-06-14 00:53:12 -04:00
##
import constants
class Ranker(object):
    """Base class that ranks upstream mirrors by how fast a TCP connection opens.

    Subclasses set ``mirrorlist_url`` and ``distro_name`` and override
    ``extract_mirrors()`` so that it fills ``self.mirror_candidates`` with
    mirror URLs. ``speedcheck()`` then times a TCP connect to each candidate,
    and ``print()`` / ``gen_xml()`` report the results fastest-first.
    """

    mirrorlist_url = None  # This is replaced by subclasses
    distro_name = None  # Replaced by subclasses; gen_xml() refuses to run without it.
    # Seconds allowed for each HTTP request; without a timeout, requests can
    # block forever on an unresponsive server.
    http_timeout = 30
    # Seconds allowed for each mirror's TCP connection attempt.
    connect_timeout = 7

    def __init__(self, parser='lxml', *args, **kwargs):
        """Set up empty result containers and fetch the local host's info.

        ``parser`` is the BeautifulSoup parser name used by ``get_mirrors()``.
        Extra positional/keyword arguments are accepted and ignored.

        Raises RuntimeError (via ``get_myinfo()``) if the information
        gatherer does not answer successfully.
        """
        self.my_info = {}
        self.raw_html = None
        self.parser = parser
        self.bs = None
        self.req = None
        self.get_myinfo()
        # The native collection of mirror information.
        self.raw_mirrors = []
        # The list of URLs only of the above.
        self.mirror_candidates = []
        # Both dicts are keyed by the measured connect time in seconds.
        self.ranked_mirrors = {}
        self.ranked_urls = {}

    def extract_mirrors(self):
        """Populate ``self.mirror_candidates``; a no-op here.

        A dummy func. This should be overridden by subclasses.
        """
        return None

    def get_myinfo(self):
        """Fetch ``constants.MYINFO_URL`` and store its JSON body in ``self.my_info``.

        Raises RuntimeError if the response status is not OK.
        """
        req = requests.get(constants.MYINFO_URL, timeout=self.http_timeout)
        if not req.ok:
            raise RuntimeError('Could not contact information gatherer')
        self.my_info = req.json()
        return None

    def get_mirrors(self):
        """Download ``self.mirrorlist_url`` and parse it with BeautifulSoup.

        Stores the response in ``self.req``, its UTF-8 decoded body in
        ``self.raw_html`` and the parsed document in ``self.bs``.

        Raises RuntimeError if the response status is not OK.
        """
        req = requests.get(self.mirrorlist_url, timeout=self.http_timeout)
        if not req.ok:
            raise RuntimeError(
                'Could not retrieve mirror list from {0}'.format(self.mirrorlist_url))
        self.req = req
        self.raw_html = req.content.decode('utf-8')
        self.bs = BeautifulSoup(self.raw_html, self.parser)
        return None

    def _time_connect(self, host, port):
        """Return the seconds needed to open a TCP connection, or None on failure."""
        # The context manager closes the socket on every path, including a
        # refused or timed-out connect.
        with socket.socket() as sock:
            sock.settimeout(self.connect_timeout)
            try:
                start = time.perf_counter()
                sock.connect((host, port))
                return time.perf_counter() - start
            except OSError:
                # socket.timeout and socket.error are both OSError.
                return None

    def speedcheck(self):
        """Time a TCP connect to every candidate and record the reachable ones.

        Calls ``extract_mirrors()`` first if there are no candidates yet.
        For each mirror that accepts a connection, ``self.ranked_mirrors``
        gets a dict with the keys ``path``, ``port``, ``domain`` and
        ``syncType`` and ``self.ranked_urls`` gets the original URL, both
        keyed by the connect time in seconds.

        Candidates that cannot be tested at all (no hostname, invalid port,
        or no explicit port and no default for the scheme) are skipped, just
        like mirrors that refuse the connection or time out.
        """
        if not self.mirror_candidates:
            self.extract_mirrors()
        for url in self.mirror_candidates:
            parsed = urlparse.urlparse(url)
            scheme = parsed.scheme.lower()
            host = parsed.hostname
            if not host:
                # Nothing to connect to; connect() would raise TypeError.
                continue
            try:
                # .port raises ValueError for a port outside 0-65535; the
                # lookup raises KeyError for a scheme without a default.
                port = parsed.port or constants.DEF_PORTS[scheme]
            except (ValueError, KeyError):
                continue
            conntime = self._time_connect(host, port)
            if conntime is None:
                continue
            # Skip the mirror if it has an exact time in the mirrors already.
            # Sure, it's *very* unlikely, but best practice to do this.
            if conntime in self.ranked_mirrors:
                continue
            # Insertion order here is the child-element order in gen_xml().
            self.ranked_mirrors[conntime] = {
                'path': parsed.path or '/',
                'port': port,
                'domain': host.lower(),
                'syncType': scheme,
            }
            self.ranked_urls[conntime] = url
        return None

    def print(self):
        """Print the ranked mirror URLs, fastest first, running speedcheck() if needed."""
        if not self.ranked_mirrors:
            self.speedcheck()
        print('Mirrors in order of speed:\n')
        for conntime in sorted(self.ranked_urls):
            print('{0} # ({1} seconds to connect)'.format(
                self.ranked_urls[conntime], conntime))
        return None

    def gen_xml(self):
        """Return the ranked mirrors as a pretty-printed XML document string.

        The root ``<mirror>`` element holds one ``<distro name=...>`` element
        containing, fastest first, a comment with the connect time followed
        by an ``<upstream>`` element per mirror. Runs ``speedcheck()`` first
        if no mirrors have been ranked yet.

        Raises ValueError if ``distro_name`` has not been set by a subclass.
        """
        if not self.distro_name:
            raise ValueError('This class must be subclassed to be useful')
        if not self.ranked_mirrors:
            self.speedcheck()
        skeleton = ('<?xml version="1.0" encoding="UTF-8" ?>'
                    '<mirror xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" '
                    'xmlns="https://git.square-r00t.net/RepoMirror/" '
                    'xsi:schemaLocation="https://git.square-r00t.net/RepoMirror/ '
                    'http://schema.xml.r00t2.io/projects/repomirror.xsd">'
                    '</mirror>')
        xml = etree.fromstring(skeleton.encode('utf-8'))
        distro = etree.Element('distro')
        distro.attrib['name'] = self.distro_name
        for conntime in sorted(self.ranked_mirrors):
            distro.append(etree.Comment(' ({0} seconds to connect) '.format(conntime)))
            upstream = etree.SubElement(distro, 'upstream')
            for key, value in self.ranked_mirrors[conntime].items():
                etree.SubElement(upstream, key).text = str(value)
        xml.append(distro)
        return etree.tostring(xml,
                              pretty_print=True,
                              with_comments=True,
                              with_tail=True,
                              encoding='UTF-8',
                              xml_declaration=True).decode('utf-8')