about to change up a lot of stuff...

2020-06-14 00:53:12 -04:00
parent 5526111743
commit 2ba79cd801
6 changed files with 192 additions and 13 deletions
--- a/utils/find_fastest_upstream/archlinux.py
+++ b/utils/find_fastest_upstream/archlinux.py
@@ -0,0 +1,59 @@
+#!/usr/bin/env python3
+
+import datetime
+import re
+##
+import iso3166
+##
+import classes
+
+
+_strip_re = re.compile(r'^\s*(?P<num>[0-9.]+).*$')
+
+
+class Ranker(classes.Ranker):
+    """Ranker for the Arch Linux tier 1 mirror status page.
+
+    Construction runs the base initializer, fetches the status page with
+    ``get_mirrors()`` and stores in ``self.mycountry`` the ISO 3166 country
+    name for the alpha-2 code found in ``self.my_info['country']``.
+    """
+    mirrorlist_url = 'https://www.archlinux.org/mirrors/status/tier/1/'
+    # Columns whose cell text starts with a plain number.
+    _numeric_columns = ('Completion %', 'Mirror Score', 'μ Duration (s)', 'σ Duration (s)')
+    # Column whose cell text is an "hours:minutes" duration.
+    _delay_column = 'μ Delay (hh:mm)'
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.get_mirrors()
+        self.mycountry = iso3166.countries_by_alpha2[self.my_info['country']].name
+
+    def extract_mirrors(self):
+        """Parse the successful-mirrors table into ``self.raw_mirrors``.
+
+        One dict per data row is appended, keyed by column header (a header
+        cell with empty text is keyed ``'details'``). Numeric columns are
+        converted to ``float`` and the mean delay column to a
+        ``datetime.timedelta``; every other cell is kept as the parsed
+        BeautifulSoup element.
+
+        Raises:
+            RuntimeError: the page has no ``successful_mirrors`` table.
+            ValueError: a numeric or delay cell could not be parsed.
+        """
+        # Limit to only successful mirrors.
+        mirrors = self.bs.find('table', {'id': 'successful_mirrors'})
+        if mirrors is None:
+            raise RuntimeError('Could not find the successful mirrors table')
+        # Modified from https://stackoverflow.com/a/56835562/733214.
+        header = mirrors.find('thead').find('tr')
+        headers = [h.text if h.text != '' else 'details' for h in header.find_all('th')]
+        for row in mirrors.find_all('tr'):
+            cells = row.find_all('td')
+            if not cells:
+                # Rows without <td> cells (e.g. the header row) carry no
+                # mirror data and would only yield an empty dict.
+                continue
+            mirror = {}
+            # zip() rather than indexing: a row with more cells than headers
+            # must not raise IndexError.
+            for k, v in zip(headers, cells):
+                if k in self._numeric_columns:
+                    mirror[k] = float(_strip_re.sub(r'\g<num>', v.text).strip())
+                elif k == self._delay_column:
+                    # The page reports this as hours:minutes, not seconds.
+                    hours, _, minutes = v.text.strip().partition(':')
+                    mirror[k] = datetime.timedelta(hours=int(hours), minutes=int(minutes))
+                else:
+                    mirror[k] = v
+            self.raw_mirrors.append(mirror)
+        return(None)
+
+
+def main():
+    """Build a Ranker, parse its mirror table and pretty-print the rows."""
+    from pprint import pprint
+    ranker = Ranker()
+    ranker.extract_mirrors()
+    pprint(ranker.raw_mirrors)
+    return(None)
+
+
+# Run main() only when this file is executed as a script, not on import.
+if __name__ == '__main__':
+    main()
--- a/utils/find_fastest_upstream/centos.py
+++ b/utils/find_fastest_upstream/centos.py
--- a/utils/find_fastest_upstream/classes.py
+++ b/utils/find_fastest_upstream/classes.py
@@ -0,0 +1,39 @@
+import socket
+import time
+##
+import requests
+from bs4 import BeautifulSoup
+##
+import constants
+
+
+class Ranker(object):
+    """Base class for mirror rankers.
+
+    Subclasses set ``mirrorlist_url`` and override ``extract_mirrors()``.
+    Construction calls ``get_myinfo()``, which performs an HTTP request.
+
+    Attributes set by ``__init__``:
+        my_info: decoded JSON from ``constants.MYINFO_URL``.
+        raw_html: mirror list page text, filled by ``get_mirrors()``.
+        parser: parser name handed to BeautifulSoup.
+        bs: BeautifulSoup tree of ``raw_html``, filled by ``get_mirrors()``.
+        raw_mirrors: list for ``extract_mirrors()`` implementations to fill.
+    """
+    mirrorlist_url = None  # This is replaced by subclasses
+    # Seconds to wait on each HTTP request. requests applies no timeout by
+    # default, so an unresponsive server would otherwise block forever.
+    request_timeout = 30
+
+    def __init__(self, parser = 'lxml', *args, **kwargs):
+        self.my_info = {}
+        self.raw_html = None
+        self.parser = parser
+        self.bs = None
+        self.raw_mirrors = []
+        self.get_myinfo()
+
+    def extract_mirrors(self):
+        """Do nothing; subclasses override this to fill ``raw_mirrors``."""
+        return(None)
+
+    def get_myinfo(self):
+        """Fetch ``constants.MYINFO_URL`` and store its JSON in ``my_info``.
+
+        Raises:
+            RuntimeError: the server answered with an error status.
+        """
+        req = requests.get(constants.MYINFO_URL, timeout=self.request_timeout)
+        if not req.ok:
+            raise RuntimeError('Could not contact information gatherer')
+        self.my_info = req.json()
+        return(None)
+
+    def get_mirrors(self):
+        """Fetch ``mirrorlist_url`` into ``raw_html`` and parse it into ``bs``.
+
+        Raises:
+            RuntimeError: the server answered with an error status.
+        """
+        req = requests.get(self.mirrorlist_url, timeout=self.request_timeout)
+        if not req.ok:
+            # Distinct from the get_myinfo() message so a failure can be
+            # traced to the right request.
+            raise RuntimeError('Could not fetch mirror list')
+        self.raw_html = req.content.decode('utf-8')
+        self.bs = BeautifulSoup(self.raw_html, self.parser)
+        return(None)
--- a/utils/find_fastest_upstream/constants.py
+++ b/utils/find_fastest_upstream/constants.py
@@ -0,0 +1 @@
+# Endpoint requested by Ranker.get_myinfo(); the response is decoded as JSON
+# and the Arch Linux ranker reads its 'country' key.
+MYINFO_URL = 'https://ipinfo.io'
--- a/utils/find_fastest_upstream/test.py
+++ b/utils/find_fastest_upstream/test.py
@@ -0,0 +1,20 @@
+#!/usr/bin/env python3
+
+import requests
+from bs4 import BeautifulSoup
+
+import pprint
+
+country = 'US'
+url = 'https://www.archlinux.org/mirrors/status/tier/1/'
+
+# Download the status page and parse it.
+response = requests.get(url)
+soup = BeautifulSoup(response.content.decode('utf-8'), 'lxml')
+
+# Only the table of successful mirrors is of interest.
+table = soup.find('table', {'id': 'successful_mirrors'})
+
+# Column names come from the header row; a header cell with empty text is
+# named 'details'.
+column_names = []
+for th in table.find('thead').find('tr').find_all('th'):
+    column_names.append(th.text if th.text != '' else 'details')
+
+# One dict per <tr>, mapping column name to the cell's text.
+results = []
+for tr in table.find_all('tr'):
+    record = {}
+    for idx, td in enumerate(tr.find_all('td')):
+        record[column_names[idx]] = td.text
+    results.append(record)
+
+pprint.pprint(results)