about to change up a lot of stuff...

This commit is contained in:
2020-06-14 00:53:12 -04:00
parent 5526111743
commit 2ba79cd801
6 changed files with 192 additions and 13 deletions

View File

@@ -0,0 +1,59 @@
#!/usr/bin/env python3
import datetime
import re
##
import iso3166
##
import classes
# Matches a whole line that begins (after optional whitespace) with a run of
# digits and dots, captured as the named group "num"; everything after that
# run is matched too, so substituting r'\g<num>' leaves only the number.
_strip_re = re.compile(r'^\s*(?P<num>[0-9.]+).*$')
class Ranker(classes.Ranker):
    """Ranker for Arch Linux tier 1 mirrors, fed by the mirror status page.

    Constructing an instance downloads the status page (``get_mirrors()``)
    and resolves the country name for the alpha-2 code found in
    ``self.my_info['country']``.
    """

    mirrorlist_url = 'https://www.archlinux.org/mirrors/status/tier/1/'

    # Columns whose cell text starts with a number; stored as float.
    _float_columns = ('Completion %', 'Mirror Score', 'μ Duration (s)', 'σ Duration (s)')
    # Column whose header advertises an "hh:mm" value; stored as a timedelta.
    _delay_column = 'μ Delay (hh:mm)'

    def __init__(self, *args, **kwargs):
        """Initialise the base ranker, fetch the mirror page and set ``self.mycountry``.

        Raises:
            KeyError: if ``self.my_info`` has no 'country' entry or the code
                is not a known ISO 3166 alpha-2 code.
        """
        super().__init__(*args, **kwargs)
        self.get_mirrors()
        self.mycountry = iso3166.countries_by_alpha2[self.my_info['country']].name

    @staticmethod
    def _parse_delay(text):
        """Convert an "hh:mm" string into a ``datetime.timedelta``.

        A value without a colon is treated as whole hours.

        Raises:
            ValueError: if the hour or minute part is not an integer.
        """
        hours, _, minutes = text.strip().partition(':')
        return(datetime.timedelta(hours = int(hours), minutes = int(minutes or 0)))

    def extract_mirrors(self):
        """Parse the "successful mirrors" table and append its rows to ``self.raw_mirrors``.

        Each appended row is a dict keyed by column title (a blank title
        becomes 'details'). Numeric columns are converted to float, the delay
        column to ``datetime.timedelta``; every other cell is kept as the
        parsed cell element so later steps can still inspect its markup.

        Raises:
            RuntimeError: if the page has no 'successful_mirrors' table.
            ValueError: if a numeric or delay cell cannot be converted.
        """
        # Limit to only successful mirrors.
        mirrors = self.bs.find('table', {'id': 'successful_mirrors'})
        if mirrors is None:
            raise RuntimeError('Could not find the successful mirrors table')
        # Modified from https://stackoverflow.com/a/56835562/733214.
        header = mirrors.find('thead').find('tr')
        headers = [h.text if h.text != '' else 'details' for h in header.find_all('th')]
        for raw_row in mirrors.find_all('tr'):
            cells = raw_row.find_all('td')
            if not cells:
                # The header row only contains <th> cells; there is nothing to record.
                continue
            row = {}
            for k, v in zip(headers, cells):
                if k in self._float_columns:
                    row[k] = float(_strip_re.sub(r'\g<num>', v.text).strip())
                elif k == self._delay_column:
                    row[k] = self._parse_delay(v.text)
                else:
                    # TODO: 'Country' (and the remaining columns) are not normalised yet.
                    row[k] = v
            self.raw_mirrors.append(row)
        return(None)
def main():
    """Create a Ranker, parse its mirror table and pretty-print the parsed rows."""
    ranker = Ranker()
    ranker.extract_mirrors()
    # Imported here because it is only needed for this debugging dump.
    import pprint
    pprint.pprint(ranker.raw_mirrors)
    return None


# Only run when executed as a script, not when imported.
if __name__ == '__main__':
    main()

View File

View File

@@ -0,0 +1,39 @@
import socket
import time
##
import requests
from bs4 import BeautifulSoup
##
import constants
class Ranker(object):
    """Base class for mirror rankers.

    Subclasses set ``mirrorlist_url`` and override ``extract_mirrors()``.

    Attributes set by ``__init__``:
        my_info: JSON document returned by ``constants.MYINFO_URL``.
        raw_html: text of the mirror list page, or None until ``get_mirrors()`` runs.
        parser: parser name handed to BeautifulSoup.
        bs: BeautifulSoup tree of ``raw_html``, or None until ``get_mirrors()`` runs.
        raw_mirrors: list that ``extract_mirrors()`` implementations fill.
    """

    mirrorlist_url = None  # This is replaced by subclasses
    # Seconds to wait for each HTTP request; without a timeout a stalled
    # server would block the program indefinitely.
    request_timeout = 30

    def __init__(self, parser = 'lxml', *args, **kwargs):
        """Set up empty state and fetch information about the local host.

        Args:
            parser: parser name passed to BeautifulSoup by ``get_mirrors()``.
            *args, **kwargs: accepted so subclasses can forward their arguments; unused here.
        """
        self.my_info = {}
        self.raw_html = None
        self.parser = parser
        self.bs = None
        # Initialised before any network access so the attribute always exists.
        self.raw_mirrors = []
        self.get_myinfo()

    def extract_mirrors(self):
        """Do nothing; subclasses override this to populate ``self.raw_mirrors``."""
        return(None)

    def get_myinfo(self):
        """Fetch ``constants.MYINFO_URL`` and store its JSON body in ``self.my_info``.

        Raises:
            RuntimeError: if the response status is not OK.
        """
        req = requests.get(constants.MYINFO_URL, timeout = self.request_timeout)
        if not req.ok:
            raise RuntimeError('Could not contact information gatherer')
        self.my_info = req.json()
        return(None)

    def get_mirrors(self):
        """Download ``mirrorlist_url`` and parse it into ``self.raw_html`` / ``self.bs``.

        Raises:
            NotImplementedError: if no ``mirrorlist_url`` has been set by a subclass.
            RuntimeError: if the response status is not OK.
        """
        if not self.mirrorlist_url:
            # Fail with a clear message instead of an unrelated URL-schema error.
            raise NotImplementedError('mirrorlist_url must be set by a subclass')
        req = requests.get(self.mirrorlist_url, timeout = self.request_timeout)
        if not req.ok:
            raise RuntimeError('Could not fetch mirror list from {0} (HTTP {1})'.format(self.mirrorlist_url,
                                                                                       req.status_code))
        self.raw_html = req.content.decode('utf-8')
        self.bs = BeautifulSoup(self.raw_html, self.parser)
        return(None)

View File

@@ -0,0 +1 @@
# URL requested by Ranker.get_myinfo(); the JSON body of the response is
# stored as the ranker's my_info.
MYINFO_URL = 'https://ipinfo.io'

View File

@@ -0,0 +1,20 @@
#!/usr/bin/env python3
import requests
from bs4 import BeautifulSoup
# Prototype: fetch the Arch Linux tier 1 mirror status page and dump the
# "successful mirrors" table as a list of {column title: cell text} dicts.
import pprint

country = 'US'
url = 'https://www.archlinux.org/mirrors/status/tier/1/'

req = requests.get(url)
html = req.content.decode('utf-8')
bs = BeautifulSoup(html, 'lxml')
mirrors = bs.find('table', {'id': 'successful_mirrors'})

# Column titles come from the <thead> row; a blank title is labelled 'details'.
header = mirrors.find('thead').find('tr')
headers = []
for th in header.find_all('th'):
    headers.append(th.text or 'details')

# One dict per <tr>; a row without <td> cells (the header row) yields {}.
results = []
for row in mirrors.find_all('tr'):
    record = {}
    for idx, cell in enumerate(row.find_all('td')):
        record[headers[idx]] = cell.text
    results.append(record)

pprint.pprint(results)