new script, centos/isomirror_sort.py
This commit is contained in:
parent
3ccef84028
commit
caca1d1e84
92
centos/isomirror_sort.py
Normal file
92
centos/isomirror_sort.py
Normal file
@ -0,0 +1,92 @@
|
||||
#!/usr/bin/env python3.6
|
||||
|
||||
# Requires the third-party bs4 (BeautifulSoup) and lxml modules.
|
||||
import os
|
||||
import socket
|
||||
import time
|
||||
from urllib.request import urlopen
|
||||
from urllib.parse import urlparse
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
# Page listing the authoritative CentOS ISO mirrors.
URL = 'http://isoredirect.centos.org/centos/7/isos/x86_64/'

# The markup of that page is very plain (no divs or similar to anchor on), so
# links that are not mirrors have to be excluded explicitly.
blacklisted_link_URLs = (
    'http://bittorrent.com/',
    'http://wiki.centos.org/AdditionalResources/Repositories',
)

# Details of every parsed mirror, keyed by the mirror's host.
mirrors = {}

# Default port of each URL scheme a mirror may be published under.
dflt_ports = {
    # Unlikely: "HTTPS is currently not used for mirrors." per
    # https://wiki.centos.org/HowTos/CreatePublicMirrors
    'https': 443,
    # The most likely scheme.
    'http': 80,
    'ftp': 21,
    'rsync': 873,
}
|
||||
|
||||
def getMirrors():
    """Download the mirror list page and return the mirror URLs on it.

    The page at ``URL`` is fetched, parsed with BeautifulSoup (lxml parser)
    and scanned for ``<a>`` tags among the siblings that follow the fifth
    ``<br>`` element.

    Returns:
        A list of href strings (whitespace-stripped), in page order, with
        every entry of ``blacklisted_link_URLs`` left out.

    Raises:
        IndexError: If the page contains fewer than five ``<br>`` elements.
    """
    with urlopen(URL) as u:
        pg_src = u.read().decode('utf-8')
    soup = BeautifulSoup(pg_src, 'lxml')

    found = []
    # The page has no structural markup to anchor on; the links of interest
    # are looked up relative to the fifth <br>.
    for tag in soup.find_all('br')[4].next_siblings:
        if tag.name != 'a':
            continue
        # An <a> without an href (e.g. a named anchor) is not a mirror link;
        # .get() avoids the KeyError that tag['href'] would raise for it.
        href = (tag.get('href') or '').strip()
        # Compare the *stripped* value so stray whitespace in the markup
        # cannot sneak a blacklisted link past the filter.
        if href and href not in blacklisted_link_URLs:
            found.append(href)
    return found
|
||||
|
||||
def getHosts(mirror):
    """Record the connection details of one mirror in the ``mirrors`` dict.

    Args:
        mirror: Base URI of a CentOS mirror path. mirrors.centos.org is
            pointless to use for this.

    Returns:
        An empty tuple. The useful result is the entry written to the
        module-level ``mirrors`` dict under the mirror's host name, holding
        the keys ``proto``, ``port``, ``path`` and ``auth``.

    Raises:
        KeyError: If the URI carries no usable explicit port and its scheme
            has no entry in ``dflt_ports``.
    """
    uri = urlparse(mirror)

    # Keep any "user:pass@" prefix verbatim so the URI can be rebuilt later.
    # rpartition() splits on the last "@", so an "@" inside the password
    # does not end up in the host name. Without credentials both parts are
    # empty and login is ''.
    userinfo, at_sign, _ = uri.netloc.rpartition('@')
    login = userinfo + at_sign

    # Let urlparse separate host and port instead of splitting the netloc on
    # ":" by hand: that covers credentials, an explicit port, and both at
    # once, and keeps the port out of the host name used for connecting.
    fqdn = uri.hostname or uri.netloc
    try:
        port = uri.port
    except ValueError:
        # Non-numeric or out-of-range port: use the scheme default instead.
        port = None
    if not port:
        port = dflt_ports[uri.scheme]

    mirrors[fqdn] = {'proto': uri.scheme,
                     'port': port,
                     'path': uri.path,
                     'auth': login}
    # The empty tuple is kept as the return value for existing callers.
    return ()
|
||||
|
||||
def getSpeeds(timeout=5.0):
    """Time a TCP connect to every mirror and store it under ``time``.

    For each entry of the module-level ``mirrors`` dict a TCP connection to
    the entry's host and ``port`` is opened and closed again; the elapsed
    seconds are written to the entry's ``time`` key.

    Args:
        timeout: Seconds to wait for each connection attempt before giving
            up on that mirror.

    Returns:
        An empty tuple; results are written into ``mirrors``.
    """
    for fqdn, info in mirrors.items():
        start = time.perf_counter()
        try:
            # create_connection() resolves the name, applies the timeout and
            # tries every returned address; the with-block guarantees the
            # socket is closed even if something fails after connecting.
            with socket.create_connection((fqdn, info['port']),
                                          timeout=timeout):
                info['time'] = time.perf_counter() - start
        except OSError:
            # Covers DNS failures, refused connections and timeouts. An
            # unreachable mirror sorts last instead of aborting the run.
            info['time'] = float('inf')
    # The empty tuple is kept as the return value for existing callers.
    return ()
|
||||
|
||||
def main():
    """Print every mirror URI, ordered from fastest to slowest connect.

    Collects the mirror links, parses each into the ``mirrors`` dict, times
    a connection to each host, then prints one rebuilt URI per line sorted
    by the measured ``time``.
    """
    for m in getMirrors():
        getHosts(m)
    getSpeeds()

    ranking = sorted(mirrors, key=lambda k: mirrors[k]['time'])
    for host in ranking:
        info = mirrors[host]
        # Omit the port only when it is the default for *this* mirror's
        # scheme. Checking against all default ports would wrongly drop
        # e.g. ":443" from an http:// URI.
        if dflt_ports.get(info['proto']) == info['port']:
            str_port = ''
        else:
            str_port = ':' + str(info['port'])
        # "p" (not "port") is used for the formatted port because **info
        # already supplies a "port" keyword.
        print('{proto}://{auth}{0}{p}{path}'.format(host,
                                                    p=str_port,
                                                    **info))


if __name__ == '__main__':
    main()
|
Loading…
Reference in New Issue
Block a user