about to change up a lot of stuff...
This commit is contained in:
parent
5526111743
commit
2ba79cd801
@ -6,57 +6,108 @@
|
||||
xmlns="https://git.square-r00t.net/RepoMirror/"
|
||||
xsi:schemaLocation="https://git.square-r00t.net/RepoMirror/ http://schema.xml.r00t2.io/projects/repomirror.xsd">
|
||||
<distro name="arch">
|
||||
<!--
|
||||
If provided (and the sync script is running as the root user), the files/directories can be chowned to the
|
||||
provided user/group. Otherwise they'll be owned by whatever user the script is running as (and its primary group).
|
||||
-->
|
||||
<owner>
|
||||
<user>root</user>
|
||||
<group>root</group>
|
||||
</owner>
|
||||
<!--
|
||||
The local path to where the hierarchy/files should be synced to.
|
||||
-->
|
||||
<dest>/srv/repos/arch/.</dest>
|
||||
<!--
|
||||
The local file to update with a timestamp with the last time we checked for updates.
|
||||
The local file to update with a timestamp with the last time we *checked* for updates.
|
||||
If not provided, don't update a file (NOT recommended!).
|
||||
It may or may not be optional; check with the spec for mirroring for the specified distro.
|
||||
If the timeFormat attribute is provided, write the timestamp in the specified format.
|
||||
See the following for details:
|
||||
* https://docs.python.org/library/datetime.html#strftime-and-strptime-format-codes
|
||||
* https://strftime.org/
|
||||
The default is to use a regular UNIX Epoch integer (e.g. June 13, 2020 5:03:53 PM UTC => 1592067833).
|
||||
This can be manually specified by the special string "UNIX_EPOCH".
|
||||
Optionally, you can use the special string "MICROSECOND_EPOCH", which will specify the above with microseconds.
|
||||
e.g. June 13, 2020 5:09:13.995777 PM UTC => 1592068153.995777
|
||||
-->
|
||||
<lastLocalCheck>/srv/http/arch.lastcheck</lastLocalCheck>
|
||||
<lastLocalCheck timeFormat="MICROSECOND_EPOCH">/srv/http/arch.lastcheck</lastLocalCheck>
|
||||
<!--
|
||||
The file to update with a timestamp with the last time we synced from our upstream.
|
||||
The file to update with a timestamp with the last time we *synced from our upstream*.
|
||||
If not provided, don't update a file (NOT recommended!).
|
||||
It may or may not be optional; check with the spec for mirroring for the specified distro.
|
||||
If not provided, don't update a file (NOT recommended!).
|
||||
It takes the same optional attribute "timeFormat" as above, with the same behaviour.
|
||||
-->
|
||||
<lastLocalSync>/srv/http/arch.lastsync</lastLocalSync>
|
||||
<lastLocalSync timeFormat="UNIX_EPOCH">/srv/repos/arch/lastsync</lastLocalSync>
|
||||
<!--
|
||||
The path to a file on the upstream(s) that gives a time when it last updated.
|
||||
The optional timeFormat attribute behavior is the same as above.
|
||||
If neither this nor lastRemoteSync is provided, a sync will be attempted regardless of when the last one was
|
||||
attempted.
|
||||
-->
|
||||
<lastRemoteUpdate>/lastupdate</lastRemoteUpdate>
|
||||
<lastRemoteUpdate timeFormat="UNIX_EPOCH">/lastupdate</lastRemoteUpdate>
|
||||
<!--
|
||||
The path to a file on the upstream(s) that gives a time when it last synced from its upstream.
|
||||
The optional timeFormat attribute behavior is the same as above.
|
||||
If neither this nor lastRemoteUpdate is provided, a sync will be attempted regardless of when the last one was
|
||||
attempted.
|
||||
-->
|
||||
<lastRemoteSync>/lastsync</lastRemoteSync>
|
||||
<lastRemoteSync timeFormat="UNIX_EPOCH">/lastsync</lastRemoteSync>
|
||||
<!--
|
||||
The path that must be currently mounted for sync to proceed.
|
||||
This is required.
|
||||
-->
|
||||
<mountCheck>/</mountCheck>
|
||||
<!--
|
||||
The speed to cap socket bandwidth at (in KiB). Decimals are okay.
|
||||
You cannot reliably use two dashes in XML strings, so this is a workaround.
|
||||
The following is only used for rsync upstreams and is optional. The default is just archive and delete-after.
|
||||
If arguments are provided, the defaults are overwritten so if you need the above, be sure to specify them.
|
||||
See the rsync man page (rsync(1)) for more details and a listing of supported flags on your system.
|
||||
-->
|
||||
<bwlimit>7000</bwlimit>
|
||||
<rsyncArgs>
|
||||
<!--
|
||||
A "long" option (two hyphens).
|
||||
-->
|
||||
<long>archive</long>
|
||||
<long>delete-after</long>
|
||||
<!--
|
||||
An argument with a value (info=2).
|
||||
-->
|
||||
<long value="2">info</long>
|
||||
<!--
|
||||
A "short" option (single hyphen).
|
||||
-->
|
||||
<short>c</short><!-- checksum -->
|
||||
</rsyncArgs>
|
||||
<upstream>
|
||||
<!--
|
||||
The following example uses "rsync://arch.mirror.constant.com/archlinux/"
|
||||
(https://www.archlinux.org/mirrors/constant.com/1008/)
|
||||
-->
|
||||
<!--
|
||||
One of:
|
||||
Required; one of:
|
||||
* rsync
|
||||
* ftp
|
||||
-->
|
||||
<syncType>rsync</syncType>
|
||||
<!--
|
||||
ONLY the domain goes here.
|
||||
Required; ONLY the domain goes here.
|
||||
-->
|
||||
<domain>arch.mirror.constant.com</domain>
|
||||
<!--
|
||||
If not specified, the protocol's default port will be used.
|
||||
Optional; if not specified, the protocol's default port will be used.
|
||||
-->
|
||||
<port>873</port>
|
||||
<!--
|
||||
The *remote* path part of the URI. The leading / is necessary. A trailing one will be assumed.
|
||||
Required; the *remote* path part of the URI. The leading / is necessary. A trailing one will be assumed.
|
||||
-->
|
||||
<path>/archlinux/</path>
|
||||
<!--
|
||||
The speed to cap socket bandwidth at (in KiB). Decimals are okay.
|
||||
Only valid for rsync; ignored for FTP. If not provided, the default is to not throttle.
|
||||
-->
|
||||
<bwlimit>7000</bwlimit>
|
||||
</upstream>
|
||||
<!--
|
||||
Multiple upstreams can be specified. They are tried in order specified and if connection fails or times out,
|
||||
@ -77,5 +128,14 @@
|
||||
<path>/distros/archlinux/</path>
|
||||
</upstream>
|
||||
</distro>
|
||||
<distro name="centos"/>
|
||||
<distro name="centos">
|
||||
<upstream>
|
||||
<syncType>rsync</syncType>
|
||||
<domain>mirrors.rit.edu</domain>
|
||||
<path>/centos/</path>
|
||||
</upstream>
|
||||
<dest>/srv/repos/arch/.</dest>
|
||||
<lastLocalCheck timeFormat="MICROSECOND_EPOCH">/srv/http/centos.lastcheck</lastLocalCheck>
|
||||
<lastLocalSync timeFormat="UNIX_EPOCH">/srv/repos/arch/lastsync</lastLocalSync>
|
||||
</distro>
|
||||
</mirror>
|
||||
|
59
utils/find_fastest_upstream/archlinux.py
Executable file
59
utils/find_fastest_upstream/archlinux.py
Executable file
@ -0,0 +1,59 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import datetime
|
||||
import re
|
||||
##
|
||||
import iso3166
|
||||
##
|
||||
import classes
|
||||
|
||||
|
||||
# Matches optional leading whitespace, then a run of digits and dots (captured as "num"), then any
# trailing text. Substituting with r'\g<num>' reduces a table cell's text to that leading number.
_strip_re = re.compile(r'^\s*(?P<num>[0-9.]+).*$')
|
||||
|
||||
|
||||
class Ranker(classes.Ranker):
    """Collect per-mirror statistics from the Arch Linux tier 1 mirror status page.

    On construction the page at ``mirrorlist_url`` is fetched and parsed (via ``get_mirrors``) and the
    local country name is resolved from the ``country`` key of ``self.my_info``.
    """
    mirrorlist_url = 'https://www.archlinux.org/mirrors/status/tier/1/'
    # Columns whose cell text is reduced to its leading number and stored as a float.
    _float_columns = ('Completion %', 'Mirror Score', 'μ Duration (s)', 'σ Duration (s)')
    # Column whose cell text is an "hh:mm" delay; stored as a float number of seconds.
    _delay_column = 'μ Delay (hh:mm)'

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.get_mirrors()
        # my_info['country'] is used as an ISO 3166-1 alpha-2 lookup key.
        self.mycountry = iso3166.countries_by_alpha2[self.my_info['country']].name

    @staticmethod
    def _delay_to_seconds(text):
        """Convert an "hh:mm" string to a float number of seconds.

        Raises ValueError if either part is not an integer.
        """
        hours, _, minutes = text.strip().partition(':')
        delay = datetime.timedelta(hours = int(hours), minutes = int(minutes or 0))
        return(delay.total_seconds())

    def extract_mirrors(self):
        """Parse the successful-mirrors table and append one dict per mirror to ``self.raw_mirrors``.

        Each dict is keyed by the table's column headers (a blank header becomes 'details').
        The numeric columns are stored as floats and the delay column as seconds; every other
        column keeps its parsed cell object untouched so callers can still inspect the markup.

        Raises RuntimeError if the table is not present in the fetched page, and ValueError if a
        numeric or delay cell cannot be converted.
        """
        # Limit to only successful mirrors.
        table = self.bs.find('table', {'id': 'successful_mirrors'})
        if table is None:
            raise RuntimeError('Could not find the successful mirrors table')
        # Ayyy, thanks dude.
        # Modified from https://stackoverflow.com/a/56835562/733214.
        header = table.find('thead').find('tr')
        headers = [h.text if h.text != '' else 'details' for h in header.find_all('th')]
        for row in table.find_all('tr'):
            cells = row.find_all('td')
            if not cells:
                # The header row (and any other row without <td> cells) carries no mirror data.
                continue
            mirror = {}
            # zip() stops at the shorter sequence, so a row with extra cells cannot overrun headers.
            for k, v in zip(headers, cells):
                if k in self._float_columns:
                    mirror[k] = float(_strip_re.sub(r'\g<num>', v.text).strip())
                elif k == self._delay_column:
                    # HOO boy. Wish they just did it in seconds.
                    mirror[k] = self._delay_to_seconds(v.text)
                else:
                    mirror[k] = v
            self.raw_mirrors.append(mirror)
        return(None)
|
||||
|
||||
|
||||
def main():
    """Build a Ranker, extract its mirrors, and pretty-print the collected rows."""
    ranker = Ranker()
    ranker.extract_mirrors()
    from pprint import pprint as pretty
    pretty(ranker.raw_mirrors)
    return None


if __name__ == '__main__':
    main()
|
0
utils/find_fastest_upstream/centos.py
Executable file
0
utils/find_fastest_upstream/centos.py
Executable file
39
utils/find_fastest_upstream/classes.py
Normal file
39
utils/find_fastest_upstream/classes.py
Normal file
@ -0,0 +1,39 @@
|
||||
import socket
|
||||
import time
|
||||
##
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
##
|
||||
import constants
|
||||
|
||||
|
||||
class Ranker(object):
    """Base class for distro-specific mirror rankers.

    Construction fetches information about the local host from ``constants.MYINFO_URL`` into
    ``self.my_info``. Subclasses set ``mirrorlist_url`` and override ``extract_mirrors``.
    """
    mirrorlist_url = None  # This is replaced by subclasses
    # Seconds to wait on each HTTP request; without a timeout, requests can block indefinitely.
    request_timeout = 30

    def __init__(self, parser = 'lxml', *args, **kwargs):
        """Initialise state and fetch local host information.

        parser is the parser name handed to BeautifulSoup by ``get_mirrors``. Extra positional and
        keyword arguments are accepted and ignored.
        """
        self.my_info = {}
        self.raw_html = None
        self.parser = parser
        self.bs = None
        self.raw_mirrors = []
        self.get_myinfo()

    def extract_mirrors(self):
        """Do nothing; subclasses override this to populate ``self.raw_mirrors``."""
        return(None)

    def get_myinfo(self):
        """Fetch ``constants.MYINFO_URL`` and store its decoded JSON body in ``self.my_info``.

        Raises RuntimeError if the response status is not OK.
        """
        req = requests.get(constants.MYINFO_URL, timeout = self.request_timeout)
        if not req.ok:
            raise RuntimeError('Could not contact information gatherer')
        self.my_info = req.json()
        return(None)

    def get_mirrors(self):
        """Fetch ``mirrorlist_url``, storing the HTML in ``self.raw_html`` and its parse in ``self.bs``.

        Raises ValueError if no ``mirrorlist_url`` is set and RuntimeError if the response status
        is not OK.
        """
        if self.mirrorlist_url is None:
            # Fail with a clear message before attempting a request with no URL.
            raise ValueError('No mirrorlist_url is set; use a subclass that defines one')
        req = requests.get(self.mirrorlist_url, timeout = self.request_timeout)
        if not req.ok:
            raise RuntimeError('Could not fetch the mirror list')
        self.raw_html = req.content.decode('utf-8')
        self.bs = BeautifulSoup(self.raw_html, self.parser)
        return(None)
|
1
utils/find_fastest_upstream/constants.py
Normal file
1
utils/find_fastest_upstream/constants.py
Normal file
@ -0,0 +1 @@
|
||||
# URL fetched by classes.Ranker.get_myinfo(); the response body is parsed as JSON and is expected to
# carry a 'country' key (read by the Arch Linux ranker).
MYINFO_URL = 'https://ipinfo.io'
|
20
utils/find_fastest_upstream/test.py
Executable file
20
utils/find_fastest_upstream/test.py
Executable file
@ -0,0 +1,20 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
# Scratch script: fetch the Arch Linux tier 1 mirror status page and dump the successful-mirrors
# table as a list of {column header: cell text} dicts.
country = 'US'
url = 'https://www.archlinux.org/mirrors/status/tier/1/'

# Without a timeout, requests waits indefinitely on an unresponsive server.
req = requests.get(url, timeout = 30)
if not req.ok:
    raise RuntimeError('Could not fetch the mirror status page')
html = req.content.decode('utf-8')
bs = BeautifulSoup(html, 'lxml')

mirrors = bs.find('table', {'id': 'successful_mirrors'})
if mirrors is None:
    raise RuntimeError('Could not find the successful mirrors table')
header = mirrors.find('thead').find('tr')
# A blank column header is given the name 'details'.
headers = [h.text if h.text != '' else 'details' for h in header.find_all('th')]

# Rows without <td> cells (e.g. the header row) are skipped rather than emitted as empty dicts.
# zip() stops at the shorter sequence, so a row with extra cells cannot overrun headers.
cell_rows = (row.find_all('td') for row in mirrors.find_all('tr'))
results = [{h: cell.text for h, cell in zip(headers, cells)} for cells in cell_rows if cells]

import pprint
pprint.pprint(results)
|
Loading…
Reference in New Issue
Block a user