summaryrefslogtreecommitdiff
path: root/arch/repoclone.py
blob: f786cc5a8e0d1c858c9e3b35d3738d5ee5a754c2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
#!/usr/bin/env python3

import argparse
import configparser
import datetime
import os
import pprint
import subprocess
import sys

# TODO: convert .ini to treat [section]s as repositories, with a [DEFAULT]
# section for URL etc.

# Per-user configuration file: ~/.config/optools/repoclone/arch.ini
# expanduser() honours $HOME when it is set and otherwise falls back to the
# passwd database, so importing this module doesn't fail with a KeyError in
# environments that don't export HOME.
cfgfile = os.path.join(os.path.expanduser('~'),
                       '.config',
                       'optools',
                       'repoclone',
                       'arch.ini')

# Options handed to rsync on every run. sync() inserts --bwlimit at index 10
# (just before the --exclude entry), so keep the order of the entries stable.
opts = [
    '--recursive',        # descend into directories
    '--times',            # keep modification times
    '--links',            # symlinks stay symlinks
    '--hard-links',       # keep hard links intact
    '--quiet',            # only error messages are emitted
    '--delete-after',     # deletions happen once the transfer is done
    '--delay-updates',    # updated files are moved into place at the very end
    '--copy-links',       # replace a symlink with the file/dir it refers to
    '--safe-links',       # skip symlinks pointing outside of the tree
    # '--max-delete',     # (disabled) never delete more than NUM files
    '--delete-excluded',  # excluded files are removed from the destination too
    '--exclude=.*',       # leave out anything matching PATTERN (dotfiles)
]

def sync(args):
    """Mirror the remote repository tree into the local destination with rsync.

    args is a dict (main() passes vars() of the parsed arguments) that must
    provide the keys 'mount', 'bwlimit', 'mirror', 'destination', 'logfile'
    and 'lockfile'.

    The interpreter is exited (SystemExit) when args['mount'] is not reported
    as mounted by findmnt, or when the lockfile already exists. rsync's stdout
    is appended to the logfile. Its stderr is printed when stdin is a terminal
    and appended to the logfile otherwise. Once rsync has finished, the current
    Unix timestamp is written to <destination>/lastsync and the lockfile is
    removed (also when one of those steps raises).

    Returns an empty tuple.
    """
    import shutil  # only needed here; used to locate the rsync binary

    mntchk = subprocess.run(['findmnt', args['mount']],
                            stdout = subprocess.DEVNULL,
                            stderr = subprocess.DEVNULL)
    if mntchk.returncode != 0:
        sys.exit('!! BAILING OUT; {0} isn\'t mounted !!'.format(args['mount']))
    # Work on a copy so the module-level option list isn't modified by a call.
    rsync_opts = list(opts)
    if args['bwlimit'] >= 1:
        rsync_opts.insert(10, '--bwlimit=' + str(args['bwlimit']))  # limit socket I/O bandwidth
    for k in ('destination', 'logfile', 'lockfile'):
        parent = os.path.dirname(args[k])
        if parent:  # a bare filename has no parent directory to create
            os.makedirs(parent, exist_ok = True)
    # Look rsync up in PATH, falling back to the usual location.
    rsync = shutil.which('rsync') or '/usr/bin/rsync'
    cmd = [rsync]  # the path to the binary
    cmd.extend(rsync_opts)  # the arguments
    # TODO: implement repos here?
    cmd.append(os.path.join(args['mirror'], '.'))  # the path on the remote mirror
    cmd.append(os.path.join(args['destination'], '.'))  # the local destination
    interactive = os.isatty(sys.stdin.fileno())  # False when started from cron
    try:
        # Mode 'x' creates the file and fails if it already exists, so the
        # existence check and the creation can't be raced by a second instance.
        with open(args['lockfile'], 'x') as f:
            f.write(str(os.getpid()))
    except FileExistsError:
        with open(args['lockfile'], 'r') as f:
            existingpid = f.read().strip()
        if interactive:
            # Running from shell
            sys.exit('!! A repo synchronization seems to already be running (PID: {0}). Quitting. !!'.format(existingpid))
        sys.exit()  # we're running in cron, stay silent.
    try:
        with open(args['logfile'], 'a') as log:
            c = subprocess.run(cmd, stdout = log, stderr = subprocess.PIPE)
        # An aware datetime is required here: .timestamp() on the naive value
        # returned by utcnow() would be interpreted as local time.
        now = int(datetime.datetime.now(datetime.timezone.utc).timestamp())
        with open(os.path.join(args['destination'], 'lastsync'), 'w') as f:
            f.write(str(now) + '\n')
    finally:
        # Never leave a stale lock behind, even if the run blew up.
        os.remove(args['lockfile'])
    # Only report errors at the end of the run if we aren't running in cron. Otherwise, log them.
    # errors='replace' keeps odd (non-UTF-8) filenames from raising here.
    errors = c.stderr.decode('utf-8', errors = 'replace').splitlines()
    prefix = 'symlink has no referent: '
    if interactive and errors:
        print('We encountered some errors:')
        for e in errors:
            if e.startswith(prefix):
                # Everything after the prefix is the (quoted) path.
                print('Broken upstream symlink: {0}'.format(e[len(prefix):].replace('"', '')))
            else:
                print(e)
    elif errors:
        with open(args['logfile'], 'a') as log:
            log.writelines('{0}\n'.format(e) for e in errors)
    return ()

def getDefaults(path = None):
    """Load the configuration, writing a default file first if none exists.

    path: the configuration file to use. When None (the default), the
          module-level cfgfile is used.

    Returns a configparser.ConfigParser whose DEFAULT section contains the
    hardcoded defaults below, overridden by whatever the file sets.
    """
    if path is None:
        path = cfgfile
    # Hardcoded defaults. Everything is a string, since that is what
    # ConfigParser stores and hands back.
    dflt = {'mirror': 'rsync://mirror.square-r00t.net/arch/',
            'repos': 'core,extra,community,multilib,iso/latest',
            'destination': '/srv/repos/arch',
            'mount': '/',
            'bwlimit': '0',
            'lockfile': '/var/run/repo-sync_arch.lck',
            'logfile': '/var/log/repo/arch.log'}
    realcfg = configparser.ConfigParser(defaults = dflt)
    if not os.path.isfile(path):
        # On a first run the containing directory may not exist yet.
        cfgdir = os.path.dirname(path)
        if cfgdir:
            os.makedirs(cfgdir, exist_ok = True)
        with open(path, 'w') as f:
            realcfg.write(f)
    realcfg.read(path)
    return(realcfg)

def parseArgs():
    """Build the command-line parser, seeded with the configuration file's values.

    The defaults of every option come from the DEFAULT section returned by
    getDefaults().

    Returns the argparse.ArgumentParser itself (not parsed arguments); the
    caller runs .parse_args() on it.
    """
    liveopts = getDefaults()['DEFAULT']

    def _shown(key):
        # argparse %-formats help strings, so a literal '%' coming from the
        # configuration has to be doubled to be displayed as-is.
        return liveopts[key].replace('%', '%%')

    parser = argparse.ArgumentParser(description = 'Synchronization for a remote Arch repository to a local one.',
                                     epilog = ('This program will write a default configuration file to {0} ' +
                                               'if one is not found.').format(cfgfile))
    parser.add_argument('-m',
                        '--mirror',
                        dest = 'mirror',
                        default = liveopts['mirror'],
                        help = ('The upstream mirror to sync from, must be an rsync URI '+
                                '(Default: {0})').format(_shown('mirror')))
# TODO: can we do this?
# We can; we need to .format() a repo in, probably, on the src and dest.
# Problem is the last updated/last synced files.
#    parser.add_argument('-r',
#                        '--repos',
#                        dest = 'repos',
#                        default = liveopts['repos'],
#                        help = ('The repositories to sync; must be a comma-separated list. ' +
#                                '(Currently not used.) Default: {0}').format(','.join(liveopts['repos'])))
    parser.add_argument('-d',
                        '--destination',
                        dest = 'destination',
                        default = liveopts['destination'],
                        help = 'The destination directory to sync to. Default: {0}'.format(_shown('destination')))
    parser.add_argument('-b',
                        '--bwlimit',
                        dest = 'bwlimit',
                        default = liveopts['bwlimit'],
                        type = int,  # argparse also applies this to the string default
                        help = 'The amount, in Kilobytes per second, to throttle the sync to. Default is to not throttle (0).')
    parser.add_argument('-l',
                        '--log',
                        dest = 'logfile',
                        default = liveopts['logfile'],
                        help = 'The path to the logfile. Default: {0}'.format(_shown('logfile')))
    parser.add_argument('-L',
                        '--lock',
                        dest = 'lockfile',
                        default = liveopts['lockfile'],
                        help = 'The path to the lockfile. Default: {0}'.format(_shown('lockfile')))
    parser.add_argument('-M',
                        '--mount',
                        dest = 'mount',
                        default = liveopts['mount'],
                        help = 'The mountpoint for your --destination. The script will exit if this point is not mounted. ' +
                               'If you don\'t need mount checking, just use /. Default: {0}'.format(_shown('mount')))
    return(parser)

def main():
    """Parse the command line and run one synchronization with the result.

    Returns an empty tuple.
    """
    parser = parseArgs()
    namespace = parser.parse_args()
    # sync() works on a plain dict rather than on the argparse namespace.
    sync(vars(namespace))
    return ()

# Run a synchronization only when executed as a script, not when imported.
if __name__ == '__main__':
    main()