#!/usr/bin/env python2.7
# pylint: disable=C0301

from __future__ import absolute_import, unicode_literals, print_function, division

from sys import argv
from os import environ, stat, chdir, remove as _delete_file
from os.path import dirname, basename, abspath, realpath, expandvars
from hashlib import sha256
from subprocess import check_call as run
from json import load, dump as save
from contextlib import contextmanager
from datetime import datetime

from boto.s3.connection import S3Connection
from boto.s3.key import Key
from boto.exception import S3ResponseError
# JSON file (relative to this script's directory; __main__ chdir()s there)
# mapping each cache name to its "key" file, "cache" directory, and
# "generate" fallback command.
CONFIG_FILE = './S3Cachefile.json'
# JSON file tracking which caches still need to be uploaded to S3.
UPLOAD_TODO_FILE = './S3CacheTodo.json'

# NOTE(review): the value is 1 MiB (1024 * 1024) despite the "MB" name;
# _tarball_size formats the quotient as "MiB", so the value is what matters.
BYTES_PER_MB = 1024 * 1024


@contextmanager
def timer():
    """Report the wall-clock duration of the enclosed block, in whole seconds."""
    begin = datetime.utcnow()
    yield
    duration = datetime.utcnow() - begin
    print("\tDone. Took", int(duration.total_seconds()), "second(s).")


@contextmanager
def todo_file(writeback=True):
    """Yield the upload-TODO dict loaded from UPLOAD_TODO_FILE.

    A missing or unparseable TODO file yields an empty dict.  After the
    block finishes, the (possibly mutated) dict is written back unless
    *writeback* is False; write failures are reported but not fatal.
    """
    try:
        with open(UPLOAD_TODO_FILE, 'rt') as json_file:
            todo = load(json_file)
    except (IOError, OSError, ValueError):
        # No TODO file yet, or corrupt JSON -- start from a clean slate.
        todo = {}

    yield todo

    if not writeback:
        return
    try:
        with open(UPLOAD_TODO_FILE, 'wt') as json_file:
            save(todo, json_file)
    except (OSError, IOError) as save_err:
        print("Error saving {}:".format(UPLOAD_TODO_FILE), save_err)


51
52
53
54
def _sha256_of_file(filename):
    hasher = sha256()
    with open(filename, 'rb') as input_file:
        hasher.update(input_file.read())
55
56
57
    file_hash = hasher.hexdigest()
    print('sha256({}) = {}'.format(filename, file_hash))
    return file_hash
58
59
60
61
62
63
64
65
66


def _delete_file_quietly(filename):
    try:
        _delete_file(filename)
    except (OSError, IOError):
        pass


def mark_needs_uploading(cache_name):
    """Flag *cache_name* in the TODO file as awaiting an upload."""
    with todo_file() as pending:
        pending[cache_name] = True


def mark_uploaded(cache_name):
    """Clear any pending-upload flag for *cache_name* (no-op if absent)."""
    with todo_file() as pending:
        pending.pop(cache_name, None)


def need_to_upload(cache_name):
    """Return True when the TODO file marks *cache_name* as awaiting upload."""
    with todo_file(writeback=False) as pending:
        return pending.get(cache_name, False)


def _tarball_size(directory):
    """Return the size of *directory*'s cache tarball as a '<N> MiB' string."""
    # Renamed from the misleading `kib`: the division is by BYTES_PER_MB
    # (1024 * 1024), so the quotient is mebibytes, not kibibytes.
    mib = stat(_tarball_filename_for(directory)).st_size // BYTES_PER_MB
    return "{} MiB".format(mib)


87
88
89
90
91
92
def _tarball_filename_for(directory):
    return abspath('./{}.tar.gz'.format(basename(directory)))


def _create_tarball(directory):
    """Pack *directory* into its cache tarball (path from _tarball_filename_for)."""
    print("Creating tarball of {}...".format(directory))
    tarball = _tarball_filename_for(directory)
    # -C to the parent so archive entries are relative and re-extract in place.
    with timer():
        run(['tar', '-czf', tarball, '-C', dirname(directory), basename(directory)])


def _extract_tarball(directory):
    """Unpack *directory*'s cache tarball into the directory's parent."""
    print("Extracting tarball of {}...".format(directory))
    tarball = _tarball_filename_for(directory)
    with timer():
        run(['tar', '-xzf', tarball, '-C', dirname(directory)])


def download(directory):
    """Fetch the cache tarball for the current cache from S3 and unpack it.

    Reads the module-level globals ``cache_name`` and ``key`` set up in the
    __main__ section.  On failure, flags the cache for re-upload and exits
    (the __main__ handler then runs the fallback generate command).
    """
    mark_uploaded(cache_name)  # reset
    try:
        print("Downloading {} tarball from S3...".format(cache_name))
        with timer():
            key.get_contents_to_filename(_tarball_filename_for(directory))
    except S3ResponseError:
        # Fix: remove any partially-downloaded tarball so a later run cannot
        # accidentally extract a truncated archive.
        _delete_file_quietly(_tarball_filename_for(directory))
        mark_needs_uploading(cache_name)
        raise SystemExit("Cached {} download failed!".format(cache_name))
    print("Downloaded {}.".format(_tarball_size(directory)))
    _extract_tarball(directory)
    print("{} successfully installed from cache.".format(cache_name))


def upload(directory):
    """Tar up *directory* and push it to S3 under the module-level ``key``.

    Reads the globals ``cache_name`` and ``key`` set up in __main__; clears
    the cache's pending-upload flag once the upload succeeds.
    """
    _create_tarball(directory)
    tarball = _tarball_filename_for(directory)
    print("Uploading {} tarball to S3... ({})".format(cache_name, _tarball_size(directory)))
    with timer():
        key.set_contents_from_filename(tarball)
    print("{} cache successfully updated.".format(cache_name))
    mark_uploaded(cache_name)


if __name__ == '__main__':
    # AWS credentials are read from the environment by boto:
    #   AWS_ACCESS_KEY_ID -- AWS Access Key ID
    #   AWS_SECRET_ACCESS_KEY -- AWS Secret Access Key
    argv.pop(0)  # drop the script name itself
    if len(argv) != 2:
        raise SystemExit("USAGE: s3_cache.py <download | upload> <cache name>")
    mode, cache_name = argv
    # Fix: reject a bad mode up front, before any config parsing or S3
    # traffic.  Previously this was only detected after connecting to S3.
    if mode not in ('download', 'upload'):
        raise SystemExit("Unrecognized mode {!r}".format(mode))

    # Work relative to this script so the ./S3Cachefile.json path resolves.
    script_dir = dirname(realpath(__file__))
    chdir(script_dir)

    try:
        with open(CONFIG_FILE, 'rt') as config_file:
            config = load(config_file)
    except (IOError, OSError, ValueError) as config_err:
        print(config_err)
        raise SystemExit("Error when trying to load config from JSON file!")

    try:
        cache_info = config[cache_name]
        key_file = expandvars(cache_info["key"])      # file whose hash keys the S3 object
        fallback_cmd = cache_info["generate"]         # shell command that regenerates the cache
        directory = expandvars(cache_info["cache"])   # the directory being cached
    except (TypeError, KeyError) as load_err:
        print(load_err)
        raise SystemExit("Config for cache named {!r} is missing or malformed!".format(cache_name))

    try:
        try:
            BUCKET_NAME = environ['TWBS_S3_BUCKET']
        except KeyError:
            raise SystemExit("TWBS_S3_BUCKET environment variable not set!")

        conn = S3Connection()
        bucket = conn.lookup(BUCKET_NAME)
        if bucket is None:
            raise SystemExit("Could not access bucket!")

        # Key the S3 object on the hash of the key file, so the cache is
        # invalidated automatically whenever that file changes.
        key_file_hash = _sha256_of_file(key_file)

        # These globals are read by download()/upload().
        key = Key(bucket, key_file_hash)
        key.storage_class = 'REDUCED_REDUNDANCY'

        if mode == 'download':
            download(directory)
        else:  # mode == 'upload', guaranteed by the early validation above
            if need_to_upload(cache_name):
                upload(directory)
            else:
                print("No need to upload anything.")
    except BaseException as exc:
        # Upload failures are fatal; any download failure (including the
        # SystemExits raised above) falls back to regenerating the cache
        # directory from scratch so the build can still proceed.
        if mode != 'download':
            raise
        print("Error!:", exc)
        print("Unable to download from cache.")
        print("Running fallback command to generate cache directory {!r}: {}".format(directory, fallback_cmd))
        with timer():
            run(fallback_cmd, shell=True)