summaryrefslogtreecommitdiff
path: root/scripts/s3upload.py
blob: f39ea1951804b3102960b3e105f89ab0541eb52a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import datetime
import mimetypes
import os
import sys
import time
import S3

# Lazily-created S3 connection shared by every upload in this process; it is
# populated on first use by get_or_initialize_aws_connection().
CONN = None

# SECURITY(review): these credentials are committed to source control and
# should be treated as compromised -- rotate them and drop the literals.
# The standard AWS environment variables take precedence when set; the
# literals remain only as a backward-compatible fallback.
AWS_ACCESS_KEY_ID = os.environ.get('AWS_ACCESS_KEY_ID',
                                   'AKIAIVBEMR22OOG7OMMQ')
AWS_SECRET_ACCESS_KEY = os.environ.get('AWS_SECRET_ACCESS_KEY',
                                       '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')

# Bucket that every upload is written to.
BUCKET_NAME = 'dumpfm'

def get_or_initialize_aws_connection():
    """Return the shared S3 connection, creating it on first use.

    The connection is cached in the module-level CONN. While CONN is falsy,
    a call announces the access key ID and bucket on stdout, builds an
    S3.AWSAuthConnection from the module credentials and stores it; once
    CONN is truthy, calls return it unchanged.
    """
    global CONN
    if CONN:
        # Already initialised by an earlier call; reuse it.
        return CONN

    banner = "Initializing AWS connection with ID %s, bucket %s"
    print(banner % (AWS_ACCESS_KEY_ID, BUCKET_NAME))
    CONN = S3.AWSAuthConnection(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
    return CONN
        

def retry_func(f, count):
    """Call f() until it succeeds, making at most `count` attempts.

    f is always attempted at least once, even when count is 0 or negative.
    After each failed attempt that still has retries left, a message with
    the number of remaining attempts is printed.

    Args:
        f: zero-argument callable to invoke.
        count: maximum number of attempts.

    Returns:
        Whatever the first successful call to f() returns.

    Raises:
        The exception from the final attempt once attempts are exhausted.
        KeyboardInterrupt, SystemExit and other non-Exception errors are
        never retried and propagate immediately.
    """
    remaining = count
    while True:
        try:
            return f()
        except KeyboardInterrupt:
            # Explicit for clarity: a user abort must never be retried.
            raise
        except Exception:
            # Only ordinary errors are retried; a bare `except:` would also
            # swallow SystemExit and retry an intentional exit.
            if remaining <= 1:
                raise
            remaining -= 1
            print('Error! retrying %s more time(s)' % remaining)

def upload_file(path, verbose=True, dryrun=False):
    """Upload one regular file to the S3 bucket, keyed by its own path.

    The path is normalised and, for the object key, backslashes are turned
    into forward slashes. The content type is guessed from the file name and
    falls back to 'text/plain'. The object is stored with a public-read ACL
    and the upload is attempted up to three times via retry_func.

    Args:
        path: file system path of the file to upload.
        verbose: when true, print a one-line summary (key, time, size).
        dryrun: when true, read the file but skip the actual S3 request.

    Returns:
        1 if the file was processed, 0 if `path` is not a regular file.
    """
    path = os.path.normpath(path)
    if not os.path.isfile(path):
        # Return 0, not None, so callers can add the result to a running
        # count without a TypeError. This also covers '.' (a directory).
        return 0

    # Close the handle deterministically instead of leaking it to the GC.
    with open(path, 'rb') as handle:
        filedata = handle.read()
    size = len(filedata)
    content_type = mimetypes.guess_type(path)[0] or 'text/plain'

    key = path.replace('\\', '/')  # Windows hack
    start = time.time()

    def put_object():
        # Named so it no longer shadows the module-level do_upload().
        conn = get_or_initialize_aws_connection()
        conn.put(BUCKET_NAME, key, S3.S3Object(filedata),
                 {'x-amz-acl': 'public-read', 'Content-Type': content_type})

    if not dryrun:
        retry_func(put_object, 3)

    ms_took = (time.time() - start) * 1000
    if verbose:
        # Floor division keeps the KB figure an integer on any Python version.
        print("uploaded %s   (%0.0fms)   (%sKB)" % (key, ms_took, size // 1024))
    return 1

def upload_directory(path, verbose=True, dryrun=False):
    """Recursively upload the contents of a directory.

    Entries are visited in sorted name order. Subdirectories are descended
    into; every other entry is handed to upload_file.

    Args:
        path: directory to walk.
        verbose: forwarded to upload_file.
        dryrun: forwarded to upload_file.

    Returns:
        Total of the counts reported for all entries under `path`.
    """
    counter = 0
    for name in sorted(os.listdir(path)):
        subpath = os.path.join(path, name)
        if os.path.isdir(subpath):
            uploaded = upload_directory(subpath, verbose=verbose, dryrun=dryrun)
        else:
            uploaded = upload_file(subpath, verbose=verbose, dryrun=dryrun)
        # upload_file may return None for an entry it skips (e.g. a broken
        # symlink); count that as zero instead of raising TypeError.
        counter += uploaded or 0
    return counter

def do_upload(path, verbose=True, dryrun=False):
    """Upload a file or a whole directory tree and report timing.

    Directories go through upload_directory, anything else through
    upload_file.

    Args:
        path: file or directory to upload.
        verbose: when true, print per-file lines and a final summary.
        dryrun: forwarded to the upload helpers.

    Returns:
        A dict with 'sec_elapsed' (wall-clock seconds as a float) and
        'files_uploaded' (number of files counted).
    """
    start = time.time()

    if os.path.isdir(path):
        uploaded = upload_directory(path, verbose=verbose, dryrun=dryrun)
    else:
        uploaded = upload_file(path, verbose=verbose, dryrun=dryrun)
    # upload_file may return None when the path is not a regular file (for
    # instance a path that does not exist); treat that as zero files rather
    # than failing with TypeError.
    counter = uploaded or 0

    s_took = (time.time() - start)

    if verbose:
        print("uploaded %s files in %0.0fs" % (counter, s_took))
    return {'sec_elapsed': s_took,
            'files_uploaded': counter}


if __name__ == "__main__":
    # Command-line entry point: each argument is a file or directory to
    # upload. With no arguments, show usage and exit with status 1.
    if len(sys.argv) == 1:
        print('usage: s3upload.py path')
        sys.exit(1)

    for target in sys.argv[1:]:
        do_upload(target)
        # Blank line separating the report of one argument from the next.
        print("")