"""Upload date-named subdirectories of a local directory to an S3 bucket.

Usage:
    s3upload.py directory startdate enddate [dryrun=true]

``directory`` is scanned for subdirectories whose names are dates in
``YYYYMMDD`` form.  Every regular file directly inside a subdirectory whose
date lies in the inclusive range [startdate, enddate] is uploaded to
``BUCKET_NAME`` with a public-read ACL, using the (normalized, forward-slash)
local path as the object key.

Credentials are read from the ``AWS_ACCESS_KEY_ID`` and
``AWS_SECRET_ACCESS_KEY`` environment variables.

SECURITY NOTE: an earlier revision of this script embedded an access key and
secret directly in the source.  Any key pair that was ever committed must be
treated as compromised and revoked.
"""
import datetime
import mimetypes
import os
import sys
import time

try:
    import S3
except ImportError:
    # Dry runs never talk to S3, so a missing client library is only an
    # error once a real upload is attempted (see _get_connection).
    S3 = None

CONN = None
AWS_ACCESS_KEY_ID = os.environ.get('AWS_ACCESS_KEY_ID')
AWS_SECRET_ACCESS_KEY = os.environ.get('AWS_SECRET_ACCESS_KEY')
BUCKET_NAME = 'dumpfm'


def parse_date(date_string, fmt='%Y%m%d'):
    """Parse ``date_string`` using ``fmt`` and return a ``datetime``.

    Raises ValueError when the string does not match the format.
    """
    return datetime.datetime.strptime(date_string, fmt)


def retry_func(f, count):
    """Call ``f()`` up to ``count`` times and return its result.

    ``f`` is always attempted at least once, even when ``count`` is less
    than 1.  The exception from the final failed attempt is re-raised.
    Only ``Exception`` subclasses trigger a retry, so KeyboardInterrupt and
    SystemExit propagate immediately.
    """
    remaining = count
    while True:
        try:
            return f()
        except Exception:
            if remaining <= 1:
                raise
            remaining -= 1
            print('Error! retrying %s more time(s)' % remaining)


def _get_connection():
    """Return the shared S3 connection, creating it on first use.

    Raises RuntimeError when the S3 client library or the credentials are
    unavailable.
    """
    global CONN
    if CONN is None:
        if S3 is None:
            raise RuntimeError('the S3 client library is not installed')
        if not (AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY):
            raise RuntimeError('AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY '
                               'must be set in the environment')
        CONN = S3.AWSAuthConnection(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
    return CONN


def upload_file(path, dry_run=True):
    """Upload the regular file at ``path``; return True if it was handled.

    Anything that is not a regular file (directories, missing paths) is
    skipped and False is returned.  With ``dry_run`` set, nothing is read or
    sent, but True is still returned for files that would be uploaded.
    Each real upload is attempted up to three times.
    """
    path = os.path.normpath(path)
    if not os.path.isfile(path):
        return False
    if dry_run:
        return True

    with open(path, 'rb') as handle:
        filedata = handle.read()
    content_type = mimetypes.guess_type(path)[0] or 'text/plain'
    key = path.replace('\\', '/')  # Windows hack: object keys use '/'
    headers = {'x-amz-acl': 'public-read', 'Content-Type': content_type}
    conn = _get_connection()

    def _put():
        response = conn.put(BUCKET_NAME, key, S3.S3Object(filedata), headers)
        # NOTE(review): the S3 client is assumed to return a response object
        # exposing ``http_response.status`` instead of raising on HTTP
        # errors -- confirm against the library in use.  When the attribute
        # is absent this check is a no-op.
        status = getattr(getattr(response, 'http_response', None),
                         'status', None)
        if status is not None and status >= 300:
            raise IOError('S3 PUT of %s failed with HTTP status %s'
                          % (key, status))

    start = time.time()
    retry_func(_put, 3)
    ms_took = (time.time() - start) * 1000
    print("uploaded %s (%0.0fms)" % (key, ms_took))
    return True


def do_upload(directory, start_date, end_date, dry_run=True):
    """Upload every date-named subdirectory of ``directory`` in range.

    Subdirectories are visited in sorted order; entries whose names are not
    ``YYYYMMDD`` dates, or that are not directories, are ignored.  The range
    [start_date, end_date] is inclusive.  Returns the total number of files
    handled across all subdirectories.

    Raises RuntimeError if a real upload is requested but the S3 library or
    credentials are missing.
    """
    if not dry_run:
        # Fail fast on configuration problems before walking the tree.
        _get_connection()

    total = 0
    for subdir in sorted(os.listdir(directory)):
        try:
            subdir_date = parse_date(subdir)
        except ValueError:
            continue
        subdir_path = os.path.join(directory, subdir)
        if not os.path.isdir(subdir_path):
            continue
        if not start_date <= subdir_date <= end_date:
            continue

        print("uploading contents of %s" % subdir)
        counter = 0
        for filename in sorted(os.listdir(subdir_path)):
            if upload_file(os.path.join(subdir_path, filename),
                           dry_run=dry_run):
                counter += 1
        print('handled %s files' % counter)
        total += counter
    return total


def main(argv):
    """Command-line entry point; returns the process exit status."""
    if not 4 <= len(argv) <= 5:
        print('usage: s3upload.py directory startdate enddate [dryrun=true]')
        return 1

    directory = argv[1]
    # Dry run is the default; anything other than "true" means upload.
    dry_run_arg = argv[4] if len(argv) == 5 else 'true'
    dry_run = dry_run_arg.lower() == 'true'
    if dry_run:
        print('doing dry run')

    dates = []
    for label, raw in (('start', argv[2]), ('end', argv[3])):
        try:
            dates.append(parse_date(raw))
        except ValueError:
            print("invalid %s date: %s" % (label, raw))
            return 1

    try:
        do_upload(directory, dates[0], dates[1], dry_run)
    except RuntimeError as err:
        print('error: %s' % err)
        return 1
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))