From d765ecafa41542f3745522c164f9c8ed9bb0eb62 Mon Sep 17 00:00:00 2001
From: Scott Ostler
Date: Sat, 6 Nov 2010 20:29:40 -0400
Subject: Added dailyimgupload.py, updated s3upload.py

---
 scripts/dailyimgupload.py | 152 ++++++++++++++++++++++++++++++++++++++++++++++
 scripts/s3upload.py       |  48 ++++++++++-----
 2 files changed, 184 insertions(+), 16 deletions(-)
 create mode 100644 scripts/dailyimgupload.py

diff --git a/scripts/dailyimgupload.py b/scripts/dailyimgupload.py
new file mode 100644
index 0000000..81ee44a
--- /dev/null
+++ b/scripts/dailyimgupload.py
@@ -0,0 +1,152 @@
+import ctypes
+import datetime
+import os
+import platform
+import sys
+import traceback
+import s3upload
+
+
+def freespace(p):
+    """
+    FROM: http://atlee.ca/blog/2008/02/23/getting-free-diskspace-in-python/
+    http://stackoverflow.com/questions/51658/cross-platform-space-remaining-on-volume-using-python
+    Returns the number of free bytes on the drive that ``p`` is on
+    """
+    if platform.system() == 'Windows':
+        free_bytes = ctypes.c_ulonglong(0)
+        ctypes.windll.kernel32.GetDiskFreeSpaceExW(ctypes.c_wchar_p(p), None, None, ctypes.pointer(free_bytes))
+        return free_bytes.value
+    else:
+        s = os.statvfs(p)
+        return s.f_bsize * s.f_bavail
+
+def directory_size(path):
+    """
+    FROM: http://stackoverflow.com/questions/1392413/calculating-a-directory-size-using-python
+    """
+    total_size = 0
+    for dirpath, dirnames, filenames in os.walk(path):
+        for f in filenames:
+            fp = os.path.join(dirpath, f)
+            total_size += os.path.getsize(fp)
+    return total_size
+
+def parse_date_dir(d, date_fmt):
+    if not os.path.exists(d):
+        raise ValueError('%s does not exist' % d)
+    if not os.path.isdir(d):
+        raise ValueError('%s is not a directory' % d)
+    return datetime.datetime.strptime(os.path.basename(d), date_fmt)
+
+def is_date_dir(d, date_fmt):
+    try:
+        parse_date_dir(d, date_fmt)
+        return True
+    except ValueError:
+        return False
+
+def get_directory_list(path, date_fmt='%Y%m%d'):
+    parse_func = lambda d: parse_date_dir(d, date_fmt)
+    subdirs = [os.path.join(path, child) for child in os.listdir(path)]
+    datedirs = [d for d in subdirs if is_date_dir(d, date_fmt)]
+    return sorted(datedirs, key=parse_func)
+
+
+def upload_dirs_until_free(path, target_free_mbs, dryrun):
+    starting_freespace = float(freespace(path))
+    dirs_uploaded = 0
+    files_uploaded = 0
+    cur_freespace = starting_freespace
+    reclaimed_space = 0
+    error = False
+    directory_list = get_directory_list(path)
+
+    if not directory_list:
+        print "Target directory %s has no subdirectories!" % path
+        sys.exit(1)
+
+    print "Target directory: %s" % path
+    print "Starting freespace: %.02f MBs" % (starting_freespace / 1024 / 1024)
+    print "Target freespace: %.02f MBs" % target_free_mbs
+    print "Image subdirectories: %s" % len(directory_list)
+
+    if dryrun:
+        print
+        print '!!! Doing dryrun -- current free space will be estimated !!!'
+
+    print
+    try:
+        for dir_to_upload in directory_list:
+            if cur_freespace >= target_free_mbs * 1024 * 1024:
+                break
+
+            dir_size = directory_size(dir_to_upload)
+            print 'Uploading %s (%.02f MBs)' % (dir_to_upload, dir_size / 1024 / 1024)
+
+            res = s3upload.do_upload(dir_to_upload, verbose=False, dryrun=dryrun)
+            files_uploaded += res['files_uploaded']
+            print "%s files uploaded in %.02fs" % (res['files_uploaded'], res['sec_elapsed'])
+
+            dirs_uploaded += 1
+            reclaimed_space += dir_size
+
+            if dryrun:
+                cur_freespace -= dir_size
+            else:
+                cur_freespace = float(freespace(path))
+            print "%.02f MBs now free" % (cur_freespace / 1024 / 1024)
+            print
+
+    except Exception:
+        print "An unexpected error occurred!"
+        error = True
+        traceback.print_exc()
+
+    print "---------------------------------------"
+    if error:
+        print "!!! Terminated abnormally !!!"
+    else:
+        print "Finished successfully"
+    print "Current free space: %.02f MBs" % (cur_freespace / 1024 / 1024)
+    print "Reclaimed space: %.02f MBs" % (reclaimed_space / 1024 / 1024)
+    print "Directories uploaded: %s" % dirs_uploaded
+    print "Files uploaded: %s" % files_uploaded
+
+
+if __name__ == '__main__':
+    if not 4 <= len(sys.argv) <= 5:
+        print "usage: dailyimgupload.py workingdir path megabytes [dryrun]"
+        sys.exit(1)
+
+    wd = sys.argv[1]
+    if not os.path.isdir(wd):
+        print "Invalid working directory: %s" % wd
+        sys.exit(1)
+    print "Switching working directory to %s" % wd
+    os.chdir(wd)
+
+    path = sys.argv[2]
+    if not os.path.isdir(path):
+        print "invalid image directory: %s" % path
+        sys.exit(1)
+
+    mbs = sys.argv[3]
+    try:
+        target_free_mbs = float(mbs)
+    except ValueError:
+        print "invalid number of megabytes: %s" % mbs
+        sys.exit(1)
+
+    if len(sys.argv) == 5:
+        dryrun = sys.argv[4]
+        if dryrun in ('true', 'false'):
+            dryrun = dryrun == 'true'
+        else:
+            print "invalid dry run argument: %s (must be either 'true' or 'false')" % dryrun
+            sys.exit(1)
+    else:
+        dryrun = True
+
+    upload_dirs_until_free(path, target_free_mbs, dryrun)
diff --git a/scripts/s3upload.py b/scripts/s3upload.py
index 724561c..9263a8f 100644
--- a/scripts/s3upload.py
+++ b/scripts/s3upload.py
@@ -9,7 +9,15 @@ CONN = None
 AWS_ACCESS_KEY_ID = 'AKIAIOP42NFKLLJXEGJQ'
 AWS_SECRET_ACCESS_KEY = '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'
 BUCKET_NAME = 'dumpfm'
-COUNTER = 0
+
+def get_or_initialize_aws_connection():
+    global CONN
+    if not CONN:
+        print "Initializing AWS connection with ID %s, bucket %s" % (AWS_ACCESS_KEY_ID,
+                                                                     BUCKET_NAME)
+        CONN = S3.AWSAuthConnection(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
+    return CONN
+
 
 def retry_func(f, count):
     try:
@@ -20,8 +28,7 @@ def retry_func(f, count):
         print 'Error! retrying %s more time(s)' % (count - 1)
         retry_func(f, count - 1)
 
-def upload_file(path):
-    global COUNTER
+def upload_file(path, conn, verbose=True, dryrun=False):
     path = os.path.normpath(path)
     if path == '.' or not os.path.isfile(path):
-        return
+        return 0
@@ -34,35 +41,44 @@ def upload_file(path):
     path = path.replace('\\', '/') # Windows hack
     start = time.time()
     def do_upload():
-        CONN.put(BUCKET_NAME, path, S3.S3Object(filedata),
+        conn.put(BUCKET_NAME, path, S3.S3Object(filedata),
                  {'x-amz-acl': 'public-read', 'Content-Type': content_type})
-    retry_func(do_upload, 3)
+
+    if not dryrun:
+        retry_func(do_upload, 3)
     ms_took = (time.time() - start) * 1000
-    print "uploaded %s (%0.0fms) (%sKB)" % (path, ms_took, size / 1024)
-    COUNTER += 1
+    if verbose:
+        print "uploaded %s (%0.0fms) (%sKB)" % (path, ms_took, size / 1024)
+    return 1
 
-def upload_directory(path):
+def upload_directory(path, conn, verbose=True, dryrun=False):
+    counter = 0
     for f in sorted(os.listdir(path)):
         subpath = os.path.join(path, f)
         if os.path.isdir(subpath):
-            upload_directory(subpath)
+            counter += upload_directory(subpath, conn, verbose=verbose, dryrun=dryrun)
         else:
-            upload_file(subpath)
+            counter += upload_file(subpath, conn, verbose=verbose, dryrun=dryrun)
+    return counter
 
-def do_upload(path):
-    global CONN
-    CONN = S3.AWSAuthConnection(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
+def do_upload(path, verbose=True, dryrun=False):
+    conn = get_or_initialize_aws_connection()
+    counter = 0
     start = time.time()
     if os.path.isdir(path):
-        upload_directory(path)
+        counter += upload_directory(path, conn, verbose=verbose, dryrun=dryrun)
     else:
-        upload_file(path)
+        counter += upload_file(path, conn, verbose=verbose, dryrun=dryrun)
     s_took = (time.time() - start)
-    print "uploaded %s files in %0.0fs" % (COUNTER, s_took)
+
+    if verbose:
+        print "uploaded %s files in %0.0fs" % (counter, s_took)
+    return { 'sec_elapsed': s_took,
+             'files_uploaded': counter }
 
 if __name__ == "__main__":
-- 
cgit v1.2.3-70-g09d2