diff options
| author | Scott Ostler <scottbot9000@gmail.com> | 2010-11-06 20:29:40 -0400 |
|---|---|---|
| committer | Scott Ostler <scottbot9000@gmail.com> | 2010-11-06 20:29:40 -0400 |
| commit | d765ecafa41542f3745522c164f9c8ed9bb0eb62 (patch) | |
| tree | d6eeab9158af66bf31c659516884e1d557ce0762 | |
| parent | e2e82dc608862c0c72e0d45599f2768665edf7dd (diff) | |
Added dailyimgupload.py, updated s3upload.py
| -rw-r--r-- | scripts/dailyimgupload.py | 152 | ||||
| -rw-r--r-- | scripts/s3upload.py | 48 |
2 files changed, 184 insertions, 16 deletions
diff --git a/scripts/dailyimgupload.py b/scripts/dailyimgupload.py new file mode 100644 index 0000000..81ee44a --- /dev/null +++ b/scripts/dailyimgupload.py @@ -0,0 +1,152 @@ +import ctypes
+import datetime
+import os
+import platform
+import sys
+import traceback
+import s3upload
+
+
def freespace(p):
    """Return the number of free bytes on the drive that ``p`` is on.

    Adapted from:
    http://atlee.ca/blog/2008/02/23/getting-free-diskspace-in-python/
    http://stackoverflow.com/questions/51658/cross-platform-space-remaining-on-volume-using-python
    """
    if platform.system() != 'Windows':
        # POSIX: block size times blocks available to unprivileged users.
        stats = os.statvfs(p)
        return stats.f_bsize * stats.f_bavail
    # Windows has no statvfs; ask the Win32 API to fill in the count.
    available = ctypes.c_ulonglong(0)
    ctypes.windll.kernel32.GetDiskFreeSpaceExW(
        ctypes.c_wchar_p(p), None, None, ctypes.pointer(available))
    return available.value
+
def directory_size(path):
    """Return the total size in bytes of every file under ``path``.

    From:
    http://stackoverflow.com/questions/1392413/calculating-a-directory-size-using-python
    """
    return sum(
        os.path.getsize(os.path.join(dirpath, name))
        for dirpath, _, filenames in os.walk(path)
        for name in filenames)
+
def parse_date_dir(d, date_fmt):
    """Parse the basename of directory ``d`` as a datetime via ``date_fmt``.

    Raises ValueError when ``d`` does not exist, is not a directory, or
    when its basename does not match ``date_fmt`` (strptime's error).
    """
    if not os.path.exists(d):
        raise ValueError('%s does not exist' % d)
    if not os.path.isdir(d):
        raise ValueError('%s is not a directory' % d)
    basename = os.path.basename(d)
    return datetime.datetime.strptime(basename, date_fmt)
+
def is_date_dir(d, date_fmt):
    """Return True when ``d`` is an existing directory whose basename
    parses under ``date_fmt``; False otherwise."""
    try:
        parse_date_dir(d, date_fmt)
    except ValueError:
        return False
    return True
+
def get_directory_list(path, date_fmt='%Y%m%d'):
    """Return the date-named subdirectories of ``path``, sorted
    chronologically by their parsed basename (oldest first)."""
    children = (os.path.join(path, name) for name in os.listdir(path))
    date_dirs = [child for child in children if is_date_dir(child, date_fmt)]
    date_dirs.sort(key=lambda d: parse_date_dir(d, date_fmt))
    return date_dirs
+
+
+def upload_dirs_until_free(path, target_free_mbs, dryrun):
+ starting_freespace = float(freespace(path))
+ dirs_uploaded = 0
+ files_uploaded = 0
+ cur_freespace = starting_freespace
+ reclaimed_space = 0
+ error = False
+ directory_list = get_directory_list(path)
+
+ if not directory_list:
+ print "Target directory %s has no subdirectories!" % path
+ sys.exit(1)
+
+ print "Target directory: %s" % path
+ print "Starting freespace: %.02f MBs" % (starting_freespace / 1024 / 1024)
+ print "Target freespace: %.02f MBs" % target_free_mbs
+ print "Image subdirectories: %s" % len(directory_list)
+
+ if dryrun:
+ print
+ print '!!! Doing dryrun -- current free space will be estimated !!!'
+
+ print
+ try:
+ for dir_to_upload in directory_list:
+ if cur_freespace >= target_free_mbs * 1024 * 1024:
+ break
+
+ dir_size = directory_size(dir_to_upload)
+ print 'Uploading %s (%.02f MBs)' % (dir_to_upload, dir_size / 1024 / 1024)
+
+ res = s3upload.do_upload(dir_to_upload, verbose=False, dryrun=dryrun)
+ files_uploaded += res['files_uploaded']
+ print "%s files uploaded in %.02fs" % (res['files_uploaded'], res['sec_elapsed'])
+
+ dirs_uploaded += 1
+ reclaimed_space += dir_size
+
+ if dryrun:
+ cur_freespace -= dir_size
+ else:
+ cur_freespace = float(freespace(path))
+ print "%.02f MBs now free" % (cur_freespace / 1024 / 1024)
+ print
+
+ except Exception:
+ print "An unexpected error occured!"
+ error = True
+ traceback.print_exc()
+
+ print "---------------------------------------"
+ if error:
+ pass
+ else:
+ pass
+ print "Finished successfully" if not error else "!!! Terminated abnormally !!!"
+ print "Current free space: %.02f MBs" % (cur_freespace / 1024 / 1024)
+ print "Reclaimed space: %.02f MBs" % (reclaimed_space / 1024 / 1024)
+ print "Directories uploaded: %s" % dirs_uploaded
+ print "Files uploaded: %s" % files_uploaded
+
+
if __name__ == '__main__':
    # Command line: dailyimgupload.py workingdir path megabytes [dryrun]
    if not 4 <= len(sys.argv) <= 5:
        print "usage: dailyimgupload.py workingdir path megabytes [dryrun]"
        sys.exit(1)

    # Working directory to run from before uploading.
    # NOTE(review): presumably chdir makes the S3 keys relative to
    # workingdir -- confirm against s3upload's path handling.
    wd = sys.argv[1]
    if not os.path.isdir(wd):
        print "Invalid working directory: %s" % wd
        sys.exit(1)
    print "Switching working directory to %s" % wd
    os.chdir(wd)

    # Directory whose date-named subdirectories will be uploaded.
    path = sys.argv[2]
    if not os.path.isdir(path):
        print "invalid image directory: %s" % path
        sys.exit(1)

    # Free-space target, in megabytes; must parse as a float.
    mbs = sys.argv[3]
    try:
        target_free_mbs = float(mbs)
    except ValueError:
        print "invalid number of megabytes: %s" % mbs
        sys.exit(1)

    # Optional dryrun flag: must be the literal string 'true' or 'false'.
    # Defaults to True (dry run) when omitted, so a real upload requires
    # an explicit 'false'.
    if len(sys.argv) == 5:
        dryrun = sys.argv[4]
        if dryrun in ('true', 'false'):
            dryrun = dryrun == 'true'
        else:
            print "invalid dry run argument: %s (must be either 'true' or 'false')" % dryrun
            sys.exit(1)
    else:
        dryrun = True

    upload_dirs_until_free(path, target_free_mbs, dryrun)
diff --git a/scripts/s3upload.py b/scripts/s3upload.py index 724561c..9263a8f 100644 --- a/scripts/s3upload.py +++ b/scripts/s3upload.py @@ -9,7 +9,15 @@ CONN = None AWS_ACCESS_KEY_ID = 'AKIAIOP42NFKLLJXEGJQ' AWS_SECRET_ACCESS_KEY = '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' BUCKET_NAME = 'dumpfm' -COUNTER = 0 + +def get_or_initialize_aws_connection(): + global CONN + if not CONN: + print "Initializing AWS connection with ID %s, bucket %s" % (AWS_ACCESS_KEY_ID, + BUCKET_NAME) + CONN = S3.AWSAuthConnection(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY) + return CONN + def retry_func(f, count): try: @@ -20,8 +28,7 @@ def retry_func(f, count): print 'Error! retrying %s more time(s)' % (count - 1) retry_func(f, count - 1) -def upload_file(path): - global COUNTER +def upload_file(path, conn, verbose=True, dryrun=False): path = os.path.normpath(path) if path == '.' or not os.path.isfile(path): return @@ -34,35 +41,44 @@ def upload_file(path): path = path.replace('\\', '/') # Windows hack start = time.time() def do_upload(): - CONN.put(BUCKET_NAME, path, S3.S3Object(filedata), + conn.put(BUCKET_NAME, path, S3.S3Object(filedata), {'x-amz-acl': 'public-read', 'Content-Type': content_type}) - retry_func(do_upload, 3) + + if not dryrun: + retry_func(do_upload, 3) ms_took = (time.time() - start) * 1000 - print "uploaded %s (%0.0fms) (%sKB)" % (path, ms_took, size / 1024) - COUNTER += 1 + if verbose: + print "uploaded %s (%0.0fms) (%sKB)" % (path, ms_took, size / 1024) + return 1 -def upload_directory(path): +def upload_directory(path, conn, verbose=True, dryrun=False): + counter = 0 for f in sorted(os.listdir(path)): subpath = os.path.join(path, f) if os.path.isdir(subpath): - upload_directory(subpath) + counter += upload_directory(subpath, conn, verbose=verbose, dryrun=dryrun) else: - upload_file(subpath) + counter += upload_file(subpath, conn, verbose=verbose, dryrun=dryrun) + return counter -def do_upload(path): - global CONN - CONN = S3.AWSAuthConnection(AWS_ACCESS_KEY_ID, 
AWS_SECRET_ACCESS_KEY) +def do_upload(path, verbose=True, dryrun=False): + conn = get_or_initialize_aws_connection() + counter = 0 start = time.time() if os.path.isdir(path): - upload_directory(path) + counter += upload_directory(path, conn, verbose=verbose, dryrun=dryrun) else: - upload_file(path) + counter += upload_file(path, conn, verbose=verbose, dryrun=dryrun) s_took = (time.time() - start) - print "uploaded %s files in %0.0fs" % (COUNTER, s_took) + + if verbose: + print "uploaded %s files in %0.0fs" % (counter, s_took) + return { 'sec_elapsed': s_took, + 'files_uploaded': counter } if __name__ == "__main__": |
