summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorScott Ostler <scottbot9000@gmail.com>2010-11-06 20:29:40 -0400
committerScott Ostler <scottbot9000@gmail.com>2010-11-06 20:29:40 -0400
commitd765ecafa41542f3745522c164f9c8ed9bb0eb62 (patch)
treed6eeab9158af66bf31c659516884e1d557ce0762
parente2e82dc608862c0c72e0d45599f2768665edf7dd (diff)
Added dailyimgupload.py, updated s3upload.py
-rw-r--r--scripts/dailyimgupload.py152
-rw-r--r--scripts/s3upload.py48
2 files changed, 184 insertions, 16 deletions
diff --git a/scripts/dailyimgupload.py b/scripts/dailyimgupload.py
new file mode 100644
index 0000000..81ee44a
--- /dev/null
+++ b/scripts/dailyimgupload.py
@@ -0,0 +1,152 @@
+import ctypes
+import datetime
+import os
+import platform
+import sys
+import traceback
+import s3upload
+
+
def freespace(p):
    """Return the number of free bytes on the drive that ``p`` is on.

    Adapted from:
    http://atlee.ca/blog/2008/02/23/getting-free-diskspace-in-python/
    http://stackoverflow.com/questions/51658/cross-platform-space-remaining-on-volume-using-python
    """
    if platform.system() != 'Windows':
        # POSIX: filesystem block size * blocks available to unprivileged users
        stats = os.statvfs(p)
        return stats.f_bsize * stats.f_bavail
    # Windows: query the kernel directly through ctypes
    available = ctypes.c_ulonglong(0)
    ctypes.windll.kernel32.GetDiskFreeSpaceExW(
        ctypes.c_wchar_p(p), None, None, ctypes.pointer(available))
    return available.value
+
def directory_size(path):
    """Return the total size in bytes of every file under ``path``, recursively.

    Adapted from:
    http://stackoverflow.com/questions/1392413/calculating-a-directory-size-using-python
    """
    return sum(
        os.path.getsize(os.path.join(dirpath, name))
        for dirpath, _dirnames, filenames in os.walk(path)
        for name in filenames)
+
def parse_date_dir(d, date_fmt):
    """Parse the basename of directory ``d`` as a datetime using ``date_fmt``.

    Raises ValueError when ``d`` does not exist, is not a directory, or its
    basename does not match ``date_fmt`` (strptime raises the last case).
    """
    if os.path.exists(d):
        if not os.path.isdir(d):
            raise ValueError('%s is not a directory' % d)
    else:
        raise ValueError('%s does not exist' % d)
    return datetime.datetime.strptime(os.path.basename(d), date_fmt)
+
def is_date_dir(d, date_fmt):
    """Return True iff ``d`` is an existing directory whose basename parses as ``date_fmt``."""
    try:
        parse_date_dir(d, date_fmt)
    except ValueError:
        return False
    return True
+
def get_directory_list(path, date_fmt='%Y%m%d'):
    """Return full paths of date-named subdirectories of ``path``, oldest first.

    Entries whose basename does not parse as ``date_fmt`` (and non-directories)
    are filtered out.
    """
    candidates = (os.path.join(path, name) for name in os.listdir(path))
    date_dirs = [c for c in candidates if is_date_dir(c, date_fmt)]
    date_dirs.sort(key=lambda d: parse_date_dir(d, date_fmt))
    return date_dirs
+
+
+def upload_dirs_until_free(path, target_free_mbs, dryrun):
+ starting_freespace = float(freespace(path))
+ dirs_uploaded = 0
+ files_uploaded = 0
+ cur_freespace = starting_freespace
+ reclaimed_space = 0
+ error = False
+ directory_list = get_directory_list(path)
+
+ if not directory_list:
+ print "Target directory %s has no subdirectories!" % path
+ sys.exit(1)
+
+ print "Target directory: %s" % path
+ print "Starting freespace: %.02f MBs" % (starting_freespace / 1024 / 1024)
+ print "Target freespace: %.02f MBs" % target_free_mbs
+ print "Image subdirectories: %s" % len(directory_list)
+
+ if dryrun:
+ print
+ print '!!! Doing dryrun -- current free space will be estimated !!!'
+
+ print
+ try:
+ for dir_to_upload in directory_list:
+ if cur_freespace >= target_free_mbs * 1024 * 1024:
+ break
+
+ dir_size = directory_size(dir_to_upload)
+ print 'Uploading %s (%.02f MBs)' % (dir_to_upload, dir_size / 1024 / 1024)
+
+ res = s3upload.do_upload(dir_to_upload, verbose=False, dryrun=dryrun)
+ files_uploaded += res['files_uploaded']
+ print "%s files uploaded in %.02fs" % (res['files_uploaded'], res['sec_elapsed'])
+
+ dirs_uploaded += 1
+ reclaimed_space += dir_size
+
+ if dryrun:
+ cur_freespace -= dir_size
+ else:
+ cur_freespace = float(freespace(path))
+ print "%.02f MBs now free" % (cur_freespace / 1024 / 1024)
+ print
+
+ except Exception:
+ print "An unexpected error occured!"
+ error = True
+ traceback.print_exc()
+
+ print "---------------------------------------"
+ if error:
+ pass
+ else:
+ pass
+ print "Finished successfully" if not error else "!!! Terminated abnormally !!!"
+ print "Current free space: %.02f MBs" % (cur_freespace / 1024 / 1024)
+ print "Reclaimed space: %.02f MBs" % (reclaimed_space / 1024 / 1024)
+ print "Directories uploaded: %s" % dirs_uploaded
+ print "Files uploaded: %s" % files_uploaded
+
+
+if __name__ == '__main__':
+ if not 4 <= len(sys.argv) <= 5:
+ print "usage: dailyimgupload.py workingdir path megabytes [dryrun]"
+ sys.exit(1)
+
+ wd = sys.argv[1]
+ if not os.path.isdir(wd):
+ print "Invalid working directory: %s" % wd
+ sys.exit(1)
+ print "Switching working directory to %s" % wd
+ os.chdir(wd)
+
+ path = sys.argv[2]
+ if not os.path.isdir(path):
+ print "invalid image directory: %s" % path
+ sys.exit(1)
+
+ mbs = sys.argv[3]
+ try:
+ target_free_mbs = float(mbs)
+ except ValueError:
+ print "invalid number of megabytes: %s" % mbs
+ sys.exit(1)
+
+ if len(sys.argv) == 5:
+ dryrun = sys.argv[4]
+ if dryrun in ('true', 'false'):
+ dryrun = dryrun == 'true'
+ else:
+ print "invalid dry run argument: %s (must be either 'true' or 'false')" % dryrun
+ sys.exit(1)
+ else:
+ dryrun = True
+
+ upload_dirs_until_free(path, target_free_mbs, dryrun)
diff --git a/scripts/s3upload.py b/scripts/s3upload.py
index 724561c..9263a8f 100644
--- a/scripts/s3upload.py
+++ b/scripts/s3upload.py
@@ -9,7 +9,15 @@ CONN = None
# SECURITY: live-looking AWS credentials are committed in source control.
# They should be rotated and loaded from the environment or a config file
# outside the repository instead of being hard-coded here.
AWS_ACCESS_KEY_ID = 'AKIAIOP42NFKLLJXEGJQ'
AWS_SECRET_ACCESS_KEY = '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'
# Destination S3 bucket for all uploads
BUCKET_NAME = 'dumpfm'
-COUNTER = 0
+
def get_or_initialize_aws_connection():
    """Return the module-level S3 connection, creating it on first use.

    Lazily initializes the global ``CONN`` so repeated do_upload() calls
    share a single authenticated connection.
    """
    global CONN
    if not CONN:
        print "Initializing AWS connection with ID %s, bucket %s" % (AWS_ACCESS_KEY_ID,
                                                                     BUCKET_NAME)
        CONN = S3.AWSAuthConnection(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
    return CONN
+
def retry_func(f, count):
try:
@@ -20,8 +28,7 @@ def retry_func(f, count):
print 'Error! retrying %s more time(s)' % (count - 1)
retry_func(f, count - 1)
-def upload_file(path):
- global COUNTER
+def upload_file(path, conn, verbose=True, dryrun=False):
path = os.path.normpath(path)
if path == '.' or not os.path.isfile(path):
return
@@ -34,35 +41,44 @@ def upload_file(path):
path = path.replace('\\', '/') # Windows hack
start = time.time()
def do_upload():
- CONN.put(BUCKET_NAME, path, S3.S3Object(filedata),
+ conn.put(BUCKET_NAME, path, S3.S3Object(filedata),
{'x-amz-acl': 'public-read', 'Content-Type': content_type})
- retry_func(do_upload, 3)
+
+ if not dryrun:
+ retry_func(do_upload, 3)
ms_took = (time.time() - start) * 1000
- print "uploaded %s (%0.0fms) (%sKB)" % (path, ms_took, size / 1024)
- COUNTER += 1
+ if verbose:
+ print "uploaded %s (%0.0fms) (%sKB)" % (path, ms_took, size / 1024)
+ return 1
def upload_directory(path, conn, verbose=True, dryrun=False):
    """Recursively upload every file under ``path`` to S3; return the count.

    path    -- directory to walk (children visited in sorted order)
    conn    -- S3 connection passed through to upload_file()
    verbose -- forwarded: print per-file progress
    dryrun  -- forwarded: skip the actual transfer

    upload_file() returns 1 on upload but falls through to an implicit None
    when it skips an entry (e.g. a non-regular file), which would make
    ``counter +=`` raise TypeError -- coerce None to 0 with ``or 0``.
    """
    counter = 0
    for f in sorted(os.listdir(path)):
        subpath = os.path.join(path, f)
        if os.path.isdir(subpath):
            counter += upload_directory(subpath, conn, verbose=verbose, dryrun=dryrun)
        else:
            counter += upload_file(subpath, conn, verbose=verbose, dryrun=dryrun) or 0
    return counter
-def do_upload(path):
- global CONN
- CONN = S3.AWSAuthConnection(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
+def do_upload(path, verbose=True, dryrun=False):
+ conn = get_or_initialize_aws_connection()
+ counter = 0
start = time.time()
if os.path.isdir(path):
- upload_directory(path)
+ counter += upload_directory(path, conn, verbose=verbose, dryrun=dryrun)
else:
- upload_file(path)
+ counter += upload_file(path, conn, verbose=verbose, dryrun=dryrun)
s_took = (time.time() - start)
- print "uploaded %s files in %0.0fs" % (COUNTER, s_took)
+
+ if verbose:
+ print "uploaded %s files in %0.0fs" % (counter, s_took)
+ return { 'sec_elapsed': s_took,
+ 'files_uploaded': counter }
if __name__ == "__main__":