summary refs log tree commit diff
path: root/scripts
diff options
context:
space:
mode:
author Scott Ostler <scottbot9000@gmail.com> 2010-08-06 02:09:39 -0400
committer Scott Ostler <scottbot9000@gmail.com> 2010-08-06 02:09:39 -0400
commit 21b3acd64d3df9ee780876fff63e22fca99c7672 (patch)
tree 4bce5e00ebe7f702a4fb5bad2d0393a927134070 /scripts
parent 41f5177eba9650b181703eb95a89a7aa12ed6f92 (diff)
Add s3upload script
Diffstat (limited to 'scripts')
-rw-r--r-- scripts/s3upload.py 80
1 files changed, 80 insertions, 0 deletions
diff --git a/scripts/s3upload.py b/scripts/s3upload.py
new file mode 100644
index 0000000..d355dfd
--- /dev/null
+++ b/scripts/s3upload.py
@@ -0,0 +1,80 @@
+import datetime
+import mimetypes
+import os
+import sys
+import S3
+
# Shared S3 connection handle; do_upload() assigns it before uploading.
CONN = None

# Credentials come from the environment so that no secret lives in the
# repository. Each value is None when its variable is unset.
# NOTE: the key pair that used to be hard-coded here has been published in
# version control and must be treated as compromised (rotate it).
AWS_ACCESS_KEY_ID = os.environ.get('AWS_ACCESS_KEY_ID')
AWS_SECRET_ACCESS_KEY = os.environ.get('AWS_SECRET_ACCESS_KEY')

# Bucket that receives every uploaded object.
BUCKET_NAME = 'dumpfm'
+
def parse_date(date_string, fmt='%Y%m%d'):
    """Convert ``date_string`` into a ``datetime.datetime`` using ``fmt``.

    The default format is a compact year-month-day stamp such as
    ``20100806``. ``strptime`` raises ``ValueError`` when the text does not
    match the format.
    """
    to_datetime = datetime.datetime.strptime
    return to_datetime(date_string, fmt)
+
def upload_file(path, dry_run=True):
    """Upload one file to the S3 bucket, keyed by its normalised path.

    Anything that is not a regular file is skipped without comment. The
    object key is the normalised path with backslashes replaced by forward
    slashes, and the object is stored with a ``public-read`` ACL.

    path: location of the file on disk.
    dry_run: when true, only print the key that would be uploaded; the file
        is not read and nothing is sent.
    """
    path = os.path.normpath(path)
    if path == '.' or not os.path.isfile(path):
        return

    # Unknown extensions fall back to text/plain.
    content_type = mimetypes.guess_type(path)[0] or 'text/plain'

    key = path.replace('\\', '/')  # Windows hack
    print('- %s' % key)
    if dry_run:
        return

    # Read through a context manager so the handle is closed right away
    # instead of lingering until garbage collection.
    with open(path, 'rb') as handle:
        filedata = handle.read()
    CONN.put(BUCKET_NAME, key, S3.S3Object(filedata),
             {'x-amz-acl': 'public-read', 'Content-Type': content_type})
+
+
def do_upload(directory, start_date, end_date, dry_run=True):
    """Upload the files of every date-named subdirectory within a range.

    Each entry of ``directory`` whose name parses with ``parse_date`` and
    whose date lies between ``start_date`` and ``end_date`` (both inclusive)
    has its direct children passed to ``upload_file``. Entries whose names
    are not dates, or that are not directories, are ignored.

    directory: folder containing the date-named subdirectories.
    start_date, end_date: ``datetime`` bounds of the range to upload.
    dry_run: forwarded to ``upload_file``; when true nothing is sent.
    """
    global CONN
    CONN = S3.AWSAuthConnection(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
    counter = 0
    for subdir in os.listdir(directory):
        try:
            subdir_date = parse_date(subdir)
        except ValueError:
            # Name is not a date stamp, so it is not an upload candidate.
            continue

        if not (start_date <= subdir_date <= end_date):
            continue

        subdir_path = os.path.join(directory, subdir)
        if not os.path.isdir(subdir_path):
            # A plain file that happens to have a date-like name.
            continue

        print("uploading contents of %s" % subdir)
        for filename in os.listdir(subdir_path):
            path = os.path.join(subdir_path, filename)
            # upload_file() skips anything that is not a regular file, so
            # only count the entries it will actually handle.
            if os.path.isfile(path):
                counter += 1
            upload_file(path, dry_run=dry_run)

    print('\nUploaded %s files' % counter)
+
if __name__ == "__main__":
    # Command-line entry point:
    #   s3upload.py directory startdate enddate [dryrun=true]
    # Dates use the default parse_date format; dry run is on unless the
    # optional fourth argument explicitly turns it off.
    if not 4 <= len(sys.argv) <= 5:
        print('usage: s3upload.py directory startdate enddate [dryrun=true]')
        sys.exit(1)

    directory = sys.argv[1]
    start_date = sys.argv[2]
    end_date = sys.argv[3]
    dry_run = sys.argv[4] if len(sys.argv) == 5 else 'true'

    # Only an explicit negative value enables a real upload; a typo must
    # never be interpreted as permission to publish files.
    flag = dry_run.lower()
    if flag == 'true':
        print('doing dry run')
        dry_run = True
    elif flag in ('false', 'no', 'off', '0'):
        dry_run = False
    else:
        print('invalid dryrun value: %s' % dry_run)
        sys.exit(1)

    try:
        start_date = parse_date(start_date)
    except ValueError:
        print("invalid start date: %s" % start_date)
        sys.exit(1)

    try:
        end_date = parse_date(end_date)
    except ValueError:
        print("invalid end date: %s" % end_date)
        sys.exit(1)

    do_upload(directory, start_date, end_date, dry_run)