summaryrefslogtreecommitdiff
path: root/ll.py
diff options
context:
space:
mode:
Diffstat (limited to 'll.py')
-rw-r--r--ll.py149
1 files changed, 149 insertions, 0 deletions
diff --git a/ll.py b/ll.py
new file mode 100644
index 0000000..e88bd7e
--- /dev/null
+++ b/ll.py
@@ -0,0 +1,149 @@
+#!/usr/bin/python2
+# coding: utf-8
+import sys
+import os
+import re
+import time
+from sqlalchemy import BigInteger, Column, Integer, String, create_engine
+from sqlalchemy.ext.declarative import declarative_base
+from sqlalchemy.orm import sessionmaker
+
+LOGS_PATH = "/mnt/sdc1/s3LOGS"
+SUCCESS_LOG = "/mnt/sdc1/db_success"
+FAILURE_LOG = "/mnt/sdc1/db_failure"
+
+
+DB_HOST = "localhost"
+DB_USER = "asdfus"
+DB_PASSWORD = "gTYgT&M6q"
+DB_NAME = "asdfus"
+
+Base = declarative_base()
+metadata = Base.metadata
+
+
class Iasdfus(Base):
    """ORM model for one tracked S3 object and its access statistics."""
    __tablename__ = 'iasdfus'

    id = Column(BigInteger, primary_key=True)
    address = Column(String(1000), index=True)  # S3 object key/path parsed from the access log
    last_accessed = Column(BigInteger)          # epoch seconds (int(time.mktime(...)) in read_log)
    times_dumped = Column(Integer)
    times_accessed = Column(Integer)
    deleted = Column(Integer)                   # 0/1 flag; presumably maps to BOOL in MySQL — confirm
+
+
# Reference DDL — NOTE(review): varchar(255) and BOOL here disagree with the
# model's String(1000) and Integer above; confirm which matches the live schema.
# create table iasdfus (id BIGINT NOT NULL AUTO_INCREMENT, address varchar(255) NOT NULL UNIQUE, last_accessed BIGINT, times_dumped int, times_accessed int, deleted BOOL, PRIMARY KEY(id));
# create index address_index on iasdfus (address(200));
+
class Db(object):
    """Wrapper for all db methods (one engine, one sessionmaker).

    NOTE(review): credentials are hard-coded in module constants above;
    consider loading them from the environment or a config file.
    """
    def __init__(self):
        engine = create_engine('mysql://{}:{}@{}/{}'.format(
            DB_USER,
            DB_PASSWORD,
            DB_HOST,
            DB_NAME
        ))
        self.Session = sessionmaker(bind=engine)

    def update_existing(self, last_accessed=None, address=None):
        """Refresh the row for `address` if this access is newer.

        Returns True when a row for `address` exists (updated or not),
        False when no row matched. The update bumps `last_accessed` and
        increments `times_accessed` only for strictly newer timestamps.
        """
        session = self.Session()
        try:
            results = session.query(Iasdfus).filter_by(address=address)
            # One query instead of the original's repeated count()/first() calls.
            row = results.first()
            if row is None:
                return False
            if last_accessed > row.last_accessed:
                results.update({
                    "last_accessed": last_accessed,
                    "times_accessed": row.times_accessed + 1,
                })
                session.commit()
            return True
        finally:
            # Always release the connection, even when the query or commit
            # raises (the original leaked the session on any exception).
            session.close()

    def insert_new_from_log(self, **kwargs):
        """Insert a brand-new row built from one parsed log line.

        Expects `last_accessed` and `address` in kwargs. Failures are
        reported on stderr and swallowed (best-effort, as before).
        """
        session = self.Session()
        try:
            session.add(Iasdfus(
                last_accessed=kwargs["last_accessed"],
                address=kwargs['address'],
                times_accessed=1,
                times_dumped=0,
                deleted=0,
            ))
            session.commit()
        except Exception as e:
            sys.stderr.write("Unable to commit database entry\n")
            sys.stderr.write(str(e))
        finally:
            session.close()
+
+db = Db()
+
class s3LogReader(object):
    """Reads S3 access logs from LOGS_PATH and records accesses via `db`.

    Keeps an on-disk journal of already-processed files (SUCCESS_LOG) so
    re-runs skip work, and an append-only FAILURE_LOG for bad files.
    """
    def __init__(self):
        # Load the names of previously processed logfiles, if any.
        if os.path.exists(SUCCESS_LOG):
            with open(SUCCESS_LOG, "r") as success_log:
                self.successes = [line.rstrip('\n') for line in success_log]
        else:
            self.successes = []
        self.success_log = open(SUCCESS_LOG, "a")
        self.failure_log = open(FAILURE_LOG, "a")

    def list_logdir(self):
        """Return the filenames currently present in the log directory."""
        return os.listdir(LOGS_PATH)

    def _logfile_fullpath(self, logfile):
        """Absolute path of `logfile` inside LOGS_PATH."""
        return os.path.join(LOGS_PATH, logfile)

    def log_success(self, logfile, remove=False):
        """Journal a processed logfile; optionally delete it from disk.

        An unlink failure is fatal (exit 1) so a kept-around file can
        never be double-counted on the next run.
        """
        self.success_log.write("%s\n" % logfile)
        if remove:
            logfile_path = self._logfile_fullpath(logfile)
            try:
                os.unlink(logfile_path)
            except Exception as e:
                sys.stderr.write("Unable to unlink %s\n" % logfile_path)
                sys.stderr.write("%s\n" % e)
                self.success_log.close()
                sys.exit(1)

    def log_error(self, logfile):
        """Report a bad logfile on stderr and append it to FAILURE_LOG."""
        sys.stderr.write("Unable to process logfile: %s\n" % logfile)
        self.failure_log.write("%s\n" % logfile)

    def read_log(self, logfile):
        """Parse one S3 access log; upsert a db row per matching entry."""
        if logfile in self.successes:
            sys.stderr.write("it's in successes already...returning\n")
            return
        logfile_path = self._logfile_fullpath(logfile)
        # FIX: use a context manager — the original never closed the file.
        with open(logfile_path, "r") as f:
            for line in f:
                line = re.sub(r'\[', "", line)
                parts = re.split(r'\s', line)
                timestamp = parts[2]
                file_address = parts[8]
                if re.match(r'.*im/', file_address):
                    # FIX: parse the log line's own timestamp. The original
                    # passed the hard-coded debug literal "08/Sep/2014:19:58:11"
                    # to strptime, stamping every row with the same date.
                    # NOTE(review): mktime treats this as local time — confirm
                    # whether the logs are UTC.
                    parsed = time.strptime(timestamp, "%d/%b/%Y:%H:%M:%S")
                    epoch = int(time.mktime(parsed))
                    if not db.update_existing(last_accessed=epoch,
                                              address=file_address):
                        db.insert_new_from_log(last_accessed=epoch,
                                               address=file_address)

    def process_logdir(self):
        """Process every file in the log directory, journaling the outcome."""
        for log in self.list_logdir():
            try:
                self.read_log(log)
                self.log_success(log)
            except Exception as e:
                sys.stderr.write("%s\n" % e)
                # FIX: record the failure — log_error existed but was never
                # called, so FAILURE_LOG stayed empty.
                self.log_error(log)
+
# Entry point: scan the whole log directory once, then exit.
if __name__ == "__main__":
    logreader = s3LogReader()
    logreader.process_logdir()