#!/usr/bin/python2
# coding: utf-8

import sys
import os
import re
import time

from sqlalchemy import BigInteger, Column, Integer, String, create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker

LOGS_PATH = "/mnt/sdc1/s3LOGS"
SUCCESS_LOG = "/mnt/sdc1/db_success"
FAILURE_LOG = "/mnt/sdc1/db_failure"

DB_HOST = "localhost"
DB_USER = "asdfus"
DB_PASSWORD = "gTYgT&M6q"
DB_NAME = "asdfus"

Base = declarative_base()
metadata = Base.metadata


class Iasdfus(Base):
    __tablename__ = 'iasdfus'

    id = Column(BigInteger, primary_key=True)
    address = Column(String(1000), index=True)
    last_accessed = Column(BigInteger)
    times_dumped = Column(Integer)
    times_accessed = Column(Integer)
    deleted = Column(Integer)

    # create table iasdfus (id BIGINT NOT NULL AUTO_INCREMENT,
    #     address varchar(255) NOT NULL UNIQUE, last_accessed BIGINT,
    #     times_dumped int, times_accessed int, deleted BOOL, PRIMARY KEY(id));
    # create index address_index on iasdfus (address(200));


class Db(object):
    """Wrapper for all db methods."""

    def __init__(self):
        engine = create_engine('mysql://{}:{}@{}/{}'.format(
            DB_USER, DB_PASSWORD, DB_HOST, DB_NAME
        ))
        self.Session = sessionmaker(bind=engine)

    def update_existing(self, last_accessed=None, address=None):
        """Update an existing row; return False if the address is unknown."""
        session = self.Session()
        results = session.query(Iasdfus).filter_by(address=address)
        if results.count() == 0:
            session.close()
            return False
        # Only move last_accessed forward; always bump the access counter.
        if last_accessed > results.first().last_accessed:
            results.update({"last_accessed": last_accessed})
        results.update({"times_accessed": results.first().times_accessed + 1})
        session.commit()
        session.close()
        return True

    def insert_new_from_log(self, **kwargs):
        """Insert a row for an address seen for the first time."""
        try:
            session = self.Session()
            entry_data = {
                'last_accessed': kwargs["last_accessed"],
                'address': kwargs['address'],
                'times_accessed': 1,
                'times_dumped': 0,
                'deleted': 0
            }
            session.add(Iasdfus(**entry_data))
            session.commit()
            session.close()
        except Exception as e:
            sys.stderr.write("Unable to commit database entry\n")
            sys.stderr.write("%s\n" % e)


db = Db()


class s3LogReader(object):
    """Reads S3 access logs from LOGS_PATH and records accesses in the database."""

    def __init__(self):
        # Log files already recorded in SUCCESS_LOG are skipped on later runs.
        if os.path.exists(SUCCESS_LOG):
            success_log = open(SUCCESS_LOG, "r")
            self.successes = [line.rstrip('\n') for line in success_log.readlines()]
            success_log.close()
        else:
            self.successes = []
        self.success_log = open(SUCCESS_LOG, "a")
        self.failure_log = open(FAILURE_LOG, "a")

    def list_logdir(self):
        return os.listdir(LOGS_PATH)

    def _logfile_fullpath(self, logfile):
        return os.path.join(LOGS_PATH, logfile)

    def log_success(self, logfile, remove=False):
        self.success_log.write("%s\n" % logfile)
        logfile_path = self._logfile_fullpath(logfile)
        if remove:
            try:
                os.unlink(logfile_path)
            except Exception as e:
                sys.stderr.write("Unable to unlink %s\n" % logfile_path)
                sys.stderr.write("%s\n" % e)
                self.success_log.close()
                sys.exit(1)

    def log_error(self, logfile):
        sys.stderr.write("Unable to process logfile: %s\n" % logfile)
        self.failure_log.write("%s\n" % logfile)

    def read_log(self, logfile):
        if logfile in self.successes:
            sys.stderr.write("already in successes...skipping %s\n" % logfile)
            return
        logfile_path = self._logfile_fullpath(logfile)
        f = open(logfile_path, "r")
        data = f.readlines()
        f.close()
        for line in data:
            # Assumes standard S3 server access log lines: after stripping the
            # '[' that prefixes the timestamp field and splitting on whitespace,
            # parts[2] is the request time and parts[8] is the object key.
            line = re.sub(r'\[', "", line)
            parts = re.split(r'\s', line)
            timestamp = parts[2]
            file_address = parts[8]
            if re.match(r'.*im/', file_address):
                timestamp = time.strptime(timestamp, "%d/%b/%Y:%H:%M:%S")
                timestamp = int(time.mktime(timestamp))
                if db.update_existing(last_accessed=timestamp, address=file_address):
                    continue
                else:
                    db.insert_new_from_log(last_accessed=timestamp, address=file_address)
                    continue

    def process_logdir(self):
        logs = self.list_logdir()
        for log in logs:
            try:
                self.read_log(log)
                self.log_success(log)
            except Exception as e:
                sys.stderr.write("%s\n" % e)
                self.log_error(log)


if __name__ == "__main__":
    logreader = s3LogReader()
    logreader.process_logdir()
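
# ---------------------------------------------------------------------------
# Verification sketch (assumed usage, not part of the original script): after a
# run, the ingested rows can be inspected through the same session factory from
# an interactive python2 shell, for example:
#
#   >>> session = Db().Session()
#   >>> rows = session.query(Iasdfus).order_by(Iasdfus.last_accessed.desc()).limit(5)
#   >>> for row in rows:
#   ...     print row.address, row.times_accessed
# ---------------------------------------------------------------------------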