#!/usr/bin/python2
# coding: utf-8
"""Scan the log files in LOGS_PATH and record last-access times in the DB.

Every log line whose address field contains "im/" is pushed to the database
through the project's ``S3`` db wrapper: an existing row is updated, otherwise
a new row is inserted.

NOTE(review): the field positions used below (timestamp at index 2, address
at index 8) look like the Amazon S3 server access log layout -- confirm
against the files actually stored in LOGS_PATH.
"""
import sys
import os
import re
import time

from sqlalchemy.orm import sessionmaker

from photoblaster.db.s3db import S3 as Db

LOGS_PATH = "/mnt/sdc1/s3LOGS"

# NOTE(review): credentials are committed in source. Nothing in this module
# reads the DB_* constants (Db() is built without arguments); they are kept
# only because other modules may import them. Consider moving them to
# configuration / environment variables and rotating the password.
DB_HOST = "lalalizard.com"
DB_USER = "asdfus"
DB_PASSWORD = "gTYgT&M6q"
DB_NAME = "asdfus"

# Positions of the fields of interest after "[" is stripped from a line and
# the line is split on single whitespace characters.
_TIMESTAMP_FIELD = 2
_ADDRESS_FIELD = 8
_TIMESTAMP_FORMAT = "%d/%b/%Y:%H:%M:%S"
_WHITESPACE = re.compile(r'\s')

db = Db()


class s3LogReader(object):
    """Reads log files from LOGS_PATH and mirrors access times into ``db``."""

    def __init__(self, success_log=None, failure_log=None):
        """Create a reader.

        Args:
            success_log: optional writable file-like object; the name of each
                successfully processed log file is written to it, one per
                line. Nothing is written when it is None.
            failure_log: optional writable file-like object; the name of each
                log file that could not be processed is written to it, one
                per line. Nothing is written when it is None.
        """
        # Names of log files already processed by this instance; read_log()
        # consults this list to avoid processing the same file twice.
        self.successes = []
        self.success_log = success_log
        self.failure_log = failure_log

    def list_logdir(self):
        """Return the names of the entries in LOGS_PATH."""
        return os.listdir(LOGS_PATH)

    def _logfile_fullpath(self, logfile):
        """Return the path of ``logfile`` inside LOGS_PATH."""
        return os.path.join(LOGS_PATH, logfile)

    def log_success(self, logfile, remove=False):
        """Record ``logfile`` as processed and optionally delete it.

        Args:
            logfile: name of the log file, relative to LOGS_PATH.
            remove: when true, unlink the log file after recording it.

        Raises:
            SystemExit: with status 1 if ``remove`` is true and the file
                cannot be unlinked.
        """
        if logfile not in self.successes:
            self.successes.append(logfile)
        if self.success_log is not None:
            self.success_log.write("%s\n" % logfile)
        if not remove:
            return
        logfile_path = self._logfile_fullpath(logfile)
        try:
            os.unlink(logfile_path)
        except OSError as e:
            sys.stderr.write("Unable to unlink %s\n" % logfile_path)
            sys.stderr.write("%s\n" % e)
            if self.success_log is not None:
                self.success_log.close()
            sys.exit(1)

    def log_error(self, logfile):
        """Report on stderr (and in the failure log, if any) that
        ``logfile`` could not be processed."""
        sys.stderr.write("Unable to process logfile: %s\n" % logfile)
        if self.failure_log is not None:
            self.failure_log.write("%s\n" % logfile)

    def read_log(self, logfile):
        """Push the access records found in ``logfile`` to the database.

        Only lines whose address field contains "im/" are considered. For
        each of them ``db.update_existing`` is tried first; when it returns a
        false value ``db.insert_new_from_log`` is called instead.

        Args:
            logfile: name of the log file, relative to LOGS_PATH.

        Raises:
            IOError: if the file cannot be opened.
            IndexError: if a non-blank line has fewer fields than expected.
            ValueError: if a timestamp does not match the expected format.
        """
        if logfile in self.successes:
            sys.stderr.write("it's in successes already...returning\n")
            return
        logfile_path = self._logfile_fullpath(logfile)
        with open(logfile_path, "r") as f:
            for line in f:
                if not line.strip():
                    # Blank lines carry no record.
                    continue
                # Drop "[" so the timestamp field starts with the date.
                parts = _WHITESPACE.split(line.replace("[", ""))
                raw_timestamp = parts[_TIMESTAMP_FIELD]
                file_address = parts[_ADDRESS_FIELD]
                if "im/" not in file_address:
                    continue
                # Parse the timestamp taken from the line itself (previously
                # a fixed literal date was parsed for every record).
                # NOTE(review): mktime() interprets the struct as local
                # time; if the logged times are UTC, calendar.timegm() may
                # be the right conversion -- confirm before changing.
                parsed = time.strptime(raw_timestamp, _TIMESTAMP_FORMAT)
                timestamp = int(time.mktime(parsed))
                updated = db.update_existing(
                    last_accessed=timestamp,
                    address=file_address,
                    logfile=logfile,
                )
                if not updated:
                    db.insert_new_from_log(
                        last_accessed=timestamp,
                        address=file_address,
                    )

    def process_logdir(self):
        """Process every entry of LOGS_PATH, continuing past failures.

        A failure on one log file is written to stderr, recorded through
        log_error(), and does not stop the remaining files from being
        processed.
        """
        for log in self.list_logdir():
            try:
                self.read_log(log)
                self.log_success(log)
            except Exception as e:
                # Top-level boundary: keep going with the remaining files.
                sys.stderr.write("%s\n" % e)
                self.log_error(log)


if __name__ == "__main__":
    logreader = s3LogReader()
    logreader.process_logdir()