summaryrefslogtreecommitdiff
path: root/ll.py
blob: e88bd7e6a29af19b86fa886c3375142ff39258cc (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
#!/usr/bin/python2
# coding: utf-8
import sys
import os
import re
import time
from sqlalchemy import BigInteger, Column, Integer, String, create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker

# Filesystem locations used by s3LogReader:
#   LOGS_PATH   - directory whose entries are read as log files
#   SUCCESS_LOG - file listing the log files already processed (read at
#                 start-up, appended to afterwards)
#   FAILURE_LOG - file that log_error() appends failed log file names to
LOGS_PATH = "/mnt/sdc1/s3LOGS"
SUCCESS_LOG = "/mnt/sdc1/db_success"
FAILURE_LOG = "/mnt/sdc1/db_failure"


# MySQL connection settings; Db.__init__ interpolates them into the
# 'mysql://user:password@host/name' engine URL.
# NOTE(review): the password is hard-coded in source control -- consider
# loading it from the environment or a config file outside the repo.
DB_HOST = "localhost"
DB_USER = "asdfus"
DB_PASSWORD = "gTYgT&M6q"
DB_NAME = "asdfus"

# Declarative base class for the ORM models defined in this module, plus a
# module-level alias for its MetaData object.
Base = declarative_base()
metadata = Base.metadata


class Iasdfus(Base):
    """ORM mapping for the ``iasdfus`` table: one row per logged file address."""
    __tablename__ = 'iasdfus'

    # Primary key.
    id = Column(BigInteger, primary_key=True)
    # Address (key) of the file as it appears in the access log; indexed
    # because Db.update_existing filters on it.
    address = Column(String(1000), index=True)
    # Access time as an integer; s3LogReader.read_log stores
    # int(time.mktime(...)), i.e. seconds since the epoch.
    last_accessed = Column(BigInteger)
    # Initialised to 0 on insert; not modified anywhere else in this module.
    times_dumped = Column(Integer)
    # Initialised to 1 on insert and bumped by Db.update_existing.
    times_accessed = Column(Integer)
    # Initialised to 0 on insert; presumably a soft-delete flag -- TODO confirm.
    deleted = Column(Integer)


# create table iasdfus (id BIGINT NOT NULL AUTO_INCREMENT, address varchar(255) NOT NULL UNIQUE, last_accessed BIGINT, times_dumped int, times_accessed int, deleted BOOL, PRIMARY KEY(id));
# create index address_index on iasdfus (address(200));

class Db(object):
    """Wrapper for all db methods.

    Each public method opens its own short-lived session and always closes
    it, even when the query or commit fails, so connections are not leaked.
    """

    def __init__(self):
        """Create the MySQL engine from the DB_* settings and a session factory."""
        engine = create_engine('mysql://{}:{}@{}/{}'.format(
            DB_USER,
            DB_PASSWORD,
            DB_HOST,
            DB_NAME
        ))
        self.Session = sessionmaker(bind=engine)

    def update_existing(self, last_accessed=None, address=None):
        """Record one more access for an address that is already stored.

        Every row matching ``address`` gets ``times_accessed`` incremented.
        ``last_accessed`` is only moved forward: it is written when the
        stored value is missing or older than the supplied one.

        Args:
            last_accessed: access time as an integer (epoch seconds), or
                None to leave the stored value untouched.
            address: file address to look up.

        Returns:
            True if at least one matching row existed and was updated,
            False if the address is unknown (nothing is written).

        Raises:
            Whatever SQLAlchemy raises on query/commit failure; the
            transaction is rolled back first.
        """
        session = self.Session()
        try:
            results = session.query(Iasdfus).filter_by(address=address)
            row = results.first()
            if row is None:
                return False

            # Increment in SQL so the value cannot be clobbered by a
            # concurrent writer between our read and our write.
            changes = {"times_accessed": Iasdfus.times_accessed + 1}
            if last_accessed is not None and (
                    row.last_accessed is None
                    or last_accessed > row.last_accessed):
                changes["last_accessed"] = last_accessed

            # The session is discarded right after the commit, so there is
            # no need to synchronise in-memory objects with the UPDATE.
            results.update(changes, synchronize_session=False)
            session.commit()
            return True
        except Exception:
            session.rollback()
            raise
        finally:
            session.close()

    def insert_new_from_log(self, **kwargs):
        """Insert a first-seen address with its counters initialised.

        Keyword Args:
            last_accessed: access time as an integer (epoch seconds).
            address: file address taken from the log line.

        Failures (including missing keyword arguments) are reported on
        stderr and swallowed so that one bad entry does not abort a run.
        """
        session = None
        try:
            session = self.Session()
            entry_data = {
                'last_accessed': kwargs["last_accessed"],
                'address': kwargs['address'],
                'times_accessed': 1,
                'times_dumped': 0,
                'deleted': 0
            }
            session.add(Iasdfus(**entry_data))
            session.commit()
        except Exception as e:
            if session is not None:
                session.rollback()
            sys.stderr.write("Unable to commit database entry\n")
            sys.stderr.write("%s\n" % e)
        finally:
            if session is not None:
                session.close()

# Shared module-level database handle used by s3LogReader.read_log.
# NOTE: this runs at import time, so importing the module builds the engine.
db = Db()

class s3LogReader(object):
    """Feed the access-log files found in LOGS_PATH into the database.

    Names of fully processed files are appended to SUCCESS_LOG and skipped
    on later runs; names of files that raised while being processed are
    appended to FAILURE_LOG.
    """

    # strptime layout of the timestamp field once the leading "[" is removed.
    _TIMESTAMP_FORMAT = "%d/%b/%Y:%H:%M:%S"

    def __init__(self):
        """Load the names of already-processed files and open both logs."""
        if os.path.exists(SUCCESS_LOG):
            with open(SUCCESS_LOG, "r") as success_log:
                # A set keeps the per-file "already done?" check O(1).
                self.successes = set(
                    entry.rstrip("\n") for entry in success_log)
        else:
            self.successes = set()
        self.success_log = open(SUCCESS_LOG, "a")
        self.failure_log = open(FAILURE_LOG, "a")

    def list_logdir(self):
        """Return the entry names found in LOGS_PATH."""
        return os.listdir(LOGS_PATH)

    def _logfile_fullpath(self, logfile):
        """Return the path of ``logfile`` inside LOGS_PATH."""
        return os.path.join(LOGS_PATH, logfile)

    def log_success(self, logfile, remove=False):
        """Record ``logfile`` as processed and optionally delete it.

        Args:
            logfile: file name relative to LOGS_PATH.
            remove: when true, unlink the file after recording it. If the
                unlink fails the success log is closed and the process
                exits with status 1.
        """
        self.success_log.write("%s\n" % logfile)
        # Flush so the record survives a crash later in the run.
        self.success_log.flush()
        self.successes.add(logfile)
        if not remove:
            return
        logfile_path = self._logfile_fullpath(logfile)
        try:
            os.unlink(logfile_path)
        except OSError as e:
            sys.stderr.write("Unable to unlink %s\n" % logfile_path)
            sys.stderr.write("%s\n" % e)
            self.success_log.close()
            sys.exit(1)

    def log_error(self, logfile):
        """Report ``logfile`` as failed on stderr and in the failure log."""
        sys.stderr.write("Unable to process logfile: %s\n" % logfile)
        self.failure_log.write("%s\n" % logfile)
        self.failure_log.flush()

    @staticmethod
    def _parse_line(line):
        """Extract ``(epoch_seconds, file_address)`` from one log line.

        The line is split on whitespace after dropping every "[" character;
        field 2 is then the request time and field 8 the file address.

        Returns:
            A ``(int, str)`` tuple, or None when the line is blank, is
            malformed (too few fields / unparseable time; reported on
            stderr), or its address does not contain ``im/``.
        """
        if not line.strip():
            return None
        parts = re.split(r'\s', re.sub(r'\[', "", line))
        if len(parts) < 9:
            sys.stderr.write("Skipping malformed log line: %r\n" % line)
            return None
        file_address = parts[8]
        if not re.match(r'.*im/', file_address):
            return None
        try:
            parsed = time.strptime(parts[2], s3LogReader._TIMESTAMP_FORMAT)
        except ValueError:
            sys.stderr.write("Skipping malformed log line: %r\n" % line)
            return None
        # NOTE(review): mktime interprets the time as local time; if the
        # logs are written in UTC, calendar.timegm would be the exact
        # conversion -- confirm before changing stored values.
        return int(time.mktime(parsed)), file_address

    def read_log(self, logfile):
        """Process every line of ``logfile`` unless it was handled before.

        For each line whose address contains ``im/`` the matching database
        row is updated, or inserted if the address is new.

        Returns:
            True if the file was read, False if it was skipped because it
            is already listed in the success log.

        Raises:
            IOError/OSError if the file cannot be opened or read, plus
            anything raised by ``db.update_existing``.
        """
        if logfile in self.successes:
            sys.stderr.write("it's in successes already...returning\n")
            return False
        with open(self._logfile_fullpath(logfile), "r") as f:
            for line in f:
                entry = self._parse_line(line)
                if entry is None:
                    continue
                timestamp, file_address = entry
                if not db.update_existing(last_accessed=timestamp,
                                          address=file_address):
                    db.insert_new_from_log(last_accessed=timestamp,
                                           address=file_address)
        return True

    def process_logdir(self):
        """Process every entry of LOGS_PATH, logging success or failure.

        A file that raises is reported and recorded in the failure log;
        processing then continues with the next file. Files skipped as
        already processed are not written to the success log again.
        """
        for log in self.list_logdir():
            try:
                if self.read_log(log):
                    self.log_success(log)
            except Exception as e:
                sys.stderr.write("%s\n" % e)
                self.log_error(log)

if __name__ == "__main__":
    # Script entry point: build a reader and walk the whole log directory.
    s3LogReader().process_logdir()