Diffstat (limited to 'feeder/feeder.py')
-rwxr-xr-x  feeder/feeder.py  237
1 file changed, 237 insertions, 0 deletions
diff --git a/feeder/feeder.py b/feeder/feeder.py
new file mode 100755
index 0000000..39b43e5
--- /dev/null
+++ b/feeder/feeder.py
@@ -0,0 +1,237 @@
+#!/usr/bin/python
+
+#import MySQLdb
+import urllib
+import time
+import re
+import os
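+
+# current time as a unix timestamp (seconds since the epoch)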
+def now ():
+ return int(time.mktime(time.localtime()))
+
+SERVER_HOST = 'scannerjammer.com'
+SERVER_PORT = 80
+
+API_HEADER = "#@scanjam 0.2\n"
+
+HTML_TITLE_RE = re.compile('<title>([^<]+)</title>')
+
+DUPE_LIST = "feeder/dupes.test"
+#DUPE_LIST = "feeder/dupes.txt"
+FEED_LIST = "feeder/feeds.txt"
+#FEED_PATH = "feeder/feeds"
+FEED_PATH = "feeder/test"
+FEED_STALE_TIME = 0
+#FEED_STALE_TIME = 3600
+FEED_ROOM = "feederbleeder"
+
+
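+# scanjam api endpoints; the trailing "/" matters unless APPEND_SLASH is on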
+API_LOGIN = "http://"+SERVER_HOST+":"+str(SERVER_PORT)+"/api/auth/sneakin"+"/"
+API_POST_VIDEO = "http://"+SERVER_HOST+":"+str(SERVER_PORT)+"/api/room/video"+"/"
+API_POST_IMAGE = "http://"+SERVER_HOST+":"+str(SERVER_PORT)+"/api/room/say"+"/"
+API_LOGOUT = "http://"+SERVER_HOST+":"+str(SERVER_PORT)+"/api/auth/logout"+"/"
+
+#{{{ **USE IF YOU ADD APPEND_SLASH
+#API_LOGIN = "http://"+SERVER_HOST+":"+str(SERVER_PORT)+"/api/auth/sneakin"
+#API_POST_VIDEO = "http://"+SERVER_HOST+":"+str(SERVER_PORT)+"/api/room/video"
+#API_POST_IMAGE = "http://"+SERVER_HOST+":"+str(SERVER_PORT)+"/api/room/say"
+#API_LOGOUT = "http://"+SERVER_HOST+":"+str(SERVER_PORT)+"/api/auth/logout"
+#}}}
+
+print API_LOGIN
+print API_POST_VIDEO
+print API_POST_IMAGE
+print API_LOGOUT
+
+
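+# urls already posted; seeded from DUPE_LIST and saved back when the run ends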
+dupes = {}
+
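+# a single feed: fetch (or read cached) html, scrape media urls, and
+# report anything new to the scanjam server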
+class Feed:
+ def __init__ (self, src, title, userid):
+ self.src = src
+ self.title = title
+ self.userid = userid
+        self.domain = "http://" + src.split("/")[2]   # feed's scheme + host
+ self.urls = []
+ self.images = []
+ self.load()
+
+ def load (self):
+ filename = "/".join([FEED_PATH, self.title])
+ refresh = True
+
+ # check last update of feed
+ if os.path.exists(filename):
+ stat = os.stat(filename)
+ if stat.st_mtime > now() - FEED_STALE_TIME:
+ refresh = False
+
+ # if stale/empty then download
+ if refresh:
+ print self.title, "loading from web"
+ feedhtml = urllib.urlopen(self.src).read()
+            if len(feedhtml):
+                out = open(filename, 'w')
+                out.write(feedhtml)
+                out.close()
+                self.parse(feedhtml)
+
+ # otherwise, load from disk
+ else:
+ print self.title, "loading from disk"
+ feed = open (filename, 'r')
+ feedhtml = feed.read()
+ feed.close()
+ self.parse(feedhtml)
+
+ # parse urls out of html files
+ # display these urls (by date, by source)
+ def parse (self, html):
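+        # crude tokenizer: unescape encoded tags so links inside escaped
+        # markup are seen too, then split on "<" so each token starts with
+        # a tag name ("a href=...", "iframe src=...", "img src=...")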
+        tags = html.replace("&lt;","<").split("<")
+        lastimage = ""   # most recent image url; paired with mp3 links below
+ for t in tags:
+ url = None
+ if len(t) < 1:
+ continue
+ if t[0] == "a":
+ if "href" not in t:
+ continue
+ url = self.getAttribute("href", t)
+            elif t[0:6] == "iframe":
+ if "src" not in t:
+ continue
+ url = self.getAttribute("src", t)
+ elif t[0:3] == "img":
+ if "src" not in t:
+ continue
+ if "php" in t:
+ continue
+ url = self.getAttribute("src", t)
+ if url is None:
+ continue
+ if url in dupes:
+ continue
+ if url[-3:] != "jpg":
+ continue
+ print url
+ lastimage = url
+ dupes[url.strip()] = True
+ self.images.append(url)
+ continue
+ else:
+ continue
+
+ if url is None:
+ continue
+ if url in dupes:
+ continue
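+            # recognized video/audio hosts get queued for the video api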
+ if "youtube.com" in url:
+ dupes[url.strip()] = True
+ self.urls.append(url)
+ if "youtu.be" in url:
+ dupes[url.strip()] = True
+ self.urls.append(url)
+ if "vimeo.com" in url:
+ dupes[url.strip()] = True
+ # http://player.vimeo.com/video/23731158
+ if "http://player.vimeo.com/video/" in url:
+ url = "http://vimeo.com/" + url.replace('http://player.vimeo.com/video/', '')
+ self.urls.append(url)
+ if "soundcloud.com" in url:
+ dupes[url.strip()] = True
+ self.urls.append(url)
+ if url[-3:] == "mp3":
+ dupes[url.strip()] = True
+ u = url.replace(" ","%20")
+ self.urls.append(lastimage+" "+u)
+
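+    # pull the quoted value of attr out of a raw tag string;
+    # relative (non-http) urls are dropped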
+ def getAttribute (self, attr, s):
+ quote = None
+ if '\"' in s:
+ quote = '\"'
+ elif '\'' in s:
+ quote = '\''
+ else:
+ return None
+
+ attrpos = s.find(attr)
+ startquotepos = s.find(quote, attrpos+1)
+ endquotepos = s.find(quote, startquotepos+1)
+        url = s[startquotepos+1:endquotepos].strip()
+ #if url[0] == "/":
+ # url = self.domain + url
+ if url[0:4] != "http":
+ return None
+ return url
+ def getTitle (self, s):
+ if '>' in s:
+ return s.split(">")[1]
+ return None
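+    # trade userid/username for a session token; the api replies with a
+    # header line, a status line, then a tab-separated payload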
+ def login (self):
+ print "getting token for", self.title
+ data = urllib.urlencode({ 'userid': self.userid, 'username': self.title })
+ f = urllib.urlopen(API_LOGIN, data)
+ api = f.read().split("\n")
+ if api[0] != "#@scanjam 0.3b" or api[1] != "OK":
+ print "ERROR GETTING TOKEN"
+ return None
+ payload = api[2].split("\t")
+ print "GOT SESSION:", payload[2]
+ time.sleep(0.5)
+ return payload[2]
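+    # post everything collected by parse() to FEED_ROOM, in reverse order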
+ def report (self):
+ if len(self.urls) == 0 and len(self.images) == 0:
+ print self.title, "nothing to do"
+ return
+ self.session = self.login()
+ if self.session is None:
+ print self.title, "error getting session"
+ return
+ print ""
+ print self.title, "reported", len(self.urls), "urls,", len(self.images), "images"
+ for url in reversed(self.urls):
+ if "wearesolidgold" in url:
+ continue
+ if url == "http://vimeo.com/":
+ continue
+ print "URL", url
+ data = urllib.urlencode({ 'session': self.session, 'room': FEED_ROOM, 'msg': url })
+ f = urllib.urlopen(API_POST_VIDEO, data)
+ print f.read()
+            #print data, API_POST_VIDEO; exit(0)
+ time.sleep(5)
+ for url in reversed(self.images):
+ print "IMG", url
+ data = urllib.urlencode({ 'session': self.session, 'room': FEED_ROOM, 'msg': url })
+ f = urllib.urlopen(API_POST_IMAGE, data)
+ time.sleep(5)
+
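+# seed the global dupe table from DUPE_LIST, one url per line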
+def load_dupes ():
+ dupelist = open (DUPE_LIST, 'r')
+ for line in dupelist:
+ dupes[line.strip()] = True
+ dupelist.close()
+
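+# FEED_LIST is tab-separated: src, title, userid -- one feed per line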
+def load_feeds ():
+ feedlist = open (FEED_LIST, 'r')
+ feeds = []
+ for line in feedlist:
+ src,title,userid = line.strip().split("\t")
+ feed = Feed (src,title,userid)
+ feeds.append(feed)
+ feedlist.close()
+ for feed in reversed(feeds):
+ feed.report()
+
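+# rewrite DUPE_LIST atomically: write a temp file, then rename it into place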
+def save_dupes ():
+ dupelist = open (DUPE_LIST+".tmp", 'w')
+    for k in dupes:
+ dupelist.write(k.strip()+"\n")
+ dupelist.close()
+ os.rename(DUPE_LIST+".tmp", DUPE_LIST)
+
+load_dupes()
+load_feeds()
+save_dupes()
+