Diffstat (limited to 'DumpSearchScraper')
-rwxr-xr-x  DumpSearchScraper  83
1 file changed, 0 insertions(+), 83 deletions(-)
diff --git a/DumpSearchScraper b/DumpSearchScraper
deleted file mode 100755
index 004aa83..0000000
--- a/DumpSearchScraper
+++ /dev/null
@@ -1,83 +0,0 @@
-#!/usr/bin/python
-
-import urllib
-import urllib2
-import simplejson
-import sys
-import os
-
-urlopen = urllib2.urlopen
-Request = urllib2.Request
-
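-# Scrape image URLs from the dump.fm search API for a given term.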
-class DumpSearchScrape:
-    def __init__(self, term):
-        self.search_api = 'http://dump.fm/cmd/search'
-        self.term = urllib.quote_plus(term)
-        self.url = self.search_api + "/" + self.term
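-    # Write the matched images into dumpselections/index.html as a simple gallery.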
-    def showSelection(self, filelist):
-        if not os.path.exists("dumpselections"):
-            os.mkdir("dumpselections")
-        f = open('dumpselections/index.html', 'w')
-        f.write("""
-        <html>
-        <head>
-        <link href="main.css" rel="stylesheet" type="text/css" />
-        </head>
-        <body>
-        """)
-        for filename in filelist:
-            f.write("<img class='examplefiles' src='" + filename + "'/>")
-        f.write("""
-        </body>
-        </html>
-        """)
-        f.close()
-    def makeScrapelist(self, makepage=False, data=None):
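-        # Send a browser-like User-Agent with the request.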
-        headers = {
-            'User-Agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)',
-            'Accept': '*/*'
-        }
-        try:
-            req = Request(self.url, data, headers)
-            response = urlopen(req)
-            urldata = simplejson.loads(response.read())
-            urllist = []
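-            # Relative paths live on dump.fm; anything else is an external host.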
-            for row in urldata:
-                if "url" not in row:
-                    continue
-                if row['url'][0] == "/":
-                    urllist.append("http://dump.fm/images" + row['url'])
-                else:
-                    urllist.append("http://" + row['url'])
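-            # Keep only URLs whose filename contains every search term;
-            # build a new list rather than removing items mid-iteration.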
-            terms = self.term.split('+')
-            matches = []
-            for item in urllist:
-                filename = item.split('/')[-1]
-                if all(term in filename for term in terms):
-                    matches.append(item)
-            if makepage:
-                self.showSelection(matches)
-            return matches[0] if matches else None
-        except IOError, e:
-            if hasattr(e, 'code'):
-                print '%s - ERROR %s' % (self.url, e.code)
-            else:
-                print '%s - ERROR %s' % (self.url, e)
-            return None
-
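-# Usage: ./DumpSearchScraper <term> [true]  (pass "true" to also write the HTML gallery)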
-if __name__ == '__main__':
-    term = sys.argv[1]
-    makepage = False
-    if len(sys.argv) > 2:
-        makepage = sys.argv[2].lower() == 'true'
-    scrappy = DumpSearchScrape(term)
-    result = scrappy.makeScrapelist(makepage)
-    print result