#!/usr/bin/python
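"""Search dump.fm for images whose filenames match a search term.

Fetches JSON results from the dump.fm search API, turns them into
absolute image URLs, filters out results whose filename does not
mention the term, and optionally writes the matches into a local
HTML gallery at dumpselections/index.html.

Run with a search term as the first argument; pass "true" as an
optional second argument to also generate the gallery page.
"""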
import urllib
import urllib2
import simplejson
import sys
import os

urlopen = urllib2.urlopen
Request = urllib2.Request


class DumpSearchScrape:

    def __init__(self, term):
        self.search_api = 'http://dump.fm/cmd/search'
        self.term = urllib.quote_plus(term)
        # Build the request URL from the quoted term so that
        # multi-word searches survive the trip over HTTP.
        self.url = self.search_api + "/" + self.term

    def showSelection(self, filelist):
        """Write the matched image URLs into a bare-bones HTML gallery."""
        if not os.path.exists("dumpselections"):
            os.makedirs("dumpselections")
        f = open('dumpselections/index.html', 'w')
        f.write("""
<html>
<head>
<link href="main.css" rel="stylesheet" type="text/css" />
</head>
<body>
""")
        for src in filelist:
            f.write("<img class='examplefiles' src='" + src + "'/>")
        f.write("""
</body>
</html>
""")
        f.close()

    def makeScrapelist(self, makepage=False, data=None):
        headers = {
            'User-Agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)',
            'Accept': '*/*'
        }
        try:
            req = Request(self.url, data, headers)
            response = urlopen(req)
            thejson = response.read()
            urldata = simplejson.loads(thejson)
            # Build absolute URLs: relative paths live under
            # dump.fm/images, anything else just needs a scheme.
            urllist = []
            for row in urldata:
                if "url" not in row:
                    continue
                if row['url'].startswith("/"):
                    urllist.append("http://dump.fm/images" + row['url'])
                else:
                    urllist.append("http://" + row['url'])
            # Keep only URLs whose filename contains every search
            # term. Filter into a new list: the original removed items
            # from urllist while iterating over it, which skips
            # elements, and its regex-split check had the same intent.
            terms = self.term.split('+')
            filtered = []
            for item in urllist:
                filename = item.split('/')[-1]
                if all(term in filename for term in terms):
                    filtered.append(item)
            if makepage:
                self.showSelection(filtered)
            # Return the first match, or None if nothing survived.
            return filtered[0] if filtered else None
        except IOError, e:
            # urllib2.HTTPError carries an HTTP status code;
            # a plain URLError only carries a reason.
            if hasattr(e, 'code'):
                print '%s - ERROR %s' % (self.url, e.code)
            else:
                print '%s - ERROR %s' % (self.url, e)
            return None


if __name__ == '__main__':
    if len(sys.argv) < 2:
        print 'usage: %s <term> [true]' % sys.argv[0]
        sys.exit(1)
    term = sys.argv[1]
    # An optional second argument of "true" turns on the HTML gallery.
    makepage = len(sys.argv) > 2 and sys.argv[2].lower() == 'true'
    scrappy = DumpSearchScrape(term)
    result = scrappy.makeScrapelist(makepage)
    print result
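
# Example run (assumes the dump.fm search endpoint is still reachable;
# "dumpscrape.py" stands in for whatever this file is saved as):
#   python dumpscrape.py trippy true
# prints the first matching image URL and writes dumpselections/index.html.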