summaryrefslogtreecommitdiff
path: root/megapixels/notebooks/bs4_scratch.ipynb
diff options
context:
space:
mode:
Diffstat (limited to 'megapixels/notebooks/bs4_scratch.ipynb')
-rw-r--r--megapixels/notebooks/bs4_scratch.ipynb243
1 files changed, 203 insertions, 40 deletions
diff --git a/megapixels/notebooks/bs4_scratch.ipynb b/megapixels/notebooks/bs4_scratch.ipynb
index dce0ddc2..e63d286f 100644
--- a/megapixels/notebooks/bs4_scratch.ipynb
+++ b/megapixels/notebooks/bs4_scratch.ipynb
@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
- "execution_count": 33,
+ "execution_count": 56,
"metadata": {},
"outputs": [],
"source": [
@@ -13,69 +13,241 @@
},
{
"cell_type": "code",
- "execution_count": 46,
+ "execution_count": 57,
"metadata": {},
"outputs": [],
"source": [
"data = \"\"\"\n",
- "<table><tr><th><img src=\"/icons/blank.gif\" alt=\"[ICO]\"></th><th><a href=\"?C=N;O=D\">Name</a></th><th><a href=\"?C=M;O=A\">Last modified</a></th><th><a href=\"?C=S;O=A\">Size</a></th><th><a href=\"?C=D;O=A\">Description</a></th></tr><tr><th colspan=\"5\"><hr></th></tr>\n",
- "<tr><td valign=\"top\"><img src=\"/icons/back.gif\" alt=\"[DIR]\"></td><td><a href=\"/traces/mmsys/2015/paper-5/\">Parent Directory</a></td><td>&nbsp;</td><td align=\"right\"> - </td><td>&nbsp;</td></tr>\n",
- "<tr><td valign=\"top\"><img src=\"/icons/compressed.gif\" alt=\"[ ]\"></td><td><a href=\"desccred.zip\">desccred.zip</a></td><td align=\"right\">17-Nov-2015 02:21 </td><td align=\"right\">133M</td><td>&nbsp;</td></tr>\n",
- "<tr><td valign=\"top\"><img src=\"/icons/compressed.gif\" alt=\"[ ]\"></td><td><a href=\"desctxt.zip\">desctxt.zip</a></td><td align=\"right\">17-Nov-2015 02:21 </td><td align=\"right\"> 18M</td><td>&nbsp;</td></tr>\n",
- "<tr><td valign=\"top\"><img src=\"/icons/compressed.gif\" alt=\"[ ]\"></td><td><a href=\"descvis.zip\">descvis.zip</a></td><td align=\"right\">17-Nov-2015 02:22 </td><td align=\"right\"> 60M</td><td>&nbsp;</td></tr>\n",
- "<tr><td valign=\"top\"><img src=\"/icons/text.gif\" alt=\"[TXT]\"></td><td><a href=\"devset_topics.xml\">devset_topics.xml</a></td><td align=\"right\">17-Nov-2015 02:22 </td><td align=\"right\">6.0K</td><td>&nbsp;</td></tr>\n",
- "<tr><td valign=\"top\"><img src=\"/icons/compressed.gif\" alt=\"[ ]\"></td><td><a href=\"gt.zip\">gt.zip</a></td><td align=\"right\">17-Nov-2015 02:22 </td><td align=\"right\"> 91K</td><td>&nbsp;</td></tr>\n",
- "<tr><td valign=\"top\"><img src=\"/icons/compressed.gif\" alt=\"[ ]\"></td><td><a href=\"img.zip\">img.zip</a></td><td align=\"right\">17-Nov-2015 02:24 </td><td align=\"right\">1.1G</td><td>&nbsp;</td></tr>\n",
- "<tr><td valign=\"top\"><img src=\"/icons/compressed.gif\" alt=\"[ ]\"></td><td><a href=\"imgwiki.zip\">imgwiki.zip</a></td><td align=\"right\">17-Nov-2015 02:24 </td><td align=\"right\">341M</td><td>&nbsp;</td></tr>\n",
- "<tr><td valign=\"top\"><img src=\"/icons/text.gif\" alt=\"[TXT]\"></td><td><a href=\"poiNameCorrespondences.txt\">poiNameCorrespondences.txt</a></td><td align=\"right\">17-Nov-2015 02:24 </td><td align=\"right\">905 </td><td>&nbsp;</td></tr>\n",
- "<tr><td valign=\"top\"><img src=\"/icons/compressed.gif\" alt=\"[ ]\"></td><td><a href=\"xml.zip\">xml.zip</a></td><td align=\"right\">17-Nov-2015 02:24 </td><td align=\"right\">811K</td><td>&nbsp;</td></tr>\n",
- "<tr><th colspan=\"5\"><hr></th></tr>\n",
- "</table>\n",
+ "<div class=\"entry-content\">\n",
+ "<h2 id=\"annotations\" class=\"offset\">Annotations</h2>\n",
+ "<p>Below are examples of our different annotations within the dataset. Every pedestrian, cyclist and motorcyclist (higher than 50px) in every frame is annotated with a bounding box, along side with three attributes: occlusion, difficult (low contrast or unusual posture) and pose. People on posters, sculptures and groups where individuals are hard to seperate are marked as “ignore”.</p>\n",
+ "<style type=\"text/css\">\n",
+ "\t\t\t#gallery-3 {\n",
+ "\t\t\t\tmargin: auto;\n",
+ "\t\t\t}\n",
+ "\t\t\t#gallery-3 .gallery-item {\n",
+ "\t\t\t\tfloat: left;\n",
+ "\t\t\t\tmargin-top: 10px;\n",
+ "\t\t\t\ttext-align: center;\n",
+ "\t\t\t\twidth: 25%;\n",
+ "\t\t\t}\n",
+ "\t\t\t#gallery-3 img {\n",
+ "\t\t\t\tborder: 2px solid #cfcfcf;\n",
+ "\t\t\t}\n",
+ "\t\t\t#gallery-3 .gallery-caption {\n",
+ "\t\t\t\tmargin-left: 0;\n",
+ "\t\t\t}\n",
+ "\t\t\t/* see gallery_shortcode() in wp-includes/media.php */\n",
+ "\t\t</style>\n",
+ "<h2 id=\"pedestrian\" class=\"offset\">Pedestrian</h2>\n",
+ "<div id=\"gallery-3\" class=\"gallery galleryid-56 gallery-columns-4 gallery-size-medium\">\n",
+ "<dl class=\"gallery-item\">\n",
+ "<dt class=\"gallery-icon landscape\">\n",
+ "<p>\t\t\t\t<a class=\"grouped_elements\" rel=\"tc-fancybox-group\" href=\"http://www.nightowls-dataset.org/wp-content/uploads/2018/03/many_peds3.png\" data-lb-type=\"grouped-post\"><img src=\"http://www.nightowls-dataset.org/wp-content/uploads/2018/03/many_peds3.png\" alt=\"\" class=\"alignnone size-medium wp-image-43\" width=\"300\" height=\"184\"></a>\n",
+ "\t\t\t</p></dt>\n",
+ "<dd class=\"wp-caption-text gallery-caption\" id=\"gallery-3-81\">\n",
+ "\t\t\t\tHigh frequency of pedestrians\n",
+ "\t\t\t\t</dd>\n",
+ "</dl>\n",
+ "<dl class=\"gallery-item\">\n",
+ "<dt class=\"gallery-icon landscape\">\n",
+ "<p>\t\t\t\t<a class=\"grouped_elements\" rel=\"tc-fancybox-group\" href=\"http://www.nightowls-dataset.org/wp-content/uploads/2018/03/ped_dark_low_contrast.png\" data-lb-type=\"grouped-post\"><img src=\"http://www.nightowls-dataset.org/wp-content/uploads/2018/03/ped_dark_low_contrast.png\" alt=\"\" class=\"alignnone size-medium wp-image-28\" width=\"300\" height=\"186\"></a>\n",
+ "\t\t\t</p></dt>\n",
+ "<dd class=\"wp-caption-text gallery-caption\" id=\"gallery-3-82\">\n",
+ "\t\t\t\tDark scenes with low contrast\n",
+ "\t\t\t\t</dd>\n",
+ "</dl>\n",
+ "<dl class=\"gallery-item\">\n",
+ "<dt class=\"gallery-icon landscape\">\n",
+ "<p>\t\t\t\t<a class=\"grouped_elements\" rel=\"tc-fancybox-group\" href=\"http://www.nightowls-dataset.org/wp-content/uploads/2018/03/peds_occluded.png\" data-lb-type=\"grouped-post\"><img src=\"http://www.nightowls-dataset.org/wp-content/uploads/2018/03/peds_occluded.png\" alt=\"\" class=\"alignnone size-medium wp-image-44\" width=\"300\" height=\"193\"></a>\n",
+ "\t\t\t</p></dt>\n",
+ "<dd class=\"wp-caption-text gallery-caption\" id=\"gallery-3-83\">\n",
+ "\t\t\t\tOccluded pedestrians\n",
+ "\t\t\t\t</dd>\n",
+ "</dl>\n",
+ "<dl class=\"gallery-item\">\n",
+ "<dt class=\"gallery-icon landscape\">\n",
+ "<p>\t\t\t\t<a class=\"grouped_elements\" rel=\"tc-fancybox-group\" href=\"http://www.nightowls-dataset.org/wp-content/uploads/2018/03/ped_sideways.png\" data-lb-type=\"grouped-post\"><img src=\"http://www.nightowls-dataset.org/wp-content/uploads/2018/03/ped_sideways.png\" alt=\"\" class=\"alignnone size-medium wp-image-44\" width=\"300\" height=\"193\"></a>\n",
+ "\t\t\t</p></dt>\n",
+ "<dd class=\"wp-caption-text gallery-caption\" id=\"gallery-3-85\">\n",
+ "\t\t\t\tSideview of crossing pedestrians\n",
+ "\t\t\t\t</dd>\n",
+ "</dl>\n",
+ "<p><br style=\"clear: both\">\n",
+ "\t\t</p></div>\n",
+ "<h2 id=\"bicycle\" class=\"offset\">Bicycledriver and Motorbikedriver</h2>\n",
+ "<div id=\"gallery-3\" class=\"gallery galleryid-56 gallery-columns-4 gallery-size-medium\">\n",
+ "<dl class=\"gallery-item\">\n",
+ "<dt class=\"gallery-icon landscape\">\n",
+ "\t\t\t\t\t\t\t\t<a class=\"grouped_elements\" rel=\"tc-fancybox-group\" href=\"http://www.nightowls-dataset.org/wp-content/uploads/2018/03/bicycle_back_glare.png\" data-lb-type=\"grouped-post\"><img src=\"http://www.nightowls-dataset.org/wp-content/uploads/2018/03/bicycle_back_glare.png\" alt=\"\" class=\"alignnone size-medium wp-image-43\" width=\"300\" height=\"184\"></a>\n",
+ "\t\t\t</dt>\n",
+ "<dd class=\"wp-caption-text gallery-caption\" id=\"gallery-3-86\">\n",
+ "\t\t\t\tBicycle drivers from back including glare\n",
+ "\t\t\t\t</dd>\n",
+ "</dl>\n",
+ "<dl class=\"gallery-item\">\n",
+ "<dt class=\"gallery-icon landscape\">\n",
+ "<p>\t\t\t\t<a class=\"grouped_elements\" rel=\"tc-fancybox-group\" href=\"http://www.nightowls-dataset.org/wp-content/uploads/2018/03/bicycle_driver_sideways.png\" data-lb-type=\"grouped-post\"><img src=\"http://www.nightowls-dataset.org/wp-content/uploads/2018/03/bicycle_driver_sideways.png\" alt=\"\" class=\"alignnone size-medium wp-image-28\" width=\"300\" height=\"186\"></a>\n",
+ "\t\t\t</p></dt>\n",
+ "<dd class=\"wp-caption-text gallery-caption\" id=\"gallery-3-87\">\n",
+ "\t\t\t\tBicycledriver sideways\n",
+ "\t\t\t\t</dd>\n",
+ "</dl>\n",
+ "<dl class=\"gallery-item\">\n",
+ "<dt class=\"gallery-icon landscape\">\n",
+ "<p>\t\t\t\t<a class=\"grouped_elements\" rel=\"tc-fancybox-group\" href=\"http://www.nightowls-dataset.org/wp-content/uploads/2018/03/bicycle_mixed_with_ped.png\" data-lb-type=\"grouped-post\"><img src=\"http://www.nightowls-dataset.org/wp-content/uploads/2018/03/bicycle_mixed_with_ped.png\" alt=\"\" class=\"alignnone size-medium wp-image-44\" width=\"300\" height=\"193\"></a>\n",
+ "\t\t\t</p></dt>\n",
+ "<dd class=\"wp-caption-text gallery-caption\" id=\"gallery-3-88\">\n",
+ "\t\t\t\tScenes with mixed annotations\n",
+ "\t\t\t\t</dd>\n",
+ "</dl>\n",
+ "<dl class=\"gallery-item\">\n",
+ "<dt class=\"gallery-icon landscape\">\n",
+ "<p>\t\t\t\t<a class=\"grouped_elements\" rel=\"tc-fancybox-group\" href=\"http://www.nightowls-dataset.org/wp-content/uploads/2018/03/bicycle_multiple_drivers2.png\" data-lb-type=\"grouped-post\"><img src=\"http://www.nightowls-dataset.org/wp-content/uploads/2018/03/bicycle_multiple_drivers2.png\" alt=\"\" class=\"alignnone size-medium wp-image-44\" width=\"300\" height=\"193\"></a>\n",
+ "\t\t\t</p></dt>\n",
+ "<dd class=\"wp-caption-text gallery-caption\" id=\"gallery-3-89\">\n",
+ "\t\t\t\tMultiple bicycle drivers\n",
+ "\t\t\t\t</dd>\n",
+ "</dl>\n",
+ "<p><br style=\"clear: both\">\n",
+ "\t\t</p></div>\n",
+ "<div id=\"gallery-3\" class=\"gallery galleryid-56 gallery-columns-4 gallery-size-medium\">\n",
+ "<dl class=\"gallery-item\">\n",
+ "<dt class=\"gallery-icon landscape\">\n",
+ "\t\t\t\t\t\t\t\t<a class=\"grouped_elements\" rel=\"tc-fancybox-group\" href=\"http://www.nightowls-dataset.org/wp-content/uploads/2018/03/mb_driver_back.png\" data-lb-type=\"grouped-post\"><img src=\"http://www.nightowls-dataset.org/wp-content/uploads/2018/03/mb_driver_back.png\" alt=\"\" class=\"alignnone size-medium wp-image-43\" width=\"300\" height=\"184\"></a>\n",
+ "\t\t\t</dt>\n",
+ "<dd class=\"wp-caption-text gallery-caption\" id=\"gallery-3-90\">\n",
+ "\t\t\t\tMotorbikedrivers from back\n",
+ "\t\t\t\t</dd>\n",
+ "</dl>\n",
+ "<dl class=\"gallery-item\">\n",
+ "<dt class=\"gallery-icon landscape\">\n",
+ "<p>\t\t\t\t<a class=\"grouped_elements\" rel=\"tc-fancybox-group\" href=\"http://www.nightowls-dataset.org/wp-content/uploads/2018/03/mb_drivers_crossing.png\" data-lb-type=\"grouped-post\"><img src=\"http://www.nightowls-dataset.org/wp-content/uploads/2018/03/mb_drivers_crossing.png\" alt=\"\" class=\"alignnone size-medium wp-image-28\" width=\"300\" height=\"186\"></a>\n",
+ "\t\t\t</p></dt>\n",
+ "<dd class=\"wp-caption-text gallery-caption\" id=\"gallery-3-91\">\n",
+ "\t\t\t\tMultiple motorbikedrives in a scene\n",
+ "\t\t\t\t</dd>\n",
+ "</dl>\n",
+ "<dl class=\"gallery-item\">\n",
+ "<dt class=\"gallery-icon landscape\">\n",
+ "<p>\t\t\t\t<a class=\"grouped_elements\" rel=\"tc-fancybox-group\" href=\"http://www.nightowls-dataset.org/wp-content/uploads/2018/03/mb_driver_traffic_glare.png\" data-lb-type=\"grouped-post\"><img src=\"http://www.nightowls-dataset.org/wp-content/uploads/2018/03/mb_driver_traffic_glare.png\" alt=\"\" class=\"alignnone size-medium wp-image-44\" width=\"300\" height=\"193\"></a>\n",
+ "\t\t\t</p></dt>\n",
+ "<dd class=\"wp-caption-text gallery-caption\" id=\"gallery-3-92\">\n",
+ "\t\t\t\tMotorbikedrivers in traffic including glare\n",
+ "\t\t\t\t</dd>\n",
+ "</dl>\n",
+ "<dl class=\"gallery-item\">\n",
+ "<dt class=\"gallery-icon landscape\">\n",
+ "<p>\t\t\t\t<a class=\"grouped_elements\" rel=\"tc-fancybox-group\" href=\"http://www.nightowls-dataset.org/wp-content/uploads/2018/03/mb_driver_back2.png\" data-lb-type=\"grouped-post\"><img src=\"http://www.nightowls-dataset.org/wp-content/uploads/2018/03/mb_driver_back2.png\" alt=\"\" class=\"alignnone size-medium wp-image-44\" width=\"300\" height=\"193\"></a>\n",
+ "\t\t\t</p></dt>\n",
+ "<dd class=\"wp-caption-text gallery-caption\" id=\"gallery-3-93\">\n",
+ "\t\t\t\tMotorbike driver followed during several frames\n",
+ "\t\t\t\t</dd>\n",
+ "</dl>\n",
+ "<p><br style=\"clear: both\">\n",
+ "\t\t</p></div>\n",
+ "<h2 id=\"ignore\" class=\"offset\">Ignore</h2>\n",
+ "<div id=\"gallery-3\" class=\"gallery galleryid-56 gallery-columns-4 gallery-size-medium\">\n",
+ "<dl class=\"gallery-item\">\n",
+ "<dt class=\"gallery-icon landscape\">\n",
+ "\t\t\t\t\t\t\t\t<a class=\"grouped_elements\" rel=\"tc-fancybox-group\" href=\"http://www.nightowls-dataset.org/wp-content/uploads/2018/03/ignore_group.png\" data-lb-type=\"grouped-post\"><img src=\"http://www.nightowls-dataset.org/wp-content/uploads/2018/03/ignore_group.png\" alt=\"\" class=\"alignnone size-medium wp-image-43\" width=\"300\" height=\"184\"></a>\n",
+ "\t\t\t</dt>\n",
+ "<dd class=\"wp-caption-text gallery-caption\" id=\"gallery-3-94\">\n",
+ "\t\t\t\tIgnore larger group of pedestrians that can not be distinguished\n",
+ "\t\t\t\t</dd>\n",
+ "</dl>\n",
+ "<dl class=\"gallery-item\">\n",
+ "<dt class=\"gallery-icon landscape\">\n",
+ "<p>\t\t\t\t<a class=\"grouped_elements\" rel=\"tc-fancybox-group\" href=\"http://www.nightowls-dataset.org/wp-content/uploads/2018/03/ign_traffic_signs.png\" data-lb-type=\"grouped-post\"><img src=\"http://www.nightowls-dataset.org/wp-content/uploads/2018/03/ign_traffic_signs.png\" alt=\"\" class=\"alignnone size-medium wp-image-28\" width=\"300\" height=\"186\"></a>\n",
+ "\t\t\t</p></dt>\n",
+ "<dd class=\"wp-caption-text gallery-caption\" id=\"gallery-3-95\">\n",
+ "\t\t\t\tIgnore confusing traffic signs\n",
+ "\t\t\t\t</dd>\n",
+ "</dl>\n",
+ "<dl class=\"gallery-item\">\n",
+ "<dt class=\"gallery-icon landscape\">\n",
+ "<p>\t\t\t\t<a class=\"grouped_elements\" rel=\"tc-fancybox-group\" href=\"http://www.nightowls-dataset.org/wp-content/uploads/2018/03/ign_passengers.png\" data-lb-type=\"grouped-post\"><img src=\"http://www.nightowls-dataset.org/wp-content/uploads/2018/03/ign_passengers.png\" alt=\"\" class=\"alignnone size-medium wp-image-44\" width=\"300\" height=\"193\"></a>\n",
+ "\t\t\t</p></dt>\n",
+ "<dd class=\"wp-caption-text gallery-caption\" id=\"gallery-3-96\">\n",
+ "\t\t\t\tIgnore irrelevant people like passenger\n",
+ "\t\t\t\t</dd>\n",
+ "</dl>\n",
+ "<dl class=\"gallery-item\">\n",
+ "<dt class=\"gallery-icon landscape\">\n",
+ "<p>\t\t\t\t<a class=\"grouped_elements\" rel=\"tc-fancybox-group\" href=\"http://www.nightowls-dataset.org/wp-content/uploads/2018/03/ign.ads_.png\" data-lb-type=\"grouped-post\"><img src=\"http://www.nightowls-dataset.org/wp-content/uploads/2018/03/ign.ads_.png\" alt=\"\" class=\"alignnone size-medium wp-image-44\" width=\"300\" height=\"193\"></a>\n",
+ "\t\t\t</p></dt>\n",
+ "<dd class=\"wp-caption-text gallery-caption\" id=\"gallery-3-97\">\n",
+ "\t\t\t\tIgnore advertisements and billboards that may include target objects\n",
+ "\t\t\t\t</dd>\n",
+ "</dl>\n",
+ "<p><br style=\"clear: both\">\n",
+ "\t\t</p></div>\n",
+ "<p>&nbsp;</p>\n",
+ "</div>\n",
"\"\"\""
]
},
{
"cell_type": "code",
- "execution_count": 53,
+ "execution_count": 71,
"metadata": {},
"outputs": [],
"source": [
"def parse_urls(url, exts):\n",
- "    ahrefs = soup.find_all('a')\n",
+ "    \"\"\"Return link URLs from the global `soup` whose file extension is in `exts`.\n",
+ "\n",
+ "    url:  prefix prepended verbatim to every matching href ('' to keep hrefs as-is).\n",
+ "    exts: extensions without the leading dot, e.g. ['zip', 'txt']; the match is exact.\n",
+ "    Returns a list of strings in document order.\n",
+ "    \"\"\"\n",
+ "    # Links live on <a> tags; <img> tags carry no href, so selecting them made\n",
+ "    # a['href'] raise KeyError. href=True also skips anchors without an href.\n",
+ "    ahrefs = soup.find_all('a', href=True)\n",
"    urls = []\n",
"    for a in ahrefs:\n",
"        href = a['href']\n",
"        if Path(href).suffix[1:] in exts:\n",
"            urls.append('{}{}'.format(url, href))\n",
- "    return urls"
+ "    return urls\n",
+ "\n",
+ "def parse_images(url, exts):\n",
+ "    \"\"\"Return image URLs from the global `soup` whose file extension is in `exts`.\n",
+ "\n",
+ "    url:  prefix prepended verbatim to every matching src ('' to keep srcs as-is).\n",
+ "    exts: extensions without the leading dot, e.g. ['jpg', 'png']; the match is exact.\n",
+ "    Returns a list of strings in document order.\n",
+ "    \"\"\"\n",
+ "    # src=True skips <img> tags that have no src attribute, which would\n",
+ "    # otherwise raise KeyError on img['src'].\n",
+ "    imgs = soup.find_all('img', src=True)\n",
+ "    tags = []\n",
+ "    for img in imgs:\n",
+ "        src = img['src']\n",
+ "        if Path(src).suffix[1:] in exts:\n",
+ "            tags.append('{}{}'.format(url, src))\n",
+ "    return tags"
]
},
{
"cell_type": "code",
- "execution_count": 55,
+ "execution_count": 73,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "http://skuld.cs.umass.edu/traces/mmsys/2015/paper-5/devset/desccred.zip\n",
- "http://skuld.cs.umass.edu/traces/mmsys/2015/paper-5/devset/desctxt.zip\n",
- "http://skuld.cs.umass.edu/traces/mmsys/2015/paper-5/devset/descvis.zip\n",
- "http://skuld.cs.umass.edu/traces/mmsys/2015/paper-5/devset/gt.zip\n",
- "http://skuld.cs.umass.edu/traces/mmsys/2015/paper-5/devset/img.zip\n",
- "http://skuld.cs.umass.edu/traces/mmsys/2015/paper-5/devset/imgwiki.zip\n",
- "http://skuld.cs.umass.edu/traces/mmsys/2015/paper-5/devset/poiNameCorrespondences.txt\n",
- "http://skuld.cs.umass.edu/traces/mmsys/2015/paper-5/devset/xml.zip\n"
+ "http://www.nightowls-dataset.org/wp-content/uploads/2018/03/many_peds3.png\n",
+ "http://www.nightowls-dataset.org/wp-content/uploads/2018/03/ped_dark_low_contrast.png\n",
+ "http://www.nightowls-dataset.org/wp-content/uploads/2018/03/peds_occluded.png\n",
+ "http://www.nightowls-dataset.org/wp-content/uploads/2018/03/ped_sideways.png\n",
+ "http://www.nightowls-dataset.org/wp-content/uploads/2018/03/bicycle_back_glare.png\n",
+ "http://www.nightowls-dataset.org/wp-content/uploads/2018/03/bicycle_driver_sideways.png\n",
+ "http://www.nightowls-dataset.org/wp-content/uploads/2018/03/bicycle_mixed_with_ped.png\n",
+ "http://www.nightowls-dataset.org/wp-content/uploads/2018/03/bicycle_multiple_drivers2.png\n",
+ "http://www.nightowls-dataset.org/wp-content/uploads/2018/03/mb_driver_back.png\n",
+ "http://www.nightowls-dataset.org/wp-content/uploads/2018/03/mb_drivers_crossing.png\n",
+ "http://www.nightowls-dataset.org/wp-content/uploads/2018/03/mb_driver_traffic_glare.png\n",
+ "http://www.nightowls-dataset.org/wp-content/uploads/2018/03/mb_driver_back2.png\n",
+ "http://www.nightowls-dataset.org/wp-content/uploads/2018/03/ignore_group.png\n",
+ "http://www.nightowls-dataset.org/wp-content/uploads/2018/03/ign_traffic_signs.png\n",
+ "http://www.nightowls-dataset.org/wp-content/uploads/2018/03/ign_passengers.png\n",
+ "http://www.nightowls-dataset.org/wp-content/uploads/2018/03/ign.ads_.png\n"
]
}
],
"source": [
"soup = BeautifulSoup(data,'lxml')\n",
- "burl = 'http://skuld.cs.umass.edu/traces/mmsys/2015/paper-5/devset/'\n",
- "urls = parse_urls(burl, ['zip', 'txt'])\n",
- "for u in urls:\n",
- " print(u)\n",
+ "#burl = 'http://skuld.cs.umass.edu/traces/mmsys/2015/paper-5/devset/'\n",
+ "burl = ''\n",
+ "#tags = parse_urls(burl, ['jpg', 'txt'])\n",
+ "tags = parse_images(burl, ['jpg', 'png', 'gif'])\n",
+ "for t in tags:\n",
+ " print(t)\n",
"\n",
"# for row in rows:\n",
"# ahrefs = row.find_all(href=True)\n",
@@ -91,15 +263,6 @@
"execution_count": null,
"metadata": {},
"outputs": [],
- "source": [
- "%pip install lxml"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
"source": []
}
],