From 49a49bebe3f972e93add837180f5672a4ae62ce0 Mon Sep 17 00:00:00 2001 From: adamhrv Date: Thu, 13 Dec 2018 14:33:05 +0100 Subject: new nbs --- megapixels/notebooks/bs4_scratch.ipynb | 243 +++++++++++++++++++++++++++------ 1 file changed, 203 insertions(+), 40 deletions(-) (limited to 'megapixels/notebooks/bs4_scratch.ipynb') diff --git a/megapixels/notebooks/bs4_scratch.ipynb b/megapixels/notebooks/bs4_scratch.ipynb index dce0ddc2..e63d286f 100644 --- a/megapixels/notebooks/bs4_scratch.ipynb +++ b/megapixels/notebooks/bs4_scratch.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 33, + "execution_count": 56, "metadata": {}, "outputs": [], "source": [ @@ -13,69 +13,241 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 57, "metadata": {}, "outputs": [], "source": [ "data = \"\"\"\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
\"[ICO]\"NameLast modifiedSizeDescription

\"[DIR]\"Parent Directory  -  
\"[desccred.zip17-Nov-2015 02:21 133M 
\"[desctxt.zip17-Nov-2015 02:21 18M 
\"[descvis.zip17-Nov-2015 02:22 60M 
\"[TXT]\"devset_topics.xml17-Nov-2015 02:22 6.0K 
\"[gt.zip17-Nov-2015 02:22 91K 
\"[img.zip17-Nov-2015 02:24 1.1G 
\"[imgwiki.zip17-Nov-2015 02:24 341M 
\"[TXT]\"poiNameCorrespondences.txt17-Nov-2015 02:24 905  
\"[xml.zip17-Nov-2015 02:24 811K 

\n", + "
\n", + "

Annotations

\n", + "

Below are examples of our different annotations within the dataset. Every pedestrian, cyclist and motorcyclist (higher than 50px) in every frame is annotated with a bounding box, along side with three attributes: occlusion, difficult (low contrast or unusual posture) and pose. People on posters, sculptures and groups where individuals are hard to seperate are marked as “ignore”.

\n", + "\n", + "

Pedestrian

\n", + "
\n", + "
\n", + "
\n", + "

\t\t\t\t\"\"\n", + "\t\t\t

\n", + "
\n", + "\t\t\t\tHigh frequency of pedestrians\n", + "\t\t\t\t
\n", + "
\n", + "
\n", + "
\n", + "

\t\t\t\t\"\"\n", + "\t\t\t

\n", + "
\n", + "\t\t\t\tDark scenes with low contrast\n", + "\t\t\t\t
\n", + "
\n", + "
\n", + "
\n", + "

\t\t\t\t\"\"\n", + "\t\t\t

\n", + "
\n", + "\t\t\t\tOccluded pedestrians\n", + "\t\t\t\t
\n", + "
\n", + "
\n", + "
\n", + "

\t\t\t\t\"\"\n", + "\t\t\t

\n", + "
\n", + "\t\t\t\tSideview of crossing pedestrians\n", + "\t\t\t\t
\n", + "
\n", + "


\n", + "\t\t

\n", + "

Bicycledriver and Motorbikedriver

\n", + "
\n", + "
\n", + "
\n", + "\t\t\t\t\t\t\t\t\"\"\n", + "\t\t\t
\n", + "
\n", + "\t\t\t\tBicycle drivers from back including glare\n", + "\t\t\t\t
\n", + "
\n", + "
\n", + "
\n", + "

\t\t\t\t\"\"\n", + "\t\t\t

\n", + "
\n", + "\t\t\t\tBicycledriver sideways\n", + "\t\t\t\t
\n", + "
\n", + "
\n", + "
\n", + "

\t\t\t\t\"\"\n", + "\t\t\t

\n", + "
\n", + "\t\t\t\tScenes with mixed annotations\n", + "\t\t\t\t
\n", + "
\n", + "
\n", + "
\n", + "

\t\t\t\t\"\"\n", + "\t\t\t

\n", + "
\n", + "\t\t\t\tMultiple bicycle drivers\n", + "\t\t\t\t
\n", + "
\n", + "


\n", + "\t\t

\n", + "
\n", + "
\n", + "
\n", + "\t\t\t\t\t\t\t\t\"\"\n", + "\t\t\t
\n", + "
\n", + "\t\t\t\tMotorbikedrivers from back\n", + "\t\t\t\t
\n", + "
\n", + "
\n", + "
\n", + "

\t\t\t\t\"\"\n", + "\t\t\t

\n", + "
\n", + "\t\t\t\tMultiple motorbikedrives in a scene\n", + "\t\t\t\t
\n", + "
\n", + "
\n", + "
\n", + "

\t\t\t\t\"\"\n", + "\t\t\t

\n", + "
\n", + "\t\t\t\tMotorbikedrivers in traffic including glare\n", + "\t\t\t\t
\n", + "
\n", + "
\n", + "
\n", + "

\t\t\t\t\"\"\n", + "\t\t\t

\n", + "
\n", + "\t\t\t\tMotorbike driver followed during several frames\n", + "\t\t\t\t
\n", + "
\n", + "


\n", + "\t\t

\n", + "

Ignore

\n", + "
\n", + "
\n", + "
\n", + "\t\t\t\t\t\t\t\t\"\"\n", + "\t\t\t
\n", + "
\n", + "\t\t\t\tIgnore larger group of pedestrians that can not be distinguished\n", + "\t\t\t\t
\n", + "
\n", + "
\n", + "
\n", + "

\t\t\t\t\"\"\n", + "\t\t\t

\n", + "
\n", + "\t\t\t\tIgnore confusing traffic signs\n", + "\t\t\t\t
\n", + "
\n", + "
\n", + "
\n", + "

\t\t\t\t\"\"\n", + "\t\t\t

\n", + "
\n", + "\t\t\t\tIgnore irrelevant people like passenger\n", + "\t\t\t\t
\n", + "
\n", + "
\n", + "
\n", + "

\t\t\t\t\"\"\n", + "\t\t\t

\n", + "
\n", + "\t\t\t\tIgnore advertisements and billboards that may include target objects\n", + "\t\t\t\t
\n", + "
\n", + "


\n", + "\t\t

\n", + "

 

\n", + "
\n", "\"\"\"" ] }, { "cell_type": "code", - "execution_count": 53, + "execution_count": 71, "metadata": {}, "outputs": [], "source": [ "def parse_urls(url, exts):\n", - " ahrefs = soup.find_all('a')\n", + " ahrefs = soup.find_all('img')\n", " urls = []\n", " for a in ahrefs:\n", " href = a['href']\n", " if Path(href).suffix[1:] in exts:\n", " urls.append('{}{}'.format(url, href))\n", - " return urls" + " return urls\n", + "\n", + "def parse_images(url, exts):\n", + " imgs = soup.find_all('img')\n", + " tags = []\n", + " for img in imgs:\n", + " src = img['src']\n", + " if Path(src).suffix[1:] in exts:\n", + " tags.append('{}{}'.format(url, src))\n", + " return tags" ] }, { "cell_type": "code", - "execution_count": 55, + "execution_count": 73, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "http://skuld.cs.umass.edu/traces/mmsys/2015/paper-5/devset/desccred.zip\n", - "http://skuld.cs.umass.edu/traces/mmsys/2015/paper-5/devset/desctxt.zip\n", - "http://skuld.cs.umass.edu/traces/mmsys/2015/paper-5/devset/descvis.zip\n", - "http://skuld.cs.umass.edu/traces/mmsys/2015/paper-5/devset/gt.zip\n", - "http://skuld.cs.umass.edu/traces/mmsys/2015/paper-5/devset/img.zip\n", - "http://skuld.cs.umass.edu/traces/mmsys/2015/paper-5/devset/imgwiki.zip\n", - "http://skuld.cs.umass.edu/traces/mmsys/2015/paper-5/devset/poiNameCorrespondences.txt\n", - "http://skuld.cs.umass.edu/traces/mmsys/2015/paper-5/devset/xml.zip\n" + "http://www.nightowls-dataset.org/wp-content/uploads/2018/03/many_peds3.png\n", + "http://www.nightowls-dataset.org/wp-content/uploads/2018/03/ped_dark_low_contrast.png\n", + "http://www.nightowls-dataset.org/wp-content/uploads/2018/03/peds_occluded.png\n", + "http://www.nightowls-dataset.org/wp-content/uploads/2018/03/ped_sideways.png\n", + "http://www.nightowls-dataset.org/wp-content/uploads/2018/03/bicycle_back_glare.png\n", + "http://www.nightowls-dataset.org/wp-content/uploads/2018/03/bicycle_driver_sideways.png\n", + "http://www.nightowls-dataset.org/wp-content/uploads/2018/03/bicycle_mixed_with_ped.png\n", + "http://www.nightowls-dataset.org/wp-content/uploads/2018/03/bicycle_multiple_drivers2.png\n", + "http://www.nightowls-dataset.org/wp-content/uploads/2018/03/mb_driver_back.png\n", + "http://www.nightowls-dataset.org/wp-content/uploads/2018/03/mb_drivers_crossing.png\n", + "http://www.nightowls-dataset.org/wp-content/uploads/2018/03/mb_driver_traffic_glare.png\n", + "http://www.nightowls-dataset.org/wp-content/uploads/2018/03/mb_driver_back2.png\n", + "http://www.nightowls-dataset.org/wp-content/uploads/2018/03/ignore_group.png\n", + "http://www.nightowls-dataset.org/wp-content/uploads/2018/03/ign_traffic_signs.png\n", + "http://www.nightowls-dataset.org/wp-content/uploads/2018/03/ign_passengers.png\n", + "http://www.nightowls-dataset.org/wp-content/uploads/2018/03/ign.ads_.png\n" ] } ], "source": [ "soup = BeautifulSoup(data,'lxml')\n", - "burl = 'http://skuld.cs.umass.edu/traces/mmsys/2015/paper-5/devset/'\n", - "urls = parse_urls(burl, ['zip', 'txt'])\n", - "for u in urls:\n", - " print(u)\n", + "#burl = 'http://skuld.cs.umass.edu/traces/mmsys/2015/paper-5/devset/'\n", + "burl = ''\n", + "#tags = parse_urls(burl, ['jpg', 'txt'])\n", + "tags = parse_images(burl, ['jpg', 'png', 'gif'])\n", + "for t in tags:\n", + " print(t)\n", "\n", "# for row in rows:\n", "# ahrefs = row.find_all(href=True)\n", @@ -86,15 +258,6 @@ "# print(url)" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%pip install lxml" - ] - }, { "cell_type": "code", "execution_count": null, -- cgit v1.2.3-70-g09d2