summary | refs | log | tree | commit | diff
path: root/megapixels/notebooks/datasets/pipa
diff options
context:
space:
mode:
Diffstat (limited to 'megapixels/notebooks/datasets/pipa')
-rw-r--r--megapixels/notebooks/datasets/pipa/flickr_cleanup.ipynb (renamed from megapixels/notebooks/datasets/pipa/pipa_flickr_metadata_cleanup.ipynb)44
1 files changed, 41 insertions, 3 deletions
diff --git a/megapixels/notebooks/datasets/pipa/pipa_flickr_metadata_cleanup.ipynb b/megapixels/notebooks/datasets/pipa/flickr_cleanup.ipynb
index 8746a740..57c32bec 100644
--- a/megapixels/notebooks/datasets/pipa/pipa_flickr_metadata_cleanup.ipynb
+++ b/megapixels/notebooks/datasets/pipa/flickr_cleanup.ipynb
@@ -38,12 +38,50 @@
},
{
"cell_type": "code",
- "execution_count": 31,
+ "execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
- "fp_in = '/data_store/datasets/people/pipa/research/pipa_flickr_metadata_ext.csv'\n",
- "fp_out = '/data_store/datasets/people/pipa/research/pipa_flickr_metadata_test.csv'"
+ "fp_in_api_photo_id = '/data_store_hdd/datasets/people/pipa/research/flickr_api_photo_id.csv'\n",
+ "fp_out_filepaths = '/data_store_hdd/datasets/people/pipa/research/pipa_filepaths.csv'"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df = pd.read_csv(fp_in_api_photo_id)\n",
+ "records = df.to_dict('records')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "results = []\n",
+ "for record in records:  # filepath is built from this record's own photo_id and secret\n",
+ " obj = {\n",
+ " 'photo_id': record.get('photo_id'),\n",
+ " 'nsid': record.get('nsid'),\n",
+ " 'url': record.get('url'),\n",
+ " 'secret': record.get('secret'),\n",
+ " 'filepath': '{}_{}.jpg'.format(record.get('photo_id'), record.get('secret'))\n",
+ " }\n",
+ " results.append(obj)\n",
+ " "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "pd.DataFrame.from_dict(results).to_csv(fp_out_filepaths, index=False)"
]
},
{