diff options
Diffstat (limited to 'megapixels/notebooks/datasets/pipa')
| -rw-r--r-- | megapixels/notebooks/datasets/pipa/flickr_cleanup.ipynb (renamed from megapixels/notebooks/datasets/pipa/pipa_flickr_metadata_cleanup.ipynb) | 44 |
1 files changed, 41 insertions, 3 deletions
diff --git a/megapixels/notebooks/datasets/pipa/pipa_flickr_metadata_cleanup.ipynb b/megapixels/notebooks/datasets/pipa/flickr_cleanup.ipynb index 8746a740..57c32bec 100644 --- a/megapixels/notebooks/datasets/pipa/pipa_flickr_metadata_cleanup.ipynb +++ b/megapixels/notebooks/datasets/pipa/flickr_cleanup.ipynb @@ -38,12 +38,50 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ - "fp_in = '/data_store/datasets/people/pipa/research/pipa_flickr_metadata_ext.csv'\n", - "fp_out = '/data_store/datasets/people/pipa/research/pipa_flickr_metadata_test.csv'" + "fp_in_api_photo_id = '/data_store_hdd/datasets/people/pipa/research/flickr_api_photo_id.csv'\n", + "fp_out_filepaths = '/data_store_hdd/datasets/people/pipa/research/pipa_filepaths.csv'" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_csv(fp_in_api_photo_id)\n", + "records = df.to_dict('records')" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "results = []\n", + "for record in records:\n", + " obj = {\n", + " 'photo_id': record.get('photo_id'),\n", + " 'nsid': record.get('nsid'),\n", + " 'url': record.get('url'),\n", + " 'secret': record.get('secret'),\n", + " 'filepath': f\"{record.get('photo_id')}_{record.get('secret')}.jpg\"  # use this record's fields; bare photo_id/secret are not assigned in this loop\n", + " }\n", + " results.append(obj)\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "pd.DataFrame.from_dict(results).to_csv(fp_out_filepaths, index=False)" ] }, { |
