diff options
| author | Adam Harvey <adam@ahprojects.com> | 2019-04-13 12:04:16 +0200 |
|---|---|---|
| committer | Adam Harvey <adam@ahprojects.com> | 2019-04-13 12:04:16 +0200 |
| commit | 7bb209f90be3b844522a11472539556d98b714b0 (patch) | |
| tree | 7a00d84e9f0817af2ae7432a430cd8d6d7f7d2da | |
| parent | 5cd091641f097b138cc03c88da6978763edb9740 (diff) | |
init
| -rw-r--r-- | README.md | 25 | ||||
| -rw-r--r-- | docs/api.md | 3 | ||||
| -rw-r--r-- | docs/images/example_1.jpg | bin | 0 -> 74825 bytes | |||
| -rw-r--r-- | docs/images/example_2.jpg | bin | 0 -> 74825 bytes | |||
| -rw-r--r-- | docs/images/example_collage_1.jpg | bin | 0 -> 47806 bytes | |||
| -rw-r--r-- | docs/images/example_collage_2.jpg | bin | 0 -> 86066 bytes | |||
| -rw-r--r-- | docs/images/example_collage_3.jpg | bin | 0 -> 60635 bytes | |||
| -rw-r--r-- | docs/images/vframe_logo_h.svg | 22 | ||||
| -rw-r--r-- | docs/overview.md | 75 | ||||
| -rw-r--r-- | docs/specifications.md | 119 |
10 files changed, 242 insertions, 2 deletions
@@ -1,2 +1,23 @@ -# vframe_check_api -VFRAME Check API +# VFRAME Check API + +VFRAME Check API Service + + +## Quick Start + +- ... + + +### Endpoints + +- ... + + +### Response Types + +- ... + +### Test Access + +- ... + diff --git a/docs/api.md b/docs/api.md new file mode 100644 index 0000000..c38e41d --- /dev/null +++ b/docs/api.md @@ -0,0 +1,3 @@ +# API Documentation + +[ placeholder for API documentation ]
\ No newline at end of file diff --git a/docs/images/example_1.jpg b/docs/images/example_1.jpg Binary files differ new file mode 100644 index 0000000..7f63efa --- /dev/null +++ b/docs/images/example_1.jpg diff --git a/docs/images/example_2.jpg b/docs/images/example_2.jpg Binary files differ new file mode 100644 index 0000000..7f63efa --- /dev/null +++ b/docs/images/example_2.jpg diff --git a/docs/images/example_collage_1.jpg b/docs/images/example_collage_1.jpg Binary files differ new file mode 100644 index 0000000..299a919 --- /dev/null +++ b/docs/images/example_collage_1.jpg diff --git a/docs/images/example_collage_2.jpg b/docs/images/example_collage_2.jpg Binary files differ new file mode 100644 index 0000000..af8c807 --- /dev/null +++ b/docs/images/example_collage_2.jpg diff --git a/docs/images/example_collage_3.jpg b/docs/images/example_collage_3.jpg Binary files differ new file mode 100644 index 0000000..1a8275c --- /dev/null +++ b/docs/images/example_collage_3.jpg diff --git a/docs/images/vframe_logo_h.svg b/docs/images/vframe_logo_h.svg new file mode 100644 index 0000000..6a9378b --- /dev/null +++ b/docs/images/vframe_logo_h.svg @@ -0,0 +1,22 @@ +<?xml version="1.0" encoding="utf-8"?>
+<!-- Generator: Adobe Illustrator 16.0.0, SVG Export Plug-In . SVG Version: 6.00 Build 0) -->
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.0//EN" "http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd">
+<svg version="1.0" id="Layer_1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px" y="0px"
+ width="252.149px" height="60px" viewBox="0 0 252.149 60" enable-background="new 0 0 252.149 60" xml:space="preserve">
+<g>
+ <path fill="#0A1EFF" d="M0,0v60h59.998V0H0z M30,39.602L18.912,20.398h22.174L30,39.602z"/>
+ <g>
+ <path fill="#0A1EFF" d="M93.395,35.981l5.383-17.853h5.445l-8.271,23.744h-5.102l-8.238-23.744h5.435L93.395,35.981z"/>
+ <path fill="#0A1EFF" d="M128.754,32.166h-9.395v9.707h-4.889V18.129h15.458v3.959h-10.569v6.133h9.395V32.166z"/>
+ <path fill="#0A1EFF" d="M149.569,33.178h-3.895v8.695h-4.895V18.129h8.819c2.807,0,4.97,0.625,6.489,1.872
+ c1.528,1.252,2.282,3.019,2.282,5.305c0,1.615-0.341,2.967-1.047,4.049c-0.697,1.081-1.76,1.945-3.19,2.584l5.143,9.704v0.23
+ h-5.252L149.569,33.178z M145.675,29.217h3.946c1.225,0,2.178-0.312,2.85-0.938c0.677-0.628,1.011-1.487,1.011-2.584
+ c0-1.121-0.314-2.002-0.953-2.642c-0.632-0.646-1.611-0.965-2.929-0.965h-3.925V29.217z"/>
+ <path fill="#0A1EFF" d="M183.512,36.979h-8.578l-1.631,4.895h-5.199l8.84-23.744h4.533l8.886,23.744h-5.201L183.512,36.979z
+ M176.258,33.013h5.937l-2.988-8.884L176.258,33.013z"/>
+ <path fill="#0A1EFF" d="M206.988,18.129l6.099,17.221l6.064-17.221h6.425v23.744h-4.912v-6.492l0.489-11.207l-6.403,17.699h-3.359
+ l-6.39-17.678l0.487,11.186v6.492h-4.893V18.129H206.988z"/>
+ <path fill="#0A1EFF" d="M252.149,31.58h-9.392v6.361h11.023v3.932h-15.916V18.129h15.883v3.959h-10.991v5.661h9.392V31.58z"/>
+ </g>
+</g>
+</svg>
diff --git a/docs/overview.md b/docs/overview.md new file mode 100644 index 0000000..e14d33b --- /dev/null +++ b/docs/overview.md @@ -0,0 +1,75 @@ + + +# Overview + +The VFRAME/Check image deduplication API will provide capabilities to determine if a query image matches any previously submitted query images. The service is designed to integrate with the Check workflow described below. + +## Requirements + +- provide matching results for at least + - Second rate: peak 1 image every 10 seconds + - Hourly rate: ≈3.6K images per hour + - Daily rate: ≈87K image requests submitted per day + - Weekly rate: ≈610K images per week +- provide an authenticated API service to match a query image to all previously submitted query images and receive a match result +- authenticated requests only to protect against misuse +- authenticated services for Check will be handled by manually requesting/exchanging credentials +- provide an interactive demo page to help Check users understand threshold settings +- provide adjustable threshold settings in URI parameter, and/or provide list of similar matches with threshold +- scale to accommodate up to 1 million unique image records to compare against +- after 1M records, we will need to rescale/rebuild the architecture to accommodate more + + +## User story + +- Audience member sends image to a number on WhatsApp (or generically, user adds an image to Check). - Handled by Smooch. + - Image is ingested into Check. + - Handled by Smooch & Check. + - Image is matched against existing images in Check. + - MVP: + - detect near-identical matches that are different sizes, resolutions. + - Assess for feasibility: + - find same meme images used for different claims + - find same claims using different meme images + - find same images (not memes) with different text + - find same images + text in different physical files + - Image is automatically related to any matching images in Check. + - Analyst can confirm matches and dissociate any false matches. 
- Handled in Check + - Audience member receives the verification result for any matching images with existing final-status. + - Handled in Check, Smooch, and WA Business API + + +## Example Images + +The API should be able to detect exact matches such as this example + +|Query|Known Image|Match| +|---|---|---| +|||True| +|||False| +|||False| + + +## Data Retention + +- we will retain the posted images and store: + - the computed hash features + - timestamp + - sha256 of the file +- MySQL data will be stored in Frankfurt +- image data on S3 storage will be stored in Amsterdam + + +## Out of Scope + +- Interactive matching +- Video matching +- Content analysis +- Text detection, text recognition (OCR) +- User-in-the-loop machine learning for improvement of matching algorithms + + +## Assets Required + +- we will need a local copy of the dataset of existing images to initialize the database and to test the image matching threshold + diff --git a/docs/specifications.md b/docs/specifications.md new file mode 100644 index 0000000..ec5c81f --- /dev/null +++ b/docs/specifications.md @@ -0,0 +1,119 @@ +# Check Image Deduplication API + +- Draft April 13, 2019 +- Specs from "VFRAME - SHARED Image Matching - Checkpoint Spec 2019APR.odt" + + +The VFRAME/Check image deduplication API will provide capabilities to determine if a query image matches any of the previously submitted query images. + +Functional Requirements: + +- provide matching results for at least 10,000 image requests submitted per day +- provide scalable capacity for sustained usage of at least one year +- provide an authenticated API service to match a query image to all previously submitted query images and receive a match result + + +## Use Case Scenario + +User story: +- Audience member sends image to a number on WhatsApp (or generically, user adds an image to Check). - Handled by Smooch. + • Image is ingested into Check. - Handled by Smooch & Check. + • Image is matched against existing images in Check. 
+ ◦ MVP: + ▪ detect near-identical matches that are different sizes, resolutions. + ◦ Assess for feasibility: + ▪ find same meme images used for different claims + ▪ find same claims using different meme images + ▪ find same images (not memes) with different text + ▪ find same images + text in different physical files + • Image is automatically related to any matching images in Check. + • Analyst can confirm matches and dissociate any false matches. - Handled in Check + • Audience member receives the verification result for any matching images with existing final-status. - Handled in Check, Smooch, and WA Business API + + +## Out of Scope + +- Video matching +- Machine vision or content analysis +- Indian-language OCR (though OCR models/ libraries should be easily integrated) +- User-in-the-loop machine learning for improvement of matching algorithms + + + +## Example Requests + +Example response for a successful image upload with no match: + +`check.vframe.io/v1/match/` + +``` +{ + "success": true, + "match": false, + "closest_matches": + [ + { + "sha256": "cf80cd8aed482d5d1527d7dc72fceff84e6326592848447d2dc0b0e87dfc9a90", + "score": 2 + }, + { + "sha256": "156350ca18fa04545c4192432860c7efe9ddba18ea6e40e4da81bb7097a7166f", + "score": 3 + } + ] +} +``` + + +Example response for a successful image upload with a match: + +`check.vframe.io/v1/match/` + +``` +{ + "success": true, + "match": true, + "matches": + { + "sha256": "eadc688cd557ee351fa9b718e87a6e8dfb9c9fce69e9944c71c0f58f8b972632", + "score": 0 + }, + "closest_matches": + [ + { + "sha256": "cf80cd8aed482d5d1527d7dc72fceff84e6326592848447d2dc0b0e87dfc9a90", + "score": 2 + }, + { + "sha256": "156350ca18fa04545c4192432860c7efe9ddba18ea6e40e4da81bb7097a7166f", + "score": 2 + } + ] +} +``` + +Get match, but with more permissive threshold + +`check.vframe.io/v1/match/threshold/3/` + +``` +{ + "success": true, + "match": true, + "matches": + { + "sha256": "eadc688cd557ee351fa9b718e87a6e8dfb9c9fce69e9944c71c0f58f8b972632", + 
"score": 0 + }, + "closest_matches": + [ + { + "sha256": "cf80cd8aed482d5d1527d7dc72fceff84e6326592848447d2dc0b0e87dfc9a90", + "score": 3 + }, + { + "sha256": "156350ca18fa04545c4192432860c7efe9ddba18ea6e40e4da81bb7097a7166f", + "score": 3 + } + ] +}
\ No newline at end of file |
