From 7bb209f90be3b844522a11472539556d98b714b0 Mon Sep 17 00:00:00 2001 From: Adam Harvey Date: Sat, 13 Apr 2019 12:04:16 +0200 Subject: init --- README.md | 25 +++++++- docs/api.md | 3 + docs/images/example_1.jpg | Bin 0 -> 74825 bytes docs/images/example_2.jpg | Bin 0 -> 74825 bytes docs/images/example_collage_1.jpg | Bin 0 -> 47806 bytes docs/images/example_collage_2.jpg | Bin 0 -> 86066 bytes docs/images/example_collage_3.jpg | Bin 0 -> 60635 bytes docs/images/vframe_logo_h.svg | 22 +++++++ docs/overview.md | 75 ++++++++++++++++++++++++ docs/specifications.md | 119 ++++++++++++++++++++++++++++++++++++++ 10 files changed, 242 insertions(+), 2 deletions(-) create mode 100644 docs/api.md create mode 100644 docs/images/example_1.jpg create mode 100644 docs/images/example_2.jpg create mode 100644 docs/images/example_collage_1.jpg create mode 100644 docs/images/example_collage_2.jpg create mode 100644 docs/images/example_collage_3.jpg create mode 100644 docs/images/vframe_logo_h.svg create mode 100644 docs/overview.md create mode 100644 docs/specifications.md diff --git a/README.md b/README.md index 28d3659..64a65f3 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,23 @@ -# vframe_check_api -VFRAME Check API +# VFRAME Check API + +VFRAME Check API Service + + +## Quick Start + +- ... + + +### Endpoints + +- ... + + +### Response Types + +- ... + +### Test Access + +- ... 
+ diff --git a/docs/api.md b/docs/api.md new file mode 100644 index 0000000..c38e41d --- /dev/null +++ b/docs/api.md @@ -0,0 +1,3 @@ +# API Documentation + +[ placeholder for API documentation ] \ No newline at end of file diff --git a/docs/images/example_1.jpg b/docs/images/example_1.jpg new file mode 100644 index 0000000..7f63efa Binary files /dev/null and b/docs/images/example_1.jpg differ diff --git a/docs/images/example_2.jpg b/docs/images/example_2.jpg new file mode 100644 index 0000000..7f63efa Binary files /dev/null and b/docs/images/example_2.jpg differ diff --git a/docs/images/example_collage_1.jpg b/docs/images/example_collage_1.jpg new file mode 100644 index 0000000..299a919 Binary files /dev/null and b/docs/images/example_collage_1.jpg differ diff --git a/docs/images/example_collage_2.jpg b/docs/images/example_collage_2.jpg new file mode 100644 index 0000000..af8c807 Binary files /dev/null and b/docs/images/example_collage_2.jpg differ diff --git a/docs/images/example_collage_3.jpg b/docs/images/example_collage_3.jpg new file mode 100644 index 0000000..1a8275c Binary files /dev/null and b/docs/images/example_collage_3.jpg differ diff --git a/docs/images/vframe_logo_h.svg b/docs/images/vframe_logo_h.svg new file mode 100644 index 0000000..6a9378b --- /dev/null +++ b/docs/images/vframe_logo_h.svg @@ -0,0 +1,22 @@ + + + + + + + + + + + + + + + + diff --git a/docs/overview.md b/docs/overview.md new file mode 100644 index 0000000..e14d33b --- /dev/null +++ b/docs/overview.md @@ -0,0 +1,75 @@ +![](images/vframe_logo_h.svg) + +# Overview + +The VFRAME/Check image deduplication API will provide capabilities to determine if a query image matches any previously submitted query images. The service is designed to integrate with the Check workflow described below. 
+ +## Requirements + +- provide matching results for at least + - Second rate: peak 1 image every 10 seconds + - Hourly rate: ≈3.6K images per hour + - Daily rate: ≈87K image requests submitted per day + - Weekly rate: ≈610K images per week +- provide an authenticated API service to match a query image to all previously submitted query images and receive a match result +- authenticated requests only to protect against misuse +- authenticated services for Check will be handled by manually requesting/exchanging credentials +- provide an interactive demo page to help Check users understand threshold settings +- provide adjustable threshold settings in a URI parameter, and/or provide a list of similar matches with threshold +- scale to accommodate up to 1 million unique image records to compare against +- after 1M records, we will need to rescale/rebuild the architecture to accommodate more + + +## User story + +- Audience member sends image to a number on WhatsApp (or generically, user adds an image to Check). - Handled by Smooch. + - Image is ingested into Check. + - Handled by Smooch & Check. + - Image is matched against existing images in Check. + - MVP: + - detect near-identical matches that are different sizes or resolutions. + - Assess for feasibility: + - find same meme images used for different claims + - find same claims using different meme images + - find same images (not memes) with different text + - find same images + text in different physical files + - Image is automatically related to any matching images in Check. + - Analyst can confirm matches and dissociate any false matches. - Handled in Check + - Audience member receives the verification result for any matching images with existing final-status. 
+ - Handled in Check, Smooch, and WA Business API + + +## Example Images + +The API should be able to detect exact matches, such as the first example below: + +|Query|Known Image|Match| +|---|---|---| +|![](images/example_1.jpg)|![](images/example_2.jpg)|True| +|![](images/example_collage_1.jpg)|![](images/example_collage_2.jpg)|False| +|![](images/example_collage_1.jpg)|![](images/example_collage_3.jpg)|False| + + +## Data Retention + +- we will retain the posted images and store: + - the computed hash features + - timestamp + - sha256 of the file +- MySQL data will be stored in Frankfurt +- image data on S3 storage will be stored in Amsterdam + + +## Out of Scope + +- Interactive matching +- Video matching +- Content analysis +- Text detection, text recognition (OCR) +- User-in-the-loop machine learning for improvement of matching algorithms + + +## Assets Required + +- we will need a local copy of the dataset of existing images to initialize the database and to test the image matching threshold + diff --git a/docs/specifications.md b/docs/specifications.md new file mode 100644 index 0000000..ec5c81f --- /dev/null +++ b/docs/specifications.md @@ -0,0 +1,119 @@ +# Check Image Deduplication API + +- Draft April 13, 2019 +- Specs from "VFRAME - SHARED Image Matching - Checkpoint Spec 2019APR.odt" + + +The VFRAME/Check image deduplication API will provide capabilities to determine if a query image matches any previously submitted query images. + +Functional Requirements: + +- provide matching results for at least 10,000 image requests submitted per day +- provide scalable capacity for sustained usage of at least one year +- provide an authenticated API service to match a query image to all previously submitted query images and receive a match result + + +## Use Case Scenario + +User story: +- Audience member sends image to a number on WhatsApp (or generically, user adds an image to Check). - Handled by Smooch. + • Image is ingested into Check. - Handled by Smooch & Check. 
+ • Image is matched against existing images in Check. + ◦ MVP: + ▪ detect near-identical matches that are different sizes or resolutions. + ◦ Assess for feasibility: + ▪ find same meme images used for different claims + ▪ find same claims using different meme images + ▪ find same images (not memes) with different text + ▪ find same images + text in different physical files + • Image is automatically related to any matching images in Check. + • Analyst can confirm matches and dissociate any false matches. - Handled in Check + • Audience member receives the verification result for any matching images with existing final-status. - Handled in Check, Smooch, and WA Business API + + +## Out of Scope + +- Video matching +- Machine vision or content analysis +- Indian-language OCR (though OCR models/libraries should be easily integrated) +- User-in-the-loop machine learning for improvement of matching algorithms + + + +## Example Requests + +Example response for a successful image upload with no match: + +`check.vframe.io/v1/match/` + +``` +{ + "success": true, + "match": false, + "closest_matches": + [ + { + "sha256": "cf80cd8aed482d5d1527d7dc72fceff84e6326592848447d2dc0b0e87dfc9a90", + "score": 2 + }, + { + "sha256": "156350ca18fa04545c4192432860c7efe9ddba18ea6e40e4da81bb7097a7166f", + "score": 3 + } + ] +} +``` + + +Example response for a successful image upload with a match: + +`check.vframe.io/v1/match/` + +``` +{ + "success": true, + "match": true, + "matches": + { + "sha256": "eadc688cd557ee351fa9b718e87a6e8dfb9c9fce69e9944c71c0f58f8b972632", + "score": 0 + }, + "closest_matches": + [ + { + "sha256": "cf80cd8aed482d5d1527d7dc72fceff84e6326592848447d2dc0b0e87dfc9a90", + "score": 2 + }, + { + "sha256": "156350ca18fa04545c4192432860c7efe9ddba18ea6e40e4da81bb7097a7166f", + "score": 2 + } + ] +} +``` + +Get match, but with more permissive threshold + +`check.vframe.io/v1/match/threshold/3/` + +``` +{ + "success": true, + "match": true, + "matches": + { + "sha256": 
"eadc688cd557ee351fa9b718e87a6e8dfb9c9fce69e9944c71c0f58f8b972632", + "score": 0 + }, + "closest_matches": + [ + { + "sha256": "cf80cd8aed482d5d1527d7dc72fceff84e6326592848447d2dc0b0e87dfc9a90", + "score": 3 + }, + { + "sha256": "156350ca18fa04545c4192432860c7efe9ddba18ea6e40e4da81bb7097a7166f", + "score": 3 + } + ] +} \ No newline at end of file -- cgit v1.2.3-70-g09d2