diff options
| author | cam <cameron@ideum.com> | 2016-10-10 01:54:16 -0600 |
|---|---|---|
| committer | cam <cameron@ideum.com> | 2016-10-10 01:54:16 -0600 |
| commit | 43ebfac2db256c294e6dfef8c5eb2037df8350c1 (patch) | |
| tree | 27340b49dccc15998f681d39b5f83e0724a590f4 | |
| parent | 86ad6bd064e9bac38018a77a059a623176a3247a (diff) | |
Modified README
| -rw-r--r-- | README.md | 189 | ||||
| -rw-r--r-- | neural_style.py | 120 |
2 files changed, 180 insertions, 129 deletions
@@ -10,34 +10,35 @@ by Leon A. Gatys, Matthias Bethge, Aaron Hertzmann, Eli Shechtman Additionally, techniques are presented for semantic segmentation and multiple style transfer. -The first paper presents an algorithm for combining the content of one image with the style of another image using convolutional neural networks. Below is an example of transferring the artistic style of [The Starry Night](https://en.wikipedia.org/wiki/The_Starry_Night) onto a photograph of an African lion: +The Neural Style algorithm combines the content of one image with the style of another image using convolutional neural networks. Below is an example of transferring the artistic style of [The Starry Night](https://en.wikipedia.org/wiki/The_Starry_Night) onto a photograph of an African lion: <p align="center"> -<img src="https://dl.dropboxusercontent.com/u/63267778/examples/lions/42_output.png" width="512"/> -<img src="https://dl.dropboxusercontent.com/u/63267778/examples/content_style.png" width="290"/> +<img src="examples/lions/42_output.png" width="512"/> +<img src="examples/lions/content_style.png" width="290"/> </p> Transferring the style of various artworks to the same content image produces qualitatively convincing results: <p align="center"> -<img src="https://dl.dropboxusercontent.com/u/63267778/examples/lions/32_output.png" width="192"> -<img src="https://dl.dropboxusercontent.com/u/63267778/examples/my_styles/matisse_crop.jpg" width="192"/> -<img src="https://dl.dropboxusercontent.com/u/63267778/examples/lions/33_output.png" width="192"/> -<img src="https://dl.dropboxusercontent.com/u/63267778/examples/my_styles/water_lilies_crop.jpg" width="192"/> +<img src="examples/lions/32_output.png" width="192"> +<img src="examples/lions/styles/matisse_crop.jpg" width="192"/> +<img src="examples/lions/33_output.png" width="192"/> +<img src="examples/lions/styles/water_lilies_crop.jpg" width="192"/> -<img 
src="https://dl.dropboxusercontent.com/u/63267778/examples/lions/kandinsky_output.png" width="192"/> -<img src="https://dl.dropboxusercontent.com/u/63267778/examples/my_styles/kandinsky_crop.jpg" width="192"/> -<img src="https://dl.dropboxusercontent.com/u/63267778/examples/lions/basquiat_output.png" width="192"/> -<img src="https://dl.dropboxusercontent.com/u/63267778/examples/my_styles/basquiat_crop.jpg" width="192"/> +<img src="examples/lions/kandinsky_output.png" width="192"/> +<img src="examples/lions/styles/kandinsky_crop.jpg" width="192"/> +<img src="examples/lions/basquiat_output.png" width="192"/> +<img src="examples/lions/styles/basquiat_crop.jpg" width="192"/> </p> Here we reproduce Figure 2 from the first paper, which renders a photograph of Tübingen, Germany, in the style of 5 different iconic paintings [The Shipwreck of the Minotaur](http://www.artble.com/artists/joseph_mallord_william_turner/paintings/the_shipwreck_of_the_minotaur), [The Starry Night](https://www.wikiart.org/en/vincent-van-gogh/the-starry-night-1889), [Composition VII](https://www.wikiart.org/en/wassily-kandinsky/composition-vii-1913), [The Scream](https://www.wikiart.org/en/edvard-munch/the-scream-1893), [Seated Nude](http://www.pablopicasso.org/seated-nude.jsp): <p align="center"> -<img src="https://dl.dropboxusercontent.com/u/63267778/examples/gatys_figure/tubingen.png" height="192px"> -<img src="https://dl.dropboxusercontent.com/u/63267778/examples/gatys_figure/tubingen_shipwreck.png" height="192px"> -<img src="https://dl.dropboxusercontent.com/u/63267778/examples/gatys_figure/tubingen_starry_night.png" height="192px"> -<img src="https://dl.dropboxusercontent.com/u/63267778/examples/gatys_figure/tubingen_picasso.png" height="192px"> -<img src="https://dl.dropboxusercontent.com/u/63267778/examples/gatys_figure/1_output.png" height="192px"> -<img src="https://dl.dropboxusercontent.com/u/63267778/examples/gatys_figure/tubingen_kandinsky.png" height="192px"> +<img 
src="examples/gatys_figure/tubingen.png" height="192px"> +<img src="examples/gatys_figure/tubingen_shipwreck.png" height="192px"> +<img src="examples/gatys_figure/tubingen_starry_night.png" height="192px"> + +<img src="examples/gatys_figure/tubingen_picasso.png" height="192px"> +<img src="examples/gatys_figure/tubingen_scream.png" height="192px"> +<img src="examples/gatys_figure/tubingen_kandinsky.png" height="192px"> </p> ### Content / Style Tradeoff @@ -46,11 +47,11 @@ The algorithm allows the user to trade-off the relative weight of the style and Here we render with an increasing style weight applied to [Red Canna](http://www.georgiaokeeffe.net/red-canna.jsp): <p align="center"> -<img src="https://dl.dropboxusercontent.com/u/63267778/examples/style_and_content_tradeoff/okeffe.jpg" height="160px"> -<img src="https://dl.dropboxusercontent.com/u/63267778/examples/style_and_content_tradeoff/okeffe_10.png" width="160px"> -<img src="https://dl.dropboxusercontent.com/u/63267778/examples/style_and_content_tradeoff/okeffe_100.png" width="160px"> -<img src="https://dl.dropboxusercontent.com/u/63267778/examples/style_and_content_tradeoff/okeffe_10000.png" width="160px"> -<img src="https://dl.dropboxusercontent.com/u/63267778/examples/style_and_content_tradeoff/output_1000000.png" width="160px"> +<img src="examples/style_content_tradeoff/okeffe.jpg" height="160px"> +<img src="examples/style_content_tradeoff/okeffe_10.png" width="160px"> +<img src="examples/style_content_tradeoff/okeffe_100.png" width="160px"> +<img src="examples/style_content_tradeoff/okeffe_10000.png" width="160px"> +<img src="examples/style_content_tradeoff/output_1000000.png" width="160px"> </p> ### Multiple Style Images @@ -60,20 +61,21 @@ More than one style image can be used to blend multiple artistic styles. 
*Bottom row (left to right)*: [Seated Nude](http://www.pablopicasso.org/seated-nude.jsp) + [The Starry Night](https://www.wikiart.org/en/vincent-van-gogh/the-starry-night-1889), [Oversoul](http://alexgrey.com/art/paintings/soul/oversoul/) + [Freshness of Cold](https://afremov.com/FRESHNESS-OF-COLD-PALETTE-KNIFE-Oil-Painting-On-Canvas-By-Leonid-Afremov-Size-30-x40.html), [David Bowie](http://www.francoise-nielly.com/index.php/galerie/index/56) + [Skull](https://www.wikiart.org/en/jean-michel-basquiat/head) <p align="center"> -<img src="https://dl.dropboxusercontent.com/u/63267778/examples/multiple_styles/tubingen_starry_scream.png" height="192px"> -<img src="https://dl.dropboxusercontent.com/u/63267778/examples/multiple_styles/tubingen_scream_kandinsky.png" height="192px"> -<img src="https://dl.dropboxusercontent.com/u/63267778/examples/multiple_styles/tubingen_starry_seated.png" height="192px"> -<img src="https://dl.dropboxusercontent.com/u/63267778/examples/multiple_styles/tubingen_seated_kandinsky.png.png" height="192px"> -<img src="https://dl.dropboxusercontent.com/u/63267778/examples/multiple_styles/output_tubingen_afremov_grey.png" height="192px"> -<img src="https://dl.dropboxusercontent.com/u/63267778/examples/multiple_styles/output_basquiat_nielly.png" height="192px"> +<img src="examples/multiple_styles/tubingen_starry_scream.png" height="192px"> +<img src="examples/multiple_styles/tubingen_scream_kandinsky.png" height="192px"> +<img src="examples/multiple_styles/tubingen_starry_seated.png" height="192px"> + +<img src="examples/multiple_styles/tubingen_seated_kandinsky.png.png" height="192px"> +<img src="examples/multiple_styles/tubingen_afremov_grey.png" height="192px"> +<img src="examples/multiple_styles/tubingen_basquiat_nielly.png" height="192px"> </p> ### Style Interpolation When using multiple style images, the degree to which they are blended can be controlled. 
<p align="center"> -<img src="https://dl.dropboxusercontent.com/u/63267778/examples/style_interpolation/golden_gate_scream_7_starry_3.png" height="192px"> -<img src="https://dl.dropboxusercontent.com/u/63267778/examples/style_interpolation/golden_gate_scream_5_starry_5.png" height="192px"> -<img src="https://dl.dropboxusercontent.com/u/63267778/examples/style_interpolation/golden_gate_scream_3_starry_7.png" height="192px"> +<img src="examples/style_interpolation/taj_mahal_scream_1_starry_9.png" height="192px"> +<img src="examples/style_interpolation/taj_mahal_scream_5_starry_5.png" height="192px"> +<img src="examples/style_interpolation/taj_mahal_scream_9_starry_1.png" height="192px"> </p> ### Transfer style but not color @@ -81,62 +83,70 @@ By including the flag `--original_colors` the output image will retain the color *Left to right*: content image, stylized image, stylized image with the original colors of the content image <p align="center"> -<img src="https://dl.dropboxusercontent.com/u/63267778/examples/original_colors/new_york.png" height="165px"> -<img src="https://dl.dropboxusercontent.com/u/63267778/examples/original_colors/stylized.png" height="165px"> -<img src="https://dl.dropboxusercontent.com/u/63267778/examples/original_colors/stylized_original_colors.png" height="165px"> +<img src="examples/original_colors/new_york.png" height="165px"> +<img src="examples/original_colors/stylized.png" height="165px"> +<img src="examples/original_colors/stylized_original_colors.png" height="165px"> </p> ### Textures The algorithm is not constrained to artistic painting styles. It can also be applied to photographic textures to create [pareidolic](https://en.wikipedia.org/wiki/Pareidolia) images. 
<p align="center"> -<img src="https://dl.dropboxusercontent.com/u/63267778/examples/pareidolic/flowers_output.png" width="192px"> -<img src="https://dl.dropboxusercontent.com/u/63267778/examples/my_styles/flowers_crop.jpg" width="192px"/> -<img src="https://dl.dropboxusercontent.com/u/63267778/examples/pareidolic/oil_output.png" width="192px"> -<img src="https://dl.dropboxusercontent.com/u/63267778/examples/my_styles/oil_crop.jpg" width="192px"> +<img src="examples/pareidolic/flowers_output.png" width="192px"> +<img src="examples/pareidolic/styles/flowers_crop.jpg" width="192px"/> +<img src="examples/pareidolic/oil_output.png" width="192px"> +<img src="examples/pareidolic/styles/oil_crop.jpg" width="192px"> -<img src="https://dl.dropboxusercontent.com/u/63267778/examples/pareidolic/dark_matter_output.png" width="192px"> -<img src="https://dl.dropboxusercontent.com/u/63267778/examples/my_styles/dark_matter_bw.png" width="192px"> -<img src="https://dl.dropboxusercontent.com/u/63267778/examples/pareidolic/ben_giles_output.png" width="192px"> -<img src="https://dl.dropboxusercontent.com/u/63267778/examples/my_styles/ben_giles.png" width="192px"> +<img src="examples/pareidolic/dark_matter_output.png" width="192px"> +<img src="examples/pareidolic/styles/dark_matter_bw.png" width="192px"> +<img src="examples/pareidolic/ben_giles_output.png" width="192px"> +<img src="examples/pareidolic/styles/ben_giles.png" width="192px"> </p> ### Segmentation Style can be transferred to semantic segmentations in the content image. 
<p align="center"> -<img src="https://dl.dropboxusercontent.com/u/63267778/examples/segmentation/00110.jpg" height="192px"> -<img src="https://dl.dropboxusercontent.com/u/63267778/examples/segmentation/00110_mask.png" height="192px"> -<img src="https://dl.dropboxusercontent.com/u/63267778/examples/segmentation/00110_output.png" height="192px"> -<img src="https://dl.dropboxusercontent.com/u/63267778/examples/segmentation/00017.jpg" height="192px"> -<img src="https://dl.dropboxusercontent.com/u/63267778/examples/segmentation/00017_mask.png" height="192px"> -<img src="https://dl.dropboxusercontent.com/u/63267778/examples/segmentation/output_nielly.png" height="192px"> -<img src="https://dl.dropboxusercontent.com/u/63267778/examples/segmentation/00768.jpg" height="192px"> -<img src="https://dl.dropboxusercontent.com/u/63267778/examples/segmentation/00768_mask.png" height="192px"> -<img src="https://dl.dropboxusercontent.com/u/63267778/examples/segmentation/00768_output.png" height="192px"> -<img src="https://dl.dropboxusercontent.com/u/63267778/examples/segmentation/02630.png" height="192px"> -<img src="https://dl.dropboxusercontent.com/u/63267778/examples/segmentation/02630_mask.png" height="192px"> -<img src="https://dl.dropboxusercontent.com/u/63267778/examples/segmentation/02630_output.png" height="192px"> +<img src="examples/segmentation/00110.jpg" height="192px"> +<img src="examples/segmentation/00110_mask.png" height="192px"> +<img src="examples/segmentation/00110_output.png" height="192px"> +<img src="examples/segmentation/00017.jpg" height="192px"> +<img src="examples/segmentation/00017_mask.png" height="192px"> +<img src="examples/segmentation/00017_output.png" height="192px"> + +<img src="examples/segmentation/00768.jpg" height="192px"> +<img src="examples/segmentation/00768_mask.png" height="192px"> +<img src="examples/segmentation/00768_output.png" height="192px"> +<img src="examples/segmentation/02630.png" height="192px"> +<img 
src="examples/segmentation/02630_mask.png" height="192px"> +<img src="examples/segmentation/02630_output.png" height="192px"> </p> Multiple styles can be transferred to the foreground and background of the content image. *Left to right*: content image, foreground style, background style, foreground mask, background mask, stylized image <p align="center"> -<img src="https://dl.dropboxusercontent.com/u/63267778/examples/segmentation/02390.jpg" height="192px"> -<img src="https://dl.dropboxusercontent.com/u/63267778/examples/segmentation/basquiat.png" height="192px"> -<img src="https://dl.dropboxusercontent.com/u/63267778/examples/segmentation/frida.png" height="192px"> -<img src="https://dl.dropboxusercontent.com/u/63267778/examples/segmentation/02390_mask.png" height="192px"> -<img src="https://dl.dropboxusercontent.com/u/63267778/examples/segmentation/02390_mask_inv.png" height="192px"> -<img src="https://dl.dropboxusercontent.com/u/63267778/examples/segmentation/02390_output.png" height="192px"> -<img src="https://dl.dropboxusercontent.com/u/63267778/examples/segmentation/02270.jpg" height="192px"> -<img src="https://dl.dropboxusercontent.com/u/63267778/examples/segmentation/okeffe_crop.png" height="192px"> -<img src="https://dl.dropboxusercontent.com/u/63267778/examples/segmentation/okeffe_iris.png" height="192px"> -<img src="https://dl.dropboxusercontent.com/u/63267778/examples/segmentation/02270_mask_face.png" height="192px"> -<img src="https://dl.dropboxusercontent.com/u/63267778/examples/segmentation/02270_mask_face_inv.png" height="192px"> -<img src="https://dl.dropboxusercontent.com/u/63267778/examples/segmentation/02270_output.png" height="192px"> +<img src="examples/segmentation/02390.jpg" height="192px"> +<img src="examples/segmentation/basquiat.png" height="192px"> +<img src="examples/segmentation/frida.png" height="192px"> +<img src="examples/segmentation/02390_mask.png" height="192px"> +<img src="examples/segmentation/02390_mask_inv.png" 
height="192px"> +<img src="examples/segmentation/02390_output.png" height="192px"> + +<img src="examples/segmentation/02270.jpg" height="192px"> +<img src="examples/segmentation/okeffe_red_canna.png" height="192px"> +<img src="examples/segmentation/okeffe_iris.png" height="192px"> +<img src="examples/segmentation/02270_mask_face.png" height="192px"> +<img src="examples/segmentation/02270_mask_face_inv.png" height="192px"> +<img src="examples/segmentation/02270_output.png" height="192px"> </p> ### Video -Demo videos coming soon... +Animations can be rendered by applying the algorithm to each source frame. For the best results, the network is initialized with the previously stylized frame warped to the current frame according to the optical flow between the pair of frames. Loss functions for temporal consistency are used to penalize motion boundaries. + +<p align="center"> +<img src="examples/video/input.gif"> + +<img src="examples/video/output.gif"> +</p> ## Setup #### Dependencies: @@ -156,8 +166,8 @@ Demo videos coming soon... ### Basic Usage #### Single Image -1. Copy 1 content image (`.png`, `.jpg`, `.ppm`, `.pgm`) to the default single-image content directory `./image_input` -2. Copy 1 or more style images (`.png`, `.jpg`, `.ppm`, `.pgm`) to the default style directory `./styles` +1. Copy 1 content image to the default image content directory `./image_input` +2. Copy 1 or more style images to the default style directory `./styles` 3. Run the command: ``` bash stylize_image.sh <path_to_content_image> <path_to_style_image> @@ -166,11 +176,12 @@ bash stylize_image.sh <path_to_content_image> <path_to_style_image> ``` bash stylize_image.sh ./image_input/lion.jpg ./styles/starry-night.jpg ``` +*Note*: Supported image formats include: `.png`, `.jpg`, `.ppm`, `.pgm` *Note*: Paths to images should not contain the `~` character to represent your home directory; you should instead use a relative path or the absolute path. #### Video Frames -1. 
Copy 1 content video (`.mp4`, `.mov`) to the default video content directory `./video_input` +1. Copy 1 content video to the default video content directory `./video_input` 2. Copy 1 or more style images to the default style directory `./styles` 3. Run the command: ``` @@ -181,10 +192,12 @@ bash stylize_video.sh <path_to_video> <path_to_style_image> bash stylize_video.sh ./video_input/video.mp4 ./styles/starry-night.jpg ``` +*Note*: Supported video formats include: `.mp4`, `.mov`, `.mkv` + ### Advanced Usage #### Single Image or Video Frames -1. Copy content images (`.png`, `.jpg`, `.ppm`, `.pgm`) to the default single-image content directory `./image_input` or copy videos (`.mp4`, `.mov`) to the default video content directory `./video_input` -2. Copy 1 or more style images (`.png`, `.jpg`, `.ppm`, `.pgm`) to the default style directory `./styles` +1. Copy content images to the default image content directory `./image_input` or copy video frames to the default video content directory `./video_input` +2. Copy 1 or more style images to the default style directory `./styles` 3. 
Run the command with specific arguments: ``` python neural_style.py <arguments> @@ -196,21 +209,27 @@ python neural_style.py --content_img golden_gate.jpg \ --max_size 1000 \ --max_iterations 100 \ --original_colors \ - --device /cpu:0; + --device /cpu:0 \ + --verbose; ``` To use multiple style images, pass a *space-separated* list of the image names and image weights like this: `--style_imgs starry_night.jpg the_scream.jpg --style_imgs_weights 0.5 0.5` -*Example (Video)*: +*Example (Video Frames)*: ``` python neural_style.py --video \ + --video_input_dir ./video_input/my_video_frames \ --style_imgs starry-night.jpg \ + --content_weight 5 \ + --style_weight 1000 \ + --temporal_weight 1000 \ --start_frame 1 \ --end_frame 50 \ - --max_size 1000 \ - --max_iterations 2000; + --max_size 1024 \ + --first_frame_iterations 3000 \ + --verbose; ``` *Note*: When using `--init_frame_type prev_warp` you must have previously computed the backward and forward optical flow between the frames. See `./video_input/make-opt-flow.sh` and `./video_input/run-deepflow.sh` @@ -231,14 +250,14 @@ python neural_style.py --video \ * `--content_layer_weights`: Space-separated weights of each content layer to the content loss. *Default*: `1.0` * `--style_layer_weights`: Space-separated weights of each style layer to loss. *Default*: `0.2 0.2 0.2 0.2 0.2` * `--style_scale`: Scale of the style image. Not currently implemented. -* `--original_colors`: Boolean *flag* indicating if the style is transferred but not the colors. -* `--style_mask`: Boolean *flag* indicating if style is transferred to masked regions. +* `--original_colors`: Boolean flag indicating if the style is transferred but not the colors. +* `--style_mask`: Boolean flag indicating if style is transferred to masked regions. * `--style_mask_imgs`: Filenames of the style mask images (example: `face_mask.png`). To use multiple style mask images, pass a *space-separated* list. 
*Example*: `--style_mask_imgs face_mask.png face_mask_inv.png` * `--noise_ratio`: Interpolation value between the content image and noise image if network is initialized with `random`. *Default*: `1.0` * `--seed`: Seed for the random number generator. *Default*: `0` -* `--model_weights`: Weights of the VGG-19 network. Download [here](http://www.vlfeat.org/matconvnet/pretrained/). *Default*:`imagenet-vgg-verydeep-19.mat` +* `--model_weights`: Weights and biases of the VGG-19 network. Download [here](http://www.vlfeat.org/matconvnet/pretrained/). *Default*:`imagenet-vgg-verydeep-19.mat` * `--pooling_type`: Type of pooling in convolutional neural network. *Choices*: `avg`, `max`. *Default*: `avg` -* `--device`: GPU or CPU device. GPU mode highly recommended but requires NVIDIA CUDA. *Choices*: `/gpu:0` `/cpu:0`. *Default*: `/gpu:0`. +* `--device`: GPU or CPU device. GPU mode highly recommended but requires NVIDIA CUDA. *Choices*: `/gpu:0` `/cpu:0`. *Default*: `/gpu:0` * `--image_output_dir`: Directory to write output to. *Default*: `./image_output` * `--img_name`: Filename of the output image. * `--verbose`: Boolean flag indicating if statements should be printed to the console. @@ -246,10 +265,11 @@ python neural_style.py --video \ #### Optimization Arguments * `--optimizer`: Loss minimization optimizer. L-BFGS gives better results. Adam uses less memory. *Choices*: `lbfgs`, `adam`. *Default*: `lbfgs` * `--learning_rate`: Learning-rate parameter for the Adam optimizer. *Default*: `1e1` -* `--max_iterations`: Max number of iterations for the Adam or L-BFGS optimizer. *Default*: `1e3` +* `--max_iterations`: Max number of iterations for the Adam or L-BFGS optimizer. *Default*: `1000` +* `--print_iterations`: Number of iterations between optimizer print statements. *Default*: `50` #### Video Frame Arguments -* `--video`: Boolean *flag* indicating if the user is creating a video. +* `--video`: Boolean flag indicating if the user is creating a video. 
* `--start_frame`: First frame number. *Default*: `1` * `--end_frame`: Last frame number. *Default*: `1` * `--first_frame_type`: Image used to initialize the network during the rendering of the first frame. *Choices*: `content`, `random`, `style`. *Default*: `random` @@ -261,6 +281,8 @@ python neural_style.py --video \ * `--forward_optical_flow_frmt`: Format string of forward optical flow files. *Default*: `forward_{}_{}.flo` * `--content_weights_frmt`: Format string of optical flow consistency files. *Default*: `reliable_{}_{}.txt` * `--prev_frame_indices`: Previous frames to consider for long-term temporal consistency. *Default*: `1` +* `--first_frame_iterations`: Maximum number of optimizer iterations for the first frame. *Default*: `2000` +* `--frame_iterations`: Maximum number of optimizer iterations for each frame after the first frame. *Default*: `800` ## Questions and Errata @@ -310,8 +332,9 @@ Artistic images were created by the popular historical artists: * [Édouard Manet](http://www.manet.org/) * [Pablo Picasso](https://www.wikiart.org/en/pablo-picasso) * [Joseph Mallord William Turner](https://en.wikipedia.org/wiki/J._M._W._Turner) +* [Frida Kahlo](https://en.wikipedia.org/wiki/Frida_Kahlo) -Several Bash shell scripts for testing were created by my brother [Sheldon Smith](http://www.imdb.com/name/nm4328496/). +Bash shell scripts for testing were created by my brother [Sheldon Smith](http://www.imdb.com/name/nm4328496/). 
## Citation diff --git a/neural_style.py b/neural_style.py index 0db8403..565dbc0 100644 --- a/neural_style.py +++ b/neural_style.py @@ -1,5 +1,3 @@ -import matplotlib.pyplot as plt -import tensorflow.python import tensorflow as tf import numpy as np import scipy.io @@ -20,11 +18,11 @@ def parse_args(): # options for single image parser.add_argument('--verbose', action='store_true', - help="Boolean flag indicating if statements should be printed to the console.") + help='Boolean flag indicating if statements should be printed to the console.') parser.add_argument('--img_name', type=str, - default="result", - help="Filename of the output image.") + default='result', + help='Filename of the output image.') parser.add_argument('--style_imgs', nargs='+', type=str, help='Filenames of the style images (example: starry-night.jpg)', @@ -73,7 +71,7 @@ def parse_args(): parser.add_argument('--content_loss_function', type=int, default=1, choices=[1, 2, 3], - help='A few different constants for the content layer loss functions have been presented. (default: %(default)s)') + help='Different constants for the content layer loss functions. (default: %(default)s)') parser.add_argument('--content_layers', type=str, default=['conv4_2'], @@ -112,12 +110,13 @@ def parse_args(): help='Seed for the random number generator. (default: %(default)s)') parser.add_argument('--model_weights', type=str, - default='imagenet-vgg-verydeep-19.mat') + default='imagenet-vgg-verydeep-19.mat', + help='Weights and biases of the VGG-19 network.') parser.add_argument('--pooling_type', type=str, default='avg', choices=['avg', 'max'], - help="Type of pooling in convolutional neural network. (default: %(default)s)") + help='Type of pooling in convolutional neural network. (default: %(default)s)') parser.add_argument('--device', type=str, default='/gpu:0', @@ -139,17 +138,23 @@ def parse_args(): help='Learning rate parameter for the Adam optimizer. 
(default: %(default)s)') parser.add_argument('--max_iterations', type=int, - default=1e3, + default=1000, help='Max number of iterations for the Adam or L-BFGS optimizer. (default: %(default)s)') + + parser.add_argument('--print_iterations', type=int, + default=50, + help='Number of iterations between optimizer print statements. (default: %(default)s)') # options for video frames parser.add_argument('--video', action='store_true', help='Boolean flag indicating if the user is generating a video.') - parser.add_argument('--start_frame', type=int, default=1, + parser.add_argument('--start_frame', type=int, + default=1, help='First frame number.') - parser.add_argument('--end_frame', type=int, default=1, + parser.add_argument('--end_frame', type=int, + default=1, help='Last frame number.') parser.add_argument('--first_frame_type', type=str, @@ -189,9 +194,22 @@ def parse_args(): parser.add_argument('--prev_frame_indices', nargs='+', type=int, default=[1], help='Previous frames to consider for longterm temporal consistency.') + + parser.add_argument('--first_frame_iterations', type=int, + default=2000, + help='Maximum number of optimizer iterations of the first frame. (default: %(default)s)') + parser.add_argument('--frame_iterations', type=int, + default=800, + help='Maximum number of optimizer iterations for each frame after the first frame. 
(default: %(default)s)') + args = parser.parse_args() + # normalize weights + args.style_layer_weights = norm(args.style_layer_weights) + args.content_layer_weights = norm(args.content_layer_weights) + args.style_imgs_weights = norm(args.style_imgs_weights) + # create directories for output if args.video: maybe_make_directory(args.video_output_dir) @@ -208,7 +226,7 @@ def parse_args(): vgg19_mean = np.array([123.68, 116.779, 103.939]).reshape((1,1,1,3)) def build_vgg19(input_img): - if args.verbose: print("\nBUILDING VGG-19 NETWORK") + if args.verbose: print('\nBUILDING VGG-19 NETWORK') net = {} _, h, w, d = input_img.shape @@ -324,9 +342,13 @@ def content_layer_loss(p, x): _, h, w, d = p.get_shape() M = h.value * w.value N = d.value - loss = (1./(2 * N**0.5 * M**0.5 )) * tf.reduce_sum(tf.pow((x - p), 2)) - #loss = (1./2.) * tf.reduce_sum(tf.pow((x - p), 2)) - #loss = (1./(N * M)) * tf.reduce_sum(tf.pow((x - p), 2)) + if args.content_loss_function == 1: + K = 1. / (2 * N**0.5 * M**0.5) + elif args.content_loss_function == 2: + K = 1. / 2. + elif args.content_loss_function == 3: + K = 1. 
/ (N * M) + loss = K * tf.reduce_sum(tf.pow((x - p), 2)) return loss def gram_matrix(x, area, depth): @@ -398,7 +420,6 @@ def sum_content_losses(sess, net, content_img): p = sess.run(net[layer]) x = net[layer] p = tf.convert_to_tensor(p) - x = tf.convert_to_tensor(x) content_loss += content_layer_loss(p, x) * weight content_loss /= float(len(args.content_layers)) return content_loss @@ -471,7 +492,7 @@ def write_image(path, img): def preprocess(img, mean): # BGR to RGB img = img[...,::-1] - # shape (H, W, D) to (1, H, W, D) + # shape (h, w, d) to (1, h, w, d) img = img[np.newaxis,:,:,:] # subtract mean img -= mean @@ -480,7 +501,7 @@ def preprocess(img, mean): def postprocess(img, mean): # add mean img += mean - # shape (1, H, W, D) to (H, W, D) + # shape (1, h, w, d) to (h, w, d) img = img[0] img = np.clip(img, 0, 255).astype('uint8') # RGB to BGR @@ -488,7 +509,7 @@ def postprocess(img, mean): return img def read_flow_file(path): - with open(path, "rb") as f: + with open(path, 'rb') as f: # 4 bytes header header = struct.unpack('4s', f.read(4))[0] # 4 bytes width, height @@ -515,6 +536,9 @@ def read_weights_file(path): weights = np.dstack([vals.astype(np.float32)] * 3) return weights +def norm(weights): + return [float(i)/sum(weights) for i in weights] + def maybe_make_directory(dir_path): if not os.path.exists(dir_path): os.makedirs(dir_path) @@ -558,13 +582,13 @@ def stylize(content_img, style_imgs, init_img, frame=None): optimizer = get_optimizer(L_total) if args.optimizer == 'adam': - minimize_with_adam(sess, net, optimizer, init_img) + minimize_with_adam(sess, net, optimizer, init_img, L_total) elif args.optimizer == 'lbfgs': minimize_with_lbfgs(sess, net, optimizer, init_img) output_img = sess.run(net['input']) - if args.is_original_colors: + if args.original_colors: output_img = convert_to_original_colors(np.copy(content_img), np.copy(output_img)) if args.video: @@ -579,9 +603,9 @@ def minimize_with_lbfgs(sess, net, optimizer, init_img): 
sess.run(net['input'].assign(init_img)) optimizer.minimize(sess) -def minimize_with_adam(sess, net, optimizer, init_img): +def minimize_with_adam(sess, net, optimizer, init_img, loss): if args.verbose: print('MINIMIZING LOSS USING: ADAM OPTIMIZER') - train_op = optimizer.minimize(L_total) + train_op = optimizer.minimize(loss) init_op = tf.initialize_all_variables() sess.run(init_op) sess.run(net['input'].assign(init_img)) @@ -591,12 +615,13 @@ def minimize_with_adam(sess, net, optimizer, init_img): iterations += 1 def get_optimizer(loss): + print_iterations = args.print_iterations if args.verbose else 0 if args.optimizer == 'lbfgs': optimizer = tf.contrib.opt.ScipyOptimizerInterface( loss, method='L-BFGS-B', options={'maxiter': args.max_iterations, - 'disp': args.verbose}) + 'disp': print_iterations}) elif args.optimizer == 'adam': optimizer = tf.train.AdamOptimizer(args.learning_rate) return optimizer @@ -609,40 +634,42 @@ def write_video_output(frame, output_img): def write_image_output(output_img, content_img, style_imgs, init_img): out_dir = os.path.join(args.img_output_dir, args.img_name) maybe_make_directory(out_dir) - img_path = os.path.join(out_dir, "output.png") - content_path = os.path.join(out_dir, "content.png") - init_path = os.path.join(out_dir, "init.png") + img_path = os.path.join(out_dir, args.img_name+'.png') + content_path = os.path.join(out_dir, 'content.png') + init_path = os.path.join(out_dir, 'init.png') write_image(img_path, output_img) write_image(content_path, content_img) write_image(init_path, init_img) index = 0 for style_img in style_imgs: - path = os.path.join(out_dir, str(index)+"_style.png") + path = os.path.join(out_dir, 'style_'+str(index)+'.png') write_image(path, style_img) index += 1 - + # save the configuration settings - out_file = os.path.join(out_dir, "meta_data.txt") - f = open(out_file, "w") - f.write("image name: {}\n".format(args.img_name)) - f.write("content: {}\n".format(args.content_img)) + out_file = 
os.path.join(out_dir, 'meta_data.txt') + f = open(out_file, 'w') + f.write('image name: {}\n'.format(args.img_name)) + f.write('content: {}\n'.format(args.content_img)) index = 0 for style_img, weight in zip(args.style_imgs, args.style_imgs_weights): - f.write("styles ["+str(index)+"]: {} * {}\n".format(weight, style_img)) + f.write('styles ['+str(index)+']: {} * {}\n'.format(weight, style_img)) + index += 1 index = 0 if args.style_mask_imgs is not None: for mask in args.style_mask_imgs: - f.write("style masks ["+str(index)+"]: {}\n".format(mask)) - f.write("init_type: {}\n".format(args.init_img_type)) - f.write("content_weight: {}\n".format(args.content_weight)) - f.write("style_weight: {}\n".format(args.style_weight)) - f.write("tv_weight: {}\n".format(args.tv_weight)) - f.write("content_layers: {}\n".format(args.content_layers)) - f.write("style_layers: {}\n".format(args.style_layers)) - f.write("optimizer_type: {}\n".format(args.optimizer)) - f.write("max_iterations: {}\n".format(args.max_iterations)) - f.write("max_image_size: {}\n".format(args.max_size)) + f.write('style masks ['+str(index)+']: {}\n'.format(mask)) + index += 1 + f.write('init_type: {}\n'.format(args.init_img_type)) + f.write('content_weight: {}\n'.format(args.content_weight)) + f.write('style_weight: {}\n'.format(args.style_weight)) + f.write('tv_weight: {}\n'.format(args.tv_weight)) + f.write('content_layers: {}\n'.format(args.content_layers)) + f.write('style_layers: {}\n'.format(args.style_layers)) + f.write('optimizer_type: {}\n'.format(args.optimizer)) + f.write('max_iterations: {}\n'.format(args.max_iterations)) + f.write('max_image_size: {}\n'.format(args.max_size)) f.close() ''' @@ -752,8 +779,7 @@ def warp_image(src, flow): # remap pixels to optical flow dst = cv2.remap( src, flow_map[0], flow_map[1], - interpolation=cv2.INTER_CUBIC, - borderMode=cv2.BORDER_TRANSPARENT) + interpolation=cv2.INTER_CUBIC, borderMode=cv2.BORDER_TRANSPARENT) return dst def 
convert_to_original_colors(content_img, stylized_img): @@ -787,6 +813,7 @@ def render_video(): content_frame = get_content_frame(frame) style_imgs = get_style_images(content_frame, args.style_scale) init_img = get_init_image(args.first_frame_type, content_frame, style_imgs, frame) + args.max_iterations = args.first_frame_iterations tick = time.time() stylize(content_frame, style_imgs, init_img, frame) tock = time.time() @@ -795,6 +822,7 @@ def render_video(): content_frame = get_content_frame(frame) style_imgs = get_style_images(content_frame, args.style_scale) init_img = get_init_image(args.init_frame_type, content_frame, style_imgs, frame) + args.max_iterations = args.frame_iterations tick = time.time() stylize(content_frame, style_imgs, init_img, frame) tock = time.time() |
