path: root/Codes/flownet2/src/ops/downsample/downsample_kernel.cc

#define EIGEN_USE_THREADS

#include "downsample_kernel.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/platform/types.h"

namespace tensorflow {

typedef Eigen::GpuDevice GPUDevice;

// OpKernel that downsamples a batch of NHWC images to the fixed
// [height, width] given by the "size" attribute. Only a GPU
// implementation is registered below.
template <typename Device>
class DownsampleKernel : public OpKernel {
 public:
  explicit DownsampleKernel(OpKernelConstruction* ctx) : OpKernel(ctx) {
    // Get the "size" attribute ([height, width]) and verify it has two elements
    OP_REQUIRES_OK(ctx, ctx->GetAttr("size", &size_));
    OP_REQUIRES(ctx, size_.size() == 2,
                errors::InvalidArgument("size must contain exactly 2 elements"));
  }

  void Compute(OpKernelContext* ctx) override {
    // Get the input images and verify their rank
    const Tensor& input_t = ctx->input(0);
    OP_REQUIRES(ctx, input_t.dims() == 4,
                errors::InvalidArgument("Input images must have rank 4"));

    // Allocate the output: [batch, out_height, out_width, channels]
    Tensor* output_t;
    OP_REQUIRES_OK(ctx, ctx->allocate_output(
        0, TensorShape({input_t.dim_size(0), size_[0], size_[1], input_t.dim_size(3)}), &output_t));

    // Map the tensors to Eigen and launch the downsampling kernel on the GPU
    auto input = input_t.tensor<float, 4>();
    auto output = output_t->tensor<float, 4>();

    Downsample(ctx->eigen_gpu_device(), input, output);
  }

 private:
  std::vector<int32> size_;
};
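
// For reference, a minimal sketch of how the Downsample launcher called in
// Compute() is presumably declared in downsample_kernel.h, inferred purely
// from the call site above (the actual header may differ):
//
//   void Downsample(const Eigen::GpuDevice& d,
//                   typename TTypes<float, 4>::ConstTensor input,
//                   typename TTypes<float, 4>::Tensor output);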

REGISTER_KERNEL_BUILDER(Name("Downsample").Device(DEVICE_GPU),
                        DownsampleKernel<GPUDevice>);

}  // end namespace tensorflow