summary | refs | log | tree | commit | diff
path: root/src/SeparableConvolution_kernel.cu
diff options
context:
space:
mode:
author sniklaus <simon.niklaus@outlook.com> 2017-09-18 22:20:04 -0700
committer sniklaus <simon.niklaus@outlook.com> 2017-09-18 22:20:04 -0700
commit cfd6a91a628c603eeeecf517340ac0474a126496 (patch)
tree 7c891bc8660904771bb612f56aca8f22760d0cdb /src/SeparableConvolution_kernel.cu
parent e123297d61dc9915b70060def498560ca5d3d073 (diff)
no message
Diffstat (limited to 'src/SeparableConvolution_kernel.cu')
-rw-r--r-- src/SeparableConvolution_kernel.cu | 20
1 files changed, 10 insertions, 10 deletions
diff --git a/src/SeparableConvolution_kernel.cu b/src/SeparableConvolution_kernel.cu
index b40786d..b4e6d59 100644
--- a/src/SeparableConvolution_kernel.cu
+++ b/src/SeparableConvolution_kernel.cu
@@ -17,9 +17,9 @@
__global__ void kernel_SeparableConvolution_updateOutput(
const int n,
- const float* input1, const long4 input1_size, const long4 input1_stride,
- const float* input2, const long4 input2_size, const long4 input2_stride,
- const float* input3, const long4 input3_size, const long4 input3_stride,
+ const float* input, const long4 input_size, const long4 input_stride,
+ const float* vertical, const long4 vertical_size, const long4 vertical_stride,
+ const float* horizontal, const long4 horizontal_size, const long4 horizontal_stride,
float* output, const long4 output_size, const long4 output_stride
) {
int intIndex = blockIdx.x * blockDim.x + threadIdx.x;
@@ -37,7 +37,7 @@ __global__ void kernel_SeparableConvolution_updateOutput(
for (int intFilterY = 0; intFilterY < 51; intFilterY += 1) {
for (int intFilterX = 0; intFilterX < 51; intFilterX += 1) {
- dblOutput += IDX_4(input1, intBatch, intDepth, intY + intFilterY, intX + intFilterX) * IDX_4(input2, intBatch, intFilterY, intY, intX) * IDX_4(input3, intBatch, intFilterX, intY, intX);
+ dblOutput += IDX_4(input, intBatch, intDepth, intY + intFilterY, intX + intFilterX) * IDX_4(vertical, intBatch, intFilterY, intY, intX) * IDX_4(horizontal, intBatch, intFilterX, intY, intX);
}
}
@@ -46,9 +46,9 @@ __global__ void kernel_SeparableConvolution_updateOutput(
void SeparableConvolution_kernel_forward(
THCState* state,
- THCudaTensor* input1,
- THCudaTensor* input2,
- THCudaTensor* input3,
+ THCudaTensor* input,
+ THCudaTensor* vertical,
+ THCudaTensor* horizontal,
THCudaTensor* output
) {
int n = 0;
@@ -56,9 +56,9 @@ void SeparableConvolution_kernel_forward(
n = THCudaTensor_nElement(state, output);
kernel_SeparableConvolution_updateOutput<<< (n + 512 - 1) / 512, 512, 0, THCState_getCurrentStream(state) >>>(
n,
- THCudaTensor_data(state, input1), make_long4(input1->size[0], input1->size[1], input1->size[2], input1->size[3]), make_long4(input1->stride[0], input1->stride[1], input1->stride[2], input1->stride[3]),
- THCudaTensor_data(state, input2), make_long4(input2->size[0], input2->size[1], input2->size[2], input2->size[3]), make_long4(input2->stride[0], input2->stride[1], input2->stride[2], input2->stride[3]),
- THCudaTensor_data(state, input3), make_long4(input3->size[0], input3->size[1], input3->size[2], input3->size[3]), make_long4(input3->stride[0], input3->stride[1], input3->stride[2], input3->stride[3]),
+ THCudaTensor_data(state, input), make_long4(input->size[0], input->size[1], input->size[2], input->size[3]), make_long4(input->stride[0], input->stride[1], input->stride[2], input->stride[3]),
+ THCudaTensor_data(state, vertical), make_long4(vertical->size[0], vertical->size[1], vertical->size[2], vertical->size[3]), make_long4(vertical->stride[0], vertical->stride[1], vertical->stride[2], vertical->stride[3]),
+ THCudaTensor_data(state, horizontal), make_long4(horizontal->size[0], horizontal->size[1], horizontal->size[2], horizontal->size[3]), make_long4(horizontal->stride[0], horizontal->stride[1], horizontal->stride[2], horizontal->stride[3]),
THCudaTensor_data(state, output), make_long4(output->size[0], output->size[1], output->size[2], output->size[3]), make_long4(output->stride[0], output->stride[1], output->stride[2], output->stride[3])
);