chenqiang
2018-09-21 08:50:41 UTC
Dear Sir:
I have run into a problem and need your help.
The block dimensions do not seem to have any effect, as shown in the code below.
================================
import pycuda.driver as cuda
import pycuda.autoinit
from pycuda.compiler import SourceModule
import numpy as np
# Minimal reproduction: launch a kernel that writes each thread's linear
# block index into b_img at that thread's linear global index.
#
# Host-side buffers. b_img is an uninitialised 2x2x2 int32 array (8 elements).
# NOTE(review): the launch below runs 2*2*2 blocks of 8 threads = 64 threads,
# each writing a distinct element, so indices 8..63 fall outside this
# 8-element array. The printed results in this post are 4x4x4, so presumably
# the actual run used shape [4, 4, 4] -- confirm.
b_img = np.empty([2, 2, 2]).astype(np.int32)
# Random 0/1 labels; passed to the kernel, but the kernel body never reads it.
label_img = np.random.randint(0, 2, [2,2,2]).astype(np.int32)
# Kernel: global_block_idx linearises blockIdx over the grid (x fastest, then
# y, then z); global_thread_idx = global_block_idx * threads_per_block + the
# thread's linear offset inside its block (x fastest, then y, then z).
# NOTE(review): because the in-block offset is linearised, any block shape with
# 8 threads -- (8,1,1) or (2,2,2) -- yields offsets 0..7, so block b always
# fills the contiguous range b_img[8*b : 8*b + 8]. The block shape therefore
# cannot change the output of this kernel; a spatially tiled result would need
# per-axis coordinates (blockIdx.* * blockDim.* + threadIdx.*) instead.
mod = SourceModule("""
__global__ void localUF(int *b_img, int *label_img)
{
int global_block_idx = gridDim.x*gridDim.y*blockIdx.z + gridDim.x*blockIdx.y + blockIdx.x;
int global_thread_idx = global_block_idx * (blockDim.x * blockDim.y * blockDim.z) + (threadIdx.z * (blockDim.x * blockDim.y)) + (threadIdx.y * blockDim.x) + threadIdx.x;
b_img[global_thread_idx] = global_block_idx;
}
""")
func = mod.get_function("localUF")
# Launch with a 2x2x2 grid of 8x1x1 blocks. b_img is wrapped in cuda.InOut and
# label_img in cuda.In (per the PyCUDA docs: InOut is copied to the device and
# back after the call, In is copied to the device only).
func(cuda.InOut(b_img), cuda.In(label_img),
block=(8,1,1), grid=(2,2,2))
With block=(8,1,1), the output is:
b_img
array([[[0, 0, 0, 0],
[0, 0, 0, 0],
[1, 1, 1, 1],
[1, 1, 1, 1]],
[[2, 2, 2, 2],
[2, 2, 2, 2],
[3, 3, 3, 3],
[3, 3, 3, 3]],
[[4, 4, 4, 4],
[4, 4, 4, 4],
[5, 5, 5, 5],
[5, 5, 5, 5]],
[[6, 6, 6, 6],
[6, 6, 6, 6],
[7, 7, 7, 7],
[7, 7, 7, 7]]], dtype=int32)
If I set block=(2,2,2), the output is the same:
array([[[0, 0, 0, 0],
[0, 0, 0, 0],
[1, 1, 1, 1],
[1, 1, 1, 1]],
[[2, 2, 2, 2],
[2, 2, 2, 2],
[3, 3, 3, 3],
[3, 3, 3, 3]],
[[4, 4, 4, 4],
[4, 4, 4, 4],
[5, 5, 5, 5],
[5, 5, 5, 5]],
[[6, 6, 6, 6],
[6, 6, 6, 6],
[7, 7, 7, 7],
[7, 7, 7, 7]]], dtype=int32)
The output I expected is:
array([[[0, 0, 1, 1],
[0, 0, 1, 1],
[2, 2, 3, 3],
[2, 2, 3, 3]],
[[0, 0, 1, 1],
[0, 0, 1, 1],
[2, 2, 3, 3],
[2, 2, 3, 3]],
[[4, 4, 5, 5],
[4, 4, 5, 5],
[6, 6, 7, 7],
[6, 6, 7, 7]],
[[4, 4, 5, 5],
[4, 4, 5, 5],
[6, 6, 7, 7],
[6, 6, 7, 7]]], dtype=int32)
Is a 3D block useless? Does this mean that block=(2,2,2) is equivalent to block=(2x2x2,1,1)?
I have run into a problem and need your help.
The block dimensions do not seem to have any effect, as shown in the code below.
================================
import pycuda.driver as cuda
import pycuda.autoinit
from pycuda.compiler import SourceModule
import numpy as np
# Minimal reproduction: launch a kernel that writes each thread's linear
# block index into b_img at that thread's linear global index.
#
# Host-side buffers. b_img is an uninitialised 2x2x2 int32 array (8 elements).
# NOTE(review): the launch below runs 2*2*2 blocks of 8 threads = 64 threads,
# each writing a distinct element, so indices 8..63 fall outside this
# 8-element array. The printed results in this post are 4x4x4, so presumably
# the actual run used shape [4, 4, 4] -- confirm.
b_img = np.empty([2, 2, 2]).astype(np.int32)
# Random 0/1 labels; passed to the kernel, but the kernel body never reads it.
label_img = np.random.randint(0, 2, [2,2,2]).astype(np.int32)
# Kernel: global_block_idx linearises blockIdx over the grid (x fastest, then
# y, then z); global_thread_idx = global_block_idx * threads_per_block + the
# thread's linear offset inside its block (x fastest, then y, then z).
# NOTE(review): because the in-block offset is linearised, any block shape with
# 8 threads -- (8,1,1) or (2,2,2) -- yields offsets 0..7, so block b always
# fills the contiguous range b_img[8*b : 8*b + 8]. The block shape therefore
# cannot change the output of this kernel; a spatially tiled result would need
# per-axis coordinates (blockIdx.* * blockDim.* + threadIdx.*) instead.
mod = SourceModule("""
__global__ void localUF(int *b_img, int *label_img)
{
int global_block_idx = gridDim.x*gridDim.y*blockIdx.z + gridDim.x*blockIdx.y + blockIdx.x;
int global_thread_idx = global_block_idx * (blockDim.x * blockDim.y * blockDim.z) + (threadIdx.z * (blockDim.x * blockDim.y)) + (threadIdx.y * blockDim.x) + threadIdx.x;
b_img[global_thread_idx] = global_block_idx;
}
""")
func = mod.get_function("localUF")
# Launch with a 2x2x2 grid of 8x1x1 blocks. b_img is wrapped in cuda.InOut and
# label_img in cuda.In (per the PyCUDA docs: InOut is copied to the device and
# back after the call, In is copied to the device only).
func(cuda.InOut(b_img), cuda.In(label_img),
block=(8,1,1), grid=(2,2,2))
With block=(8,1,1), the output is:
b_img
array([[[0, 0, 0, 0],
[0, 0, 0, 0],
[1, 1, 1, 1],
[1, 1, 1, 1]],
[[2, 2, 2, 2],
[2, 2, 2, 2],
[3, 3, 3, 3],
[3, 3, 3, 3]],
[[4, 4, 4, 4],
[4, 4, 4, 4],
[5, 5, 5, 5],
[5, 5, 5, 5]],
[[6, 6, 6, 6],
[6, 6, 6, 6],
[7, 7, 7, 7],
[7, 7, 7, 7]]], dtype=int32)
If I set block=(2,2,2), the output is the same:
array([[[0, 0, 0, 0],
[0, 0, 0, 0],
[1, 1, 1, 1],
[1, 1, 1, 1]],
[[2, 2, 2, 2],
[2, 2, 2, 2],
[3, 3, 3, 3],
[3, 3, 3, 3]],
[[4, 4, 4, 4],
[4, 4, 4, 4],
[5, 5, 5, 5],
[5, 5, 5, 5]],
[[6, 6, 6, 6],
[6, 6, 6, 6],
[7, 7, 7, 7],
[7, 7, 7, 7]]], dtype=int32)
The output I expected is:
array([[[0, 0, 1, 1],
[0, 0, 1, 1],
[2, 2, 3, 3],
[2, 2, 3, 3]],
[[0, 0, 1, 1],
[0, 0, 1, 1],
[2, 2, 3, 3],
[2, 2, 3, 3]],
[[4, 4, 5, 5],
[4, 4, 5, 5],
[6, 6, 7, 7],
[6, 6, 7, 7]],
[[4, 4, 5, 5],
[4, 4, 5, 5],
[6, 6, 7, 7],
[6, 6, 7, 7]]], dtype=int32)
Is a 3D block useless? Does this mean that block=(2,2,2) is equivalent to block=(2x2x2,1,1)?