[snip code]
That code is boring! I mean, show us something interesting, like this:
[snip code]
Here you go, I hope this makes you happy.
// Chooses the launch configuration (block count and threads per block) for a
// reduction over n elements.
//
// Parameters:
//   n       - number of elements to reduce.
//   blocks  - [out] number of thread blocks to launch.
//   threads - [out] number of threads per block.
//
// For n below 2 * MAX_REDUCTION_THREADS the thread count is nextPow2((n + 1) / 2);
// otherwise it is MAX_REDUCTION_THREADS. The block count is ceil(n / (2 * threads)),
// capped at MAX_REDUCTION_BLOCKS.
//
// For n <= 0 a 1 x 1 configuration is returned so that the result is always
// launchable; callers that can detect an empty input should preferably skip
// the reduction altogether.
void getBlocksAndThreads(int n, int &blocks, int &threads)
{
    // (n + 1) / 2 is not positive for n <= 0. nextPow2 is assumed to yield 0
    // for such input (TODO confirm against its definition), which would make
    // the block computation below divide by zero.
    if (n <= 0)
    {
        threads = 1;
        blocks = 1;
        return;
    }

    threads = (n < MAX_REDUCTION_THREADS * 2) ? nextPow2((n + 1) / 2) : MAX_REDUCTION_THREADS;

    // Ceiling division written without "n + (divisor - 1)", which can overflow
    // int when n is close to INT_MAX.
    const int elemsPerBlock = threads * 2;
    blocks = n / elemsPerBlock + ((n % elemsPerBlock != 0) ? 1 : 0);
    blocks = MIN(MAX_REDUCTION_BLOCKS, blocks);
}
// Launches a single instantiation of the reduction kernel for a compile-time
// block size: reduce6_multiple when multiple > 1, reduce6 otherwise.
template <class T, unsigned int kBlockSize, bool kSizeIsPow2>
static void getreducedLaunch(const dim3 &grid, const dim3 &block, int smemBytes,
                             T *d_idata, T *d_odata, int size, int multiple, int total_size)
{
    if (multiple > 1)
    {
        // Several reductions in one launch; only this kernel receives total_size.
        reduce6_multiple<T, kBlockSize, kSizeIsPow2><<< grid, block, smemBytes >>>(d_idata, d_odata, size, total_size);
    }
    else
    {
        // Only a single reduction is required.
        reduce6<T, kBlockSize, kSizeIsPow2><<< grid, block, smemBytes >>>(d_idata, d_odata, size);
    }
}

// Maps the run-time thread count onto the matching compile-time block size.
template <class T, bool kSizeIsPow2>
static void getreducedDispatch(int threads, const dim3 &grid, const dim3 &block, int smemBytes,
                               T *d_idata, T *d_odata, int size, int multiple, int total_size)
{
    switch (threads)
    {
        case 512:
            getreducedLaunch<T, 512, kSizeIsPow2>(grid, block, smemBytes, d_idata, d_odata, size, multiple, total_size);
            break;

        case 256:
            getreducedLaunch<T, 256, kSizeIsPow2>(grid, block, smemBytes, d_idata, d_odata, size, multiple, total_size);
            break;

        case 128:
            getreducedLaunch<T, 128, kSizeIsPow2>(grid, block, smemBytes, d_idata, d_odata, size, multiple, total_size);
            break;

        case 64:
            getreducedLaunch<T, 64, kSizeIsPow2>(grid, block, smemBytes, d_idata, d_odata, size, multiple, total_size);
            break;

        case 32:
            getreducedLaunch<T, 32, kSizeIsPow2>(grid, block, smemBytes, d_idata, d_odata, size, multiple, total_size);
            break;

        case 16:
            getreducedLaunch<T, 16, kSizeIsPow2>(grid, block, smemBytes, d_idata, d_odata, size, multiple, total_size);
            break;

        case 8:
            getreducedLaunch<T, 8, kSizeIsPow2>(grid, block, smemBytes, d_idata, d_odata, size, multiple, total_size);
            break;

        case 4:
            getreducedLaunch<T, 4, kSizeIsPow2>(grid, block, smemBytes, d_idata, d_odata, size, multiple, total_size);
            break;

        case 2:
            getreducedLaunch<T, 2, kSizeIsPow2>(grid, block, smemBytes, d_idata, d_odata, size, multiple, total_size);
            break;

        case 1:
            getreducedLaunch<T, 1, kSizeIsPow2>(grid, block, smemBytes, d_idata, d_odata, size, multiple, total_size);
            break;

        default:
            // No kernel is instantiated for this thread count, so nothing is
            // launched and d_odata is left untouched.
            break;
    }
}

// Launches the reduction kernel that matches the requested configuration.
//
// Parameters:
//   size       - element count forwarded to the kernel; also decides (via
//                isPow2) which power-of-two kernel variant is used.
//   threads    - threads per block. Must be a power of two in [1, 512];
//                any other value launches nothing.
//   blocks     - x-dimension of the grid.
//   d_idata    - input pointer forwarded to the kernel (presumably device
//                memory - confirm against the caller).
//   d_odata    - output pointer forwarded to the kernel (presumably device
//                memory - confirm against the caller).
//   multiple   - y-dimension of the grid. A value greater than 1 selects
//                reduce6_multiple; otherwise reduce6 is launched.
//   total_size - forwarded to reduce6_multiple only; unused when multiple <= 1.
//
// The launch is not checked for errors here; callers that need to detect a
// failed launch should query cudaGetLastError() afterwards.
template <class T>
void
getreduced(int size, int threads, int blocks, T *d_idata, T *d_odata, int multiple = 1, int total_size = 0)
{
    dim3 dimBlock(threads, 1, 1);
    dim3 dimGrid(blocks, multiple, 1);

    // Blocks of at most 32 threads get twice the shared memory per thread.
    // NOTE(review): presumably this keeps the kernel's final in-warp stage from
    // indexing past the end of shared memory - confirm against reduce6.
    int smemSize = (threads <= 32) ? 2 * threads * sizeof(T) : threads * sizeof(T);

    if (isPow2(size))
    {
        getreducedDispatch<T, true>(threads, dimGrid, dimBlock, smemSize, d_idata, d_odata, size, multiple, total_size);
    }
    else
    {
        getreducedDispatch<T, false>(threads, dimGrid, dimBlock, smemSize, d_idata, d_odata, size, multiple, total_size);
    }
}