Commit afb99efa authored by Marc Modat's avatar Marc Modat

CPU side of the benchmark file is done - still need to do the GPU

parent 4477b851
/*
* benchmark.cpp
*
*
* Created by Marc Modat on 15/11/2009.
* Copyright (c) 2009, University College London. All rights reserved.
* Centre for Medical Image Computing (CMIC)
* See the LICENSE.txt file in the nifty_reg root folder
*
*/
#include "_reg_resampling.h"
* benchmark.cpp
*
*
* Created by Marc Modat on 15/11/2009.
* Copyright (c) 2009, University College London. All rights reserved.
* Centre for Medical Image Computing (CMIC)
* See the LICENSE.txt file in the nifty_reg root folder
*
*/
#include "_reg_resampling.h"
#include "_reg_affineTransformation.h"
#include "_reg_bspline.h"
#include "_reg_mutualinformation.h"
......@@ -24,57 +24,86 @@
#include "_reg_mutualinformation_gpu.h"
#include "_reg_tools_gpu.h"
#include "_reg_blockMatching_gpu.h"
#define GRID_SPACING 10
#define BINNING 68
int main(int argc, char **argv)
{
//The image dimension is user-defined
if(argc<2){
fprintf(stderr, "The image dimension is expected.\n");
fprintf(stderr, "Exit ...\n");
return 1;
}
int dimension = atoi(argv[1]);
void Usage(char *);
int main(int argc, char **argv)
{
int dimension = 100;
float gridSpacing = 10.0f;
int binning = 68;
char *outputFileName = "benchmark_result.txt";
for(int i=1;i<argc;i++){
if(strcmp(argv[i], "-help")==0 || strcmp(argv[i], "-Help")==0 ||
strcmp(argv[i], "-HELP")==0 || strcmp(argv[i], "-h")==0 ||
strcmp(argv[i], "--h")==0 || strcmp(argv[i], "--help")==0){
Usage(argv[0]);
return 0;
}
else if(strcmp(argv[i], "-dim") == 0){
dimension=atoi(argv[++i]);
}
else if(strcmp(argv[i], "-sp") == 0){
gridSpacing=atof(argv[++i]);
}
else if(strcmp(argv[i], "-bin") == 0){
binning=atoi(argv[++i]);
}
else if(strcmp(argv[i], "-o") == 0){
outputFileName=argv[++i];
}
else{
fprintf(stderr,"Err:\tParameter %s unknown.\n",argv[i]);
Usage(argv[0]);
return 1;
}
}
// The target, source and result images are created
int dim_img[8];
dim_img[0]=3;
dim_img[1]=dimension;
dim_img[2]=dimension;
dim_img[3]=dimension;
dim_img[4]=dim_img[5]=dim_img[6]=dim_img[7]=1;
nifti_image *targetImage = nifti_make_new_nim(dim_img, NIFTI_TYPE_FLOAT32, true);
nifti_image *sourceImage = nifti_make_new_nim(dim_img, NIFTI_TYPE_FLOAT32, true);
nifti_image *resultImage = nifti_make_new_nim(dim_img, NIFTI_TYPE_FLOAT32, true);
targetImage->sform_code=0;
sourceImage->sform_code=0;
resultImage->sform_code=0;
// The target and source images are filled with random number
float *targetPtr=static_cast<float *>(targetImage->data);
float *sourcePtr=static_cast<float *>(sourceImage->data);
for(int i=0;i<targetImage->nvox;++i){
*targetPtr+=0.0f;
*sourcePtr+=0.0f;
}
// Deformation field image is created
dim_img[0]=5;
dim_img[1]=dimension;
dim_img[2]=dimension;
dim_img[3]=dimension;
dim_img[5]=3;
dim_img[4]=dim_img[6]=dim_img[7]=1;
nifti_image *deformationFieldImage = nifti_make_new_nim(dim_img, NIFTI_TYPE_FLOAT32, true);
targetImage->sform_code=0;
sourceImage->sform_code=0;
resultImage->sform_code=0;
// Joint histogram creation
double *probaJointHistogram=(double *)malloc(BINNING*(BINNING+2)*sizeof(double));
double *logJointHistogram=(double *)malloc(BINNING*(BINNING+2)*sizeof(double));
int dim_img[8];
dim_img[0]=3;
dim_img[1]=dimension;
dim_img[2]=dimension;
dim_img[3]=dimension;
dim_img[4]=dim_img[5]=dim_img[6]=dim_img[7]=1;
nifti_image *targetImage = nifti_make_new_nim(dim_img, NIFTI_TYPE_FLOAT32, true);
nifti_image *sourceImage = nifti_make_new_nim(dim_img, NIFTI_TYPE_FLOAT32, true);
nifti_image *resultImage = nifti_make_new_nim(dim_img, NIFTI_TYPE_FLOAT32, true);
targetImage->sform_code=0;
sourceImage->sform_code=0;
resultImage->sform_code=0;
int *maskImage = (int *)malloc(targetImage->nvox*sizeof(int));
// The target and source images are filled with random number
float *targetPtr=static_cast<float *>(targetImage->data);
float *sourcePtr=static_cast<float *>(sourceImage->data);
srand((unsigned)time(0));
for(int i=0;i<targetImage->nvox;++i){
*targetPtr++ = (float)(binning-4)*(float)rand()/(float)RAND_MAX + 2.0f;
*sourcePtr++ = (float)(binning-4)*(float)rand()/(float)RAND_MAX + 2.0f;
maskImage[i]=i;
}
nifti_set_filenames(targetImage, "temp.nii", 0, 0);
nifti_image_write(targetImage);
// Deformation field image is created
dim_img[0]=5;
dim_img[1]=dimension;
dim_img[2]=dimension;
dim_img[3]=dimension;
dim_img[5]=3;
dim_img[4]=dim_img[6]=dim_img[7]=1;
nifti_image *deformationFieldImage = nifti_make_new_nim(dim_img, NIFTI_TYPE_FLOAT32, true);
targetImage->sform_code=0;
sourceImage->sform_code=0;
resultImage->sform_code=0;
// Joint histogram creation
double *probaJointHistogram=(double *)malloc(binning*(binning+2)*sizeof(double));
double *logJointHistogram=(double *)malloc(binning*(binning+2)*sizeof(double));
// Affine transformation
mat44 *affineTransformation = (mat44 *)calloc(1,sizeof(mat44));
......@@ -85,18 +114,18 @@
// A control point image is created
dim_img[0]=5;
dim_img[1]=(int)floor(targetImage->nx*targetImage->dx/GRID_SPACING)+4;
dim_img[2]=(int)floor(targetImage->ny*targetImage->dy/GRID_SPACING)+4;
dim_img[3]=(int)floor(targetImage->nz*targetImage->dz/GRID_SPACING)+4;
dim_img[1]=(int)floor(targetImage->nx*targetImage->dx/gridSpacing)+4;
dim_img[2]=(int)floor(targetImage->ny*targetImage->dy/gridSpacing)+4;
dim_img[3]=(int)floor(targetImage->nz*targetImage->dz/gridSpacing)+4;
dim_img[5]=3;
dim_img[4]=dim_img[6]=dim_img[7]=1;
nifti_image *controlPointImage = nifti_make_new_nim(dim_img, NIFTI_TYPE_FLOAT32, true);
controlPointImage->cal_min=0;
controlPointImage->cal_max=0;
controlPointImage->pixdim[0]=1.0f;
controlPointImage->pixdim[1]=controlPointImage->dx=GRID_SPACING;
controlPointImage->pixdim[2]=controlPointImage->dy=GRID_SPACING;
controlPointImage->pixdim[3]=controlPointImage->dz=GRID_SPACING;
controlPointImage->pixdim[1]=controlPointImage->dx=gridSpacing;
controlPointImage->pixdim[2]=controlPointImage->dy=gridSpacing;
controlPointImage->pixdim[3]=controlPointImage->dz=gridSpacing;
controlPointImage->pixdim[4]=controlPointImage->dt=1.0f;
controlPointImage->pixdim[5]=controlPointImage->du=1.0f;
controlPointImage->pixdim[6]=controlPointImage->dv=1.0f;
......@@ -122,8 +151,8 @@
controlPointImage->qto_xyz.m[2][3] = controlPointImage->qoffset_z = originReal[2];
controlPointImage->qto_ijk = nifti_mat44_inverse(controlPointImage->qto_xyz);
if(reg_bspline_initialiseControlPointGridWithAffine(affineTransformation, controlPointImage))
return 1;
return 1;
// Different gradient images
nifti_image *resultGradientImage = nifti_copy_nim_info(deformationFieldImage);
resultGradientImage->datatype = NIFTI_TYPE_FLOAT32;
......@@ -143,6 +172,11 @@
float *conjugateH = (float *)calloc(nodeNMIGradientImage->nvox, sizeof(float));
time_t start,end;
int minutes, seconds, maxIt;
//
FILE *outputFile;
outputFile=fopen(outputFileName, "w");
/* Functions to be tested
- affine deformation field
......@@ -152,37 +186,173 @@
- spatial gradient computation
- voxel-based NMI gradient computation
- node-based NMI gradient computation
- conjugate gradient computation
- bending-energy computation
- bending-energy gradient computation
- gradient form voxel to real space
*/
// AFFINE DEFORMATION FIELD CREATION
maxIt=1000000 / dimension;
time(&start);
for(int i=0; i<100; ++i){
for(int i=0; i<maxIt; ++i){
reg_affine_positionField( affineTransformation,
targetImage,
deformationFieldImage);
}
time(&end);
int minutes = (int)floorf(float(end-start)/60.0f);
int seconds = (int)(end-start - 60*minutes);
printf("CPU - 100 affine deformation field computations - %i min %i sec\n", minutes, seconds);
minutes = (int)floorf(float(end-start)/60.0f);
seconds = (int)(end-start - 60*minutes);
fprintf(outputFile, "CPU - %i affine deformation field computations - %i min %i sec\n", maxIt, minutes, seconds);
printf("Affine deformation done\n");
// SPLINE DEFORMATION FIELD CREATION
maxIt=50000 / dimension;
time(&start);
for(int i=0; i<100; ++i){
for(int i=0; i<maxIt; ++i){
reg_bspline<float>( controlPointImage,
targetImage,
deformationFieldImage,
NULL,
maskImage,
0);
}
time(&end);
minutes = (int)floorf(float(end-start)/60.0f);
seconds = (int)(end-start - 60*minutes);
printf("CPU - 100 spline deformation field computations - %i min %i sec\n", minutes, seconds);
fprintf(outputFile, "CPU - %i spline deformation field computations - %i min %i sec\n", maxIt, minutes, seconds);
printf("Spline deformation done\n");
// LINEAR INTERPOLATION
maxIt=100000 / dimension;
time(&start);
for(int i=0; i<maxIt; ++i){
reg_resampleSourceImage<float>( targetImage,
sourceImage,
resultImage,
deformationFieldImage,
maskImage,
1,
0);
}
time(&end);
minutes = (int)floorf(float(end-start)/60.0f);
seconds = (int)(end-start - 60*minutes);
fprintf(outputFile, "CPU - %i linear interpolation computations - %i min %i sec\n", maxIt, minutes, seconds);
printf("Linear interpolation done\n");
// BLOCK MATCHING
_reg_blockMatchingParam blockMatchingParams;
initialise_block_matching_method( targetImage,
&blockMatchingParams,
100, // percentage of block kept
50, // percentage of inlier in the optimisation process
maskImage);
maxIt=2000 / dimension;
time(&start);
for(int i=0; i<maxIt; ++i){
block_matching_method<float>( targetImage,
resultImage,
&blockMatchingParams,
maskImage);
}
time(&end);
minutes = (int)floorf(float(end-start)/60.0f);
seconds = (int)(end-start - 60*minutes);
fprintf(outputFile, "CPU - %i block matching computations - %i min %i sec\n", maxIt, minutes, seconds);
printf("Block-matching done\n");
// SPATIAL GRADIENT COMPUTATION
maxIt=100000 / dimension;
time(&start);
for(int i=0; i<maxIt; ++i){
reg_getSourceImageGradient<float>( targetImage,
sourceImage,
resultGradientImage,
deformationFieldImage,
maskImage,
1);
}
time(&end);
minutes = (int)floorf(float(end-start)/60.0f);
seconds = (int)(end-start - 60*minutes);
fprintf(outputFile, "CPU - %i spatial gradient computations - %i min %i sec\n", maxIt, minutes, seconds);
printf("Spatial gradient done\n");
// JOINT HISTOGRAM COMPUTATION
double entropies[4];
reg_getEntropies<double>( targetImage,
resultImage,
2,
binning,
probaJointHistogram,
logJointHistogram,
entropies,
maskImage);
// VOXEL-BASED NMI GRADIENT COMPUTATION
maxIt=1000 / dimension;
time(&start);
for(int i=0; i<maxIt; ++i){
reg_getVoxelBasedNMIGradientUsingPW<double>(targetImage,
resultImage,
2,
resultGradientImage,
binning,
logJointHistogram,
entropies,
voxelNMIGradientImage,
maskImage);
}
time(&end);
minutes = (int)floorf(float(end-start)/60.0f);
seconds = (int)(end-start - 60*minutes);
fprintf(outputFile, "CPU - %i voxel-based NMI gradient computations - %i min %i sec\n", maxIt, minutes, seconds);
printf("Voxel-based NMI gradient done\n");
// NODE-BASED NMI GRADIENT COMPUTATION
maxIt=10000 / dimension;
int smoothingRadius[3];
smoothingRadius[0] = (int)floor( 2.0*controlPointImage->dx/targetImage->dx );
smoothingRadius[1] = (int)floor( 2.0*controlPointImage->dy/targetImage->dy );
smoothingRadius[2] = (int)floor( 2.0*controlPointImage->dz/targetImage->dz );
time(&start);
for(int i=0; i<maxIt; ++i){
reg_smoothImageForCubicSpline<float>(voxelNMIGradientImage,smoothingRadius);
reg_voxelCentric2NodeCentric(nodeNMIGradientImage,voxelNMIGradientImage);
}
time(&end);
minutes = (int)floorf(float(end-start)/60.0f);
seconds = (int)(end-start - 60*minutes);
fprintf(outputFile, "CPU - %i node-based NMI gradient computations - %i min %i sec\n", maxIt, minutes, seconds);
printf("Node-based NMI gradient done\n");
// BENDING ENERGY COMPUTATION
maxIt=10000000 / dimension;
time(&start);
for(int i=0; i<maxIt; ++i){
reg_bspline_bendingEnergy<float>(controlPointImage, targetImage,1);
}
time(&end);
minutes = (int)floorf(float(end-start)/60.0f);
seconds = (int)(end-start - 60*minutes);
fprintf(outputFile, "CPU - %i BE computations - %i min %i sec\n", maxIt, minutes, seconds);
printf("BE gradient done\n");
// BENDING ENERGY GRADIENT COMPUTATION
maxIt=5000000 / dimension;
time(&start);
for(int i=0; i<maxIt; ++i){
reg_bspline_bendingEnergyGradient<float>( controlPointImage,
targetImage,
nodeNMIGradientImage,
0.01f);
}
time(&end);
minutes = (int)floorf(float(end-start)/60.0f);
seconds = (int)(end-start - 60*minutes);
fprintf(outputFile, "CPU - %i BE gradient computations - %i min %i sec\n", maxIt, minutes, seconds);
printf("BE gradient done\n");
fclose(outputFile);
/* Monsieur Propre */
nifti_image_free(targetImage);
......@@ -193,9 +363,22 @@
nifti_image_free(resultGradientImage);
nifti_image_free(voxelNMIGradientImage);
nifti_image_free(nodeNMIGradientImage);
free(maskImage);
free(probaJointHistogram);
free(logJointHistogram);
free(conjugateG);
free(conjugateH);
return 0;
}
\ No newline at end of file
return 0;
}
void Usage(char *exec)
{
printf("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\n");
printf("Usage:\t%s [OPTIONS].\n",exec);
printf("\t-dim <int>\tImage dimension [100]\n");
printf("\t-bin <int>\tBin number [68]\n");
printf("\t-sp <float>\tControl point grid spacing [10]\n");
printf("\t-o <char*>\t Output file name [benchmark_result.txt]\n");
printf("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\n");
return;
}
......@@ -67,9 +67,12 @@ void block_matching_method_gpu( nifti_image *targetImage,
// process the target blocks
process_target_blocks_gpu<<<G1, B1>>>( *targetPosition_d,
targetValues);
CUDA_SAFE_CALL(cudaThreadSynchronize());
#if _VERBOSE
printf("[VERBOSE] process_target_blocks_gpu kernel: %s - Grid size [%i %i %i] - Block size [%i %i %i]\n",
cudaGetErrorString(cudaGetLastError()),G1.x,G1.y,G1.z,B1.x,B1.y,B1.z);
#endif
// Ensure that all the threads have done their job
CUDA_SAFE_CALL(cudaThreadSynchronize());
unsigned int Result_block_matching = params->activeBlockNumber;
unsigned int Result_block_matching_2 = 1;
......@@ -82,13 +85,10 @@ void block_matching_method_gpu( nifti_image *targetImage,
dim3 B2(Block_result_block,1,1);
dim3 G2(Result_block_matching,Result_block_matching_2,1);
process_result_blocks_gpu<<<G2, B2>>>(*resultPosition_d, targetValues);
// Ensure that all the threads have done their job
CUDA_SAFE_CALL(cudaThreadSynchronize());
#if _VERBOSE
printf("[VERBOSE] block_matching kernel: %s - Grid size [%i %i %i] - Block size [%i %i %i]\n",
cudaGetErrorString(cudaGetLastError()),G1.x,G1.y,G1.z,B1.x,B1.y,B1.z);
printf("[VERBOSE] process_result_blocks_gpu kernel: %s - Grid size [%i %i %i] - Block size [%i %i %i]\n",
cudaGetErrorString(cudaGetLastError()),G2.x,G2.y,G2.z,B2.x,B2.y,B2.z);
#endif
cudaFree(targetValues);
cudaFree(resultValues);
......
......@@ -45,7 +45,7 @@
#define Block_reg_bspline_storeApproxBendingEnergy 192 // 39 regs - 025% occupancy
#define Block_reg_bspline_getApproxBendingEnergyGradient 384 // 19 regs - 050% occupancy
#define Block_target_block 512 // 26 regs - 100% occupancy
#define Block_result_block 216 // 31 regs - 25% occupancy
#define Block_result_block 216 // 31 regs - 25% occupancy
#endif
#endif
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment