migrated repo contents from old git server

This commit is contained in:
Jake Goodwin 2023-02-02 15:11:06 -08:00
parent fe1a71aed4
commit f64d44b563
18 changed files with 3762 additions and 0 deletions

21
Makefile Executable file
View File

@ -0,0 +1,21 @@
CUDA_PATH = /usr/local/apps/cuda/cuda-10.1
CUDA_BIN_PATH = $(CUDA_PATH)/bin
CUDA_NVCC = $(CUDA_BIN_PATH)/nvcc
montecarlo: montecarlo.cu
$(CUDA_NVCC) -o montecarlo montecarlo.cu
carlotest:
$(CUDA_NVCC) -o carlotest montecarlo.cu
./carlotest
rm ./carlotest
test:
$(CUDA_NVCC) --verbose -o test arrayMul.cu
clean:
rm ./montecarlo
rm ./test
rm ./carlotest

143
arrayMul.cu Normal file
View File

@ -0,0 +1,143 @@
// Array multiplication: C = A * B:
// System includes
#include <stdio.h>
#include <assert.h>
#include <malloc.h>
#include <math.h>
#include <stdlib.h>
// CUDA runtime
#include <cuda_runtime.h>
// Helper functions and utilities to work with CUDA
#include "helper_functions.h"
#include "helper_cuda.h"
#ifndef THREADS_PER_BLOCK
#define THREADS_PER_BLOCK 128 // number of threads in each block
#endif
#ifndef DATASET_SIZE
#define DATASET_SIZE ( 8*1024*1024 ) // size of the array
#endif
float hA[ DATASET_SIZE ];
float hB[ DATASET_SIZE ];
float hC[ DATASET_SIZE ];
#ifndef TOL
#define TOL 0.00001f // tolerance to relative error
#endif
void
CudaCheckError( )
{
cudaError_t e = cudaGetLastError( );
if( e != cudaSuccess )
{
fprintf( stderr, "CUDA failure %s:%d: '%s'\n", __FILE__, __LINE__, cudaGetErrorString(e));
}
}
// array multiplication on the device: C = A * B
__global__ void ArrayMul( float *A, float *B, float *C )
{
int gid = blockIdx.x*blockDim.x + threadIdx.x;
if( gid < DATASET_SIZE )
C[gid] = A[gid] * B[gid];
}
// main program:
int
main( int argc, char* argv[ ] )
{
//int dev = findCudaDevice(argc, (const char **)argv);
// fill host memory:
for( int i = 0; i < DATASET_SIZE; i++ )
{
hA[i] = hB[i] = sqrtf( (float)i );
}
// allocate device memory:
float *dA, *dB, *dC;
cudaMalloc( (void **)(&dA), sizeof(hA) );
cudaMalloc( (void **)(&dB), sizeof(hB) );
cudaMalloc( (void **)(&dC), sizeof(hC) );
CudaCheckError( );
// copy host memory to the device:
cudaMemcpy( dA, hA, DATASET_SIZE*sizeof(float), cudaMemcpyHostToDevice );
cudaMemcpy( dB, hB, DATASET_SIZE*sizeof(float), cudaMemcpyHostToDevice );
CudaCheckError( );
// setup the execution parameters:
dim3 grid( DATASET_SIZE / THREADS_PER_BLOCK, 1, 1 );
dim3 threads( THREADS_PER_BLOCK, 1, 1 );
// create and start the timer:
cudaDeviceSynchronize( );
// allocate the events that we'll use for timing:
cudaEvent_t start, stop;
cudaEventCreate( &start );
cudaEventCreate( &stop );
CudaCheckError( );
// record the start event:
cudaEventRecord( start, NULL );
CudaCheckError( );
// execute the kernel:
ArrayMul<<< grid, threads >>>( dA, dB, dC );
// record the stop event:
cudaEventRecord( stop, NULL );
CudaCheckError( );
// wait for the stop event to complete:
cudaEventSynchronize( stop );
CudaCheckError( );
float msecTotal;
cudaEventElapsedTime( &msecTotal, start, stop );
CudaCheckError( );
// compute and print the performance
double secondsTotal = 0.001 * (double)msecTotal;
double multsPerSecond = (double)DATASET_SIZE / secondsTotal;
double megaMultsPerSecond = multsPerSecond / 1000000.;
fprintf( stderr, "%12d, %4d, %10.2lf\n", DATASET_SIZE, THREADS_PER_BLOCK, megaMultsPerSecond );
// copy result from the device to the host:
cudaMemcpy( hC, dC, sizeof(hC), cudaMemcpyDeviceToHost );
CudaCheckError( );
// clean up:
cudaFree( dA );
cudaFree( dB );
cudaFree( dC );
CudaCheckError( );
return 0;
}

BIN
cs475_project5.pdf Normal file

Binary file not shown.

24
data.csv Normal file
View File

@ -0,0 +1,24 @@
Number of Trials = 1024, Blocksize = 8, MegaTrials/Second = 10.5229, Probability = 24.61%
Number of Trials = 1024, Blocksize = 32, MegaTrials/Second = 9.2272, Probability = 24.51%
Number of Trials = 1024, Blocksize = 128, MegaTrials/Second = 11.7302, Probability = 22.07%
Number of Trials = 4096, Blocksize = 8, MegaTrials/Second = 40.1254, Probability = 22.12%
Number of Trials = 4096, Blocksize = 32, MegaTrials/Second = 35.5753, Probability = 23.19%
Number of Trials = 4096, Blocksize = 128, MegaTrials/Second = 37.2635, Probability = 22.22%
Number of Trials = 16384, Blocksize = 8, MegaTrials/Second = 138.5281, Probability = 22.83%
Number of Trials = 16384, Blocksize = 32, MegaTrials/Second = 155.2456, Probability = 22.67%
Number of Trials = 16384, Blocksize = 128, MegaTrials/Second = 162.3850, Probability = 22.71%
Number of Trials = 65536, Blocksize = 8, MegaTrials/Second = 371.7553, Probability = 22.45%
Number of Trials = 65536, Blocksize = 32, MegaTrials/Second = 583.6421, Probability = 22.31%
Number of Trials = 65536, Blocksize = 128, MegaTrials/Second = 688.6348, Probability = 22.26%
Number of Trials = 262144, Blocksize = 8, MegaTrials/Second = 779.2257, Probability = 22.40%
Number of Trials = 262144, Blocksize = 32, MegaTrials/Second = 1620.2532, Probability = 22.59%
Number of Trials = 262144, Blocksize = 128, MegaTrials/Second = 40.6791, Probability = 22.47%
Number of Trials = 1048576, Blocksize = 8, MegaTrials/Second = 953.2787, Probability = 22.47%
Number of Trials = 1048576, Blocksize = 32, MegaTrials/Second = 2574.4814, Probability = 22.50%
Number of Trials = 1048576, Blocksize = 128, MegaTrials/Second = 3121.0593, Probability = 22.48%
Number of Trials = 2097152, Blocksize = 8, MegaTrials/Second = 1066.0073, Probability = 22.48%
Number of Trials = 2097152, Blocksize = 32, MegaTrials/Second = 3028.0461, Probability = 22.47%
Number of Trials = 2097152, Blocksize = 128, MegaTrials/Second = 5043.5587, Probability = 22.59%
Number of Trials = 4194304, Blocksize = 8, MegaTrials/Second = 1094.4830, Probability = 22.53%
Number of Trials = 4194304, Blocksize = 32, MegaTrials/Second = 3745.0211, Probability = 22.51%
Number of Trials = 4194304, Blocksize = 128, MegaTrials/Second = 5873.9802, Probability = 22.50%
1 Number of Trials = 1024 Blocksize = 8 MegaTrials/Second = 10.5229 Probability = 24.61%
2 Number of Trials = 1024 Blocksize = 32 MegaTrials/Second = 9.2272 Probability = 24.51%
3 Number of Trials = 1024 Blocksize = 128 MegaTrials/Second = 11.7302 Probability = 22.07%
4 Number of Trials = 4096 Blocksize = 8 MegaTrials/Second = 40.1254 Probability = 22.12%
5 Number of Trials = 4096 Blocksize = 32 MegaTrials/Second = 35.5753 Probability = 23.19%
6 Number of Trials = 4096 Blocksize = 128 MegaTrials/Second = 37.2635 Probability = 22.22%
7 Number of Trials = 16384 Blocksize = 8 MegaTrials/Second = 138.5281 Probability = 22.83%
8 Number of Trials = 16384 Blocksize = 32 MegaTrials/Second = 155.2456 Probability = 22.67%
9 Number of Trials = 16384 Blocksize = 128 MegaTrials/Second = 162.3850 Probability = 22.71%
10 Number of Trials = 65536 Blocksize = 8 MegaTrials/Second = 371.7553 Probability = 22.45%
11 Number of Trials = 65536 Blocksize = 32 MegaTrials/Second = 583.6421 Probability = 22.31%
12 Number of Trials = 65536 Blocksize = 128 MegaTrials/Second = 688.6348 Probability = 22.26%
13 Number of Trials = 262144 Blocksize = 8 MegaTrials/Second = 779.2257 Probability = 22.40%
14 Number of Trials = 262144 Blocksize = 32 MegaTrials/Second = 1620.2532 Probability = 22.59%
15 Number of Trials = 262144 Blocksize = 128 MegaTrials/Second = 40.6791 Probability = 22.47%
16 Number of Trials = 1048576 Blocksize = 8 MegaTrials/Second = 953.2787 Probability = 22.47%
17 Number of Trials = 1048576 Blocksize = 32 MegaTrials/Second = 2574.4814 Probability = 22.50%
18 Number of Trials = 1048576 Blocksize = 128 MegaTrials/Second = 3121.0593 Probability = 22.48%
19 Number of Trials = 2097152 Blocksize = 8 MegaTrials/Second = 1066.0073 Probability = 22.48%
20 Number of Trials = 2097152 Blocksize = 32 MegaTrials/Second = 3028.0461 Probability = 22.47%
21 Number of Trials = 2097152 Blocksize = 128 MegaTrials/Second = 5043.5587 Probability = 22.59%
22 Number of Trials = 4194304 Blocksize = 8 MegaTrials/Second = 1094.4830 Probability = 22.53%
23 Number of Trials = 4194304 Blocksize = 32 MegaTrials/Second = 3745.0211 Probability = 22.51%
24 Number of Trials = 4194304 Blocksize = 128 MegaTrials/Second = 5873.9802 Probability = 22.50%

BIN
data_calc.ods Normal file

Binary file not shown.

135
exception.h Normal file
View File

@ -0,0 +1,135 @@
/*
* Copyright 1993-2017 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
/* CUda UTility Library */
#ifndef COMMON_EXCEPTION_H_
#define COMMON_EXCEPTION_H_
// includes, system
#include <stdlib.h>
#include <exception>
#include <iostream>
#include <stdexcept>
#include <string>
//! Exception wrapper.
//! @param Std_Exception Exception out of namespace std for easy typing.
template <class Std_Exception>
class Exception : public Std_Exception {
public:
//! @brief Static construction interface
//! @return Alwayss throws ( Located_Exception<Exception>)
//! @param file file in which the Exception occurs
//! @param line line in which the Exception occurs
//! @param detailed details on the code fragment causing the Exception
static void throw_it(const char *file, const int line,
const char *detailed = "-");
//! Static construction interface
//! @return Alwayss throws ( Located_Exception<Exception>)
//! @param file file in which the Exception occurs
//! @param line line in which the Exception occurs
//! @param detailed details on the code fragment causing the Exception
static void throw_it(const char *file, const int line,
const std::string &detailed);
//! Destructor
virtual ~Exception() throw();
private:
//! Constructor, default (private)
Exception();
//! Constructor, standard
//! @param str string returned by what()
explicit Exception(const std::string &str);
};
////////////////////////////////////////////////////////////////////////////////
//! Exception handler function for arbitrary exceptions
//! @param ex exception to handle
////////////////////////////////////////////////////////////////////////////////
template <class Exception_Typ>
inline void handleException(const Exception_Typ &ex) {
std::cerr << ex.what() << std::endl;
exit(EXIT_FAILURE);
}
//! Convenience macros
//! Exception caused by dynamic program behavior, e.g. file does not exist
#define RUNTIME_EXCEPTION(msg) \
Exception<std::runtime_error>::throw_it(__FILE__, __LINE__, msg)
//! Logic exception in program, e.g. an assert failed
#define LOGIC_EXCEPTION(msg) \
Exception<std::logic_error>::throw_it(__FILE__, __LINE__, msg)
//! Out of range exception
#define RANGE_EXCEPTION(msg) \
Exception<std::range_error>::throw_it(__FILE__, __LINE__, msg)
////////////////////////////////////////////////////////////////////////////////
//! Implementation
// includes, system
#include <sstream>
////////////////////////////////////////////////////////////////////////////////
//! Static construction interface.
//! @param Exception causing code fragment (file and line) and detailed infos.
////////////////////////////////////////////////////////////////////////////////
/*static*/ template <class Std_Exception>
void Exception<Std_Exception>::throw_it(const char *file, const int line,
const char *detailed) {
std::stringstream s;
// Quiet heavy-weight but exceptions are not for
// performance / release versions
s << "Exception in file '" << file << "' in line " << line << "\n"
<< "Detailed description: " << detailed << "\n";
throw Exception(s.str());
}
////////////////////////////////////////////////////////////////////////////////
//! Static construction interface.
//! @param Exception causing code fragment (file and line) and detailed infos.
////////////////////////////////////////////////////////////////////////////////
/*static*/ template <class Std_Exception>
void Exception<Std_Exception>::throw_it(const char *file, const int line,
const std::string &msg) {
throw_it(file, line, msg.c_str());
}
////////////////////////////////////////////////////////////////////////////////
//! Constructor, default (private).
////////////////////////////////////////////////////////////////////////////////
template <class Std_Exception>
Exception<Std_Exception>::Exception() : Std_Exception("Unknown Exception.\n") {}
////////////////////////////////////////////////////////////////////////////////
//! Constructor, standard (private).
//! String returned by what().
////////////////////////////////////////////////////////////////////////////////
template <class Std_Exception>
Exception<Std_Exception>::Exception(const std::string &s) : Std_Exception(s) {}
////////////////////////////////////////////////////////////////////////////////
//! Destructor
////////////////////////////////////////////////////////////////////////////////
template <class Std_Exception>
Exception<Std_Exception>::~Exception() throw() {}
// functions, exported
#endif // COMMON_EXCEPTION_H_

26
hardware.txt Normal file
View File

@ -0,0 +1,26 @@
Architecture: x86_64
CPU op-mode(s): 32-bit, 64-bit
Byte Order: Little Endian
CPU(s): 32
On-line CPU(s) list: 0-31
Thread(s) per core: 2
Core(s) per socket: 8
Socket(s): 2
NUMA node(s): 2
Vendor ID: GenuineIntel
CPU family: 6
Model: 63
Model name: Intel(R) Xeon(R) CPU E5-2630 v3 @ 2.40GHz
Stepping: 2
CPU MHz: 1258.007
CPU max MHz: 3200.0000
CPU min MHz: 1200.0000
BogoMIPS: 4800.00
Virtualization: VT-x
L1d cache: 32K
L1i cache: 32K
L2 cache: 256K
L3 cache: 20480K
NUMA node0 CPU(s): 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30
NUMA node1 CPU(s): 1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31
Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc aperfmperf eagerfpu pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm epb invpcid_single ssbd ibrs ibpb stibp tpr_shadow vnmi flexpriority ept vpid fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid cqm xsaveopt cqm_llc cqm_occup_llc dtherm ida arat pln pts md_clear spec_ctrl intel_stibp flush_l1d

898
helper_cuda.h Normal file
View File

@ -0,0 +1,898 @@
/**
* Copyright 1993-2017 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
////////////////////////////////////////////////////////////////////////////////
// These are CUDA Helper functions for initialization and error checking
#ifndef COMMON_HELPER_CUDA_H_
#define COMMON_HELPER_CUDA_H_
#pragma once
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "helper_string.h"
#ifndef EXIT_WAIVED
#define EXIT_WAIVED 2
#endif
// Note, it is required that your SDK sample to include the proper header
// files, please refer the CUDA examples for examples of the needed CUDA
// headers, which may change depending on which CUDA functions are used.
// CUDA Runtime error messages
#ifdef __DRIVER_TYPES_H__
static const char *_cudaGetErrorEnum(cudaError_t error) {
return cudaGetErrorName(error);
}
#endif
#ifdef CUDA_DRIVER_API
// CUDA Driver API errors
static const char *_cudaGetErrorEnum(CUresult error) {
static char unknown[] = "<unknown>";
const char *ret = NULL;
cuGetErrorName(error, &ret);
return ret ? ret : unknown;
}
#endif
#ifdef CUBLAS_API_H_
// cuBLAS API errors
static const char *_cudaGetErrorEnum(cublasStatus_t error) {
switch (error) {
case CUBLAS_STATUS_SUCCESS:
return "CUBLAS_STATUS_SUCCESS";
case CUBLAS_STATUS_NOT_INITIALIZED:
return "CUBLAS_STATUS_NOT_INITIALIZED";
case CUBLAS_STATUS_ALLOC_FAILED:
return "CUBLAS_STATUS_ALLOC_FAILED";
case CUBLAS_STATUS_INVALID_VALUE:
return "CUBLAS_STATUS_INVALID_VALUE";
case CUBLAS_STATUS_ARCH_MISMATCH:
return "CUBLAS_STATUS_ARCH_MISMATCH";
case CUBLAS_STATUS_MAPPING_ERROR:
return "CUBLAS_STATUS_MAPPING_ERROR";
case CUBLAS_STATUS_EXECUTION_FAILED:
return "CUBLAS_STATUS_EXECUTION_FAILED";
case CUBLAS_STATUS_INTERNAL_ERROR:
return "CUBLAS_STATUS_INTERNAL_ERROR";
case CUBLAS_STATUS_NOT_SUPPORTED:
return "CUBLAS_STATUS_NOT_SUPPORTED";
case CUBLAS_STATUS_LICENSE_ERROR:
return "CUBLAS_STATUS_LICENSE_ERROR";
}
return "<unknown>";
}
#endif
#ifdef _CUFFT_H_
// cuFFT API errors
static const char *_cudaGetErrorEnum(cufftResult error) {
switch (error) {
case CUFFT_SUCCESS:
return "CUFFT_SUCCESS";
case CUFFT_INVALID_PLAN:
return "CUFFT_INVALID_PLAN";
case CUFFT_ALLOC_FAILED:
return "CUFFT_ALLOC_FAILED";
case CUFFT_INVALID_TYPE:
return "CUFFT_INVALID_TYPE";
case CUFFT_INVALID_VALUE:
return "CUFFT_INVALID_VALUE";
case CUFFT_INTERNAL_ERROR:
return "CUFFT_INTERNAL_ERROR";
case CUFFT_EXEC_FAILED:
return "CUFFT_EXEC_FAILED";
case CUFFT_SETUP_FAILED:
return "CUFFT_SETUP_FAILED";
case CUFFT_INVALID_SIZE:
return "CUFFT_INVALID_SIZE";
case CUFFT_UNALIGNED_DATA:
return "CUFFT_UNALIGNED_DATA";
case CUFFT_INCOMPLETE_PARAMETER_LIST:
return "CUFFT_INCOMPLETE_PARAMETER_LIST";
case CUFFT_INVALID_DEVICE:
return "CUFFT_INVALID_DEVICE";
case CUFFT_PARSE_ERROR:
return "CUFFT_PARSE_ERROR";
case CUFFT_NO_WORKSPACE:
return "CUFFT_NO_WORKSPACE";
case CUFFT_NOT_IMPLEMENTED:
return "CUFFT_NOT_IMPLEMENTED";
case CUFFT_LICENSE_ERROR:
return "CUFFT_LICENSE_ERROR";
case CUFFT_NOT_SUPPORTED:
return "CUFFT_NOT_SUPPORTED";
}
return "<unknown>";
}
#endif
#ifdef CUSPARSEAPI
// cuSPARSE API errors
static const char *_cudaGetErrorEnum(cusparseStatus_t error) {
switch (error) {
case CUSPARSE_STATUS_SUCCESS:
return "CUSPARSE_STATUS_SUCCESS";
case CUSPARSE_STATUS_NOT_INITIALIZED:
return "CUSPARSE_STATUS_NOT_INITIALIZED";
case CUSPARSE_STATUS_ALLOC_FAILED:
return "CUSPARSE_STATUS_ALLOC_FAILED";
case CUSPARSE_STATUS_INVALID_VALUE:
return "CUSPARSE_STATUS_INVALID_VALUE";
case CUSPARSE_STATUS_ARCH_MISMATCH:
return "CUSPARSE_STATUS_ARCH_MISMATCH";
case CUSPARSE_STATUS_MAPPING_ERROR:
return "CUSPARSE_STATUS_MAPPING_ERROR";
case CUSPARSE_STATUS_EXECUTION_FAILED:
return "CUSPARSE_STATUS_EXECUTION_FAILED";
case CUSPARSE_STATUS_INTERNAL_ERROR:
return "CUSPARSE_STATUS_INTERNAL_ERROR";
case CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED:
return "CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED";
}
return "<unknown>";
}
#endif
#ifdef CUSOLVER_COMMON_H_
// cuSOLVER API errors
static const char *_cudaGetErrorEnum(cusolverStatus_t error) {
switch (error) {
case CUSOLVER_STATUS_SUCCESS:
return "CUSOLVER_STATUS_SUCCESS";
case CUSOLVER_STATUS_NOT_INITIALIZED:
return "CUSOLVER_STATUS_NOT_INITIALIZED";
case CUSOLVER_STATUS_ALLOC_FAILED:
return "CUSOLVER_STATUS_ALLOC_FAILED";
case CUSOLVER_STATUS_INVALID_VALUE:
return "CUSOLVER_STATUS_INVALID_VALUE";
case CUSOLVER_STATUS_ARCH_MISMATCH:
return "CUSOLVER_STATUS_ARCH_MISMATCH";
case CUSOLVER_STATUS_MAPPING_ERROR:
return "CUSOLVER_STATUS_MAPPING_ERROR";
case CUSOLVER_STATUS_EXECUTION_FAILED:
return "CUSOLVER_STATUS_EXECUTION_FAILED";
case CUSOLVER_STATUS_INTERNAL_ERROR:
return "CUSOLVER_STATUS_INTERNAL_ERROR";
case CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED:
return "CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED";
case CUSOLVER_STATUS_NOT_SUPPORTED:
return "CUSOLVER_STATUS_NOT_SUPPORTED ";
case CUSOLVER_STATUS_ZERO_PIVOT:
return "CUSOLVER_STATUS_ZERO_PIVOT";
case CUSOLVER_STATUS_INVALID_LICENSE:
return "CUSOLVER_STATUS_INVALID_LICENSE";
}
return "<unknown>";
}
#endif
#ifdef CURAND_H_
// cuRAND API errors
static const char *_cudaGetErrorEnum(curandStatus_t error) {
switch (error) {
case CURAND_STATUS_SUCCESS:
return "CURAND_STATUS_SUCCESS";
case CURAND_STATUS_VERSION_MISMATCH:
return "CURAND_STATUS_VERSION_MISMATCH";
case CURAND_STATUS_NOT_INITIALIZED:
return "CURAND_STATUS_NOT_INITIALIZED";
case CURAND_STATUS_ALLOCATION_FAILED:
return "CURAND_STATUS_ALLOCATION_FAILED";
case CURAND_STATUS_TYPE_ERROR:
return "CURAND_STATUS_TYPE_ERROR";
case CURAND_STATUS_OUT_OF_RANGE:
return "CURAND_STATUS_OUT_OF_RANGE";
case CURAND_STATUS_LENGTH_NOT_MULTIPLE:
return "CURAND_STATUS_LENGTH_NOT_MULTIPLE";
case CURAND_STATUS_DOUBLE_PRECISION_REQUIRED:
return "CURAND_STATUS_DOUBLE_PRECISION_REQUIRED";
case CURAND_STATUS_LAUNCH_FAILURE:
return "CURAND_STATUS_LAUNCH_FAILURE";
case CURAND_STATUS_PREEXISTING_FAILURE:
return "CURAND_STATUS_PREEXISTING_FAILURE";
case CURAND_STATUS_INITIALIZATION_FAILED:
return "CURAND_STATUS_INITIALIZATION_FAILED";
case CURAND_STATUS_ARCH_MISMATCH:
return "CURAND_STATUS_ARCH_MISMATCH";
case CURAND_STATUS_INTERNAL_ERROR:
return "CURAND_STATUS_INTERNAL_ERROR";
}
return "<unknown>";
}
#endif
#ifdef NVJPEGAPI
// nvJPEG API errors
static const char *_cudaGetErrorEnum(nvjpegStatus_t error) {
switch (error) {
case NVJPEG_STATUS_SUCCESS:
return "NVJPEG_STATUS_SUCCESS";
case NVJPEG_STATUS_NOT_INITIALIZED:
return "NVJPEG_STATUS_NOT_INITIALIZED";
case NVJPEG_STATUS_INVALID_PARAMETER:
return "NVJPEG_STATUS_INVALID_PARAMETER";
case NVJPEG_STATUS_BAD_JPEG:
return "NVJPEG_STATUS_BAD_JPEG";
case NVJPEG_STATUS_JPEG_NOT_SUPPORTED:
return "NVJPEG_STATUS_JPEG_NOT_SUPPORTED";
case NVJPEG_STATUS_ALLOCATOR_FAILURE:
return "NVJPEG_STATUS_ALLOCATOR_FAILURE";
case NVJPEG_STATUS_EXECUTION_FAILED:
return "NVJPEG_STATUS_EXECUTION_FAILED";
case NVJPEG_STATUS_ARCH_MISMATCH:
return "NVJPEG_STATUS_ARCH_MISMATCH";
case NVJPEG_STATUS_INTERNAL_ERROR:
return "NVJPEG_STATUS_INTERNAL_ERROR";
}
return "<unknown>";
}
#endif
#ifdef NV_NPPIDEFS_H
// NPP API errors
static const char *_cudaGetErrorEnum(NppStatus error) {
switch (error) {
case NPP_NOT_SUPPORTED_MODE_ERROR:
return "NPP_NOT_SUPPORTED_MODE_ERROR";
case NPP_ROUND_MODE_NOT_SUPPORTED_ERROR:
return "NPP_ROUND_MODE_NOT_SUPPORTED_ERROR";
case NPP_RESIZE_NO_OPERATION_ERROR:
return "NPP_RESIZE_NO_OPERATION_ERROR";
case NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY:
return "NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY";
#if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) <= 0x5000
case NPP_BAD_ARG_ERROR:
return "NPP_BAD_ARGUMENT_ERROR";
case NPP_COEFF_ERROR:
return "NPP_COEFFICIENT_ERROR";
case NPP_RECT_ERROR:
return "NPP_RECTANGLE_ERROR";
case NPP_QUAD_ERROR:
return "NPP_QUADRANGLE_ERROR";
case NPP_MEM_ALLOC_ERR:
return "NPP_MEMORY_ALLOCATION_ERROR";
case NPP_HISTO_NUMBER_OF_LEVELS_ERROR:
return "NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR";
case NPP_INVALID_INPUT:
return "NPP_INVALID_INPUT";
case NPP_POINTER_ERROR:
return "NPP_POINTER_ERROR";
case NPP_WARNING:
return "NPP_WARNING";
case NPP_ODD_ROI_WARNING:
return "NPP_ODD_ROI_WARNING";
#else
// These are for CUDA 5.5 or higher
case NPP_BAD_ARGUMENT_ERROR:
return "NPP_BAD_ARGUMENT_ERROR";
case NPP_COEFFICIENT_ERROR:
return "NPP_COEFFICIENT_ERROR";
case NPP_RECTANGLE_ERROR:
return "NPP_RECTANGLE_ERROR";
case NPP_QUADRANGLE_ERROR:
return "NPP_QUADRANGLE_ERROR";
case NPP_MEMORY_ALLOCATION_ERR:
return "NPP_MEMORY_ALLOCATION_ERROR";
case NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR:
return "NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR";
case NPP_INVALID_HOST_POINTER_ERROR:
return "NPP_INVALID_HOST_POINTER_ERROR";
case NPP_INVALID_DEVICE_POINTER_ERROR:
return "NPP_INVALID_DEVICE_POINTER_ERROR";
#endif
case NPP_LUT_NUMBER_OF_LEVELS_ERROR:
return "NPP_LUT_NUMBER_OF_LEVELS_ERROR";
case NPP_TEXTURE_BIND_ERROR:
return "NPP_TEXTURE_BIND_ERROR";
case NPP_WRONG_INTERSECTION_ROI_ERROR:
return "NPP_WRONG_INTERSECTION_ROI_ERROR";
case NPP_NOT_EVEN_STEP_ERROR:
return "NPP_NOT_EVEN_STEP_ERROR";
case NPP_INTERPOLATION_ERROR:
return "NPP_INTERPOLATION_ERROR";
case NPP_RESIZE_FACTOR_ERROR:
return "NPP_RESIZE_FACTOR_ERROR";
case NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR:
return "NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR";
#if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) <= 0x5000
case NPP_MEMFREE_ERR:
return "NPP_MEMFREE_ERR";
case NPP_MEMSET_ERR:
return "NPP_MEMSET_ERR";
case NPP_MEMCPY_ERR:
return "NPP_MEMCPY_ERROR";
case NPP_MIRROR_FLIP_ERR:
return "NPP_MIRROR_FLIP_ERR";
#else
case NPP_MEMFREE_ERROR:
return "NPP_MEMFREE_ERROR";
case NPP_MEMSET_ERROR:
return "NPP_MEMSET_ERROR";
case NPP_MEMCPY_ERROR:
return "NPP_MEMCPY_ERROR";
case NPP_MIRROR_FLIP_ERROR:
return "NPP_MIRROR_FLIP_ERROR";
#endif
case NPP_ALIGNMENT_ERROR:
return "NPP_ALIGNMENT_ERROR";
case NPP_STEP_ERROR:
return "NPP_STEP_ERROR";
case NPP_SIZE_ERROR:
return "NPP_SIZE_ERROR";
case NPP_NULL_POINTER_ERROR:
return "NPP_NULL_POINTER_ERROR";
case NPP_CUDA_KERNEL_EXECUTION_ERROR:
return "NPP_CUDA_KERNEL_EXECUTION_ERROR";
case NPP_NOT_IMPLEMENTED_ERROR:
return "NPP_NOT_IMPLEMENTED_ERROR";
case NPP_ERROR:
return "NPP_ERROR";
case NPP_SUCCESS:
return "NPP_SUCCESS";
case NPP_WRONG_INTERSECTION_QUAD_WARNING:
return "NPP_WRONG_INTERSECTION_QUAD_WARNING";
case NPP_MISALIGNED_DST_ROI_WARNING:
return "NPP_MISALIGNED_DST_ROI_WARNING";
case NPP_AFFINE_QUAD_INCORRECT_WARNING:
return "NPP_AFFINE_QUAD_INCORRECT_WARNING";
case NPP_DOUBLE_SIZE_WARNING:
return "NPP_DOUBLE_SIZE_WARNING";
case NPP_WRONG_INTERSECTION_ROI_WARNING:
return "NPP_WRONG_INTERSECTION_ROI_WARNING";
#if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) >= 0x6000
/* These are 6.0 or higher */
case NPP_LUT_PALETTE_BITSIZE_ERROR:
return "NPP_LUT_PALETTE_BITSIZE_ERROR";
case NPP_ZC_MODE_NOT_SUPPORTED_ERROR:
return "NPP_ZC_MODE_NOT_SUPPORTED_ERROR";
case NPP_QUALITY_INDEX_ERROR:
return "NPP_QUALITY_INDEX_ERROR";
case NPP_CHANNEL_ORDER_ERROR:
return "NPP_CHANNEL_ORDER_ERROR";
case NPP_ZERO_MASK_VALUE_ERROR:
return "NPP_ZERO_MASK_VALUE_ERROR";
case NPP_NUMBER_OF_CHANNELS_ERROR:
return "NPP_NUMBER_OF_CHANNELS_ERROR";
case NPP_COI_ERROR:
return "NPP_COI_ERROR";
case NPP_DIVISOR_ERROR:
return "NPP_DIVISOR_ERROR";
case NPP_CHANNEL_ERROR:
return "NPP_CHANNEL_ERROR";
case NPP_STRIDE_ERROR:
return "NPP_STRIDE_ERROR";
case NPP_ANCHOR_ERROR:
return "NPP_ANCHOR_ERROR";
case NPP_MASK_SIZE_ERROR:
return "NPP_MASK_SIZE_ERROR";
case NPP_MOMENT_00_ZERO_ERROR:
return "NPP_MOMENT_00_ZERO_ERROR";
case NPP_THRESHOLD_NEGATIVE_LEVEL_ERROR:
return "NPP_THRESHOLD_NEGATIVE_LEVEL_ERROR";
case NPP_THRESHOLD_ERROR:
return "NPP_THRESHOLD_ERROR";
case NPP_CONTEXT_MATCH_ERROR:
return "NPP_CONTEXT_MATCH_ERROR";
case NPP_FFT_FLAG_ERROR:
return "NPP_FFT_FLAG_ERROR";
case NPP_FFT_ORDER_ERROR:
return "NPP_FFT_ORDER_ERROR";
case NPP_SCALE_RANGE_ERROR:
return "NPP_SCALE_RANGE_ERROR";
case NPP_DATA_TYPE_ERROR:
return "NPP_DATA_TYPE_ERROR";
case NPP_OUT_OFF_RANGE_ERROR:
return "NPP_OUT_OFF_RANGE_ERROR";
case NPP_DIVIDE_BY_ZERO_ERROR:
return "NPP_DIVIDE_BY_ZERO_ERROR";
case NPP_RANGE_ERROR:
return "NPP_RANGE_ERROR";
case NPP_NO_MEMORY_ERROR:
return "NPP_NO_MEMORY_ERROR";
case NPP_ERROR_RESERVED:
return "NPP_ERROR_RESERVED";
case NPP_NO_OPERATION_WARNING:
return "NPP_NO_OPERATION_WARNING";
case NPP_DIVIDE_BY_ZERO_WARNING:
return "NPP_DIVIDE_BY_ZERO_WARNING";
#endif
#if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) >= 0x7000
/* These are 7.0 or higher */
case NPP_OVERFLOW_ERROR:
return "NPP_OVERFLOW_ERROR";
case NPP_CORRUPTED_DATA_ERROR:
return "NPP_CORRUPTED_DATA_ERROR";
#endif
}
return "<unknown>";
}
#endif
#ifdef __DRIVER_TYPES_H__
#ifndef DEVICE_RESET
#define DEVICE_RESET cudaDeviceReset();
#endif
#else
#ifndef DEVICE_RESET
#define DEVICE_RESET
#endif
#endif
template <typename T>
void check(T result, char const *const func, const char *const file,
int const line) {
if (result) {
fprintf(stderr, "CUDA error at %s:%d code=%d(%s) \"%s\" \n", file, line,
static_cast<unsigned int>(result), _cudaGetErrorEnum(result), func);
DEVICE_RESET
// Make sure we call CUDA Device Reset before exiting
exit(EXIT_FAILURE);
}
}
#ifdef __DRIVER_TYPES_H__
// This will output the proper CUDA error strings in the event
// that a CUDA host call returns an error
#define checkCudaErrors(val) check((val), #val, __FILE__, __LINE__)
// This will output the proper error string when calling cudaGetLastError
#define getLastCudaError(msg) __getLastCudaError(msg, __FILE__, __LINE__)
inline void __getLastCudaError(const char *errorMessage, const char *file,
const int line) {
cudaError_t err = cudaGetLastError();
if (cudaSuccess != err) {
fprintf(stderr,
"%s(%i) : getLastCudaError() CUDA error :"
" %s : (%d) %s.\n",
file, line, errorMessage, static_cast<int>(err),
cudaGetErrorString(err));
DEVICE_RESET
exit(EXIT_FAILURE);
}
}
// This will only print the proper error string when calling cudaGetLastError
// but not exit program incase error detected.
#define printLastCudaError(msg) __printLastCudaError(msg, __FILE__, __LINE__)
inline void __printLastCudaError(const char *errorMessage, const char *file,
const int line) {
cudaError_t err = cudaGetLastError();
if (cudaSuccess != err) {
fprintf(stderr,
"%s(%i) : getLastCudaError() CUDA error :"
" %s : (%d) %s.\n",
file, line, errorMessage, static_cast<int>(err),
cudaGetErrorString(err));
}
}
#endif
#ifndef MAX
#define MAX(a, b) (a > b ? a : b)
#endif
// Float To Int conversion
inline int ftoi(float value) {
return (value >= 0 ? static_cast<int>(value + 0.5)
: static_cast<int>(value - 0.5));
}
// Beginning of GPU Architecture definitions
inline int _ConvertSMVer2Cores(int major, int minor) {
// Defines for GPU Architecture types (using the SM version to determine
// the # of cores per SM
typedef struct {
int SM; // 0xMm (hexidecimal notation), M = SM Major version,
// and m = SM minor version
int Cores;
} sSMtoCores;
sSMtoCores nGpuArchCoresPerSM[] = {
{0x30, 192},
{0x32, 192},
{0x35, 192},
{0x37, 192},
{0x50, 128},
{0x52, 128},
{0x53, 128},
{0x60, 64},
{0x61, 128},
{0x62, 128},
{0x70, 64},
{0x72, 64},
{0x75, 64},
{-1, -1}};
int index = 0;
while (nGpuArchCoresPerSM[index].SM != -1) {
if (nGpuArchCoresPerSM[index].SM == ((major << 4) + minor)) {
return nGpuArchCoresPerSM[index].Cores;
}
index++;
}
// If we don't find the values, we default use the previous one
// to run properly
printf(
"MapSMtoCores for SM %d.%d is undefined."
" Default to use %d Cores/SM\n",
major, minor, nGpuArchCoresPerSM[index - 1].Cores);
return nGpuArchCoresPerSM[index - 1].Cores;
}
// end of GPU Architecture definitions
#ifdef __CUDA_RUNTIME_H__
// General GPU Device CUDA Initialization
inline int gpuDeviceInit(int devID) {
int device_count;
checkCudaErrors(cudaGetDeviceCount(&device_count));
if (device_count == 0) {
fprintf(stderr,
"gpuDeviceInit() CUDA error: "
"no devices supporting CUDA.\n");
exit(EXIT_FAILURE);
}
if (devID < 0) {
devID = 0;
}
if (devID > device_count - 1) {
fprintf(stderr, "\n");
fprintf(stderr, ">> %d CUDA capable GPU device(s) detected. <<\n",
device_count);
fprintf(stderr,
">> gpuDeviceInit (-device=%d) is not a valid"
" GPU device. <<\n",
devID);
fprintf(stderr, "\n");
return -devID;
}
cudaDeviceProp deviceProp;
checkCudaErrors(cudaGetDeviceProperties(&deviceProp, devID));
if (deviceProp.computeMode == cudaComputeModeProhibited) {
fprintf(stderr,
"Error: device is running in <Compute Mode "
"Prohibited>, no threads can use cudaSetDevice().\n");
return -1;
}
if (deviceProp.major < 1) {
fprintf(stderr, "gpuDeviceInit(): GPU device does not support CUDA.\n");
exit(EXIT_FAILURE);
}
checkCudaErrors(cudaSetDevice(devID));
printf("gpuDeviceInit() CUDA Device [%d]: \"%s\n", devID, deviceProp.name);
return devID;
}
// This function returns the best GPU (with maximum GFLOPS)
inline int gpuGetMaxGflopsDeviceId() {
int current_device = 0, sm_per_multiproc = 0;
int max_perf_device = 0;
int device_count = 0;
int devices_prohibited = 0;
uint64_t max_compute_perf = 0;
cudaDeviceProp deviceProp;
checkCudaErrors(cudaGetDeviceCount(&device_count));
if (device_count == 0) {
fprintf(stderr,
"gpuGetMaxGflopsDeviceId() CUDA error:"
" no devices supporting CUDA.\n");
exit(EXIT_FAILURE);
}
// Find the best CUDA capable GPU device
current_device = 0;
while (current_device < device_count) {
cudaGetDeviceProperties(&deviceProp, current_device);
// If this GPU is not running on Compute Mode prohibited,
// then we can add it to the list
if (deviceProp.computeMode != cudaComputeModeProhibited) {
if (deviceProp.major == 9999 && deviceProp.minor == 9999) {
sm_per_multiproc = 1;
} else {
sm_per_multiproc =
_ConvertSMVer2Cores(deviceProp.major, deviceProp.minor);
}
uint64_t compute_perf = (uint64_t)deviceProp.multiProcessorCount *
sm_per_multiproc * deviceProp.clockRate;
if (compute_perf > max_compute_perf) {
max_compute_perf = compute_perf;
max_perf_device = current_device;
}
} else {
devices_prohibited++;
}
++current_device;
}
if (devices_prohibited == device_count) {
fprintf(stderr,
"gpuGetMaxGflopsDeviceId() CUDA error:"
" all devices have compute mode prohibited.\n");
exit(EXIT_FAILURE);
}
return max_perf_device;
}
// Initialization code to find the best CUDA Device
inline int findCudaDevice(int argc, const char **argv) {
cudaDeviceProp deviceProp;
int devID = 0;
// If the command-line has a device number specified, use it
if (checkCmdLineFlag(argc, argv, "device")) {
devID = getCmdLineArgumentInt(argc, argv, "device=");
if (devID < 0) {
printf("Invalid command line parameter\n ");
exit(EXIT_FAILURE);
} else {
devID = gpuDeviceInit(devID);
if (devID < 0) {
printf("exiting...\n");
exit(EXIT_FAILURE);
}
}
} else {
// Otherwise pick the device with highest Gflops/s
devID = gpuGetMaxGflopsDeviceId();
checkCudaErrors(cudaSetDevice(devID));
checkCudaErrors(cudaGetDeviceProperties(&deviceProp, devID));
printf("GPU Device %d: \"%s\" with compute capability %d.%d\n\n", devID,
deviceProp.name, deviceProp.major, deviceProp.minor);
}
return devID;
}
inline int findIntegratedGPU() {
int current_device = 0;
int device_count = 0;
int devices_prohibited = 0;
cudaDeviceProp deviceProp;
checkCudaErrors(cudaGetDeviceCount(&device_count));
if (device_count == 0) {
fprintf(stderr, "CUDA error: no devices supporting CUDA.\n");
exit(EXIT_FAILURE);
}
// Find the integrated GPU which is compute capable
while (current_device < device_count) {
cudaGetDeviceProperties(&deviceProp, current_device);
// If GPU is integrated and is not running on Compute Mode prohibited,
// then cuda can map to GLES resource
if (deviceProp.integrated &&
(deviceProp.computeMode != cudaComputeModeProhibited)) {
checkCudaErrors(cudaSetDevice(current_device));
checkCudaErrors(cudaGetDeviceProperties(&deviceProp, current_device));
printf("GPU Device %d: \"%s\" with compute capability %d.%d\n\n",
current_device, deviceProp.name, deviceProp.major,
deviceProp.minor);
return current_device;
} else {
devices_prohibited++;
}
current_device++;
}
if (devices_prohibited == device_count) {
fprintf(stderr,
"CUDA error:"
" No GLES-CUDA Interop capable GPU found.\n");
exit(EXIT_FAILURE);
}
return -1;
}
// General check for CUDA GPU SM Capabilities
inline bool checkCudaCapabilities(int major_version, int minor_version) {
cudaDeviceProp deviceProp;
deviceProp.major = 0;
deviceProp.minor = 0;
int dev;
checkCudaErrors(cudaGetDevice(&dev));
checkCudaErrors(cudaGetDeviceProperties(&deviceProp, dev));
if ((deviceProp.major > major_version) ||
(deviceProp.major == major_version &&
deviceProp.minor >= minor_version)) {
printf(" Device %d: <%16s >, Compute SM %d.%d detected\n", dev,
deviceProp.name, deviceProp.major, deviceProp.minor);
return true;
} else {
printf(
" No GPU device was found that can support "
"CUDA compute capability %d.%d.\n",
major_version, minor_version);
return false;
}
}
#endif
// end of CUDA Helper Functions
#endif // COMMON_HELPER_CUDA_H_

43
helper_functions.h Normal file
View File

@ -0,0 +1,43 @@
/**
* Copyright 1993-2013 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
// These are helper functions for the SDK samples (string parsing,
// timers, image helpers, etc)
#ifndef COMMON_HELPER_FUNCTIONS_H_
#define COMMON_HELPER_FUNCTIONS_H_
#ifdef WIN32
#pragma warning(disable : 4996)
#endif
// includes, project
#include <assert.h>
#include "exception.h"
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <algorithm>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
// includes, timer, string parsing, image helpers
#include "helper_image.h" // helper functions for image compare, dump, data comparisons
#include "helper_string.h" // helper functions for string parsing
#include "helper_timer.h" // helper functions for timers
#ifndef EXIT_WAIVED
#define EXIT_WAIVED 2
#endif
#endif // COMMON_HELPER_FUNCTIONS_H_

985
helper_image.h Normal file
View File

@ -0,0 +1,985 @@
/**
* Copyright 1993-2013 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
// These are helper functions for the SDK samples (image,bitmap)
#ifndef COMMON_HELPER_IMAGE_H_
#define COMMON_HELPER_IMAGE_H_
#include <assert.h>
#include <math.h>
#include <stdint.h>
#include <algorithm>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#ifndef MIN
#define MIN(a, b) ((a < b) ? a : b)
#endif
#ifndef MAX
#define MAX(a, b) ((a > b) ? a : b)
#endif
#ifndef EXIT_WAIVED
#define EXIT_WAIVED 2
#endif
#include "exception.h"
#include "helper_string.h"
// namespace unnamed (internal)
namespace helper_image_internal {
//! size of PGM file header
const unsigned int PGMHeaderSize = 0x40;
// types
//! Data converter from unsigned char / unsigned byte to type T
template <class T>
struct ConverterFromUByte;
//! Data converter from unsigned char / unsigned byte
template <>
struct ConverterFromUByte<unsigned char> {
//! Conversion operator
//! @return converted value
//! @param val value to convert
float operator()(const unsigned char &val) {
return static_cast<unsigned char>(val);
}
};
//! Data converter from unsigned char / unsigned byte to float
template <>
struct ConverterFromUByte<float> {
//! Conversion operator
//! @return converted value
//! @param val value to convert
float operator()(const unsigned char &val) {
return static_cast<float>(val) / 255.0f;
}
};
//! Data converter from unsigned char / unsigned byte to type T
template <class T>
struct ConverterToUByte;
//! Data converter from unsigned char / unsigned byte to unsigned int
template <>
struct ConverterToUByte<unsigned char> {
//! Conversion operator (essentially a passthru
//! @return converted value
//! @param val value to convert
unsigned char operator()(const unsigned char &val) { return val; }
};
//! Data converter from unsigned char / unsigned byte to unsigned int
template <>
struct ConverterToUByte<float> {
//! Conversion operator
//! @return converted value
//! @param val value to convert
unsigned char operator()(const float &val) {
return static_cast<unsigned char>(val * 255.0f);
}
};
} // namespace helper_image_internal
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
#ifndef FOPEN
#define FOPEN(fHandle, filename, mode) fopen_s(&fHandle, filename, mode)
#endif
#ifndef FOPEN_FAIL
#define FOPEN_FAIL(result) (result != 0)
#endif
#ifndef SSCANF
#define SSCANF sscanf_s
#endif
#else
#ifndef FOPEN
#define FOPEN(fHandle, filename, mode) (fHandle = fopen(filename, mode))
#endif
#ifndef FOPEN_FAIL
#define FOPEN_FAIL(result) (result == NULL)
#endif
#ifndef SSCANF
#define SSCANF sscanf
#endif
#endif
inline bool __loadPPM(const char *file, unsigned char **data, unsigned int *w,
unsigned int *h, unsigned int *channels) {
FILE *fp = NULL;
if (FOPEN_FAIL(FOPEN(fp, file, "rb"))) {
std::cerr << "__LoadPPM() : Failed to open file: " << file << std::endl;
return false;
}
// check header
char header[helper_image_internal::PGMHeaderSize];
if (fgets(header, helper_image_internal::PGMHeaderSize, fp) == NULL) {
std::cerr << "__LoadPPM() : reading PGM header returned NULL" << std::endl;
return false;
}
if (strncmp(header, "P5", 2) == 0) {
*channels = 1;
} else if (strncmp(header, "P6", 2) == 0) {
*channels = 3;
} else {
std::cerr << "__LoadPPM() : File is not a PPM or PGM image" << std::endl;
*channels = 0;
return false;
}
// parse header, read maxval, width and height
unsigned int width = 0;
unsigned int height = 0;
unsigned int maxval = 0;
unsigned int i = 0;
while (i < 3) {
if (fgets(header, helper_image_internal::PGMHeaderSize, fp) == NULL) {
std::cerr << "__LoadPPM() : reading PGM header returned NULL"
<< std::endl;
return false;
}
if (header[0] == '#') {
continue;
}
if (i == 0) {
i += SSCANF(header, "%u %u %u", &width, &height, &maxval);
} else if (i == 1) {
i += SSCANF(header, "%u %u", &height, &maxval);
} else if (i == 2) {
i += SSCANF(header, "%u", &maxval);
}
}
// check if given handle for the data is initialized
if (NULL != *data) {
if (*w != width || *h != height) {
std::cerr << "__LoadPPM() : Invalid image dimensions." << std::endl;
}
} else {
*data = (unsigned char *)malloc(sizeof(unsigned char) * width * height *
*channels);
*w = width;
*h = height;
}
// read and close file
if (fread(*data, sizeof(unsigned char), width * height * *channels, fp) ==
0) {
std::cerr << "__LoadPPM() read data returned error." << std::endl;
}
fclose(fp);
return true;
}
template <class T>
inline bool sdkLoadPGM(const char *file, T **data, unsigned int *w,
unsigned int *h) {
unsigned char *idata = NULL;
unsigned int channels;
if (true != __loadPPM(file, &idata, w, h, &channels)) {
return false;
}
unsigned int size = *w * *h * channels;
// initialize mem if necessary
// the correct size is checked / set in loadPGMc()
if (NULL == *data) {
*data = reinterpret_cast<T *>(malloc(sizeof(T) * size));
}
// copy and cast data
std::transform(idata, idata + size, *data,
helper_image_internal::ConverterFromUByte<T>());
free(idata);
return true;
}
template <class T>
inline bool sdkLoadPPM4(const char *file, T **data, unsigned int *w,
unsigned int *h) {
unsigned char *idata = 0;
unsigned int channels;
if (__loadPPM(file, &idata, w, h, &channels)) {
// pad 4th component
int size = *w * *h;
// keep the original pointer
unsigned char *idata_orig = idata;
*data = reinterpret_cast<T *>(malloc(sizeof(T) * size * 4));
unsigned char *ptr = *data;
for (int i = 0; i < size; i++) {
*ptr++ = *idata++;
*ptr++ = *idata++;
*ptr++ = *idata++;
*ptr++ = 0;
}
free(idata_orig);
return true;
} else {
free(idata);
return false;
}
}
inline bool __savePPM(const char *file, unsigned char *data, unsigned int w,
unsigned int h, unsigned int channels) {
assert(NULL != data);
assert(w > 0);
assert(h > 0);
std::fstream fh(file, std::fstream::out | std::fstream::binary);
if (fh.bad()) {
std::cerr << "__savePPM() : Opening file failed." << std::endl;
return false;
}
if (channels == 1) {
fh << "P5\n";
} else if (channels == 3) {
fh << "P6\n";
} else {
std::cerr << "__savePPM() : Invalid number of channels." << std::endl;
return false;
}
fh << w << "\n" << h << "\n" << 0xff << std::endl;
for (unsigned int i = 0; (i < (w * h * channels)) && fh.good(); ++i) {
fh << data[i];
}
fh.flush();
if (fh.bad()) {
std::cerr << "__savePPM() : Writing data failed." << std::endl;
return false;
}
fh.close();
return true;
}
template <class T>
inline bool sdkSavePGM(const char *file, T *data, unsigned int w,
unsigned int h) {
unsigned int size = w * h;
unsigned char *idata = (unsigned char *)malloc(sizeof(unsigned char) * size);
std::transform(data, data + size, idata,
helper_image_internal::ConverterToUByte<T>());
// write file
bool result = __savePPM(file, idata, w, h, 1);
// cleanup
free(idata);
return result;
}
inline bool sdkSavePPM4ub(const char *file, unsigned char *data, unsigned int w,
unsigned int h) {
// strip 4th component
int size = w * h;
unsigned char *ndata =
(unsigned char *)malloc(sizeof(unsigned char) * size * 3);
unsigned char *ptr = ndata;
for (int i = 0; i < size; i++) {
*ptr++ = *data++;
*ptr++ = *data++;
*ptr++ = *data++;
data++;
}
bool result = __savePPM(file, ndata, w, h, 3);
free(ndata);
return result;
}
//////////////////////////////////////////////////////////////////////////////
//! Read file \filename and return the data
//! @return bool if reading the file succeeded, otherwise false
//! @param filename name of the source file
//! @param data uninitialized pointer, returned initialized and pointing to
//! the data read
//! @param len number of data elements in data, -1 on error
//////////////////////////////////////////////////////////////////////////////
template <class T>
inline bool sdkReadFile(const char *filename, T **data, unsigned int *len,
bool verbose) {
// check input arguments
assert(NULL != filename);
assert(NULL != len);
// intermediate storage for the data read
std::vector<T> data_read;
// open file for reading
FILE *fh = NULL;
// check if filestream is valid
if (FOPEN_FAIL(FOPEN(fh, filename, "r"))) {
printf("Unable to open input file: %s\n", filename);
return false;
}
// read all data elements
T token;
while (!feof(fh)) {
fscanf(fh, "%f", &token);
data_read.push_back(token);
}
// the last element is read twice
data_read.pop_back();
fclose(fh);
// check if the given handle is already initialized
if (NULL != *data) {
if (*len != data_read.size()) {
std::cerr << "sdkReadFile() : Initialized memory given but "
<< "size mismatch with signal read "
<< "(data read / data init = " << (unsigned int)data_read.size()
<< " / " << *len << ")" << std::endl;
return false;
}
} else {
// allocate storage for the data read
*data = reinterpret_cast<T *>(malloc(sizeof(T) * data_read.size()));
// store signal size
*len = static_cast<unsigned int>(data_read.size());
}
// copy data
memcpy(*data, &data_read.front(), sizeof(T) * data_read.size());
return true;
}
//////////////////////////////////////////////////////////////////////////////
//! Read file \filename and return the data
//! @return bool if reading the file succeeded, otherwise false
//! @param filename name of the source file
//! @param data uninitialized pointer, returned initialized and pointing to
//! the data read
//! @param len number of data elements in data, -1 on error
//////////////////////////////////////////////////////////////////////////////
template <class T>
inline bool sdkReadFileBlocks(const char *filename, T **data, unsigned int *len,
unsigned int block_num, unsigned int block_size,
bool verbose) {
// check input arguments
assert(NULL != filename);
assert(NULL != len);
// open file for reading
FILE *fh = fopen(filename, "rb");
if (fh == NULL && verbose) {
std::cerr << "sdkReadFile() : Opening file failed." << std::endl;
return false;
}
// check if the given handle is already initialized
// allocate storage for the data read
data[block_num] = reinterpret_cast<T *>(malloc(block_size));
// read all data elements
fseek(fh, block_num * block_size, SEEK_SET);
*len = fread(data[block_num], sizeof(T), block_size / sizeof(T), fh);
fclose(fh);
return true;
}
//////////////////////////////////////////////////////////////////////////////
//! Write a data file \filename
//! @return true if writing the file succeeded, otherwise false
//! @param filename name of the source file
//! @param data data to write
//! @param len number of data elements in data, -1 on error
//! @param epsilon epsilon for comparison
//////////////////////////////////////////////////////////////////////////////
template <class T, class S>
inline bool sdkWriteFile(const char *filename, const T *data, unsigned int len,
const S epsilon, bool verbose, bool append = false) {
assert(NULL != filename);
assert(NULL != data);
// open file for writing
// if (append) {
std::fstream fh(filename, std::fstream::out | std::fstream::ate);
if (verbose) {
std::cerr << "sdkWriteFile() : Open file " << filename
<< " for write/append." << std::endl;
}
/* } else {
std::fstream fh(filename, std::fstream::out);
if (verbose) {
std::cerr << "sdkWriteFile() : Open file " << filename << " for
write." << std::endl;
}
}
*/
// check if filestream is valid
if (!fh.good()) {
if (verbose) {
std::cerr << "sdkWriteFile() : Opening file failed." << std::endl;
}
return false;
}
// first write epsilon
fh << "# " << epsilon << "\n";
// write data
for (unsigned int i = 0; (i < len) && (fh.good()); ++i) {
fh << data[i] << ' ';
}
// Check if writing succeeded
if (!fh.good()) {
if (verbose) {
std::cerr << "sdkWriteFile() : Writing file failed." << std::endl;
}
return false;
}
// file ends with nl
fh << std::endl;
return true;
}
//////////////////////////////////////////////////////////////////////////////
//! Compare two arrays of arbitrary type
//! @return true if \a reference and \a data are identical, otherwise false
//! @param reference timer_interface to the reference data / gold image
//! @param data handle to the computed data
//! @param len number of elements in reference and data
//! @param epsilon epsilon to use for the comparison
//////////////////////////////////////////////////////////////////////////////
template <class T, class S>
inline bool compareData(const T *reference, const T *data,
const unsigned int len, const S epsilon,
const float threshold) {
assert(epsilon >= 0);
bool result = true;
unsigned int error_count = 0;
for (unsigned int i = 0; i < len; ++i) {
float diff = static_cast<float>(reference[i]) - static_cast<float>(data[i]);
bool comp = (diff <= epsilon) && (diff >= -epsilon);
result &= comp;
error_count += !comp;
#if 0
if (!comp) {
std::cerr << "ERROR, i = " << i << ",\t "
<< reference[i] << " / "
<< data[i]
<< " (reference / data)\n";
}
#endif
}
if (threshold == 0.0f) {
return (result) ? true : false;
} else {
if (error_count) {
printf("%4.2f(%%) of bytes mismatched (count=%d)\n",
static_cast<float>(error_count) * 100 / static_cast<float>(len),
error_count);
}
return (len * threshold > error_count) ? true : false;
}
}
#ifndef __MIN_EPSILON_ERROR
#define __MIN_EPSILON_ERROR 1e-3f
#endif
//////////////////////////////////////////////////////////////////////////////
//! Compare two arrays of arbitrary type
//! @return true if \a reference and \a data are identical, otherwise false
//! @param reference handle to the reference data / gold image
//! @param data handle to the computed data
//! @param len number of elements in reference and data
//! @param epsilon epsilon to use for the comparison
//! @param epsilon threshold % of (# of bytes) for pass/fail
//////////////////////////////////////////////////////////////////////////////
template <class T, class S>
inline bool compareDataAsFloatThreshold(const T *reference, const T *data,
const unsigned int len, const S epsilon,
const float threshold) {
assert(epsilon >= 0);
// If we set epsilon to be 0, let's set a minimum threshold
float max_error = MAX((float)epsilon, __MIN_EPSILON_ERROR);
int error_count = 0;
bool result = true;
for (unsigned int i = 0; i < len; ++i) {
float diff =
fabs(static_cast<float>(reference[i]) - static_cast<float>(data[i]));
bool comp = (diff < max_error);
result &= comp;
if (!comp) {
error_count++;
}
}
if (threshold == 0.0f) {
if (error_count) {
printf("total # of errors = %d\n", error_count);
}
return (error_count == 0) ? true : false;
} else {
if (error_count) {
printf("%4.2f(%%) of bytes mismatched (count=%d)\n",
static_cast<float>(error_count) * 100 / static_cast<float>(len),
error_count);
}
return ((len * threshold > error_count) ? true : false);
}
}
inline void sdkDumpBin(void *data, unsigned int bytes, const char *filename) {
printf("sdkDumpBin: <%s>\n", filename);
FILE *fp;
FOPEN(fp, filename, "wb");
fwrite(data, bytes, 1, fp);
fflush(fp);
fclose(fp);
}
inline bool sdkCompareBin2BinUint(const char *src_file, const char *ref_file,
unsigned int nelements, const float epsilon,
const float threshold, char *exec_path) {
unsigned int *src_buffer, *ref_buffer;
FILE *src_fp = NULL, *ref_fp = NULL;
uint64_t error_count = 0;
size_t fsize = 0;
if (FOPEN_FAIL(FOPEN(src_fp, src_file, "rb"))) {
printf("compareBin2Bin <unsigned int> unable to open src_file: %s\n",
src_file);
error_count++;
}
char *ref_file_path = sdkFindFilePath(ref_file, exec_path);
if (ref_file_path == NULL) {
printf("compareBin2Bin <unsigned int> unable to find <%s> in <%s>\n",
ref_file, exec_path);
printf(">>> Check info.xml and [project//data] folder <%s> <<<\n",
ref_file);
printf("Aborting comparison!\n");
printf(" FAILED\n");
error_count++;
if (src_fp) {
fclose(src_fp);
}
if (ref_fp) {
fclose(ref_fp);
}
} else {
if (FOPEN_FAIL(FOPEN(ref_fp, ref_file_path, "rb"))) {
printf(
"compareBin2Bin <unsigned int>"
" unable to open ref_file: %s\n",
ref_file_path);
error_count++;
}
if (src_fp && ref_fp) {
src_buffer = (unsigned int *)malloc(nelements * sizeof(unsigned int));
ref_buffer = (unsigned int *)malloc(nelements * sizeof(unsigned int));
fsize = fread(src_buffer, nelements, sizeof(unsigned int), src_fp);
fsize = fread(ref_buffer, nelements, sizeof(unsigned int), ref_fp);
printf(
"> compareBin2Bin <unsigned int> nelements=%d,"
" epsilon=%4.2f, threshold=%4.2f\n",
nelements, epsilon, threshold);
printf(" src_file <%s>, size=%d bytes\n", src_file,
static_cast<int>(fsize));
printf(" ref_file <%s>, size=%d bytes\n", ref_file_path,
static_cast<int>(fsize));
if (!compareData<unsigned int, float>(ref_buffer, src_buffer, nelements,
epsilon, threshold)) {
error_count++;
}
fclose(src_fp);
fclose(ref_fp);
free(src_buffer);
free(ref_buffer);
} else {
if (src_fp) {
fclose(src_fp);
}
if (ref_fp) {
fclose(ref_fp);
}
}
}
if (error_count == 0) {
printf(" OK\n");
} else {
printf(" FAILURE: %d errors...\n", (unsigned int)error_count);
}
return (error_count == 0); // returns true if all pixels pass
}
inline bool sdkCompareBin2BinFloat(const char *src_file, const char *ref_file,
unsigned int nelements, const float epsilon,
const float threshold, char *exec_path) {
float *src_buffer = NULL, *ref_buffer = NULL;
FILE *src_fp = NULL, *ref_fp = NULL;
size_t fsize = 0;
uint64_t error_count = 0;
if (FOPEN_FAIL(FOPEN(src_fp, src_file, "rb"))) {
printf("compareBin2Bin <float> unable to open src_file: %s\n", src_file);
error_count = 1;
}
char *ref_file_path = sdkFindFilePath(ref_file, exec_path);
if (ref_file_path == NULL) {
printf("compareBin2Bin <float> unable to find <%s> in <%s>\n", ref_file,
exec_path);
printf(">>> Check info.xml and [project//data] folder <%s> <<<\n",
exec_path);
printf("Aborting comparison!\n");
printf(" FAILED\n");
error_count++;
if (src_fp) {
fclose(src_fp);
}
if (ref_fp) {
fclose(ref_fp);
}
} else {
if (FOPEN_FAIL(FOPEN(ref_fp, ref_file_path, "rb"))) {
printf("compareBin2Bin <float> unable to open ref_file: %s\n",
ref_file_path);
error_count = 1;
}
if (src_fp && ref_fp) {
src_buffer = reinterpret_cast<float *>(malloc(nelements * sizeof(float)));
ref_buffer = reinterpret_cast<float *>(malloc(nelements * sizeof(float)));
printf(
"> compareBin2Bin <float> nelements=%d, epsilon=%4.2f,"
" threshold=%4.2f\n",
nelements, epsilon, threshold);
fsize = fread(src_buffer, sizeof(float), nelements, src_fp);
printf(" src_file <%s>, size=%d bytes\n", src_file,
static_cast<int>(fsize * sizeof(float)));
fsize = fread(ref_buffer, sizeof(float), nelements, ref_fp);
printf(" ref_file <%s>, size=%d bytes\n", ref_file_path,
static_cast<int>(fsize * sizeof(float)));
if (!compareDataAsFloatThreshold<float, float>(
ref_buffer, src_buffer, nelements, epsilon, threshold)) {
error_count++;
}
fclose(src_fp);
fclose(ref_fp);
free(src_buffer);
free(ref_buffer);
} else {
if (src_fp) {
fclose(src_fp);
}
if (ref_fp) {
fclose(ref_fp);
}
}
}
if (error_count == 0) {
printf(" OK\n");
} else {
printf(" FAILURE: %d errors...\n", (unsigned int)error_count);
}
return (error_count == 0); // returns true if all pixels pass
}
inline bool sdkCompareL2fe(const float *reference, const float *data,
const unsigned int len, const float epsilon) {
assert(epsilon >= 0);
float error = 0;
float ref = 0;
for (unsigned int i = 0; i < len; ++i) {
float diff = reference[i] - data[i];
error += diff * diff;
ref += reference[i] * reference[i];
}
float normRef = sqrtf(ref);
if (fabs(ref) < 1e-7) {
#ifdef _DEBUG
std::cerr << "ERROR, reference l2-norm is 0\n";
#endif
return false;
}
float normError = sqrtf(error);
error = normError / normRef;
bool result = error < epsilon;
#ifdef _DEBUG
if (!result) {
std::cerr << "ERROR, l2-norm error " << error << " is greater than epsilon "
<< epsilon << "\n";
}
#endif
return result;
}
inline bool sdkLoadPPMub(const char *file, unsigned char **data,
unsigned int *w, unsigned int *h) {
unsigned int channels;
return __loadPPM(file, data, w, h, &channels);
}
inline bool sdkLoadPPM4ub(const char *file, unsigned char **data,
unsigned int *w, unsigned int *h) {
unsigned char *idata = 0;
unsigned int channels;
if (__loadPPM(file, &idata, w, h, &channels)) {
// pad 4th component
int size = *w * *h;
// keep the original pointer
unsigned char *idata_orig = idata;
*data = (unsigned char *)malloc(sizeof(unsigned char) * size * 4);
unsigned char *ptr = *data;
for (int i = 0; i < size; i++) {
*ptr++ = *idata++;
*ptr++ = *idata++;
*ptr++ = *idata++;
*ptr++ = 0;
}
free(idata_orig);
return true;
} else {
free(idata);
return false;
}
}
inline bool sdkComparePPM(const char *src_file, const char *ref_file,
const float epsilon, const float threshold,
bool verboseErrors) {
unsigned char *src_data, *ref_data;
uint64_t error_count = 0;
unsigned int ref_width, ref_height;
unsigned int src_width, src_height;
if (src_file == NULL || ref_file == NULL) {
if (verboseErrors) {
std::cerr << "PPMvsPPM: src_file or ref_file is NULL."
" Aborting comparison\n";
}
return false;
}
if (verboseErrors) {
std::cerr << "> Compare (a)rendered: <" << src_file << ">\n";
std::cerr << "> (b)reference: <" << ref_file << ">\n";
}
if (sdkLoadPPM4ub(ref_file, &ref_data, &ref_width, &ref_height) != true) {
if (verboseErrors) {
std::cerr << "PPMvsPPM: unable to load ref image file: " << ref_file
<< "\n";
}
return false;
}
if (sdkLoadPPM4ub(src_file, &src_data, &src_width, &src_height) != true) {
std::cerr << "PPMvsPPM: unable to load src image file: " << src_file
<< "\n";
return false;
}
if (src_height != ref_height || src_width != ref_width) {
if (verboseErrors) {
std::cerr << "PPMvsPPM: source and ref size mismatch (" << src_width
<< "," << src_height << ")vs(" << ref_width << "," << ref_height
<< ")\n";
}
}
if (verboseErrors) {
std::cerr << "PPMvsPPM: comparing images size (" << src_width << ","
<< src_height << ") epsilon(" << epsilon << "), threshold("
<< threshold * 100 << "%)\n";
}
if (compareData(ref_data, src_data, src_width * src_height * 4, epsilon,
threshold) == false) {
error_count = 1;
}
if (error_count == 0) {
if (verboseErrors) {
std::cerr << " OK\n\n";
}
} else {
if (verboseErrors) {
std::cerr << " FAILURE! " << error_count << " errors...\n\n";
}
}
// returns true if all pixels pass
return (error_count == 0) ? true : false;
}
inline bool sdkComparePGM(const char *src_file, const char *ref_file,
const float epsilon, const float threshold,
bool verboseErrors) {
unsigned char *src_data = 0, *ref_data = 0;
uint64_t error_count = 0;
unsigned int ref_width, ref_height;
unsigned int src_width, src_height;
if (src_file == NULL || ref_file == NULL) {
if (verboseErrors) {
std::cerr << "PGMvsPGM: src_file or ref_file is NULL."
" Aborting comparison\n";
}
return false;
}
if (verboseErrors) {
std::cerr << "> Compare (a)rendered: <" << src_file << ">\n";
std::cerr << "> (b)reference: <" << ref_file << ">\n";
}
if (sdkLoadPPMub(ref_file, &ref_data, &ref_width, &ref_height) != true) {
if (verboseErrors) {
std::cerr << "PGMvsPGM: unable to load ref image file: " << ref_file
<< "\n";
}
return false;
}
if (sdkLoadPPMub(src_file, &src_data, &src_width, &src_height) != true) {
std::cerr << "PGMvsPGM: unable to load src image file: " << src_file
<< "\n";
return false;
}
if (src_height != ref_height || src_width != ref_width) {
if (verboseErrors) {
std::cerr << "PGMvsPGM: source and ref size mismatch (" << src_width
<< "," << src_height << ")vs(" << ref_width << "," << ref_height
<< ")\n";
}
}
if (verboseErrors)
std::cerr << "PGMvsPGM: comparing images size (" << src_width << ","
<< src_height << ") epsilon(" << epsilon << "), threshold("
<< threshold * 100 << "%)\n";
if (compareData(ref_data, src_data, src_width * src_height, epsilon,
threshold) == false) {
error_count = 1;
}
if (error_count == 0) {
if (verboseErrors) {
std::cerr << " OK\n\n";
}
} else {
if (verboseErrors) {
std::cerr << " FAILURE! " << error_count << " errors...\n\n";
}
}
// returns true if all pixels pass
return (error_count == 0) ? true : false;
}
#endif // COMMON_HELPER_IMAGE_H_

683
helper_string.h Normal file
View File

@ -0,0 +1,683 @@
/**
* Copyright 1993-2013 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
// These are helper functions for the SDK samples (string parsing, timers, etc)
#ifndef COMMON_HELPER_STRING_H_
#define COMMON_HELPER_STRING_H_
#include <stdio.h>
#include <stdlib.h>
#include <fstream>
#include <string>
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
#ifndef _CRT_SECURE_NO_DEPRECATE
#define _CRT_SECURE_NO_DEPRECATE
#endif
#ifndef STRCASECMP
#define STRCASECMP _stricmp
#endif
#ifndef STRNCASECMP
#define STRNCASECMP _strnicmp
#endif
#ifndef STRCPY
#define STRCPY(sFilePath, nLength, sPath) strcpy_s(sFilePath, nLength, sPath)
#endif
#ifndef FOPEN
#define FOPEN(fHandle, filename, mode) fopen_s(&fHandle, filename, mode)
#endif
#ifndef FOPEN_FAIL
#define FOPEN_FAIL(result) (result != 0)
#endif
#ifndef SSCANF
#define SSCANF sscanf_s
#endif
#ifndef SPRINTF
#define SPRINTF sprintf_s
#endif
#else // Linux Includes
#include <string.h>
#include <strings.h>
#ifndef STRCASECMP
#define STRCASECMP strcasecmp
#endif
#ifndef STRNCASECMP
#define STRNCASECMP strncasecmp
#endif
#ifndef STRCPY
#define STRCPY(sFilePath, nLength, sPath) strcpy(sFilePath, sPath)
#endif
#ifndef FOPEN
#define FOPEN(fHandle, filename, mode) (fHandle = fopen(filename, mode))
#endif
#ifndef FOPEN_FAIL
#define FOPEN_FAIL(result) (result == NULL)
#endif
#ifndef SSCANF
#define SSCANF sscanf
#endif
#ifndef SPRINTF
#define SPRINTF sprintf
#endif
#endif
#ifndef EXIT_WAIVED
#define EXIT_WAIVED 2
#endif
// CUDA Utility Helper Functions
inline int stringRemoveDelimiter(char delimiter, const char *string) {
int string_start = 0;
while (string[string_start] == delimiter) {
string_start++;
}
if (string_start >= static_cast<int>(strlen(string) - 1)) {
return 0;
}
return string_start;
}
inline int getFileExtension(char *filename, char **extension) {
int string_length = static_cast<int>(strlen(filename));
while (filename[string_length--] != '.') {
if (string_length == 0) break;
}
if (string_length > 0) string_length += 2;
if (string_length == 0)
*extension = NULL;
else
*extension = &filename[string_length];
return string_length;
}
inline bool checkCmdLineFlag(const int argc, const char **argv,
const char *string_ref) {
bool bFound = false;
if (argc >= 1) {
for (int i = 1; i < argc; i++) {
int string_start = stringRemoveDelimiter('-', argv[i]);
const char *string_argv = &argv[i][string_start];
const char *equal_pos = strchr(string_argv, '=');
int argv_length = static_cast<int>(
equal_pos == 0 ? strlen(string_argv) : equal_pos - string_argv);
int length = static_cast<int>(strlen(string_ref));
if (length == argv_length &&
!STRNCASECMP(string_argv, string_ref, length)) {
bFound = true;
continue;
}
}
}
return bFound;
}
// This function wraps the CUDA Driver API into a template function
template <class T>
inline bool getCmdLineArgumentValue(const int argc, const char **argv,
const char *string_ref, T *value) {
bool bFound = false;
if (argc >= 1) {
for (int i = 1; i < argc; i++) {
int string_start = stringRemoveDelimiter('-', argv[i]);
const char *string_argv = &argv[i][string_start];
int length = static_cast<int>(strlen(string_ref));
if (!STRNCASECMP(string_argv, string_ref, length)) {
if (length + 1 <= static_cast<int>(strlen(string_argv))) {
int auto_inc = (string_argv[length] == '=') ? 1 : 0;
*value = (T)atoi(&string_argv[length + auto_inc]);
}
bFound = true;
i = argc;
}
}
}
return bFound;
}
inline int getCmdLineArgumentInt(const int argc, const char **argv,
const char *string_ref) {
bool bFound = false;
int value = -1;
if (argc >= 1) {
for (int i = 1; i < argc; i++) {
int string_start = stringRemoveDelimiter('-', argv[i]);
const char *string_argv = &argv[i][string_start];
int length = static_cast<int>(strlen(string_ref));
if (!STRNCASECMP(string_argv, string_ref, length)) {
if (length + 1 <= static_cast<int>(strlen(string_argv))) {
int auto_inc = (string_argv[length] == '=') ? 1 : 0;
value = atoi(&string_argv[length + auto_inc]);
} else {
value = 0;
}
bFound = true;
continue;
}
}
}
if (bFound) {
return value;
} else {
return 0;
}
}
inline float getCmdLineArgumentFloat(const int argc, const char **argv,
const char *string_ref) {
bool bFound = false;
float value = -1;
if (argc >= 1) {
for (int i = 1; i < argc; i++) {
int string_start = stringRemoveDelimiter('-', argv[i]);
const char *string_argv = &argv[i][string_start];
int length = static_cast<int>(strlen(string_ref));
if (!STRNCASECMP(string_argv, string_ref, length)) {
if (length + 1 <= static_cast<int>(strlen(string_argv))) {
int auto_inc = (string_argv[length] == '=') ? 1 : 0;
value = static_cast<float>(atof(&string_argv[length + auto_inc]));
} else {
value = 0.f;
}
bFound = true;
continue;
}
}
}
if (bFound) {
return value;
} else {
return 0;
}
}
inline bool getCmdLineArgumentString(const int argc, const char **argv,
const char *string_ref,
char **string_retval) {
bool bFound = false;
if (argc >= 1) {
for (int i = 1; i < argc; i++) {
int string_start = stringRemoveDelimiter('-', argv[i]);
char *string_argv = const_cast<char*>(&argv[i][string_start]);
int length = static_cast<int>(strlen(string_ref));
if (!STRNCASECMP(string_argv, string_ref, length)) {
*string_retval = &string_argv[length + 1];
bFound = true;
continue;
}
}
}
if (!bFound) {
*string_retval = NULL;
}
return bFound;
}
//////////////////////////////////////////////////////////////////////////////
//! Find the path for a file assuming that
//! files are found in the searchPath.
//!
//! @return the path if succeeded, otherwise 0
//! @param filename name of the file
//! @param executable_path optional absolute path of the executable
//////////////////////////////////////////////////////////////////////////////
inline char *sdkFindFilePath(const char *filename,
const char *executable_path) {
// <executable_name> defines a variable that is replaced with the name of the
// executable
// Typical relative search paths to locate needed companion files (e.g. sample
// input data, or JIT source files) The origin for the relative search may be
// the .exe file, a .bat file launching an .exe, a browser .exe launching the
// .exe or .bat, etc
const char *searchPath[] = {
"./", // same dir
"./<executable_name>_data_files/",
"./common/", // "/common/" subdir
"./common/data/", // "/common/data/" subdir
"./data/", // "/data/" subdir
"./src/", // "/src/" subdir
"./src/<executable_name>/data/", // "/src/<executable_name>/data/" subdir
"./inc/", // "/inc/" subdir
"./0_Simple/", // "/0_Simple/" subdir
"./1_Utilities/", // "/1_Utilities/" subdir
"./2_Graphics/", // "/2_Graphics/" subdir
"./3_Imaging/", // "/3_Imaging/" subdir
"./4_Finance/", // "/4_Finance/" subdir
"./5_Simulations/", // "/5_Simulations/" subdir
"./6_Advanced/", // "/6_Advanced/" subdir
"./7_CUDALibraries/", // "/7_CUDALibraries/" subdir
"./8_Android/", // "/8_Android/" subdir
"./samples/", // "/samples/" subdir
"./0_Simple/<executable_name>/data/", // "/0_Simple/<executable_name>/data/"
// subdir
"./1_Utilities/<executable_name>/data/", // "/1_Utilities/<executable_name>/data/"
// subdir
"./2_Graphics/<executable_name>/data/", // "/2_Graphics/<executable_name>/data/"
// subdir
"./3_Imaging/<executable_name>/data/", // "/3_Imaging/<executable_name>/data/"
// subdir
"./4_Finance/<executable_name>/data/", // "/4_Finance/<executable_name>/data/"
// subdir
"./5_Simulations/<executable_name>/data/", // "/5_Simulations/<executable_name>/data/"
// subdir
"./6_Advanced/<executable_name>/data/", // "/6_Advanced/<executable_name>/data/"
// subdir
"./7_CUDALibraries/<executable_name>/", // "/7_CUDALibraries/<executable_name>/"
// subdir
"./7_CUDALibraries/<executable_name>/data/", // "/7_CUDALibraries/<executable_name>/data/"
// subdir
"../", // up 1 in tree
"../common/", // up 1 in tree, "/common/" subdir
"../common/data/", // up 1 in tree, "/common/data/" subdir
"../data/", // up 1 in tree, "/data/" subdir
"../src/", // up 1 in tree, "/src/" subdir
"../inc/", // up 1 in tree, "/inc/" subdir
"../0_Simple/<executable_name>/data/", // up 1 in tree,
// "/0_Simple/<executable_name>/"
// subdir
"../1_Utilities/<executable_name>/data/", // up 1 in tree,
// "/1_Utilities/<executable_name>/"
// subdir
"../2_Graphics/<executable_name>/data/", // up 1 in tree,
// "/2_Graphics/<executable_name>/"
// subdir
"../3_Imaging/<executable_name>/data/", // up 1 in tree,
// "/3_Imaging/<executable_name>/"
// subdir
"../4_Finance/<executable_name>/data/", // up 1 in tree,
// "/4_Finance/<executable_name>/"
// subdir
"../5_Simulations/<executable_name>/data/", // up 1 in tree,
// "/5_Simulations/<executable_name>/"
// subdir
"../6_Advanced/<executable_name>/data/", // up 1 in tree,
// "/6_Advanced/<executable_name>/"
// subdir
"../7_CUDALibraries/<executable_name>/data/", // up 1 in tree,
// "/7_CUDALibraries/<executable_name>/"
// subdir
"../8_Android/<executable_name>/data/", // up 1 in tree,
// "/8_Android/<executable_name>/"
// subdir
"../samples/<executable_name>/data/", // up 1 in tree,
// "/samples/<executable_name>/"
// subdir
"../../", // up 2 in tree
"../../common/", // up 2 in tree, "/common/" subdir
"../../common/data/", // up 2 in tree, "/common/data/" subdir
"../../data/", // up 2 in tree, "/data/" subdir
"../../src/", // up 2 in tree, "/src/" subdir
"../../inc/", // up 2 in tree, "/inc/" subdir
"../../sandbox/<executable_name>/data/", // up 2 in tree,
// "/sandbox/<executable_name>/"
// subdir
"../../0_Simple/<executable_name>/data/", // up 2 in tree,
// "/0_Simple/<executable_name>/"
// subdir
"../../1_Utilities/<executable_name>/data/", // up 2 in tree,
// "/1_Utilities/<executable_name>/"
// subdir
"../../2_Graphics/<executable_name>/data/", // up 2 in tree,
// "/2_Graphics/<executable_name>/"
// subdir
"../../3_Imaging/<executable_name>/data/", // up 2 in tree,
// "/3_Imaging/<executable_name>/"
// subdir
"../../4_Finance/<executable_name>/data/", // up 2 in tree,
// "/4_Finance/<executable_name>/"
// subdir
"../../5_Simulations/<executable_name>/data/", // up 2 in tree,
// "/5_Simulations/<executable_name>/"
// subdir
"../../6_Advanced/<executable_name>/data/", // up 2 in tree,
// "/6_Advanced/<executable_name>/"
// subdir
"../../7_CUDALibraries/<executable_name>/data/", // up 2 in tree,
// "/7_CUDALibraries/<executable_name>/"
// subdir
"../../8_Android/<executable_name>/data/", // up 2 in tree,
// "/8_Android/<executable_name>/"
// subdir
"../../samples/<executable_name>/data/", // up 2 in tree,
// "/samples/<executable_name>/"
// subdir
"../../../", // up 3 in tree
"../../../src/<executable_name>/", // up 3 in tree,
// "/src/<executable_name>/" subdir
"../../../src/<executable_name>/data/", // up 3 in tree,
// "/src/<executable_name>/data/"
// subdir
"../../../src/<executable_name>/src/", // up 3 in tree,
// "/src/<executable_name>/src/"
// subdir
"../../../src/<executable_name>/inc/", // up 3 in tree,
// "/src/<executable_name>/inc/"
// subdir
"../../../sandbox/<executable_name>/", // up 3 in tree,
// "/sandbox/<executable_name>/"
// subdir
"../../../sandbox/<executable_name>/data/", // up 3 in tree,
// "/sandbox/<executable_name>/data/"
// subdir
"../../../sandbox/<executable_name>/src/", // up 3 in tree,
// "/sandbox/<executable_name>/src/"
// subdir
"../../../sandbox/<executable_name>/inc/", // up 3 in tree,
// "/sandbox/<executable_name>/inc/"
// subdir
"../../../0_Simple/<executable_name>/data/", // up 3 in tree,
// "/0_Simple/<executable_name>/"
// subdir
"../../../1_Utilities/<executable_name>/data/", // up 3 in tree,
// "/1_Utilities/<executable_name>/"
// subdir
"../../../2_Graphics/<executable_name>/data/", // up 3 in tree,
// "/2_Graphics/<executable_name>/"
// subdir
"../../../3_Imaging/<executable_name>/data/", // up 3 in tree,
// "/3_Imaging/<executable_name>/"
// subdir
"../../../4_Finance/<executable_name>/data/", // up 3 in tree,
// "/4_Finance/<executable_name>/"
// subdir
"../../../5_Simulations/<executable_name>/data/", // up 3 in tree,
// "/5_Simulations/<executable_name>/"
// subdir
"../../../6_Advanced/<executable_name>/data/", // up 3 in tree,
// "/6_Advanced/<executable_name>/"
// subdir
"../../../7_CUDALibraries/<executable_name>/data/", // up 3 in tree,
// "/7_CUDALibraries/<executable_name>/"
// subdir
"../../../8_Android/<executable_name>/data/", // up 3 in tree,
// "/8_Android/<executable_name>/"
// subdir
"../../../0_Simple/<executable_name>/", // up 3 in tree,
// "/0_Simple/<executable_name>/"
// subdir
"../../../1_Utilities/<executable_name>/", // up 3 in tree,
// "/1_Utilities/<executable_name>/"
// subdir
"../../../2_Graphics/<executable_name>/", // up 3 in tree,
// "/2_Graphics/<executable_name>/"
// subdir
"../../../3_Imaging/<executable_name>/", // up 3 in tree,
// "/3_Imaging/<executable_name>/"
// subdir
"../../../4_Finance/<executable_name>/", // up 3 in tree,
// "/4_Finance/<executable_name>/"
// subdir
"../../../5_Simulations/<executable_name>/", // up 3 in tree,
// "/5_Simulations/<executable_name>/"
// subdir
"../../../6_Advanced/<executable_name>/", // up 3 in tree,
// "/6_Advanced/<executable_name>/"
// subdir
"../../../7_CUDALibraries/<executable_name>/", // up 3 in tree,
// "/7_CUDALibraries/<executable_name>/"
// subdir
"../../../8_Android/<executable_name>/", // up 3 in tree,
// "/8_Android/<executable_name>/"
// subdir
"../../../samples/<executable_name>/data/", // up 3 in tree,
// "/samples/<executable_name>/"
// subdir
"../../../common/", // up 3 in tree, "../../../common/" subdir
"../../../common/data/", // up 3 in tree, "../../../common/data/" subdir
"../../../data/", // up 3 in tree, "../../../data/" subdir
"../../../../", // up 4 in tree
"../../../../src/<executable_name>/", // up 4 in tree,
// "/src/<executable_name>/" subdir
"../../../../src/<executable_name>/data/", // up 4 in tree,
// "/src/<executable_name>/data/"
// subdir
"../../../../src/<executable_name>/src/", // up 4 in tree,
// "/src/<executable_name>/src/"
// subdir
"../../../../src/<executable_name>/inc/", // up 4 in tree,
// "/src/<executable_name>/inc/"
// subdir
"../../../../sandbox/<executable_name>/", // up 4 in tree,
// "/sandbox/<executable_name>/"
// subdir
"../../../../sandbox/<executable_name>/data/", // up 4 in tree,
// "/sandbox/<executable_name>/data/"
// subdir
"../../../../sandbox/<executable_name>/src/", // up 4 in tree,
// "/sandbox/<executable_name>/src/"
// subdir
"../../../../sandbox/<executable_name>/inc/", // up 4 in tree,
// "/sandbox/<executable_name>/inc/"
// subdir
"../../../../0_Simple/<executable_name>/data/", // up 4 in tree,
// "/0_Simple/<executable_name>/"
// subdir
"../../../../1_Utilities/<executable_name>/data/", // up 4 in tree,
// "/1_Utilities/<executable_name>/"
// subdir
"../../../../2_Graphics/<executable_name>/data/", // up 4 in tree,
// "/2_Graphics/<executable_name>/"
// subdir
"../../../../3_Imaging/<executable_name>/data/", // up 4 in tree,
// "/3_Imaging/<executable_name>/"
// subdir
"../../../../4_Finance/<executable_name>/data/", // up 4 in tree,
// "/4_Finance/<executable_name>/"
// subdir
"../../../../5_Simulations/<executable_name>/data/", // up 4 in tree,
// "/5_Simulations/<executable_name>/"
// subdir
"../../../../6_Advanced/<executable_name>/data/", // up 4 in tree,
// "/6_Advanced/<executable_name>/"
// subdir
"../../../../7_CUDALibraries/<executable_name>/data/", // up 4 in tree,
// "/7_CUDALibraries/<executable_name>/"
// subdir
"../../../../8_Android/<executable_name>/data/", // up 4 in tree,
// "/8_Android/<executable_name>/"
// subdir
"../../../../0_Simple/<executable_name>/", // up 4 in tree,
// "/0_Simple/<executable_name>/"
// subdir
"../../../../1_Utilities/<executable_name>/", // up 4 in tree,
// "/1_Utilities/<executable_name>/"
// subdir
"../../../../2_Graphics/<executable_name>/", // up 4 in tree,
// "/2_Graphics/<executable_name>/"
// subdir
"../../../../3_Imaging/<executable_name>/", // up 4 in tree,
// "/3_Imaging/<executable_name>/"
// subdir
"../../../../4_Finance/<executable_name>/", // up 4 in tree,
// "/4_Finance/<executable_name>/"
// subdir
"../../../../5_Simulations/<executable_name>/", // up 4 in tree,
// "/5_Simulations/<executable_name>/"
// subdir
"../../../../6_Advanced/<executable_name>/", // up 4 in tree,
// "/6_Advanced/<executable_name>/"
// subdir
"../../../../7_CUDALibraries/<executable_name>/", // up 4 in tree,
// "/7_CUDALibraries/<executable_name>/"
// subdir
"../../../../8_Android/<executable_name>/", // up 4 in tree,
// "/8_Android/<executable_name>/"
// subdir
"../../../../samples/<executable_name>/data/", // up 4 in tree,
// "/samples/<executable_name>/"
// subdir
"../../../../common/", // up 4 in tree, "../../../common/" subdir
"../../../../common/data/", // up 4 in tree, "../../../common/data/"
// subdir
"../../../../data/", // up 4 in tree, "../../../data/" subdir
"../../../../../", // up 5 in tree
"../../../../../src/<executable_name>/", // up 5 in tree,
// "/src/<executable_name>/"
// subdir
"../../../../../src/<executable_name>/data/", // up 5 in tree,
// "/src/<executable_name>/data/"
// subdir
"../../../../../src/<executable_name>/src/", // up 5 in tree,
// "/src/<executable_name>/src/"
// subdir
"../../../../../src/<executable_name>/inc/", // up 5 in tree,
// "/src/<executable_name>/inc/"
// subdir
"../../../../../sandbox/<executable_name>/", // up 5 in tree,
// "/sandbox/<executable_name>/"
// subdir
"../../../../../sandbox/<executable_name>/data/", // up 5 in tree,
// "/sandbox/<executable_name>/data/"
// subdir
"../../../../../sandbox/<executable_name>/src/", // up 5 in tree,
// "/sandbox/<executable_name>/src/"
// subdir
"../../../../../sandbox/<executable_name>/inc/", // up 5 in tree,
// "/sandbox/<executable_name>/inc/"
// subdir
"../../../../../0_Simple/<executable_name>/data/", // up 5 in tree,
// "/0_Simple/<executable_name>/"
// subdir
"../../../../../1_Utilities/<executable_name>/data/", // up 5 in tree,
// "/1_Utilities/<executable_name>/"
// subdir
"../../../../../2_Graphics/<executable_name>/data/", // up 5 in tree,
// "/2_Graphics/<executable_name>/"
// subdir
"../../../../../3_Imaging/<executable_name>/data/", // up 5 in tree,
// "/3_Imaging/<executable_name>/"
// subdir
"../../../../../4_Finance/<executable_name>/data/", // up 5 in tree,
// "/4_Finance/<executable_name>/"
// subdir
"../../../../../5_Simulations/<executable_name>/data/", // up 5 in tree,
// "/5_Simulations/<executable_name>/"
// subdir
"../../../../../6_Advanced/<executable_name>/data/", // up 5 in tree,
// "/6_Advanced/<executable_name>/"
// subdir
"../../../../../7_CUDALibraries/<executable_name>/data/", // up 5 in
// tree,
// "/7_CUDALibraries/<executable_name>/"
// subdir
"../../../../../8_Android/<executable_name>/data/", // up 5 in tree,
// "/8_Android/<executable_name>/"
// subdir
"../../../../../samples/<executable_name>/data/", // up 5 in tree,
// "/samples/<executable_name>/"
// subdir
"../../../../../common/", // up 5 in tree, "../../../common/" subdir
"../../../../../common/data/", // up 5 in tree, "../../../common/data/"
// subdir
};
// Extract the executable name
std::string executable_name;
if (executable_path != 0) {
executable_name = std::string(executable_path);
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
// Windows path delimiter
size_t delimiter_pos = executable_name.find_last_of('\\');
executable_name.erase(0, delimiter_pos + 1);
if (executable_name.rfind(".exe") != std::string::npos) {
// we strip .exe, only if the .exe is found
executable_name.resize(executable_name.size() - 4);
}
#else
// Linux & OSX path delimiter
size_t delimiter_pos = executable_name.find_last_of('/');
executable_name.erase(0, delimiter_pos + 1);
#endif
}
// Loop over all search paths and return the first hit
for (unsigned int i = 0; i < sizeof(searchPath) / sizeof(char *); ++i) {
std::string path(searchPath[i]);
size_t executable_name_pos = path.find("<executable_name>");
// If there is executable_name variable in the searchPath
// replace it with the value
if (executable_name_pos != std::string::npos) {
if (executable_path != 0) {
path.replace(executable_name_pos, strlen("<executable_name>"),
executable_name);
} else {
// Skip this path entry if no executable argument is given
continue;
}
}
#ifdef _DEBUG
printf("sdkFindFilePath <%s> in %s\n", filename, path.c_str());
#endif
// Test if the file exists
path.append(filename);
FILE *fp;
FOPEN(fp, path.c_str(), "rb");
if (fp != NULL) {
fclose(fp);
// File found
// returning an allocated array here for backwards compatibility reasons
char *file_path = reinterpret_cast<char *>(malloc(path.length() + 1));
STRCPY(file_path, path.length() + 1, path.c_str());
return file_path;
}
if (fp) {
fclose(fp);
}
}
// File not found
return 0;
}
#endif // COMMON_HELPER_STRING_H_

450
helper_timer.h Normal file
View File

@ -0,0 +1,450 @@
/**
* Copyright 1993-2013 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
// Helper Timing Functions
#ifndef COMMON_HELPER_TIMER_H_
#define COMMON_HELPER_TIMER_H_
#ifndef EXIT_WAIVED
#define EXIT_WAIVED 2
#endif
// includes, system
#include <vector>
// includes, project
#include "exception.h"
// Definition of the StopWatch Interface, this is used if we don't want to use
// the CUT functions But rather in a self contained class interface
class StopWatchInterface {
public:
StopWatchInterface() {}
virtual ~StopWatchInterface() {}
public:
//! Start time measurement
virtual void start() = 0;
//! Stop time measurement
virtual void stop() = 0;
//! Reset time counters to zero
virtual void reset() = 0;
//! Time in msec. after start. If the stop watch is still running (i.e. there
//! was no call to stop()) then the elapsed time is returned, otherwise the
//! time between the last start() and stop call is returned
virtual float getTime() = 0;
//! Mean time to date based on the number of times the stopwatch has been
//! _stopped_ (ie finished sessions) and the current total time
virtual float getAverageTime() = 0;
};
//////////////////////////////////////////////////////////////////
// Begin Stopwatch timer class definitions for all OS platforms //
//////////////////////////////////////////////////////////////////
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
// includes, system
#define WINDOWS_LEAN_AND_MEAN
#include <windows.h>
#undef min
#undef max
//! Windows specific implementation of StopWatch
class StopWatchWin : public StopWatchInterface {
public:
//! Constructor, default
StopWatchWin()
: start_time(),
end_time(),
diff_time(0.0f),
total_time(0.0f),
running(false),
clock_sessions(0),
freq(0),
freq_set(false) {
if (!freq_set) {
// helper variable
LARGE_INTEGER temp;
// get the tick frequency from the OS
QueryPerformanceFrequency(reinterpret_cast<LARGE_INTEGER *>(&temp));
// convert to type in which it is needed
freq = (static_cast<double>(temp.QuadPart)) / 1000.0;
// rememeber query
freq_set = true;
}
}
// Destructor
~StopWatchWin() {}
public:
//! Start time measurement
inline void start();
//! Stop time measurement
inline void stop();
//! Reset time counters to zero
inline void reset();
//! Time in msec. after start. If the stop watch is still running (i.e. there
//! was no call to stop()) then the elapsed time is returned, otherwise the
//! time between the last start() and stop call is returned
inline float getTime();
//! Mean time to date based on the number of times the stopwatch has been
//! _stopped_ (ie finished sessions) and the current total time
inline float getAverageTime();
private:
// member variables
//! Start of measurement
LARGE_INTEGER start_time;
//! End of measurement
LARGE_INTEGER end_time;
//! Time difference between the last start and stop
float diff_time;
//! TOTAL time difference between starts and stops
float total_time;
//! flag if the stop watch is running
bool running;
//! Number of times clock has been started
//! and stopped to allow averaging
int clock_sessions;
//! tick frequency
double freq;
//! flag if the frequency has been set
bool freq_set;
};
// functions, inlined
////////////////////////////////////////////////////////////////////////////////
//! Start time measurement
////////////////////////////////////////////////////////////////////////////////
inline void StopWatchWin::start() {
QueryPerformanceCounter(reinterpret_cast<LARGE_INTEGER *>(&start_time));
running = true;
}
////////////////////////////////////////////////////////////////////////////////
//! Stop time measurement and increment add to the current diff_time summation
//! variable. Also increment the number of times this clock has been run.
////////////////////////////////////////////////////////////////////////////////
inline void StopWatchWin::stop() {
QueryPerformanceCounter(reinterpret_cast<LARGE_INTEGER *>(&end_time));
diff_time = static_cast<float>(((static_cast<double>(end_time.QuadPart) -
static_cast<double>(start_time.QuadPart)) /
freq));
total_time += diff_time;
clock_sessions++;
running = false;
}
////////////////////////////////////////////////////////////////////////////////
//! Reset the timer to 0. Does not change the timer running state but does
//! recapture this point in time as the current start time if it is running.
////////////////////////////////////////////////////////////////////////////////
inline void StopWatchWin::reset() {
diff_time = 0;
total_time = 0;
clock_sessions = 0;
if (running) {
QueryPerformanceCounter(reinterpret_cast<LARGE_INTEGER *>(&start_time));
}
}
////////////////////////////////////////////////////////////////////////////////
//! Time in msec. after start. If the stop watch is still running (i.e. there
//! was no call to stop()) then the elapsed time is returned added to the
//! current diff_time sum, otherwise the current summed time difference alone
//! is returned.
////////////////////////////////////////////////////////////////////////////////
inline float StopWatchWin::getTime() {
// Return the TOTAL time to date
float retval = total_time;
if (running) {
LARGE_INTEGER temp;
QueryPerformanceCounter(reinterpret_cast<LARGE_INTEGER *>(&temp));
retval += static_cast<float>(((static_cast<double>(temp.QuadPart) -
static_cast<double>(start_time.QuadPart)) /
freq));
}
return retval;
}
////////////////////////////////////////////////////////////////////////////////
//! Time in msec. for a single run based on the total number of COMPLETED runs
//! and the total time.
////////////////////////////////////////////////////////////////////////////////
inline float StopWatchWin::getAverageTime() {
return (clock_sessions > 0) ? (total_time / clock_sessions) : 0.0f;
}
#else
// Declarations for Stopwatch on Linux and Mac OSX
// includes, system
#include <sys/time.h>
#include <ctime>
//! Windows specific implementation of StopWatch
class StopWatchLinux : public StopWatchInterface {
public:
//! Constructor, default
StopWatchLinux()
: start_time(),
diff_time(0.0),
total_time(0.0),
running(false),
clock_sessions(0) {}
// Destructor
virtual ~StopWatchLinux() {}
public:
//! Start time measurement
inline void start();
//! Stop time measurement
inline void stop();
//! Reset time counters to zero
inline void reset();
//! Time in msec. after start. If the stop watch is still running (i.e. there
//! was no call to stop()) then the elapsed time is returned, otherwise the
//! time between the last start() and stop call is returned
inline float getTime();
//! Mean time to date based on the number of times the stopwatch has been
//! _stopped_ (ie finished sessions) and the current total time
inline float getAverageTime();
private:
// helper functions
//! Get difference between start time and current time
inline float getDiffTime();
private:
// member variables
//! Start of measurement
struct timeval start_time;
//! Time difference between the last start and stop
float diff_time;
//! TOTAL time difference between starts and stops
float total_time;
//! flag if the stop watch is running
bool running;
//! Number of times clock has been started
//! and stopped to allow averaging
int clock_sessions;
};
// functions, inlined
////////////////////////////////////////////////////////////////////////////////
//! Start time measurement
////////////////////////////////////////////////////////////////////////////////
inline void StopWatchLinux::start() {
gettimeofday(&start_time, 0);
running = true;
}
////////////////////////////////////////////////////////////////////////////////
//! Stop time measurement and increment add to the current diff_time summation
//! variable. Also increment the number of times this clock has been run.
////////////////////////////////////////////////////////////////////////////////
inline void StopWatchLinux::stop() {
diff_time = getDiffTime();
total_time += diff_time;
running = false;
clock_sessions++;
}
////////////////////////////////////////////////////////////////////////////////
//! Reset the timer to 0. Does not change the timer running state but does
//! recapture this point in time as the current start time if it is running.
////////////////////////////////////////////////////////////////////////////////
inline void StopWatchLinux::reset() {
diff_time = 0;
total_time = 0;
clock_sessions = 0;
if (running) {
gettimeofday(&start_time, 0);
}
}
////////////////////////////////////////////////////////////////////////////////
//! Time in msec. after start. If the stop watch is still running (i.e. there
//! was no call to stop()) then the elapsed time is returned added to the
//! current diff_time sum, otherwise the current summed time difference alone
//! is returned.
////////////////////////////////////////////////////////////////////////////////
inline float StopWatchLinux::getTime() {
// Return the TOTAL time to date
float retval = total_time;
if (running) {
retval += getDiffTime();
}
return retval;
}
////////////////////////////////////////////////////////////////////////////////
//! Time in msec. for a single run based on the total number of COMPLETED runs
//! and the total time.
////////////////////////////////////////////////////////////////////////////////
inline float StopWatchLinux::getAverageTime() {
return (clock_sessions > 0) ? (total_time / clock_sessions) : 0.0f;
}
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
inline float StopWatchLinux::getDiffTime() {
struct timeval t_time;
gettimeofday(&t_time, 0);
// time difference in milli-seconds
return static_cast<float>(1000.0 * (t_time.tv_sec - start_time.tv_sec) +
(0.001 * (t_time.tv_usec - start_time.tv_usec)));
}
#endif // WIN32
////////////////////////////////////////////////////////////////////////////////
//! Timer functionality exported
////////////////////////////////////////////////////////////////////////////////
//! Create a new timer
//! @return true if a time has been created, otherwise false
//! @param name of the new timer, 0 if the creation failed
////////////////////////////////////////////////////////////////////////////////
inline bool sdkCreateTimer(StopWatchInterface **timer_interface) {
// printf("sdkCreateTimer called object %08x\n", (void *)*timer_interface);
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
*timer_interface = reinterpret_cast<StopWatchInterface *>(new StopWatchWin());
#else
*timer_interface =
reinterpret_cast<StopWatchInterface *>(new StopWatchLinux());
#endif
return (*timer_interface != NULL) ? true : false;
}
////////////////////////////////////////////////////////////////////////////////
//! Delete a timer
//! @return true if a time has been deleted, otherwise false
//! @param name of the timer to delete
////////////////////////////////////////////////////////////////////////////////
inline bool sdkDeleteTimer(StopWatchInterface **timer_interface) {
// printf("sdkDeleteTimer called object %08x\n", (void *)*timer_interface);
if (*timer_interface) {
delete *timer_interface;
*timer_interface = NULL;
}
return true;
}
////////////////////////////////////////////////////////////////////////////////
//! Start the time with name \a name
//! @param name name of the timer to start
////////////////////////////////////////////////////////////////////////////////
inline bool sdkStartTimer(StopWatchInterface **timer_interface) {
// printf("sdkStartTimer called object %08x\n", (void *)*timer_interface);
if (*timer_interface) {
(*timer_interface)->start();
}
return true;
}
////////////////////////////////////////////////////////////////////////////////
//! Stop the time with name \a name. Does not reset.
//! @param name name of the timer to stop
////////////////////////////////////////////////////////////////////////////////
inline bool sdkStopTimer(StopWatchInterface **timer_interface) {
// printf("sdkStopTimer called object %08x\n", (void *)*timer_interface);
if (*timer_interface) {
(*timer_interface)->stop();
}
return true;
}
////////////////////////////////////////////////////////////////////////////////
//! Resets the timer's counter.
//! @param name name of the timer to reset.
////////////////////////////////////////////////////////////////////////////////
inline bool sdkResetTimer(StopWatchInterface **timer_interface) {
// printf("sdkResetTimer called object %08x\n", (void *)*timer_interface);
if (*timer_interface) {
(*timer_interface)->reset();
}
return true;
}
////////////////////////////////////////////////////////////////////////////////
//! Return the average time for timer execution as the total time
//! for the timer dividied by the number of completed (stopped) runs the timer
//! has made.
//! Excludes the current running time if the timer is currently running.
//! @param name name of the timer to return the time of
////////////////////////////////////////////////////////////////////////////////
inline float sdkGetAverageTimerValue(StopWatchInterface **timer_interface) {
// printf("sdkGetAverageTimerValue called object %08x\n", (void
// *)*timer_interface);
if (*timer_interface) {
return (*timer_interface)->getAverageTime();
} else {
return 0.0f;
}
}
////////////////////////////////////////////////////////////////////////////////
//! Total execution time for the timer over all runs since the last reset
//! or timer creation.
//! @param name name of the timer to obtain the value of.
////////////////////////////////////////////////////////////////////////////////
inline float sdkGetTimerValue(StopWatchInterface **timer_interface) {
// printf("sdkGetTimerValue called object %08x\n", (void *)*timer_interface);
if (*timer_interface) {
return (*timer_interface)->getTime();
} else {
return 0.0f;
}
}
#endif // COMMON_HELPER_TIMER_H_

BIN
montecarlo Executable file

Binary file not shown.

343
montecarlo.cu Normal file
View File

@ -0,0 +1,343 @@
// Monte Carlo simulation of a snowball fight:
// system includes
#include <stdio.h>
#include <assert.h>
#include <malloc.h>
#include <math.h>
#include <stdlib.h>
// CUDA runtime
#include <cuda_runtime.h>
// Helper functions and utilities to work with CUDA
#include "helper_functions.h"
#include "helper_cuda.h"
// setting the number of trials in the monte carlo simulation:
#ifndef NUMTRIALS
#define NUMTRIALS 1024
#endif
#ifndef BLOCKSIZE
#define BLOCKSIZE 8 // number of threads per block
#endif
#define NUMBLOCKS ( NUMTRIALS / BLOCKSIZE )
// ranges for the random numbers:
//#define PROJECT1
#ifdef PROJECT1
const float TXMIN = -10.0; // truck starting location in feet
const float TXMAX = 10.0; // truck starting location in feet
const float TYMIN = 45.0; // depth distance to truck in feet
const float TYMAX = 55.0; // depth distance to truck in feet
const float TXVMIN = 10.0; // truck x velocity in feet/sec
const float TXVMAX = 30.0; // truck x velocity in feet/sec
const float SVMIN = 10.0; // snowball velocity in feet/sec
const float SVMAX = 30.0; // snowball velocity in feet/sec
const float STHMIN = 10.0; // snowball launch angle in degrees
const float STHMAX = 90.0; // snowball launch angle in degrees
const float HALFLENMIN = 20.; // half length of the truck in feet
const float HALFLENMAX = 20.; // half length of the truck in feet
#else
const float TXMIN = -10.0; // truck starting location in feet
const float TXMAX = 10.0; // truck starting location in feet
const float TXVMIN = 15.0; // truck x velocity in feet/sec
const float TXVMAX = 35.0; // truck x velocity in feet/sec
const float TYMIN = 40.0; // depth distance to truck in feet
const float TYMAX = 50.0; // depth distance to truck in feet
const float SVMIN = 5.0; // snowball velocity in feet/sec
const float SVMAX = 30.0; // snowball velocity in feet/sec
const float STHMIN = 10.0; // snowball launch angle in degrees
const float STHMAX = 70.0; // snowball launch angle in degrees
const float HALFLENMIN = 15.; // half length of the truck in feet
const float HALFLENMAX = 30.; // half length of the truck in feet
#endif
// these are here just to be pretty labels, other than that, they do nothing:
#define IN
#define OUT
// function prototypes:
float Ranf( float, float );
int Ranf( int, int );
void TimeOfDaySeed( );
void
CudaCheckError()
{
cudaError_t e = cudaGetLastError();
if(e != cudaSuccess)
{
fprintf( stderr, "Cuda failure %s:%d: '%s'\n", __FILE__, __LINE__, cudaGetErrorString(e));
}
}
// degrees-to-radians:
__device__
float
Radians( float d )
{
return (M_PI/180.f) * d;
}
//Global means gpu and callable from cpu
__global__
void
MonteCarlo( IN float *dtxs, IN float *dtys, IN float *dtxvs, IN float *dsvs, IN float *dsths, IN float *dhalflens, OUT int *dhits )
{
//Not sure about this, as it's differnt in the project video.
//__shared__ int numHits[BLOCKSIZE];
//unsigned int numItems = blockDim.x;
//unsigned int wgNum = blockIdx.x;
//unsigned int tnum = threadIdx.x;
unsigned int gid = blockIdx.x*blockDim.x + threadIdx.x;
dhits[gid] = 0;
//Get stuff based off the gid/thread.
// randomize everything:
float tx = dtxs[gid];
float ty = dtys[gid];
float txv = dtxvs[gid];
float sv = dsvs[gid];
float sthd = dsths[gid];
float sthr = Radians(sthd);
float svx = sv * cos(sthr);
float svy = sv * sin(sthr);
//I don't see anythign of the half len so I put this here.
//float halflen = dhalflens[gid];
// how long until the snowball reaches the y depth:
float tstar = ty / svy;
float truckx = tx + txv * tstar;
float sbx = svx * tstar;
//check snowball location.
//Project video says HALF_LENGTH but I don't see that in the template file.
if( fabs(truckx - sbx) < dhalflens[gid])
{
dhits[gid] = 1;
}
}
// main program:
int
main( int argc, char* argv[ ] )
{
TimeOfDaySeed( );
//int dev = findCudaDevice(argc, (const char **)argv);
float *htxs = new float [NUMTRIALS];
float *htys = new float [NUMTRIALS];
float *htxvs = new float [NUMTRIALS];
float *hsvs = new float [NUMTRIALS];
float *hsths = new float [NUMTRIALS];
float *hhalflens = new float [NUMTRIALS];
// fill the random-value arrays:
for( int n = 0; n < NUMTRIALS; n++ )
{
htxs[n] = Ranf( TXMIN, TXMAX );
htys[n] = Ranf( TYMIN, TYMAX );
htxvs[n] = Ranf( TXVMIN, TXVMAX );
hsvs[n] = Ranf( SVMIN, SVMAX );
hsths[n] = Ranf( STHMIN, STHMAX );
hhalflens[n] = Ranf( HALFLENMIN, HALFLENMAX );
}
int *hhits = new int [NUMTRIALS];
// allocate device memory:
float *dtxs, *dtys, *dtxvs, *dsvs, *dsths, *dhalflens;
int *dhits;
//cudaError_t status;
cudaMalloc( (void **)(&dtxs), NUMTRIALS*sizeof(float) );
CudaCheckError( );
//Malloc the space for the devices ones.
cudaMalloc((void **)(&dtys), NUMTRIALS*sizeof(float) );
CudaCheckError( );
cudaMalloc((void **)(&dtxvs), NUMTRIALS*sizeof(float) );
CudaCheckError( );
cudaMalloc((void **)(&dsvs), NUMTRIALS*sizeof(float) );
CudaCheckError( );
cudaMalloc((void **)(&dsths), NUMTRIALS*sizeof(float) );
CudaCheckError( );
cudaMalloc((void **)(&dhalflens), NUMTRIALS*sizeof(float) );
CudaCheckError( );
cudaMalloc((void **)(&dhits), NUMTRIALS*sizeof(int) );
CudaCheckError( );
// copy host memory to the device:
cudaMemcpy( dtxs, htxs, NUMTRIALS*sizeof(float), cudaMemcpyHostToDevice );
CudaCheckError( );
cudaMemcpy( dtys, htys, NUMTRIALS*sizeof(float), cudaMemcpyHostToDevice );
CudaCheckError( );
cudaMemcpy( dtxvs, htxvs, NUMTRIALS*sizeof(float), cudaMemcpyHostToDevice );
CudaCheckError( );
cudaMemcpy( dsvs, hsvs, NUMTRIALS*sizeof(float), cudaMemcpyHostToDevice );
CudaCheckError( );
cudaMemcpy( dsths, hsths, NUMTRIALS*sizeof(float), cudaMemcpyHostToDevice );
CudaCheckError( );
cudaMemcpy( dhalflens, hhalflens, NUMTRIALS*sizeof(float), cudaMemcpyHostToDevice );
CudaCheckError( );
//The project video doesn't show this being done.....but it's part of the function...
cudaMemcpy( dhits, hhits, NUMTRIALS*sizeof(int), cudaMemcpyHostToDevice );
CudaCheckError( );
// setup the execution parameters:
dim3 threads(BLOCKSIZE, 1, 1 );
dim3 grid(NUMBLOCKS, 1, 1 );
// create and start timer
cudaDeviceSynchronize( );
// allocate CUDA events that we'll use for timing:
cudaEvent_t start, stop;
cudaEventCreate( &start );
CudaCheckError( );
cudaEventCreate( &stop );
CudaCheckError( );
// record the start event:
cudaEventRecord( start, NULL );
CudaCheckError( );
// execute the kernel:
MonteCarlo<<< grid, threads >>>( dtxs, dtys, dtxvs, dsvs, dsths, dhalflens, dhits );
// record the stop event:
cudaEventRecord( stop, NULL );
CudaCheckError( );
// wait for the stop event to complete:
cudaEventSynchronize( stop );
CudaCheckError( );
float msecTotal = 0.0f;
cudaEventElapsedTime( &msecTotal, start, stop );
CudaCheckError( );
// copy result from the device to the host:
cudaMemcpy( hhits, dhits, NUMTRIALS *sizeof(int), cudaMemcpyDeviceToHost );
CudaCheckError( );
// compute the sum :
int numHits = 0;
//????
for(int i = 0; i < NUMTRIALS; i++)
{
numHits += hhits[i];
//fprintf(stderr, "hhits[%d] = %5d ; Total numHits = %5d\n", i, hhits[i], numHits);
}
float probability = 100.f * (float)numHits / (float)NUMTRIALS;
// compute and printL
double secondsTotal = 0.001 * (double)msecTotal;
double trialsPerSecond = (float)NUMTRIALS / secondsTotal;
double megaTrialsPerSecond = trialsPerSecond / 1000000.;
fprintf( stderr, "Number of Trials = %10d, Blocksize = %8d, MegaTrials/Second = %10.4lf, Probability = %6.2f%%\n",
NUMTRIALS, BLOCKSIZE, megaTrialsPerSecond, probability );
// clean up memory:
delete [ ] htxs;
delete [ ] htys;
delete [ ] htxvs;
delete [ ] hsvs;
delete [ ] hsths;
delete [ ] hhits;
cudaFree( dtxs );
CudaCheckError( );
cudaFree( dtys );
CudaCheckError( );
cudaFree( dtxvs );
CudaCheckError( );
cudaFree( dsvs );
CudaCheckError( );
cudaFree( dsths );
CudaCheckError( );
cudaFree( dhits );
CudaCheckError( );
return 0;
}
float
Ranf( float low, float high )
{
float r = (float) rand(); // 0 - RAND_MAX
float t = r / (float) RAND_MAX; // 0. - 1.
return low + t * ( high - low );
}
int
Ranf( int ilow, int ihigh )
{
float low = (float)ilow;
float high = ceil( (float)ihigh );
return (int) Ranf(low,high);
}
void
TimeOfDaySeed( )
{
struct tm y2k = { 0 };
y2k.tm_hour = 0; y2k.tm_min = 0; y2k.tm_sec = 0;
y2k.tm_year = 100; y2k.tm_mon = 0; y2k.tm_mday = 1;
time_t timer;
time( &timer );
double seconds = difftime( timer, mktime(&y2k) );
unsigned int seed = (unsigned int)( 1000.*seconds ); // milliseconds
srand( seed );
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 29 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 24 KiB

10
script.sh Normal file
View File

@ -0,0 +1,10 @@
#!/bin/bash
for t in 1024 4096 16384 65536 262144 1048576 2097152 4194304
do
for b in 8 32 128
do
echo "Running with $t trials and $b blocksize"
/usr/local/apps/cuda/cuda-10.1/bin/nvcc -DNUMTRIALS=$t -DBLOCKSIZE=$b -o montecarlo montecarlo.cu
./montecarlo 2>&1 | tee -a data.csv
done
done

1
uptime.txt Normal file
View File

@ -0,0 +1 @@
17:40:16 up 154 days, 4:15, 62 users, load average: 5.86, 9.92, 12.18