migrated repo contents from old git server
This commit is contained in:
parent
fe1a71aed4
commit
f64d44b563
|
@ -0,0 +1,21 @@
|
|||
# Location of the CUDA toolkit and of the nvcc compiler driver inside it.
# Override on the command line if needed, e.g. `make CUDA_PATH=/usr/local/cuda`.
CUDA_PATH	=	/usr/local/apps/cuda/cuda-10.1
CUDA_BIN_PATH	=	$(CUDA_PATH)/bin
CUDA_NVCC	=	$(CUDA_BIN_PATH)/nvcc

# carlotest and clean never leave a file of their own name behind, so they
# must run every time they are requested.
.PHONY: carlotest clean

# Build the Monte Carlo program.
montecarlo: montecarlo.cu
	$(CUDA_NVCC) -o montecarlo montecarlo.cu

# Build a throw-away copy of the Monte Carlo program, run it once, remove it.
carlotest: montecarlo.cu
	$(CUDA_NVCC) -o carlotest montecarlo.cu
	./carlotest
	rm -f ./carlotest

# Build the array-multiplication program; rebuilt whenever arrayMul.cu changes.
test: arrayMul.cu
	$(CUDA_NVCC) --verbose -o test arrayMul.cu

# Remove every binary this Makefile can produce; -f keeps going when a binary
# was never built.
clean:
	rm -f ./montecarlo ./test ./carlotest
|
|
@ -0,0 +1,143 @@
|
|||
// Array multiplication: C = A * B:
|
||||
|
||||
// System includes
|
||||
#include <stdio.h>
|
||||
#include <assert.h>
|
||||
#include <malloc.h>
|
||||
#include <math.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
// CUDA runtime
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
// Helper functions and utilities to work with CUDA
|
||||
#include "helper_functions.h"
|
||||
#include "helper_cuda.h"
|
||||
|
||||
|
||||
#ifndef THREADS_PER_BLOCK
|
||||
#define THREADS_PER_BLOCK 128 // number of threads in each block
|
||||
#endif
|
||||
|
||||
#ifndef DATASET_SIZE
|
||||
#define DATASET_SIZE ( 8*1024*1024 ) // size of the array
|
||||
#endif
|
||||
|
||||
float hA[ DATASET_SIZE ];
|
||||
float hB[ DATASET_SIZE ];
|
||||
float hC[ DATASET_SIZE ];
|
||||
|
||||
#ifndef TOL
|
||||
#define TOL 0.00001f // tolerance to relative error
|
||||
#endif
|
||||
|
||||
// Reports the most recent CUDA runtime error, if any, on stderr.
//
// cudaGetLastError() is used, so the pending error is read and handed back
// by the runtime in one call. The function only prints; it does not stop the
// program.
//
// file, line: source location to print in the message. Callers normally do
// not pass these directly but go through the CudaCheckError() macro below,
// which supplies the location of the call site.
static inline void
CudaCheckErrorAt( const char *file, int line )
{
	cudaError_t e = cudaGetLastError( );
	if( e != cudaSuccess )
	{
		fprintf( stderr, "CUDA failure %s:%d: '%s'\n", file, line, cudaGetErrorString(e) );
	}
}

// Call-site wrapper: __FILE__ / __LINE__ must be expanded where the check is
// written, otherwise every message would name the line of the fprintf above
// and the failing call could not be identified.
#define CudaCheckError()	CudaCheckErrorAt( __FILE__, __LINE__ )
|
||||
|
||||
// Device kernel for element-wise array multiplication: C[i] = A[i] * B[i].
//
// Each thread derives one flat index from the x components of its block and
// thread ids and handles that single element. Threads whose index is at or
// beyond DATASET_SIZE do nothing.

__global__ void ArrayMul( float *A, float *B, float *C )
{
	const int idx = threadIdx.x + blockDim.x*blockIdx.x;

	// guard the tail: surplus threads must not touch memory
	if( idx >= DATASET_SIZE )
		return;

	C[idx] = A[idx] * B[idx];
}
|
||||
|
||||
|
||||
|
||||
// main program:
//
// Fills hA and hB with sqrtf(i), copies them to the device, launches ArrayMul
// once while timing it with a pair of CUDA events, prints
// "DATASET_SIZE, THREADS_PER_BLOCK, MegaMults/sec" on stderr, and copies the
// product back into hC. CUDA errors are reported through CudaCheckError()
// but do not stop the run. Always returns 0.

int
main( int argc, char* argv[ ] )
{
	//int dev = findCudaDevice(argc, (const char **)argv);

	// fill host memory:

	for( int i = 0; i < DATASET_SIZE; i++ )
	{
		hA[i] = hB[i] = sqrtf( (float)i );
	}

	// allocate device memory:
	// (pointers start out NULL so that a failed cudaMalloc never leaves an
	// indeterminate value to be handed to the later copy / free calls)

	float *dA = NULL, *dB = NULL, *dC = NULL;

	cudaMalloc( (void **)(&dA), sizeof(hA) );
	cudaMalloc( (void **)(&dB), sizeof(hB) );
	cudaMalloc( (void **)(&dC), sizeof(hC) );
	CudaCheckError( );

	// copy host memory to the device:

	cudaMemcpy( dA, hA, DATASET_SIZE*sizeof(float), cudaMemcpyHostToDevice );
	cudaMemcpy( dB, hB, DATASET_SIZE*sizeof(float), cudaMemcpyHostToDevice );
	CudaCheckError( );

	// setup the execution parameters:
	// (round the block count up: with a truncating division the last
	// DATASET_SIZE % THREADS_PER_BLOCK elements would never be computed when
	// the sizes do not divide evenly; the kernel bounds-checks its index, so
	// the surplus threads in the final block are harmless)

	dim3 grid( ( (DATASET_SIZE) + (THREADS_PER_BLOCK) - 1 ) / (THREADS_PER_BLOCK), 1, 1 );
	dim3 threads( THREADS_PER_BLOCK, 1, 1 );

	// create and start the timer:

	cudaDeviceSynchronize( );

	// allocate the events that we'll use for timing:

	cudaEvent_t start, stop;
	cudaEventCreate( &start );
	cudaEventCreate( &stop );
	CudaCheckError( );

	// record the start event:

	cudaEventRecord( start, NULL );
	CudaCheckError( );

	// execute the kernel:

	ArrayMul<<< grid, threads >>>( dA, dB, dC );

	// record the stop event:

	cudaEventRecord( stop, NULL );
	CudaCheckError( );

	// wait for the stop event to complete:

	cudaEventSynchronize( stop );
	CudaCheckError( );

	// (initialized so that a failed cudaEventElapsedTime cannot leave an
	// indeterminate value to be read below)
	float msecTotal = 0.0f;
	cudaEventElapsedTime( &msecTotal, start, stop );
	CudaCheckError( );

	// compute and print the performance

	double secondsTotal = 0.001 * (double)msecTotal;
	double multsPerSecond = (double)DATASET_SIZE / secondsTotal;
	double megaMultsPerSecond = multsPerSecond / 1000000.;
	fprintf( stderr, "%12d, %4d, %10.2lf\n", DATASET_SIZE, THREADS_PER_BLOCK, megaMultsPerSecond );

	// copy result from the device to the host:

	cudaMemcpy( hC, dC, sizeof(hC), cudaMemcpyDeviceToHost );
	CudaCheckError( );

	// clean up:
	// (the timing events are runtime resources too and are released here
	// together with the device buffers)

	cudaEventDestroy( start );
	cudaEventDestroy( stop );
	cudaFree( dA );
	cudaFree( dB );
	cudaFree( dC );
	CudaCheckError( );

	return 0;
}
|
Binary file not shown.
|
@ -0,0 +1,24 @@
|
|||
Number of Trials = 1024, Blocksize = 8, MegaTrials/Second = 10.5229, Probability = 24.61%
|
||||
Number of Trials = 1024, Blocksize = 32, MegaTrials/Second = 9.2272, Probability = 24.51%
|
||||
Number of Trials = 1024, Blocksize = 128, MegaTrials/Second = 11.7302, Probability = 22.07%
|
||||
Number of Trials = 4096, Blocksize = 8, MegaTrials/Second = 40.1254, Probability = 22.12%
|
||||
Number of Trials = 4096, Blocksize = 32, MegaTrials/Second = 35.5753, Probability = 23.19%
|
||||
Number of Trials = 4096, Blocksize = 128, MegaTrials/Second = 37.2635, Probability = 22.22%
|
||||
Number of Trials = 16384, Blocksize = 8, MegaTrials/Second = 138.5281, Probability = 22.83%
|
||||
Number of Trials = 16384, Blocksize = 32, MegaTrials/Second = 155.2456, Probability = 22.67%
|
||||
Number of Trials = 16384, Blocksize = 128, MegaTrials/Second = 162.3850, Probability = 22.71%
|
||||
Number of Trials = 65536, Blocksize = 8, MegaTrials/Second = 371.7553, Probability = 22.45%
|
||||
Number of Trials = 65536, Blocksize = 32, MegaTrials/Second = 583.6421, Probability = 22.31%
|
||||
Number of Trials = 65536, Blocksize = 128, MegaTrials/Second = 688.6348, Probability = 22.26%
|
||||
Number of Trials = 262144, Blocksize = 8, MegaTrials/Second = 779.2257, Probability = 22.40%
|
||||
Number of Trials = 262144, Blocksize = 32, MegaTrials/Second = 1620.2532, Probability = 22.59%
|
||||
Number of Trials = 262144, Blocksize = 128, MegaTrials/Second = 40.6791, Probability = 22.47%
|
||||
Number of Trials = 1048576, Blocksize = 8, MegaTrials/Second = 953.2787, Probability = 22.47%
|
||||
Number of Trials = 1048576, Blocksize = 32, MegaTrials/Second = 2574.4814, Probability = 22.50%
|
||||
Number of Trials = 1048576, Blocksize = 128, MegaTrials/Second = 3121.0593, Probability = 22.48%
|
||||
Number of Trials = 2097152, Blocksize = 8, MegaTrials/Second = 1066.0073, Probability = 22.48%
|
||||
Number of Trials = 2097152, Blocksize = 32, MegaTrials/Second = 3028.0461, Probability = 22.47%
|
||||
Number of Trials = 2097152, Blocksize = 128, MegaTrials/Second = 5043.5587, Probability = 22.59%
|
||||
Number of Trials = 4194304, Blocksize = 8, MegaTrials/Second = 1094.4830, Probability = 22.53%
|
||||
Number of Trials = 4194304, Blocksize = 32, MegaTrials/Second = 3745.0211, Probability = 22.51%
|
||||
Number of Trials = 4194304, Blocksize = 128, MegaTrials/Second = 5873.9802, Probability = 22.50%
|
|
Binary file not shown.
|
@ -0,0 +1,135 @@
|
|||
/*
|
||||
* Copyright 1993-2017 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* Please refer to the NVIDIA end user license agreement (EULA) associated
|
||||
* with this source code for terms and conditions that govern your use of
|
||||
* this software. Any use, reproduction, disclosure, or distribution of
|
||||
* this software and related documentation outside the terms of the EULA
|
||||
* is strictly prohibited.
|
||||
*
|
||||
*/
|
||||
|
||||
/* CUda UTility Library */
|
||||
#ifndef COMMON_EXCEPTION_H_
|
||||
#define COMMON_EXCEPTION_H_
|
||||
|
||||
// includes, system
|
||||
#include <stdlib.h>
|
||||
#include <exception>
|
||||
#include <iostream>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
|
||||
//! Exception wrapper.
//! Derives from the given standard exception type. Both constructors are
//! private, so objects are created only through the static throw_it()
//! helpers.
//! @param Std_Exception Exception out of namespace std for easy typing.
template <class Std_Exception>
class Exception : public Std_Exception {
 private:
  //! Constructor, default (private)
  Exception();

  //! Constructor, standard
  //! @param str string returned by what()
  explicit Exception(const std::string &str);

 public:
  //! Destructor
  virtual ~Exception() throw();

  //! @brief Static construction interface
  //! @return Always throws ( Located_Exception<Exception>)
  //! @param file file in which the Exception occurs
  //! @param line line in which the Exception occurs
  //! @param detailed details on the code fragment causing the Exception
  static void throw_it(const char *file, const int line,
                       const char *detailed = "-");

  //! @brief Static construction interface, std::string flavour
  //! @return Always throws ( Located_Exception<Exception>)
  //! @param file file in which the Exception occurs
  //! @param line line in which the Exception occurs
  //! @param detailed details on the code fragment causing the Exception
  static void throw_it(const char *file, const int line,
                       const std::string &detailed);
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
//! Exception handler function for arbitrary exceptions.
//! Writes ex.what() followed by a newline to std::cerr, then terminates the
//! process with EXIT_FAILURE; it does not return.
//! @param ex exception to handle (any type providing what())
////////////////////////////////////////////////////////////////////////////////
template <class Exception_Typ>
inline void handleException(const Exception_Typ &ex) {
  std::cerr << ex.what();
  std::cerr << std::endl;

  exit(EXIT_FAILURE);
}
|
||||
|
||||
//! Convenience macros
|
||||
|
||||
//! Exception caused by dynamic program behavior, e.g. file does not exist
|
||||
#define RUNTIME_EXCEPTION(msg) \
|
||||
Exception<std::runtime_error>::throw_it(__FILE__, __LINE__, msg)
|
||||
|
||||
//! Logic exception in program, e.g. an assert failed
|
||||
#define LOGIC_EXCEPTION(msg) \
|
||||
Exception<std::logic_error>::throw_it(__FILE__, __LINE__, msg)
|
||||
|
||||
//! Out of range exception
|
||||
#define RANGE_EXCEPTION(msg) \
|
||||
Exception<std::range_error>::throw_it(__FILE__, __LINE__, msg)
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Implementation
|
||||
|
||||
// includes, system
|
||||
#include <sstream>
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Static construction interface.
|
||||
//! @param Exception causing code fragment (file and line) and detailed infos.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/*static*/ template <class Std_Exception>
|
||||
void Exception<Std_Exception>::throw_it(const char *file, const int line,
|
||||
const char *detailed) {
|
||||
std::stringstream s;
|
||||
|
||||
// Quiet heavy-weight but exceptions are not for
|
||||
// performance / release versions
|
||||
s << "Exception in file '" << file << "' in line " << line << "\n"
|
||||
<< "Detailed description: " << detailed << "\n";
|
||||
|
||||
throw Exception(s.str());
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Static construction interface.
|
||||
//! @param Exception causing code fragment (file and line) and detailed infos.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/*static*/ template <class Std_Exception>
|
||||
void Exception<Std_Exception>::throw_it(const char *file, const int line,
|
||||
const std::string &msg) {
|
||||
throw_it(file, line, msg.c_str());
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Constructor, default (private).
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
template <class Std_Exception>
|
||||
Exception<Std_Exception>::Exception() : Std_Exception("Unknown Exception.\n") {}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Constructor, standard (private).
|
||||
//! String returned by what().
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
template <class Std_Exception>
|
||||
Exception<Std_Exception>::Exception(const std::string &s) : Std_Exception(s) {}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Destructor
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
template <class Std_Exception>
|
||||
Exception<Std_Exception>::~Exception() throw() {}
|
||||
|
||||
// functions, exported
|
||||
|
||||
#endif // COMMON_EXCEPTION_H_
|
|
@ -0,0 +1,26 @@
|
|||
Architecture: x86_64
|
||||
CPU op-mode(s): 32-bit, 64-bit
|
||||
Byte Order: Little Endian
|
||||
CPU(s): 32
|
||||
On-line CPU(s) list: 0-31
|
||||
Thread(s) per core: 2
|
||||
Core(s) per socket: 8
|
||||
Socket(s): 2
|
||||
NUMA node(s): 2
|
||||
Vendor ID: GenuineIntel
|
||||
CPU family: 6
|
||||
Model: 63
|
||||
Model name: Intel(R) Xeon(R) CPU E5-2630 v3 @ 2.40GHz
|
||||
Stepping: 2
|
||||
CPU MHz: 1258.007
|
||||
CPU max MHz: 3200.0000
|
||||
CPU min MHz: 1200.0000
|
||||
BogoMIPS: 4800.00
|
||||
Virtualization: VT-x
|
||||
L1d cache: 32K
|
||||
L1i cache: 32K
|
||||
L2 cache: 256K
|
||||
L3 cache: 20480K
|
||||
NUMA node0 CPU(s): 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30
|
||||
NUMA node1 CPU(s): 1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31
|
||||
Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc aperfmperf eagerfpu pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm epb invpcid_single ssbd ibrs ibpb stibp tpr_shadow vnmi flexpriority ept vpid fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid cqm xsaveopt cqm_llc cqm_occup_llc dtherm ida arat pln pts md_clear spec_ctrl intel_stibp flush_l1d
|
|
@ -0,0 +1,898 @@
|
|||
/**
|
||||
* Copyright 1993-2017 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* Please refer to the NVIDIA end user license agreement (EULA) associated
|
||||
* with this source code for terms and conditions that govern your use of
|
||||
* this software. Any use, reproduction, disclosure, or distribution of
|
||||
* this software and related documentation outside the terms of the EULA
|
||||
* is strictly prohibited.
|
||||
*
|
||||
*/
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// These are CUDA Helper functions for initialization and error checking
|
||||
|
||||
#ifndef COMMON_HELPER_CUDA_H_
|
||||
#define COMMON_HELPER_CUDA_H_
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "helper_string.h"
|
||||
|
||||
#ifndef EXIT_WAIVED
|
||||
#define EXIT_WAIVED 2
|
||||
#endif
|
||||
|
||||
// Note: your SDK sample is required to include the proper header
|
||||
// files; please refer to the CUDA examples for the needed CUDA
|
||||
// headers, which may change depending on which CUDA functions are used.
|
||||
|
||||
// CUDA Runtime error messages
|
||||
#ifdef __DRIVER_TYPES_H__
|
||||
// Returns whatever cudaGetErrorName() reports for a CUDA Runtime error code.
static const char *_cudaGetErrorEnum(cudaError_t error) {
  const char *name = cudaGetErrorName(error);
  return name;
}
|
||||
#endif
|
||||
|
||||
#ifdef CUDA_DRIVER_API
|
||||
// CUDA Driver API errors
|
||||
static const char *_cudaGetErrorEnum(CUresult error) {
|
||||
static char unknown[] = "<unknown>";
|
||||
const char *ret = NULL;
|
||||
cuGetErrorName(error, &ret);
|
||||
return ret ? ret : unknown;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CUBLAS_API_H_
|
||||
// cuBLAS API errors
|
||||
static const char *_cudaGetErrorEnum(cublasStatus_t error) {
|
||||
switch (error) {
|
||||
case CUBLAS_STATUS_SUCCESS:
|
||||
return "CUBLAS_STATUS_SUCCESS";
|
||||
|
||||
case CUBLAS_STATUS_NOT_INITIALIZED:
|
||||
return "CUBLAS_STATUS_NOT_INITIALIZED";
|
||||
|
||||
case CUBLAS_STATUS_ALLOC_FAILED:
|
||||
return "CUBLAS_STATUS_ALLOC_FAILED";
|
||||
|
||||
case CUBLAS_STATUS_INVALID_VALUE:
|
||||
return "CUBLAS_STATUS_INVALID_VALUE";
|
||||
|
||||
case CUBLAS_STATUS_ARCH_MISMATCH:
|
||||
return "CUBLAS_STATUS_ARCH_MISMATCH";
|
||||
|
||||
case CUBLAS_STATUS_MAPPING_ERROR:
|
||||
return "CUBLAS_STATUS_MAPPING_ERROR";
|
||||
|
||||
case CUBLAS_STATUS_EXECUTION_FAILED:
|
||||
return "CUBLAS_STATUS_EXECUTION_FAILED";
|
||||
|
||||
case CUBLAS_STATUS_INTERNAL_ERROR:
|
||||
return "CUBLAS_STATUS_INTERNAL_ERROR";
|
||||
|
||||
case CUBLAS_STATUS_NOT_SUPPORTED:
|
||||
return "CUBLAS_STATUS_NOT_SUPPORTED";
|
||||
|
||||
case CUBLAS_STATUS_LICENSE_ERROR:
|
||||
return "CUBLAS_STATUS_LICENSE_ERROR";
|
||||
}
|
||||
|
||||
return "<unknown>";
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef _CUFFT_H_
|
||||
// cuFFT API errors
|
||||
static const char *_cudaGetErrorEnum(cufftResult error) {
|
||||
switch (error) {
|
||||
case CUFFT_SUCCESS:
|
||||
return "CUFFT_SUCCESS";
|
||||
|
||||
case CUFFT_INVALID_PLAN:
|
||||
return "CUFFT_INVALID_PLAN";
|
||||
|
||||
case CUFFT_ALLOC_FAILED:
|
||||
return "CUFFT_ALLOC_FAILED";
|
||||
|
||||
case CUFFT_INVALID_TYPE:
|
||||
return "CUFFT_INVALID_TYPE";
|
||||
|
||||
case CUFFT_INVALID_VALUE:
|
||||
return "CUFFT_INVALID_VALUE";
|
||||
|
||||
case CUFFT_INTERNAL_ERROR:
|
||||
return "CUFFT_INTERNAL_ERROR";
|
||||
|
||||
case CUFFT_EXEC_FAILED:
|
||||
return "CUFFT_EXEC_FAILED";
|
||||
|
||||
case CUFFT_SETUP_FAILED:
|
||||
return "CUFFT_SETUP_FAILED";
|
||||
|
||||
case CUFFT_INVALID_SIZE:
|
||||
return "CUFFT_INVALID_SIZE";
|
||||
|
||||
case CUFFT_UNALIGNED_DATA:
|
||||
return "CUFFT_UNALIGNED_DATA";
|
||||
|
||||
case CUFFT_INCOMPLETE_PARAMETER_LIST:
|
||||
return "CUFFT_INCOMPLETE_PARAMETER_LIST";
|
||||
|
||||
case CUFFT_INVALID_DEVICE:
|
||||
return "CUFFT_INVALID_DEVICE";
|
||||
|
||||
case CUFFT_PARSE_ERROR:
|
||||
return "CUFFT_PARSE_ERROR";
|
||||
|
||||
case CUFFT_NO_WORKSPACE:
|
||||
return "CUFFT_NO_WORKSPACE";
|
||||
|
||||
case CUFFT_NOT_IMPLEMENTED:
|
||||
return "CUFFT_NOT_IMPLEMENTED";
|
||||
|
||||
case CUFFT_LICENSE_ERROR:
|
||||
return "CUFFT_LICENSE_ERROR";
|
||||
|
||||
case CUFFT_NOT_SUPPORTED:
|
||||
return "CUFFT_NOT_SUPPORTED";
|
||||
}
|
||||
|
||||
return "<unknown>";
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CUSPARSEAPI
|
||||
// cuSPARSE API errors
|
||||
static const char *_cudaGetErrorEnum(cusparseStatus_t error) {
|
||||
switch (error) {
|
||||
case CUSPARSE_STATUS_SUCCESS:
|
||||
return "CUSPARSE_STATUS_SUCCESS";
|
||||
|
||||
case CUSPARSE_STATUS_NOT_INITIALIZED:
|
||||
return "CUSPARSE_STATUS_NOT_INITIALIZED";
|
||||
|
||||
case CUSPARSE_STATUS_ALLOC_FAILED:
|
||||
return "CUSPARSE_STATUS_ALLOC_FAILED";
|
||||
|
||||
case CUSPARSE_STATUS_INVALID_VALUE:
|
||||
return "CUSPARSE_STATUS_INVALID_VALUE";
|
||||
|
||||
case CUSPARSE_STATUS_ARCH_MISMATCH:
|
||||
return "CUSPARSE_STATUS_ARCH_MISMATCH";
|
||||
|
||||
case CUSPARSE_STATUS_MAPPING_ERROR:
|
||||
return "CUSPARSE_STATUS_MAPPING_ERROR";
|
||||
|
||||
case CUSPARSE_STATUS_EXECUTION_FAILED:
|
||||
return "CUSPARSE_STATUS_EXECUTION_FAILED";
|
||||
|
||||
case CUSPARSE_STATUS_INTERNAL_ERROR:
|
||||
return "CUSPARSE_STATUS_INTERNAL_ERROR";
|
||||
|
||||
case CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED:
|
||||
return "CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED";
|
||||
}
|
||||
|
||||
return "<unknown>";
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CUSOLVER_COMMON_H_
|
||||
// cuSOLVER API errors
|
||||
static const char *_cudaGetErrorEnum(cusolverStatus_t error) {
|
||||
switch (error) {
|
||||
case CUSOLVER_STATUS_SUCCESS:
|
||||
return "CUSOLVER_STATUS_SUCCESS";
|
||||
case CUSOLVER_STATUS_NOT_INITIALIZED:
|
||||
return "CUSOLVER_STATUS_NOT_INITIALIZED";
|
||||
case CUSOLVER_STATUS_ALLOC_FAILED:
|
||||
return "CUSOLVER_STATUS_ALLOC_FAILED";
|
||||
case CUSOLVER_STATUS_INVALID_VALUE:
|
||||
return "CUSOLVER_STATUS_INVALID_VALUE";
|
||||
case CUSOLVER_STATUS_ARCH_MISMATCH:
|
||||
return "CUSOLVER_STATUS_ARCH_MISMATCH";
|
||||
case CUSOLVER_STATUS_MAPPING_ERROR:
|
||||
return "CUSOLVER_STATUS_MAPPING_ERROR";
|
||||
case CUSOLVER_STATUS_EXECUTION_FAILED:
|
||||
return "CUSOLVER_STATUS_EXECUTION_FAILED";
|
||||
case CUSOLVER_STATUS_INTERNAL_ERROR:
|
||||
return "CUSOLVER_STATUS_INTERNAL_ERROR";
|
||||
case CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED:
|
||||
return "CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED";
|
||||
case CUSOLVER_STATUS_NOT_SUPPORTED:
|
||||
return "CUSOLVER_STATUS_NOT_SUPPORTED ";
|
||||
case CUSOLVER_STATUS_ZERO_PIVOT:
|
||||
return "CUSOLVER_STATUS_ZERO_PIVOT";
|
||||
case CUSOLVER_STATUS_INVALID_LICENSE:
|
||||
return "CUSOLVER_STATUS_INVALID_LICENSE";
|
||||
}
|
||||
|
||||
return "<unknown>";
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CURAND_H_
|
||||
// cuRAND API errors
|
||||
static const char *_cudaGetErrorEnum(curandStatus_t error) {
|
||||
switch (error) {
|
||||
case CURAND_STATUS_SUCCESS:
|
||||
return "CURAND_STATUS_SUCCESS";
|
||||
|
||||
case CURAND_STATUS_VERSION_MISMATCH:
|
||||
return "CURAND_STATUS_VERSION_MISMATCH";
|
||||
|
||||
case CURAND_STATUS_NOT_INITIALIZED:
|
||||
return "CURAND_STATUS_NOT_INITIALIZED";
|
||||
|
||||
case CURAND_STATUS_ALLOCATION_FAILED:
|
||||
return "CURAND_STATUS_ALLOCATION_FAILED";
|
||||
|
||||
case CURAND_STATUS_TYPE_ERROR:
|
||||
return "CURAND_STATUS_TYPE_ERROR";
|
||||
|
||||
case CURAND_STATUS_OUT_OF_RANGE:
|
||||
return "CURAND_STATUS_OUT_OF_RANGE";
|
||||
|
||||
case CURAND_STATUS_LENGTH_NOT_MULTIPLE:
|
||||
return "CURAND_STATUS_LENGTH_NOT_MULTIPLE";
|
||||
|
||||
case CURAND_STATUS_DOUBLE_PRECISION_REQUIRED:
|
||||
return "CURAND_STATUS_DOUBLE_PRECISION_REQUIRED";
|
||||
|
||||
case CURAND_STATUS_LAUNCH_FAILURE:
|
||||
return "CURAND_STATUS_LAUNCH_FAILURE";
|
||||
|
||||
case CURAND_STATUS_PREEXISTING_FAILURE:
|
||||
return "CURAND_STATUS_PREEXISTING_FAILURE";
|
||||
|
||||
case CURAND_STATUS_INITIALIZATION_FAILED:
|
||||
return "CURAND_STATUS_INITIALIZATION_FAILED";
|
||||
|
||||
case CURAND_STATUS_ARCH_MISMATCH:
|
||||
return "CURAND_STATUS_ARCH_MISMATCH";
|
||||
|
||||
case CURAND_STATUS_INTERNAL_ERROR:
|
||||
return "CURAND_STATUS_INTERNAL_ERROR";
|
||||
}
|
||||
|
||||
return "<unknown>";
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef NVJPEGAPI
|
||||
// nvJPEG API errors
|
||||
static const char *_cudaGetErrorEnum(nvjpegStatus_t error) {
|
||||
switch (error) {
|
||||
case NVJPEG_STATUS_SUCCESS:
|
||||
return "NVJPEG_STATUS_SUCCESS";
|
||||
|
||||
case NVJPEG_STATUS_NOT_INITIALIZED:
|
||||
return "NVJPEG_STATUS_NOT_INITIALIZED";
|
||||
|
||||
case NVJPEG_STATUS_INVALID_PARAMETER:
|
||||
return "NVJPEG_STATUS_INVALID_PARAMETER";
|
||||
|
||||
case NVJPEG_STATUS_BAD_JPEG:
|
||||
return "NVJPEG_STATUS_BAD_JPEG";
|
||||
|
||||
case NVJPEG_STATUS_JPEG_NOT_SUPPORTED:
|
||||
return "NVJPEG_STATUS_JPEG_NOT_SUPPORTED";
|
||||
|
||||
case NVJPEG_STATUS_ALLOCATOR_FAILURE:
|
||||
return "NVJPEG_STATUS_ALLOCATOR_FAILURE";
|
||||
|
||||
case NVJPEG_STATUS_EXECUTION_FAILED:
|
||||
return "NVJPEG_STATUS_EXECUTION_FAILED";
|
||||
|
||||
case NVJPEG_STATUS_ARCH_MISMATCH:
|
||||
return "NVJPEG_STATUS_ARCH_MISMATCH";
|
||||
|
||||
case NVJPEG_STATUS_INTERNAL_ERROR:
|
||||
return "NVJPEG_STATUS_INTERNAL_ERROR";
|
||||
}
|
||||
|
||||
return "<unknown>";
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef NV_NPPIDEFS_H
|
||||
// NPP API errors
|
||||
static const char *_cudaGetErrorEnum(NppStatus error) {
|
||||
switch (error) {
|
||||
case NPP_NOT_SUPPORTED_MODE_ERROR:
|
||||
return "NPP_NOT_SUPPORTED_MODE_ERROR";
|
||||
|
||||
case NPP_ROUND_MODE_NOT_SUPPORTED_ERROR:
|
||||
return "NPP_ROUND_MODE_NOT_SUPPORTED_ERROR";
|
||||
|
||||
case NPP_RESIZE_NO_OPERATION_ERROR:
|
||||
return "NPP_RESIZE_NO_OPERATION_ERROR";
|
||||
|
||||
case NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY:
|
||||
return "NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY";
|
||||
|
||||
#if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) <= 0x5000
|
||||
|
||||
case NPP_BAD_ARG_ERROR:
|
||||
return "NPP_BAD_ARGUMENT_ERROR";
|
||||
|
||||
case NPP_COEFF_ERROR:
|
||||
return "NPP_COEFFICIENT_ERROR";
|
||||
|
||||
case NPP_RECT_ERROR:
|
||||
return "NPP_RECTANGLE_ERROR";
|
||||
|
||||
case NPP_QUAD_ERROR:
|
||||
return "NPP_QUADRANGLE_ERROR";
|
||||
|
||||
case NPP_MEM_ALLOC_ERR:
|
||||
return "NPP_MEMORY_ALLOCATION_ERROR";
|
||||
|
||||
case NPP_HISTO_NUMBER_OF_LEVELS_ERROR:
|
||||
return "NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR";
|
||||
|
||||
case NPP_INVALID_INPUT:
|
||||
return "NPP_INVALID_INPUT";
|
||||
|
||||
case NPP_POINTER_ERROR:
|
||||
return "NPP_POINTER_ERROR";
|
||||
|
||||
case NPP_WARNING:
|
||||
return "NPP_WARNING";
|
||||
|
||||
case NPP_ODD_ROI_WARNING:
|
||||
return "NPP_ODD_ROI_WARNING";
|
||||
#else
|
||||
|
||||
// These are for CUDA 5.5 or higher
|
||||
case NPP_BAD_ARGUMENT_ERROR:
|
||||
return "NPP_BAD_ARGUMENT_ERROR";
|
||||
|
||||
case NPP_COEFFICIENT_ERROR:
|
||||
return "NPP_COEFFICIENT_ERROR";
|
||||
|
||||
case NPP_RECTANGLE_ERROR:
|
||||
return "NPP_RECTANGLE_ERROR";
|
||||
|
||||
case NPP_QUADRANGLE_ERROR:
|
||||
return "NPP_QUADRANGLE_ERROR";
|
||||
|
||||
case NPP_MEMORY_ALLOCATION_ERR:
|
||||
return "NPP_MEMORY_ALLOCATION_ERROR";
|
||||
|
||||
case NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR:
|
||||
return "NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR";
|
||||
|
||||
case NPP_INVALID_HOST_POINTER_ERROR:
|
||||
return "NPP_INVALID_HOST_POINTER_ERROR";
|
||||
|
||||
case NPP_INVALID_DEVICE_POINTER_ERROR:
|
||||
return "NPP_INVALID_DEVICE_POINTER_ERROR";
|
||||
#endif
|
||||
|
||||
case NPP_LUT_NUMBER_OF_LEVELS_ERROR:
|
||||
return "NPP_LUT_NUMBER_OF_LEVELS_ERROR";
|
||||
|
||||
case NPP_TEXTURE_BIND_ERROR:
|
||||
return "NPP_TEXTURE_BIND_ERROR";
|
||||
|
||||
case NPP_WRONG_INTERSECTION_ROI_ERROR:
|
||||
return "NPP_WRONG_INTERSECTION_ROI_ERROR";
|
||||
|
||||
case NPP_NOT_EVEN_STEP_ERROR:
|
||||
return "NPP_NOT_EVEN_STEP_ERROR";
|
||||
|
||||
case NPP_INTERPOLATION_ERROR:
|
||||
return "NPP_INTERPOLATION_ERROR";
|
||||
|
||||
case NPP_RESIZE_FACTOR_ERROR:
|
||||
return "NPP_RESIZE_FACTOR_ERROR";
|
||||
|
||||
case NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR:
|
||||
return "NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR";
|
||||
|
||||
#if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) <= 0x5000
|
||||
|
||||
case NPP_MEMFREE_ERR:
|
||||
return "NPP_MEMFREE_ERR";
|
||||
|
||||
case NPP_MEMSET_ERR:
|
||||
return "NPP_MEMSET_ERR";
|
||||
|
||||
case NPP_MEMCPY_ERR:
|
||||
return "NPP_MEMCPY_ERROR";
|
||||
|
||||
case NPP_MIRROR_FLIP_ERR:
|
||||
return "NPP_MIRROR_FLIP_ERR";
|
||||
#else
|
||||
|
||||
case NPP_MEMFREE_ERROR:
|
||||
return "NPP_MEMFREE_ERROR";
|
||||
|
||||
case NPP_MEMSET_ERROR:
|
||||
return "NPP_MEMSET_ERROR";
|
||||
|
||||
case NPP_MEMCPY_ERROR:
|
||||
return "NPP_MEMCPY_ERROR";
|
||||
|
||||
case NPP_MIRROR_FLIP_ERROR:
|
||||
return "NPP_MIRROR_FLIP_ERROR";
|
||||
#endif
|
||||
|
||||
case NPP_ALIGNMENT_ERROR:
|
||||
return "NPP_ALIGNMENT_ERROR";
|
||||
|
||||
case NPP_STEP_ERROR:
|
||||
return "NPP_STEP_ERROR";
|
||||
|
||||
case NPP_SIZE_ERROR:
|
||||
return "NPP_SIZE_ERROR";
|
||||
|
||||
case NPP_NULL_POINTER_ERROR:
|
||||
return "NPP_NULL_POINTER_ERROR";
|
||||
|
||||
case NPP_CUDA_KERNEL_EXECUTION_ERROR:
|
||||
return "NPP_CUDA_KERNEL_EXECUTION_ERROR";
|
||||
|
||||
case NPP_NOT_IMPLEMENTED_ERROR:
|
||||
return "NPP_NOT_IMPLEMENTED_ERROR";
|
||||
|
||||
case NPP_ERROR:
|
||||
return "NPP_ERROR";
|
||||
|
||||
case NPP_SUCCESS:
|
||||
return "NPP_SUCCESS";
|
||||
|
||||
case NPP_WRONG_INTERSECTION_QUAD_WARNING:
|
||||
return "NPP_WRONG_INTERSECTION_QUAD_WARNING";
|
||||
|
||||
case NPP_MISALIGNED_DST_ROI_WARNING:
|
||||
return "NPP_MISALIGNED_DST_ROI_WARNING";
|
||||
|
||||
case NPP_AFFINE_QUAD_INCORRECT_WARNING:
|
||||
return "NPP_AFFINE_QUAD_INCORRECT_WARNING";
|
||||
|
||||
case NPP_DOUBLE_SIZE_WARNING:
|
||||
return "NPP_DOUBLE_SIZE_WARNING";
|
||||
|
||||
case NPP_WRONG_INTERSECTION_ROI_WARNING:
|
||||
return "NPP_WRONG_INTERSECTION_ROI_WARNING";
|
||||
|
||||
#if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) >= 0x6000
|
||||
/* These are 6.0 or higher */
|
||||
case NPP_LUT_PALETTE_BITSIZE_ERROR:
|
||||
return "NPP_LUT_PALETTE_BITSIZE_ERROR";
|
||||
|
||||
case NPP_ZC_MODE_NOT_SUPPORTED_ERROR:
|
||||
return "NPP_ZC_MODE_NOT_SUPPORTED_ERROR";
|
||||
|
||||
case NPP_QUALITY_INDEX_ERROR:
|
||||
return "NPP_QUALITY_INDEX_ERROR";
|
||||
|
||||
case NPP_CHANNEL_ORDER_ERROR:
|
||||
return "NPP_CHANNEL_ORDER_ERROR";
|
||||
|
||||
case NPP_ZERO_MASK_VALUE_ERROR:
|
||||
return "NPP_ZERO_MASK_VALUE_ERROR";
|
||||
|
||||
case NPP_NUMBER_OF_CHANNELS_ERROR:
|
||||
return "NPP_NUMBER_OF_CHANNELS_ERROR";
|
||||
|
||||
case NPP_COI_ERROR:
|
||||
return "NPP_COI_ERROR";
|
||||
|
||||
case NPP_DIVISOR_ERROR:
|
||||
return "NPP_DIVISOR_ERROR";
|
||||
|
||||
case NPP_CHANNEL_ERROR:
|
||||
return "NPP_CHANNEL_ERROR";
|
||||
|
||||
case NPP_STRIDE_ERROR:
|
||||
return "NPP_STRIDE_ERROR";
|
||||
|
||||
case NPP_ANCHOR_ERROR:
|
||||
return "NPP_ANCHOR_ERROR";
|
||||
|
||||
case NPP_MASK_SIZE_ERROR:
|
||||
return "NPP_MASK_SIZE_ERROR";
|
||||
|
||||
case NPP_MOMENT_00_ZERO_ERROR:
|
||||
return "NPP_MOMENT_00_ZERO_ERROR";
|
||||
|
||||
case NPP_THRESHOLD_NEGATIVE_LEVEL_ERROR:
|
||||
return "NPP_THRESHOLD_NEGATIVE_LEVEL_ERROR";
|
||||
|
||||
case NPP_THRESHOLD_ERROR:
|
||||
return "NPP_THRESHOLD_ERROR";
|
||||
|
||||
case NPP_CONTEXT_MATCH_ERROR:
|
||||
return "NPP_CONTEXT_MATCH_ERROR";
|
||||
|
||||
case NPP_FFT_FLAG_ERROR:
|
||||
return "NPP_FFT_FLAG_ERROR";
|
||||
|
||||
case NPP_FFT_ORDER_ERROR:
|
||||
return "NPP_FFT_ORDER_ERROR";
|
||||
|
||||
case NPP_SCALE_RANGE_ERROR:
|
||||
return "NPP_SCALE_RANGE_ERROR";
|
||||
|
||||
case NPP_DATA_TYPE_ERROR:
|
||||
return "NPP_DATA_TYPE_ERROR";
|
||||
|
||||
case NPP_OUT_OFF_RANGE_ERROR:
|
||||
return "NPP_OUT_OFF_RANGE_ERROR";
|
||||
|
||||
case NPP_DIVIDE_BY_ZERO_ERROR:
|
||||
return "NPP_DIVIDE_BY_ZERO_ERROR";
|
||||
|
||||
case NPP_RANGE_ERROR:
|
||||
return "NPP_RANGE_ERROR";
|
||||
|
||||
case NPP_NO_MEMORY_ERROR:
|
||||
return "NPP_NO_MEMORY_ERROR";
|
||||
|
||||
case NPP_ERROR_RESERVED:
|
||||
return "NPP_ERROR_RESERVED";
|
||||
|
||||
case NPP_NO_OPERATION_WARNING:
|
||||
return "NPP_NO_OPERATION_WARNING";
|
||||
|
||||
case NPP_DIVIDE_BY_ZERO_WARNING:
|
||||
return "NPP_DIVIDE_BY_ZERO_WARNING";
|
||||
#endif
|
||||
|
||||
#if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) >= 0x7000
|
||||
/* These are 7.0 or higher */
|
||||
case NPP_OVERFLOW_ERROR:
|
||||
return "NPP_OVERFLOW_ERROR";
|
||||
|
||||
case NPP_CORRUPTED_DATA_ERROR:
|
||||
return "NPP_CORRUPTED_DATA_ERROR";
|
||||
#endif
|
||||
}
|
||||
|
||||
return "<unknown>";
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef __DRIVER_TYPES_H__
|
||||
#ifndef DEVICE_RESET
|
||||
#define DEVICE_RESET cudaDeviceReset();
|
||||
#endif
|
||||
#else
|
||||
#ifndef DEVICE_RESET
|
||||
#define DEVICE_RESET
|
||||
#endif
|
||||
#endif
|
||||
|
||||
template <typename T>
|
||||
void check(T result, char const *const func, const char *const file,
|
||||
int const line) {
|
||||
if (result) {
|
||||
fprintf(stderr, "CUDA error at %s:%d code=%d(%s) \"%s\" \n", file, line,
|
||||
static_cast<unsigned int>(result), _cudaGetErrorEnum(result), func);
|
||||
DEVICE_RESET
|
||||
// Make sure we call CUDA Device Reset before exiting
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef __DRIVER_TYPES_H__
|
||||
// This will output the proper CUDA error strings in the event
|
||||
// that a CUDA host call returns an error
|
||||
#define checkCudaErrors(val) check((val), #val, __FILE__, __LINE__)
|
||||
|
||||
// This will output the proper error string when calling cudaGetLastError
|
||||
#define getLastCudaError(msg) __getLastCudaError(msg, __FILE__, __LINE__)
|
||||
|
||||
inline void __getLastCudaError(const char *errorMessage, const char *file,
|
||||
const int line) {
|
||||
cudaError_t err = cudaGetLastError();
|
||||
|
||||
if (cudaSuccess != err) {
|
||||
fprintf(stderr,
|
||||
"%s(%i) : getLastCudaError() CUDA error :"
|
||||
" %s : (%d) %s.\n",
|
||||
file, line, errorMessage, static_cast<int>(err),
|
||||
cudaGetErrorString(err));
|
||||
DEVICE_RESET
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
|
||||
// This will only print the proper error string when calling cudaGetLastError
|
||||
// but not exit program incase error detected.
|
||||
#define printLastCudaError(msg) __printLastCudaError(msg, __FILE__, __LINE__)
|
||||
|
||||
inline void __printLastCudaError(const char *errorMessage, const char *file,
|
||||
const int line) {
|
||||
cudaError_t err = cudaGetLastError();
|
||||
|
||||
if (cudaSuccess != err) {
|
||||
fprintf(stderr,
|
||||
"%s(%i) : getLastCudaError() CUDA error :"
|
||||
" %s : (%d) %s.\n",
|
||||
file, line, errorMessage, static_cast<int>(err),
|
||||
cudaGetErrorString(err));
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef MAX
// Larger of a and b. Both arguments are fully parenthesized so that
// expressions containing low-precedence operators (for example
// MAX(x & mask, y)) expand as intended. The selected argument is evaluated
// twice, so avoid arguments with side effects.
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
#endif
|
||||
|
||||
// Float to int conversion: rounds to the nearest integer, with halfway cases
// going away from zero. The 0.5 offset is a double literal, so the sum is
// formed in double precision before the truncating cast.
inline int ftoi(float value) {
  if (value >= 0) {
    return static_cast<int>(value + 0.5);
  }

  return static_cast<int>(value - 0.5);
}
|
||||
|
||||
// Beginning of GPU Architecture definitions

// Looks up the number of cores per SM for a given SM version.
//
// major, minor: SM version; combined as (major << 4) + minor for the lookup.
//
// Returns the core count from the table below. When the version is not
// listed, a notice is printed to stdout and the core count of the last table
// row is returned.
inline int _ConvertSMVer2Cores(int major, int minor) {
  // SM is encoded as 0xMm (hexadecimal notation): M = SM major version,
  // m = SM minor version.
  struct sSMtoCores {
    int SM;
    int Cores;
  };

  static const sSMtoCores kCoresPerSM[] = {
      {0x30, 192}, {0x32, 192}, {0x35, 192}, {0x37, 192}, {0x50, 128},
      {0x52, 128}, {0x53, 128}, {0x60, 64},  {0x61, 128}, {0x62, 128},
      {0x70, 64},  {0x72, 64},  {0x75, 64}};
  const int row_count =
      static_cast<int>(sizeof(kCoresPerSM) / sizeof(kCoresPerSM[0]));

  const int wanted = (major << 4) + minor;

  for (int row = 0; row < row_count; ++row) {
    if (kCoresPerSM[row].SM == wanted) {
      return kCoresPerSM[row].Cores;
    }
  }

  // Unknown version: fall back to the last listed entry so callers can still
  // run.
  const int fallback = kCoresPerSM[row_count - 1].Cores;
  printf(
      "MapSMtoCores for SM %d.%d is undefined."
      "  Default to use %d Cores/SM\n",
      major, minor, fallback);
  return fallback;
}
// end of GPU Architecture definitions
|
||||
|
||||
#ifdef __CUDA_RUNTIME_H__
|
||||
// General GPU Device CUDA Initialization
|
||||
inline int gpuDeviceInit(int devID) {
|
||||
int device_count;
|
||||
checkCudaErrors(cudaGetDeviceCount(&device_count));
|
||||
|
||||
if (device_count == 0) {
|
||||
fprintf(stderr,
|
||||
"gpuDeviceInit() CUDA error: "
|
||||
"no devices supporting CUDA.\n");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
if (devID < 0) {
|
||||
devID = 0;
|
||||
}
|
||||
|
||||
if (devID > device_count - 1) {
|
||||
fprintf(stderr, "\n");
|
||||
fprintf(stderr, ">> %d CUDA capable GPU device(s) detected. <<\n",
|
||||
device_count);
|
||||
fprintf(stderr,
|
||||
">> gpuDeviceInit (-device=%d) is not a valid"
|
||||
" GPU device. <<\n",
|
||||
devID);
|
||||
fprintf(stderr, "\n");
|
||||
return -devID;
|
||||
}
|
||||
|
||||
cudaDeviceProp deviceProp;
|
||||
checkCudaErrors(cudaGetDeviceProperties(&deviceProp, devID));
|
||||
|
||||
if (deviceProp.computeMode == cudaComputeModeProhibited) {
|
||||
fprintf(stderr,
|
||||
"Error: device is running in <Compute Mode "
|
||||
"Prohibited>, no threads can use cudaSetDevice().\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (deviceProp.major < 1) {
|
||||
fprintf(stderr, "gpuDeviceInit(): GPU device does not support CUDA.\n");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
checkCudaErrors(cudaSetDevice(devID));
|
||||
printf("gpuDeviceInit() CUDA Device [%d]: \"%s\n", devID, deviceProp.name);
|
||||
|
||||
return devID;
|
||||
}
|
||||
|
||||
// This function returns the best GPU (with maximum GFLOPS)
|
||||
inline int gpuGetMaxGflopsDeviceId() {
|
||||
int current_device = 0, sm_per_multiproc = 0;
|
||||
int max_perf_device = 0;
|
||||
int device_count = 0;
|
||||
int devices_prohibited = 0;
|
||||
|
||||
uint64_t max_compute_perf = 0;
|
||||
cudaDeviceProp deviceProp;
|
||||
checkCudaErrors(cudaGetDeviceCount(&device_count));
|
||||
|
||||
if (device_count == 0) {
|
||||
fprintf(stderr,
|
||||
"gpuGetMaxGflopsDeviceId() CUDA error:"
|
||||
" no devices supporting CUDA.\n");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
// Find the best CUDA capable GPU device
|
||||
current_device = 0;
|
||||
|
||||
while (current_device < device_count) {
|
||||
cudaGetDeviceProperties(&deviceProp, current_device);
|
||||
|
||||
// If this GPU is not running on Compute Mode prohibited,
|
||||
// then we can add it to the list
|
||||
if (deviceProp.computeMode != cudaComputeModeProhibited) {
|
||||
if (deviceProp.major == 9999 && deviceProp.minor == 9999) {
|
||||
sm_per_multiproc = 1;
|
||||
} else {
|
||||
sm_per_multiproc =
|
||||
_ConvertSMVer2Cores(deviceProp.major, deviceProp.minor);
|
||||
}
|
||||
|
||||
uint64_t compute_perf = (uint64_t)deviceProp.multiProcessorCount *
|
||||
sm_per_multiproc * deviceProp.clockRate;
|
||||
|
||||
if (compute_perf > max_compute_perf) {
|
||||
max_compute_perf = compute_perf;
|
||||
max_perf_device = current_device;
|
||||
}
|
||||
} else {
|
||||
devices_prohibited++;
|
||||
}
|
||||
|
||||
++current_device;
|
||||
}
|
||||
|
||||
if (devices_prohibited == device_count) {
|
||||
fprintf(stderr,
|
||||
"gpuGetMaxGflopsDeviceId() CUDA error:"
|
||||
" all devices have compute mode prohibited.\n");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
return max_perf_device;
|
||||
}
|
||||
|
||||
// Initialization code to find the best CUDA Device
|
||||
inline int findCudaDevice(int argc, const char **argv) {
|
||||
cudaDeviceProp deviceProp;
|
||||
int devID = 0;
|
||||
|
||||
// If the command-line has a device number specified, use it
|
||||
if (checkCmdLineFlag(argc, argv, "device")) {
|
||||
devID = getCmdLineArgumentInt(argc, argv, "device=");
|
||||
|
||||
if (devID < 0) {
|
||||
printf("Invalid command line parameter\n ");
|
||||
exit(EXIT_FAILURE);
|
||||
} else {
|
||||
devID = gpuDeviceInit(devID);
|
||||
|
||||
if (devID < 0) {
|
||||
printf("exiting...\n");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Otherwise pick the device with highest Gflops/s
|
||||
devID = gpuGetMaxGflopsDeviceId();
|
||||
checkCudaErrors(cudaSetDevice(devID));
|
||||
checkCudaErrors(cudaGetDeviceProperties(&deviceProp, devID));
|
||||
printf("GPU Device %d: \"%s\" with compute capability %d.%d\n\n", devID,
|
||||
deviceProp.name, deviceProp.major, deviceProp.minor);
|
||||
}
|
||||
|
||||
return devID;
|
||||
}
|
||||
|
||||
inline int findIntegratedGPU() {
|
||||
int current_device = 0;
|
||||
int device_count = 0;
|
||||
int devices_prohibited = 0;
|
||||
|
||||
cudaDeviceProp deviceProp;
|
||||
checkCudaErrors(cudaGetDeviceCount(&device_count));
|
||||
|
||||
if (device_count == 0) {
|
||||
fprintf(stderr, "CUDA error: no devices supporting CUDA.\n");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
// Find the integrated GPU which is compute capable
|
||||
while (current_device < device_count) {
|
||||
cudaGetDeviceProperties(&deviceProp, current_device);
|
||||
|
||||
// If GPU is integrated and is not running on Compute Mode prohibited,
|
||||
// then cuda can map to GLES resource
|
||||
if (deviceProp.integrated &&
|
||||
(deviceProp.computeMode != cudaComputeModeProhibited)) {
|
||||
checkCudaErrors(cudaSetDevice(current_device));
|
||||
checkCudaErrors(cudaGetDeviceProperties(&deviceProp, current_device));
|
||||
printf("GPU Device %d: \"%s\" with compute capability %d.%d\n\n",
|
||||
current_device, deviceProp.name, deviceProp.major,
|
||||
deviceProp.minor);
|
||||
|
||||
return current_device;
|
||||
} else {
|
||||
devices_prohibited++;
|
||||
}
|
||||
|
||||
current_device++;
|
||||
}
|
||||
|
||||
if (devices_prohibited == device_count) {
|
||||
fprintf(stderr,
|
||||
"CUDA error:"
|
||||
" No GLES-CUDA Interop capable GPU found.\n");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
// General check for CUDA GPU SM Capabilities
|
||||
inline bool checkCudaCapabilities(int major_version, int minor_version) {
|
||||
cudaDeviceProp deviceProp;
|
||||
deviceProp.major = 0;
|
||||
deviceProp.minor = 0;
|
||||
int dev;
|
||||
|
||||
checkCudaErrors(cudaGetDevice(&dev));
|
||||
checkCudaErrors(cudaGetDeviceProperties(&deviceProp, dev));
|
||||
|
||||
if ((deviceProp.major > major_version) ||
|
||||
(deviceProp.major == major_version &&
|
||||
deviceProp.minor >= minor_version)) {
|
||||
printf(" Device %d: <%16s >, Compute SM %d.%d detected\n", dev,
|
||||
deviceProp.name, deviceProp.major, deviceProp.minor);
|
||||
return true;
|
||||
} else {
|
||||
printf(
|
||||
" No GPU device was found that can support "
|
||||
"CUDA compute capability %d.%d.\n",
|
||||
major_version, minor_version);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// end of CUDA Helper Functions
|
||||
|
||||
#endif // COMMON_HELPER_CUDA_H_
|
|
@ -0,0 +1,43 @@
|
|||
/**
|
||||
* Copyright 1993-2013 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* Please refer to the NVIDIA end user license agreement (EULA) associated
|
||||
* with this source code for terms and conditions that govern your use of
|
||||
* this software. Any use, reproduction, disclosure, or distribution of
|
||||
* this software and related documentation outside the terms of the EULA
|
||||
* is strictly prohibited.
|
||||
*
|
||||
*/
|
||||
|
||||
// These are helper functions for the SDK samples (string parsing,
|
||||
// timers, image helpers, etc)
|
||||
#ifndef COMMON_HELPER_FUNCTIONS_H_
|
||||
#define COMMON_HELPER_FUNCTIONS_H_
|
||||
|
||||
#ifdef WIN32
|
||||
#pragma warning(disable : 4996)
|
||||
#endif
|
||||
|
||||
// includes, project
|
||||
#include <assert.h>
|
||||
#include "exception.h"
|
||||
#include <math.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
// includes, timer, string parsing, image helpers
|
||||
#include "helper_image.h" // helper functions for image compare, dump, data comparisons
|
||||
#include "helper_string.h" // helper functions for string parsing
|
||||
#include "helper_timer.h" // helper functions for timers
|
||||
|
||||
#ifndef EXIT_WAIVED
|
||||
#define EXIT_WAIVED 2
|
||||
#endif
|
||||
|
||||
#endif // COMMON_HELPER_FUNCTIONS_H_
|
|
@ -0,0 +1,985 @@
|
|||
/**
|
||||
* Copyright 1993-2013 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* Please refer to the NVIDIA end user license agreement (EULA) associated
|
||||
* with this source code for terms and conditions that govern your use of
|
||||
* this software. Any use, reproduction, disclosure, or distribution of
|
||||
* this software and related documentation outside the terms of the EULA
|
||||
* is strictly prohibited.
|
||||
*
|
||||
*/
|
||||
|
||||
// These are helper functions for the SDK samples (image,bitmap)
|
||||
#ifndef COMMON_HELPER_IMAGE_H_
|
||||
#define COMMON_HELPER_IMAGE_H_
|
||||
|
||||
#include <assert.h>
|
||||
#include <math.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
// MIN / MAX: smaller / larger of two values. Arguments are fully
// parenthesized so that expressions containing low-precedence operators
// expand as intended. The selected argument is evaluated twice, so avoid
// arguments with side effects.
#ifndef MIN
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
#endif
#ifndef MAX
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
#endif
|
||||
|
||||
#ifndef EXIT_WAIVED
|
||||
#define EXIT_WAIVED 2
|
||||
#endif
|
||||
|
||||
#include "exception.h"
|
||||
#include "helper_string.h"
|
||||
|
||||
// Internal helpers for the image functions in this header.
namespace helper_image_internal {
//! size of PGM file header
const unsigned int PGMHeaderSize = 0x40;

// types

//! Data converter from unsigned char / unsigned byte to type T
template <class T>
struct ConverterFromUByte;

//! Data converter from unsigned char to unsigned char (pass-through)
template <>
struct ConverterFromUByte<unsigned char> {
  //! Conversion operator
  //! @return val unchanged
  //! @param  val  value to convert
  unsigned char operator()(const unsigned char &val) const { return val; }
};

//! Data converter from unsigned char / unsigned byte to float
template <>
struct ConverterFromUByte<float> {
  //! Conversion operator
  //! @return val scaled by 1/255, so 0 maps to 0.0f and 255 to 1.0f
  //! @param  val  value to convert
  float operator()(const unsigned char &val) const {
    return static_cast<float>(val) / 255.0f;
  }
};

//! Data converter from type T to unsigned char / unsigned byte
template <class T>
struct ConverterToUByte;

//! Data converter from unsigned char to unsigned char (pass-through)
template <>
struct ConverterToUByte<unsigned char> {
  //! Conversion operator (essentially a passthru)
  //! @return val unchanged
  //! @param  val  value to convert
  unsigned char operator()(const unsigned char &val) const { return val; }
};

//! Data converter from float to unsigned char / unsigned byte
template <>
struct ConverterToUByte<float> {
  //! Conversion operator
  //! @return val * 255 truncated toward zero and saturated to [0, 255];
  //!         NaN maps to 0
  //! @param  val  value to convert
  unsigned char operator()(const float &val) const {
    const float scaled = val * 255.0f;

    // Converting an out-of-range float to unsigned char is undefined, so
    // saturate explicitly. The negated comparison also catches NaN.
    if (!(scaled > 0.0f)) {
      return 0;
    }

    if (scaled >= 255.0f) {
      return 255;
    }

    return static_cast<unsigned char>(scaled);
  }
};
}  // namespace helper_image_internal
|
||||
|
||||
// Portable wrappers around file opening and sscanf. On Windows builds the
// *_s variants are used; everywhere else the standard C functions. Each macro
// is only defined when the including file has not already provided it.

// FOPEN stores the opened stream in fHandle; its value is what FOPEN_FAIL
// expects (an error code on Windows, the FILE pointer elsewhere).
#ifndef FOPEN
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
#define FOPEN(fHandle, filename, mode) fopen_s(&fHandle, filename, mode)
#else
#define FOPEN(fHandle, filename, mode) (fHandle = fopen(filename, mode))
#endif
#endif

// FOPEN_FAIL is true when the value produced by FOPEN signals failure.
#ifndef FOPEN_FAIL
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
#define FOPEN_FAIL(result) (result != 0)
#else
#define FOPEN_FAIL(result) (result == NULL)
#endif
#endif

#ifndef SSCANF
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
#define SSCANF sscanf_s
#else
#define SSCANF sscanf
#endif
#endif
|
||||
|
||||
inline bool __loadPPM(const char *file, unsigned char **data, unsigned int *w,
|
||||
unsigned int *h, unsigned int *channels) {
|
||||
FILE *fp = NULL;
|
||||
|
||||
if (FOPEN_FAIL(FOPEN(fp, file, "rb"))) {
|
||||
std::cerr << "__LoadPPM() : Failed to open file: " << file << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
// check header
|
||||
char header[helper_image_internal::PGMHeaderSize];
|
||||
|
||||
if (fgets(header, helper_image_internal::PGMHeaderSize, fp) == NULL) {
|
||||
std::cerr << "__LoadPPM() : reading PGM header returned NULL" << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
if (strncmp(header, "P5", 2) == 0) {
|
||||
*channels = 1;
|
||||
} else if (strncmp(header, "P6", 2) == 0) {
|
||||
*channels = 3;
|
||||
} else {
|
||||
std::cerr << "__LoadPPM() : File is not a PPM or PGM image" << std::endl;
|
||||
*channels = 0;
|
||||
return false;
|
||||
}
|
||||
|
||||
// parse header, read maxval, width and height
|
||||
unsigned int width = 0;
|
||||
unsigned int height = 0;
|
||||
unsigned int maxval = 0;
|
||||
unsigned int i = 0;
|
||||
|
||||
while (i < 3) {
|
||||
if (fgets(header, helper_image_internal::PGMHeaderSize, fp) == NULL) {
|
||||
std::cerr << "__LoadPPM() : reading PGM header returned NULL"
|
||||
<< std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
if (header[0] == '#') {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (i == 0) {
|
||||
i += SSCANF(header, "%u %u %u", &width, &height, &maxval);
|
||||
} else if (i == 1) {
|
||||
i += SSCANF(header, "%u %u", &height, &maxval);
|
||||
} else if (i == 2) {
|
||||
i += SSCANF(header, "%u", &maxval);
|
||||
}
|
||||
}
|
||||
|
||||
// check if given handle for the data is initialized
|
||||
if (NULL != *data) {
|
||||
if (*w != width || *h != height) {
|
||||
std::cerr << "__LoadPPM() : Invalid image dimensions." << std::endl;
|
||||
}
|
||||
} else {
|
||||
*data = (unsigned char *)malloc(sizeof(unsigned char) * width * height *
|
||||
*channels);
|
||||
*w = width;
|
||||
*h = height;
|
||||
}
|
||||
|
||||
// read and close file
|
||||
if (fread(*data, sizeof(unsigned char), width * height * *channels, fp) ==
|
||||
0) {
|
||||
std::cerr << "__LoadPPM() read data returned error." << std::endl;
|
||||
}
|
||||
|
||||
fclose(fp);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template <class T>
|
||||
inline bool sdkLoadPGM(const char *file, T **data, unsigned int *w,
|
||||
unsigned int *h) {
|
||||
unsigned char *idata = NULL;
|
||||
unsigned int channels;
|
||||
|
||||
if (true != __loadPPM(file, &idata, w, h, &channels)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
unsigned int size = *w * *h * channels;
|
||||
|
||||
// initialize mem if necessary
|
||||
// the correct size is checked / set in loadPGMc()
|
||||
if (NULL == *data) {
|
||||
*data = reinterpret_cast<T *>(malloc(sizeof(T) * size));
|
||||
}
|
||||
|
||||
// copy and cast data
|
||||
std::transform(idata, idata + size, *data,
|
||||
helper_image_internal::ConverterFromUByte<T>());
|
||||
|
||||
free(idata);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template <class T>
|
||||
inline bool sdkLoadPPM4(const char *file, T **data, unsigned int *w,
|
||||
unsigned int *h) {
|
||||
unsigned char *idata = 0;
|
||||
unsigned int channels;
|
||||
|
||||
if (__loadPPM(file, &idata, w, h, &channels)) {
|
||||
// pad 4th component
|
||||
int size = *w * *h;
|
||||
// keep the original pointer
|
||||
unsigned char *idata_orig = idata;
|
||||
*data = reinterpret_cast<T *>(malloc(sizeof(T) * size * 4));
|
||||
unsigned char *ptr = *data;
|
||||
|
||||
for (int i = 0; i < size; i++) {
|
||||
*ptr++ = *idata++;
|
||||
*ptr++ = *idata++;
|
||||
*ptr++ = *idata++;
|
||||
*ptr++ = 0;
|
||||
}
|
||||
|
||||
free(idata_orig);
|
||||
return true;
|
||||
} else {
|
||||
free(idata);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Writes an 8-bit image as a binary PGM ("P5", 1 channel) or PPM ("P6",
// 3 channels) file.
//
// file:     path of the file to create or overwrite.
// data:     w*h*channels bytes of pixel data; must not be NULL.
// w, h:     image dimensions; both must be greater than zero.
// channels: 1 or 3.
//
// Returns true when header and pixels were written. Returns false (after
// printing a message to std::cerr) for an unsupported channel count, when the
// file cannot be opened, or when writing fails.
inline bool __savePPM(const char *file, unsigned char *data, unsigned int w,
                      unsigned int h, unsigned int channels) {
  assert(NULL != data);
  assert(w > 0);
  assert(h > 0);

  // Validate before opening so a bad request does not leave an empty file.
  const char *magic = NULL;

  if (channels == 1) {
    magic = "P5\n";
  } else if (channels == 3) {
    magic = "P6\n";
  } else {
    std::cerr << "__savePPM() : Invalid number of channels." << std::endl;
    return false;
  }

  std::fstream fh(file, std::fstream::out | std::fstream::binary);

  // A failed open sets failbit, not badbit, so bad() would not notice it.
  if (!fh.is_open()) {
    std::cerr << "__savePPM() : Opening file failed." << std::endl;
    return false;
  }

  fh << magic;
  fh << w << "\n" << h << "\n" << 0xff << std::endl;

  const std::streamsize byte_count =
      static_cast<std::streamsize>(w) * h * channels;
  fh.write(reinterpret_cast<const char *>(data), byte_count);

  fh.flush();

  if (!fh) {
    std::cerr << "__savePPM() : Writing data failed." << std::endl;
    return false;
  }

  fh.close();

  return true;
}
|
||||
|
||||
template <class T>
|
||||
inline bool sdkSavePGM(const char *file, T *data, unsigned int w,
|
||||
unsigned int h) {
|
||||
unsigned int size = w * h;
|
||||
unsigned char *idata = (unsigned char *)malloc(sizeof(unsigned char) * size);
|
||||
|
||||
std::transform(data, data + size, idata,
|
||||
helper_image_internal::ConverterToUByte<T>());
|
||||
|
||||
// write file
|
||||
bool result = __savePPM(file, idata, w, h, 1);
|
||||
|
||||
// cleanup
|
||||
free(idata);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
inline bool sdkSavePPM4ub(const char *file, unsigned char *data, unsigned int w,
|
||||
unsigned int h) {
|
||||
// strip 4th component
|
||||
int size = w * h;
|
||||
unsigned char *ndata =
|
||||
(unsigned char *)malloc(sizeof(unsigned char) * size * 3);
|
||||
unsigned char *ptr = ndata;
|
||||
|
||||
for (int i = 0; i < size; i++) {
|
||||
*ptr++ = *data++;
|
||||
*ptr++ = *data++;
|
||||
*ptr++ = *data++;
|
||||
data++;
|
||||
}
|
||||
|
||||
bool result = __savePPM(file, ndata, w, h, 3);
|
||||
free(ndata);
|
||||
return result;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
//! Read file \filename and return the data
|
||||
//! @return bool if reading the file succeeded, otherwise false
|
||||
//! @param filename name of the source file
|
||||
//! @param data uninitialized pointer, returned initialized and pointing to
|
||||
//! the data read
|
||||
//! @param len number of data elements in data, -1 on error
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
template <class T>
|
||||
inline bool sdkReadFile(const char *filename, T **data, unsigned int *len,
|
||||
bool verbose) {
|
||||
// check input arguments
|
||||
assert(NULL != filename);
|
||||
assert(NULL != len);
|
||||
|
||||
// intermediate storage for the data read
|
||||
std::vector<T> data_read;
|
||||
|
||||
// open file for reading
|
||||
FILE *fh = NULL;
|
||||
|
||||
// check if filestream is valid
|
||||
if (FOPEN_FAIL(FOPEN(fh, filename, "r"))) {
|
||||
printf("Unable to open input file: %s\n", filename);
|
||||
return false;
|
||||
}
|
||||
|
||||
// read all data elements
|
||||
T token;
|
||||
|
||||
while (!feof(fh)) {
|
||||
fscanf(fh, "%f", &token);
|
||||
data_read.push_back(token);
|
||||
}
|
||||
|
||||
// the last element is read twice
|
||||
data_read.pop_back();
|
||||
fclose(fh);
|
||||
|
||||
// check if the given handle is already initialized
|
||||
if (NULL != *data) {
|
||||
if (*len != data_read.size()) {
|
||||
std::cerr << "sdkReadFile() : Initialized memory given but "
|
||||
<< "size mismatch with signal read "
|
||||
<< "(data read / data init = " << (unsigned int)data_read.size()
|
||||
<< " / " << *len << ")" << std::endl;
|
||||
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
// allocate storage for the data read
|
||||
*data = reinterpret_cast<T *>(malloc(sizeof(T) * data_read.size()));
|
||||
// store signal size
|
||||
*len = static_cast<unsigned int>(data_read.size());
|
||||
}
|
||||
|
||||
// copy data
|
||||
memcpy(*data, &data_read.front(), sizeof(T) * data_read.size());
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
//! Read one block of a binary file
//! @return true if the block was read, otherwise false
//! @param filename   name of the source file
//! @param data       array of block pointers; data[block_num] receives a
//!                   newly malloc'ed buffer of block_size bytes
//! @param len        receives the number of T elements read
//! @param block_num  index of the block; reading starts at byte offset
//!                   block_num * block_size
//! @param block_size size of one block in bytes
//! @param verbose    print a message when the file cannot be opened
//!
//! On failure data[block_num] is left as NULL.
//////////////////////////////////////////////////////////////////////////////
template <class T>
inline bool sdkReadFileBlocks(const char *filename, T **data, unsigned int *len,
                              unsigned int block_num, unsigned int block_size,
                              bool verbose) {
  // check input arguments
  assert(NULL != filename);
  assert(NULL != len);

  // open file for reading
  FILE *fh = fopen(filename, "rb");

  // Fail regardless of verbose; verbose only controls the message. Otherwise
  // a NULL stream would be handed to fseek/fread below.
  if (fh == NULL) {
    if (verbose) {
      std::cerr << "sdkReadFile() : Opening file failed." << std::endl;
    }

    return false;
  }

  // allocate storage for the data read
  data[block_num] = reinterpret_cast<T *>(malloc(block_size));

  if (data[block_num] == NULL && block_size != 0) {
    fclose(fh);
    return false;
  }

  // read all data elements
  const long offset =
      static_cast<long>(block_num) * static_cast<long>(block_size);

  if (fseek(fh, offset, SEEK_SET) != 0) {
    free(data[block_num]);
    data[block_num] = NULL;
    fclose(fh);
    return false;
  }

  *len = static_cast<unsigned int>(
      fread(data[block_num], sizeof(T), block_size / sizeof(T), fh));

  fclose(fh);

  return true;
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
//! Write a data file \filename
//! @return true if writing the file succeeded, otherwise false
//! @param filename name of the destination file
//! @param data     data to write
//! @param len      number of data elements in data
//! @param epsilon  epsilon for comparison; written as a "# <epsilon>" line
//!                 ahead of the data
//! @param verbose  print progress and error messages to std::cerr
//! @param append   when true the output is added to the end of an existing
//!                 file; when false (the default) the file is overwritten
//////////////////////////////////////////////////////////////////////////////
template <class T, class S>
inline bool sdkWriteFile(const char *filename, const T *data, unsigned int len,
                         const S epsilon, bool verbose, bool append = false) {
  assert(NULL != filename);
  assert(NULL != data);

  // open file for writing; honour the append flag instead of always
  // truncating
  std::ios_base::openmode mode = std::fstream::out;
  mode |= append ? std::fstream::app : std::fstream::trunc;
  std::fstream fh(filename, mode);

  if (verbose) {
    std::cerr << "sdkWriteFile() : Open file " << filename
              << " for write/append." << std::endl;
  }

  // check if filestream is valid
  if (!fh.good()) {
    if (verbose) {
      std::cerr << "sdkWriteFile() : Opening file failed." << std::endl;
    }

    return false;
  }

  // first write epsilon
  fh << "# " << epsilon << "\n";

  // write data
  for (unsigned int i = 0; (i < len) && (fh.good()); ++i) {
    fh << data[i] << ' ';
  }

  // Check if writing succeeded
  if (!fh.good()) {
    if (verbose) {
      std::cerr << "sdkWriteFile() : Writing file failed." << std::endl;
    }

    return false;
  }

  // file ends with nl
  fh << std::endl;

  return true;
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
//! Compare two arrays element by element after converting both to float
//! @return with threshold == 0: true if every element differs by at most
//!         epsilon; otherwise: true if len * threshold exceeds the number of
//!         mismatching elements
//! @param reference  reference data / gold image
//! @param data       computed data
//! @param len        number of elements in reference and data
//! @param epsilon    largest absolute difference still counted as a match
//! @param threshold  tolerated fraction of mismatching elements
//////////////////////////////////////////////////////////////////////////////
template <class T, class S>
inline bool compareData(const T *reference, const T *data,
                        const unsigned int len, const S epsilon,
                        const float threshold) {
  assert(epsilon >= 0);

  unsigned int mismatches = 0;

  for (unsigned int idx = 0; idx != len; ++idx) {
    const float delta =
        static_cast<float>(reference[idx]) - static_cast<float>(data[idx]);
    const bool within = (delta <= epsilon) && (delta >= -epsilon);

    if (!within) {
      ++mismatches;
    }

#if 0

    if (!within) {
      std::cerr << "ERROR, i = " << idx << ",\t "
                << reference[idx] << " / "
                << data[idx]
                << " (reference / data)\n";
    }

#endif
  }

  if (threshold == 0.0f) {
    return mismatches == 0;
  }

  if (mismatches) {
    printf("%4.2f(%%) of bytes mismatched (count=%d)\n",
           static_cast<float>(mismatches) * 100 / static_cast<float>(len),
           mismatches);
  }

  return len * threshold > mismatches;
}
|
||||
|
||||
#ifndef __MIN_EPSILON_ERROR
|
||||
#define __MIN_EPSILON_ERROR 1e-3f
|
||||
#endif
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
//! Compare two arrays of arbitrary type
|
||||
//! @return true if \a reference and \a data are identical, otherwise false
|
||||
//! @param reference handle to the reference data / gold image
|
||||
//! @param data handle to the computed data
|
||||
//! @param len number of elements in reference and data
|
||||
//! @param epsilon epsilon to use for the comparison
|
||||
//! @param epsilon threshold % of (# of bytes) for pass/fail
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
template <class T, class S>
|
||||
inline bool compareDataAsFloatThreshold(const T *reference, const T *data,
|
||||
const unsigned int len, const S epsilon,
|
||||
const float threshold) {
|
||||
assert(epsilon >= 0);
|
||||
|
||||
// If we set epsilon to be 0, let's set a minimum threshold
|
||||
float max_error = MAX((float)epsilon, __MIN_EPSILON_ERROR);
|
||||
int error_count = 0;
|
||||
bool result = true;
|
||||
|
||||
for (unsigned int i = 0; i < len; ++i) {
|
||||
float diff =
|
||||
fabs(static_cast<float>(reference[i]) - static_cast<float>(data[i]));
|
||||
bool comp = (diff < max_error);
|
||||
result &= comp;
|
||||
|
||||
if (!comp) {
|
||||
error_count++;
|
||||
}
|
||||
}
|
||||
|
||||
if (threshold == 0.0f) {
|
||||
if (error_count) {
|
||||
printf("total # of errors = %d\n", error_count);
|
||||
}
|
||||
|
||||
return (error_count == 0) ? true : false;
|
||||
} else {
|
||||
if (error_count) {
|
||||
printf("%4.2f(%%) of bytes mismatched (count=%d)\n",
|
||||
static_cast<float>(error_count) * 100 / static_cast<float>(len),
|
||||
error_count);
|
||||
}
|
||||
|
||||
return ((len * threshold > error_count) ? true : false);
|
||||
}
|
||||
}
|
||||
|
||||
inline void sdkDumpBin(void *data, unsigned int bytes, const char *filename) {
|
||||
printf("sdkDumpBin: <%s>\n", filename);
|
||||
FILE *fp;
|
||||
FOPEN(fp, filename, "wb");
|
||||
fwrite(data, bytes, 1, fp);
|
||||
fflush(fp);
|
||||
fclose(fp);
|
||||
}
|
||||
|
||||
inline bool sdkCompareBin2BinUint(const char *src_file, const char *ref_file,
|
||||
unsigned int nelements, const float epsilon,
|
||||
const float threshold, char *exec_path) {
|
||||
unsigned int *src_buffer, *ref_buffer;
|
||||
FILE *src_fp = NULL, *ref_fp = NULL;
|
||||
|
||||
uint64_t error_count = 0;
|
||||
size_t fsize = 0;
|
||||
|
||||
if (FOPEN_FAIL(FOPEN(src_fp, src_file, "rb"))) {
|
||||
printf("compareBin2Bin <unsigned int> unable to open src_file: %s\n",
|
||||
src_file);
|
||||
error_count++;
|
||||
}
|
||||
|
||||
char *ref_file_path = sdkFindFilePath(ref_file, exec_path);
|
||||
|
||||
if (ref_file_path == NULL) {
|
||||
printf("compareBin2Bin <unsigned int> unable to find <%s> in <%s>\n",
|
||||
ref_file, exec_path);
|
||||
printf(">>> Check info.xml and [project//data] folder <%s> <<<\n",
|
||||
ref_file);
|
||||
printf("Aborting comparison!\n");
|
||||
printf(" FAILED\n");
|
||||
error_count++;
|
||||
|
||||
if (src_fp) {
|
||||
fclose(src_fp);
|
||||
}
|
||||
|
||||
if (ref_fp) {
|
||||
fclose(ref_fp);
|
||||
}
|
||||
} else {
|
||||
if (FOPEN_FAIL(FOPEN(ref_fp, ref_file_path, "rb"))) {
|
||||
printf(
|
||||
"compareBin2Bin <unsigned int>"
|
||||
" unable to open ref_file: %s\n",
|
||||
ref_file_path);
|
||||
error_count++;
|
||||
}
|
||||
|
||||
if (src_fp && ref_fp) {
|
||||
src_buffer = (unsigned int *)malloc(nelements * sizeof(unsigned int));
|
||||
ref_buffer = (unsigned int *)malloc(nelements * sizeof(unsigned int));
|
||||
|
||||
fsize = fread(src_buffer, nelements, sizeof(unsigned int), src_fp);
|
||||
fsize = fread(ref_buffer, nelements, sizeof(unsigned int), ref_fp);
|
||||
|
||||
printf(
|
||||
"> compareBin2Bin <unsigned int> nelements=%d,"
|
||||
" epsilon=%4.2f, threshold=%4.2f\n",
|
||||
nelements, epsilon, threshold);
|
||||
printf(" src_file <%s>, size=%d bytes\n", src_file,
|
||||
static_cast<int>(fsize));
|
||||
printf(" ref_file <%s>, size=%d bytes\n", ref_file_path,
|
||||
static_cast<int>(fsize));
|
||||
|
||||
if (!compareData<unsigned int, float>(ref_buffer, src_buffer, nelements,
|
||||
epsilon, threshold)) {
|
||||
error_count++;
|
||||
}
|
||||
|
||||
fclose(src_fp);
|
||||
fclose(ref_fp);
|
||||
|
||||
free(src_buffer);
|
||||
free(ref_buffer);
|
||||
} else {
|
||||
if (src_fp) {
|
||||
fclose(src_fp);
|
||||
}
|
||||
|
||||
if (ref_fp) {
|
||||
fclose(ref_fp);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (error_count == 0) {
|
||||
printf(" OK\n");
|
||||
} else {
|
||||
printf(" FAILURE: %d errors...\n", (unsigned int)error_count);
|
||||
}
|
||||
|
||||
return (error_count == 0); // returns true if all pixels pass
|
||||
}
|
||||
|
||||
inline bool sdkCompareBin2BinFloat(const char *src_file, const char *ref_file,
|
||||
unsigned int nelements, const float epsilon,
|
||||
const float threshold, char *exec_path) {
|
||||
float *src_buffer = NULL, *ref_buffer = NULL;
|
||||
FILE *src_fp = NULL, *ref_fp = NULL;
|
||||
size_t fsize = 0;
|
||||
|
||||
uint64_t error_count = 0;
|
||||
|
||||
if (FOPEN_FAIL(FOPEN(src_fp, src_file, "rb"))) {
|
||||
printf("compareBin2Bin <float> unable to open src_file: %s\n", src_file);
|
||||
error_count = 1;
|
||||
}
|
||||
|
||||
char *ref_file_path = sdkFindFilePath(ref_file, exec_path);
|
||||
|
||||
if (ref_file_path == NULL) {
|
||||
printf("compareBin2Bin <float> unable to find <%s> in <%s>\n", ref_file,
|
||||
exec_path);
|
||||
printf(">>> Check info.xml and [project//data] folder <%s> <<<\n",
|
||||
exec_path);
|
||||
printf("Aborting comparison!\n");
|
||||
printf(" FAILED\n");
|
||||
error_count++;
|
||||
|
||||
if (src_fp) {
|
||||
fclose(src_fp);
|
||||
}
|
||||
|
||||
if (ref_fp) {
|
||||
fclose(ref_fp);
|
||||
}
|
||||
} else {
|
||||
if (FOPEN_FAIL(FOPEN(ref_fp, ref_file_path, "rb"))) {
|
||||
printf("compareBin2Bin <float> unable to open ref_file: %s\n",
|
||||
ref_file_path);
|
||||
error_count = 1;
|
||||
}
|
||||
|
||||
if (src_fp && ref_fp) {
|
||||
src_buffer = reinterpret_cast<float *>(malloc(nelements * sizeof(float)));
|
||||
ref_buffer = reinterpret_cast<float *>(malloc(nelements * sizeof(float)));
|
||||
|
||||
printf(
|
||||
"> compareBin2Bin <float> nelements=%d, epsilon=%4.2f,"
|
||||
" threshold=%4.2f\n",
|
||||
nelements, epsilon, threshold);
|
||||
fsize = fread(src_buffer, sizeof(float), nelements, src_fp);
|
||||
printf(" src_file <%s>, size=%d bytes\n", src_file,
|
||||
static_cast<int>(fsize * sizeof(float)));
|
||||
fsize = fread(ref_buffer, sizeof(float), nelements, ref_fp);
|
||||
printf(" ref_file <%s>, size=%d bytes\n", ref_file_path,
|
||||
static_cast<int>(fsize * sizeof(float)));
|
||||
|
||||
if (!compareDataAsFloatThreshold<float, float>(
|
||||
ref_buffer, src_buffer, nelements, epsilon, threshold)) {
|
||||
error_count++;
|
||||
}
|
||||
|
||||
fclose(src_fp);
|
||||
fclose(ref_fp);
|
||||
|
||||
free(src_buffer);
|
||||
free(ref_buffer);
|
||||
} else {
|
||||
if (src_fp) {
|
||||
fclose(src_fp);
|
||||
}
|
||||
|
||||
if (ref_fp) {
|
||||
fclose(ref_fp);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (error_count == 0) {
|
||||
printf(" OK\n");
|
||||
} else {
|
||||
printf(" FAILURE: %d errors...\n", (unsigned int)error_count);
|
||||
}
|
||||
|
||||
return (error_count == 0); // returns true if all pixels pass
|
||||
}
|
||||
|
||||
//! Relative L2-norm comparison of two float arrays.
//!
//! @return true when ||reference - data|| / ||reference|| is strictly below
//!         epsilon; false otherwise, including when the sum of squared
//!         reference values is below 1e-7
//! @param reference  expected values
//! @param data       values under test
//! @param len        number of elements in both arrays
//! @param epsilon    relative tolerance, must be >= 0
inline bool sdkCompareL2fe(const float *reference, const float *data,
                           const unsigned int len, const float epsilon) {
  assert(epsilon >= 0);

  float sq_err = 0;  // sum of squared differences
  float sq_ref = 0;  // sum of squared reference values

  unsigned int idx = 0;
  while (idx < len) {
    const float delta = reference[idx] - data[idx];
    sq_err += delta * delta;
    sq_ref += reference[idx] * reference[idx];
    ++idx;
  }

  const float ref_norm = sqrtf(sq_ref);

  // The guard looks at the squared norm, not at ref_norm itself.
  if (fabs(sq_ref) < 1e-7) {
#ifdef _DEBUG
    std::cerr << "ERROR, reference l2-norm is 0\n";
#endif
    return false;
  }

  const float rel_err = sqrtf(sq_err) / ref_norm;
  const bool within_tolerance = rel_err < epsilon;

#ifdef _DEBUG
  if (!within_tolerance) {
    std::cerr << "ERROR, l2-norm error " << rel_err
              << " is greater than epsilon " << epsilon << "\n";
  }
#endif

  return within_tolerance;
}
|
||||
|
||||
inline bool sdkLoadPPMub(const char *file, unsigned char **data,
|
||||
unsigned int *w, unsigned int *h) {
|
||||
unsigned int channels;
|
||||
return __loadPPM(file, data, w, h, &channels);
|
||||
}
|
||||
|
||||
inline bool sdkLoadPPM4ub(const char *file, unsigned char **data,
|
||||
unsigned int *w, unsigned int *h) {
|
||||
unsigned char *idata = 0;
|
||||
unsigned int channels;
|
||||
|
||||
if (__loadPPM(file, &idata, w, h, &channels)) {
|
||||
// pad 4th component
|
||||
int size = *w * *h;
|
||||
// keep the original pointer
|
||||
unsigned char *idata_orig = idata;
|
||||
*data = (unsigned char *)malloc(sizeof(unsigned char) * size * 4);
|
||||
unsigned char *ptr = *data;
|
||||
|
||||
for (int i = 0; i < size; i++) {
|
||||
*ptr++ = *idata++;
|
||||
*ptr++ = *idata++;
|
||||
*ptr++ = *idata++;
|
||||
*ptr++ = 0;
|
||||
}
|
||||
|
||||
free(idata_orig);
|
||||
return true;
|
||||
} else {
|
||||
free(idata);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
inline bool sdkComparePPM(const char *src_file, const char *ref_file,
|
||||
const float epsilon, const float threshold,
|
||||
bool verboseErrors) {
|
||||
unsigned char *src_data, *ref_data;
|
||||
uint64_t error_count = 0;
|
||||
unsigned int ref_width, ref_height;
|
||||
unsigned int src_width, src_height;
|
||||
|
||||
if (src_file == NULL || ref_file == NULL) {
|
||||
if (verboseErrors) {
|
||||
std::cerr << "PPMvsPPM: src_file or ref_file is NULL."
|
||||
" Aborting comparison\n";
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
if (verboseErrors) {
|
||||
std::cerr << "> Compare (a)rendered: <" << src_file << ">\n";
|
||||
std::cerr << "> (b)reference: <" << ref_file << ">\n";
|
||||
}
|
||||
|
||||
if (sdkLoadPPM4ub(ref_file, &ref_data, &ref_width, &ref_height) != true) {
|
||||
if (verboseErrors) {
|
||||
std::cerr << "PPMvsPPM: unable to load ref image file: " << ref_file
|
||||
<< "\n";
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
if (sdkLoadPPM4ub(src_file, &src_data, &src_width, &src_height) != true) {
|
||||
std::cerr << "PPMvsPPM: unable to load src image file: " << src_file
|
||||
<< "\n";
|
||||
return false;
|
||||
}
|
||||
|
||||
if (src_height != ref_height || src_width != ref_width) {
|
||||
if (verboseErrors) {
|
||||
std::cerr << "PPMvsPPM: source and ref size mismatch (" << src_width
|
||||
<< "," << src_height << ")vs(" << ref_width << "," << ref_height
|
||||
<< ")\n";
|
||||
}
|
||||
}
|
||||
|
||||
if (verboseErrors) {
|
||||
std::cerr << "PPMvsPPM: comparing images size (" << src_width << ","
|
||||
<< src_height << ") epsilon(" << epsilon << "), threshold("
|
||||
<< threshold * 100 << "%)\n";
|
||||
}
|
||||
|
||||
if (compareData(ref_data, src_data, src_width * src_height * 4, epsilon,
|
||||
threshold) == false) {
|
||||
error_count = 1;
|
||||
}
|
||||
|
||||
if (error_count == 0) {
|
||||
if (verboseErrors) {
|
||||
std::cerr << " OK\n\n";
|
||||
}
|
||||
} else {
|
||||
if (verboseErrors) {
|
||||
std::cerr << " FAILURE! " << error_count << " errors...\n\n";
|
||||
}
|
||||
}
|
||||
|
||||
// returns true if all pixels pass
|
||||
return (error_count == 0) ? true : false;
|
||||
}
|
||||
|
||||
inline bool sdkComparePGM(const char *src_file, const char *ref_file,
|
||||
const float epsilon, const float threshold,
|
||||
bool verboseErrors) {
|
||||
unsigned char *src_data = 0, *ref_data = 0;
|
||||
uint64_t error_count = 0;
|
||||
unsigned int ref_width, ref_height;
|
||||
unsigned int src_width, src_height;
|
||||
|
||||
if (src_file == NULL || ref_file == NULL) {
|
||||
if (verboseErrors) {
|
||||
std::cerr << "PGMvsPGM: src_file or ref_file is NULL."
|
||||
" Aborting comparison\n";
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
if (verboseErrors) {
|
||||
std::cerr << "> Compare (a)rendered: <" << src_file << ">\n";
|
||||
std::cerr << "> (b)reference: <" << ref_file << ">\n";
|
||||
}
|
||||
|
||||
if (sdkLoadPPMub(ref_file, &ref_data, &ref_width, &ref_height) != true) {
|
||||
if (verboseErrors) {
|
||||
std::cerr << "PGMvsPGM: unable to load ref image file: " << ref_file
|
||||
<< "\n";
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
if (sdkLoadPPMub(src_file, &src_data, &src_width, &src_height) != true) {
|
||||
std::cerr << "PGMvsPGM: unable to load src image file: " << src_file
|
||||
<< "\n";
|
||||
return false;
|
||||
}
|
||||
|
||||
if (src_height != ref_height || src_width != ref_width) {
|
||||
if (verboseErrors) {
|
||||
std::cerr << "PGMvsPGM: source and ref size mismatch (" << src_width
|
||||
<< "," << src_height << ")vs(" << ref_width << "," << ref_height
|
||||
<< ")\n";
|
||||
}
|
||||
}
|
||||
|
||||
if (verboseErrors)
|
||||
std::cerr << "PGMvsPGM: comparing images size (" << src_width << ","
|
||||
<< src_height << ") epsilon(" << epsilon << "), threshold("
|
||||
<< threshold * 100 << "%)\n";
|
||||
|
||||
if (compareData(ref_data, src_data, src_width * src_height, epsilon,
|
||||
threshold) == false) {
|
||||
error_count = 1;
|
||||
}
|
||||
|
||||
if (error_count == 0) {
|
||||
if (verboseErrors) {
|
||||
std::cerr << " OK\n\n";
|
||||
}
|
||||
} else {
|
||||
if (verboseErrors) {
|
||||
std::cerr << " FAILURE! " << error_count << " errors...\n\n";
|
||||
}
|
||||
}
|
||||
|
||||
// returns true if all pixels pass
|
||||
return (error_count == 0) ? true : false;
|
||||
}
|
||||
|
||||
#endif // COMMON_HELPER_IMAGE_H_
|
|
@ -0,0 +1,683 @@
|
|||
/**
|
||||
* Copyright 1993-2013 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* Please refer to the NVIDIA end user license agreement (EULA) associated
|
||||
* with this source code for terms and conditions that govern your use of
|
||||
* this software. Any use, reproduction, disclosure, or distribution of
|
||||
* this software and related documentation outside the terms of the EULA
|
||||
* is strictly prohibited.
|
||||
*
|
||||
*/
|
||||
|
||||
// These are helper functions for the SDK samples (string parsing, timers, etc)
|
||||
#ifndef COMMON_HELPER_STRING_H_
|
||||
#define COMMON_HELPER_STRING_H_
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <fstream>
|
||||
#include <string>
|
||||
|
||||
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
|
||||
#ifndef _CRT_SECURE_NO_DEPRECATE
|
||||
#define _CRT_SECURE_NO_DEPRECATE
|
||||
#endif
|
||||
#ifndef STRCASECMP
|
||||
#define STRCASECMP _stricmp
|
||||
#endif
|
||||
#ifndef STRNCASECMP
|
||||
#define STRNCASECMP _strnicmp
|
||||
#endif
|
||||
#ifndef STRCPY
|
||||
#define STRCPY(sFilePath, nLength, sPath) strcpy_s(sFilePath, nLength, sPath)
|
||||
#endif
|
||||
|
||||
#ifndef FOPEN
|
||||
#define FOPEN(fHandle, filename, mode) fopen_s(&fHandle, filename, mode)
|
||||
#endif
|
||||
#ifndef FOPEN_FAIL
|
||||
#define FOPEN_FAIL(result) (result != 0)
|
||||
#endif
|
||||
#ifndef SSCANF
|
||||
#define SSCANF sscanf_s
|
||||
#endif
|
||||
#ifndef SPRINTF
|
||||
#define SPRINTF sprintf_s
|
||||
#endif
|
||||
#else // Linux Includes
|
||||
#include <string.h>
|
||||
#include <strings.h>
|
||||
|
||||
#ifndef STRCASECMP
|
||||
#define STRCASECMP strcasecmp
|
||||
#endif
|
||||
#ifndef STRNCASECMP
|
||||
#define STRNCASECMP strncasecmp
|
||||
#endif
|
||||
#ifndef STRCPY
|
||||
#define STRCPY(sFilePath, nLength, sPath) strcpy(sFilePath, sPath)
|
||||
#endif
|
||||
|
||||
#ifndef FOPEN
|
||||
#define FOPEN(fHandle, filename, mode) (fHandle = fopen(filename, mode))
|
||||
#endif
|
||||
#ifndef FOPEN_FAIL
|
||||
#define FOPEN_FAIL(result) (result == NULL)
|
||||
#endif
|
||||
#ifndef SSCANF
|
||||
#define SSCANF sscanf
|
||||
#endif
|
||||
#ifndef SPRINTF
|
||||
#define SPRINTF sprintf
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef EXIT_WAIVED
|
||||
#define EXIT_WAIVED 2
|
||||
#endif
|
||||
|
||||
// CUDA Utility Helper Functions
|
||||
//! Count the leading `delimiter` characters of a string.
//!
//! @return the index of the first character that differs from `delimiter`,
//!         or 0 when that index is at or beyond the last character of the
//!         string (so "--" and "-a" both yield 0)
//! @param delimiter  character to skip at the start of the string
//! @param string     NUL-terminated string to inspect
inline int stringRemoveDelimiter(char delimiter, const char *string) {
  const char *cursor = string;

  while (*cursor == delimiter) {
    ++cursor;
  }

  const int skipped = static_cast<int>(cursor - string);
  const int last_index = static_cast<int>(strlen(string) - 1);

  return (skipped >= last_index) ? 0 : skipped;
}
|
||||
|
||||
//! Locate the extension of a file name (the text after its last '.').
//!
//! @return the index of the first extension character inside `filename`, or 0
//!         when there is no extension (no '.', or the only '.' is the very
//!         first character)
//! @param filename   NUL-terminated file name to inspect
//! @param extension  receives a pointer into `filename` at the extension, or
//!                   NULL when there is none
inline int getFileExtension(char *filename, char **extension) {
  // strrchr finds the last '.', which is what the former backwards scan was
  // looking for. That scan walked to negative indices for an empty string and
  // lost the extension when the dot sat at index 1 (e.g. "a.txt").
  char *last_dot = (filename != NULL) ? strrchr(filename, '.') : NULL;

  // A dot at index 0 keeps being treated as "no extension".
  if (last_dot == NULL || last_dot == filename) {
    *extension = NULL;
    return 0;
  }

  const int extension_index = static_cast<int>(last_dot - filename) + 1;
  *extension = &filename[extension_index];

  return extension_index;
}
|
||||
|
||||
inline bool checkCmdLineFlag(const int argc, const char **argv,
|
||||
const char *string_ref) {
|
||||
bool bFound = false;
|
||||
|
||||
if (argc >= 1) {
|
||||
for (int i = 1; i < argc; i++) {
|
||||
int string_start = stringRemoveDelimiter('-', argv[i]);
|
||||
const char *string_argv = &argv[i][string_start];
|
||||
|
||||
const char *equal_pos = strchr(string_argv, '=');
|
||||
int argv_length = static_cast<int>(
|
||||
equal_pos == 0 ? strlen(string_argv) : equal_pos - string_argv);
|
||||
|
||||
int length = static_cast<int>(strlen(string_ref));
|
||||
|
||||
if (length == argv_length &&
|
||||
!STRNCASECMP(string_argv, string_ref, length)) {
|
||||
bFound = true;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return bFound;
|
||||
}
|
||||
|
||||
// This function wraps the CUDA Driver API into a template function
|
||||
template <class T>
|
||||
inline bool getCmdLineArgumentValue(const int argc, const char **argv,
|
||||
const char *string_ref, T *value) {
|
||||
bool bFound = false;
|
||||
|
||||
if (argc >= 1) {
|
||||
for (int i = 1; i < argc; i++) {
|
||||
int string_start = stringRemoveDelimiter('-', argv[i]);
|
||||
const char *string_argv = &argv[i][string_start];
|
||||
int length = static_cast<int>(strlen(string_ref));
|
||||
|
||||
if (!STRNCASECMP(string_argv, string_ref, length)) {
|
||||
if (length + 1 <= static_cast<int>(strlen(string_argv))) {
|
||||
int auto_inc = (string_argv[length] == '=') ? 1 : 0;
|
||||
*value = (T)atoi(&string_argv[length + auto_inc]);
|
||||
}
|
||||
|
||||
bFound = true;
|
||||
i = argc;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return bFound;
|
||||
}
|
||||
|
||||
inline int getCmdLineArgumentInt(const int argc, const char **argv,
|
||||
const char *string_ref) {
|
||||
bool bFound = false;
|
||||
int value = -1;
|
||||
|
||||
if (argc >= 1) {
|
||||
for (int i = 1; i < argc; i++) {
|
||||
int string_start = stringRemoveDelimiter('-', argv[i]);
|
||||
const char *string_argv = &argv[i][string_start];
|
||||
int length = static_cast<int>(strlen(string_ref));
|
||||
|
||||
if (!STRNCASECMP(string_argv, string_ref, length)) {
|
||||
if (length + 1 <= static_cast<int>(strlen(string_argv))) {
|
||||
int auto_inc = (string_argv[length] == '=') ? 1 : 0;
|
||||
value = atoi(&string_argv[length + auto_inc]);
|
||||
} else {
|
||||
value = 0;
|
||||
}
|
||||
|
||||
bFound = true;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (bFound) {
|
||||
return value;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
inline float getCmdLineArgumentFloat(const int argc, const char **argv,
|
||||
const char *string_ref) {
|
||||
bool bFound = false;
|
||||
float value = -1;
|
||||
|
||||
if (argc >= 1) {
|
||||
for (int i = 1; i < argc; i++) {
|
||||
int string_start = stringRemoveDelimiter('-', argv[i]);
|
||||
const char *string_argv = &argv[i][string_start];
|
||||
int length = static_cast<int>(strlen(string_ref));
|
||||
|
||||
if (!STRNCASECMP(string_argv, string_ref, length)) {
|
||||
if (length + 1 <= static_cast<int>(strlen(string_argv))) {
|
||||
int auto_inc = (string_argv[length] == '=') ? 1 : 0;
|
||||
value = static_cast<float>(atof(&string_argv[length + auto_inc]));
|
||||
} else {
|
||||
value = 0.f;
|
||||
}
|
||||
|
||||
bFound = true;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (bFound) {
|
||||
return value;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
inline bool getCmdLineArgumentString(const int argc, const char **argv,
|
||||
const char *string_ref,
|
||||
char **string_retval) {
|
||||
bool bFound = false;
|
||||
|
||||
if (argc >= 1) {
|
||||
for (int i = 1; i < argc; i++) {
|
||||
int string_start = stringRemoveDelimiter('-', argv[i]);
|
||||
char *string_argv = const_cast<char*>(&argv[i][string_start]);
|
||||
int length = static_cast<int>(strlen(string_ref));
|
||||
|
||||
if (!STRNCASECMP(string_argv, string_ref, length)) {
|
||||
*string_retval = &string_argv[length + 1];
|
||||
bFound = true;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!bFound) {
|
||||
*string_retval = NULL;
|
||||
}
|
||||
|
||||
return bFound;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
//! Find the path for a file assuming that
|
||||
//! files are found in the searchPath.
|
||||
//!
|
||||
//! @return the path if succeeded, otherwise 0
|
||||
//! @param filename name of the file
|
||||
//! @param executable_path optional absolute path of the executable
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
inline char *sdkFindFilePath(const char *filename,
|
||||
const char *executable_path) {
|
||||
// <executable_name> defines a variable that is replaced with the name of the
|
||||
// executable
|
||||
|
||||
// Typical relative search paths to locate needed companion files (e.g. sample
|
||||
// input data, or JIT source files) The origin for the relative search may be
|
||||
// the .exe file, a .bat file launching an .exe, a browser .exe launching the
|
||||
// .exe or .bat, etc
|
||||
const char *searchPath[] = {
|
||||
"./", // same dir
|
||||
"./<executable_name>_data_files/",
|
||||
"./common/", // "/common/" subdir
|
||||
"./common/data/", // "/common/data/" subdir
|
||||
"./data/", // "/data/" subdir
|
||||
"./src/", // "/src/" subdir
|
||||
"./src/<executable_name>/data/", // "/src/<executable_name>/data/" subdir
|
||||
"./inc/", // "/inc/" subdir
|
||||
"./0_Simple/", // "/0_Simple/" subdir
|
||||
"./1_Utilities/", // "/1_Utilities/" subdir
|
||||
"./2_Graphics/", // "/2_Graphics/" subdir
|
||||
"./3_Imaging/", // "/3_Imaging/" subdir
|
||||
"./4_Finance/", // "/4_Finance/" subdir
|
||||
"./5_Simulations/", // "/5_Simulations/" subdir
|
||||
"./6_Advanced/", // "/6_Advanced/" subdir
|
||||
"./7_CUDALibraries/", // "/7_CUDALibraries/" subdir
|
||||
"./8_Android/", // "/8_Android/" subdir
|
||||
"./samples/", // "/samples/" subdir
|
||||
|
||||
"./0_Simple/<executable_name>/data/", // "/0_Simple/<executable_name>/data/"
|
||||
// subdir
|
||||
"./1_Utilities/<executable_name>/data/", // "/1_Utilities/<executable_name>/data/"
|
||||
// subdir
|
||||
"./2_Graphics/<executable_name>/data/", // "/2_Graphics/<executable_name>/data/"
|
||||
// subdir
|
||||
"./3_Imaging/<executable_name>/data/", // "/3_Imaging/<executable_name>/data/"
|
||||
// subdir
|
||||
"./4_Finance/<executable_name>/data/", // "/4_Finance/<executable_name>/data/"
|
||||
// subdir
|
||||
"./5_Simulations/<executable_name>/data/", // "/5_Simulations/<executable_name>/data/"
|
||||
// subdir
|
||||
"./6_Advanced/<executable_name>/data/", // "/6_Advanced/<executable_name>/data/"
|
||||
// subdir
|
||||
"./7_CUDALibraries/<executable_name>/", // "/7_CUDALibraries/<executable_name>/"
|
||||
// subdir
|
||||
"./7_CUDALibraries/<executable_name>/data/", // "/7_CUDALibraries/<executable_name>/data/"
|
||||
// subdir
|
||||
|
||||
"../", // up 1 in tree
|
||||
"../common/", // up 1 in tree, "/common/" subdir
|
||||
"../common/data/", // up 1 in tree, "/common/data/" subdir
|
||||
"../data/", // up 1 in tree, "/data/" subdir
|
||||
"../src/", // up 1 in tree, "/src/" subdir
|
||||
"../inc/", // up 1 in tree, "/inc/" subdir
|
||||
|
||||
"../0_Simple/<executable_name>/data/", // up 1 in tree,
|
||||
// "/0_Simple/<executable_name>/"
|
||||
// subdir
|
||||
"../1_Utilities/<executable_name>/data/", // up 1 in tree,
|
||||
// "/1_Utilities/<executable_name>/"
|
||||
// subdir
|
||||
"../2_Graphics/<executable_name>/data/", // up 1 in tree,
|
||||
// "/2_Graphics/<executable_name>/"
|
||||
// subdir
|
||||
"../3_Imaging/<executable_name>/data/", // up 1 in tree,
|
||||
// "/3_Imaging/<executable_name>/"
|
||||
// subdir
|
||||
"../4_Finance/<executable_name>/data/", // up 1 in tree,
|
||||
// "/4_Finance/<executable_name>/"
|
||||
// subdir
|
||||
"../5_Simulations/<executable_name>/data/", // up 1 in tree,
|
||||
// "/5_Simulations/<executable_name>/"
|
||||
// subdir
|
||||
"../6_Advanced/<executable_name>/data/", // up 1 in tree,
|
||||
// "/6_Advanced/<executable_name>/"
|
||||
// subdir
|
||||
"../7_CUDALibraries/<executable_name>/data/", // up 1 in tree,
|
||||
// "/7_CUDALibraries/<executable_name>/"
|
||||
// subdir
|
||||
"../8_Android/<executable_name>/data/", // up 1 in tree,
|
||||
// "/8_Android/<executable_name>/"
|
||||
// subdir
|
||||
"../samples/<executable_name>/data/", // up 1 in tree,
|
||||
// "/samples/<executable_name>/"
|
||||
// subdir
|
||||
"../../", // up 2 in tree
|
||||
"../../common/", // up 2 in tree, "/common/" subdir
|
||||
"../../common/data/", // up 2 in tree, "/common/data/" subdir
|
||||
"../../data/", // up 2 in tree, "/data/" subdir
|
||||
"../../src/", // up 2 in tree, "/src/" subdir
|
||||
"../../inc/", // up 2 in tree, "/inc/" subdir
|
||||
"../../sandbox/<executable_name>/data/", // up 2 in tree,
|
||||
// "/sandbox/<executable_name>/"
|
||||
// subdir
|
||||
"../../0_Simple/<executable_name>/data/", // up 2 in tree,
|
||||
// "/0_Simple/<executable_name>/"
|
||||
// subdir
|
||||
"../../1_Utilities/<executable_name>/data/", // up 2 in tree,
|
||||
// "/1_Utilities/<executable_name>/"
|
||||
// subdir
|
||||
"../../2_Graphics/<executable_name>/data/", // up 2 in tree,
|
||||
// "/2_Graphics/<executable_name>/"
|
||||
// subdir
|
||||
"../../3_Imaging/<executable_name>/data/", // up 2 in tree,
|
||||
// "/3_Imaging/<executable_name>/"
|
||||
// subdir
|
||||
"../../4_Finance/<executable_name>/data/", // up 2 in tree,
|
||||
// "/4_Finance/<executable_name>/"
|
||||
// subdir
|
||||
"../../5_Simulations/<executable_name>/data/", // up 2 in tree,
|
||||
// "/5_Simulations/<executable_name>/"
|
||||
// subdir
|
||||
"../../6_Advanced/<executable_name>/data/", // up 2 in tree,
|
||||
// "/6_Advanced/<executable_name>/"
|
||||
// subdir
|
||||
"../../7_CUDALibraries/<executable_name>/data/", // up 2 in tree,
|
||||
// "/7_CUDALibraries/<executable_name>/"
|
||||
// subdir
|
||||
"../../8_Android/<executable_name>/data/", // up 2 in tree,
|
||||
// "/8_Android/<executable_name>/"
|
||||
// subdir
|
||||
"../../samples/<executable_name>/data/", // up 2 in tree,
|
||||
// "/samples/<executable_name>/"
|
||||
// subdir
|
||||
"../../../", // up 3 in tree
|
||||
"../../../src/<executable_name>/", // up 3 in tree,
|
||||
// "/src/<executable_name>/" subdir
|
||||
"../../../src/<executable_name>/data/", // up 3 in tree,
|
||||
// "/src/<executable_name>/data/"
|
||||
// subdir
|
||||
"../../../src/<executable_name>/src/", // up 3 in tree,
|
||||
// "/src/<executable_name>/src/"
|
||||
// subdir
|
||||
"../../../src/<executable_name>/inc/", // up 3 in tree,
|
||||
// "/src/<executable_name>/inc/"
|
||||
// subdir
|
||||
"../../../sandbox/<executable_name>/", // up 3 in tree,
|
||||
// "/sandbox/<executable_name>/"
|
||||
// subdir
|
||||
"../../../sandbox/<executable_name>/data/", // up 3 in tree,
|
||||
// "/sandbox/<executable_name>/data/"
|
||||
// subdir
|
||||
"../../../sandbox/<executable_name>/src/", // up 3 in tree,
|
||||
// "/sandbox/<executable_name>/src/"
|
||||
// subdir
|
||||
"../../../sandbox/<executable_name>/inc/", // up 3 in tree,
|
||||
// "/sandbox/<executable_name>/inc/"
|
||||
// subdir
|
||||
"../../../0_Simple/<executable_name>/data/", // up 3 in tree,
|
||||
// "/0_Simple/<executable_name>/"
|
||||
// subdir
|
||||
"../../../1_Utilities/<executable_name>/data/", // up 3 in tree,
|
||||
// "/1_Utilities/<executable_name>/"
|
||||
// subdir
|
||||
"../../../2_Graphics/<executable_name>/data/", // up 3 in tree,
|
||||
// "/2_Graphics/<executable_name>/"
|
||||
// subdir
|
||||
"../../../3_Imaging/<executable_name>/data/", // up 3 in tree,
|
||||
// "/3_Imaging/<executable_name>/"
|
||||
// subdir
|
||||
"../../../4_Finance/<executable_name>/data/", // up 3 in tree,
|
||||
// "/4_Finance/<executable_name>/"
|
||||
// subdir
|
||||
"../../../5_Simulations/<executable_name>/data/", // up 3 in tree,
|
||||
// "/5_Simulations/<executable_name>/"
|
||||
// subdir
|
||||
"../../../6_Advanced/<executable_name>/data/", // up 3 in tree,
|
||||
// "/6_Advanced/<executable_name>/"
|
||||
// subdir
|
||||
"../../../7_CUDALibraries/<executable_name>/data/", // up 3 in tree,
|
||||
// "/7_CUDALibraries/<executable_name>/"
|
||||
// subdir
|
||||
"../../../8_Android/<executable_name>/data/", // up 3 in tree,
|
||||
// "/8_Android/<executable_name>/"
|
||||
// subdir
|
||||
"../../../0_Simple/<executable_name>/", // up 3 in tree,
|
||||
// "/0_Simple/<executable_name>/"
|
||||
// subdir
|
||||
"../../../1_Utilities/<executable_name>/", // up 3 in tree,
|
||||
// "/1_Utilities/<executable_name>/"
|
||||
// subdir
|
||||
"../../../2_Graphics/<executable_name>/", // up 3 in tree,
|
||||
// "/2_Graphics/<executable_name>/"
|
||||
// subdir
|
||||
"../../../3_Imaging/<executable_name>/", // up 3 in tree,
|
||||
// "/3_Imaging/<executable_name>/"
|
||||
// subdir
|
||||
"../../../4_Finance/<executable_name>/", // up 3 in tree,
|
||||
// "/4_Finance/<executable_name>/"
|
||||
// subdir
|
||||
"../../../5_Simulations/<executable_name>/", // up 3 in tree,
|
||||
// "/5_Simulations/<executable_name>/"
|
||||
// subdir
|
||||
"../../../6_Advanced/<executable_name>/", // up 3 in tree,
|
||||
// "/6_Advanced/<executable_name>/"
|
||||
// subdir
|
||||
"../../../7_CUDALibraries/<executable_name>/", // up 3 in tree,
|
||||
// "/7_CUDALibraries/<executable_name>/"
|
||||
// subdir
|
||||
"../../../8_Android/<executable_name>/", // up 3 in tree,
|
||||
// "/8_Android/<executable_name>/"
|
||||
// subdir
|
||||
"../../../samples/<executable_name>/data/", // up 3 in tree,
|
||||
// "/samples/<executable_name>/"
|
||||
// subdir
|
||||
"../../../common/", // up 3 in tree, "../../../common/" subdir
|
||||
"../../../common/data/", // up 3 in tree, "../../../common/data/" subdir
|
||||
"../../../data/", // up 3 in tree, "../../../data/" subdir
|
||||
"../../../../", // up 4 in tree
|
||||
"../../../../src/<executable_name>/", // up 4 in tree,
|
||||
// "/src/<executable_name>/" subdir
|
||||
"../../../../src/<executable_name>/data/", // up 4 in tree,
|
||||
// "/src/<executable_name>/data/"
|
||||
// subdir
|
||||
"../../../../src/<executable_name>/src/", // up 4 in tree,
|
||||
// "/src/<executable_name>/src/"
|
||||
// subdir
|
||||
"../../../../src/<executable_name>/inc/", // up 4 in tree,
|
||||
// "/src/<executable_name>/inc/"
|
||||
// subdir
|
||||
"../../../../sandbox/<executable_name>/", // up 4 in tree,
|
||||
// "/sandbox/<executable_name>/"
|
||||
// subdir
|
||||
"../../../../sandbox/<executable_name>/data/", // up 4 in tree,
|
||||
// "/sandbox/<executable_name>/data/"
|
||||
// subdir
|
||||
"../../../../sandbox/<executable_name>/src/", // up 4 in tree,
|
||||
// "/sandbox/<executable_name>/src/"
|
||||
// subdir
|
||||
"../../../../sandbox/<executable_name>/inc/", // up 4 in tree,
|
||||
// "/sandbox/<executable_name>/inc/"
|
||||
// subdir
|
||||
"../../../../0_Simple/<executable_name>/data/", // up 4 in tree,
|
||||
// "/0_Simple/<executable_name>/"
|
||||
// subdir
|
||||
"../../../../1_Utilities/<executable_name>/data/", // up 4 in tree,
|
||||
// "/1_Utilities/<executable_name>/"
|
||||
// subdir
|
||||
"../../../../2_Graphics/<executable_name>/data/", // up 4 in tree,
|
||||
// "/2_Graphics/<executable_name>/"
|
||||
// subdir
|
||||
"../../../../3_Imaging/<executable_name>/data/", // up 4 in tree,
|
||||
// "/3_Imaging/<executable_name>/"
|
||||
// subdir
|
||||
"../../../../4_Finance/<executable_name>/data/", // up 4 in tree,
|
||||
// "/4_Finance/<executable_name>/"
|
||||
// subdir
|
||||
"../../../../5_Simulations/<executable_name>/data/", // up 4 in tree,
|
||||
// "/5_Simulations/<executable_name>/"
|
||||
// subdir
|
||||
"../../../../6_Advanced/<executable_name>/data/", // up 4 in tree,
|
||||
// "/6_Advanced/<executable_name>/"
|
||||
// subdir
|
||||
"../../../../7_CUDALibraries/<executable_name>/data/", // up 4 in tree,
|
||||
// "/7_CUDALibraries/<executable_name>/"
|
||||
// subdir
|
||||
"../../../../8_Android/<executable_name>/data/", // up 4 in tree,
|
||||
// "/8_Android/<executable_name>/"
|
||||
// subdir
|
||||
"../../../../0_Simple/<executable_name>/", // up 4 in tree,
|
||||
// "/0_Simple/<executable_name>/"
|
||||
// subdir
|
||||
"../../../../1_Utilities/<executable_name>/", // up 4 in tree,
|
||||
// "/1_Utilities/<executable_name>/"
|
||||
// subdir
|
||||
"../../../../2_Graphics/<executable_name>/", // up 4 in tree,
|
||||
// "/2_Graphics/<executable_name>/"
|
||||
// subdir
|
||||
"../../../../3_Imaging/<executable_name>/", // up 4 in tree,
|
||||
// "/3_Imaging/<executable_name>/"
|
||||
// subdir
|
||||
"../../../../4_Finance/<executable_name>/", // up 4 in tree,
|
||||
// "/4_Finance/<executable_name>/"
|
||||
// subdir
|
||||
"../../../../5_Simulations/<executable_name>/", // up 4 in tree,
|
||||
// "/5_Simulations/<executable_name>/"
|
||||
// subdir
|
||||
"../../../../6_Advanced/<executable_name>/", // up 4 in tree,
|
||||
// "/6_Advanced/<executable_name>/"
|
||||
// subdir
|
||||
"../../../../7_CUDALibraries/<executable_name>/", // up 4 in tree,
|
||||
// "/7_CUDALibraries/<executable_name>/"
|
||||
// subdir
|
||||
"../../../../8_Android/<executable_name>/", // up 4 in tree,
|
||||
// "/8_Android/<executable_name>/"
|
||||
// subdir
|
||||
"../../../../samples/<executable_name>/data/", // up 4 in tree,
|
||||
// "/samples/<executable_name>/"
|
||||
// subdir
|
||||
"../../../../common/", // up 4 in tree, "../../../common/" subdir
|
||||
"../../../../common/data/", // up 4 in tree, "../../../common/data/"
|
||||
// subdir
|
||||
"../../../../data/", // up 4 in tree, "../../../data/" subdir
|
||||
"../../../../../", // up 5 in tree
|
||||
"../../../../../src/<executable_name>/", // up 5 in tree,
|
||||
// "/src/<executable_name>/"
|
||||
// subdir
|
||||
"../../../../../src/<executable_name>/data/", // up 5 in tree,
|
||||
// "/src/<executable_name>/data/"
|
||||
// subdir
|
||||
"../../../../../src/<executable_name>/src/", // up 5 in tree,
|
||||
// "/src/<executable_name>/src/"
|
||||
// subdir
|
||||
"../../../../../src/<executable_name>/inc/", // up 5 in tree,
|
||||
// "/src/<executable_name>/inc/"
|
||||
// subdir
|
||||
"../../../../../sandbox/<executable_name>/", // up 5 in tree,
|
||||
// "/sandbox/<executable_name>/"
|
||||
// subdir
|
||||
"../../../../../sandbox/<executable_name>/data/", // up 5 in tree,
|
||||
// "/sandbox/<executable_name>/data/"
|
||||
// subdir
|
||||
"../../../../../sandbox/<executable_name>/src/", // up 5 in tree,
|
||||
// "/sandbox/<executable_name>/src/"
|
||||
// subdir
|
||||
"../../../../../sandbox/<executable_name>/inc/", // up 5 in tree,
|
||||
// "/sandbox/<executable_name>/inc/"
|
||||
// subdir
|
||||
"../../../../../0_Simple/<executable_name>/data/", // up 5 in tree,
|
||||
// "/0_Simple/<executable_name>/"
|
||||
// subdir
|
||||
"../../../../../1_Utilities/<executable_name>/data/", // up 5 in tree,
|
||||
// "/1_Utilities/<executable_name>/"
|
||||
// subdir
|
||||
"../../../../../2_Graphics/<executable_name>/data/", // up 5 in tree,
|
||||
// "/2_Graphics/<executable_name>/"
|
||||
// subdir
|
||||
"../../../../../3_Imaging/<executable_name>/data/", // up 5 in tree,
|
||||
// "/3_Imaging/<executable_name>/"
|
||||
// subdir
|
||||
"../../../../../4_Finance/<executable_name>/data/", // up 5 in tree,
|
||||
// "/4_Finance/<executable_name>/"
|
||||
// subdir
|
||||
"../../../../../5_Simulations/<executable_name>/data/", // up 5 in tree,
|
||||
// "/5_Simulations/<executable_name>/"
|
||||
// subdir
|
||||
"../../../../../6_Advanced/<executable_name>/data/", // up 5 in tree,
|
||||
// "/6_Advanced/<executable_name>/"
|
||||
// subdir
|
||||
"../../../../../7_CUDALibraries/<executable_name>/data/", // up 5 in
|
||||
// tree,
|
||||
// "/7_CUDALibraries/<executable_name>/"
|
||||
// subdir
|
||||
"../../../../../8_Android/<executable_name>/data/", // up 5 in tree,
|
||||
// "/8_Android/<executable_name>/"
|
||||
// subdir
|
||||
"../../../../../samples/<executable_name>/data/", // up 5 in tree,
|
||||
// "/samples/<executable_name>/"
|
||||
// subdir
|
||||
"../../../../../common/", // up 5 in tree, "../../../common/" subdir
|
||||
"../../../../../common/data/", // up 5 in tree, "../../../common/data/"
|
||||
// subdir
|
||||
};
|
||||
|
||||
// Extract the executable name
|
||||
std::string executable_name;
|
||||
|
||||
if (executable_path != 0) {
|
||||
executable_name = std::string(executable_path);
|
||||
|
||||
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
|
||||
// Windows path delimiter
|
||||
size_t delimiter_pos = executable_name.find_last_of('\\');
|
||||
executable_name.erase(0, delimiter_pos + 1);
|
||||
|
||||
if (executable_name.rfind(".exe") != std::string::npos) {
|
||||
// we strip .exe, only if the .exe is found
|
||||
executable_name.resize(executable_name.size() - 4);
|
||||
}
|
||||
|
||||
#else
|
||||
// Linux & OSX path delimiter
|
||||
size_t delimiter_pos = executable_name.find_last_of('/');
|
||||
executable_name.erase(0, delimiter_pos + 1);
|
||||
#endif
|
||||
}
|
||||
|
||||
// Loop over all search paths and return the first hit
|
||||
for (unsigned int i = 0; i < sizeof(searchPath) / sizeof(char *); ++i) {
|
||||
std::string path(searchPath[i]);
|
||||
size_t executable_name_pos = path.find("<executable_name>");
|
||||
|
||||
// If there is executable_name variable in the searchPath
|
||||
// replace it with the value
|
||||
if (executable_name_pos != std::string::npos) {
|
||||
if (executable_path != 0) {
|
||||
path.replace(executable_name_pos, strlen("<executable_name>"),
|
||||
executable_name);
|
||||
} else {
|
||||
// Skip this path entry if no executable argument is given
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef _DEBUG
|
||||
printf("sdkFindFilePath <%s> in %s\n", filename, path.c_str());
|
||||
#endif
|
||||
|
||||
// Test if the file exists
|
||||
path.append(filename);
|
||||
FILE *fp;
|
||||
FOPEN(fp, path.c_str(), "rb");
|
||||
|
||||
if (fp != NULL) {
|
||||
fclose(fp);
|
||||
// File found
|
||||
// returning an allocated array here for backwards compatibility reasons
|
||||
char *file_path = reinterpret_cast<char *>(malloc(path.length() + 1));
|
||||
STRCPY(file_path, path.length() + 1, path.c_str());
|
||||
return file_path;
|
||||
}
|
||||
|
||||
if (fp) {
|
||||
fclose(fp);
|
||||
}
|
||||
}
|
||||
|
||||
// File not found
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif // COMMON_HELPER_STRING_H_
|
|
@ -0,0 +1,450 @@
|
|||
/**
|
||||
* Copyright 1993-2013 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* Please refer to the NVIDIA end user license agreement (EULA) associated
|
||||
* with this source code for terms and conditions that govern your use of
|
||||
* this software. Any use, reproduction, disclosure, or distribution of
|
||||
* this software and related documentation outside the terms of the EULA
|
||||
* is strictly prohibited.
|
||||
*
|
||||
*/
|
||||
|
||||
// Helper Timing Functions
|
||||
#ifndef COMMON_HELPER_TIMER_H_
|
||||
#define COMMON_HELPER_TIMER_H_
|
||||
|
||||
#ifndef EXIT_WAIVED
|
||||
#define EXIT_WAIVED 2
|
||||
#endif
|
||||
|
||||
// includes, system
|
||||
#include <vector>
|
||||
|
||||
// includes, project
|
||||
#include "exception.h"
|
||||
|
||||
// Definition of the StopWatch Interface, this is used if we don't want to use
|
||||
// the CUT functions But rather in a self contained class interface
|
||||
// Abstract stop-watch contract. The platform-specific timers in this header
// derive from it, and the sdk*Timer() helpers drive timers through it.
class StopWatchInterface {
 public:
  StopWatchInterface() {}
  virtual ~StopWatchInterface() {}

  //! Begin a time measurement.
  virtual void start() = 0;

  //! End the current time measurement.
  virtual void stop() = 0;

  //! Set the time counters back to zero.
  virtual void reset() = 0;

  //! Measured time in milliseconds; what exactly is included is defined by
  //! the implementing class.
  virtual float getTime() = 0;

  //! Mean time per finished (stopped) session, in milliseconds.
  virtual float getAverageTime() = 0;
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////
|
||||
// Begin Stopwatch timer class definitions for all OS platforms //
|
||||
//////////////////////////////////////////////////////////////////
|
||||
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
|
||||
// includes, system
|
||||
// WIN32_LEAN_AND_MEAN is the macro <windows.h> actually honours to leave out
// rarely used APIs; the timer only needs the performance-counter functions.
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#endif
// Misspelled legacy name: <windows.h> ignores it, but it stays defined in
// case other code tests for it.
#define WINDOWS_LEAN_AND_MEAN
#include <windows.h>
// Remove any min/max macros left behind by the include above.
#undef min
#undef max
|
||||
|
||||
//! Windows specific implementation of StopWatch
|
||||
class StopWatchWin : public StopWatchInterface {
|
||||
public:
|
||||
//! Constructor, default
|
||||
StopWatchWin()
|
||||
: start_time(),
|
||||
end_time(),
|
||||
diff_time(0.0f),
|
||||
total_time(0.0f),
|
||||
running(false),
|
||||
clock_sessions(0),
|
||||
freq(0),
|
||||
freq_set(false) {
|
||||
if (!freq_set) {
|
||||
// helper variable
|
||||
LARGE_INTEGER temp;
|
||||
|
||||
// get the tick frequency from the OS
|
||||
QueryPerformanceFrequency(reinterpret_cast<LARGE_INTEGER *>(&temp));
|
||||
|
||||
// convert to type in which it is needed
|
||||
freq = (static_cast<double>(temp.QuadPart)) / 1000.0;
|
||||
|
||||
// rememeber query
|
||||
freq_set = true;
|
||||
}
|
||||
}
|
||||
|
||||
// Destructor
|
||||
~StopWatchWin() {}
|
||||
|
||||
public:
|
||||
//! Start time measurement
|
||||
inline void start();
|
||||
|
||||
//! Stop time measurement
|
||||
inline void stop();
|
||||
|
||||
//! Reset time counters to zero
|
||||
inline void reset();
|
||||
|
||||
//! Time in msec. after start. If the stop watch is still running (i.e. there
|
||||
//! was no call to stop()) then the elapsed time is returned, otherwise the
|
||||
//! time between the last start() and stop call is returned
|
||||
inline float getTime();
|
||||
|
||||
//! Mean time to date based on the number of times the stopwatch has been
|
||||
//! _stopped_ (ie finished sessions) and the current total time
|
||||
inline float getAverageTime();
|
||||
|
||||
private:
|
||||
// member variables
|
||||
|
||||
//! Start of measurement
|
||||
LARGE_INTEGER start_time;
|
||||
//! End of measurement
|
||||
LARGE_INTEGER end_time;
|
||||
|
||||
//! Time difference between the last start and stop
|
||||
float diff_time;
|
||||
|
||||
//! TOTAL time difference between starts and stops
|
||||
float total_time;
|
||||
|
||||
//! flag if the stop watch is running
|
||||
bool running;
|
||||
|
||||
//! Number of times clock has been started
|
||||
//! and stopped to allow averaging
|
||||
int clock_sessions;
|
||||
|
||||
//! tick frequency
|
||||
double freq;
|
||||
|
||||
//! flag if the frequency has been set
|
||||
bool freq_set;
|
||||
};
|
||||
|
||||
// functions, inlined
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Start time measurement
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
inline void StopWatchWin::start() {
|
||||
QueryPerformanceCounter(reinterpret_cast<LARGE_INTEGER *>(&start_time));
|
||||
running = true;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Stop time measurement and increment add to the current diff_time summation
|
||||
//! variable. Also increment the number of times this clock has been run.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
inline void StopWatchWin::stop() {
|
||||
QueryPerformanceCounter(reinterpret_cast<LARGE_INTEGER *>(&end_time));
|
||||
diff_time = static_cast<float>(((static_cast<double>(end_time.QuadPart) -
|
||||
static_cast<double>(start_time.QuadPart)) /
|
||||
freq));
|
||||
|
||||
total_time += diff_time;
|
||||
clock_sessions++;
|
||||
running = false;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Reset the timer to 0. Does not change the timer running state but does
|
||||
//! recapture this point in time as the current start time if it is running.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
inline void StopWatchWin::reset() {
|
||||
diff_time = 0;
|
||||
total_time = 0;
|
||||
clock_sessions = 0;
|
||||
|
||||
if (running) {
|
||||
QueryPerformanceCounter(reinterpret_cast<LARGE_INTEGER *>(&start_time));
|
||||
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Time in msec. after start. If the stop watch is still running (i.e. there
|
||||
//! was no call to stop()) then the elapsed time is returned added to the
|
||||
//! current diff_time sum, otherwise the current summed time difference alone
|
||||
//! is returned.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
inline float StopWatchWin::getTime() {
|
||||
// Return the TOTAL time to date
|
||||
float retval = total_time;
|
||||
|
||||
if (running) {
|
||||
LARGE_INTEGER temp;
|
||||
QueryPerformanceCounter(reinterpret_cast<LARGE_INTEGER *>(&temp));
|
||||
retval += static_cast<float>(((static_cast<double>(temp.QuadPart) -
|
||||
static_cast<double>(start_time.QuadPart)) /
|
||||
freq));
|
||||
}
|
||||
|
||||
return retval;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Time in msec. for a single run based on the total number of COMPLETED runs
|
||||
//! and the total time.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
inline float StopWatchWin::getAverageTime() {
|
||||
return (clock_sessions > 0) ? (total_time / clock_sessions) : 0.0f;
|
||||
}
|
||||
#else
|
||||
// Declarations for Stopwatch on Linux and Mac OSX
|
||||
// includes, system
|
||||
#include <sys/time.h>
|
||||
#include <ctime>
|
||||
|
||||
//! Linux and Mac OSX specific implementation of StopWatch
|
||||
//! Stop watch built on gettimeofday(), used on the non-Windows platforms.
class StopWatchLinux : public StopWatchInterface {
 public:
  //! Default constructor: all counters zero, watch not running.
  StopWatchLinux()
      : start_time(),
        diff_time(0.0),
        total_time(0.0),
        running(false),
        clock_sessions(0) {}

  // Destructor
  virtual ~StopWatchLinux() {}

  //! Start time measurement
  inline void start();

  //! Stop time measurement
  inline void stop();

  //! Reset time counters to zero
  inline void reset();

  //! Accumulated time in msec. over all finished sessions, plus the time
  //! elapsed in the current session if the watch is still running.
  inline float getTime();

  //! Mean time in msec. per finished (stopped) session.
  inline float getAverageTime();

 private:
  //! Milliseconds between start_time and the current time.
  inline float getDiffTime();

  struct timeval start_time;  //!< wall-clock time at the last start()/reset()

  float diff_time;   //!< msec. between the last start() and stop()
  float total_time;  //!< msec. summed over all finished sessions

  bool running;  //!< true between start() and stop()

  int clock_sessions;  //!< number of finished sessions, used for averaging
};
|
||||
|
||||
// functions, inlined
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Start time measurement
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
inline void StopWatchLinux::start() {
  running = true;
  // Record the wall-clock time at which this session begins.
  gettimeofday(&start_time, 0);
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Stop time measurement and increment add to the current diff_time summation
|
||||
//! variable. Also increment the number of times this clock has been run.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
inline void StopWatchLinux::stop() {
  // Length of the session that is ending, in milliseconds.
  const float session_ms = getDiffTime();

  diff_time = session_ms;
  total_time += session_ms;
  clock_sessions++;
  running = false;
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Reset the timer to 0. Does not change the timer running state but does
|
||||
//! recapture this point in time as the current start time if it is running.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
inline void StopWatchLinux::reset() {
|
||||
diff_time = 0;
|
||||
total_time = 0;
|
||||
clock_sessions = 0;
|
||||
|
||||
if (running) {
|
||||
gettimeofday(&start_time, 0);
|
||||
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Time in msec. after start. If the stop watch is still running (i.e. there
|
||||
//! was no call to stop()) then the elapsed time is returned added to the
|
||||
//! current diff_time sum, otherwise the current summed time difference alone
|
||||
//! is returned.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
inline float StopWatchLinux::getTime() {
|
||||
// Return the TOTAL time to date
|
||||
float retval = total_time;
|
||||
|
||||
if (running) {
|
||||
retval += getDiffTime();
|
||||
}
|
||||
|
||||
return retval;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Time in msec. for a single run based on the total number of COMPLETED runs
|
||||
//! and the total time.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
inline float StopWatchLinux::getAverageTime() {
|
||||
return (clock_sessions > 0) ? (total_time / clock_sessions) : 0.0f;
|
||||
}
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
inline float StopWatchLinux::getDiffTime() {
|
||||
struct timeval t_time;
|
||||
gettimeofday(&t_time, 0);
|
||||
|
||||
// time difference in milli-seconds
|
||||
return static_cast<float>(1000.0 * (t_time.tv_sec - start_time.tv_sec) +
|
||||
(0.001 * (t_time.tv_usec - start_time.tv_usec)));
|
||||
}
|
||||
#endif // WIN32
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Timer functionality exported
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Create a new timer
|
||||
//! @return true if a timer has been created, otherwise false
|
||||
//! @param timer_interface receives the new timer, 0 if the creation failed
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
inline bool sdkCreateTimer(StopWatchInterface **timer_interface) {
  // Allocate the platform-specific stop watch and hand it back through
  // *timer_interface. The derived-to-base conversion is left to the compiler:
  // an implicit conversion is always correct, whereas reinterpret_cast merely
  // reuses the address without any base-class adjustment.
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
  *timer_interface = new StopWatchWin();
#else
  *timer_interface = new StopWatchLinux();
#endif
  // Plain `new` reports failure by throwing, so this is normally true; the
  // check is kept for builds where allocation can return NULL.
  return *timer_interface != NULL;
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Delete a timer
|
||||
//! @return always true (a NULL timer is silently ignored)
|
||||
//! @param name of the timer to delete
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
inline bool sdkDeleteTimer(StopWatchInterface **timer_interface) {
  // Work on the caller's pointer directly so it can be cleared afterwards.
  StopWatchInterface *&timer = *timer_interface;

  if (timer != NULL) {
    delete timer;
    timer = NULL;  // leave no dangling pointer behind
  }

  // Always reports success, even when there was nothing to delete.
  return true;
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Start the time with name \a name
|
||||
//! @param name name of the timer to start
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
inline bool sdkStartTimer(StopWatchInterface **timer_interface) {
  StopWatchInterface *const timer = *timer_interface;

  // A NULL timer is silently ignored.
  if (timer != NULL) {
    timer->start();
  }

  // Always reports success.
  return true;
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Stop the time with name \a name. Does not reset.
|
||||
//! @param name name of the timer to stop
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
inline bool sdkStopTimer(StopWatchInterface **timer_interface) {
  StopWatchInterface *const timer = *timer_interface;

  // A NULL timer is silently ignored.
  if (timer != NULL) {
    timer->stop();
  }

  // Always reports success.
  return true;
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Resets the timer's counter.
|
||||
//! @param name name of the timer to reset.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
inline bool sdkResetTimer(StopWatchInterface **timer_interface) {
  StopWatchInterface *const timer = *timer_interface;

  // A NULL timer is silently ignored.
  if (timer != NULL) {
    timer->reset();
  }

  // Always reports success.
  return true;
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Return the average time for timer execution as the total time
|
||||
//! for the timer divided by the number of completed (stopped) runs the timer
|
||||
//! has made.
|
||||
//! Excludes the current running time if the timer is currently running.
|
||||
//! @param name name of the timer to return the time of
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
inline float sdkGetAverageTimerValue(StopWatchInterface **timer_interface) {
  StopWatchInterface *const timer = *timer_interface;

  // A NULL timer reports an average of 0.
  return (timer != NULL) ? timer->getAverageTime() : 0.0f;
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Total execution time for the timer over all runs since the last reset
|
||||
//! or timer creation.
|
||||
//! @param name name of the timer to obtain the value of.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
inline float sdkGetTimerValue(StopWatchInterface **timer_interface) {
  StopWatchInterface *const timer = *timer_interface;

  // A NULL timer reports a time of 0.
  return (timer != NULL) ? timer->getTime() : 0.0f;
}
|
||||
|
||||
#endif // COMMON_HELPER_TIMER_H_
|
||||
|
Binary file not shown.
|
@ -0,0 +1,343 @@
|
|||
// Monte Carlo simulation of a snowball fight:
|
||||
|
||||
// system includes
|
||||
#include <stdio.h>
|
||||
#include <assert.h>
|
||||
#include <malloc.h>
|
||||
#include <math.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
// CUDA runtime
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
// Helper functions and utilities to work with CUDA
|
||||
#include "helper_functions.h"
|
||||
#include "helper_cuda.h"
|
||||
|
||||
// setting the number of trials in the monte carlo simulation:
|
||||
#ifndef NUMTRIALS
|
||||
#define NUMTRIALS 1024
|
||||
#endif
|
||||
|
||||
#ifndef BLOCKSIZE
|
||||
#define BLOCKSIZE 8 // number of threads per block
|
||||
#endif
|
||||
|
||||
#define NUMBLOCKS ( NUMTRIALS / BLOCKSIZE )
|
||||
|
||||
|
||||
|
||||
// ranges for the random numbers:
|
||||
//#define PROJECT1
|
||||
|
||||
#ifdef PROJECT1
|
||||
const float TXMIN = -10.0; // truck starting location in feet
|
||||
const float TXMAX = 10.0; // truck starting location in feet
|
||||
const float TYMIN = 45.0; // depth distance to truck in feet
|
||||
const float TYMAX = 55.0; // depth distance to truck in feet
|
||||
const float TXVMIN = 10.0; // truck x velocity in feet/sec
|
||||
const float TXVMAX = 30.0; // truck x velocity in feet/sec
|
||||
const float SVMIN = 10.0; // snowball velocity in feet/sec
|
||||
const float SVMAX = 30.0; // snowball velocity in feet/sec
|
||||
const float STHMIN = 10.0; // snowball launch angle in degrees
|
||||
const float STHMAX = 90.0; // snowball launch angle in degrees
|
||||
const float HALFLENMIN = 20.; // half length of the truck in feet
|
||||
const float HALFLENMAX = 20.; // half length of the truck in feet
|
||||
#else
|
||||
const float TXMIN = -10.0; // truck starting location in feet
|
||||
const float TXMAX = 10.0; // truck starting location in feet
|
||||
const float TXVMIN = 15.0; // truck x velocity in feet/sec
|
||||
const float TXVMAX = 35.0; // truck x velocity in feet/sec
|
||||
const float TYMIN = 40.0; // depth distance to truck in feet
|
||||
const float TYMAX = 50.0; // depth distance to truck in feet
|
||||
const float SVMIN = 5.0; // snowball velocity in feet/sec
|
||||
const float SVMAX = 30.0; // snowball velocity in feet/sec
|
||||
const float STHMIN = 10.0; // snowball launch angle in degrees
|
||||
const float STHMAX = 70.0; // snowball launch angle in degrees
|
||||
const float HALFLENMIN = 15.; // half length of the truck in feet
|
||||
const float HALFLENMAX = 30.; // half length of the truck in feet
|
||||
#endif
|
||||
|
||||
|
||||
// these are here just to be pretty labels, other than that, they do nothing:
|
||||
#define IN
|
||||
#define OUT
|
||||
|
||||
|
||||
|
||||
// function prototypes:
|
||||
float Ranf( float, float );
|
||||
int Ranf( int, int );
|
||||
void TimeOfDaySeed( );
|
||||
|
||||
|
||||
|
||||
// Print the most recent CUDA runtime error, if any, tagged with the source
// location passed in. cudaGetLastError() also resets the stored error.
// The error is only reported; execution continues.
void
CudaCheckErrorAt( const char *file, int line )
{
	cudaError_t e = cudaGetLastError( );
	if( e != cudaSuccess )
	{
		fprintf( stderr, "Cuda failure %s:%d: '%s'\n", file, line, cudaGetErrorString(e) );
	}
}

// CudaCheckError( ) is a macro so that __FILE__ and __LINE__ expand at the
// call site; inside a function they would always name the helper itself.
#define CudaCheckError()	CudaCheckErrorAt( __FILE__, __LINE__ )
|
||||
|
||||
|
||||
// degrees-to-radians:
|
||||
// degrees-to-radians, computed entirely in single precision.
// M_PI is a double; the conversion factor is narrowed to float once so the
// multiply does not get promoted to double arithmetic on the device.
// Also marked __host__ so host code can call (and test) it.
__host__ __device__
float
Radians( float d )
{
	const float radiansPerDegree = (float)( M_PI / 180. );
	return radiansPerDegree * d;
}
|
||||
|
||||
|
||||
//Global means gpu and callable from cpu
|
||||
// One Monte Carlo trial per thread (__global__: runs on the GPU, launched
// from the CPU).
//
// Launch layout: 1-D grid of 1-D blocks; thread gid handles trial gid.
// Every array argument must hold at least NUMTRIALS elements.
//
//   dtxs      truck starting x location
//   dtys      depth distance to the truck
//   dtxvs     truck x velocity
//   dsvs      snowball velocity
//   dsths     snowball launch angle in degrees
//   dhalflens half length of the truck
//   dhits     output: 1 if the snowball hit the truck in that trial, else 0
__global__
void
MonteCarlo( IN float *dtxs, IN float *dtys, IN float *dtxvs, IN float *dsvs, IN float *dsths, IN float *dhalflens, OUT int *dhits )
{
	unsigned int gid = blockIdx.x*blockDim.x + threadIdx.x;

	// threads past the last trial must not touch the arrays:
	if( gid >= (unsigned int)NUMTRIALS )
		return;

	// this trial's parameters:
	float tx      = dtxs[gid];
	float ty      = dtys[gid];
	float txv     = dtxvs[gid];
	float sv      = dsvs[gid];
	float sthd    = dsths[gid];
	float halflen = dhalflens[gid];

	// split the snowball velocity into x and y components:
	float sthr = Radians( sthd );
	float svx  = sv * cosf( sthr );
	float svy  = sv * sinf( sthr );

	// how long until the snowball reaches the y depth:
	float tstar = ty / svy;

	// where the truck and the snowball are in x at that time:
	float truckx = tx + txv * tstar;
	float sbx    = svx * tstar;

	// a hit if the snowball lands within half a truck length of the truck's x:
	dhits[gid] = ( fabsf( truckx - sbx ) < halflen ) ? 1 : 0;
}
|
||||
|
||||
|
||||
// main program:
|
||||
|
||||
// main program:
// Generates NUMTRIALS random trials on the host, runs the MonteCarlo kernel
// on them, and prints the kernel throughput and the hit probability.
int
main( int argc, char* argv[ ] )
{
	TimeOfDaySeed( );

	//int dev = findCudaDevice(argc, (const char **)argv);

	// host-side inputs, one value per trial:
	float *htxs      = new float [NUMTRIALS];
	float *htys      = new float [NUMTRIALS];
	float *htxvs     = new float [NUMTRIALS];
	float *hsvs      = new float [NUMTRIALS];
	float *hsths     = new float [NUMTRIALS];
	float *hhalflens = new float [NUMTRIALS];

	// fill the random-value arrays:
	for( int n = 0; n < NUMTRIALS; n++ )
	{
		htxs[n]      = Ranf( TXMIN, TXMAX );
		htys[n]      = Ranf( TYMIN, TYMAX );
		htxvs[n]     = Ranf( TXVMIN, TXVMAX );
		hsvs[n]      = Ranf( SVMIN, SVMAX );
		hsths[n]     = Ranf( STHMIN, STHMAX );
		hhalflens[n] = Ranf( HALFLENMIN, HALFLENMAX );
	}

	// host-side results, value-initialized to 0: the buffer is uploaded below,
	// so any trial the grid does not cover counts as a miss instead of
	// contributing indeterminate memory to the sum.
	int *hhits = new int [NUMTRIALS]( );

	// allocate device memory:

	float *dtxs, *dtys, *dtxvs, *dsvs, *dsths, *dhalflens;
	int   *dhits;

	cudaMalloc( (void **)(&dtxs), NUMTRIALS*sizeof(float) );
	CudaCheckError( );

	cudaMalloc( (void **)(&dtys), NUMTRIALS*sizeof(float) );
	CudaCheckError( );

	cudaMalloc( (void **)(&dtxvs), NUMTRIALS*sizeof(float) );
	CudaCheckError( );

	cudaMalloc( (void **)(&dsvs), NUMTRIALS*sizeof(float) );
	CudaCheckError( );

	cudaMalloc( (void **)(&dsths), NUMTRIALS*sizeof(float) );
	CudaCheckError( );

	cudaMalloc( (void **)(&dhalflens), NUMTRIALS*sizeof(float) );
	CudaCheckError( );

	cudaMalloc( (void **)(&dhits), NUMTRIALS*sizeof(int) );
	CudaCheckError( );

	// copy host memory to the device:

	cudaMemcpy( dtxs, htxs, NUMTRIALS*sizeof(float), cudaMemcpyHostToDevice );
	CudaCheckError( );

	cudaMemcpy( dtys, htys, NUMTRIALS*sizeof(float), cudaMemcpyHostToDevice );
	CudaCheckError( );

	cudaMemcpy( dtxvs, htxvs, NUMTRIALS*sizeof(float), cudaMemcpyHostToDevice );
	CudaCheckError( );

	cudaMemcpy( dsvs, hsvs, NUMTRIALS*sizeof(float), cudaMemcpyHostToDevice );
	CudaCheckError( );

	cudaMemcpy( dsths, hsths, NUMTRIALS*sizeof(float), cudaMemcpyHostToDevice );
	CudaCheckError( );

	cudaMemcpy( dhalflens, hhalflens, NUMTRIALS*sizeof(float), cudaMemcpyHostToDevice );
	CudaCheckError( );

	// zero the device-side hit flags:
	cudaMemcpy( dhits, hhits, NUMTRIALS*sizeof(int), cudaMemcpyHostToDevice );
	CudaCheckError( );

	// setup the execution parameters:

	dim3 threads(BLOCKSIZE, 1, 1 );
	dim3 grid(NUMBLOCKS, 1, 1 );

	// make sure all the setup work has finished before timing starts:

	cudaDeviceSynchronize( );

	// allocate CUDA events that we'll use for timing:

	cudaEvent_t start, stop;
	cudaEventCreate( &start );
	CudaCheckError( );
	cudaEventCreate( &stop );
	CudaCheckError( );

	// record the start event:

	cudaEventRecord( start, NULL );
	CudaCheckError( );

	// execute the kernel:

	MonteCarlo<<< grid, threads >>>( dtxs, dtys, dtxvs, dsvs, dsths, dhalflens, dhits );
	// a bad launch configuration is only reported through cudaGetLastError():
	CudaCheckError( );

	// record the stop event:

	cudaEventRecord( stop, NULL );
	CudaCheckError( );

	// wait for the stop event to complete:

	cudaEventSynchronize( stop );
	CudaCheckError( );

	float msecTotal = 0.0f;
	cudaEventElapsedTime( &msecTotal, start, stop );
	CudaCheckError( );

	// the events are no longer needed:

	cudaEventDestroy( start );
	CudaCheckError( );
	cudaEventDestroy( stop );
	CudaCheckError( );

	// copy result from the device to the host:

	cudaMemcpy( hhits, dhits, NUMTRIALS*sizeof(int), cudaMemcpyDeviceToHost );
	CudaCheckError( );

	// compute the sum of the per-trial hit flags:

	int numHits = 0;
	for( int i = 0; i < NUMTRIALS; i++ )
	{
		numHits += hhits[i];
		//fprintf(stderr, "hhits[%d] = %5d ; Total numHits = %5d\n", i, hhits[i], numHits);
	}

	float probability = 100.f * (float)numHits / (float)NUMTRIALS;

	// compute and print the performance:

	double secondsTotal = 0.001 * (double)msecTotal;
	double trialsPerSecond = (double)NUMTRIALS / secondsTotal;
	double megaTrialsPerSecond = trialsPerSecond / 1000000.;
	fprintf( stderr, "Number of Trials = %10d, Blocksize = %8d, MegaTrials/Second = %10.4lf, Probability = %6.2f%%\n",
		NUMTRIALS, BLOCKSIZE, megaTrialsPerSecond, probability );

	// clean up host memory (every array allocated above):

	delete [ ] htxs;
	delete [ ] htys;
	delete [ ] htxvs;
	delete [ ] hsvs;
	delete [ ] hsths;
	delete [ ] hhalflens;
	delete [ ] hhits;

	// clean up device memory (every buffer allocated above):

	cudaFree( dtxs );
	CudaCheckError( );
	cudaFree( dtys );
	CudaCheckError( );
	cudaFree( dtxvs );
	CudaCheckError( );
	cudaFree( dsvs );
	CudaCheckError( );
	cudaFree( dsths );
	CudaCheckError( );
	cudaFree( dhalflens );
	CudaCheckError( );
	cudaFree( dhits );
	CudaCheckError( );

	return 0;
}
|
||||
|
||||
// Pseudo-random float between low and high (both ends reachable), drawn
// from rand( ).
float
Ranf( float low, float high )
{
	// fraction of the way from low to high, 0. - 1.
	const float fraction = (float) rand() / (float) RAND_MAX;

	return low + fraction * ( high - low );
}
|
||||
|
||||
// Pseudo-random int: draws a float from the float overload of Ranf over the
// same bounds and truncates it toward zero.
int
Ranf( int ilow, int ihigh )
{
	const float lowerBound = (float)ilow;
	const float upperBound = ceil( (float)ihigh );

	const float drawn = Ranf( lowerBound, upperBound );
	return (int) drawn;
}
|
||||
|
||||
// Seed rand( ) from the wall clock: the seed is derived from the number of
// seconds elapsed since midnight, January 1, 2000 (local time).
void
TimeOfDaySeed( )
{
	// reference point: 2000-01-01 00:00:00 (tm_year counts from 1900)
	struct tm y2k = { 0 };
	y2k.tm_hour = 0;    y2k.tm_min = 0;  y2k.tm_sec = 0;
	y2k.tm_year = 100;  y2k.tm_mon = 0;  y2k.tm_mday = 1;

	time_t timer;
	time( &timer );
	double seconds = difftime( timer, mktime(&y2k) );

	// a clock set before the reference point would give a negative value:
	if( seconds < 0. )
		seconds = -seconds;

	// 1000.*seconds is far larger than an unsigned int can hold, and
	// converting an out-of-range double straight to unsigned int is
	// undefined. Convert to 64 bits first, then keep the low 32 bits.
	unsigned long long millis = (unsigned long long)( 1000.*seconds );
	unsigned int seed = (unsigned int)( millis & 0xFFFFFFFFull );
	srand( seed );
}
|
Binary file not shown.
After Width: | Height: | Size: 29 KiB |
Binary file not shown.
After Width: | Height: | Size: 24 KiB |
|
@ -0,0 +1,10 @@
|
|||
#!/bin/bash
# Benchmark sweep: rebuild montecarlo.cu for every (trials, blocksize) pair,
# run it, and append its output to data.csv.

# nvcc location; override by exporting NVCC before running this script.
NVCC="${NVCC:-/usr/local/apps/cuda/cuda-10.1/bin/nvcc}"

for t in 1024 4096 16384 65536 262144 1048576 2097152 4194304
do
	for b in 8 32 128
	do
		echo "Running with $t trials and $b blocksize"
		# If the build fails, skip the run: otherwise the binary left over
		# from the previous configuration would be measured again.
		if ! "$NVCC" -DNUMTRIALS="$t" -DBLOCKSIZE="$b" -o montecarlo montecarlo.cu
		then
			echo "Compilation failed for $t trials and $b blocksize; skipping" >&2
			continue
		fi
		# the program reports on stderr, so fold it into stdout for tee:
		./montecarlo 2>&1 | tee -a data.csv
	done
done
|
|
@ -0,0 +1 @@
|
|||
17:40:16 up 154 days, 4:15, 62 users, load average: 5.86, 9.92, 12.18
|
Loading…
Reference in New Issue