344 lines
9.0 KiB
Plaintext
344 lines
9.0 KiB
Plaintext
|
// Monte Carlo simulation of a snowball fight:
|
||
|
|
||
|
// system includes
|
||
|
#include <stdio.h>
|
||
|
#include <assert.h>
|
||
|
#include <malloc.h>
|
||
|
#include <math.h>
|
||
|
#include <stdlib.h>
|
||
|
|
||
|
// CUDA runtime
|
||
|
#include <cuda_runtime.h>
|
||
|
|
||
|
// Helper functions and utilities to work with CUDA
|
||
|
#include "helper_functions.h"
|
||
|
#include "helper_cuda.h"
|
||
|
|
||
|
// setting the number of trials in the monte carlo simulation:
|
||
|
#ifndef NUMTRIALS
|
||
|
#define NUMTRIALS 1024
|
||
|
#endif
|
||
|
|
||
|
#ifndef BLOCKSIZE
|
||
|
#define BLOCKSIZE 8 // number of threads per block
|
||
|
#endif
|
||
|
|
||
|
#define NUMBLOCKS ( NUMTRIALS / BLOCKSIZE )
|
||
|
|
||
|
|
||
|
|
||
|
// ranges for the random numbers:
|
||
|
//#define PROJECT1
|
||
|
|
||
|
#ifdef PROJECT1
|
||
|
const float TXMIN = -10.0; // truck starting location in feet
|
||
|
const float TXMAX = 10.0; // truck starting location in feet
|
||
|
const float TYMIN = 45.0; // depth distance to truck in feet
|
||
|
const float TYMAX = 55.0; // depth distance to truck in feet
|
||
|
const float TXVMIN = 10.0; // truck x velocity in feet/sec
|
||
|
const float TXVMAX = 30.0; // truck x velocity in feet/sec
|
||
|
const float SVMIN = 10.0; // snowball velocity in feet/sec
|
||
|
const float SVMAX = 30.0; // snowball velocity in feet/sec
|
||
|
const float STHMIN = 10.0; // snowball launch angle in degrees
|
||
|
const float STHMAX = 90.0; // snowball launch angle in degrees
|
||
|
const float HALFLENMIN = 20.; // half length of the truck in feet
|
||
|
const float HALFLENMAX = 20.; // half length of the truck in feet
|
||
|
#else
|
||
|
const float TXMIN = -10.0; // truck starting location in feet
|
||
|
const float TXMAX = 10.0; // truck starting location in feet
|
||
|
const float TXVMIN = 15.0; // truck x velocity in feet/sec
|
||
|
const float TXVMAX = 35.0; // truck x velocity in feet/sec
|
||
|
const float TYMIN = 40.0; // depth distance to truck in feet
|
||
|
const float TYMAX = 50.0; // depth distance to truck in feet
|
||
|
const float SVMIN = 5.0; // snowball velocity in feet/sec
|
||
|
const float SVMAX = 30.0; // snowball velocity in feet/sec
|
||
|
const float STHMIN = 10.0; // snowball launch angle in degrees
|
||
|
const float STHMAX = 70.0; // snowball launch angle in degrees
|
||
|
const float HALFLENMIN = 15.; // half length of the truck in feet
|
||
|
const float HALFLENMAX = 30.; // half length of the truck in feet
|
||
|
#endif
|
||
|
|
||
|
|
||
|
// these are here just to be pretty labels, other than that, they do nothing:
|
||
|
#define IN
|
||
|
#define OUT
|
||
|
|
||
|
|
||
|
|
||
|
// function prototypes:
|
||
|
float Ranf( float, float );
|
||
|
int Ranf( int, int );
|
||
|
void TimeOfDaySeed( );
|
||
|
|
||
|
|
||
|
|
||
|
void
|
||
|
CudaCheckError()
|
||
|
{
|
||
|
cudaError_t e = cudaGetLastError();
|
||
|
if(e != cudaSuccess)
|
||
|
{
|
||
|
fprintf( stderr, "Cuda failure %s:%d: '%s'\n", __FILE__, __LINE__, cudaGetErrorString(e));
|
||
|
}
|
||
|
}
|
||
|
|
||
|
|
||
|
// degrees-to-radians:
|
||
|
__device__
|
||
|
float
|
||
|
Radians( float d )
|
||
|
{
|
||
|
return (M_PI/180.f) * d;
|
||
|
}
|
||
|
|
||
|
|
||
|
//Global means gpu and callable from cpu
|
||
|
__global__
|
||
|
void
|
||
|
MonteCarlo( IN float *dtxs, IN float *dtys, IN float *dtxvs, IN float *dsvs, IN float *dsths, IN float *dhalflens, OUT int *dhits )
|
||
|
{
|
||
|
//Not sure about this, as it's differnt in the project video.
|
||
|
//__shared__ int numHits[BLOCKSIZE];
|
||
|
|
||
|
//unsigned int numItems = blockDim.x;
|
||
|
//unsigned int wgNum = blockIdx.x;
|
||
|
//unsigned int tnum = threadIdx.x;
|
||
|
unsigned int gid = blockIdx.x*blockDim.x + threadIdx.x;
|
||
|
|
||
|
dhits[gid] = 0;
|
||
|
|
||
|
//Get stuff based off the gid/thread.
|
||
|
// randomize everything:
|
||
|
float tx = dtxs[gid];
|
||
|
float ty = dtys[gid];
|
||
|
float txv = dtxvs[gid];
|
||
|
float sv = dsvs[gid];
|
||
|
float sthd = dsths[gid];
|
||
|
float sthr = Radians(sthd);
|
||
|
float svx = sv * cos(sthr);
|
||
|
float svy = sv * sin(sthr);
|
||
|
|
||
|
//I don't see anythign of the half len so I put this here.
|
||
|
//float halflen = dhalflens[gid];
|
||
|
|
||
|
// how long until the snowball reaches the y depth:
|
||
|
float tstar = ty / svy;
|
||
|
float truckx = tx + txv * tstar;
|
||
|
float sbx = svx * tstar;
|
||
|
|
||
|
//check snowball location.
|
||
|
//Project video says HALF_LENGTH but I don't see that in the template file.
|
||
|
if( fabs(truckx - sbx) < dhalflens[gid])
|
||
|
{
|
||
|
dhits[gid] = 1;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
|
||
|
// main program:
|
||
|
|
||
|
int
|
||
|
main( int argc, char* argv[ ] )
|
||
|
{
|
||
|
TimeOfDaySeed( );
|
||
|
|
||
|
//int dev = findCudaDevice(argc, (const char **)argv);
|
||
|
|
||
|
|
||
|
float *htxs = new float [NUMTRIALS];
|
||
|
float *htys = new float [NUMTRIALS];
|
||
|
float *htxvs = new float [NUMTRIALS];
|
||
|
float *hsvs = new float [NUMTRIALS];
|
||
|
float *hsths = new float [NUMTRIALS];
|
||
|
float *hhalflens = new float [NUMTRIALS];
|
||
|
|
||
|
// fill the random-value arrays:
|
||
|
for( int n = 0; n < NUMTRIALS; n++ )
|
||
|
{
|
||
|
htxs[n] = Ranf( TXMIN, TXMAX );
|
||
|
htys[n] = Ranf( TYMIN, TYMAX );
|
||
|
htxvs[n] = Ranf( TXVMIN, TXVMAX );
|
||
|
hsvs[n] = Ranf( SVMIN, SVMAX );
|
||
|
hsths[n] = Ranf( STHMIN, STHMAX );
|
||
|
hhalflens[n] = Ranf( HALFLENMIN, HALFLENMAX );
|
||
|
}
|
||
|
|
||
|
int *hhits = new int [NUMTRIALS];
|
||
|
|
||
|
// allocate device memory:
|
||
|
|
||
|
float *dtxs, *dtys, *dtxvs, *dsvs, *dsths, *dhalflens;
|
||
|
int *dhits;
|
||
|
|
||
|
|
||
|
//cudaError_t status;
|
||
|
cudaMalloc( (void **)(&dtxs), NUMTRIALS*sizeof(float) );
|
||
|
CudaCheckError( );
|
||
|
|
||
|
//Malloc the space for the devices ones.
|
||
|
cudaMalloc((void **)(&dtys), NUMTRIALS*sizeof(float) );
|
||
|
CudaCheckError( );
|
||
|
|
||
|
cudaMalloc((void **)(&dtxvs), NUMTRIALS*sizeof(float) );
|
||
|
CudaCheckError( );
|
||
|
|
||
|
cudaMalloc((void **)(&dsvs), NUMTRIALS*sizeof(float) );
|
||
|
CudaCheckError( );
|
||
|
|
||
|
cudaMalloc((void **)(&dsths), NUMTRIALS*sizeof(float) );
|
||
|
CudaCheckError( );
|
||
|
|
||
|
cudaMalloc((void **)(&dhalflens), NUMTRIALS*sizeof(float) );
|
||
|
CudaCheckError( );
|
||
|
|
||
|
cudaMalloc((void **)(&dhits), NUMTRIALS*sizeof(int) );
|
||
|
CudaCheckError( );
|
||
|
|
||
|
|
||
|
|
||
|
// copy host memory to the device:
|
||
|
|
||
|
cudaMemcpy( dtxs, htxs, NUMTRIALS*sizeof(float), cudaMemcpyHostToDevice );
|
||
|
CudaCheckError( );
|
||
|
|
||
|
cudaMemcpy( dtys, htys, NUMTRIALS*sizeof(float), cudaMemcpyHostToDevice );
|
||
|
CudaCheckError( );
|
||
|
|
||
|
cudaMemcpy( dtxvs, htxvs, NUMTRIALS*sizeof(float), cudaMemcpyHostToDevice );
|
||
|
CudaCheckError( );
|
||
|
|
||
|
cudaMemcpy( dsvs, hsvs, NUMTRIALS*sizeof(float), cudaMemcpyHostToDevice );
|
||
|
CudaCheckError( );
|
||
|
|
||
|
cudaMemcpy( dsths, hsths, NUMTRIALS*sizeof(float), cudaMemcpyHostToDevice );
|
||
|
CudaCheckError( );
|
||
|
|
||
|
cudaMemcpy( dhalflens, hhalflens, NUMTRIALS*sizeof(float), cudaMemcpyHostToDevice );
|
||
|
CudaCheckError( );
|
||
|
|
||
|
//The project video doesn't show this being done.....but it's part of the function...
|
||
|
cudaMemcpy( dhits, hhits, NUMTRIALS*sizeof(int), cudaMemcpyHostToDevice );
|
||
|
CudaCheckError( );
|
||
|
|
||
|
// setup the execution parameters:
|
||
|
|
||
|
dim3 threads(BLOCKSIZE, 1, 1 );
|
||
|
dim3 grid(NUMBLOCKS, 1, 1 );
|
||
|
|
||
|
// create and start timer
|
||
|
|
||
|
cudaDeviceSynchronize( );
|
||
|
|
||
|
// allocate CUDA events that we'll use for timing:
|
||
|
|
||
|
cudaEvent_t start, stop;
|
||
|
cudaEventCreate( &start );
|
||
|
CudaCheckError( );
|
||
|
cudaEventCreate( &stop );
|
||
|
CudaCheckError( );
|
||
|
|
||
|
// record the start event:
|
||
|
|
||
|
cudaEventRecord( start, NULL );
|
||
|
CudaCheckError( );
|
||
|
|
||
|
// execute the kernel:
|
||
|
|
||
|
MonteCarlo<<< grid, threads >>>( dtxs, dtys, dtxvs, dsvs, dsths, dhalflens, dhits );
|
||
|
|
||
|
// record the stop event:
|
||
|
|
||
|
cudaEventRecord( stop, NULL );
|
||
|
CudaCheckError( );
|
||
|
|
||
|
// wait for the stop event to complete:
|
||
|
|
||
|
cudaEventSynchronize( stop );
|
||
|
CudaCheckError( );
|
||
|
|
||
|
float msecTotal = 0.0f;
|
||
|
cudaEventElapsedTime( &msecTotal, start, stop );
|
||
|
CudaCheckError( );
|
||
|
|
||
|
// copy result from the device to the host:
|
||
|
|
||
|
cudaMemcpy( hhits, dhits, NUMTRIALS *sizeof(int), cudaMemcpyDeviceToHost );
|
||
|
CudaCheckError( );
|
||
|
|
||
|
// compute the sum :
|
||
|
|
||
|
int numHits = 0;
|
||
|
//????
|
||
|
for(int i = 0; i < NUMTRIALS; i++)
|
||
|
{
|
||
|
numHits += hhits[i];
|
||
|
//fprintf(stderr, "hhits[%d] = %5d ; Total numHits = %5d\n", i, hhits[i], numHits);
|
||
|
}
|
||
|
|
||
|
float probability = 100.f * (float)numHits / (float)NUMTRIALS;
|
||
|
|
||
|
// compute and printL
|
||
|
|
||
|
double secondsTotal = 0.001 * (double)msecTotal;
|
||
|
double trialsPerSecond = (float)NUMTRIALS / secondsTotal;
|
||
|
double megaTrialsPerSecond = trialsPerSecond / 1000000.;
|
||
|
fprintf( stderr, "Number of Trials = %10d, Blocksize = %8d, MegaTrials/Second = %10.4lf, Probability = %6.2f%%\n",
|
||
|
NUMTRIALS, BLOCKSIZE, megaTrialsPerSecond, probability );
|
||
|
|
||
|
// clean up memory:
|
||
|
delete [ ] htxs;
|
||
|
delete [ ] htys;
|
||
|
delete [ ] htxvs;
|
||
|
delete [ ] hsvs;
|
||
|
delete [ ] hsths;
|
||
|
delete [ ] hhits;
|
||
|
|
||
|
cudaFree( dtxs );
|
||
|
CudaCheckError( );
|
||
|
cudaFree( dtys );
|
||
|
CudaCheckError( );
|
||
|
cudaFree( dtxvs );
|
||
|
CudaCheckError( );
|
||
|
cudaFree( dsvs );
|
||
|
CudaCheckError( );
|
||
|
cudaFree( dsths );
|
||
|
CudaCheckError( );
|
||
|
cudaFree( dhits );
|
||
|
CudaCheckError( );
|
||
|
|
||
|
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
float
|
||
|
Ranf( float low, float high )
|
||
|
{
|
||
|
float r = (float) rand(); // 0 - RAND_MAX
|
||
|
float t = r / (float) RAND_MAX; // 0. - 1.
|
||
|
|
||
|
return low + t * ( high - low );
|
||
|
}
|
||
|
|
||
|
int
|
||
|
Ranf( int ilow, int ihigh )
|
||
|
{
|
||
|
float low = (float)ilow;
|
||
|
float high = ceil( (float)ihigh );
|
||
|
|
||
|
return (int) Ranf(low,high);
|
||
|
}
|
||
|
|
||
|
void
|
||
|
TimeOfDaySeed( )
|
||
|
{
|
||
|
struct tm y2k = { 0 };
|
||
|
y2k.tm_hour = 0; y2k.tm_min = 0; y2k.tm_sec = 0;
|
||
|
y2k.tm_year = 100; y2k.tm_mon = 0; y2k.tm_mday = 1;
|
||
|
|
||
|
time_t timer;
|
||
|
time( &timer );
|
||
|
double seconds = difftime( timer, mktime(&y2k) );
|
||
|
unsigned int seed = (unsigned int)( 1000.*seconds ); // milliseconds
|
||
|
srand( seed );
|
||
|
}
|