94 float2
const &bottom_left,
95 float2
const &top_right,
96 float2
const &mid_pt_cell) {
97 float2 bottom_right = make_float2(top_right.x, bottom_left.y);
98 float2 top_left = make_float2(bottom_left.x, top_right.y);
100 float total_importance = 0;
102 auto const &bnd = device_dists[i];
105 mid_pt.x = (mid_pt_cell.x - bnd.mean_x) / bnd.sigma_x;
106 mid_pt.y = (mid_pt_cell.y - bnd.mean_y) / bnd.sigma_y;
108 mid_pt.x = (mid_pt_cell.x - bnd.mean_x) / bnd.sigma_x;
109 mid_pt.y = (mid_pt_cell.y - bnd.mean_y) / bnd.sigma_y;
111 (mid_pt.x - bnd.rho * mid_pt.y) / (sqrt(1 - bnd.rho * bnd.rho));
113 if (mid_pt.x * mid_pt.x + mid_pt.y * mid_pt.y >
cu_trun_sq) {
121 return total_importance;
125 float2
const &mid_pt_cell) {
126 float total_importance = 0;
128 auto &x = device_polygons.
x;
129 auto &y = device_polygons.
y;
131 auto const &bounds = device_polygons.
bounds[i];
132 if ((mid_pt_cell.x < bounds.xmin) || (mid_pt_cell.x > bounds.xmax) ||
133 (mid_pt_cell.y < bounds.ymin) || (mid_pt_cell.y > bounds.ymax)) {
134 start += device_polygons.
sz[i];
137 int sz = device_polygons.
sz[i];
139 total_importance = fmaxf(total_importance, device_polygons.
imp[i]);
143 return total_importance;
148 float *importance_vec) {
149 int idx = blockIdx.x * blockDim.x + threadIdx.x;
150 int idy = blockIdx.y * blockDim.y + threadIdx.y;
158 float2 mid_pt_cell = make_float2((bottom_left.x + top_right.x) / 2.,
159 (bottom_left.y + top_right.y) / 2.);
161 importance_vec[vec_idx] =
177 int const num_dists,
int const map_size,
178 float const resolution,
float const truncation,
179 float const pNorm,
float *host_importance_vec,
180 float &normalization_factor) {
181 checkCudaErrors(cudaDeviceSynchronize());
183 checkCudaErrors(cudaMemcpyToSymbol(
cu_num_dists, &num_dists,
sizeof(
int)));
184 checkCudaErrors(cudaMemcpyToSymbol(
cu_map_size, &map_size,
sizeof(
int)));
186 cudaMemcpyToSymbol(
cu_resolution, &resolution,
sizeof(
float)));
188 cudaMemcpyToSymbol(
cu_truncation, &truncation,
sizeof(
float)));
189 float trun_sq = truncation * truncation;
190 checkCudaErrors(cudaMemcpyToSymbol(
cu_trun_sq, &trun_sq,
sizeof(
float)));
191 float f_OneBySqrt2 =
static_cast<float>(1. / std::sqrt(2.));
193 cudaMemcpyToSymbol(
cu_OneBySqrt2, &f_OneBySqrt2,
sizeof(
float)));
196 checkCudaErrors(cudaMalloc(&device_dists, num_dists *
sizeof(
BND_Cuda)));
197 checkCudaErrors(cudaMemcpy(device_dists, host_dists,
199 cudaMemcpyHostToDevice));
203 cudaMalloc(&(device_polygons.
x), host_polygons.
num_pts *
sizeof(
float)));
205 cudaMalloc(&(device_polygons.
y), host_polygons.
num_pts *
sizeof(
float)));
206 checkCudaErrors(cudaMalloc(&(device_polygons.
imp),
208 checkCudaErrors(cudaMalloc(&(device_polygons.
sz),
210 checkCudaErrors(cudaMalloc(&(device_polygons.
bounds),
213 checkCudaErrors(cudaMemcpy(device_polygons.
x, host_polygons.
x.data(),
214 host_polygons.
num_pts *
sizeof(
float),
215 cudaMemcpyHostToDevice));
216 checkCudaErrors(cudaMemcpy(device_polygons.
y, host_polygons.
y.data(),
217 host_polygons.
num_pts *
sizeof(
float),
218 cudaMemcpyHostToDevice));
219 checkCudaErrors(cudaMemcpy(device_polygons.
imp, host_polygons.
imp.data(),
221 cudaMemcpyHostToDevice));
222 checkCudaErrors(cudaMemcpy(device_polygons.
sz, host_polygons.
sz.data(),
224 cudaMemcpyHostToDevice));
225 checkCudaErrors(cudaMemcpy(
229 &host_polygons.
num_pts,
sizeof(
int)));
233 float *device_importance_vec;
235 cudaMalloc(&device_importance_vec, map_size * map_size *
sizeof(
float)));
240 dim3 dimBlock(32, 32, 1);
241 dim3 dimGrid(map_size / dimBlock.x, map_size / dimBlock.x, 1);
244 device_importance_vec);
246 cudaDeviceSynchronize();
248 thrust::device_ptr<float> d_ptr =
249 thrust::device_pointer_cast(device_importance_vec);
250 float max = *(thrust::max_element(d_ptr, d_ptr + map_size * map_size));
253 normalization_factor = pNorm;
255 normalization_factor = pNorm / max;
257 if (normalization_factor > 1e-5) {
259 &normalization_factor,
sizeof(
float)));
263 checkCudaErrors(cudaMemcpy(host_importance_vec, device_importance_vec,
264 map_size * map_size *
sizeof(
float),
265 cudaMemcpyDeviceToHost));
267 checkCudaErrors(cudaFree(device_dists));
268 checkCudaErrors(cudaFree(device_importance_vec));
269 checkCudaErrors(cudaFree(device_polygons.
x));
270 checkCudaErrors(cudaFree(device_polygons.
y));
271 checkCudaErrors(cudaFree(device_polygons.
sz));
272 checkCudaErrors(cudaFree(device_polygons.
bounds));
274 cudaError_t error = cudaGetLastError();
275 if (error != cudaSuccess) {
276 std::stringstream strstr;
277 strstr <<
"run_kernel launch failed" << std::endl;
void generate_world_map_cuda(BND_Cuda *, Polygons_Cuda_Host const &, int const, int const, float const, float const, float const, float *importance_vec, float &)
Function to generate the world map on the device.