22 #include <tbb/parallel_for.h>
26 namespace Mandelbrot {
35 const int32_t max_iterations) {
40 int32_t num_iterations = 0;
47 while ((zx * zx + zy * zy < 4) && (num_iterations < max_iterations)) {
51 const T temp_x = zx * zx - zy * zy + cx;
53 zy = 2 * zx * zy + cy;
60 return num_iterations;
64 const double domain_max,
65 const int32_t num_bins) {
66 return (domain_max - domain_min) / num_bins;
72 #pragma comment(linker "/INCLUDE:mandelbrot_impl")
78 const int32_t y_begin,
84 const int32_t max_iterations,
92 for (int32_t y = y_begin; y < y_end; ++y) {
94 const T cy = y * y_scale + y_min;
95 for (int32_t x = 0; x < x_pixels; ++x) {
97 const T cx = x * x_scale + x_min;
98 const int32_t output_pixel = y * x_pixels + x;
99 output_x[output_pixel] = cx;
100 output_y[output_pixel] = cy;
101 output_num_iterations[output_pixel] =
mandelbrot_pixel(cx, cy, max_iterations);
108 template <
typename T>
111 #pragma comment(linker "/INCLUDE:mandelbrot_cpu_template")
117 const int32_t x_pixels,
118 const int32_t y_pixels,
123 const int32_t max_iterations,
128 const T x_scale =
get_scale(x_min, x_max, x_pixels);
129 const T y_scale =
get_scale(y_min, y_max, y_pixels);
131 const int32_t num_pixels = x_pixels * y_pixels;
135 [&](
const tbb::blocked_range<int32_t>& y_itr) {
136 const int32_t y_begin = y_itr.begin();
137 const int32_t y_end = y_itr.end();
139 const int32_t y_begin = 0;
140 const int32_t y_end = y_pixels;
152 output_num_iterations);
159 #else // #ifndef __CUDACC__
161 template <
typename T>
164 #pragma comment(linker "/INCLUDE:mandelbrot_cuda_template__gpu_")
169 mandelbrot_cuda_template__gpu_(
const int32_t x_pixels,
170 const int32_t y_pixels,
175 const int32_t max_iterations,
179 const T x_scale =
get_scale(x_min, x_max, x_pixels);
180 const T y_scale =
get_scale(y_min, y_max, y_pixels);
181 const int32_t num_pixels = x_pixels * y_pixels;
183 int32_t start = threadIdx.x + blockDim.x * blockIdx.x;
184 int32_t step = blockDim.x * gridDim.x;
186 for (int32_t output_pixel = start; output_pixel < num_pixels; output_pixel += step) {
187 const int32_t y = output_pixel / x_pixels;
188 const int32_t x = output_pixel % x_pixels;
189 const T cy = y * y_scale + y_min;
190 const T cx = x * x_scale + x_min;
193 int32_t num_iterations = 1;
194 for (; num_iterations < max_iterations; ++num_iterations) {
195 const T temp_x = zx * zx - zy * zy + cx;
196 zy = 2 * zx * zy + cy;
198 if (zx * zx + zy * zy > 4.0) {
202 output_x[output_pixel] = cx;
203 output_y[output_pixel] = cy;
204 output_num_iterations[output_pixel] = num_iterations;
206 return output_x.
size();
208 #endif // #ifndef __CUDACC__
216 #pragma comment(linker "/INCLUDE:tf_mandelbrot__cpu_")
221 const int32_t x_pixels,
222 const int32_t y_pixels,
227 const int32_t max_iterations,
231 return Mandelbrot::mandelbrot_cpu_template<double>(mgr,
241 output_num_iterations);
246 #pragma comment(linker "/INCLUDE:tf_mandelbrot_float__cpu_")
251 const int32_t x_pixels,
252 const int32_t y_pixels,
257 const int32_t max_iterations,
261 return Mandelbrot::mandelbrot_cpu_template<float>(mgr,
271 output_num_iterations);
274 #else // #ifndef __CUDACC__
278 #pragma comment(linker "/INCLUDE:tf_mandelbrot_cuda__gpu_")
282 int32_t tf_mandelbrot_cuda__gpu_(
const int32_t x_pixels,
283 const int32_t y_pixels,
288 const int32_t max_iterations,
292 return Mandelbrot::mandelbrot_cuda_template__gpu_(x_pixels,
301 output_num_iterations);
306 #pragma comment(linker "/INCLUDE:tf_mandelbrot_cuda_float__gpu_")
310 int32_t tf_mandelbrot_cuda_float__gpu_(
const int32_t x_pixels,
311 const int32_t y_pixels,
316 const int32_t max_iterations,
320 return Mandelbrot::mandelbrot_cuda_template__gpu_(x_pixels,
329 output_num_iterations);
332 #endif // #ifndef __CUDACC__
void set_output_row_size(int64_t num_rows)
#define EXTENSION_NOINLINE
DEVICE int64_t size() const
DEVICE double get_scale(const double domain_min, const double domain_max, const int32_t num_bins)
TEMPLATE_NOINLINE void mandelbrot_impl(const int32_t x_pixels, const int32_t y_begin, const int32_t y_end, const T x_min, const T y_min, const T x_scale, const T y_scale, const int32_t max_iterations, Column< T > &output_x, Column< T > &output_y, Column< int32_t > &output_num_iterations)
EXTENSION_NOINLINE int32_t tf_mandelbrot__cpu_(TableFunctionManager &mgr, const int32_t x_pixels, const int32_t y_pixels, const double x_min, const double x_max, const double y_min, const double y_max, const int32_t max_iterations, Column< double > &output_x, Column< double > &output_y, Column< int32_t > &output_num_iterations)
EXTENSION_NOINLINE int32_t tf_mandelbrot_float__cpu_(TableFunctionManager &mgr, const int32_t x_pixels, const int32_t y_pixels, const float x_min, const float x_max, const float y_min, const float y_max, const int32_t max_iterations, Column< float > &output_x, Column< float > &output_y, Column< int32_t > &output_num_iterations)
__attribute__((__used__)) ModelInfo get_model_info_from_file(const std
void parallel_for(const blocked_range< Int > &range, const Body &body, const Partitioner &p=Partitioner())
TEMPLATE_INLINE int32_t mandelbrot_pixel(const T cx, const T cy, const int32_t max_iterations)
#define TEMPLATE_NOINLINE
TEMPLATE_NOINLINE int32_t mandelbrot_cpu_template(TableFunctionManager &mgr, const int32_t x_pixels, const int32_t y_pixels, const T x_min, const T x_max, const T y_min, const T y_max, const int32_t max_iterations, Column< T > &output_x, Column< T > &output_y, Column< int32_t > &output_num_iterations)