Signed-off-by: Yongjia Zhang <yongjia.zhang@xxxxxxxxx>
---
 opencl/motion-blur-zoom.cl           |  86 ++++++++++++++++++++++++++++
 opencl/motion-blur-zoom.cl.h         |  88 +++++++++++++++++++++++++++++
 operations/common/motion-blur-zoom.c | 112 +++++++++++++++++++++++++++++++++++++
 3 files changed, 286 insertions(+)
 create mode 100644 opencl/motion-blur-zoom.cl
 create mode 100644 opencl/motion-blur-zoom.cl.h

diff --git a/opencl/motion-blur-zoom.cl b/opencl/motion-blur-zoom.cl
new file mode 100644
index 0000000..085c70c
--- /dev/null
+++ b/opencl/motion-blur-zoom.cl
@@ -0,0 +1,86 @@
+#define CLAMP(val,lo,hi) ((val)<(lo)?(lo):((hi)<(val)?(hi):(val)))
+#define SQR(x) ((x)*(x))
+#define MAX_NUM_IT 200
+#define NOMINAL_NUM_IT 100
+
+/* Fetch the pixel at absolute position (x, y) from a buffer covering the
+ * rectangle (rect_x, rect_y, rect_width, rect_height); positions outside
+ * that rectangle are clamped to its nearest edge. */
+float4 get_pixel_color(const __global float4 *in_buf,
+                       int rect_width,
+                       int rect_height,
+                       int rect_x,
+                       int rect_y,
+                       int x,
+                       int y)
+{
+  int ix = x - rect_x;
+  int iy = y - rect_y;
+
+  ix = CLAMP(ix, 0, rect_width-1);
+  iy = CLAMP(iy, 0, rect_height-1);
+
+  return in_buf[iy * rect_width + ix];
+}
+
+
+/* One work-item per output pixel: average xy_len bilinearly interpolated
+ * samples taken along the line from the pixel (x + gidx, y + gidy) towards
+ * (center_x, center_y), spanning the fraction `factor` of that distance. */
+__kernel void motion_blur_zoom(const __global float4 *src_buf,
+                               __global float4 *dst_buf,
+                               int src_width,
+                               int src_height,
+                               int src_x,
+                               int src_y,
+                               int x,
+                               int y,
+                               float center_x,
+                               float center_y,
+                               float factor)
+{
+  const int gidx = get_global_id(0);
+  const int gidy = get_global_id(1);
+  float dxx,dyy,ix,iy,inv_xy_len;
+  float4 sum = 0.0f;
+  float x_start = x + gidx;
+  float y_start = y + gidy;
+  float x_end = x + (center_x - x - gidx) * factor + gidx;
+  float y_end = y + (center_y - y - gidy) * factor + gidy;
+  int dist = ceil(sqrt(SQR(x_end-x_start)+SQR(y_end-y_start))+1);
+  int xy_len = max(dist, 3);
+  /* Past NOMINAL_NUM_IT samples the count grows only with the square root
+   * of the excess, and it never exceeds MAX_NUM_IT. */
+  if(xy_len > NOMINAL_NUM_IT)
+    xy_len = min(NOMINAL_NUM_IT + (int)sqrt((float)(xy_len - NOMINAL_NUM_IT)),
+                 MAX_NUM_IT);
+  inv_xy_len = 1.0f / xy_len;
+  dxx = (x_end - x_start) * inv_xy_len;
+  dyy = (y_end - y_start) * inv_xy_len;
+  ix = x_start;
+  iy = y_start;
+
+  for( int i=0; i<xy_len; ++i )
+  {
+    float dx = ix - floor(ix);
+    float dy = iy - floor(iy);
+    float4 mixy0,mixy1,pix0,pix1,pix2,pix3;
+    pix0 = get_pixel_color(src_buf, src_width, src_height,
+                           src_x, src_y, (int)ix, (int)iy);
+    pix1 = get_pixel_color(src_buf, src_width, src_height,
+                           src_x, src_y, (int)(ix+1.0f), (int)iy);
+    pix2 = get_pixel_color(src_buf, src_width, src_height,
+                           src_x, src_y, (int)ix, (int)(iy+1.0f));
+    pix3 = get_pixel_color(src_buf, src_width, src_height,
+                           src_x, src_y, (int)(ix+1.0f), (int)(iy+1.0f));
+
+    mixy0 = dy * (pix2 - pix0) + pix0;
+    mixy1 = dy * (pix3 - pix1) + pix1;
+    sum += dx * (mixy1 - mixy0) + mixy0;
+    ix += dxx;
+    iy += dyy;
+  }
+
+  dst_buf[gidy * get_global_size(0) + gidx] = sum * (float4)(inv_xy_len);
+
+}
diff --git a/opencl/motion-blur-zoom.cl.h b/opencl/motion-blur-zoom.cl.h
new file mode 100644
index 0000000..69b0ada
--- /dev/null
+++ b/opencl/motion-blur-zoom.cl.h
@@ -0,0 +1,88 @@
+static const char* motion_blur_zoom_cl_source =
+"#define CLAMP(val,lo,hi) ((val)<(lo)?(lo):((hi)<(val)?(hi):(val)))\n"
+"#define SQR(x) ((x)*(x))\n"
+"#define MAX_NUM_IT 200\n"
+"#define NOMINAL_NUM_IT 100\n"
+"\n"
+"/* Fetch the pixel at absolute position (x, y) from a buffer covering the\n"
+" * rectangle (rect_x, rect_y, rect_width, rect_height); positions outside\n"
+" * that rectangle are clamped to its nearest edge. */\n"
+"float4 get_pixel_color(const __global float4 *in_buf,\n"
+"                       int rect_width,\n"
+"                       int rect_height,\n"
+"                       int rect_x,\n"
+"                       int rect_y,\n"
+"                       int x,\n"
+"                       int y)\n"
+"{\n"
+"  int ix = x - rect_x;\n"
+"  int iy = y - rect_y;\n"
+"\n"
+"  ix = CLAMP(ix, 0, rect_width-1);\n"
+"  iy = CLAMP(iy, 0, rect_height-1);\n"
+"\n"
+"  return in_buf[iy * rect_width + ix];\n"
+"}\n"
+"\n"
+"\n"
+"/* One work-item per output pixel: average xy_len bilinearly interpolated\n"
+" * samples taken along the line from the pixel (x + gidx, y + gidy) towards\n"
+" * (center_x, center_y), spanning the fraction `factor` of that distance. */\n"
+"__kernel void motion_blur_zoom(const __global float4 *src_buf,\n"
+"                               __global float4 *dst_buf,\n"
+"                               int src_width,\n"
+"                               int src_height,\n"
+"                               int src_x,\n"
+"                               int src_y,\n"
+"                               int x,\n"
+"                               int y,\n"
+"                               float center_x,\n"
+"                               float center_y,\n"
+"                               float factor)\n"
+"{\n"
+"  const int gidx = get_global_id(0);\n"
+"  const int gidy = get_global_id(1);\n"
+"  float dxx,dyy,ix,iy,inv_xy_len;\n"
+"  float4 sum = 0.0f;\n"
+"  float x_start = x + gidx;\n"
+"  float y_start = y + gidy;\n"
+"  float x_end = x + (center_x - x - gidx) * factor + gidx;\n"
+"  float y_end = y + (center_y - y - gidy) * factor + gidy;\n"
+"  int dist = ceil(sqrt(SQR(x_end-x_start)+SQR(y_end-y_start))+1);\n"
+"  int xy_len = max(dist, 3);\n"
+"  /* Past NOMINAL_NUM_IT samples the count grows only with the square root\n"
+"   * of the excess, and it never exceeds MAX_NUM_IT. */\n"
+"  if(xy_len > NOMINAL_NUM_IT)\n"
+"    xy_len = min(NOMINAL_NUM_IT + (int)sqrt((float)(xy_len - NOMINAL_NUM_IT)),\n"
+"                 MAX_NUM_IT);\n"
+"  inv_xy_len = 1.0f / xy_len;\n"
+"  dxx = (x_end - x_start) * inv_xy_len;\n"
+"  dyy = (y_end - y_start) * inv_xy_len;\n"
+"  ix = x_start;\n"
+"  iy = y_start;\n"
+"\n"
+"  for( int i=0; i<xy_len; ++i )\n"
+"  {\n"
+"    float dx = ix - floor(ix);\n"
+"    float dy = iy - floor(iy);\n"
+"    float4 mixy0,mixy1,pix0,pix1,pix2,pix3;\n"
+"    pix0 = get_pixel_color(src_buf, src_width, src_height,\n"
+"                           src_x, src_y, (int)ix, (int)iy);\n"
+"    pix1 = get_pixel_color(src_buf, src_width, src_height,\n"
+"                           src_x, src_y, (int)(ix+1.0f), (int)iy);\n"
+"    pix2 = get_pixel_color(src_buf, src_width, src_height,\n"
+"                           src_x, src_y, (int)ix, (int)(iy+1.0f));\n"
+"    pix3 = get_pixel_color(src_buf, src_width, src_height,\n"
+"                           src_x, src_y, (int)(ix+1.0f), (int)(iy+1.0f));\n"
+"\n"
+"    mixy0 = dy * (pix2 - pix0) + pix0;\n"
+"    mixy1 = dy * (pix3 - pix1) + pix1;\n"
+"    sum += dx * (mixy1 - mixy0) + mixy0;\n"
+"    ix += dxx;\n"
+"    iy += dyy;\n"
+"  }\n"
+"\n"
+"  dst_buf[gidy * get_global_size(0) + gidx] = sum * (float4)(inv_xy_len);\n"
+"\n"
+"}\n"
+;
diff --git a/operations/common/motion-blur-zoom.c b/operations/common/motion-blur-zoom.c
index 841c1c2..25ee2f0 100644
--- a/operations/common/motion-blur-zoom.c
+++ b/operations/common/motion-blur-zoom.c
@@ -95,6 +95,114 @@ prepare (GeglOperation *operation)
   gegl_operation_set_format (operation, "output", babl_format ("RaGaBaA float"));
 }
 
+#include "opencl/gegl-cl.h"
+#include "buffer/gegl-buffer-cl-iterator.h"
+#include "opencl/motion-blur-zoom.cl.h"
+
+/* Kernel build result, created lazily by cl_motion_blur_zoom(). */
+static GeglClRunData *cl_data = NULL;
+
+/* Run the motion_blur_zoom kernel with one work-item per pixel of roi,
+ * reading from in_tex (described by src_rect) and writing to out_tex.
+ * Returns TRUE on failure and FALSE on success. */
+static gboolean
+cl_motion_blur_zoom(cl_mem in_tex,
+                    cl_mem out_tex,
+                    const GeglRectangle *roi,
+                    const GeglRectangle *src_rect,
+                    float center_x,
+                    float center_y,
+                    float factor)
+{
+  cl_int cl_err = 0;
+  size_t global_ws[2] = {roi->width,roi->height};
+  if(!cl_data)
+    {
+      const char *kernel_name[] = {"motion_blur_zoom", NULL};
+      cl_data = gegl_cl_compile_and_build(motion_blur_zoom_cl_source, kernel_name);
+    }
+  if(!cl_data)
+    return TRUE;
+
+  cl_err = gegl_cl_set_kernel_args(cl_data->kernel[0],
+                                   sizeof(cl_mem), (void *)&in_tex,
+                                   sizeof(cl_mem), (void *)&out_tex,
+                                   sizeof(cl_int), (void *)&src_rect->width,
+                                   sizeof(cl_int), (void *)&src_rect->height,
+                                   sizeof(cl_int), (void *)&src_rect->x,
+                                   sizeof(cl_int), (void *)&src_rect->y,
+                                   sizeof(cl_int), (void *)&roi->x,
+                                   sizeof(cl_int), (void *)&roi->y,
+                                   sizeof(cl_float), (void *)&center_x,
+                                   sizeof(cl_float), (void *)&center_y,
+                                   sizeof(cl_float), (void *)&factor, NULL);
+  CL_CHECK;
+
+  cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue(),
+                                       cl_data->kernel[0], 2,
+                                       NULL, global_ws, NULL,
+                                       0, NULL, NULL);
+  CL_CHECK;
+
+  return FALSE;
+
+error:
+  return TRUE;
+}
+
+
+/* OpenCL path of process(): run the kernel on every region yielded by the
+ * buffer iterator. Returns TRUE on success; on FALSE the caller falls back
+ * to the CPU implementation. */
+static gboolean
+cl_process(GeglOperation *operation,
+           GeglBuffer *input,
+           GeglBuffer *output,
+           const GeglRectangle *result,
+           const GeglRectangle *src_rect)
+{
+  GeglOperationAreaFilter *op_area = GEGL_OPERATION_AREA_FILTER(operation);
+  GeglChantO *o = GEGL_CHANT_PROPERTIES(operation);
+
+  const Babl *in_format = gegl_operation_get_format(operation,"input");
+  const Babl *out_format = gegl_operation_get_format(operation, "output");
+
+  gint err;
+
+  GeglBufferClIterator *i = gegl_buffer_cl_iterator_new(output,
+                                                        result,
+                                                        out_format,
+                                                        GEGL_CL_BUFFER_WRITE);
+  gint read = gegl_buffer_cl_iterator_add_2(i,
+                                            input,
+                                            result,
+                                            in_format,
+                                            GEGL_CL_BUFFER_READ,
+                                            op_area->left,
+                                            op_area->right,
+                                            op_area->top,
+                                            op_area->bottom,
+                                            GEGL_ABYSS_NONE);
+  while(gegl_buffer_cl_iterator_next(i,&err))
+    {
+      if(err) return FALSE;
+      err = cl_motion_blur_zoom(i->tex[read],
+                                i->tex[0],
+                                &i->roi[0],
+                                &i->roi[read],
+                                o->center_x,
+                                o->center_y,
+                                o->factor);
+
+      if(err) return FALSE;
+    }
+
+  return TRUE;
+
+
+}
+
+
 static inline gfloat *
 get_pixel_color (gfloat *in_buf,
                  const GeglRectangle *rect,
@@ -130,6 +238,9 @@ process (GeglOperation *operation,
   src_rect.width += op_area->left + op_area->right;
   src_rect.height += op_area->top + op_area->bottom;
 
+  if(gegl_operation_use_opencl(operation))
+    if(cl_process(operation, input, output, roi, &src_rect))
+      return TRUE;
   in_buf = g_new (gfloat, src_rect.width * src_rect.height * 4);
   out_buf = g_new0 (gfloat, roi->width * roi->height * 4);
   out_pixel = out_buf;
@@ -218,6 +329,7 @@ gegl_chant_class_init (GeglChantClass *klass)
   filter_class = GEGL_OPERATION_FILTER_CLASS (klass);
 
   operation_class->prepare = prepare;
+  operation_class->opencl_support = TRUE;
   filter_class->process = process;
 
   gegl_operation_class_set_keys (operation_class,
-- 
1.8.3.2

_______________________________________________
gegl-developer-list mailing list
List address: gegl-developer-list@xxxxxxxxx
List membership: https://mail.gnome.org/mailman/listinfo/gegl-developer-list