
/*
 *	One simulated particle as stored in the `particles` buffer.
 *	Four float4 members followed by four scalar floats.
 */
typedef struct
{
	float4	pos;		//	position (x, y, z are read by the Bang kernel)
	float4	vel;
	float4	accel;
	float4	pos_b;
	float	mass;
	float	radius;
	//	dum1 / dum2 are filler only: together with mass and radius they make the
	//	scalar tail exactly four floats, i.e. the same size as one float4.
	//	NOTE(review): presumably the host-side struct mirrors this layout - confirm.
	float	dum1;
	float	dum2;
} Particle;

/*
 *	Per-launch parameters handed to the kernel by value.
 *	Member order and types define the binary layout of the argument;
 *	do not reorder.
 */
typedef struct
{
	//	output switches
	float	line;			//	!= 0 : Bang writes two output slots per particle
	float	color;			//	>  0 : Bang fills col_out
	float	normal;			//	>  0 : Bang fills nor_out
	float	attrib;			//	not read by Bang

	float	dt;				//	not read by Bang
	uint	i_offset;		//	subtracted from the global id before the line-mode slot doubling
	uint	nb_by_set;		//	not read by Bang
	uint	float_by_set;	//	not read by Bang

	float2	mouse_pos;		//	not read by Bang

	//	generic scalar parameters; Bang uses p01, p02 and p03 only
	float	p01;
	float	p02;
	float	p03;
	float	p04;
	float	p05;
	float	p06;
	float	p07;
	float	p08;
	float	p09;
	float	p10;

	//	generic vector parameters; not read by Bang
	float4	v01;
	float4	v02;
} kernel_param;

/*
 *	Fixed-size block of 200 floats passed to the kernel by value.
 *	The Bang kernel receives it as `ds` but does not read it.
 */
typedef struct
{
	float	v[200];
} dataset_param;


/*
 *	Bang : turns each particle position into an output vertex and, optionally,
 *	a grey-level colour and a normal.
 *
 *	One work-item handles one particle (index = get_global_id(0)).
 *
 *	particles	input particles; only `pos` is read
 *	pos_out		output vertices (x, y, z, w = 1)
 *	col_out		output colours, written only when aaa.color  > 0
 *	nor_out		output normals, written only when aaa.normal > 0
 *	att_out		not written by this kernel
 *	aaa			launch parameters (line / color / normal switches, i_offset, p01..p03)
 *	ds			not read by this kernel
 *
 *	Line mode (aaa.line != 0): every particle owns two consecutive slots in the
 *	output buffers, so the buffers must be twice as large. The slot index is
 *	(id - i_offset) * 2 + i_offset. The second slot receives the first point
 *	scaled by 1.1 (xyz only) and, when colours are enabled, the same colour.
 *	nor_out gets no second entry.
 *
 *	There is no bounds check: the caller must launch exactly as many work-items
 *	as there are particles / output slots.
 *
 *	An earlier image-driven path (per-pixel luminance/depth culling and
 *	gradient normals sampled from image2d_t arguments) was disabled and its
 *	image arguments removed from the signature; the dead code is no longer
 *	kept here.
 */
__kernel void Bang(			__global Particle*		particles
						,	__global float4*		pos_out
						,	__global float4*		col_out
						,	__global float4*		nor_out
						,	__global float4*		att_out
						,	const kernel_param		aaa
						,	const dataset_param		ds
)
{
	const int gid = get_global_id(0);					//	particle index
	__global const Particle *p = &particles[gid];
	const bool as_line = ( aaa.line != 0.0f );

	//	Output slot. In line mode each particle uses two slots counted from i_offset.
	int out = gid;
	if( as_line )
	{
		const int sub_id = gid - aaa.i_offset;
		out = sub_id * 2 + aaa.i_offset;
	}

	//	Fetch the position once instead of re-reading global memory per component.
	const float4 src = p->pos;

	//	Fold y onto the positive half plane, then combine the unit direction
	//	with the scaled position: p01 * (dir - p01 * p02 * folded) * p03.
	const float2 folded = (float2)( src.x, fabs( src.y ) );
	const float2 dir = fast_normalize( folded );
	const float2 xy = aaa.p01 * ( dir - aaa.p01 * aaa.p02 * folded ) * aaa.p03;

	//	Stage the vertex in a private variable so later uses do not read it
	//	back from the global output buffer.
	const float4 head = (float4)( xy.x, xy.y, aaa.p02 * src.z, 1.0f );
	pos_out[out] = head;

	if( as_line )
	{
		//	Second end point of the segment. Only xyz is written; w of that
		//	slot is left untouched. The float literal keeps the product in
		//	single precision (a double scalar times a float vector is not a
		//	valid OpenCL C expression).
		pos_out[out + 1].xyz = head.xyz * 1.1f;
	}

	if( aaa.color > 0.0f )
	{
		//	Grey level falling off with the squared distance from the origin
		//	in the xy plane (uses the unfolded particle position).
		const float v = 1.0f - 4.0f * ( src.x * src.x + src.y * src.y );
		const float4 color = (float4)( v, v, v, 1.0f );

		col_out[out] = color;
		if( as_line )
		{
			col_out[out + 1] = color;
		}
	}

	if( aaa.normal > 0.0f )
	{
		//	The normal is simply the output vertex (including w = 1).
		nor_out[out] = head;
	}
}


