pix_convolve.cpp 22.7 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
////////////////////////////////////////////////////////
//
// GEM - Graphics Environment for Multimedia
//
// zmoelnig@iem.kug.ac.at
//
// Implementation file
//
//    Copyright (c) 1997-1998 Mark Danks.
//    Copyright (c) Günther Geiger.
//    Copyright (c) 2001-2002 IOhannes m zmoelnig. forum::für::umläute. IEM
//    Copyright (c) 2002 James Tittle & Chris Clepper
//    For information on usage and redistribution, and for a DISCLAIMER OF ALL
//    WARRANTIES, see the file, "GEM.LICENSE.TERMS" in this distribution.
//
/////////////////////////////////////////////////////////

#include "pix_convolve.h"

CPPEXTERN_NEW_WITH_TWO_ARGS(pix_convolve, t_floatarg, A_DEFFLOAT, t_floatarg, A_DEFFLOAT)

/////////////////////////////////////////////////////////
//
// pix_convolve
//
/////////////////////////////////////////////////////////
// Constructor
//
/////////////////////////////////////////////////////////
pix_convolve :: pix_convolve(t_floatarg fRow, t_floatarg fCol)
  : m_imatrix(NULL)
{
    // Give every member that the processing methods read a defined value
    // before any early return, so a rejected construction never leaves
    // indeterminate state behind.
    m_rows   = 0;
    m_cols   = 0;
    m_irange = 255;
    m_chroma = 0;   // NOTE(review): 0 chosen as the default - confirm intended

    const int row = (int)fRow;
    const int col = (int)fCol;

    // A missing (0) or negative dimension cannot describe a kernel; negative
    // values would also produce a bogus allocation size below.
    if (row <= 0 || col <= 0)
    {
        error("GEM: pix_convolve: matrix must have some dimension");
        return;
    }

    // Only odd sizes have a single centre element.
    if (!(row % 2) || !(col % 2))
    {
        error("GEM: pix_convolve: matrix must have odd dimensions");
        return;
    }

    m_rows = row;
    m_cols = col;

    const int count = m_rows * m_cols;
    m_imatrix = new signed short[count];

    // zero out the matrix
    for (int i = 0; i < count; i++) m_imatrix[i] = 0;

    // Insert a one (255 in the fixed-point scale used here) at the centre so
    // the default kernel is the identity.  The kernel is indexed as
    // [matY * m_rows + matX] by the processing loops, so the centre element is
    // (m_cols / 2) * m_rows + (m_rows / 2).  The former "+ 1" on both terms
    // addressed the last element for 3x3 and ran past the array for 1x1.
    m_imatrix[ (m_cols / 2) * m_rows + (m_rows / 2) ] = 255;

    inlet_new(this->x_obj, &this->x_obj->ob_pd, gensym("float"), gensym("ft1"));
    inlet_new(this->x_obj, &this->x_obj->ob_pd, gensym("list"), gensym("matrix"));
}

/////////////////////////////////////////////////////////
// Destructor
//
/////////////////////////////////////////////////////////
pix_convolve :: ~pix_convolve()
{
    // Release the kernel storage; delete[] on a null pointer is a no-op, so
    // an object whose construction was rejected is handled as well.
    delete [] m_imatrix;
    post("done...");
}

/////////////////////////////////////////////////////////
// processImage
//
/////////////////////////////////////////////////////////
78

cclepper's avatar
cclepper committed
79

80
void pix_convolve :: calculateRGBA3x3(imageStruct &image,imageStruct &tempImg)
{
  // Byte range inside each 4-byte pixel that gets filtered; the remaining
  // byte is left untouched.  Which three bytes are used depends on the build.
#ifndef MACOSX
  const int firstChannel = 0;
  const int endChannel   = 3;
#else
  const int firstChannel = 1;
  const int endChannel   = 4;
#endif

  const int width     = tempImg.xsize;
  const int height    = tempImg.ysize;
  const int pixelSize = tempImg.csize;
  // exclusive upper bound of the pixel indices that are visited
  const int lastPixel = width*height - width - 1;

  // Pixels are stepped through as whole ints (one int per pixel).
  int* in  = (int*)tempImg.data;
  int* out = (int*)image.data;

  for (int p = width + 1; p < lastPixel; p++) {
    // leave the first and last column of every row alone
    const int column = p % width;
    if (column == 0 || column == width - 1) continue;

    // the 3x3 neighbourhood around pixel p, row by row
    const unsigned char* tap[9] = {
      (const unsigned char*)(in + p - width - 1),
      (const unsigned char*)(in + p - width),
      (const unsigned char*)(in + p - width + 1),
      (const unsigned char*)(in + p - 1),
      (const unsigned char*)(in + p),
      (const unsigned char*)(in + p + 1),
      (const unsigned char*)(in + p + width - 1),
      (const unsigned char*)(in + p + width),
      (const unsigned char*)(in + p + width + 1)
    };

    for (int ch = firstChannel; ch < endChannel; ch++) {
      int sum = 0;
      for (int k = 0; k < 9; k++) {
        sum += m_imatrix[k] * (int)tap[k][ch];
      }
      // apply the range factor and drop the 16 fractional bits
      sum *= m_irange;
      sum >>= 16;
      ((unsigned char*)out)[p*pixelSize + ch] = CLAMP(sum);
    }
  }
}

142
143
void pix_convolve :: processImage(imageStruct &image)
{
    // The constructor leaves m_imatrix NULL when it rejects the requested
    // dimensions; without a kernel the image is passed through unchanged.
    if (!m_imatrix) return;

    // Work from a copy so already-filtered pixels never feed later taps.
    image.copy2Image(&tempImg);

    const int csize      = tempImg.csize;
    const int initX      = m_rows / 2;
    const int initY      = m_cols / 2;
    const int maxX       = tempImg.xsize - initX;
    const int maxY       = tempImg.ysize - initY;
    const int xTimesc    = tempImg.xsize * csize;
    const int initOffset = initY * xTimesc + initX * csize;

    // dedicated fast path for the common 3x3 kernel on 4-byte pixels
    if (m_rows == 3 && m_cols == 3 && csize == 4) {
      calculateRGBA3x3(image,tempImg);
      return;
    }

    // Channels to filter.  For 4-byte pixels use the same byte range as
    // calculateRGBA3x3 so both paths skip the same (alpha) byte; previously
    // byte 0 was skipped on every platform.
    // NOTE(review): for other pixel sizes the original range (1..csize-1) is
    // kept as-is - confirm whether byte 0 should be filtered there too.
    int firstChannel = 1;
    int endChannel   = csize;
    if (csize == 4) {
#ifndef MACOSX
      firstChannel = 0;
      endChannel   = 3;
#endif
    }

    // Border pixels (closer to the edge than half the kernel) are not touched.
    for (int y = initY; y < maxY; y++)
    {
        const int realY = y * xTimesc;

        for (int x = initX; x < maxX; x++)
        {
            const int realPos  = x * csize + realY;
            // byte offset of the top-left pixel under the kernel
            const int offsetXY = realPos - initOffset;

            for (int c = firstChannel; c < endChannel; c++)
            {
                int new_val = 0;
                const int offsetXYC = offsetXY + c;
                for (int matY = 0; matY < m_cols; matY++)
                {
                    const int offsetXYCMat = matY * xTimesc + offsetXYC;
                    const int realMatY     = matY * m_rows;
                    for (int matX = 0; matX < m_rows; matX++)
                    {
                        // coefficients are stored scaled by 255, hence >>8
                        new_val += (tempImg.data[offsetXYCMat + matX * csize] *
                                        m_imatrix[realMatY + matX])>>8;
                    }
                }
                // m_irange is not applied on this path
                image.data[realPos + c] = CLAMP(new_val);
            }
        }
    }
}

196

197
198
void pix_convolve :: processYUVImage(imageStruct &image)
{
    // The constructor leaves m_imatrix NULL when it rejects the requested
    // dimensions; without a kernel the image is passed through unchanged.
    if (!m_imatrix) return;

    // Work from a copy so already-filtered pixels never feed later taps.
    image.copy2Image(&tempImg);

    const int csize      = tempImg.csize;
    const int initX      = m_rows / 2;
    const int initY      = m_cols / 2;
    const int maxX       = tempImg.xsize - initX;
    const int maxY       = tempImg.ysize - initY;
    const int xTimesc    = tempImg.xsize * csize;
    const int initOffset = initY * xTimesc + initX * csize;

    // dedicated path for 3x3 kernels
    if (m_rows == 3 && m_cols == 3) {
      calculate3x3YUV(image,tempImg);
      return;
    }

    // Only byte 1 of every pixel is filtered (the UV byte at offset 0 is
    // skipped).  The former "for (c = 1; c < 3; c += 2)" loop ran exactly once.
    const int lumaByte = 1;

    // Border pixels (closer to the edge than half the kernel) are not touched.
    for (int y = initY; y < maxY; y++)
    {
        const int realY = y * xTimesc;

        for (int x = initX; x < maxX; x++)
        {
            const int realPos   = x * csize + realY;
            // byte offset of the filtered byte of the top-left kernel pixel
            const int offsetXYC = realPos - initOffset + lumaByte;

            int new_val = 0;
            for (int matY = 0; matY < m_cols; matY++)
            {
                const int offsetXYCMat = matY * xTimesc + offsetXYC;
                const int realMatY     = matY * m_rows;
                for (int matX = 0; matX < m_rows; matX++)
                {
                    // coefficients are stored scaled by 255, hence >>8
                    new_val += (tempImg.data[offsetXYCMat + matX * csize] *
                                    m_imatrix[realMatY + matX])>>8;
                }
            }
            image.data[realPos + lumaByte] = CLAMP(new_val);

            // With chroma switched off the U/V byte of the pixel is set to
            // the neutral value 128; otherwise it is kept as it is.
            if (!m_chroma) image.data[realPos] = 128;  //remove the U+V
        }
    }
}
cclepper's avatar
cclepper committed
279

280
//make two functions - one for chroma one without
cclepper's avatar
cclepper committed
281
282
void pix_convolve :: calculate3x3YUV(imageStruct &image,imageStruct &tempImg)
{
  // The constructor leaves m_imatrix NULL when it rejects the requested
  // dimensions; there is nothing to apply in that case.
  if (!m_imatrix) return;

#ifdef ALTIVEC
  // vectorised implementation
  calculate3x3YUVAltivec(image,tempImg);
  return;
#else

  // Scalar 3x3 convolution for 2-byte-per-pixel YUV data.  "register" was
  // dropped from the locals (it is no longer a valid storage class in C++17)
  // and the unused "length" local was removed; the arithmetic is unchanged.
  int i;
  int j;
  int k;
  int xsize =  tempImg.xsize -1;
  int ysize =  tempImg.ysize -1;
  int size = xsize*ysize - xsize-1;

  // one short per pixel
  short* src = (short*) tempImg.data;
  short* dest = (short*)image.data;

  int mat1,mat2,mat3,mat4,mat5,mat6,mat7,mat8,mat9;
  int res1,res2,res3,res4,res5,res6,res7,res8,res9;
  int range;

  // local copies of the nine coefficients and of the range factor
  mat1 = m_imatrix[0];
  mat2 = m_imatrix[1];
  mat3 = m_imatrix[2];
  mat4 = m_imatrix[3];
  mat5 = m_imatrix[4];
  mat6 = m_imatrix[5];
  mat7 = m_imatrix[6];
  mat8 = m_imatrix[7];
  mat9 = m_imatrix[8];
  range =m_irange;

  if (m_chroma){
    // Chroma is kept: only byte 1 of each destination pixel is rewritten.
    // NOTE(review): src is a short* but is indexed with +1/+3 offsets and
    // each value is truncated to unsigned char; this looks like byte-offset
    // arithmetic applied to pixel indices - confirm before relying on it.
    i = xsize;

    unsigned char val1 = 0;
    unsigned char val2 = src[i-xsize+1];
    unsigned char val3 = src[i-xsize+3];
    unsigned char val4 = src[i-1];
    unsigned char val5 = src[i+1];
    unsigned char val6 = src[i+3];
    unsigned char val7 = src[i+xsize-1];
    unsigned char val8 = src[i+xsize+1];
    unsigned char val9 = src[i+xsize+3];

    //unroll this 2x to fill the registers? (matrix*y1*y2= 9*9*9 =27)
    i=xsize+1;
    for (k=1;k<ysize;k++) {
      for (j=1;j<xsize;j++) {
        // slide the window one step: reuse two values per row and load
        // only the new right-hand one
        //load furthest value first...the rest should be in cache
        val7 = val8;
        val8 = val9;
        val9 = src[i+xsize+3]; //this will come from main mem
        val1 = val2;
        val2 = val3;
        val3 = src[i-xsize+3]; //should be in cache from previous pass
        val4 = val5;
        val5 = val6;
        val6 = src[i+3];

        //unroll??
        res1 = mat1*(int)((unsigned char)val1);
        res2 = mat2*(int)((unsigned char)val2);
        res3 = mat3*(int)((unsigned char)val3);
        res4 = mat4*(int)((unsigned char)val4);
        res5 = mat5*(int)((unsigned char)val5);
        res6 = mat6*(int)((unsigned char)val6);
        res7 = mat7*(int)((unsigned char)val7);
        res8 = mat8*(int)((unsigned char)val8);
        res9 = mat9*(int)((unsigned char)val9);

        res1 += res2 + res3;
        res4 += res5 + res6;
        res7 += res8 + res9;
        res1 += res4 + res7;

        // apply the range factor and drop the 16 fractional bits
        res1*=range;
        res1>>=16;
        ((unsigned char*)dest)[i*2+1] = CLAMP(res1);
        i++;
      }
      // NOTE(review): the window values are not reloaded after this jump,
      // so the first results of a row still use values from the previous
      // position - confirm intended.
      i=k*tempImg.xsize;
    }
  }else{
    // Chroma is removed: bytes i*2 and i*2+2 are forced to 128 after the
    // filtered bytes have been written.
    i = xsize;
    //make these temp register vars rather than pointers?

    short* val1 = 0;
    short* val2 = src+i-xsize; //val2 = src[i-xsize];
    short* val3 = src+i-xsize+1; //val3 = src[i-xsize+1];
    short* val4 = src+i-1; //val4 = src[i-1];
    short* val5 = src+i; //val5 = src[i];
    short* val6 = src+i+1; //val6 = src[i+1];
    short* val7 = src+i+xsize-1; //val7 = src[i+xsize-1];
    short* val8 = src+i+xsize; //val8 = src[i+xsize];
    short* val9 = src+i+xsize+1; //val9 = src[i+xsize+1];

    for (i=xsize+1;i<size;i++) {
      // slide the 3x3 window of pixel pointers one pixel to the right
      val1 = val2;
      val2 = val3;
      val3 = src+i-xsize+1;
      val4 = val5;
      val5 = val6;
      val6 = src+i+1;
      val7 = val8;
      val8 = val9;
      val9 = src+i+xsize+1;

      // NOTE(review): xsize is the image width minus one here, so this test
      // does not line up with the real first/last column - confirm.
      if (i%xsize == 0 || i%xsize == xsize-1) continue;
#ifndef MACOSX
      for (j=0;j<3;j++)
#else
      for (j=1;j<3;j+=2)
#endif
      {
        res1 = mat1*(int)((unsigned char*)val1)[j];
        res2 = mat2*(int)((unsigned char*)val2)[j];
        res3 = mat3*(int)((unsigned char*)val3)[j];
        res4 = mat4*(int)((unsigned char*)val4)[j];
        res5 = mat5*(int)((unsigned char*)val5)[j];
        res6 = mat6*(int)((unsigned char*)val6)[j];
        res7 = mat7*(int)((unsigned char*)val7)[j];
        res8 = mat8*(int)((unsigned char*)val8)[j];
        res9 = mat9*(int)((unsigned char*)val9)[j];
        res1 += res2 + res3;
        res4 += res5 + res6;
        res7 += res8 + res9;
        res1 += res4 + res7;
        // apply the range factor and drop the 16 fractional bits
        res1*=range;
        res1>>=16;
        ((unsigned char*)dest)[i*2+j] = CLAMP(res1);
      }
      ((unsigned char*)dest)[i*2] = 128;
      ((unsigned char*)dest)[i*2+2] = 128;
    }
  }
#endif
}

440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
void pix_convolve :: calculate3x3YUVAltivec(imageStruct &image,imageStruct &tempImg)
{
#ifdef ALTIVEC
    // AltiVec 3x3 convolution of the Y bytes; pixel data format is U Y V Y.
    // Each pass of the inner loop handles one 16-byte vector (8 pixels).
    // NOTE(review): m_irange is not applied on this path; results are only
    // shifted right by 8.
    // NOTE(review): vec_ld/vec_st ignore the low four address bits, so the
    // +/-2 byte offsets below may not select the neighbouring pixel, and the
    // byte index is never re-synchronised at the end of a row - confirm.

    const int xsize = (tempImg.xsize)*2;   // bytes per image row
    const int width = (tempImg.xsize)/8;   // 16-byte vectors per image row

    // scratch unions used to move scalars into the vector unit
    union
    {
        short   elements[8];
        vector  short v;
    }shortBuffer;

    union
    {
        unsigned int    elements[4];
        vector  unsigned int v;
    }intBuffer;

    unsigned char *dst =  (unsigned char*) image.data;
    unsigned char *src =  tempImg.data;

    // all-ones byte vector: multiplying by it widens bytes to shorts
    const vector unsigned char one = vec_splat_u8( 1 );

    // shift count (8) replicated into every lane
    intBuffer.elements[0] = 8;
    vector unsigned int bitshift = intBuffer.v;
    bitshift = (vector unsigned int)vec_splat((vector unsigned int)bitshift,0);

    // the nine kernel coefficients, each replicated into every lane
    vector signed short mat[9];
    for (int m = 0; m < 9; m++) {
        shortBuffer.elements[0] = m_imatrix[m];
        mat[m] = shortBuffer.v;
        mat[m] = (vector signed short)vec_splat((vector signed short)mat[m],0);
    }

    // byte offsets of the nine taps relative to the current position
    const int tapOffset[9] = {
        -xsize-2, -xsize, -xsize+2,
              -2,      0,        2,
         xsize-2,  xsize,  xsize+2
    };

    UInt32			prefetchSize = GetPrefetchConstant( 16, 1, 256 );
    vec_dst( src, prefetchSize, 0 );
    vec_dst( dst, prefetchSize, 0 );

    // byte index of the current position; starts in the second row
    int i = xsize+2;

    for (int h = 1; h < image.ysize-1; h++){
        for (int w = 1; w < width; w++)
        {
            vec_dst( src, prefetchSize, 0 );
            vec_dst( dst, prefetchSize, 0 );

            vector unsigned char val[9];
            vector signed short  y[9];
            vector signed int    res[9];    // products of the odd lanes
            vector signed int    resa[9];   // products of the even lanes

            for (int t = 0; t < 9; t++) {
                val[t]  = vec_ld(0,src+i+tapOffset[t]);
                //extract the Y for processing
                y[t]    = (vector signed short)vec_mulo((vector unsigned char)one,(vector unsigned char)val[t]);
                //mult the Y by the matrix coefficient
                res[t]  = vec_mulo(mat[t],y[t]);
                resa[t] = vec_mule(mat[t],y[t]);
            }

            // the U/V bytes of the centre vector are passed through as they are
            vector signed short uvres = (vector signed short)vec_mule((vector unsigned char)one,(vector unsigned char)val[4]);

            //sum the results (saturating adds, so the order is kept as it was)
            res[0] = vec_adds(res[0],res[1]); //1+2
            res[2] = vec_adds(res[2],res[3]); //3+4
            res[4] = vec_adds(res[4],res[5]); //5+6
            res[6] = vec_adds(res[6],res[7]); //7+8
            res[0] = vec_adds(res[0],res[2]); //(1+2)+(3+4)
            res[6] = vec_adds(res[6],res[8]); //7+8+9
            res[0] = vec_adds(res[0],res[4]); //(1+2)+(3+4)+(5+6)
            res[0] = vec_adds(res[0],res[6]); //(1+2)+(3+4)+(5+6)+(7+8+9)

            resa[0] = vec_adds(resa[0],resa[1]); //1+2
            resa[2] = vec_adds(resa[2],resa[3]); //3+4
            resa[4] = vec_adds(resa[4],resa[5]); //5+6
            resa[6] = vec_adds(resa[6],resa[7]); //7+8
            resa[0] = vec_adds(resa[0],resa[2]); //(1+2)+(3+4)
            resa[6] = vec_adds(resa[6],resa[8]); //7+8+9
            resa[0] = vec_adds(resa[0],resa[4]); //(1+2)+(3+4)+(5+6)
            resa[0] = vec_adds(resa[0],resa[6]); //(1+2)+(3+4)+(5+6)+(7+8+9)

            //do the bitshift on the results
            res[0]  = vec_sra(res[0],bitshift);
            resa[0] = vec_sra(resa[0],bitshift);

            //pack back to one short vector
            vector signed int yhi = vec_mergeh(resa[0],res[0]);
            vector signed int ylo = vec_mergel(resa[0],res[0]);
            vector signed short yres = vec_packs(yhi,ylo);

            //combine with the UV: interleave UV and Y again
            vector signed short hiImage =  vec_mergeh(uvres,yres);
            vector signed short loImage =  vec_mergel(uvres,yres);

            // saturate to bytes and store the finished vector
            val[0] = vec_packsu(hiImage,loImage);
            vec_st(val[0],0,dst+i);
            i+=16;
        }
        vec_dss( 0 );
    }  /*end of working altivec function */
#endif
}
cclepper's avatar
cclepper committed
682

683
684
685
686
687
688
/////////////////////////////////////////////////////////
// rangeMess
//
/////////////////////////////////////////////////////////
void pix_convolve :: rangeMess(float range)
{
689
    m_irange = (int)(range*255.f);
690
691
692
693
694
695
696
697
698
699
700
701
702
703
    setPixModified();
}

/////////////////////////////////////////////////////////
// matrixMess
//
/////////////////////////////////////////////////////////
void pix_convolve :: matrixMess(int argc, t_atom *argv)
{
    if (argc != m_cols * m_rows)
    {
    	error("GEM: pix_convolve: matrix size not correct");
    	return;
    }
cclepper's avatar
cclepper committed
704

ggeiger's avatar
ggeiger committed
705
    int i;
dheck's avatar
dheck committed
706
    for (i = 0; i < argc; i++) m_imatrix[i] = (int)(atom_getfloat(&argv[i])*255.);
707

cclepper's avatar
cclepper committed
708

709
710
711
712
713
714
715
716
717
718
719
720
721
    setPixModified();
}

/////////////////////////////////////////////////////////
// static member function
//
/////////////////////////////////////////////////////////
void pix_convolve :: obj_setupCallback(t_class *classPtr)
{
    // "matrix": variable-length argument list with the kernel coefficients
    t_method matrixFn = (t_method)&pix_convolve::matrixMessCallback;
    class_addmethod(classPtr, matrixFn, gensym("matrix"), A_GIMME, A_NULL);

    // "ft1": single float carrying the range
    t_method rangeFn = (t_method)&pix_convolve::rangeMessCallback;
    class_addmethod(classPtr, rangeFn, gensym("ft1"), A_FLOAT, A_NULL);

    // "chroma": single float used as the chroma flag
    t_method chromaFn = (t_method)&pix_convolve::chromaMessCallback;
    class_addmethod(classPtr, chromaFn, gensym("chroma"), A_FLOAT, A_NULL);
}
void pix_convolve :: matrixMessCallback(void *data, t_symbol *, int argc, t_atom *argv)
{
    GetMyClass(data)->matrixMess(argc, argv);
}
void pix_convolve :: rangeMessCallback(void *data, t_floatarg range)
{
    GetMyClass(data)->rangeMess((float)range);
}
cclepper's avatar
cclepper committed
733
734
735
736
737

void pix_convolve :: chromaMessCallback(void *data, t_floatarg value)
{
    GetMyClass(data)->m_chroma=(int)value;
}