pix_convolve.cpp 21.6 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
////////////////////////////////////////////////////////
//
// GEM - Graphics Environment for Multimedia
//
// zmoelnig@iem.kug.ac.at
//
// Implementation file
//
//    Copyright (c) 1997-1998 Mark Danks.
//    Copyright (c) Günther Geiger.
//    Copyright (c) 2001-2002 IOhannes m zmoelnig. forum::für::umläute. IEM
//    Copyright (c) 2002 James Tittle & Chris Clepper
//    For information on usage and redistribution, and for a DISCLAIMER OF ALL
//    WARRANTIES, see the file, "GEM.LICENSE.TERMS" in this distribution.
//
/////////////////////////////////////////////////////////

#include "pix_convolve.h"

// Object-creation macro for pix_convolve. Presumably it registers the class
// with Pd and forwards two optional float creation arguments to the
// constructor below (as fRow, fCol) -- TODO confirm against the macro's
// definition, which is not part of this file.
CPPEXTERN_NEW_WITH_TWO_ARGS(pix_convolve, t_floatarg, A_DEFFLOAT, t_floatarg, A_DEFFLOAT)

/////////////////////////////////////////////////////////
//
// pix_convolve
//
/////////////////////////////////////////////////////////
// Constructor
//
/////////////////////////////////////////////////////////
pix_convolve :: pix_convolve(t_floatarg fRow, t_floatarg fCol)
  : m_imatrix(NULL)
{
  // Give every member a defined value before any early return. If the
  // dimensions are rejected below the object still exists, and the message
  // handlers and process functions read these members.
  m_rows   = 0;
  m_cols   = 0;
  m_irange = 255;
  // Chroma handling defaults to "off" until a [chroma( message arrives.
  // NOTE(review): the member was previously never initialised; confirm that
  // 0 is the intended default.
  m_chroma = 0;

  const int row = (int)fRow;
  const int col = (int)fCol;

  // Reject zero and negative sizes: a negative size would otherwise pass the
  // odd-size test and reach the allocation below.
  if (row <= 0 || col <= 0)
  {
    error("GEM: pix_convolve: matrix must have some dimension");
    return;
  }

  // An even-sized kernel has no centre element.
  if (!(row % 2) || !(col % 2))
  {
    error("GEM: pix_convolve: matrix must have odd dimensions");
    return;
  }

  m_rows = row;
  m_cols = col;
  m_imatrix = new signed short[m_rows * m_cols];

  // zero out the matrix
  for (int i = 0; i < m_cols * m_rows; i++) m_imatrix[i] = 0;

  // Default to the identity kernel: 255 (fixed-point 1.0) in the centre cell.
  // Cells are addressed as m_imatrix[y * m_rows + x] by the process functions,
  // so the centre is (m_cols/2, m_rows/2). The former "+ 1" on both terms
  // selected the last cell for 3x3 and wrote past the end for 1x1.
  m_imatrix[(m_cols / 2) * m_rows + (m_rows / 2)] = 255;

  // extra inlets: a float routed to "ft1" (range) and a list routed to "matrix"
  inlet_new(this->x_obj, &this->x_obj->ob_pd, gensym("float"), gensym("ft1"));
  inlet_new(this->x_obj, &this->x_obj->ob_pd, gensym("list"), gensym("matrix"));
}

/////////////////////////////////////////////////////////
// Destructor
//
/////////////////////////////////////////////////////////
pix_convolve :: ~pix_convolve()
{
  // Release the kernel coefficients. delete[] on a null pointer is a no-op,
  // so no explicit guard is needed when construction bailed out early.
  delete [] m_imatrix;

  post("done...");
}

/////////////////////////////////////////////////////////
// processImage
//
/////////////////////////////////////////////////////////
78

cclepper's avatar
cclepper committed
79

80
// Fast path for a 3x3 kernel on 4-byte pixels.
//   image   - destination; only interior pixels are written
//   tempImg - unmodified copy of the picture that the taps are read from
// Each output byte is sum(kernel[k] * tap[k]) * m_irange >> 16, clamped.
// The first and last row and the first and last column are left untouched.
void pix_convolve :: calculateRGBA3x3(imageStruct &image,imageStruct &tempImg)
{
  const int width    = tempImg.xsize;
  const int pixBytes = tempImg.csize;
  // one past the last pixel index visited (ends on the second-to-last row)
  const int endPixel = width * tempImg.ysize - width - 1;

  // Source pixels are stepped as whole ints; the destination is addressed
  // byte-wise using csize.
  const int     *srcPix = (int*) tempImg.data;
  unsigned char *out    = (unsigned char*) image.data;

  // Byte range processed inside each pixel; presumably the three colour
  // channels with alpha skipped (position depends on the platform layout).
#ifndef MACOSX
  const int firstChan = 0;
  const int endChan   = 3;
#else
  const int firstChan = 1;
  const int endChan   = 4;
#endif

  for (int pix = width + 1; pix < endPixel; pix++)
  {
    // skip the left and right border columns
    const int column = pix % width;
    if (column == 0 || column == width - 1) continue;

    // The nine neighbours in kernel order: top row, middle row, bottom row.
    const int *centre = srcPix + pix;
    const unsigned char *tap[9];
    tap[0] = (const unsigned char*)(centre - width - 1);
    tap[1] = (const unsigned char*)(centre - width);
    tap[2] = (const unsigned char*)(centre - width + 1);
    tap[3] = (const unsigned char*)(centre - 1);
    tap[4] = (const unsigned char*)(centre);
    tap[5] = (const unsigned char*)(centre + 1);
    tap[6] = (const unsigned char*)(centre + width - 1);
    tap[7] = (const unsigned char*)(centre + width);
    tap[8] = (const unsigned char*)(centre + width + 1);

    for (int chan = firstChan; chan < endChan; chan++)
    {
      int acc = 0;
      for (int k = 0; k < 9; k++)
        acc += m_imatrix[k] * (int)tap[k][chan];

      // coefficients and range are both scaled by 255, hence the 16-bit shift
      acc *= m_irange;
      acc >>= 16;
      out[pix * pixBytes + chan] = CLAMP(acc);
    }
  }
}

142
143
// Convolves `image` in place with the m_rows x m_cols kernel.
// A copy of the picture is taken first (tempImg) so that already-filtered
// pixels never feed back into the computation. A 3x3 kernel on 4-byte pixels
// is delegated to calculateRGBA3x3(); every other case uses the generic loop
// below. Pixels closer to the border than half the kernel size are left
// untouched.
void pix_convolve :: processImage(imageStruct &image)
{
  image.copy2Image(&tempImg);

  if (m_rows == 3 && m_cols == 3 && tempImg.csize == 4) {
    calculateRGBA3x3(image,tempImg);
    return;
  }

  const int csize      = tempImg.csize;
  const int initX      = m_rows / 2;
  const int initY      = m_cols / 2;
  const int maxX       = tempImg.xsize - initX;
  const int maxY       = tempImg.ysize - initY;
  const int xTimesc    = tempImg.xsize * csize;
  // byte distance from a pixel back to the top-left corner of its window
  const int initOffset = initY * xTimesc + initX * csize;

  // Channels to filter. For 4-byte pixels use the same range as
  // calculateRGBA3x3() so both paths treat a pixel identically; the loop
  // used to start at 1 on every platform, which skipped byte 0 and filtered
  // byte 3 off MACOSX. For any other pixel size every channel is filtered
  // (previously a 1-byte pixel was never processed at all).
  int firstChan = 0;
  int endChan   = csize;
  if (csize == 4) {
#ifndef MACOSX
    endChan = 3;
#else
    firstChan = 1;
#endif
  }

  for (int y = initY; y < maxY; y++)
  {
    const int realY   = y * xTimesc;
    const int offsetY = realY - initOffset;

    for (int x = initX; x < maxX; x++)
    {
      const int realPos  = x * csize + realY;
      const int offsetXY = x * csize + offsetY;

      for (int c = firstChan; c < endChan; c++)
      {
        int new_val = 0;
        const int offsetXYC = offsetXY + c;

        for (int matY = 0; matY < m_cols; matY++)
        {
          const int offsetXYCMat = matY * xTimesc + offsetXYC;
          const int realMatY     = matY * m_rows;
          for (int matX = 0; matX < m_rows; matX++)
          {
            // coefficients are stored scaled by 255; shift each product
            // back down by 8 bits
            new_val += (tempImg.data[offsetXYCMat + matX * csize] *
                        m_imatrix[realMatY + matX])>>8;
          }
        }
        // m_irange is not applied in this generic path
        image.data[realPos + c] = CLAMP(new_val);
      }
    }
  }
}

196

197
198
// Convolves the byte at offset 1 of every pixel of `image` with the
// m_rows x m_cols kernel, reading from a private copy (tempImg).
// A 3x3 kernel is handed to calculate3x3YUV(). When m_chroma is zero, byte 0
// of each processed pixel is additionally overwritten with 128.
// Pixels closer to the border than half the kernel size are left untouched.
void pix_convolve :: processYUVImage(imageStruct &image)
{
  image.copy2Image(&tempImg);

  if (m_rows == 3 && m_cols == 3) {
    calculate3x3YUV(image,tempImg);
    return;
  }

  const int pixBytes = tempImg.csize;
  const int rowBytes = tempImg.xsize * pixBytes;
  const int halfX    = m_rows / 2;
  const int halfY    = m_cols / 2;
  const int endX     = tempImg.xsize - halfX;
  const int endY     = tempImg.ysize - halfY;
  // byte distance from a pixel back to the top-left corner of its window
  const int windowBack = halfY * rowBytes + halfX * pixBytes;

  // the only byte of a pixel that is filtered
  const int lumaByte = 1;
  const bool stripChroma = !m_chroma;

  for (int y = halfY; y < endY; y++)
  {
    const int rowStart = y * rowBytes;

    for (int x = halfX; x < endX; x++)
    {
      const int dstPos    = rowStart + x * pixBytes;
      const int windowPos = dstPos - windowBack + lumaByte;

      int sum = 0;
      for (int ky = 0; ky < m_cols; ky++)
      {
        const int tapRow    = windowPos + ky * rowBytes;
        const int kernelRow = ky * m_rows;
        for (int kx = 0; kx < m_rows; kx++)
        {
          // coefficients are scaled by 255; bring each product back down
          sum += (tempImg.data[tapRow + kx * pixBytes] *
                  m_imatrix[kernelRow + kx])>>8;
        }
      }

      image.data[dstPos + lumaByte] = CLAMP(sum);
      if (stripChroma)
        image.data[dstPos + lumaByte - 1] = 128;  //remove the U+V
    }
  }
}
cclepper's avatar
cclepper committed
279

280
//make two functions - one for chroma one without
cclepper's avatar
cclepper committed
281
282
// 3x3 convolution of the luma byte of a 2-byte-per-pixel image.
//   image   - destination; only interior pixels are written
//   tempImg - unmodified copy of the picture that the taps are read from
// With ALTIVEC defined the work is delegated to calculate3x3YUVAltivec().
// Otherwise each interior pixel gets
//     Y' = CLAMP( (sum of kernel[k] * Y[k]) * m_irange >> 16 )
// written to byte 1 of the pixel; when m_chroma is zero, byte 0 is also set
// to 128. The first/last row and first/last column are left untouched.
//
// This replaces the earlier sliding-window code, which
//   - used (xsize - 1) as the row stride, so the rows above and below were
//     sampled one pixel off to either side,
//   - indexed the luma byte with the column counter j in the non-chroma
//     path, reading bytes far away from the intended pixel,
//   - read pixels through a short* and truncated to unsigned char in the
//     chroma path, which picks a different byte depending on endianness,
//   - carried stale taps from the end of one row into the start of the next.
void pix_convolve :: calculate3x3YUV(imageStruct &image,imageStruct &tempImg)
{
#ifdef ALTIVEC
  calculate3x3YUVAltivec(image,tempImg);
  return;
#else
  const int bytesPerPixel = 2;  // this routine always treated pixels as 16 bit
  const int lumaOffset    = 1;  // byte written by the original code (i*2+1)

  const int width    = tempImg.xsize;
  const int height   = tempImg.ysize;
  const int rowBytes = width * bytesPerPixel;

  const unsigned char *src  = (const unsigned char*) tempImg.data;
  unsigned char       *dest = (unsigned char*) image.data;

  // cache the kernel and the range in locals
  const int mat1 = m_imatrix[0];
  const int mat2 = m_imatrix[1];
  const int mat3 = m_imatrix[2];
  const int mat4 = m_imatrix[3];
  const int mat5 = m_imatrix[4];
  const int mat6 = m_imatrix[5];
  const int mat7 = m_imatrix[6];
  const int mat8 = m_imatrix[7];
  const int mat9 = m_imatrix[8];
  const int range = m_irange;

  const bool stripChroma = !m_chroma;

  // Interior pixels only; the loops do not run for images smaller than 3x3.
  for (int y = 1; y < height - 1; y++)
  {
    // luma byte of column 0 in the rows above, at and below y
    const unsigned char *above = src + (y - 1) * rowBytes + lumaOffset;
    const unsigned char *here  = above + rowBytes;
    const unsigned char *below = here + rowBytes;
    unsigned char       *out   = dest + y * rowBytes;

    for (int x = 1; x < width - 1; x++)
    {
      const int left  = (x - 1) * bytesPerPixel;
      const int mid   = x * bytesPerPixel;
      const int right = (x + 1) * bytesPerPixel;

      int res = mat1 * above[left] + mat2 * above[mid] + mat3 * above[right]
              + mat4 * here[left]  + mat5 * here[mid]  + mat6 * here[right]
              + mat7 * below[left] + mat8 * below[mid] + mat9 * below[right];

      // coefficients and range are both scaled by 255, hence the 16-bit shift
      res *= range;
      res >>= 16;

      out[mid + lumaOffset] = CLAMP(res);
      if (stripChroma)
        out[mid] = 128;  // neutral chroma
    }
  }
#endif
}

cclepper's avatar
cclepper committed
444
//too many temps for all the registers - reuse some
445
446
447
448
449
450
451
452
// AltiVec version of the 3x3 luma convolution; compiles to an empty function
// unless ALTIVEC is defined.
//   image   - destination, written 16 bytes (8 pixels) at a time
//   tempImg - unmodified copy the taps are loaded from (format U Y V Y)
// The odd bytes of each loaded vector are multiplied by the nine kernel
// coefficients, summed, shifted right by 8 and re-interleaved with the even
// bytes of the centre vector before being stored.
// NOTE(review): this path reads neither m_irange nor m_chroma.
// NOTE(review): vec_ld/vec_st are given addresses that are not multiples of
// 16 (i-2, i+2, ...); confirm how the hardware treats those -- the original
// author already remarked "this might crash?" on the first load.
void pix_convolve :: calculate3x3YUVAltivec(imageStruct &image,imageStruct &tempImg)
{
#ifdef ALTIVEC
  const int rowBytes      = (tempImg.xsize)*2;  // 2 bytes per pixel
  const int vectorsPerRow = (tempImg.xsize)/8;  // 8 pixels per 16-byte vector

  // scratch unions used to move scalars into the vector unit
  union
  {
    short         elements[8];
    vector short  v;
  } shortBuffer;

  union
  {
    unsigned int         elements[4];
    vector unsigned int  v;
  } intBuffer;

  vector unsigned char one;
  vector unsigned int  bitshift;
  vector signed short  coeff[9];     // kernel coefficient k in every lane
  vector unsigned char tap[9];       // raw neighbour vectors
  vector signed short  luma[9];      // odd bytes of tap[k] widened to 16 bit
  vector signed int    oddProd[9];   // coeff * luma, odd 16-bit lanes
  vector signed int    evenProd[9];  // coeff * luma, even 16-bit lanes
  vector signed int    sumOdd, sumEven, yhi, ylo;
  vector signed short  yres, uvres, hiImage, loImage;
  vector unsigned char packed;

  unsigned char *dst = (unsigned char*) image.data;
  unsigned char *src = tempImg.data;

  int k;

  one = vec_splat_u8( 1 );

  // shift count of 8 in every lane
  intBuffer.elements[0] = 8;
  bitshift = intBuffer.v;
  bitshift = (vector unsigned int)vec_splat((vector unsigned int)bitshift,0);

  // load the matrix values into vectors
  for (k = 0; k < 9; k++)
  {
    shortBuffer.elements[0] = m_imatrix[k];
    coeff[k] = shortBuffer.v;
    coeff[k] = (vector signed short)vec_splat((vector signed short)coeff[k],0);
  }

  UInt32 prefetchSize = GetPrefetchConstant( 16, 1, 256 );
  vec_dst( src, prefetchSize, 0 );
  vec_dst( dst, prefetchSize, 0 );

  // byte index of the vector currently being processed
  int i = rowBytes*2;

  //need to treat the first rows as a special case for accuracy and keep it from crashing
  //or just skip the first 2 rows ;)
  for (int h = 2; h < image.ysize-1; h++)
  {
    i += 2; //this gets rid of the echoes but kills the vertical edge-detects???

    // byte offsets of the nine neighbours relative to i, in kernel order
    const int tapOffset[9] = {
      -rowBytes-2, -rowBytes, -rowBytes+2,
               -2,         0,           2,
       rowBytes-2,  rowBytes,  rowBytes+2
    };

    for (int w = 0; w < vectorsPerRow; w++)
    {
      vec_dst( src, prefetchSize, 0 );
      vec_dst( dst, prefetchSize, 1 );

      for (k = 0; k < 9; k++)
        tap[k] = vec_ld(0, src+(i+tapOffset[k]));

      //extract the Y for processing
      for (k = 0; k < 9; k++)
        luma[k] = (vector signed short)vec_mulo((vector unsigned char)one,(vector unsigned char)tap[k]);

      // even bytes of the centre vector are passed through unchanged
      uvres = (vector signed short)vec_mule((vector unsigned char)one,(vector unsigned char)tap[4]);

      //mult the Y by the matrix coefficient
      for (k = 0; k < 9; k++)
      {
        oddProd[k]  = vec_mulo(coeff[k],luma[k]);
        evenProd[k] = vec_mule(coeff[k],luma[k]);
      }

      // Sum the nine products. Each product is a 16-bit coefficient times an
      // 8-bit value, so nine of them stay well inside 32 bits and the
      // saturating add never saturates; summation order does not matter.
      sumOdd  = oddProd[0];
      sumEven = evenProd[0];
      for (k = 1; k < 9; k++)
      {
        sumOdd  = vec_adds(sumOdd, oddProd[k]);
        sumEven = vec_adds(sumEven, evenProd[k]);
      }

      // scale back down
      sumOdd  = vec_sra(sumOdd,bitshift);
      sumEven = vec_sra(sumEven,bitshift);

      // re-interleave even/odd lanes and pack to one short vector
      yhi  = vec_mergeh(sumEven,sumOdd);
      ylo  = vec_mergel(sumEven,sumOdd);
      yres = vec_packs(yhi,ylo);

      // combine with the UV and pack back to bytes
      hiImage = vec_mergeh(uvres,yres);
      loImage = vec_mergel(uvres,yres);

      packed = vec_packsu(hiImage,loImage);
      vec_st(packed,0,dst+i);
      i += 16;
    }

    i = h * rowBytes;
    vec_dss( 0 );
    vec_dss( 1 );
  }
#endif
}
cclepper's avatar
cclepper committed
647

648
649
650
651
652
653
/////////////////////////////////////////////////////////
// rangeMess
//
/////////////////////////////////////////////////////////
void pix_convolve :: rangeMess(float range)
{
654
    m_irange = (int)(range*255.f);
655
656
657
658
659
660
661
662
663
664
665
666
667
668
    setPixModified();
}

/////////////////////////////////////////////////////////
// matrixMess
//
/////////////////////////////////////////////////////////
void pix_convolve :: matrixMess(int argc, t_atom *argv)
{
    if (argc != m_cols * m_rows)
    {
    	error("GEM: pix_convolve: matrix size not correct");
    	return;
    }
cclepper's avatar
cclepper committed
669

ggeiger's avatar
ggeiger committed
670
    int i;
dheck's avatar
dheck committed
671
    for (i = 0; i < argc; i++) m_imatrix[i] = (int)(atom_getfloat(&argv[i])*255.);
672

cclepper's avatar
cclepper committed
673

674
675
676
677
678
679
680
681
682
683
684
685
686
    setPixModified();
}

/////////////////////////////////////////////////////////
// static member function
//
/////////////////////////////////////////////////////////
// Registers the message handlers of this class: "matrix" (any argument
// list), "ft1" (one float) and "chroma" (one float).
void pix_convolve :: obj_setupCallback(t_class *classPtr)
{
  t_symbol *matrixSel = gensym("matrix");
  class_addmethod(classPtr, (t_method)&pix_convolve::matrixMessCallback,
                  matrixSel, A_GIMME, A_NULL);

  t_symbol *rangeSel = gensym("ft1");
  class_addmethod(classPtr, (t_method)&pix_convolve::rangeMessCallback,
                  rangeSel, A_FLOAT, A_NULL);

  t_symbol *chromaSel = gensym("chroma");
  class_addmethod(classPtr, (t_method)&pix_convolve::chromaMessCallback,
                  chromaSel, A_FLOAT, A_NULL);
}
void pix_convolve :: matrixMessCallback(void *data, t_symbol *, int argc, t_atom *argv)
{
    GetMyClass(data)->matrixMess(argc, argv);
}
void pix_convolve :: rangeMessCallback(void *data, t_floatarg range)
{
    GetMyClass(data)->rangeMess((float)range);
}
cclepper's avatar
cclepper committed
698
699
700
701
702

void pix_convolve :: chromaMessCallback(void *data, t_floatarg value)
{
    GetMyClass(data)->m_chroma=(int)value;
}