pix_convolve.cpp 23.7 KB
Newer Older
1
2
3
4
5
6
7
8
9
////////////////////////////////////////////////////////
//
// GEM - Graphics Environment for Multimedia
//
// zmoelnig@iem.kug.ac.at
//
// Implementation file
//
//    Copyright (c) 1997-1998 Mark Danks.
//    Copyright (c) Günther Geiger.
//    Copyright (c) 2001-2002 IOhannes m zmoelnig. forum::für::umläute. IEM
//    Copyright (c) 2002 James Tittle & Chris Clepper
//    For information on usage and redistribution, and for a DISCLAIMER OF ALL
//    WARRANTIES, see the file, "GEM.LICENSE.TERMS" in this distribution.
//
/////////////////////////////////////////////////////////

#include "pix_convolve.h"

CPPEXTERN_NEW_WITH_TWO_ARGS(pix_convolve, t_floatarg, A_DEFFLOAT, t_floatarg, A_DEFFLOAT)

/////////////////////////////////////////////////////////
//
// pix_convolve
//
/////////////////////////////////////////////////////////
// Constructor
//
/////////////////////////////////////////////////////////
pix_convolve :: pix_convolve(t_floatarg fRow, t_floatarg fCol)
  : m_imatrix(NULL)
{
    // Start from a well-defined "no kernel" state, so that the early returns
    // below never leave the dimensions or the range uninitialised.
    // NOTE(review): m_chroma is read by the YUV code but is not initialised
    // anywhere in this file - confirm that the header gives it a default.
    m_rows   = 0;
    m_cols   = 0;
    m_irange = 255;

    const int row = static_cast<int>(fRow);
    const int col = static_cast<int>(fCol);

    // zero or negative dimensions cannot describe a kernel
    if (row <= 0 || col <= 0)
    {
    	error("matrix must have some dimension");
    	return;
    }

    // an even-sized kernel has no centre element
    if (!(row % 2) || !(col % 2))
    {
    	error("matrix must have odd dimensions");
    	return;
    }

    m_rows = row;
    m_cols = col;

    // zero out the matrix
    const int count = m_rows * m_cols;
    m_imatrix = new signed short[count];
    for (int i = 0; i < count; i++) m_imatrix[i] = 0;

    // Insert a one (255 in the fixed-point scale used by matrixMess) at the
    // centre element, giving an identity kernel.  Both dimensions are odd, so
    // the centre is at (m_cols/2, m_rows/2) with m_rows elements per kernel
    // line.  The previous "+ 1" terms addressed the last element of a 3x3
    // kernel and wrote past the end of a 1x1 kernel.
    m_imatrix[ ((m_cols / 2) * m_rows) + (m_rows / 2) ] = 255;

    inlet_new(this->x_obj, &this->x_obj->ob_pd, gensym("float"), gensym("ft1"));
    inlet_new(this->x_obj, &this->x_obj->ob_pd, gensym("list"), gensym("matrix"));
}

/////////////////////////////////////////////////////////
// Destructor
//
/////////////////////////////////////////////////////////
pix_convolve :: ~pix_convolve()
{
    // Release the kernel storage.  delete[] on a null pointer does nothing,
    // so the case where the constructor never allocated is covered as well.
    delete [] m_imatrix;
}

/////////////////////////////////////////////////////////
// processImage
//
/////////////////////////////////////////////////////////
76
// 3x3 convolution for images addressed as one int per pixel.
// Reads from tempImg, writes the filtered colour bytes into image.
// Pixels in the first/last row and first/last column are not written.
void pix_convolve :: calculateRGBA3x3(imageStruct &image,imageStruct &tempImg)
{
  const int width  = tempImg.xsize;
  const int height = tempImg.ysize;
  const int bpp    = tempImg.csize;
  // one past the last pixel of the second-to-last row that is still visited
  const int stop   = width*height - width - 1;

  // one int per pixel: pointer arithmetic below moves in whole pixels
  int* in  = (int*) tempImg.data;
  int* out = (int*) image.data;

  // byte positions inside a pixel that get convolved; the remaining byte is
  // left alone (presumably alpha - first on Apple, last elsewhere)
#ifndef __APPLE__
  const int firstChannel = 0;
  const int endChannel   = 3;
#else
  const int firstChannel = 1;
  const int endChannel   = 4;
#endif

  for (int pix = width+1; pix < stop; pix++) {
    // skip the left-most and right-most column
    const int column = pix % width;
    if (column == 0 || column == width-1) continue;

    // the nine neighbours, viewed as bytes
    const unsigned char* topLeft  = reinterpret_cast<unsigned char*>(in + pix - width - 1);
    const unsigned char* top      = reinterpret_cast<unsigned char*>(in + pix - width);
    const unsigned char* topRight = reinterpret_cast<unsigned char*>(in + pix - width + 1);
    const unsigned char* left     = reinterpret_cast<unsigned char*>(in + pix - 1);
    const unsigned char* centre   = reinterpret_cast<unsigned char*>(in + pix);
    const unsigned char* right    = reinterpret_cast<unsigned char*>(in + pix + 1);
    const unsigned char* botLeft  = reinterpret_cast<unsigned char*>(in + pix + width - 1);
    const unsigned char* bottom   = reinterpret_cast<unsigned char*>(in + pix + width);
    const unsigned char* botRight = reinterpret_cast<unsigned char*>(in + pix + width + 1);

    for (int ch = firstChannel; ch < endChannel; ch++) {
      int sum = m_imatrix[0]*static_cast<int>(topLeft[ch])
              + m_imatrix[1]*static_cast<int>(top[ch])
              + m_imatrix[2]*static_cast<int>(topRight[ch])
              + m_imatrix[3]*static_cast<int>(left[ch])
              + m_imatrix[4]*static_cast<int>(centre[ch])
              + m_imatrix[5]*static_cast<int>(right[ch])
              + m_imatrix[6]*static_cast<int>(botLeft[ch])
              + m_imatrix[7]*static_cast<int>(bottom[ch])
              + m_imatrix[8]*static_cast<int>(botRight[ch]);

      // kernel and range are both scaled by 255, hence the 16 bit shift
      sum *= m_irange;
      sum >>= 16;
      ((unsigned char*)out)[pix*bpp + ch] = CLAMP(sum);
    }
  }
}

zmoelnig's avatar
zmoelnig committed
139
// Convolves the colour channels of an RGBA image with the current kernel.
// 3x3 kernels are handed to calculateRGBA3x3(); every other size uses the
// generic loops below.  A border of half the kernel size is left unfiltered.
void pix_convolve :: processRGBAImage(imageStruct &image)
{
    // the constructor may have rejected the dimensions and allocated nothing
    if (!m_imatrix) return;

    image.copy2Image(&tempImg);

    if (m_rows == 3 && m_cols == 3) {
      calculateRGBA3x3(image,tempImg);
      return;
    }

    const int csize = tempImg.csize;
    // m_rows is the kernel extent along x, m_cols the extent along y
    const int initX = m_rows / 2;
    const int initY = m_cols / 2;
    const int maxX = tempImg.xsize - initX;
    const int maxY = tempImg.ysize - initY;
    const int xTimesc = tempImg.xsize * csize;           // bytes per image row
    const int initOffset = initY * xTimesc + initX * csize;

    // Skip the alpha value.  Use the same channel range as calculateRGBA3x3():
    // bytes 0..2 on non-Apple builds, bytes 1..3 on Apple builds.  Previously
    // this loop always ran over 1..csize-1, so on non-Apple builds byte 0 was
    // skipped and the last byte was convolved instead.
#ifndef __APPLE__
    const int firstChannel = 0;
    const int endChannel   = csize - 1;
#else
    const int firstChannel = 1;
    const int endChannel   = csize;
#endif

    for (int y = initY; y < maxY; y++)
    {
        const int realY = y * xTimesc;

    	for (int x = initX; x < maxX; x++)
    	{
    	    const int realPos = x * csize + realY;       // pixel being written
            const int offsetXY = realPos - initOffset;  // top-left kernel tap

    	    for (int c = firstChannel; c < endChannel; c++)
    	    {
    		    int new_val = 0;
    		    const int offsetXYC = offsetXY + c;
    		    for (int matY = 0; matY < m_cols; matY++)
    		    {
    		        const int offsetXYCMat = matY * xTimesc + offsetXYC;
    		        const int realMatY = matY * m_rows;
    	    	    for (int matX = 0; matX < m_rows; matX++)
    	    	    {
                        // kernel entries are scaled by 255, hence the shift
                        new_val += (tempImg.data[offsetXYCMat + matX * csize] *
                                        m_imatrix[realMatY + matX])>>8;
    	    	    }
    		    }
                    // NOTE(review): m_irange is not applied on this path,
                    // unlike in calculateRGBA3x3()
                    image.data[realPos + c] = CLAMP(new_val);
    	    }
    	}
    }
}

zmoelnig's avatar
zmoelnig committed
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
// Convolves a single-channel image with the current kernel.
// A border of half the kernel size is left unfiltered.
void pix_convolve :: processGrayImage(imageStruct &image)
{
    // the constructor may have rejected the dimensions and allocated nothing
    if (!m_imatrix) return;

    const int csize = image.csize;
    image.copy2Image(&tempImg);

    // m_rows is the kernel extent along x, m_cols the extent along y
    const int halfX = m_rows / 2;
    const int halfY = m_cols / 2;
    const int endX = tempImg.xsize - halfX;
    const int endY = tempImg.ysize - halfY;
    const int rowBytes = tempImg.xsize * csize;
    const int kernelOrigin = halfY * rowBytes + halfX * csize;

    for (int y = halfY; y < endY; y++) {
      const int rowStart = y * rowBytes;

      for (int x = halfX; x < endX; x++) {
        // index of the top-left kernel tap
        // (x is used unscaled; presumably csize is 1 here - TODO confirm)
        const int topLeft = x + rowStart - kernelOrigin;

        int sum = 0;
        for (int ky = 0; ky < m_cols; ky++) {
          const int srcLine = ky * rowBytes + topLeft;
          const int kernelLine = ky * m_rows;
          for (int kx = 0; kx < m_rows; kx++) {
            // kernel entries are scaled by 255, hence the shift
            sum += (tempImg.data[srcLine + kx] *
                    m_imatrix[kernelLine + kx])>>8;
          }
        }
        // NOTE(review): m_irange is not applied on this path
        image.data[x + rowStart] = CLAMP(sum);
      }
    }
}
224

225
226
// Convolves the byte at offset 1 of every YUV pixel (the Y value, given the
// "U Y V Y" layout noted in calculate3x3YUVAltivec) with the current kernel.
// When m_chroma is zero the byte at offset 0 of each filtered pixel is set
// to 128.
// A border of half the kernel size is left unfiltered.
void pix_convolve :: processYUVImage(imageStruct &image)
{
    // the constructor may have rejected the dimensions and allocated nothing
    if (!m_imatrix) return;

    image.copy2Image(&tempImg);

//quick fix for Intel 3x3YUV problems
#ifdef __BIG_ENDIAN__
    if (m_rows == 3 && m_cols == 3) {
      calculate3x3YUV(image,tempImg);
      return;
    }
#endif

    const int csize = tempImg.csize;
    // m_rows is the kernel extent along x, m_cols the extent along y
    const int initX = m_rows / 2;
    const int initY = m_cols / 2;
    const int maxX = tempImg.xsize - initX;
    const int maxY = tempImg.ysize - initY;
    const int xTimesc = tempImg.xsize * csize;           // bytes per image row
    const int initOffset = initY * xTimesc + initX * csize;

    // The two former loop nests (chroma kept / chroma removed) differed only
    // in the single "write 128" statement, so they share one body now.
    const bool keepChroma = (m_chroma != 0);
    const int lumaByte = 1;   // skip the UV: only byte 1 of each pixel is filtered

    for (int y = initY; y < maxY; y++)
    {
        const int realY = y * xTimesc;

    	for (int x = initX; x < maxX; x++)
    	{
    	    const int realPos = x * csize + realY;       // pixel being written
            const int offsetXYC = realPos - initOffset + lumaByte;

    	    int new_val = 0;
    	    for (int matY = 0; matY < m_cols; matY++)
    	    {
    	        const int offsetXYCMat = matY * xTimesc + offsetXYC;
    	        const int realMatY = matY * m_rows;
    	        for (int matX = 0; matX < m_rows; matX++)
    	        {
                    // kernel entries are scaled by 255, hence the shift
                    new_val += (tempImg.data[offsetXYCMat + matX * csize] *
                                    m_imatrix[realMatY + matX])>>8;
    	        }
    	    }
            image.data[realPos + lumaByte] = CLAMP(new_val);
            if (!keepChroma)
                image.data[realPos + lumaByte - 1] = 128;  //remove the U+V
    	}
    }
}
cclepper's avatar
cclepper committed
309

310
//make two functions - one for chroma one without
cclepper's avatar
cclepper committed
311
312
// 3x3 convolution of the Y values of a YUV image (two bytes per pixel,
// addressed here as one short per pixel).  Builds with __VEC__ defined
// forward to calculate3x3YUVAltivec(); everything else uses the scalar code.
// With m_chroma set only byte 1 of each pixel is rewritten; otherwise byte 0
// of each processed pixel is additionally set to 128.
//
// NOTE(review): the row stride used below is tempImg.xsize-1 rather than
// tempImg.xsize, and the chroma branch reads taps at +1/+3 shorts.  Both look
// suspicious but are left exactly as they were - confirm the intended
// geometry before relying on this path.
void pix_convolve :: calculate3x3YUV(imageStruct &image,imageStruct &tempImg)
{
#ifdef __VEC__
  calculate3x3YUVAltivec(image,tempImg);
  return;
#else
  int i;
  int j;
  int k;
  const int xsize = tempImg.xsize - 1;
  const int ysize = tempImg.ysize - 1;

  short* src  = (short*) tempImg.data;
  short* dest = (short*) image.data;

  // kernel coefficients and range as plain ints
  // ("register" dropped: the keyword is no longer valid in C++17)
  const int mat1 = m_imatrix[0];
  const int mat2 = m_imatrix[1];
  const int mat3 = m_imatrix[2];
  const int mat4 = m_imatrix[3];
  const int mat5 = m_imatrix[4];
  const int mat6 = m_imatrix[5];
  const int mat7 = m_imatrix[6];
  const int mat8 = m_imatrix[7];
  const int mat9 = m_imatrix[8];
  const int range = m_irange;
  int res;

  if (m_chroma) {
    // Each tap is the short narrowed to unsigned char, i.e. its low-order
    // byte.  Presumably that is the Y value on big-endian hosts, which is
    // the only configuration processYUVImage() calls this from.
    i = xsize;
    unsigned char val1 = 0;
    unsigned char val2 = src[i-xsize+1];
    unsigned char val3 = src[i-xsize+3];
    unsigned char val4 = src[i-1];
    unsigned char val5 = src[i+1];
    unsigned char val6 = src[i+3];
    unsigned char val7 = src[i+xsize-1];
    unsigned char val8 = src[i+xsize+1];
    unsigned char val9 = src[i+xsize+3];

    i = xsize+2;

    for (k=1;k<ysize;k++) {
      for (j=1;j<xsize;j++) {
        // slide the window one pixel: shift two columns, load the third
        val7 = val8;
        val8 = val9;
        val9 = src[i+xsize+3];
        val1 = val2;
        val2 = val3;
        val3 = src[i-xsize+3];
        val4 = val5;
        val5 = val6;
        val6 = src[i+3];

        res = mat1*static_cast<int>(val1) + mat2*static_cast<int>(val2) + mat3*static_cast<int>(val3)
            + mat4*static_cast<int>(val4) + mat5*static_cast<int>(val5) + mat6*static_cast<int>(val6)
            + mat7*static_cast<int>(val7) + mat8*static_cast<int>(val8) + mat9*static_cast<int>(val9);

        // kernel and range are both scaled by 255, hence the 16 bit shift
        res *= range;
        res >>= 16;
        ((unsigned char*)dest)[i*2+1] = CLAMP(res);
        i++;
      }
      i = k*tempImg.xsize;
    }
  } else {
    // Byte of each pixel that is filtered (the destination writes below use
    // the same byte).  The taps used to be indexed with the column counter j,
    // a leftover from a removed per-channel loop, which read a different byte
    // for every column.
    const int lumaByte = 1;

    i = xsize;
    short* val1 = 0;
    short* val2 = src+i-xsize;
    short* val3 = src+i-xsize+1;
    short* val4 = src+i-1;
    short* val5 = src+i;
    short* val6 = src+i+1;
    short* val7 = src+i+xsize-1;
    short* val8 = src+i+xsize;
    short* val9 = src+i+xsize+1;

    for (k=1;k<ysize;k++) {
      for (j=1;j<xsize;j++) {
        // slide the window one pixel: shift two columns, recompute the third
        val1 = val2;
        val2 = val3;
        val3 = src+i-xsize+1;
        val4 = val5;
        val5 = val6;
        val6 = src+i+1;
        val7 = val8;
        val8 = val9;
        val9 = src+i+xsize+1;

        res = mat1*static_cast<int>(reinterpret_cast<unsigned char*>(val1)[lumaByte])
            + mat2*static_cast<int>(reinterpret_cast<unsigned char*>(val2)[lumaByte])
            + mat3*static_cast<int>(reinterpret_cast<unsigned char*>(val3)[lumaByte])
            + mat4*static_cast<int>(reinterpret_cast<unsigned char*>(val4)[lumaByte])
            + mat5*static_cast<int>(reinterpret_cast<unsigned char*>(val5)[lumaByte])
            + mat6*static_cast<int>(reinterpret_cast<unsigned char*>(val6)[lumaByte])
            + mat7*static_cast<int>(reinterpret_cast<unsigned char*>(val7)[lumaByte])
            + mat8*static_cast<int>(reinterpret_cast<unsigned char*>(val8)[lumaByte])
            + mat9*static_cast<int>(reinterpret_cast<unsigned char*>(val9)[lumaByte]);

        // kernel and range are both scaled by 255, hence the 16 bit shift
        res *= range;
        res >>= 16;
        ((unsigned char*)dest)[i*2+1] = CLAMP(res);
        ((unsigned char*)dest)[i*2] = 128;   // neutral chroma
        i++;
      }
      i = k*tempImg.xsize;
    }
  }
#endif
}

cclepper's avatar
cclepper committed
489
//too many temps for all the registers - reuse some
490
491
// AltiVec version of the 3x3 Y convolution; compiles to an empty function
// when __VEC__ is not defined.  Works on 16 bytes (8 pixels, "U Y V Y"
// layout) per step: the odd bytes are convolved, the even bytes of the centre
// vector are copied through unchanged.
// NOTE(review): m_irange is not applied here; results are only shifted right
// by 8 bits.
// NOTE(review): the taps at byte offsets of +/-2 are fetched with vec_ld,
// which presumably ignores the low four address bits - confirm against the
// AltiVec documentation whether these loads really return shifted data.
void pix_convolve :: calculate3x3YUVAltivec(imageStruct &image,imageStruct &tempImg)
{
#ifdef __VEC__
    const int rowBytes      = (tempImg.xsize)*2;   // two bytes per pixel
    const int vectorsPerRow = (tempImg.xsize)/8;   // eight pixels per vector

    // scratch unions used to move scalars into vector registers
    union
    {
        short	elements[8];
        vector	signed short v;
    }shortBuffer;

    union
    {
        unsigned int	elements[4];
        vector	unsigned int v;
    }intBuffer;

    unsigned char *dst = (unsigned char*) image.data;
    unsigned char *src = (unsigned char*) tempImg.data;

    // multiplying by a vector of ones widens bytes to shorts
    vector unsigned char one = vec_splat_u8( 1 );

    // shift count applied to every accumulated sum
    intBuffer.elements[0] = 8;
    vector unsigned int bitshift = vec_splat(intBuffer.v, 0);

    // broadcast each kernel coefficient into all lanes of its own vector
    shortBuffer.elements[0] = m_imatrix[0];
    vector signed short mat1 = vec_splat(shortBuffer.v, 0);
    shortBuffer.elements[0] = m_imatrix[1];
    vector signed short mat2 = vec_splat(shortBuffer.v, 0);
    shortBuffer.elements[0] = m_imatrix[2];
    vector signed short mat3 = vec_splat(shortBuffer.v, 0);
    shortBuffer.elements[0] = m_imatrix[3];
    vector signed short mat4 = vec_splat(shortBuffer.v, 0);
    shortBuffer.elements[0] = m_imatrix[4];
    vector signed short mat5 = vec_splat(shortBuffer.v, 0);
    shortBuffer.elements[0] = m_imatrix[5];
    vector signed short mat6 = vec_splat(shortBuffer.v, 0);
    shortBuffer.elements[0] = m_imatrix[6];
    vector signed short mat7 = vec_splat(shortBuffer.v, 0);
    shortBuffer.elements[0] = m_imatrix[7];
    vector signed short mat8 = vec_splat(shortBuffer.v, 0);
    shortBuffer.elements[0] = m_imatrix[8];
    vector signed short mat9 = vec_splat(shortBuffer.v, 0);

#ifndef PPC970
    UInt32 prefetchSize = GetPrefetchConstant( 16, 1, 256 );
    vec_dst( src, prefetchSize, 0 );
    vec_dst( dst, prefetchSize, 0 );
#endif

    vector unsigned char val1,val2,val3,val4,val5,val6,val7,val8,val9;
    vector signed short  y1,y2,y3,y4,y5,y6,y7,y8,y9,yres,uvres,hiImage,loImage;
    vector signed int    res1,res2,res3,res4,res5,res6,res7,res8,res9;
    vector signed int    res1a,res2a,res3a,res4a,res5a,res6a,res7a,res8a,res9a;
    vector signed int    yhi,ylo;

    // byte offset of the current vector; the first two rows are skipped
    int i = rowBytes*2;

    for (int h = 2; h < image.ysize-1; h++) {
        // skip the first half vector of the row
        i += 8;

        for (int w = 0; w < vectorsPerRow-1; w++) {
#ifndef PPC970
            vec_dst( src, prefetchSize, 0 );
            vec_dst( dst, prefetchSize, 1 );
#endif
            // nine neighbourhood loads: +/-rowBytes is one row, +/-2 one pixel
            val1 = vec_ld(0,src+(i-rowBytes-2));
            val2 = vec_ld(0,src+(i-rowBytes));
            val3 = vec_ld(0,src+(i-rowBytes+2));
            val4 = vec_ld(0,src+(i-2));
            val5 = vec_ld(0,src+i);
            val6 = vec_ld(0,src+(i+2));
            val7 = vec_ld(0,src+(i+rowBytes-2));
            val8 = vec_ld(0,src+(i+rowBytes));
            val9 = vec_ld(0,src+(i+rowBytes+2));

            // odd bytes (Y) widened to shorts
            y1 = (vector signed short)vec_mulo(one,val1);
            y2 = (vector signed short)vec_mulo(one,val2);
            y3 = (vector signed short)vec_mulo(one,val3);
            y4 = (vector signed short)vec_mulo(one,val4);
            y5 = (vector signed short)vec_mulo(one,val5);
            y6 = (vector signed short)vec_mulo(one,val6);
            y7 = (vector signed short)vec_mulo(one,val7);
            y8 = (vector signed short)vec_mulo(one,val8);
            y9 = (vector signed short)vec_mulo(one,val9);

            // even bytes (U/V) of the centre vector, passed through untouched
            uvres = (vector signed short)vec_mule(one,val5);

            // coefficient * Y for the odd-numbered shorts ...
            res1 = vec_mulo(mat1,y1);
            res2 = vec_mulo(mat2,y2);
            res3 = vec_mulo(mat3,y3);
            res4 = vec_mulo(mat4,y4);
            res5 = vec_mulo(mat5,y5);
            res6 = vec_mulo(mat6,y6);
            res7 = vec_mulo(mat7,y7);
            res8 = vec_mulo(mat8,y8);
            res9 = vec_mulo(mat9,y9);

            // ... and for the even-numbered shorts
            res1a = vec_mule(mat1,y1);
            res2a = vec_mule(mat2,y2);
            res3a = vec_mule(mat3,y3);
            res4a = vec_mule(mat4,y4);
            res5a = vec_mule(mat5,y5);
            res6a = vec_mule(mat6,y6);
            res7a = vec_mule(mat7,y7);
            res8a = vec_mule(mat8,y8);
            res9a = vec_mule(mat9,y9);

            // Sum the nine products.  The grouping is kept exactly as
            // before because vec_adds is a saturating add.
            res1 = vec_adds(res1,res2);     // 1+2
            res3 = vec_adds(res3,res4);     // 3+4
            res5 = vec_adds(res5,res6);     // 5+6
            res7 = vec_adds(res7,res8);     // 7+8
            res1 = vec_adds(res1,res3);     // (1+2)+(3+4)
            res7 = vec_adds(res7,res9);     // 7+8+9
            res1 = vec_adds(res1,res5);     // (1+2)+(3+4)+(5+6)
            res1 = vec_adds(res1,res7);     // all nine

            res1a = vec_adds(res1a,res2a);  // 1+2
            res3a = vec_adds(res3a,res4a);  // 3+4
            res5a = vec_adds(res5a,res6a);  // 5+6
            res7a = vec_adds(res7a,res8a);  // 7+8
            res1a = vec_adds(res1a,res3a);  // (1+2)+(3+4)
            res7a = vec_adds(res7a,res9a);  // 7+8+9
            res1a = vec_adds(res1a,res5a);  // (1+2)+(3+4)+(5+6)
            res1a = vec_adds(res1a,res7a);  // all nine

            // scale back down
            res1  = vec_sra(res1,bitshift);
            res1a = vec_sra(res1a,bitshift);

            // interleave even/odd sums again and pack to one short vector
            yhi  = vec_mergeh(res1a,res1);
            ylo  = vec_mergel(res1a,res1);
            yres = vec_packs(yhi,ylo);

            // re-interleave with the U/V shorts and pack to bytes
            hiImage = vec_mergeh(uvres,yres);
            loImage = vec_mergel(uvres,yres);
            val1 = vec_packsu(hiImage,loImage);

            vec_st(val1,0,dst+i);
            i += 16;
        }

        i = h * rowBytes;
#ifndef PPC970
        vec_dss( 0 );
        vec_dss( 1 );
#endif
    }  /*end of working altivec function */
#endif
}
cclepper's avatar
cclepper committed
696

697
698
699
700
701
702
/////////////////////////////////////////////////////////
// rangeMess
//
/////////////////////////////////////////////////////////
void pix_convolve :: rangeMess(float range)
{
703
    m_irange = (int)(range*255.f);
704
705
706
707
708
709
710
711
712
713
714
    setPixModified();
}

/////////////////////////////////////////////////////////
// matrixMess
//
/////////////////////////////////////////////////////////
void pix_convolve :: matrixMess(int argc, t_atom *argv)
{
    if (argc != m_cols * m_rows)
    {
715
    	error("matrix size not correct");
716
717
    	return;
    }
cclepper's avatar
cclepper committed
718

ggeiger's avatar
ggeiger committed
719
    int i;
dheck's avatar
dheck committed
720
    for (i = 0; i < argc; i++) m_imatrix[i] = (int)(atom_getfloat(&argv[i])*255.);
721

cclepper's avatar
cclepper committed
722

723
724
725
726
727
728
729
730
731
    setPixModified();
}

/////////////////////////////////////////////////////////
// static member function
//
/////////////////////////////////////////////////////////
void pix_convolve :: obj_setupCallback(t_class *classPtr)
{
    // "matrix" takes a free-form argument list (A_GIMME)
    t_method onMatrix = reinterpret_cast<t_method>(&pix_convolve::matrixMessCallback);
    class_addmethod(classPtr, onMatrix, gensym("matrix"), A_GIMME, A_NULL);

    // "ft1" takes one float; the constructor maps an inlet's floats to it
    t_method onRange = reinterpret_cast<t_method>(&pix_convolve::rangeMessCallback);
    class_addmethod(classPtr, onRange, gensym("ft1"), A_FLOAT, A_NULL);

    // "chroma" takes one float
    t_method onChroma = reinterpret_cast<t_method>(&pix_convolve::chromaMessCallback);
    class_addmethod(classPtr, onChroma, gensym("chroma"), A_FLOAT, A_NULL);
}
// Static trampoline for the "matrix" message: the selector is ignored and the
// atom list is handed to the instance behind data.
void pix_convolve :: matrixMessCallback(void *data, t_symbol *, int argc, t_atom *argv)
{
    GetMyClass(data)->matrixMess(argc, argv);
}
void pix_convolve :: rangeMessCallback(void *data, t_floatarg range)
{
zmoelnig's avatar
zmoelnig committed
745
    GetMyClass(data)->rangeMess(range);
746
}
cclepper's avatar
cclepper committed
747
748
749

void pix_convolve :: chromaMessCallback(void *data, t_floatarg value)
{
zmoelnig's avatar
zmoelnig committed
750
    GetMyClass(data)->m_chroma=static_cast<int>(value);
cclepper's avatar
cclepper committed
751
}