
xf86-video-siliconmotion: add loongson fix

I'm not really sure what it does, and I suspect it's only there for
video playback performance reasons, but since it is also carried here:
http://www.gentoo-cn.org/gitweb/?p=loongson.git;a=blob;f=x11-drivers/xf86-video-siliconmotion/files/xf86-video-siliconmotion-1.7.3-fix-loongson.patch;h=e3b6974cf3a242f6ac3373285309c579299e3689;hb=2e5b51e34b7a7d0fe92ea9762d1aed9ac0b808c9
it shouldn't hurt to have it.
Phil Sutter, 15 years ago
commit 19e9045f4a

package/xf86-video-siliconmotion/patches/xf86-video-siliconmotion-1.7.3-fix-loongson.patch (+182, -0)

@@ -0,0 +1,182 @@
+diff -ur orig/src/smi_video.c mod/src/smi_video.c
+--- xf86-video-siliconmotion-1.7.3.orig/src/smi_video.c	2009-07-27 05:42:44.000000000 +0200
++++ xf86-video-siliconmotion-1.7.3/src/smi_video.c	2010-06-03 16:55:59.169793245 +0200
+@@ -276,6 +276,7 @@ static XF86ImageRec SMI_VideoImages[] =
+     XVIMAGE_YUY2,
+     XVIMAGE_YV12,
+     XVIMAGE_I420,
++    XVIMAGE_UYVY,
+     {
+ 	FOURCC_RV15,			/* id				*/
+ 	XvRGB,				/* type				*/
+@@ -1103,7 +1104,7 @@ SMI_PutVideo(
+ 	vpr00 |= 0x0010000E;
+     } else {
+ 	/*
+-	  Bit 21     = 10: Vertical Interpolation                   = enabled
++	  Bit 21     = 1: Vertical Interpolation                   = enabled
+ 	  Bit 24     = 1: Select Video Window I Source Addr        = 1
+ 	  1= Video window I source addr = capture port buffer ?
+ 	*/
+@@ -1464,6 +1465,117 @@ SMI_QueryBestSize(
+     LEAVE();
+ }
+ 
++static void myXVCopyYUV12ToPacked(const unsigned char *srcy, const unsigned char *srcv, const unsigned char *srcu,
++		unsigned char *dst, int srcPitchy, int srcPitchuv, int dstPitch, int h, int w)
++{
++	int i, j;
++	unsigned char const *y, *u, *v;
++	int dstinc, yinc, uinc, vinc;
++
++	y = srcy;
++	u = srcu;
++	v = srcv;
++
++	dstinc = dstPitch - 2*w;
++	yinc = srcPitchy - w;
++	uinc = srcPitchuv - w/2;
++	vinc = srcPitchuv - w/2;
++
++	for (i = 0; i < h; i++) {
++		asm (
++//			".set arch=loongson2f\n\t"
++			".set noreorder\n\t"
++			"move $8, %8 \n\t"
++			"1: \n\t"
++			"beqz $8, 2f \n\t"
++			"xor $f0, $f0, $f0 \n\t"
++			"ldc1 $f4, (%0) \n\t"
++			"punpcklbh $f2, $f4, $f0 \n\t"
++			"punpckhbh $f4, $f4, $f0 \n\t"
++			"ldc1 $f16, 8(%0) \n\t"
++			"punpcklbh $f14, $f16, $f0 \n\t"
++			"punpckhbh $f16, $f16, $f0 \n\t"
++			
++			"lwc1 $f8, (%1) \n\t"
++			"lwc1 $f12, (%2) \n\t"
++			"punpcklbh $f8, $f8, $f12 \n\t"
++			"punpcklbh $f6, $f0, $f8 \n\t"
++			"punpckhbh $f8, $f0, $f8 \n\t"
++			"lwc1 $f18, 4(%1) \n\t"
++			"lwc1 $f12, 4(%2) \n\t"
++			"punpcklbh $f18, $f18, $f12 \n\t"
++			"punpcklbh $f10, $f0, $f18 \n\t"
++			"punpckhbh $f12, $f0, $f18 \n\t"
++
++			"or $f2, $f2, $f6 \n\t"
++			"or $f4, $f4, $f8 \n\t"
++			"or $f14, $f14, $f10 \n\t"
++			"or $f16, $f16, $f12 \n\t"
++
++			"sdc1 $f2, (%3) \n\t"
++			"sdc1 $f4, 8(%3) \n\t"
++			"add %0, 16 \n\t"
++			"add %1, 8 \n\t"
++			"add %2, 8 \n\t"
++			"sdc1 $f14, 0x10(%3) \n\t"
++			"sdc1 $f16, 0x18(%3) \n\t"
++			"add $8, -1 \n\t"
++			"b 1b \n\t"
++			"add %3, 32 \n\t"
++			"2: \n\t"
++			".set reorder\n\t"
++			: "=r" (y), "=r" (u), "=r" (v), "=r" (dst)
++			: "0" (y), "1" (u), "2" (v), "3" (dst), "r" (w>>4)
++			: "memory","$8"
++		);
++
++		asm (
++//			".set arch=loongson2f\n\t"
++			".set noreorder\n\t"
++			"move $8, %8 \n\t"
++			"1: \n\t"
++			"beqz $8, 2f \n\t"
++			"xor $f0, $f0, $f0 \n\t"
++			"ldc1 $f4, (%0) \n\t"
++			"punpcklbh $f2, $f4, $f0 \n\t"
++			"punpckhbh $f4, $f4, $f0 \n\t"
++			
++			"lwc1 $f8, (%1) \n\t"
++			"lwc1 $f12, (%2) \n\t"
++			"punpcklbh $f8, $f8, $f12 \n\t"
++			"punpcklbh $f6, $f0, $f8 \n\t"
++			"punpckhbh $f8, $f0, $f8 \n\t"
++
++			"or $f2, $f2, $f6 \n\t"
++			"or $f4, $f4, $f8 \n\t"
++
++			"sdc1 $f2, (%3) \n\t"
++			"sdc1 $f4, 8(%3) \n\t"
++			"add %0, 8 \n\t"
++			"add %1, 4 \n\t"
++			"add %2, 4 \n\t"
++			"add $8, -1 \n\t"
++			"b 1b \n\t"
++			"add %3, 16 \n\t"
++			"2:\n\t"
++			".set reorder\n\t"
++			: "=r" (y), "=r" (u), "=r" (v), "=r" (dst)
++			: "0" (y), "1" (u), "2" (v), "3" (dst), "r" ((w&0xf)/8)
++			: "memory","$8"
++		);
++
++		for (j = (w&7)/2; j; j--) {
++			*dst++ = *y++;
++			*dst++ = *u++;
++			*dst++ = *y++;
++			*dst++ = *v++;
++		}
++		y += yinc;
++		u = (i%2) ? (u + uinc): (u - w/2);
++		v = (i%2) ? (v + vinc): (v - w/2);
++		dst += dstinc;
++	}
++}
+ 
+ static int
+ SMI_PutImage(
+@@ -1592,7 +1704,7 @@ SMI_PutImage(
+ 		offset3 = tmp;
+ 	    }
+ 	    nLines = ((((y2 + 0xffff) >> 16) + 1) & ~1) - top;
+-	    xf86XVCopyYUV12ToPacked(buf + (top * srcPitch) + (left >> 1), 
++	    myXVCopyYUV12ToPacked(buf + (top * srcPitch) + (left >> 1), 
+ 				    buf + offset2, buf + offset3, dstStart,
+ 				    srcPitch, srcPitch2, dstPitch, nLines,
+ 				    nPixels);
+@@ -1747,7 +1859,7 @@ SMI_DisplayVideo(
+ {
+     SMIPtr pSmi = SMIPTR(pScrn);
+     CARD32 vpr00;
+-    int hstretch, vstretch;
++    uint_least32_t hstretch, vstretch;
+ 
+     ENTER();
+ 
+@@ -1774,13 +1886,13 @@ SMI_DisplayVideo(
+     }
+ 
+     if (drw_w > vid_w) {
+-	hstretch = (2560 * vid_w / drw_w + 5) / 10;
++	hstretch = ((uint_least32_t)(vid_w - 1) << 16) / (drw_w - 1);
+     } else {
+ 	hstretch = 0;
+     }
+ 
+     if (drw_h > vid_h) {
+-	vstretch = (2560 * vid_h / drw_h + 5) / 10;
++	vstretch = ((uint_least32_t)(vid_h - 1) << 16) / (drw_h - 1);
+ 	vpr00 |= 1 << 21;
+     } else {
+ 	vstretch = 0;
+@@ -1791,7 +1903,8 @@ SMI_DisplayVideo(
+     WRITE_VPR(pSmi, 0x18, (dstBox->x2) | (dstBox->y2 << 16));
+     WRITE_VPR(pSmi, 0x1C, offset >> 3);
+     WRITE_VPR(pSmi, 0x20, (pitch >> 3) | ((pitch >> 3) << 16));
+-    WRITE_VPR(pSmi, 0x24, (hstretch << 8) | vstretch);
++    WRITE_VPR(pSmi, 0x24, (hstretch & 0xff00) | ((vstretch & 0xff00) >> 8));
++    WRITE_VPR(pSmi, 0x68, ((hstretch & 0xff) << 8) | (vstretch & 0xff));
+ 
+     LEAVE();
+ }
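
For readers who don't want to decode the Loongson MMI assembly, the routine added above is a hand-vectorised planar-to-packed copy. The following scalar sketch shows what myXVCopyYUV12ToPacked computes; the name copy_yuv420_to_yuy2 and the simplified argument order are mine, not part of the patch.

/*
 * Scalar reference (a sketch, not part of the patch) for the MMI
 * routine above: planar YUV 4:2:0 is packed into YUY2, i.e. Y0 U Y1 V
 * per pair of pixels, and each chroma row is shared by two luma rows,
 * which is what the (i % 2) pointer rewind in myXVCopyYUV12ToPacked
 * achieves.  Assumes an even width, as 4:2:0 data implies.
 */
static void copy_yuv420_to_yuy2(const unsigned char *srcy,
                                const unsigned char *srcu,
                                const unsigned char *srcv,
                                unsigned char *dst,
                                int srcPitchY, int srcPitchUV,
                                int dstPitch, int h, int w)
{
    int i, j;

    for (i = 0; i < h; i++) {
        const unsigned char *y = srcy + i * srcPitchY;
        const unsigned char *u = srcu + (i / 2) * srcPitchUV;
        const unsigned char *v = srcv + (i / 2) * srcPitchUV;
        unsigned char *d = dst + i * dstPitch;

        for (j = 0; j < w / 2; j++) {
            *d++ = *y++;        /* Y0 */
            *d++ = *u++;        /* U  */
            *d++ = *y++;        /* Y1 */
            *d++ = *v++;        /* V  */
        }
    }
}

The other functional change is in SMI_DisplayVideo(): the old code derived 8-bit stretch factors as (2560 * vid / drw + 5) / 10 (roughly 256 * vid / drw), while the patched code computes ((vid - 1) << 16) / (drw - 1), keeps the high bytes in VPR 0x24 as before and writes the extra low bytes to VPR 0x68. As a worked example, scaling a 640-pixel-wide source onto a 1024-pixel window gives 0xA0 with the old formula and 0x9FE7 with the new one, so the old value survives, give or take rounding, as the high byte (0x9F) and the low byte adds precision. That VPR 0x68 really accepts these low-order bits is taken from the patch itself; I haven't checked it against SM50x documentation.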