This topic contains 3 replies, has 2 voices, and was last updated by Joe Davis 4 years, 2 months ago.

- AuthorPosts
- September 6, 2013 at 5:52 am #31454
We are developing video processing on PowerVR, and we implement our filters in GLSL. One filter has about 600 lines of shader code. When we enable it, glDrawArrays seems to fail, so I would like to know whether there is a limit on shader code size. Thanks!

September 6, 2013 at 11:21 am #37924

There aren’t any limitations on shader code size. Have you checked that your shader compilation and linking succeeded?

If you share the code with us on this thread, or email it to devtech@imgtec.com, we can look into the issue for you.

Regards,

Joe

September 9, 2013 at 1:12 am #37925

Hi Joe,

Thanks for your kind reply. Below is our shader code. I am sure it runs on the iPad and the iPhone 5; however, it always fails on the iPhone 4. Could you please also share some tips on optimizing this shader code? I am really a newbie at optimizing shaders. Thanks!

precision highp float;

// Scale applied to the patch distance inside computeWeight().
// NOTE(review): presumably the reciprocal of the filter strength h - confirm
// against the host code that sets this uniform.
uniform float inv_h;

// Factors that turn gl_FragCoord pixel coordinates into texture coordinates
// (main() multiplies the pixel position by these before sampling).
// NOTE(review): presumably 1/width and 1/height of inputTexture0 - confirm.
uniform float texelWidthScale;
uniform float texelHeightScale;

// Image being filtered.
uniform sampler2D inputTexture0;

// Normalisation factor applied to the scaled patch distance.
const float norValue = 255.0 * 255.0 / (1024.0*32.0);

// Exponent scale: the weight falls off as exp(-distance / 32).
const float norexp = -1.0/32.0;

// Similarity weight between two 3x3 patches.
//
// m1, m2 : the patches to compare, one sample per matrix element.
// returns: 1024 * exp(-d / 32) when d <= 255, otherwise 0, where
//          d = 114 * (sum of squared element differences) * inv_h * norValue.
//
// The sum of squared differences is accumulated with one dot product per
// matrix column. This function is called many times per fragment, so full
// mat3 * mat3 products are deliberately avoided here.
float computeWeight(mat3 m1,mat3 m2)
{
    // ASCII '-' is required here; a typographic dash does not compile.
    mat3 diff = m1 - m2;

    float ssd = dot(diff[0], diff[0])
              + dot(diff[1], diff[1])
              + dot(diff[2], diff[2]);

    // Named patchDistance so the built-in distance() is not hidden.
    float patchDistance = 114.0 * ssd * inv_h * norValue;

    // step() zeroes the weight of patches whose distance exceeds 255.
    return step(patchDistance, 255.0) * exp(patchDistance * norexp) * 1024.0;
}

// Fragment entry point.
void main()

{

// Overview
// --------
// Fetches a block of 3 x 7 texels (x-1..x+1, y-3..y+3) around the current
// fragment and writes a weighted average of nearby samples to gl_FragColor.
//
// The swizzles below treat the four channels of a texel as four horizontally
// adjacent scalar samples: the .a of the texel to the left directly precedes
// the .r of this texel, and the .r of the texel to the right directly follows
// this texel's .a.
// NOTE(review): presumably the input packs four single-channel pixels into
// each RGBA texel - confirm against the code that uploads inputTexture0.
//
// For every output channel, 25 candidate samples (5 rows x 5 columns centred
// on that channel) are visited. Each candidate is weighted by computeWeight()
// applied to the 3x3 patch around the output sample (cur3_*) and the 3x3 patch
// around the candidate (other). `average` accumulates weight * candidate value
// and `weight` accumulates the weights, both per channel.

// Window-space pixel position of this fragment (x, y of gl_FragCoord).
vec2 pt = vec2(gl_FragCoord);

vec2 scale = vec2(texelWidthScale, texelHeightScale);

// Texel naming: cur = this texel; ll/rr = x-1 / x+1 on the same row;
// tt<N> / bb<N> = same column at y-N / y+N;
// lt1<N>, rt1<N>, lb1<N>, rb1<N> = column x-1 or x+1 at y-N (t) or y+N (b).
vec4 cur = texture2D(inputTexture0, pt * scale);vec4 ll1 = texture2D(inputTexture0, (pt+vec2(-1.0,0.0)) * scale);

vec4 tt1 = texture2D(inputTexture0, (pt+vec2(0.0,-1.0)) * scale);

vec4 tt2 = texture2D(inputTexture0, (pt+vec2(0.0,-2.0)) * scale);

vec4 tt3 = texture2D(inputTexture0, (pt+vec2(0.0,-3.0)) * scale);

vec4 rr1 = texture2D(inputTexture0, (pt+vec2(1.0,0.0)) * scale);

vec4 bb1 = texture2D(inputTexture0, (pt+vec2(0.0,1.0)) * scale);

vec4 bb2 = texture2D(inputTexture0, (pt+vec2(0.0,2.0)) * scale);

vec4 bb3 = texture2D(inputTexture0, (pt+vec2(0.0,3.0)) * scale);vec4 lt13 = texture2D(inputTexture0, (pt+vec2(-1.0,-3.0)) * scale);

vec4 lt12 = texture2D(inputTexture0, (pt+vec2(-1.0,-2.0)) * scale);

vec4 lt11 = texture2D(inputTexture0, (pt+vec2(-1.0,-1.0)) * scale);vec4 rt11 = texture2D(inputTexture0, (pt+vec2(1.0,-1.0)) * scale);

vec4 rt12 = texture2D(inputTexture0, (pt+vec2(1.0,-2.0)) * scale);

vec4 rt13 = texture2D(inputTexture0, (pt+vec2(1.0,-3.0)) * scale);vec4 rb11 = texture2D(inputTexture0, (pt+vec2(1.0,1.0)) * scale);

vec4 rb12 = texture2D(inputTexture0, (pt+vec2(1.0,2.0)) * scale);

vec4 rb13 = texture2D(inputTexture0, (pt+vec2(1.0,3.0)) * scale);vec4 lb11 = texture2D(inputTexture0, (pt+vec2(-1.0,1.0)) * scale);

vec4 lb12 = texture2D(inputTexture0, (pt+vec2(-1.0,2.0)) * scale);

// The last fetch (lb13) shares its line with cur3_r. cur3_r/g/b/a are the
// reference 3x3 patches centred on cur.r, cur.g, cur.b and cur.a: one mat3
// column per row y-1, y, y+1.
vec4 lb13 = texture2D(inputTexture0, (pt+vec2(-1.0,3.0)) * scale);mat3 cur3_r = mat3(lt11.a,tt1.r,tt1.g,ll1.a,cur.r,cur.g,lb11.a,bb1.r,bb1.g);

mat3 cur3_g = mat3(tt1.rgb,cur.rgb,bb1.rgb);

mat3 cur3_b = mat3(tt1.gba,cur.gba,bb1.gba);

mat3 cur3_a = mat3(tt1.b,tt1.a,rt11.r,cur.b,cur.a,rr1.r,bb1.b,bb1.a,rb11.r);vec4 average = vec4(0.0);

vec4 weight = vec4(0.0);//first line

///////////////////////////////

// Candidates on row y-2 (lt12.b, lt12.a, tt2.rgba, rt12.r, rt12.g); their
// patches span rows y-3..y-1. Each candidate feeds only the output channels
// that lie within two samples of it horizontally.

//point 1

mat3 other = mat3(lt13.gba,lt12.gba,lt11.gba);

float fweight = computeWeight(cur3_r,other);

average.r += fweight*lt12.b;

weight.r += fweight;//point 2

other = mat3(lt13.b,lt13.a,tt3.r,lt12.b,lt12.a,tt2.r,lt11.b,lt11.a,tt1.r);

fweight = computeWeight(cur3_r,other);

average.r += fweight*lt12.a;

weight.r += fweight;fweight = computeWeight(cur3_g,other);

average.g += fweight*lt12.a;

weight.g += fweight;//point 3

other = mat3(lt13.a,tt3.r,tt3.g,lt12.a,tt2.r,tt2.g,lt11.a,tt1.r,tt1.g);

fweight = computeWeight(cur3_r,other);

average.r += fweight * tt2.r;

weight.r += fweight;fweight = computeWeight(cur3_g,other);

average.g += fweight*tt2.r;

weight.g += fweight;fweight = computeWeight(cur3_b,other);

average.b += fweight*tt2.r;

weight.b += fweight;//point 4

other = mat3(tt3.rgb,tt2.rgb,tt1.rgb);

fweight = computeWeight(cur3_r,other);

average.r += fweight*tt2.g;

weight.r += fweight;fweight = computeWeight(cur3_g,other);

average.g += fweight*tt2.g;

weight.g += fweight;fweight = computeWeight(cur3_b,other);

average.b += fweight*tt2.g;

weight.b += fweight;fweight = computeWeight(cur3_a,other);

average.a += fweight*tt2.g;

weight.a += fweight;//point 5

other = mat3(tt3.gba,tt2.gba,tt1.gba);

fweight = computeWeight(cur3_r,other);

average.r += fweight*tt2.b;

weight.r += fweight;fweight = computeWeight(cur3_g,other);

average.g += fweight*tt2.b;

weight.g += fweight;fweight = computeWeight(cur3_b,other);

average.b += fweight*tt2.b;

weight.b += fweight;fweight = computeWeight(cur3_a,other);

average.a += fweight*tt2.b;

weight.a += fweight;//point

other = mat3(tt3.b,tt3.a,rt13.r,tt2.b,tt2.a,rt12.r,tt1.b,tt1.a,rt11.r);

fweight = computeWeight(cur3_g,other);

average.g += fweight*tt2.a;

weight.g += fweight;fweight = computeWeight(cur3_b,other);

average.b += fweight*tt2.a;

weight.b += fweight;fweight = computeWeight(cur3_a,other);

average.a += fweight*tt2.a;

weight.a += fweight;//point

other = mat3(tt3.a,rt13.r,rt13.g,tt2.a,rt12.r,rt12.g,tt1.a,rt11.r,rt11.g);

fweight = computeWeight(cur3_b,other);

average.b += fweight*rt12.r;

weight.b += fweight;fweight = computeWeight(cur3_a,other);

average.a += fweight*rt12.r;

weight.a += fweight;//point

other = mat3(rt13.rgb,rt12.rgb,rt11.rgb);

fweight = computeWeight(cur3_a,other);

average.a += fweight*rt12.g;

weight.a += fweight;//second line

///////////////////////////////

// Candidates on row y-1 (lt11.b, lt11.a, tt1.rgba, rt11.r, rt11.g); their
// patches span rows y-2..y.

//point 6

other = mat3(lt12.gba,lt11.gba,ll1.gba);

fweight = computeWeight(cur3_r,other);

average.r += fweight*lt11.b;

weight.r += fweight;//point 7

other = mat3(lt12.b,lt12.a,tt2.r,lt11.b,lt11.a,tt1.r,ll1.b,ll1.a,cur.r);

fweight = computeWeight(cur3_r,other);

average.r += fweight*lt11.a;

weight.r += fweight;fweight = computeWeight(cur3_g,other);

average.g += fweight*lt11.a;

weight.g += fweight;//point 8

other = mat3(lt12.a,tt2.r,tt2.g,lt11.a,tt1.r,tt1.g,ll1.a,cur.r,cur.g);

fweight = computeWeight(cur3_r,other);

average.r += fweight * tt1.r;

weight.r += fweight;fweight = computeWeight(cur3_g,other);

average.g += fweight*tt1.r;

weight.g += fweight;fweight = computeWeight(cur3_b,other);

average.b += fweight*tt1.r;

weight.b += fweight;//point 9

other = mat3(tt2.rgb,tt1.rgb,cur.rgb);

fweight = computeWeight(cur3_r,other);

average.r += fweight*tt1.g;

weight.r += fweight;fweight = computeWeight(cur3_g,other);

average.g += fweight*tt1.g;

weight.g += fweight;fweight = computeWeight(cur3_b,other);

average.b += fweight*tt1.g;

weight.b += fweight;fweight = computeWeight(cur3_a,other);

average.a += fweight*tt1.g;

weight.a += fweight;//point 10

other = mat3(tt2.gba,tt1.gba,cur.gba);

fweight = computeWeight(cur3_r,other);

average.r += fweight*tt1.b;

weight.r += fweight;fweight = computeWeight(cur3_g,other);

average.g += fweight*tt1.b;

weight.g += fweight;fweight = computeWeight(cur3_b,other);

average.b += fweight*tt1.b;

weight.b += fweight;fweight = computeWeight(cur3_a,other);

average.a += fweight*tt1.b;

weight.a += fweight;//point

other = mat3(tt2.b,tt2.a,rt12.r,tt1.b,tt1.a,rt11.r,cur.b,cur.a,rr1.r);

fweight = computeWeight(cur3_g,other);

average.g += fweight*tt1.a;

weight.g += fweight;fweight = computeWeight(cur3_b,other);

average.b += fweight*tt1.a;

weight.b += fweight;fweight = computeWeight(cur3_a,other);

average.a += fweight*tt1.a;

weight.a += fweight;//point

other = mat3(tt2.a,rt12.r,rt12.g,tt1.a,rt11.r,rt11.g,cur.a,rr1.r,rr1.g);

fweight = computeWeight(cur3_b,other);

average.b += fweight*rt11.r;

weight.b += fweight;fweight = computeWeight(cur3_a,other);

average.a += fweight*rt11.r;

weight.a += fweight;//point

other = mat3(rt12.rgb,rt11.rgb,rr1.rgb);

fweight = computeWeight(cur3_a,other);

average.a += fweight*rt11.g;

weight.a += fweight;//third line

///////////////////////////////

// Candidates on the fragment's own row (ll1.b, ll1.a, cur.rgba, rr1.r, rr1.g);
// their patches span rows y-1..y+1. Each channel's own sample is among them,
// where `other` equals that channel's reference patch.

//point 11

other = mat3(lt11.gba,ll1.gba,lb11.gba);

fweight = computeWeight(cur3_r,other);

average.r += fweight*ll1.b;

weight.r += fweight;//point 12

other = mat3(lt11.b,lt11.a,tt1.r,ll1.b,ll1.a,cur.r,lb11.b,lb11.a,bb1.r);

fweight = computeWeight(cur3_r,other);

average.r += fweight*ll1.a;

weight.r += fweight;fweight = computeWeight(cur3_g,other);

average.g += fweight*ll1.a;

weight.g += fweight;//point 13

other = mat3(lt11.a,tt1.r,tt1.g,ll1.a,cur.r,cur.g,lb11.a,bb1.r,bb1.g);

fweight = computeWeight(cur3_r,other);

average.r += fweight * cur.r;

weight.r += fweight;fweight = computeWeight(cur3_g,other);

average.g += fweight*cur.r;

weight.g += fweight;fweight = computeWeight(cur3_b,other);

average.b += fweight*cur.r;

weight.b += fweight;//point 14

other = mat3(tt1.rgb,cur.rgb,bb1.rgb);

fweight = computeWeight(cur3_r,other);

average.r += fweight*cur.g;

weight.r += fweight;fweight = computeWeight(cur3_g,other);

average.g += fweight*cur.g;

weight.g += fweight;fweight = computeWeight(cur3_b,other);

average.b += fweight*cur.g;

weight.b += fweight;fweight = computeWeight(cur3_a,other);

average.a += fweight*cur.g;

weight.a += fweight;//point 15

other = mat3(tt1.gba,cur.gba,bb1.gba);

fweight = computeWeight(cur3_r,other);

average.r += fweight*cur.b;

weight.r += fweight;fweight = computeWeight(cur3_g,other);

average.g += fweight*cur.b;

weight.g += fweight;fweight = computeWeight(cur3_b,other);

average.b += fweight*cur.b;

weight.b += fweight;fweight = computeWeight(cur3_a,other);

average.a += fweight*cur.b;

weight.a += fweight;//point

other = mat3(tt1.b,tt1.a,rt11.r,cur.b,cur.a,rr1.r,bb1.b,bb1.a,rb11.r);

fweight = computeWeight(cur3_g,other);

average.g += fweight*cur.a;

weight.g += fweight;fweight = computeWeight(cur3_b,other);

average.b += fweight*cur.a;

weight.b += fweight;fweight = computeWeight(cur3_a,other);

average.a += fweight*cur.a;

weight.a += fweight;//point

other = mat3(tt1.a,rt11.r,rt11.g,cur.a,rr1.r,rr1.g,bb1.a,rb11.r,rb11.g);

fweight = computeWeight(cur3_b,other);

average.b += fweight*rr1.r;

weight.b += fweight;fweight = computeWeight(cur3_a,other);

average.a += fweight*rr1.r;

weight.a += fweight;//point

other = mat3(rt11.rgb,rr1.rgb,rb11.rgb);

fweight = computeWeight(cur3_a,other);

average.a += fweight*rr1.g;

weight.a += fweight;//fourth line

///////////////////////////////

// Candidates on row y+1 (lb11.b, lb11.a, bb1.rgba, rb11.r, rb11.g); their
// patches span rows y..y+2.

//point 16

other = mat3(ll1.gba,lb11.gba,lb12.gba);

fweight = computeWeight(cur3_r,other);

average.r += fweight*lb11.b;

weight.r += fweight;//point 17

other = mat3(ll1.b,ll1.a,cur.r,lb11.b,lb11.a,bb1.r,lb12.b,lb12.a,bb2.r);

fweight = computeWeight(cur3_r,other);

average.r += fweight*lb11.a;

weight.r += fweight;fweight = computeWeight(cur3_g,other);

average.g += fweight*lb11.a;

weight.g += fweight;//point 18

other = mat3(ll1.a,cur.r,cur.g,lb11.a,bb1.r,bb1.g,lb12.a,bb2.r,bb2.g);

fweight = computeWeight(cur3_r,other);

average.r += fweight * bb1.r;

weight.r += fweight;fweight = computeWeight(cur3_g,other);

average.g += fweight*bb1.r;

weight.g += fweight;fweight = computeWeight(cur3_b,other);

average.b += fweight*bb1.r;

weight.b += fweight;//point 19

other = mat3(cur.rgb,bb1.rgb,bb2.rgb);

fweight = computeWeight(cur3_r,other);

average.r += fweight*bb1.g;

weight.r += fweight;fweight = computeWeight(cur3_g,other);

average.g += fweight*bb1.g;

weight.g += fweight;fweight = computeWeight(cur3_b,other);

average.b += fweight*bb1.g;

weight.b += fweight;fweight = computeWeight(cur3_a,other);

average.a += fweight*bb1.g;

weight.a += fweight;//point 20

other = mat3(cur.gba,bb1.gba,bb2.gba);

fweight = computeWeight(cur3_r,other);

average.r += fweight*bb1.b;

weight.r += fweight;fweight = computeWeight(cur3_g,other);

average.g += fweight*bb1.b;

weight.g += fweight;fweight = computeWeight(cur3_b,other);

average.b += fweight*bb1.b;

weight.b += fweight;fweight = computeWeight(cur3_a,other);

average.a += fweight*bb1.b;

weight.a += fweight;//point

other = mat3(cur.b,cur.a,rr1.r,bb1.b,bb1.a,rb11.r,bb2.b,bb2.a,rb12.r);

fweight = computeWeight(cur3_g,other);

average.g += fweight*bb1.a;

weight.g += fweight;fweight = computeWeight(cur3_b,other);

average.b += fweight*bb1.a;

weight.b += fweight;fweight = computeWeight(cur3_a,other);

average.a += fweight*bb1.a;

weight.a += fweight;//point

other = mat3(cur.a,rr1.r,rr1.g,bb1.a,rb11.r,rb11.g,bb2.a,rb12.r,rb12.g);

fweight = computeWeight(cur3_b,other);

average.b += fweight*rb11.r;

weight.b += fweight;fweight = computeWeight(cur3_a,other);

average.a += fweight*rb11.r;

weight.a += fweight;//point

other = mat3(rr1.rgb,rb11.rgb,rb12.rgb);

fweight = computeWeight(cur3_a,other);

average.a += fweight*rb11.g;

weight.a += fweight;//fifth line

///////////////////////////////

// Candidates on row y+2 (lb12.b, lb12.a, bb2.rgba, rb12.r, rb12.g); their
// patches span rows y+1..y+3.

//point 21

other = mat3(lb11.gba,lb12.gba,lb13.gba);

fweight = computeWeight(cur3_r,other);

average.r += fweight*lb12.b;

weight.r += fweight;//point 22

other = mat3(lb11.b,lb11.a,bb1.r,lb12.b,lb12.a,bb2.r,lb13.b,lb13.a,bb3.r);

fweight = computeWeight(cur3_r,other);

average.r += fweight*lb12.a;

weight.r += fweight;fweight = computeWeight(cur3_g,other);

average.g += fweight*lb12.a;

weight.g += fweight;//point 23

other = mat3(lb11.a,bb1.r,bb1.g,lb12.a,bb2.r,bb2.g,lb13.a,bb3.r,bb3.g);

fweight = computeWeight(cur3_r,other);

average.r += fweight * bb2.r;

weight.r += fweight;fweight = computeWeight(cur3_g,other);

average.g += fweight*bb2.r;

weight.g += fweight;fweight = computeWeight(cur3_b,other);

average.b += fweight*bb2.r;

weight.b += fweight;//point 24

other = mat3(bb1.rgb,bb2.rgb,bb3.rgb);

fweight = computeWeight(cur3_r,other);

average.r += fweight*bb2.g;

weight.r += fweight;fweight = computeWeight(cur3_g,other);

average.g += fweight*bb2.g;

weight.g += fweight;fweight = computeWeight(cur3_b,other);

average.b += fweight*bb2.g;

weight.b += fweight;fweight = computeWeight(cur3_a,other);

average.a += fweight*bb2.g;

weight.a += fweight;//point 25

other = mat3(bb1.gba,bb2.gba,bb3.gba);

fweight = computeWeight(cur3_r,other);

average.r += fweight*bb2.b;

weight.r += fweight;fweight = computeWeight(cur3_g,other);

average.g += fweight*bb2.b;

weight.g += fweight;fweight = computeWeight(cur3_b,other);

average.b += fweight*bb2.b;

weight.b += fweight;fweight = computeWeight(cur3_a,other);

average.a += fweight*bb2.b;

weight.a += fweight;//point

other = mat3(bb1.b,bb1.a,rb11.r,bb2.b,bb2.a,rb12.r,bb3.b,bb3.a,rb13.r);

fweight = computeWeight(cur3_g,other);

average.g += fweight*bb2.a;

weight.g += fweight;fweight = computeWeight(cur3_b,other);

average.b += fweight*bb2.a;

weight.b += fweight;fweight = computeWeight(cur3_a,other);

average.a += fweight*bb2.a;

weight.a += fweight;//rt

other = mat3(bb1.a,rb11.r,rb11.g,bb2.a,rb12.r,rb12.g,bb3.a,rb13.r,rb13.g);

fweight = computeWeight(cur3_b,other);

average.b += fweight*rb12.r;

weight.b += fweight;fweight = computeWeight(cur3_a,other);

average.a += fweight*rb12.r;

weight.a += fweight;//point

other = mat3(rb11.rgb,rb12.rgb,rb13.rgb);

fweight = computeWeight(cur3_a,other);

average.a += fweight*rb12.g;

// tmp1 is 1.0 in every channel whose accumulated weight is <= 0.0, else 0.0.
weight.a += fweight;vec4 tmp1 = step(weight,vec4(0.0));

// Per-channel weighted mean, clamped to [0, 1].
vec4 tmp2 = average/weight;

tmp2 = clamp(tmp2,0.0,1.0);

// Select the filtered value, or the unfiltered texel where tmp1 is 1.0.
gl_FragColor = (vec4(1.0)-tmp1) * tmp2 + tmp1*cur;

}October 16, 2013 at 1:42 pm #37926Hi,

The shader is extremely expensive. When targeting an SGX540 instruction set, our PVRShaderEditor tool reports the best case is 2712 instructions, the worst is 5284. As a comparison, a current high-end mobile game will usually have fragment shaders that are between 5 and 40 instructions.

Although there isn’t a limit on the size of GLSL ES shader source, there is a limit to the number of instructions that can be processed by a given GPU. This limit will vary depending on the target GPU. If you hit this limit, the compiler should produce an error.

I suspect the problem you’re seeing on the target is purely compilation time. On my desktop machine, the shader takes 2-3 seconds to compile. On a mobile device, this compilation time will be much higher. This high compile time may cause iOS to kill the compiler as it may appear unresponsive for a very long time. You would have to discuss the issue with Apple to understand why compilation doesn’t succeed on the iPhone 4.

My recommendation would be to revisit your algorithm and see if there’s any way it can be simplified. If it’s not possible to find a compromise between speed and quality of your output, you could split the render into multiple passes (i.e. output texture of pass 1 is read in by pass 2, and so on).

Regards,

Joe