GLSL code size limit

This topic contains 3 replies, has 2 voices, and was last updated by  Joe Davis 4 years ago.

Viewing 4 posts - 1 through 4 (of 4 total)
  • Author
    Posts
  • #31454

    rory_yin
    Member

    We are developing video processing on PowerVR, and we implement our filters in GLSL. One filter has about 600 lines of shader code. When we enable it, glDrawArrays
    seems to fail, so I want to know whether there is a limit on shader code size. Thanks!

    #37924

    Joe Davis
    Member

    There aren’t any limitations of shader code size. Have you checked that your shader compilation and linking succeeded?

    If you share the code with us on this thread, or email it to devtech@imgtec.com, we can look into the issue for you.

    Regards,
    Joe

    #37925

    rory_yin
    Member

    hi, joe

    Thanks for your kind reply. Below is our shader code. I am sure it runs on the iPad and iPhone 5; however, it always fails on the iPhone 4. Would you please also share some tips on optimizing this shader code? I am really a newbie at optimizing shaders. Thanks!

    // Default float precision for this shader.
    precision highp float;

    // Multiplied into the patch distance inside computeWeight.
    // NOTE(review): presumably the reciprocal of a filter-strength parameter h - confirm with the host code.
    uniform float inv_h;
    // Factors that turn a window-space pixel position (gl_FragCoord.xy) into a
    // texture coordinate in main().
    // NOTE(review): presumably 1/width and 1/height of inputTexture0 - confirm with the host code.
    uniform float texelWidthScale;
    uniform float texelHeightScale;
    // Source image sampled by main().
    uniform sampler2D inputTexture0;

    // Scale applied to the patch distance in computeWeight: 255^2 / (1024 * 32).
    const float norValue = 255.0 * 255.0 / (1024.0*32.0);
    // Exponent scale used by computeWeight: the weight decays as e^(distance * norexp).
    const float norexp = -1.0/32.0;

    // Similarity weight between two 3x3 patches.
    //
    // Sums the squared differences of all nine elements, scales that sum by
    // 114.0 * inv_h * norValue, and maps the scaled distance d to
    // 1024.0 * e^(d * norexp). The weight is forced to 0.0 once d exceeds 255.0.
    //
    // m1, m2: the patches to compare. Element order is irrelevant as long as
    //         both patches use the same layout, because every element is summed.
    // Returns 1024.0 for identical patches and smaller values as the patches
    // diverge (for non-negative inv_h).
    float computeWeight(mat3 m1,mat3 m2)
    {
        mat3 diff = m1 - m2;
        // One dot product per column adds up the nine squared differences
        // directly; no matrix multiplications are needed for a plain sum.
        float ssd = dot(diff[0],diff[0]) + dot(diff[1],diff[1]) + dot(diff[2],diff[2]);
        float dist = 114.0 * ssd * inv_h * norValue;
        // step(dist,255.0) is 0.0 when dist > 255.0, discarding very dissimilar patches.
        return step(dist,255.0) * exp(dist*norexp) * 1024.0;
    }

    // Fragment entry point.
    //
    // For each of the four channels of the output texel, accumulates a weighted
    // average of 25 neighbouring samples (5 columns x 5 rows). The weight of a
    // neighbour is computeWeight(3x3 patch centred on the current sample,
    // 3x3 patch centred on the neighbour); computeWeight is called 100 times.
    //
    // The indexing treats each RGBA texel as four horizontally adjacent samples:
    // e.g. the patch around cur.r takes ll1.a as its left neighbour and the patch
    // around cur.a takes rr1.r as its right neighbour.
    // NOTE(review): presumably inputTexture0 holds a single-channel image packed
    // four samples per texel - confirm against the code that fills the texture.
    void main()
    {
    // Window-space position of this fragment (x and y of gl_FragCoord).
    vec2 pt = vec2(gl_FragCoord);
    // A pixel position multiplied by scale is used as the texture coordinate.
    vec2 scale = vec2(texelWidthScale, texelHeightScale);
    vec4 cur = texture2D(inputTexture0, pt * scale);

    // Neighbouring texels in the same column and row.
    // ll/rr: x offset -1/+1; ttN/bbN: y offset -N/+N.
    vec4 ll1 = texture2D(inputTexture0, (pt+vec2(-1.0,0.0)) * scale);
    vec4 tt1 = texture2D(inputTexture0, (pt+vec2(0.0,-1.0)) * scale);
    vec4 tt2 = texture2D(inputTexture0, (pt+vec2(0.0,-2.0)) * scale);
    vec4 tt3 = texture2D(inputTexture0, (pt+vec2(0.0,-3.0)) * scale);
    vec4 rr1 = texture2D(inputTexture0, (pt+vec2(1.0,0.0)) * scale);
    vec4 bb1 = texture2D(inputTexture0, (pt+vec2(0.0,1.0)) * scale);
    vec4 bb2 = texture2D(inputTexture0, (pt+vec2(0.0,2.0)) * scale);
    vec4 bb3 = texture2D(inputTexture0, (pt+vec2(0.0,3.0)) * scale);

    // Diagonal neighbours: lt1N = (-1,-N), rt1N = (+1,-N), rb1N = (+1,+N), lb1N = (-1,+N).
    vec4 lt13 = texture2D(inputTexture0, (pt+vec2(-1.0,-3.0)) * scale);
    vec4 lt12 = texture2D(inputTexture0, (pt+vec2(-1.0,-2.0)) * scale);
    vec4 lt11 = texture2D(inputTexture0, (pt+vec2(-1.0,-1.0)) * scale);

    vec4 rt11 = texture2D(inputTexture0, (pt+vec2(1.0,-1.0)) * scale);
    vec4 rt12 = texture2D(inputTexture0, (pt+vec2(1.0,-2.0)) * scale);
    vec4 rt13 = texture2D(inputTexture0, (pt+vec2(1.0,-3.0)) * scale);

    vec4 rb11 = texture2D(inputTexture0, (pt+vec2(1.0,1.0)) * scale);
    vec4 rb12 = texture2D(inputTexture0, (pt+vec2(1.0,2.0)) * scale);
    vec4 rb13 = texture2D(inputTexture0, (pt+vec2(1.0,3.0)) * scale);

    vec4 lb11 = texture2D(inputTexture0, (pt+vec2(-1.0,1.0)) * scale);
    vec4 lb12 = texture2D(inputTexture0, (pt+vec2(-1.0,2.0)) * scale);
    vec4 lb13 = texture2D(inputTexture0, (pt+vec2(-1.0,3.0)) * scale);

    // Reference 3x3 patches, one per output channel, centred on cur.r, cur.g,
    // cur.b and cur.a respectively. Each group of three values is one patch row
    // (y offset -1, 0, +1).
    mat3 cur3_r = mat3(lt11.a,tt1.r,tt1.g,ll1.a,cur.r,cur.g,lb11.a,bb1.r,bb1.g);
    mat3 cur3_g = mat3(tt1.rgb,cur.rgb,bb1.rgb);
    mat3 cur3_b = mat3(tt1.gba,cur.gba,bb1.gba);
    mat3 cur3_a = mat3(tt1.b,tt1.a,rt11.r,cur.b,cur.a,rr1.r,bb1.b,bb1.a,rb11.r);

    // Per-channel accumulators: sum of weight * neighbour value, and sum of weights.
    vec4 average = vec4(0.0);
    vec4 weight = vec4(0.0);

    // In every section below, "other" is the 3x3 patch centred on the neighbour
    // sample named in the comment, and the listed channels are the outputs whose
    // 5x5 window contains that neighbour.

    //first line: neighbours at y offset -2
    ///////////////////////////////
    //point 1: neighbour lt12.b (r)
    mat3 other = mat3(lt13.gba,lt12.gba,lt11.gba);
    float fweight = computeWeight(cur3_r,other);
    average.r += fweight*lt12.b;
    weight.r += fweight;

    //point 2: neighbour lt12.a (r, g)
    other = mat3(lt13.b,lt13.a,tt3.r,lt12.b,lt12.a,tt2.r,lt11.b,lt11.a,tt1.r);
    fweight = computeWeight(cur3_r,other);
    average.r += fweight*lt12.a;
    weight.r += fweight;

    fweight = computeWeight(cur3_g,other);
    average.g += fweight*lt12.a;
    weight.g += fweight;

    //point 3: neighbour tt2.r (r, g, b)
    other = mat3(lt13.a,tt3.r,tt3.g,lt12.a,tt2.r,tt2.g,lt11.a,tt1.r,tt1.g);
    fweight = computeWeight(cur3_r,other);
    average.r += fweight * tt2.r;
    weight.r += fweight;

    fweight = computeWeight(cur3_g,other);
    average.g += fweight*tt2.r;
    weight.g += fweight;

    fweight = computeWeight(cur3_b,other);
    average.b += fweight*tt2.r;
    weight.b += fweight;

    //point 4: neighbour tt2.g (r, g, b, a)
    other = mat3(tt3.rgb,tt2.rgb,tt1.rgb);
    fweight = computeWeight(cur3_r,other);
    average.r += fweight*tt2.g;
    weight.r += fweight;

    fweight = computeWeight(cur3_g,other);
    average.g += fweight*tt2.g;
    weight.g += fweight;

    fweight = computeWeight(cur3_b,other);
    average.b += fweight*tt2.g;
    weight.b += fweight;

    fweight = computeWeight(cur3_a,other);
    average.a += fweight*tt2.g;
    weight.a += fweight;

    //point 5: neighbour tt2.b (r, g, b, a)
    other = mat3(tt3.gba,tt2.gba,tt1.gba);
    fweight = computeWeight(cur3_r,other);
    average.r += fweight*tt2.b;
    weight.r += fweight;

    fweight = computeWeight(cur3_g,other);
    average.g += fweight*tt2.b;
    weight.g += fweight;

    fweight = computeWeight(cur3_b,other);
    average.b += fweight*tt2.b;
    weight.b += fweight;

    fweight = computeWeight(cur3_a,other);
    average.a += fweight*tt2.b;
    weight.a += fweight;

    //point: neighbour tt2.a (g, b, a)
    other = mat3(tt3.b,tt3.a,rt13.r,tt2.b,tt2.a,rt12.r,tt1.b,tt1.a,rt11.r);
    fweight = computeWeight(cur3_g,other);
    average.g += fweight*tt2.a;
    weight.g += fweight;

    fweight = computeWeight(cur3_b,other);
    average.b += fweight*tt2.a;
    weight.b += fweight;

    fweight = computeWeight(cur3_a,other);
    average.a += fweight*tt2.a;
    weight.a += fweight;

    //point: neighbour rt12.r (b, a)
    other = mat3(tt3.a,rt13.r,rt13.g,tt2.a,rt12.r,rt12.g,tt1.a,rt11.r,rt11.g);
    fweight = computeWeight(cur3_b,other);
    average.b += fweight*rt12.r;
    weight.b += fweight;

    fweight = computeWeight(cur3_a,other);
    average.a += fweight*rt12.r;
    weight.a += fweight;

    //point: neighbour rt12.g (a)
    other = mat3(rt13.rgb,rt12.rgb,rt11.rgb);
    fweight = computeWeight(cur3_a,other);
    average.a += fweight*rt12.g;
    weight.a += fweight;

    //second line: neighbours at y offset -1
    ///////////////////////////////
    //point 6: neighbour lt11.b (r)
    other = mat3(lt12.gba,lt11.gba,ll1.gba);
    fweight = computeWeight(cur3_r,other);
    average.r += fweight*lt11.b;
    weight.r += fweight;

    //point 7: neighbour lt11.a (r, g)
    other = mat3(lt12.b,lt12.a,tt2.r,lt11.b,lt11.a,tt1.r,ll1.b,ll1.a,cur.r);
    fweight = computeWeight(cur3_r,other);
    average.r += fweight*lt11.a;
    weight.r += fweight;

    fweight = computeWeight(cur3_g,other);
    average.g += fweight*lt11.a;
    weight.g += fweight;

    //point 8: neighbour tt1.r (r, g, b)
    other = mat3(lt12.a,tt2.r,tt2.g,lt11.a,tt1.r,tt1.g,ll1.a,cur.r,cur.g);
    fweight = computeWeight(cur3_r,other);
    average.r += fweight * tt1.r;
    weight.r += fweight;

    fweight = computeWeight(cur3_g,other);
    average.g += fweight*tt1.r;
    weight.g += fweight;

    fweight = computeWeight(cur3_b,other);
    average.b += fweight*tt1.r;
    weight.b += fweight;

    //point 9: neighbour tt1.g (r, g, b, a)
    other = mat3(tt2.rgb,tt1.rgb,cur.rgb);
    fweight = computeWeight(cur3_r,other);
    average.r += fweight*tt1.g;
    weight.r += fweight;

    fweight = computeWeight(cur3_g,other);
    average.g += fweight*tt1.g;
    weight.g += fweight;

    fweight = computeWeight(cur3_b,other);
    average.b += fweight*tt1.g;
    weight.b += fweight;

    fweight = computeWeight(cur3_a,other);
    average.a += fweight*tt1.g;
    weight.a += fweight;

    //point 10: neighbour tt1.b (r, g, b, a)
    other = mat3(tt2.gba,tt1.gba,cur.gba);
    fweight = computeWeight(cur3_r,other);
    average.r += fweight*tt1.b;
    weight.r += fweight;

    fweight = computeWeight(cur3_g,other);
    average.g += fweight*tt1.b;
    weight.g += fweight;

    fweight = computeWeight(cur3_b,other);
    average.b += fweight*tt1.b;
    weight.b += fweight;

    fweight = computeWeight(cur3_a,other);
    average.a += fweight*tt1.b;
    weight.a += fweight;

    //point: neighbour tt1.a (g, b, a)
    other = mat3(tt2.b,tt2.a,rt12.r,tt1.b,tt1.a,rt11.r,cur.b,cur.a,rr1.r);
    fweight = computeWeight(cur3_g,other);
    average.g += fweight*tt1.a;
    weight.g += fweight;

    fweight = computeWeight(cur3_b,other);
    average.b += fweight*tt1.a;
    weight.b += fweight;

    fweight = computeWeight(cur3_a,other);
    average.a += fweight*tt1.a;
    weight.a += fweight;

    //point: neighbour rt11.r (b, a)
    other = mat3(tt2.a,rt12.r,rt12.g,tt1.a,rt11.r,rt11.g,cur.a,rr1.r,rr1.g);
    fweight = computeWeight(cur3_b,other);
    average.b += fweight*rt11.r;
    weight.b += fweight;

    fweight = computeWeight(cur3_a,other);
    average.a += fweight*rt11.r;
    weight.a += fweight;

    //point: neighbour rt11.g (a)
    other = mat3(rt12.rgb,rt11.rgb,rr1.rgb);
    fweight = computeWeight(cur3_a,other);
    average.a += fweight*rt11.g;
    weight.a += fweight;

    //third line: neighbours at y offset 0 (includes the current samples themselves)
    ///////////////////////////////
    //point 11: neighbour ll1.b (r)
    other = mat3(lt11.gba,ll1.gba,lb11.gba);
    fweight = computeWeight(cur3_r,other);
    average.r += fweight*ll1.b;
    weight.r += fweight;

    //point 12: neighbour ll1.a (r, g)
    other = mat3(lt11.b,lt11.a,tt1.r,ll1.b,ll1.a,cur.r,lb11.b,lb11.a,bb1.r);
    fweight = computeWeight(cur3_r,other);
    average.r += fweight*ll1.a;
    weight.r += fweight;

    fweight = computeWeight(cur3_g,other);
    average.g += fweight*ll1.a;
    weight.g += fweight;

    //point 13: neighbour cur.r (r, g, b); "other" equals cur3_r here
    other = mat3(lt11.a,tt1.r,tt1.g,ll1.a,cur.r,cur.g,lb11.a,bb1.r,bb1.g);
    fweight = computeWeight(cur3_r,other);
    average.r += fweight * cur.r;
    weight.r += fweight;

    fweight = computeWeight(cur3_g,other);
    average.g += fweight*cur.r;
    weight.g += fweight;

    fweight = computeWeight(cur3_b,other);
    average.b += fweight*cur.r;
    weight.b += fweight;

    //point 14: neighbour cur.g (r, g, b, a); "other" equals cur3_g here
    other = mat3(tt1.rgb,cur.rgb,bb1.rgb);
    fweight = computeWeight(cur3_r,other);
    average.r += fweight*cur.g;
    weight.r += fweight;

    fweight = computeWeight(cur3_g,other);
    average.g += fweight*cur.g;
    weight.g += fweight;

    fweight = computeWeight(cur3_b,other);
    average.b += fweight*cur.g;
    weight.b += fweight;

    fweight = computeWeight(cur3_a,other);
    average.a += fweight*cur.g;
    weight.a += fweight;

    //point 15: neighbour cur.b (r, g, b, a); "other" equals cur3_b here
    other = mat3(tt1.gba,cur.gba,bb1.gba);
    fweight = computeWeight(cur3_r,other);
    average.r += fweight*cur.b;
    weight.r += fweight;

    fweight = computeWeight(cur3_g,other);
    average.g += fweight*cur.b;
    weight.g += fweight;

    fweight = computeWeight(cur3_b,other);
    average.b += fweight*cur.b;
    weight.b += fweight;

    fweight = computeWeight(cur3_a,other);
    average.a += fweight*cur.b;
    weight.a += fweight;

    //point: neighbour cur.a (g, b, a); "other" equals cur3_a here
    other = mat3(tt1.b,tt1.a,rt11.r,cur.b,cur.a,rr1.r,bb1.b,bb1.a,rb11.r);
    fweight = computeWeight(cur3_g,other);
    average.g += fweight*cur.a;
    weight.g += fweight;

    fweight = computeWeight(cur3_b,other);
    average.b += fweight*cur.a;
    weight.b += fweight;

    fweight = computeWeight(cur3_a,other);
    average.a += fweight*cur.a;
    weight.a += fweight;

    //point: neighbour rr1.r (b, a)
    other = mat3(tt1.a,rt11.r,rt11.g,cur.a,rr1.r,rr1.g,bb1.a,rb11.r,rb11.g);
    fweight = computeWeight(cur3_b,other);
    average.b += fweight*rr1.r;
    weight.b += fweight;

    fweight = computeWeight(cur3_a,other);
    average.a += fweight*rr1.r;
    weight.a += fweight;

    //point: neighbour rr1.g (a)
    other = mat3(rt11.rgb,rr1.rgb,rb11.rgb);
    fweight = computeWeight(cur3_a,other);
    average.a += fweight*rr1.g;
    weight.a += fweight;

    //fourth line: neighbours at y offset +1
    ///////////////////////////////
    //point 16: neighbour lb11.b (r)
    other = mat3(ll1.gba,lb11.gba,lb12.gba);
    fweight = computeWeight(cur3_r,other);
    average.r += fweight*lb11.b;
    weight.r += fweight;

    //point 17: neighbour lb11.a (r, g)
    other = mat3(ll1.b,ll1.a,cur.r,lb11.b,lb11.a,bb1.r,lb12.b,lb12.a,bb2.r);
    fweight = computeWeight(cur3_r,other);
    average.r += fweight*lb11.a;
    weight.r += fweight;

    fweight = computeWeight(cur3_g,other);
    average.g += fweight*lb11.a;
    weight.g += fweight;

    //point 18: neighbour bb1.r (r, g, b)
    other = mat3(ll1.a,cur.r,cur.g,lb11.a,bb1.r,bb1.g,lb12.a,bb2.r,bb2.g);
    fweight = computeWeight(cur3_r,other);
    average.r += fweight * bb1.r;
    weight.r += fweight;

    fweight = computeWeight(cur3_g,other);
    average.g += fweight*bb1.r;
    weight.g += fweight;

    fweight = computeWeight(cur3_b,other);
    average.b += fweight*bb1.r;
    weight.b += fweight;

    //point 19: neighbour bb1.g (r, g, b, a)
    other = mat3(cur.rgb,bb1.rgb,bb2.rgb);
    fweight = computeWeight(cur3_r,other);
    average.r += fweight*bb1.g;
    weight.r += fweight;

    fweight = computeWeight(cur3_g,other);
    average.g += fweight*bb1.g;
    weight.g += fweight;

    fweight = computeWeight(cur3_b,other);
    average.b += fweight*bb1.g;
    weight.b += fweight;

    fweight = computeWeight(cur3_a,other);
    average.a += fweight*bb1.g;
    weight.a += fweight;

    //point 20: neighbour bb1.b (r, g, b, a)
    other = mat3(cur.gba,bb1.gba,bb2.gba);
    fweight = computeWeight(cur3_r,other);
    average.r += fweight*bb1.b;
    weight.r += fweight;

    fweight = computeWeight(cur3_g,other);
    average.g += fweight*bb1.b;
    weight.g += fweight;

    fweight = computeWeight(cur3_b,other);
    average.b += fweight*bb1.b;
    weight.b += fweight;

    fweight = computeWeight(cur3_a,other);
    average.a += fweight*bb1.b;
    weight.a += fweight;

    //point: neighbour bb1.a (g, b, a)
    other = mat3(cur.b,cur.a,rr1.r,bb1.b,bb1.a,rb11.r,bb2.b,bb2.a,rb12.r);
    fweight = computeWeight(cur3_g,other);
    average.g += fweight*bb1.a;
    weight.g += fweight;

    fweight = computeWeight(cur3_b,other);
    average.b += fweight*bb1.a;
    weight.b += fweight;

    fweight = computeWeight(cur3_a,other);
    average.a += fweight*bb1.a;
    weight.a += fweight;

    //point: neighbour rb11.r (b, a)
    other = mat3(cur.a,rr1.r,rr1.g,bb1.a,rb11.r,rb11.g,bb2.a,rb12.r,rb12.g);
    fweight = computeWeight(cur3_b,other);
    average.b += fweight*rb11.r;
    weight.b += fweight;

    fweight = computeWeight(cur3_a,other);
    average.a += fweight*rb11.r;
    weight.a += fweight;

    //point: neighbour rb11.g (a)
    other = mat3(rr1.rgb,rb11.rgb,rb12.rgb);
    fweight = computeWeight(cur3_a,other);
    average.a += fweight*rb11.g;
    weight.a += fweight;

    //fifth line: neighbours at y offset +2
    ///////////////////////////////
    //point 21: neighbour lb12.b (r)
    other = mat3(lb11.gba,lb12.gba,lb13.gba);
    fweight = computeWeight(cur3_r,other);
    average.r += fweight*lb12.b;
    weight.r += fweight;

    //point 22: neighbour lb12.a (r, g)
    other = mat3(lb11.b,lb11.a,bb1.r,lb12.b,lb12.a,bb2.r,lb13.b,lb13.a,bb3.r);
    fweight = computeWeight(cur3_r,other);
    average.r += fweight*lb12.a;
    weight.r += fweight;

    fweight = computeWeight(cur3_g,other);
    average.g += fweight*lb12.a;
    weight.g += fweight;

    //point 23: neighbour bb2.r (r, g, b)
    other = mat3(lb11.a,bb1.r,bb1.g,lb12.a,bb2.r,bb2.g,lb13.a,bb3.r,bb3.g);
    fweight = computeWeight(cur3_r,other);
    average.r += fweight * bb2.r;
    weight.r += fweight;

    fweight = computeWeight(cur3_g,other);
    average.g += fweight*bb2.r;
    weight.g += fweight;

    fweight = computeWeight(cur3_b,other);
    average.b += fweight*bb2.r;
    weight.b += fweight;

    //point 24: neighbour bb2.g (r, g, b, a)
    other = mat3(bb1.rgb,bb2.rgb,bb3.rgb);
    fweight = computeWeight(cur3_r,other);
    average.r += fweight*bb2.g;
    weight.r += fweight;

    fweight = computeWeight(cur3_g,other);
    average.g += fweight*bb2.g;
    weight.g += fweight;

    fweight = computeWeight(cur3_b,other);
    average.b += fweight*bb2.g;
    weight.b += fweight;

    fweight = computeWeight(cur3_a,other);
    average.a += fweight*bb2.g;
    weight.a += fweight;

    //point 25: neighbour bb2.b (r, g, b, a)
    other = mat3(bb1.gba,bb2.gba,bb3.gba);
    fweight = computeWeight(cur3_r,other);
    average.r += fweight*bb2.b;
    weight.r += fweight;

    fweight = computeWeight(cur3_g,other);
    average.g += fweight*bb2.b;
    weight.g += fweight;

    fweight = computeWeight(cur3_b,other);
    average.b += fweight*bb2.b;
    weight.b += fweight;

    fweight = computeWeight(cur3_a,other);
    average.a += fweight*bb2.b;
    weight.a += fweight;

    //point: neighbour bb2.a (g, b, a)
    other = mat3(bb1.b,bb1.a,rb11.r,bb2.b,bb2.a,rb12.r,bb3.b,bb3.a,rb13.r);
    fweight = computeWeight(cur3_g,other);
    average.g += fweight*bb2.a;
    weight.g += fweight;

    fweight = computeWeight(cur3_b,other);
    average.b += fweight*bb2.a;
    weight.b += fweight;

    fweight = computeWeight(cur3_a,other);
    average.a += fweight*bb2.a;
    weight.a += fweight;

    //point: neighbour rb12.r (b, a)
    other = mat3(bb1.a,rb11.r,rb11.g,bb2.a,rb12.r,rb12.g,bb3.a,rb13.r,rb13.g);
    fweight = computeWeight(cur3_b,other);
    average.b += fweight*rb12.r;
    weight.b += fweight;

    fweight = computeWeight(cur3_a,other);
    average.a += fweight*rb12.r;
    weight.a += fweight;

    //point: neighbour rb12.g (a)
    other = mat3(rb11.rgb,rb12.rgb,rb13.rgb);
    fweight = computeWeight(cur3_a,other);
    average.a += fweight*rb12.g;
    weight.a += fweight;

    // tmp1 is 1.0 in every channel whose total weight is <= 0.0, else 0.0.
    vec4 tmp1 = step(weight,vec4(0.0));
    // Normalised weighted average, limited to [0.0, 1.0].
    // NOTE(review): the division is evaluated even where weight is 0.0; the mask
    // below multiplies that quotient by 0.0 rather than skipping it - confirm
    // the result is well-defined on the target GPU.
    vec4 tmp2 = average/weight;
    tmp2 = clamp(tmp2,0.0,1.0);
    // Per channel: use the weighted average where weight > 0.0, otherwise keep cur.
    gl_FragColor = (vec4(1.0)-tmp1) * tmp2 + tmp1*cur;
    }

    #37926

    Joe Davis
    Member

    Hi,

    The shader is extremely expensive. When targeting an SGX540 instruction set, our PVRShaderEditor tool reports the best case is 2712 instructions, the worst is 5284. As a comparison, a current high-end mobile game will usually have fragment shaders that are between 5-40 instructions.

    Although there isn’t a limit on the size of GLSL ES shader source, there is a limit to the number of instructions that can be processed by a given GPU. This limit will vary depending on the target GPU. If you hit this limit, the compiler should produce an error.

    I suspect the problem you’re seeing on the target is purely compilation time. On my desktop machine, the shader takes 2-3 seconds to compile. On a mobile device, this compilation time will be much higher. This high compile time may cause iOS to kill the compiler as it may appear unresponsive for a very long time. You would have to discuss the issue with Apple to understand why compilation doesn’t succeed on the iPhone 4.

    My recommendation would be to revisit your algorithm and see if there’s any way it can be simplified. If it’s not possible to find a compromise between speed and quality of your output, you could split the render into multiple passes (i.e. output texture of pass 1 is read in by pass 2, and so on).

    Regards,
    Joe

Viewing 4 posts - 1 through 4 (of 4 total)
You must be logged in to reply to this topic.