diff --git a/SPIRV/GLSL.ext.AMD.h b/SPIRV/GLSL.ext.AMD.h index d2098cc1a..59972bc58 100644 --- a/SPIRV/GLSL.ext.AMD.h +++ b/SPIRV/GLSL.ext.AMD.h @@ -28,11 +28,12 @@ #define GLSLextAMD_H enum BuiltIn; +enum Capability; enum Decoration; enum Op; static const int GLSLextAMDVersion = 100; -static const int GLSLextAMDRevision = 2; +static const int GLSLextAMDRevision = 3; // SPV_AMD_shader_ballot static const char* const E_SPV_AMD_shader_ballot = "SPV_AMD_shader_ballot"; @@ -113,4 +114,9 @@ enum GcnShaderAMD { // SPV_AMD_gpu_shader_half_float static const char* const E_SPV_AMD_gpu_shader_half_float = "SPV_AMD_gpu_shader_half_float"; +// SPV_AMD_texture_gather_bias_lod +static const char* const E_SPV_AMD_texture_gather_bias_lod = "SPV_AMD_texture_gather_bias_lod"; + +static const Capability OpCapabilityImageGatherBiasLodAMD = static_cast<Capability>(5009); + #endif // #ifndef GLSLextAMD_H diff --git a/SPIRV/GlslangToSpv.cpp b/SPIRV/GlslangToSpv.cpp index e175bfc65..f6c3e5d52 100755 --- a/SPIRV/GlslangToSpv.cpp +++ b/SPIRV/GlslangToSpv.cpp @@ -3044,7 +3044,7 @@ void TGlslangToSpvTraverser::translateArguments(const glslang::TIntermAggregate& if (i == 6) lvalue = true; break; - case glslang::EOpSparseTextureGather: + case glslang::EOpSparseTextureGather: if ((sampler.shadow && i == 3) || (! sampler.shadow && i == 2)) lvalue = true; break; @@ -3053,6 +3053,17 @@ void TGlslangToSpvTraverser::translateArguments(const glslang::TIntermAggregate& if ((sampler.shadow && i == 4) || (! sampler.shadow && i == 3)) lvalue = true; break; +#ifdef AMD_EXTENSIONS + case glslang::EOpSparseTextureGatherLod: + if (i == 3) + lvalue = true; + break; + case glslang::EOpSparseTextureGatherLodOffset: + case glslang::EOpSparseTextureGatherLodOffsets: + if (i == 4) + lvalue = true; + break; +#endif default: break; } @@ -3219,10 +3230,22 @@ spv::Id TGlslangToSpvTraverser::createImageTextureFunctionCall(glslang::TIntermO // check for bias argument bool bias = false; +#ifdef AMD_EXTENSIONS + if (! cracked.lod && ! 
cracked.grad && ! cracked.fetch && ! cubeCompare) { +#else if (! cracked.lod && ! cracked.gather && ! cracked.grad && ! cracked.fetch && ! cubeCompare) { +#endif int nonBiasArgCount = 2; +#ifdef AMD_EXTENSIONS + if (cracked.gather) + ++nonBiasArgCount; // comp argument should be present when bias argument is present +#endif if (cracked.offset) ++nonBiasArgCount; +#ifdef AMD_EXTENSIONS + else if (cracked.offsets) + ++nonBiasArgCount; +#endif if (cracked.grad) nonBiasArgCount += 2; if (cracked.lodClamp) @@ -3241,6 +3264,17 @@ spv::Id TGlslangToSpvTraverser::createImageTextureFunctionCall(glslang::TIntermO params.sampler = builder.createUnaryOp(spv::OpImage, builder.getImageType(params.sampler), params.sampler); } +#ifdef AMD_EXTENSIONS + if (cracked.gather) { + const auto& sourceExtensions = glslangIntermediate->getRequestedExtensions(); + if (bias || cracked.lod || + sourceExtensions.find(glslang::E_GL_AMD_texture_gather_bias_lod) != sourceExtensions.end()) { + builder.addExtension(spv::E_SPV_AMD_texture_gather_bias_lod); + builder.addCapability(spv::OpCapabilityImageGatherBiasLodAMD); + } + } +#endif + // set the rest of the arguments params.coords = arguments[1]; @@ -3308,21 +3342,20 @@ spv::Id TGlslangToSpvTraverser::createImageTextureFunctionCall(glslang::TIntermO ++extraArgs; } - // bias - if (bias) { - params.bias = arguments[2 + extraArgs]; - ++extraArgs; - } - // gather component if (cracked.gather && ! 
sampler.shadow) { // default component is 0, if missing, otherwise an argument if (2 + extraArgs < (int)arguments.size()) { params.component = arguments[2 + extraArgs]; ++extraArgs; - } else { + } else params.component = builder.makeIntConstant(0); - } + } + + // bias + if (bias) { + params.bias = arguments[2 + extraArgs]; + ++extraArgs; } // projective component (might not to move) diff --git a/SPIRV/doc.cpp b/SPIRV/doc.cpp index 49b734b5e..bae43bdcc 100755 --- a/SPIRV/doc.cpp +++ b/SPIRV/doc.cpp @@ -839,6 +839,10 @@ const char* CapabilityString(int info) case 4437: return "DeviceGroup"; case 4439: return "MultiView"; +#ifdef AMD_EXTENSIONS + case 5009: return "ImageGatherBiasLodAMD"; +#endif + #ifdef NV_EXTENSIONS case 5251: return "GeometryShaderPassthroughNV"; case 5254: return "ShaderViewportIndexLayerNV"; diff --git a/Test/baseResults/spv.textureGatherBiasLod.frag.out b/Test/baseResults/spv.textureGatherBiasLod.frag.out new file mode 100644 index 000000000..4e52cac59 --- /dev/null +++ b/Test/baseResults/spv.textureGatherBiasLod.frag.out @@ -0,0 +1,386 @@ +spv.textureGatherBiasLod.frag +Warning, version 450 is not yet complete; most version-specific features are present, but some are missing. 
+ +// Module Version 10000 +// Generated by (magic number): 80001 +// Id's are bound by 298 + + Capability Shader + Capability SparseResidency + Capability SampledCubeArray + Capability ImageGatherBiasLodAMD + Extension "SPV_AMD_texture_gather_bias_lod" + 1: ExtInstImport "GLSL.std.450" + MemoryModel Logical GLSL450 + EntryPoint Fragment 4 "main" 20 25 37 61 176 296 + ExecutionMode 4 OriginUpperLeft + Source GLSL 450 + SourceExtension "GL_AMD_texture_gather_bias_lod" + SourceExtension "GL_ARB_sparse_texture2" + Name 4 "main" + Name 9 "texel" + Name 12 "result" + Name 16 "s2D" + Name 20 "c2" + Name 25 "bias" + Name 33 "s2DArray" + Name 37 "c3" + Name 47 "sCube" + Name 58 "sCubeArray" + Name 61 "c4" + Name 104 "ResType" + Name 176 "lod" + Name 296 "fragColor" + Decorate 16(s2D) DescriptorSet 0 + Decorate 33(s2DArray) DescriptorSet 0 + Decorate 47(sCube) DescriptorSet 0 + Decorate 58(sCubeArray) DescriptorSet 0 + 2: TypeVoid + 3: TypeFunction 2 + 6: TypeFloat 32 + 7: TypeVector 6(float) 4 + 8: TypePointer Function 7(fvec4) + 10: 6(float) Constant 0 + 11: 7(fvec4) ConstantComposite 10 10 10 10 + 13: TypeImage 6(float) 2D sampled format:Unknown + 14: TypeSampledImage 13 + 15: TypePointer UniformConstant 14 + 16(s2D): 15(ptr) Variable UniformConstant + 18: TypeVector 6(float) 2 + 19: TypePointer Input 18(fvec2) + 20(c2): 19(ptr) Variable Input + 22: TypeInt 32 1 + 23: 22(int) Constant 0 + 24: TypePointer Input 6(float) + 25(bias): 24(ptr) Variable Input + 30: TypeImage 6(float) 2D array sampled format:Unknown + 31: TypeSampledImage 30 + 32: TypePointer UniformConstant 31 + 33(s2DArray): 32(ptr) Variable UniformConstant + 35: TypeVector 6(float) 3 + 36: TypePointer Input 35(fvec3) + 37(c3): 36(ptr) Variable Input + 39: 22(int) Constant 1 + 44: TypeImage 6(float) Cube sampled format:Unknown + 45: TypeSampledImage 44 + 46: TypePointer UniformConstant 45 + 47(sCube): 46(ptr) Variable UniformConstant + 50: 22(int) Constant 2 + 55: TypeImage 6(float) Cube array sampled 
format:Unknown + 56: TypeSampledImage 55 + 57: TypePointer UniformConstant 56 + 58(sCubeArray): 57(ptr) Variable UniformConstant + 60: TypePointer Input 7(fvec4) + 61(c4): 60(ptr) Variable Input + 63: 22(int) Constant 3 + 70: TypeVector 22(int) 2 + 71: 70(ivec2) ConstantComposite 23 23 + 78: 70(ivec2) ConstantComposite 23 39 + 85: TypeInt 32 0 + 86: 85(int) Constant 4 + 87: TypeArray 70(ivec2) 86 + 88: 70(ivec2) ConstantComposite 39 23 + 89: 70(ivec2) ConstantComposite 39 39 + 90: 87 ConstantComposite 71 78 88 89 + 104(ResType): TypeStruct 22(int) 7(fvec4) + 176(lod): 24(ptr) Variable Input + 295: TypePointer Output 7(fvec4) + 296(fragColor): 295(ptr) Variable Output + 4(main): 2 Function None 3 + 5: Label + 9(texel): 8(ptr) Variable Function + 12(result): 8(ptr) Variable Function + Store 9(texel) 11 + Store 12(result) 11 + 17: 14 Load 16(s2D) + 21: 18(fvec2) Load 20(c2) + 26: 6(float) Load 25(bias) + 27: 7(fvec4) ImageGather 17 21 23 Bias 26 + 28: 7(fvec4) Load 9(texel) + 29: 7(fvec4) FAdd 28 27 + Store 9(texel) 29 + 34: 31 Load 33(s2DArray) + 38: 35(fvec3) Load 37(c3) + 40: 6(float) Load 25(bias) + 41: 7(fvec4) ImageGather 34 38 39 Bias 40 + 42: 7(fvec4) Load 9(texel) + 43: 7(fvec4) FAdd 42 41 + Store 9(texel) 43 + 48: 45 Load 47(sCube) + 49: 35(fvec3) Load 37(c3) + 51: 6(float) Load 25(bias) + 52: 7(fvec4) ImageGather 48 49 50 Bias 51 + 53: 7(fvec4) Load 9(texel) + 54: 7(fvec4) FAdd 53 52 + Store 9(texel) 54 + 59: 56 Load 58(sCubeArray) + 62: 7(fvec4) Load 61(c4) + 64: 6(float) Load 25(bias) + 65: 7(fvec4) ImageGather 59 62 63 Bias 64 + 66: 7(fvec4) Load 9(texel) + 67: 7(fvec4) FAdd 66 65 + Store 9(texel) 67 + 68: 14 Load 16(s2D) + 69: 18(fvec2) Load 20(c2) + 72: 6(float) Load 25(bias) + 73: 7(fvec4) ImageGather 68 69 23 Bias ConstOffset 72 71 + 74: 7(fvec4) Load 9(texel) + 75: 7(fvec4) FAdd 74 73 + Store 9(texel) 75 + 76: 31 Load 33(s2DArray) + 77: 35(fvec3) Load 37(c3) + 79: 6(float) Load 25(bias) + 80: 7(fvec4) ImageGather 76 77 39 Bias ConstOffset 79 78 + 
81: 7(fvec4) Load 9(texel) + 82: 7(fvec4) FAdd 81 80 + Store 9(texel) 82 + 83: 14 Load 16(s2D) + 84: 18(fvec2) Load 20(c2) + 91: 6(float) Load 25(bias) + 92: 7(fvec4) ImageGather 83 84 23 Bias ConstOffsets 91 90 + 93: 7(fvec4) Load 9(texel) + 94: 7(fvec4) FAdd 93 92 + Store 9(texel) 94 + 95: 31 Load 33(s2DArray) + 96: 35(fvec3) Load 37(c3) + 97: 6(float) Load 25(bias) + 98: 7(fvec4) ImageGather 95 96 39 Bias ConstOffsets 97 90 + 99: 7(fvec4) Load 9(texel) + 100: 7(fvec4) FAdd 99 98 + Store 9(texel) 100 + 101: 14 Load 16(s2D) + 102: 18(fvec2) Load 20(c2) + 103: 6(float) Load 25(bias) + 105:104(ResType) ImageSparseGather 101 102 23 Bias 103 + 106: 7(fvec4) CompositeExtract 105 1 + Store 12(result) 106 + 107: 22(int) CompositeExtract 105 0 + 108: 7(fvec4) Load 12(result) + 109: 7(fvec4) Load 9(texel) + 110: 7(fvec4) FAdd 109 108 + Store 9(texel) 110 + 111: 31 Load 33(s2DArray) + 112: 35(fvec3) Load 37(c3) + 113: 6(float) Load 25(bias) + 114:104(ResType) ImageSparseGather 111 112 39 Bias 113 + 115: 7(fvec4) CompositeExtract 114 1 + Store 12(result) 115 + 116: 22(int) CompositeExtract 114 0 + 117: 7(fvec4) Load 12(result) + 118: 7(fvec4) Load 9(texel) + 119: 7(fvec4) FAdd 118 117 + Store 9(texel) 119 + 120: 45 Load 47(sCube) + 121: 35(fvec3) Load 37(c3) + 122: 6(float) Load 25(bias) + 123:104(ResType) ImageSparseGather 120 121 50 Bias 122 + 124: 7(fvec4) CompositeExtract 123 1 + Store 12(result) 124 + 125: 22(int) CompositeExtract 123 0 + 126: 7(fvec4) Load 12(result) + 127: 7(fvec4) Load 9(texel) + 128: 7(fvec4) FAdd 127 126 + Store 9(texel) 128 + 129: 56 Load 58(sCubeArray) + 130: 7(fvec4) Load 61(c4) + 131: 6(float) Load 25(bias) + 132:104(ResType) ImageSparseGather 129 130 50 Bias 131 + 133: 7(fvec4) CompositeExtract 132 1 + Store 12(result) 133 + 134: 22(int) CompositeExtract 132 0 + 135: 7(fvec4) Load 12(result) + 136: 7(fvec4) Load 9(texel) + 137: 7(fvec4) FAdd 136 135 + Store 9(texel) 137 + 138: 14 Load 16(s2D) + 139: 18(fvec2) Load 20(c2) + 140: 6(float) Load 
25(bias) + 141:104(ResType) ImageSparseGather 138 139 23 Bias ConstOffset 140 71 + 142: 7(fvec4) CompositeExtract 141 1 + Store 12(result) 142 + 143: 22(int) CompositeExtract 141 0 + 144: 7(fvec4) Load 12(result) + 145: 7(fvec4) Load 9(texel) + 146: 7(fvec4) FAdd 145 144 + Store 9(texel) 146 + 147: 31 Load 33(s2DArray) + 148: 35(fvec3) Load 37(c3) + 149: 6(float) Load 25(bias) + 150:104(ResType) ImageSparseGather 147 148 39 Bias ConstOffset 149 78 + 151: 7(fvec4) CompositeExtract 150 1 + Store 12(result) 151 + 152: 22(int) CompositeExtract 150 0 + 153: 7(fvec4) Load 12(result) + 154: 7(fvec4) Load 9(texel) + 155: 7(fvec4) FAdd 154 153 + Store 9(texel) 155 + 156: 14 Load 16(s2D) + 157: 18(fvec2) Load 20(c2) + 158: 6(float) Load 25(bias) + 159:104(ResType) ImageSparseGather 156 157 23 Bias ConstOffsets 158 90 + 160: 7(fvec4) CompositeExtract 159 1 + Store 12(result) 160 + 161: 22(int) CompositeExtract 159 0 + 162: 7(fvec4) Load 12(result) + 163: 7(fvec4) Load 9(texel) + 164: 7(fvec4) FAdd 163 162 + Store 9(texel) 164 + 165: 31 Load 33(s2DArray) + 166: 35(fvec3) Load 37(c3) + 167: 6(float) Load 25(bias) + 168:104(ResType) ImageSparseGather 165 166 39 Bias ConstOffsets 167 90 + 169: 7(fvec4) CompositeExtract 168 1 + Store 12(result) 169 + 170: 22(int) CompositeExtract 168 0 + 171: 7(fvec4) Load 12(result) + 172: 7(fvec4) Load 9(texel) + 173: 7(fvec4) FAdd 172 171 + Store 9(texel) 173 + 174: 14 Load 16(s2D) + 175: 18(fvec2) Load 20(c2) + 177: 6(float) Load 176(lod) + 178: 7(fvec4) ImageGather 174 175 23 Lod 177 + 179: 7(fvec4) Load 9(texel) + 180: 7(fvec4) FAdd 179 178 + Store 9(texel) 180 + 181: 31 Load 33(s2DArray) + 182: 35(fvec3) Load 37(c3) + 183: 6(float) Load 176(lod) + 184: 7(fvec4) ImageGather 181 182 39 Lod 183 + 185: 7(fvec4) Load 9(texel) + 186: 7(fvec4) FAdd 185 184 + Store 9(texel) 186 + 187: 45 Load 47(sCube) + 188: 35(fvec3) Load 37(c3) + 189: 6(float) Load 176(lod) + 190: 7(fvec4) ImageGather 187 188 50 Lod 189 + 191: 7(fvec4) Load 9(texel) + 192: 
7(fvec4) FAdd 191 190 + Store 9(texel) 192 + 193: 56 Load 58(sCubeArray) + 194: 7(fvec4) Load 61(c4) + 195: 6(float) Load 176(lod) + 196: 7(fvec4) ImageGather 193 194 63 Lod 195 + 197: 7(fvec4) Load 9(texel) + 198: 7(fvec4) FAdd 197 196 + Store 9(texel) 198 + 199: 14 Load 16(s2D) + 200: 18(fvec2) Load 20(c2) + 201: 6(float) Load 176(lod) + 202: 7(fvec4) ImageGather 199 200 23 Lod ConstOffset 201 71 + 203: 7(fvec4) Load 9(texel) + 204: 7(fvec4) FAdd 203 202 + Store 9(texel) 204 + 205: 31 Load 33(s2DArray) + 206: 35(fvec3) Load 37(c3) + 207: 6(float) Load 176(lod) + 208: 7(fvec4) ImageGather 205 206 39 Lod ConstOffset 207 78 + 209: 7(fvec4) Load 9(texel) + 210: 7(fvec4) FAdd 209 208 + Store 9(texel) 210 + 211: 14 Load 16(s2D) + 212: 18(fvec2) Load 20(c2) + 213: 6(float) Load 176(lod) + 214: 7(fvec4) ImageGather 211 212 23 Lod ConstOffsets 213 90 + 215: 7(fvec4) Load 9(texel) + 216: 7(fvec4) FAdd 215 214 + Store 9(texel) 216 + 217: 31 Load 33(s2DArray) + 218: 35(fvec3) Load 37(c3) + 219: 6(float) Load 176(lod) + 220: 7(fvec4) ImageGather 217 218 39 Lod ConstOffsets 219 90 + 221: 7(fvec4) Load 9(texel) + 222: 7(fvec4) FAdd 221 220 + Store 9(texel) 222 + 223: 14 Load 16(s2D) + 224: 18(fvec2) Load 20(c2) + 225: 6(float) Load 176(lod) + 226:104(ResType) ImageSparseGather 223 224 23 Lod 225 + 227: 7(fvec4) CompositeExtract 226 1 + Store 12(result) 227 + 228: 22(int) CompositeExtract 226 0 + 229: 7(fvec4) Load 12(result) + 230: 7(fvec4) Load 9(texel) + 231: 7(fvec4) FAdd 230 229 + Store 9(texel) 231 + 232: 31 Load 33(s2DArray) + 233: 35(fvec3) Load 37(c3) + 234: 6(float) Load 176(lod) + 235:104(ResType) ImageSparseGather 232 233 39 Lod 234 + 236: 7(fvec4) CompositeExtract 235 1 + Store 12(result) 236 + 237: 22(int) CompositeExtract 235 0 + 238: 7(fvec4) Load 12(result) + 239: 7(fvec4) Load 9(texel) + 240: 7(fvec4) FAdd 239 238 + Store 9(texel) 240 + 241: 45 Load 47(sCube) + 242: 35(fvec3) Load 37(c3) + 243: 6(float) Load 176(lod) + 244:104(ResType) ImageSparseGather 241 242 
50 Lod 243 + 245: 7(fvec4) CompositeExtract 244 1 + Store 12(result) 245 + 246: 22(int) CompositeExtract 244 0 + 247: 7(fvec4) Load 12(result) + 248: 7(fvec4) Load 9(texel) + 249: 7(fvec4) FAdd 248 247 + Store 9(texel) 249 + 250: 56 Load 58(sCubeArray) + 251: 7(fvec4) Load 61(c4) + 252: 6(float) Load 176(lod) + 253:104(ResType) ImageSparseGather 250 251 50 Lod 252 + 254: 7(fvec4) CompositeExtract 253 1 + Store 12(result) 254 + 255: 22(int) CompositeExtract 253 0 + 256: 7(fvec4) Load 12(result) + 257: 7(fvec4) Load 9(texel) + 258: 7(fvec4) FAdd 257 256 + Store 9(texel) 258 + 259: 14 Load 16(s2D) + 260: 18(fvec2) Load 20(c2) + 261: 6(float) Load 176(lod) + 262:104(ResType) ImageSparseGather 259 260 23 Lod ConstOffset 261 71 + 263: 7(fvec4) CompositeExtract 262 1 + Store 12(result) 263 + 264: 22(int) CompositeExtract 262 0 + 265: 7(fvec4) Load 12(result) + 266: 7(fvec4) Load 9(texel) + 267: 7(fvec4) FAdd 266 265 + Store 9(texel) 267 + 268: 31 Load 33(s2DArray) + 269: 35(fvec3) Load 37(c3) + 270: 6(float) Load 176(lod) + 271:104(ResType) ImageSparseGather 268 269 39 Lod ConstOffset 270 78 + 272: 7(fvec4) CompositeExtract 271 1 + Store 12(result) 272 + 273: 22(int) CompositeExtract 271 0 + 274: 7(fvec4) Load 12(result) + 275: 7(fvec4) Load 9(texel) + 276: 7(fvec4) FAdd 275 274 + Store 9(texel) 276 + 277: 14 Load 16(s2D) + 278: 18(fvec2) Load 20(c2) + 279: 6(float) Load 176(lod) + 280:104(ResType) ImageSparseGather 277 278 23 Lod ConstOffsets 279 90 + 281: 7(fvec4) CompositeExtract 280 1 + Store 12(result) 281 + 282: 22(int) CompositeExtract 280 0 + 283: 7(fvec4) Load 12(result) + 284: 7(fvec4) Load 9(texel) + 285: 7(fvec4) FAdd 284 283 + Store 9(texel) 285 + 286: 31 Load 33(s2DArray) + 287: 35(fvec3) Load 37(c3) + 288: 6(float) Load 176(lod) + 289:104(ResType) ImageSparseGather 286 287 39 Lod ConstOffsets 288 90 + 290: 7(fvec4) CompositeExtract 289 1 + Store 12(result) 290 + 291: 22(int) CompositeExtract 289 0 + 292: 7(fvec4) Load 12(result) + 293: 7(fvec4) Load 
9(texel) + 294: 7(fvec4) FAdd 293 292 + Store 9(texel) 294 + 297: 7(fvec4) Load 9(texel) + Store 296(fragColor) 297 + Return + FunctionEnd diff --git a/Test/spv.textureGatherBiasLod.frag b/Test/spv.textureGatherBiasLod.frag new file mode 100644 index 000000000..35bd035aa --- /dev/null +++ b/Test/spv.textureGatherBiasLod.frag @@ -0,0 +1,88 @@ +#version 450 core + +#extension GL_ARB_sparse_texture2: enable +#extension GL_AMD_texture_gather_bias_lod: enable + +uniform sampler2D s2D; +uniform sampler2DArray s2DArray; +uniform samplerCube sCube; +uniform samplerCubeArray sCubeArray; + +in vec2 c2; +in vec3 c3; +in vec4 c4; + +in float lod; +in float bias; + +out vec4 fragColor; + +void main() +{ + vec4 texel = vec4(0.0); + vec4 result = vec4(0.0); + + const ivec2 offsets[4] = { ivec2(0, 0), ivec2(0, 1), ivec2(1, 0), ivec2(1, 1) }; + + texel += textureGather(s2D, c2, 0, bias); + texel += textureGather(s2DArray, c3, 1, bias); + texel += textureGather(sCube, c3, 2, bias); + texel += textureGather(sCubeArray, c4, 3, bias); + + texel += textureGatherOffset(s2D, c2, offsets[0], 0, bias); + texel += textureGatherOffset(s2DArray, c3, offsets[1], 1, bias); + + texel += textureGatherOffsets(s2D, c2, offsets, 0, bias); + texel += textureGatherOffsets(s2DArray, c3, offsets, 1, bias); + + sparseTextureGatherARB(s2D, c2, result, 0, bias); + texel += result; + sparseTextureGatherARB(s2DArray, c3, result, 1, bias); + texel += result; + sparseTextureGatherARB(sCube, c3, result, 2, bias); + texel += result; + sparseTextureGatherARB(sCubeArray, c4, result, 2, bias); + texel += result; + + sparseTextureGatherOffsetARB(s2D, c2, offsets[0], result, 0, bias); + texel += result; + sparseTextureGatherOffsetARB(s2DArray, c3, offsets[1], result, 1, bias); + texel += result; + + sparseTextureGatherOffsetsARB(s2D, c2, offsets, result, 0, bias); + texel += result; + sparseTextureGatherOffsetsARB(s2DArray, c3, offsets, result, 1, bias); + texel += result; + + texel += textureGatherLodAMD(s2D, c2, 
lod); + texel += textureGatherLodAMD(s2DArray, c3, lod, 1); + texel += textureGatherLodAMD(sCube, c3, lod, 2); + texel += textureGatherLodAMD(sCubeArray, c4, lod, 3); + + texel += textureGatherLodOffsetAMD(s2D, c2, lod, offsets[0]); + texel += textureGatherLodOffsetAMD(s2DArray, c3, lod, offsets[1], 1); + + texel += textureGatherLodOffsetsAMD(s2D, c2, lod, offsets); + texel += textureGatherLodOffsetsAMD(s2DArray, c3, lod, offsets, 1); + + sparseTextureGatherLodAMD(s2D, c2, lod, result); + texel += result; + sparseTextureGatherLodAMD(s2DArray, c3, lod, result, 1); + texel += result; + sparseTextureGatherLodAMD(sCube, c3, lod, result, 2); + texel += result; + sparseTextureGatherLodAMD(sCubeArray, c4, lod, result, 2); + texel += result; + + sparseTextureGatherLodOffsetAMD(s2D, c2, lod, offsets[0], result); + texel += result; + sparseTextureGatherLodOffsetAMD(s2DArray, c3, lod, offsets[1], result, 1); + texel += result; + + sparseTextureGatherLodOffsetsAMD(s2D, c2, lod, offsets, result); + texel += result; + sparseTextureGatherLodOffsetsAMD(s2DArray, c3, lod, offsets, result, 1); + texel += result; + + fragColor = texel; +} diff --git a/glslang/Include/intermediate.h b/glslang/Include/intermediate.h index 5115db9d5..9618a992f 100644 --- a/glslang/Include/intermediate.h +++ b/glslang/Include/intermediate.h @@ -567,6 +567,11 @@ enum TOperator { EOpTextureOffsetClamp, EOpTextureGradClamp, EOpTextureGradOffsetClamp, +#ifdef AMD_EXTENSIONS + EOpTextureGatherLod, + EOpTextureGatherLodOffset, + EOpTextureGatherLodOffsets, +#endif EOpSparseTextureGuardBegin, @@ -586,6 +591,11 @@ enum TOperator { EOpSparseTextureOffsetClamp, EOpSparseTextureGradClamp, EOpSparseTextureGradOffsetClamp, +#ifdef AMD_EXTENSIONS + EOpSparseTextureGatherLod, + EOpSparseTextureGatherLodOffset, + EOpSparseTextureGatherLodOffsets, +#endif EOpSparseTextureGuardEnd, EOpSamplingGuardEnd, @@ -1075,6 +1085,25 @@ public: cracked.gather = true; cracked.offsets = true; break; +#ifdef AMD_EXTENSIONS + case 
EOpTextureGatherLod: + case EOpSparseTextureGatherLod: + cracked.gather = true; + cracked.lod = true; + break; + case EOpTextureGatherLodOffset: + case EOpSparseTextureGatherLodOffset: + cracked.gather = true; + cracked.offset = true; + cracked.lod = true; + break; + case EOpTextureGatherLodOffsets: + case EOpSparseTextureGatherLodOffsets: + cracked.gather = true; + cracked.offsets = true; + cracked.lod = true; + break; +#endif case EOpSubpassLoad: case EOpSubpassLoadMS: cracked.subpass = true; diff --git a/glslang/MachineIndependent/Initialize.cpp b/glslang/MachineIndependent/Initialize.cpp index 0771c86a5..083b3cefe 100644 --- a/glslang/MachineIndependent/Initialize.cpp +++ b/glslang/MachineIndependent/Initialize.cpp @@ -3844,6 +3844,7 @@ void TBuiltIns::add2ndGenerationSamplingImaging(int version, EProfile profile, c else { addSamplingFunctions(sampler, typeName, version, profile); addGatherFunctions(sampler, typeName, version, profile); + if (spvVersion.vulkan > 0 && sampler.dim == EsdBuffer && sampler.isCombined()) { // Vulkan wants a textureBuffer to allow texelFetch() -- // a sampled image with no sampler. 
@@ -4349,6 +4350,7 @@ void TBuiltIns::addGatherFunctions(TSampler sampler, const TString& typeName, in default: break; } + if (sparse) s.append("ARB"); s.append("("); @@ -4388,6 +4390,116 @@ void TBuiltIns::addGatherFunctions(TSampler sampler, const TString& typeName, in } } } + +#ifdef AMD_EXTENSIONS + if (sampler.dim == EsdRect || sampler.shadow) + return; + + if (profile == EEsProfile || version < 450) + return; + + for (int bias = 0; bias < 2; ++bias) { // loop over presence of bias argument + + for (int lod = 0; lod < 2; ++lod) { // loop over presence of lod argument + + if ((lod && bias) || (lod == 0 && bias == 0)) + continue; + + for (int offset = 0; offset < 3; ++offset) { // loop over three forms of offset in the call name: none, Offset, and Offsets + + for (int comp = 0; comp < 2; ++comp) { // loop over presence of comp argument + + if (comp == 0 && bias) + continue; + + if (offset > 0 && sampler.dim == EsdCube) + continue; + + for (int sparse = 0; sparse <= 1; ++sparse) { // loop over "bool" sparse or not + if (sparse && (profile == EEsProfile || version < 450)) + continue; + + TString s; + + // return type + if (sparse) + s.append("int "); + else { + s.append(prefixes[sampler.type]); + s.append("vec4 "); + } + + // name + if (sparse) + s.append("sparseTextureGather"); + else + s.append("textureGather"); + + if (lod) + s.append("Lod"); + + switch (offset) { + case 1: + s.append("Offset"); + break; + case 2: + s.append("Offsets"); + default: + break; + } + + if (lod) + s.append("AMD"); + else if (sparse) + s.append("ARB"); + + s.append("("); + + // sampler type argument + s.append(typeName); + + // P coordinate argument + s.append(",vec"); + int totalDims = dimMap[sampler.dim] + (sampler.arrayed ? 
1 : 0); + s.append(postfixes[totalDims]); + + // lod argument + if (lod) + s.append(",float"); + + // offset argument + if (offset > 0) { + s.append(",ivec2"); + if (offset == 2) + s.append("[4]"); + } + + // texel out (for sparse texture) + if (sparse) { + s.append(",out "); + s.append(prefixes[sampler.type]); + s.append("vec4 "); + } + + // comp argument + if (comp) + s.append(",int"); + + // bias argument + if (bias) + s.append(",float"); + + s.append(");\n"); + if (bias) + stageBuiltins[EShLangFragment].append(s); + else + commonBuiltins.append(s); + } + } + } + } + } +#endif } // @@ -5366,6 +5478,16 @@ void TBuiltIns::identifyBuiltIns(int version, EProfile profile, const SpvVersion BuiltInVariable("gl_BaryCoordSmoothSampleAMD", EbvBaryCoordSmoothSample, symbolTable); BuiltInVariable("gl_BaryCoordPullModelAMD", EbvBaryCoordPullModel, symbolTable); } + + // E_GL_AMD_texture_gather_bias_lod + if (profile != EEsProfile) { + symbolTable.setFunctionExtensions("textureGatherLodAMD", 1, &E_GL_AMD_texture_gather_bias_lod); + symbolTable.setFunctionExtensions("textureGatherLodOffsetAMD", 1, &E_GL_AMD_texture_gather_bias_lod); + symbolTable.setFunctionExtensions("textureGatherLodOffsetsAMD", 1, &E_GL_AMD_texture_gather_bias_lod); + symbolTable.setFunctionExtensions("sparseTextureGatherLodAMD", 1, &E_GL_AMD_texture_gather_bias_lod); + symbolTable.setFunctionExtensions("sparseTextureGatherLodOffsetAMD", 1, &E_GL_AMD_texture_gather_bias_lod); + symbolTable.setFunctionExtensions("sparseTextureGatherLodOffsetsAMD", 1, &E_GL_AMD_texture_gather_bias_lod); + } #endif symbolTable.setVariableExtensions("gl_FragDepthEXT", 1, &E_GL_EXT_frag_depth); @@ -5752,6 +5874,13 @@ void TBuiltIns::identifyBuiltIns(int version, EProfile profile, const SpvVersion symbolTable.relateToOperator("cubeFaceIndexAMD", EOpCubeFaceIndex); symbolTable.relateToOperator("cubeFaceCoordAMD", EOpCubeFaceCoord); symbolTable.relateToOperator("timeAMD", EOpTime); + + 
symbolTable.relateToOperator("textureGatherLodAMD", EOpTextureGatherLod); + symbolTable.relateToOperator("textureGatherLodOffsetAMD", EOpTextureGatherLodOffset); + symbolTable.relateToOperator("textureGatherLodOffsetsAMD", EOpTextureGatherLodOffsets); + symbolTable.relateToOperator("sparseTextureGatherLodAMD", EOpSparseTextureGatherLod); + symbolTable.relateToOperator("sparseTextureGatherLodOffsetAMD", EOpSparseTextureGatherLodOffset); + symbolTable.relateToOperator("sparseTextureGatherLodOffsetsAMD", EOpSparseTextureGatherLodOffsets); #endif } } diff --git a/glslang/MachineIndependent/ParseHelper.cpp b/glslang/MachineIndependent/ParseHelper.cpp index c1f0b30fc..f4cf354c7 100644 --- a/glslang/MachineIndependent/ParseHelper.cpp +++ b/glslang/MachineIndependent/ParseHelper.cpp @@ -1442,9 +1442,56 @@ void TParseContext::builtInOpCheck(const TSourceLoc& loc, const TFunction& fnCan error(loc, "must be a compile-time constant:", feature, "component argument"); } +#ifdef AMD_EXTENSIONS + bool bias = false; + if (callNode.getOp() == EOpTextureGather) + bias = fnCandidate.getParamCount() > 3; + else if (callNode.getOp() == EOpTextureGatherOffset || + callNode.getOp() == EOpTextureGatherOffsets) + bias = fnCandidate.getParamCount() > 4; + + if (bias) { + featureString = fnCandidate.getName() + " with bias argument"; + feature = featureString.c_str(); + profileRequires(loc, ~EEsProfile, 450, nullptr, feature); + requireExtensions(loc, 1, &E_GL_AMD_texture_gather_bias_lod, feature); + } +#endif + break; } +#ifdef AMD_EXTENSIONS + case EOpSparseTextureGather: + case EOpSparseTextureGatherOffset: + case EOpSparseTextureGatherOffsets: + { + bool bias = false; + if (callNode.getOp() == EOpSparseTextureGather) + bias = fnCandidate.getParamCount() > 4; + else if (callNode.getOp() == EOpSparseTextureGatherOffset || + callNode.getOp() == EOpSparseTextureGatherOffsets) + bias = fnCandidate.getParamCount() > 5; + + if (bias) { + TString featureString = fnCandidate.getName() + " with bias 
argument"; + const char* feature = featureString.c_str(); + profileRequires(loc, ~EEsProfile, 450, nullptr, feature); + requireExtensions(loc, 1, &E_GL_AMD_texture_gather_bias_lod, feature); + } + + break; + } + + case EOpSparseTextureGatherLod: + case EOpSparseTextureGatherLodOffset: + case EOpSparseTextureGatherLodOffsets: + { + requireExtensions(loc, 1, &E_GL_ARB_sparse_texture2, fnCandidate.getName().c_str()); + break; + } +#endif + case EOpTextureOffset: case EOpTextureFetchOffset: case EOpTextureProjOffset: diff --git a/glslang/MachineIndependent/Versions.cpp b/glslang/MachineIndependent/Versions.cpp index 528333f46..9ae6bc168 100644 --- a/glslang/MachineIndependent/Versions.cpp +++ b/glslang/MachineIndependent/Versions.cpp @@ -194,6 +194,7 @@ void TParseVersions::initializeExtensionBehavior() extensionBehavior[E_GL_AMD_shader_explicit_vertex_parameter] = EBhDisable; extensionBehavior[E_GL_AMD_gcn_shader] = EBhDisable; extensionBehavior[E_GL_AMD_gpu_shader_half_float] = EBhDisable; + extensionBehavior[E_GL_AMD_texture_gather_bias_lod] = EBhDisable; #endif #ifdef NV_EXTENSIONS @@ -316,6 +317,7 @@ void TParseVersions::getPreamble(std::string& preamble) "#define GL_AMD_shader_explicit_vertex_parameter 1\n" "#define GL_AMD_gcn_shader 1\n" "#define GL_AMD_gpu_shader_half_float 1\n" + "#define GL_AMD_texture_gather_bias_lod 1\n" #endif #ifdef NV_EXTENSIONS diff --git a/glslang/MachineIndependent/Versions.h b/glslang/MachineIndependent/Versions.h index 7f5d33d5e..483140ca8 100644 --- a/glslang/MachineIndependent/Versions.h +++ b/glslang/MachineIndependent/Versions.h @@ -146,6 +146,7 @@ const char* const E_GL_AMD_shader_trinary_minmax = "GL_AMD_shader const char* const E_GL_AMD_shader_explicit_vertex_parameter = "GL_AMD_shader_explicit_vertex_parameter"; const char* const E_GL_AMD_gcn_shader = "GL_AMD_gcn_shader"; const char* const E_GL_AMD_gpu_shader_half_float = "GL_AMD_gpu_shader_half_float"; +const char* const E_GL_AMD_texture_gather_bias_lod = 
"GL_AMD_texture_gather_bias_lod"; #endif #ifdef NV_EXTENSIONS diff --git a/glslang/MachineIndependent/intermOut.cpp b/glslang/MachineIndependent/intermOut.cpp index 31f599c47..59faba8bf 100644 --- a/glslang/MachineIndependent/intermOut.cpp +++ b/glslang/MachineIndependent/intermOut.cpp @@ -390,6 +390,8 @@ bool TOutputTraverser::visitUnary(TVisit /* visit */, TIntermUnary* node) case EOpRcp: out.debug << "rcp"; break; case EOpSaturate: out.debug << "saturate"; break; + case EOpSparseTexelsResident: out.debug << "sparseTexelsResident"; break; + #ifdef AMD_EXTENSIONS case EOpMinInvocations: out.debug << "minInvocations"; break; case EOpMaxInvocations: out.debug << "maxInvocations"; break; @@ -647,6 +649,37 @@ bool TOutputTraverser::visitAggregate(TVisit /* visit */, TIntermAggregate* node case EOpTextureGather: out.debug << "textureGather"; break; case EOpTextureGatherOffset: out.debug << "textureGatherOffset"; break; case EOpTextureGatherOffsets: out.debug << "textureGatherOffsets"; break; + case EOpTextureClamp: out.debug << "textureClamp"; break; + case EOpTextureOffsetClamp: out.debug << "textureOffsetClamp"; break; + case EOpTextureGradClamp: out.debug << "textureGradClamp"; break; + case EOpTextureGradOffsetClamp: out.debug << "textureGradOffsetClamp"; break; +#ifdef AMD_EXTENSIONS + case EOpTextureGatherLod: out.debug << "textureGatherLod"; break; + case EOpTextureGatherLodOffset: out.debug << "textureGatherLodOffset"; break; + case EOpTextureGatherLodOffsets: out.debug << "textureGatherLodOffsets"; break; +#endif + + case EOpSparseTexture: out.debug << "sparseTexture"; break; + case EOpSparseTextureOffset: out.debug << "sparseTextureOffset"; break; + case EOpSparseTextureLod: out.debug << "sparseTextureLod"; break; + case EOpSparseTextureLodOffset: out.debug << "sparseTextureLodOffset"; break; + case EOpSparseTextureFetch: out.debug << "sparseTexelFetch"; break; + case EOpSparseTextureFetchOffset: out.debug << "sparseTexelFetchOffset"; break; + case 
EOpSparseTextureGrad: out.debug << "sparseTextureGrad"; break; + case EOpSparseTextureGradOffset: out.debug << "sparseTextureGradOffset"; break; + case EOpSparseTextureGather: out.debug << "sparseTextureGather"; break; + case EOpSparseTextureGatherOffset: out.debug << "sparseTextureGatherOffset"; break; + case EOpSparseTextureGatherOffsets: out.debug << "sparseTextureGatherOffsets"; break; + case EOpSparseImageLoad: out.debug << "sparseImageLoad"; break; + case EOpSparseTextureClamp: out.debug << "sparseTextureClamp"; break; + case EOpSparseTextureOffsetClamp: out.debug << "sparseTextureOffsetClamp"; break; + case EOpSparseTextureGradClamp: out.debug << "sparseTextureGradClamp"; break; + case EOpSparseTextureGradOffsetClamp: out.debug << "sparseTextureGradOffsetClamp"; break; +#ifdef AMD_EXTENSIONS + case EOpSparseTextureGatherLod: out.debug << "sparseTextureGatherLod"; break; + case EOpSparseTextureGatherLodOffset: out.debug << "sparseTextureGatherLodOffset"; break; + case EOpSparseTextureGatherLodOffsets: out.debug << "sparseTextureGatherLodOffsets"; break; +#endif case EOpAddCarry: out.debug << "addCarry"; break; case EOpSubBorrow: out.debug << "subBorrow"; break; diff --git a/gtests/Spv.FromFile.cpp b/gtests/Spv.FromFile.cpp index 1552b9523..27b683783 100644 --- a/gtests/Spv.FromFile.cpp +++ b/gtests/Spv.FromFile.cpp @@ -396,7 +396,8 @@ INSTANTIATE_TEST_CASE_P( Glsl, CompileVulkanToSpirvTestAMD, ::testing::ValuesIn(std::vector<std::string>({ "spv.float16.frag", - "spv.shaderBallotAMD.comp" + "spv.shaderBallotAMD.comp", + "spv.textureGatherBiasLod.frag" })), FileNameAsCustomTestSuffix );