diff --git a/docs/DXIL.rst b/docs/DXIL.rst index e9514212dd..795828b33a 100644 --- a/docs/DXIL.rst +++ b/docs/DXIL.rst @@ -1758,6 +1758,15 @@ The following signature shows the operation syntax:: The call respects SM5.1 OOB and alignment rules. +The ``alignment`` parameter specifies the **absolute alignment** of the +effective address (``base address + index``). For regular ``Load`` operations, +this defaults to 4 bytes for raw buffers. For templated ``Load`` operations, +this defaults to the size of the largest scalar component contained in the +aggregate template parameter type. The HLSL +``AlignedLoad(offset, alignment)`` intrinsic allows applications to specify +custom alignment values when they can guarantee higher alignment, enabling +backend compiler optimizations. + ==================== ===================================================== Valid resource type # of active coordinates ==================== ===================================================== @@ -1816,6 +1825,15 @@ The call respects SM5.1 OOB and alignment rules. The write mask indicates which components are written (x - 1, y - 2, z - 4, w - 8), similar to DXBC. For RWTypedBuffer, the mask must cover all resource components. For RWRawBuffer and RWStructuredBuffer, valid masks are: x, xy, xyz, xyzw. +The ``alignment`` parameter specifies the **absolute alignment** of the +effective address (``base address + index``). For regular ``Store`` operations, +this defaults to 4 bytes for raw buffers. For templated ``Store`` operations, +this defaults to the size of the largest scalar component contained in the +aggregate template parameter type. The HLSL +``AlignedStore(offset, alignment, value)`` intrinsic allows applications to +specify custom alignment values when they can guarantee higher alignment, +enabling backend compiler optimizations. 
+ ==================== ===================================================== Valid resource type # of active coordinates ==================== ===================================================== diff --git a/docs/SPIR-V.rst b/docs/SPIR-V.rst index 71935e1757..38535125af 100644 --- a/docs/SPIR-V.rst +++ b/docs/SPIR-V.rst @@ -2768,6 +2768,15 @@ used to store a 32-bit unsigned integer. For ``Store2``, ``Store3``, and ``Store done 2, 3, and 4 times, respectively. Each time the word offset is incremented by 1 before performing ``OpAccessChain``. +``.AlignedLoad()``, ``.AlignedStore()`` +++++++++++++++++++++++++++++++++++++++++++++++ +These functions work identically to their non-aligned counterparts (``Load`` and ``Store``), +but accept an additional ``alignment`` parameter that specifies the guaranteed alignment of +the effective address. The alignment value is passed to SPIR-V load/store operations via +memory operands (``Aligned`` memory access qualifier) to enable backend optimizations. +The alignment parameter must be a compile-time constant power-of-two value that is greater +than or equal to the largest scalar type size and less than or equal to 4096 bytes. 
+ ``.Interlocked*()`` +++++++++++++++++++ diff --git a/include/dxc/HlslIntrinsicOp.h b/include/dxc/HlslIntrinsicOp.h index aae269cfa1..03cd62f1fb 100644 --- a/include/dxc/HlslIntrinsicOp.h +++ b/include/dxc/HlslIntrinsicOp.h @@ -270,9 +270,11 @@ enum class IntrinsicOp { MOP_GatherRaw = 250, MOP_GatherRed = 251, MOP_GetSamplePosition = 252, + MOP_AlignedLoad = 405, MOP_Load2 = 253, MOP_Load3 = 254, MOP_Load4 = 255, + MOP_AlignedStore = 406, MOP_InterlockedAdd = 256, MOP_InterlockedAdd64 = 257, MOP_InterlockedAnd = 258, @@ -411,7 +413,7 @@ enum class IntrinsicOp { IOP_usign = 355, MOP_InterlockedUMax = 356, MOP_InterlockedUMin = 357, - Num_Intrinsics = 405, + Num_Intrinsics = 407, }; inline bool HasUnsignedIntrinsicOpcode(IntrinsicOp opcode) { switch (opcode) { diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp index f8a7907c91..70ee43a387 100644 --- a/lib/HLSL/HLOperationLower.cpp +++ b/lib/HLSL/HLOperationLower.cpp @@ -4100,7 +4100,8 @@ struct ResLoadHelper { ResLoadHelper(Instruction *Inst, DxilResource::Kind RK, Value *h, Value *idx, Value *Offset, Value *status = nullptr, Value *mip = nullptr) : intrinsicOpCode(IntrinsicOp::Num_Intrinsics), handle(h), retVal(Inst), - addr(idx), offset(Offset), status(status), mipLevel(mip) { + addr(idx), offset(Offset), status(status), mipLevel(mip), + customAlignment(0) { opcode = LoadOpFromResKind(RK); Type *Ty = Inst->getType(); if (opcode == OP::OpCode::RawBufferLoad && Ty->isVectorTy() && @@ -4118,6 +4119,8 @@ struct ResLoadHelper { Value *offset; Value *status; Value *mipLevel; + unsigned + customAlignment; // For AlignedLoad/AlignedStore - 0 means use default }; // Uses CI arguments to determine the index, offset, and mipLevel also depending @@ -4129,7 +4132,8 @@ struct ResLoadHelper { ResLoadHelper::ResLoadHelper(CallInst *CI, DxilResource::Kind RK, DxilResourceBase::Class RC, Value *hdl, IntrinsicOp IOP, LoadInst *TyBufSubLoad) - : intrinsicOpCode(IOP), handle(hdl), offset(nullptr), 
status(nullptr) { + : intrinsicOpCode(IOP), handle(hdl), offset(nullptr), status(nullptr), + customAlignment(0) { opcode = LoadOpFromResKind(RK); bool bForSubscript = false; if (TyBufSubLoad) { @@ -4144,6 +4148,26 @@ ResLoadHelper::ResLoadHelper(CallInst *CI, DxilResource::Kind RK, unsigned StatusIdx = HLOperandIndex::kBufLoadStatusOpIdx; unsigned OffsetIdx = HLOperandIndex::kInvalidIdx; + // Extract alignment for AlignedLoad operations + // AlignedLoad CallInst has: (opcode, handle, addr, alignment [, status]) + // Regular Load has: (opcode, handle, addr [, status]) + if (IOP == IntrinsicOp::MOP_AlignedLoad) { + // alignment is at index 3 (after opcode, handle, addr) + const unsigned kAlignmentIdx = kAddrIdx + 1; + if (argc > kAlignmentIdx) { + if (ConstantInt *AlignConst = + dyn_cast(CI->getArgOperand(kAlignmentIdx))) { + customAlignment = AlignConst->getZExtValue(); + } + } + // Status is at index 4 for AlignedLoad (if present) + if (argc > kAlignmentIdx + 1) { + StatusIdx = kAlignmentIdx + 1; + } else { + StatusIdx = HLOperandIndex::kInvalidIdx; + } + } + if (opcode == OP::OpCode::TextureLoad) { bool IsMS = (RK == DxilResource::Kind::Texture2DMS || RK == DxilResource::Kind::Texture2DMSArray); @@ -4191,7 +4215,7 @@ ResLoadHelper::ResLoadHelper(CallInst *CI, DxilResource::Kind RK, // Structured buffers receive no exterior offset in this constructor, // but may need to increment it later. offset = ConstantInt::get(i32Ty, 0U); - else if (argc > OffsetIdx) + else if (argc > OffsetIdx && OffsetIdx != HLOperandIndex::kInvalidIdx) // Textures may set the offset from an explicit argument. offset = CI->getArgOperand(OffsetIdx); else @@ -4199,7 +4223,7 @@ ResLoadHelper::ResLoadHelper(CallInst *CI, DxilResource::Kind RK, offset = UndefValue::get(i32Ty); // Retrieve status value if provided. 
- if (argc > StatusIdx) + if (StatusIdx != HLOperandIndex::kInvalidIdx && argc > StatusIdx) status = CI->getArgOperand(StatusIdx); } @@ -4246,8 +4270,15 @@ static SmallVector GetBufLoadArgs(ResLoadHelper helper, OP::OpCode opcode = helper.opcode; llvm::Constant *opArg = Builder.getInt32((uint32_t)opcode); - unsigned alignment = RK == DxilResource::Kind::RawBuffer ? 4U : 8U; - alignment = std::min(alignment, LdSize); + // Use custom alignment if provided (for AlignedLoad), otherwise calculate + // default + unsigned alignment; + if (helper.customAlignment != 0) { + alignment = helper.customAlignment; + } else { + alignment = RK == DxilResource::Kind::RawBuffer ? 4U : 8U; + alignment = std::min(alignment, LdSize); + } Constant *alignmentVal = Builder.getInt32(alignment); // Assemble args specific to the type bab/struct/typed: @@ -4516,7 +4547,8 @@ void Split64bitValForStore(Type *EltTy, ArrayRef vals, unsigned size, void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val, Value *Idx, Value *offset, IRBuilder<> &Builder, - hlsl::OP *OP, Value *sampIdx = nullptr) { + hlsl::OP *OP, Value *sampIdx = nullptr, + unsigned customAlignment = 0) { Type *Ty = val->getType(); OP::OpCode opcode = OP::OpCode::NumOpCodes; bool IsTyped = true; @@ -4560,11 +4592,18 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val, val = Builder.CreateZExt(val, Ty); } - // If RawBuffer store of 64-bit value, don't set alignment to 8, - // since buffer alignment isn't known to be anything over 4. - unsigned alignValue = OP->GetAllocSizeForType(EltTy); - if (RK == HLResource::Kind::RawBuffer && alignValue > 4) - alignValue = 4; + // Use custom alignment if provided (for AlignedStore), otherwise calculate + // default + unsigned alignValue; + if (customAlignment != 0) { + alignValue = customAlignment; + } else { + // If RawBuffer store of 64-bit value, don't set alignment to 8, + // since buffer alignment isn't known to be anything over 4. 
+ alignValue = OP->GetAllocSizeForType(EltTy); + if (RK == HLResource::Kind::RawBuffer && alignValue > 4) + alignValue = 4; + } Constant *Alignment = OP->GetI32Const(alignValue); bool is64 = EltTy == i64Ty || EltTy == doubleTy; if (is64 && IsTyped) { @@ -4758,10 +4797,30 @@ Value *TranslateResourceStore(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, IRBuilder<> Builder(CI); DXIL::ResourceKind RK = pObjHelper->GetRK(handle); - Value *val = CI->getArgOperand(HLOperandIndex::kStoreValOpIdx); - Value *offset = CI->getArgOperand(HLOperandIndex::kStoreOffsetOpIdx); + // Extract custom alignment for AlignedStore + unsigned customAlignment = 0; + unsigned valueArgIdx = HLOperandIndex::kStoreValOpIdx; + unsigned offsetArgIdx = HLOperandIndex::kStoreOffsetOpIdx; + + if (IOP == IntrinsicOp::MOP_AlignedStore) { + // AlignedStore CallInst has: (opcode, handle, offset, alignment, value) + // Regular Store has: (opcode, handle, offset, value) + const unsigned kAlignmentIdx = HLOperandIndex::kStoreOffsetOpIdx + 1; // = 3 + if (CI->getNumArgOperands() > kAlignmentIdx) { + if (ConstantInt *AlignConst = + dyn_cast(CI->getArgOperand(kAlignmentIdx))) { + customAlignment = AlignConst->getZExtValue(); + } + } + valueArgIdx = + kAlignmentIdx + 1; // Value is after alignment for AlignedStore + } + + Value *val = CI->getArgOperand(valueArgIdx); + Value *offset = CI->getArgOperand(offsetArgIdx); Value *UndefI = UndefValue::get(Builder.getInt32Ty()); - TranslateStore(RK, handle, val, offset, UndefI, Builder, hlslOP); + TranslateStore(RK, handle, val, offset, UndefI, Builder, hlslOP, nullptr, + customAlignment); return nullptr; } @@ -7514,7 +7573,6 @@ constexpr IntrinsicLower gLowerTable[] = { DXIL::OpCode::VectorAccumulate}, {IntrinsicOp::IOP_isnormal, TrivialIsSpecialFloat, DXIL::OpCode::IsNormal}, - {IntrinsicOp::IOP_GetGroupWaveCount, EmptyLower, DXIL::OpCode::GetGroupWaveCount}, {IntrinsicOp::IOP_GetGroupWaveIndex, EmptyLower, @@ -7536,6 +7594,11 @@ constexpr IntrinsicLower 
gLowerTable[] = { DXIL::OpCode::RayQuery_CommittedTriangleObjectPosition}, {IntrinsicOp::MOP_DxHitObject_TriangleObjectPosition, EmptyLower, DXIL::OpCode::HitObject_TriangleObjectPosition}, + + {IntrinsicOp::MOP_AlignedLoad, TranslateResourceLoad, + DXIL::OpCode::NumOpCodes}, + {IntrinsicOp::MOP_AlignedStore, TranslateResourceStore, + DXIL::OpCode::NumOpCodes}, }; constexpr size_t NumLowerTableEntries = sizeof(gLowerTable) / sizeof(gLowerTable[0]); diff --git a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td index 34a2195cbc..17ef549040 100644 --- a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -7606,6 +7606,15 @@ def err_hlsl_unsupported_buffer_packoffset : Error< "packoffset is only allowed within a constant buffer, not on the constant buffer declaration">; def err_hlsl_unsupported_buffer_slot_target_specific : Error< "user defined constant buffer slots cannot be target specific">; +def err_hlsl_aligned_buffer_unsupported_type : Error< + "AlignedLoad/AlignedStore functions cannot be used with %0. 
" + "Supported types are ByteAddressBuffer and RWByteAddressBuffer">; +def err_hlsl_aligned_buffer_invalid_alignment : Error< + "Alignment values require compile-time constant power-of-two values " + "that are >= largest scalar type size and <= 4096">; +def err_hlsl_aligned_buffer_alignment_too_small : Error< + "Alignment parameter of %0 bytes must be >= the largest scalar type size " + "%1 bytes for %2 element type">; def err_hlsl_unsupported_typedbuffer_template_parameter : Error< "elements of typed buffers and textures must be scalars or vectors">; def err_hlsl_unsupported_typedbuffer_template_parameter_size : Error< diff --git a/tools/clang/lib/SPIRV/RawBufferMethods.cpp b/tools/clang/lib/SPIRV/RawBufferMethods.cpp index 87409e7ccc..8c7e81dd8f 100644 --- a/tools/clang/lib/SPIRV/RawBufferMethods.cpp +++ b/tools/clang/lib/SPIRV/RawBufferMethods.cpp @@ -148,7 +148,7 @@ SpirvInstruction *RawBufferHandler::load64Bits(SpirvInstruction *buffer, SpirvInstruction *RawBufferHandler::processTemplatedLoadFromBuffer( SpirvInstruction *buffer, BufferAddress &address, const QualType targetType, - SourceRange range) { + SourceRange range, uint32_t alignment) { const auto loc = buffer->getSourceLocation(); SpirvInstruction *result = nullptr; @@ -188,8 +188,8 @@ SpirvInstruction *RawBufferHandler::processTemplatedLoadFromBuffer( if (isVectorType(targetType, &elemType, &elemCount)) { llvm::SmallVector loadedElems; for (uint32_t i = 0; i < elemCount; ++i) { - loadedElems.push_back( - processTemplatedLoadFromBuffer(buffer, address, elemType, range)); + loadedElems.push_back(processTemplatedLoadFromBuffer( + buffer, address, elemType, range, alignment)); } result = spvBuilder.createCompositeConstruct(targetType, loadedElems, loc, range); @@ -207,8 +207,8 @@ SpirvInstruction *RawBufferHandler::processTemplatedLoadFromBuffer( elemType = arrType->getElementType(); llvm::SmallVector loadedElems; for (uint32_t i = 0; i < elemCount; ++i) { - loadedElems.push_back( - 
processTemplatedLoadFromBuffer(buffer, address, elemType, range)); + loadedElems.push_back(processTemplatedLoadFromBuffer( + buffer, address, elemType, range, alignment)); } result = spvBuilder.createCompositeConstruct(targetType, loadedElems, loc, range); @@ -241,8 +241,8 @@ SpirvInstruction *RawBufferHandler::processTemplatedLoadFromBuffer( const uint32_t numElements = numRows * numCols; llvm::SmallVector loadedElems(numElements); for (uint32_t i = 0; i != numElements; ++i) - loadedElems[i] = - processTemplatedLoadFromBuffer(buffer, address, elemType, range); + loadedElems[i] = processTemplatedLoadFromBuffer( + buffer, address, elemType, range, alignment); llvm::SmallVector loadedRows; for (uint32_t i = 0; i < numRows; ++i) { @@ -279,7 +279,7 @@ SpirvInstruction *RawBufferHandler::processTemplatedLoadFromBuffer( llvm::SmallVector loadedElems; forEachSpirvField( structType, spvType, - [this, &buffer, &address, range, + [this, &buffer, &address, range, alignment, &loadedElems](size_t spirvFieldIndex, const QualType &fieldType, const auto &field) { auto *baseOffset = address.getByteAddress(); @@ -294,7 +294,7 @@ SpirvInstruction *RawBufferHandler::processTemplatedLoadFromBuffer( } loadedElems.push_back(processTemplatedLoadFromBuffer( - buffer, baseOffset, fieldType, range)); + buffer, baseOffset, fieldType, range, alignment)); return true; }); @@ -328,10 +328,11 @@ SpirvInstruction *RawBufferHandler::processTemplatedLoadFromBuffer( SpirvInstruction *RawBufferHandler::processTemplatedLoadFromBuffer( SpirvInstruction *buffer, SpirvInstruction *byteAddress, - const QualType targetType, SourceRange range) { + const QualType targetType, SourceRange range, uint32_t alignment) { BufferAddress address(byteAddress, theEmitter); - return processTemplatedLoadFromBuffer(buffer, address, targetType, range); + return processTemplatedLoadFromBuffer(buffer, address, targetType, range, + alignment); } void RawBufferHandler::store16Bits(SpirvInstruction *value, @@ -535,11 +536,9 @@ 
QualType RawBufferHandler::serializeToScalarsOrStruct( llvm_unreachable("unhandled type when serializing an array"); } -void RawBufferHandler::processTemplatedStoreToBuffer(SpirvInstruction *value, - SpirvInstruction *buffer, - BufferAddress &address, - const QualType valueType, - SourceRange range) { +void RawBufferHandler::processTemplatedStoreToBuffer( + SpirvInstruction *value, SpirvInstruction *buffer, BufferAddress &address, + const QualType valueType, SourceRange range, uint32_t alignment) { const auto loc = buffer->getSourceLocation(); // Scalar types @@ -575,7 +574,7 @@ void RawBufferHandler::processTemplatedStoreToBuffer(SpirvInstruction *value, if (isScalarType(serializedType) || serializedType->getAs()) { for (auto elem : elems) processTemplatedStoreToBuffer(elem, buffer, address, serializedType, - range); + range, alignment); } return; } @@ -595,9 +594,9 @@ void RawBufferHandler::processTemplatedStoreToBuffer(SpirvInstruction *value, assert(spvType); forEachSpirvField( structType, spvType, - [this, &address, loc, range, buffer, value](size_t spirvFieldIndex, - const QualType &fieldType, - const auto &field) { + [this, &address, loc, range, buffer, value, + alignment](size_t spirvFieldIndex, const QualType &fieldType, + const auto &field) { auto *baseOffset = address.getByteAddress(); if (field.offset.hasValue() && field.offset.getValue() != 0) { SpirvConstant *offset = spvBuilder.getConstantInt( @@ -612,7 +611,7 @@ void RawBufferHandler::processTemplatedStoreToBuffer(SpirvInstruction *value, spvBuilder.createCompositeExtract( fieldType, value, {static_cast(spirvFieldIndex)}, loc, range), - buffer, baseOffset, fieldType, range); + buffer, baseOffset, fieldType, range, alignment); return true; }); @@ -645,11 +644,12 @@ void RawBufferHandler::processTemplatedStoreToBuffer(SpirvInstruction *value, void RawBufferHandler::processTemplatedStoreToBuffer( SpirvInstruction *value, SpirvInstruction *buffer, - SpirvInstruction *&byteAddress, const QualType 
valueType, - SourceRange range) { + SpirvInstruction *&byteAddress, const QualType valueType, SourceRange range, + uint32_t alignment) { BufferAddress address(byteAddress, theEmitter); - processTemplatedStoreToBuffer(value, buffer, address, valueType, range); + processTemplatedStoreToBuffer(value, buffer, address, valueType, range, + alignment); } SpirvInstruction *RawBufferHandler::BufferAddress::getByteAddress() { diff --git a/tools/clang/lib/SPIRV/RawBufferMethods.h b/tools/clang/lib/SPIRV/RawBufferMethods.h index f089f2df5c..477a765b4e 100644 --- a/tools/clang/lib/SPIRV/RawBufferMethods.h +++ b/tools/clang/lib/SPIRV/RawBufferMethods.h @@ -36,7 +36,8 @@ class RawBufferHandler { /// --> Load the first 16-bit uint starting at byte address 0. SpirvInstruction *processTemplatedLoadFromBuffer( SpirvInstruction *buffer, SpirvInstruction *byteAddress, - const QualType targetType, SourceRange range = {}); + const QualType targetType, SourceRange range = {}, + uint32_t alignment = 0); /// \brief Performs RWByteAddressBuffer.Store(address, value). 
/// RWByteAddressBuffers are represented in SPIR-V as structs with only one @@ -51,7 +52,8 @@ class RawBufferHandler { SpirvInstruction *buffer, SpirvInstruction *&byteAddress, const QualType valueType, - SourceRange range = {}); + SourceRange range = {}, + uint32_t alignment = 0); private: class BufferAddress { @@ -81,12 +83,11 @@ class RawBufferHandler { SpirvInstruction *processTemplatedLoadFromBuffer(SpirvInstruction *buffer, BufferAddress &address, const QualType targetType, - SourceRange range = {}); - void processTemplatedStoreToBuffer(SpirvInstruction *value, - SpirvInstruction *buffer, - BufferAddress &address, - const QualType valueType, - SourceRange range = {}); + SourceRange range = {}, + uint32_t alignment = 0); + void processTemplatedStoreToBuffer( + SpirvInstruction *value, SpirvInstruction *buffer, BufferAddress &address, + const QualType valueType, SourceRange range = {}, uint32_t alignment = 0); SpirvInstruction *load16Bits(SpirvInstruction *buffer, BufferAddress &address, QualType target16BitType, diff --git a/tools/clang/lib/SPIRV/SpirvEmitter.cpp b/tools/clang/lib/SPIRV/SpirvEmitter.cpp index 1400104d3d..4916be77f7 100644 --- a/tools/clang/lib/SPIRV/SpirvEmitter.cpp +++ b/tools/clang/lib/SPIRV/SpirvEmitter.cpp @@ -4740,34 +4740,65 @@ SpirvInstruction *SpirvEmitter::processBufferTextureLoad( return retVal; } -SpirvInstruction *SpirvEmitter::processByteAddressBufferLoadStore( - const CXXMemberCallExpr *expr, uint32_t numWords, bool doStore) { +SpirvInstruction * +SpirvEmitter::processByteAddressBufferLoadStore(const CXXMemberCallExpr *expr, + uint32_t numWords, bool doStore, + bool isAligned) { SpirvInstruction *result = nullptr; const auto object = expr->getImplicitObjectArgument(); auto *objectInfo = loadIfAliasVarRef(object); assert(numWords >= 1 && numWords <= 4); + + // Extract alignment parameter if this is an aligned operation + uint32_t alignment = 0; + uint32_t addressArgIndex = 0; // offset/address is first arg + uint32_t valueArgIndex 
= 1; // value is second arg (for store) + + if (isAligned) { + // For AlignedLoad/AlignedStore: args are (offset, alignment [, value] [, + // status]) offset is arg 0, alignment is arg 1 + if (expr->getNumArgs() < 2) { + emitError("AlignedLoad/AlignedStore requires alignment parameter", + expr->getExprLoc()); + return nullptr; + } + const Expr *alignmentExpr = expr->getArg(1); + alignment = getRawBufferAlignment(alignmentExpr); + + // For AlignedStore, the value is the 3rd argument (after offset and + // alignment) + if (doStore) { + valueArgIndex = 2; + } + } + if (doStore) { assert(isRWByteAddressBuffer(object->getType())); - assert(expr->getNumArgs() == 2); + uint32_t expectedArgs = + isAligned ? 3 : 2; // AlignedStore has 3 args (offset, alignment, value) + assert(expr->getNumArgs() == expectedArgs); } else { assert(isRWByteAddressBuffer(object->getType()) || isByteAddressBuffer(object->getType())); - if (expr->getNumArgs() == 2) { + // Regular Load with status has 2 args, AlignedLoad with status has 3 args + uint32_t maxArgs = isAligned ? 3 : 2; + if (expr->getNumArgs() == maxArgs && !isAligned) { emitError( "(RW)ByteAddressBuffer::Load(in address, out status) not supported", expr->getExprLoc()); return 0; } } - const Expr *addressExpr = expr->getArg(0); + const Expr *addressExpr = expr->getArg(addressArgIndex); auto *byteAddress = doExpr(addressExpr); const QualType addressType = addressExpr->getType(); // The front-end prevents usage of templated Load2, Load3, Load4, Store2, // Store3, Store4 intrinsic functions. const bool isTemplatedLoadOrStore = (numWords == 1) && - (doStore ? !expr->getArg(1)->getType()->isSpecificBuiltinType( - BuiltinType::UInt) + (doStore ? 
!expr->getArg(valueArgIndex) + ->getType() + ->isSpecificBuiltinType(BuiltinType::UInt) : !expr->getType()->isSpecificBuiltinType(BuiltinType::UInt)); const auto range = expr->getSourceRange(); @@ -4782,14 +4813,15 @@ SpirvInstruction *SpirvEmitter::processByteAddressBufferLoadStore( } if (doStore) { - auto *values = doExpr(expr->getArg(1)); + auto *values = doExpr(expr->getArg(valueArgIndex)); RawBufferHandler(*this).processTemplatedStoreToBuffer( - values, objectInfo, byteAddress, expr->getArg(1)->getType(), range); + values, objectInfo, byteAddress, + expr->getArg(valueArgIndex)->getType(), range, alignment); result = nullptr; } else { RawBufferHandler rawBufferHandler(*this); result = rawBufferHandler.processTemplatedLoadFromBuffer( - objectInfo, byteAddress, expr->getType(), range); + objectInfo, byteAddress, expr->getType(), range, alignment); } if (rasterizerOrder) { @@ -5556,6 +5588,9 @@ SpirvEmitter::processIntrinsicMemberCall(const CXXMemberCallExpr *expr, return processByteAddressBufferLoadStore(expr, 3, /*doStore*/ false); case IntrinsicOp::MOP_Load4: return processByteAddressBufferLoadStore(expr, 4, /*doStore*/ false); + case IntrinsicOp::MOP_AlignedLoad: + return processByteAddressBufferLoadStore(expr, 1, /*doStore*/ false, + /*isAligned*/ true); case IntrinsicOp::MOP_Store: return processByteAddressBufferLoadStore(expr, 1, /*doStore*/ true); case IntrinsicOp::MOP_Store2: @@ -5564,6 +5599,9 @@ SpirvEmitter::processIntrinsicMemberCall(const CXXMemberCallExpr *expr, return processByteAddressBufferLoadStore(expr, 3, /*doStore*/ true); case IntrinsicOp::MOP_Store4: return processByteAddressBufferLoadStore(expr, 4, /*doStore*/ true); + case IntrinsicOp::MOP_AlignedStore: + return processByteAddressBufferLoadStore(expr, 1, /*doStore*/ true, + /*isAligned*/ true); case IntrinsicOp::MOP_GetDimensions: retVal = processGetDimensions(expr); break; diff --git a/tools/clang/lib/SPIRV/SpirvEmitter.h b/tools/clang/lib/SPIRV/SpirvEmitter.h index 
9b890d3af4..eb80bd31dc 100644 --- a/tools/clang/lib/SPIRV/SpirvEmitter.h +++ b/tools/clang/lib/SPIRV/SpirvEmitter.h @@ -1240,7 +1240,8 @@ class SpirvEmitter : public ASTConsumer { /// Panics if it is not the case. SpirvInstruction *processByteAddressBufferLoadStore(const CXXMemberCallExpr *, uint32_t numWords, - bool doStore); + bool doStore, + bool isAligned = false); /// \brief Processes the GetDimensions intrinsic function call on a /// (RW)ByteAddressBuffer by querying the image in the given expr. diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index e9c8c90a2d..47bffb10b4 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -7119,9 +7119,10 @@ bool HLSLExternalSource::MatchArguments( } else if (pArgument->uTemplateId == INTRIN_TEMPLATE_FROM_FUNCTION) { if (functionTemplateTypeArg.isNull()) { if (i == 0) { - // [RW]ByteAddressBuffer.Load, default to uint + // [RW]ByteAddressBuffer.Load/AlignedLoad, default to uint pNewType = m_context->UnsignedIntTy; - if (builtinOp != hlsl::IntrinsicOp::MOP_Load) + if (builtinOp != hlsl::IntrinsicOp::MOP_Load && + builtinOp != hlsl::IntrinsicOp::MOP_AlignedLoad) badArgIdx = std::min(badArgIdx, i); } else { // [RW]ByteAddressBuffer.Store, default to argument type @@ -10077,6 +10078,108 @@ bool HLSLExternalSource::ValidateTypeRequirements(SourceLocation loc, return true; } +// Get the largest scalar type size in bytes for a given type (for +// AlignedLoad/AlignedStore validation) +static UINT GetLargestScalarTypeSize(QualType Ty, ASTContext &Ctx) { + if (Ty.isNull()) + return 0; + + // Strip off reference types + Ty = Ty.getNonReferenceType(); + + // Handle scalar types + if (const BuiltinType *BT = Ty->getAs()) { + switch (BT->getKind()) { + case BuiltinType::Bool: + return 1; + case BuiltinType::Half: + case BuiltinType::Short: + case BuiltinType::UShort: + case BuiltinType::Min16Float: + case BuiltinType::Min16Int: + case BuiltinType::Min16UInt: + case 
BuiltinType::Min10Float: + case BuiltinType::Min12Int: + return 2; + case BuiltinType::Int: + case BuiltinType::UInt: + case BuiltinType::Float: + case BuiltinType::LitInt: + case BuiltinType::LitFloat: + return 4; + case BuiltinType::Double: + case BuiltinType::LongLong: + case BuiltinType::ULongLong: + return 8; + default: + break; + } + } + + // Handle vector types + if (const ExtVectorType *VT = Ty->getAs()) { + return GetLargestScalarTypeSize(VT->getElementType(), Ctx); + } + + // Handle array types + if (const ConstantArrayType *AT = Ctx.getAsConstantArrayType(Ty)) { + return GetLargestScalarTypeSize(AT->getElementType(), Ctx); + } + + // Handle record (struct) types - find the largest field + if (const RecordType *RT = Ty->getAs()) { + UINT maxSize = 0; + RecordDecl *RD = RT->getDecl(); + for (const FieldDecl *FD : RD->fields()) { + UINT fieldSize = GetLargestScalarTypeSize(FD->getType(), Ctx); + if (fieldSize > maxSize) + maxSize = fieldSize; + } + return maxSize; + } + + // Default to 4 bytes + return 4; +} + +// Validate alignment parameter for AlignedLoad/AlignedStore +static bool ValidateAlignmentParameter(Sema &S, const Expr *AlignmentExpr, + QualType TemplateType, + SourceLocation Loc) { + // Alignment must be a compile-time constant + llvm::APSInt alignmentValue; + if (!AlignmentExpr->isIntegerConstantExpr(alignmentValue, + S.getASTContext())) { + S.Diag(Loc, diag::err_hlsl_aligned_buffer_invalid_alignment); + return false; + } + + UINT alignment = alignmentValue.getZExtValue(); + + // Alignment must be a power of two + if (alignment == 0 || (alignment & (alignment - 1)) != 0) { + S.Diag(Loc, diag::err_hlsl_aligned_buffer_invalid_alignment); + return false; + } + + // Alignment must be <= 4096 + if (alignment > 4096) { + S.Diag(Loc, diag::err_hlsl_aligned_buffer_invalid_alignment); + return false; + } + + // Alignment must be >= largest scalar type size + UINT largestScalarSize = + GetLargestScalarTypeSize(TemplateType, S.getASTContext()); + if 
(alignment < largestScalarSize) { + S.Diag(Loc, diag::err_hlsl_aligned_buffer_alignment_too_small) + << alignment << largestScalarSize << TemplateType; + return false; + } + + return true; +} + bool HLSLExternalSource::ValidatePrimitiveTypeForOperand( SourceLocation loc, QualType type, ArTypeObjectKind kind) { bool isValid = true; @@ -10973,13 +11076,43 @@ HLSLExternalSource::DeduceTemplateArgumentsForHLSL( objectName == g_ArBasicTypeNames[AR_OBJECT_RWBYTEADDRESS_BUFFER]; bool IsBABLoad = false; bool IsBABStore = false; + bool IsBABAlignedLoad = false; + bool IsBABAlignedStore = false; if (IsBuiltinTable(tableName) && IsBAB) { IsBABLoad = intrinsicOp == (UINT)IntrinsicOp::MOP_Load; IsBABStore = intrinsicOp == (UINT)IntrinsicOp::MOP_Store; + IsBABAlignedLoad = intrinsicOp == (UINT)IntrinsicOp::MOP_AlignedLoad; + IsBABAlignedStore = intrinsicOp == (UINT)IntrinsicOp::MOP_AlignedStore; + } + + // Validate alignment parameter for AlignedLoad/AlignedStore + if (IsBABAlignedLoad || IsBABAlignedStore) { + // AlignedLoad/AlignedStore have alignment as second parameter (after + // offset) + if (Args.size() < 2) { + getSema()->Diag(Args[0]->getExprLoc(), + diag::err_ovl_no_viable_member_function_in_call) + << intrinsicName; + return Sema::TemplateDeductionResult::TDK_Invalid; + } + + const Expr *AlignmentExpr = Args[1]; + SourceLocation AlignmentLoc = AlignmentExpr->getExprLoc(); + + // If we have a template type, validate alignment against it + if (!functionTemplateTypeArg.isNull()) { + if (!ValidateAlignmentParameter(*getSema(), AlignmentExpr, + functionTemplateTypeArg, + AlignmentLoc)) { + return Sema::TemplateDeductionResult::TDK_Invalid; + } + } } + if (ExplicitTemplateArgs && ExplicitTemplateArgs->size() >= 1) { SourceLocation Loc = ExplicitTemplateArgs->getLAngleLoc(); - if (!IsBABLoad && !IsBABStore) { + if (!IsBABLoad && !IsBABStore && !IsBABAlignedLoad && + !IsBABAlignedStore) { getSema()->Diag(Loc, diag::err_hlsl_intrinsic_template_arg_unsupported) << 
intrinsicName; return Sema::TemplateDeductionResult::TDK_Invalid; @@ -10992,7 +11125,7 @@ HLSLExternalSource::DeduceTemplateArgumentsForHLSL( return Sema::TemplateDeductionResult::TDK_Invalid; } - if (IsBABLoad || IsBABStore) { + if (IsBABLoad || IsBABStore || IsBABAlignedLoad || IsBABAlignedStore) { const bool IsNull = functionTemplateTypeArg.isNull(); // Incomplete type is diagnosed elsewhere, so just fail if incomplete. if (!IsNull && @@ -11008,10 +11141,22 @@ HLSLExternalSource::DeduceTemplateArgumentsForHLSL( TypeDiagContext::Valid /*LongVecDiagContext*/); return Sema::TemplateDeductionResult::TDK_Invalid; } + + // Re-validate alignment with the now-known template type for + // AlignedLoad/AlignedStore + if ((IsBABAlignedLoad || IsBABAlignedStore) && Args.size() >= 2) { + const Expr *AlignmentExpr = Args[1]; + SourceLocation AlignmentLoc = AlignmentExpr->getExprLoc(); + if (!ValidateAlignmentParameter(*getSema(), AlignmentExpr, + functionTemplateTypeArg, + AlignmentLoc)) { + return Sema::TemplateDeductionResult::TDK_Invalid; + } + } } - } else if (IsBABStore) { + } else if (IsBABStore || IsBABAlignedStore) { // Prior to HLSL 2018, Store operation only stored scalar uint. 
- if (!Is2018) { + if (!Is2018 && !IsBABAlignedStore) { if (GetNumElements(argTypes[2]) != 1) { getSema()->Diag(Args[1]->getLocStart(), diag::err_ovl_no_viable_member_function_in_call) @@ -12365,6 +12510,16 @@ void Sema::CheckHLSLFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall) { hlsl::IntrinsicOp opCode = (hlsl::IntrinsicOp)IntrinsicAttr->getOpcode(); switch (opCode) { + case hlsl::IntrinsicOp::MOP_AlignedLoad: + case hlsl::IntrinsicOp::MOP_AlignedStore: + // AlignedLoad/AlignedStore require SM 6.2+ (DXIL 1.2+) for + // RawBufferLoad/Store + if (SM->GetMajor() < 6 || (SM->GetMajor() == 6 && SM->GetMinor() < 2)) { + Diag(TheCall->getLocStart(), + diag::warn_hlsl_intrinsic_in_wrong_shader_model) + << FDecl->getName() << FDecl << "6.2"; + } + break; case hlsl::IntrinsicOp::MOP_FinishedCrossGroupSharing: CheckFinishedCrossGroupSharingCall(*this, cast(FDecl), TheCall->getLocStart()); diff --git a/tools/clang/test/HLSLFileCheck/hlsl/objects/ByteAddressBuffer/aligned_load_types.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/objects/ByteAddressBuffer/aligned_load_types.hlsl new file mode 100644 index 0000000000..cbd8323089 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/objects/ByteAddressBuffer/aligned_load_types.hlsl @@ -0,0 +1,159 @@ +// RUN: %dxc -E main -T cs_6_2 -enable-16bit-types -DTY=float -DALIGN=4 -DCHKSTATUS %s | FileCheck %s -check-prefix=CHK_FLOAT32_A4_RO +// RUN: %dxc -E main -T cs_6_2 -enable-16bit-types -DTY=float -DALIGN=16 %s | FileCheck %s -check-prefix=CHK_FLOAT32_A16_RO +// RUN: %dxc -E main -T cs_6_2 -enable-16bit-types -DTY=float -DALIGN=8 -DSRCRW %s | FileCheck %s -check-prefix=CHK_FLOAT32_A8_RW +// RUN: %dxc -E main -T cs_6_2 -enable-16bit-types -DTY=float -DALIGN=32 -DSRCRW -DCHKSTATUS %s | FileCheck %s -check-prefix=CHK_FLOAT32_A32_RW + +// RUN: %dxc -E main -T cs_6_2 -enable-16bit-types -DTY=float4 -DALIGN=4 -DCHKSTATUS %s | FileCheck %s -check-prefix=CHK_FLOAT32x4_A4_RO +// RUN: %dxc -E main -T cs_6_2 -enable-16bit-types -DTY=float4 
-DALIGN=16 %s | FileCheck %s -check-prefix=CHK_FLOAT32x4_A16_RO +// RUN: %dxc -E main -T cs_6_2 -enable-16bit-types -DTY=float4 -DALIGN=8 -DSRCRW %s | FileCheck %s -check-prefix=CHK_FLOAT32x4_A8_RW +// RUN: %dxc -E main -T cs_6_2 -enable-16bit-types -DTY=float4 -DALIGN=64 -DSRCRW -DCHKSTATUS %s | FileCheck %s -check-prefix=CHK_FLOAT32x4_A64_RW + +// RUN: %dxc -E main -T cs_6_2 -enable-16bit-types -DTY=float16_t -DALIGN=2 -DCHKSTATUS %s | FileCheck %s -check-prefix=CHK_FLOAT16_A2_RO +// RUN: %dxc -E main -T cs_6_2 -enable-16bit-types -DTY=float16_t -DALIGN=8 %s | FileCheck %s -check-prefix=CHK_FLOAT16_A8_RO +// RUN: %dxc -E main -T cs_6_2 -enable-16bit-types -DTY=float16_t -DALIGN=4 -DSRCRW %s | FileCheck %s -check-prefix=CHK_FLOAT16_A4_RW +// RUN: %dxc -E main -T cs_6_2 -enable-16bit-types -DTY=float16_t -DALIGN=16 -DSRCRW -DCHKSTATUS %s | FileCheck %s -check-prefix=CHK_FLOAT16_A16_RW + +// RUN: %dxc -E main -T cs_6_2 -enable-16bit-types -DTY=double -DALIGN=8 -DCHKSTATUS %s | FileCheck %s -check-prefix=CHK_FLOAT64_A8_RO +// RUN: %dxc -E main -T cs_6_2 -enable-16bit-types -DTY=double -DALIGN=32 %s | FileCheck %s -check-prefix=CHK_FLOAT64_A32_RO +// RUN: %dxc -E main -T cs_6_2 -enable-16bit-types -DTY=double -DALIGN=16 -DSRCRW %s | FileCheck %s -check-prefix=CHK_FLOAT64_A16_RW +// RUN: %dxc -E main -T cs_6_2 -enable-16bit-types -DTY=double -DALIGN=64 -DSRCRW -DCHKSTATUS %s | FileCheck %s -check-prefix=CHK_FLOAT64_A64_RW + +// RUN: %dxc -E main -T cs_6_2 -enable-16bit-types -DTY=uint -DALIGN=4 -DCHKSTATUS %s | FileCheck %s -check-prefix=CHK_UINT32_A4_RO +// RUN: %dxc -E main -T cs_6_2 -enable-16bit-types -DTY=uint -DALIGN=16 %s | FileCheck %s -check-prefix=CHK_UINT32_A16_RO +// RUN: %dxc -E main -T cs_6_2 -enable-16bit-types -DTY=uint -DALIGN=8 -DSRCRW %s | FileCheck %s -check-prefix=CHK_UINT32_A8_RW +// RUN: %dxc -E main -T cs_6_2 -enable-16bit-types -DTY=uint -DALIGN=32 -DSRCRW -DCHKSTATUS %s | FileCheck %s -check-prefix=CHK_UINT32_A32_RW + +// RUN: %dxc -E main -T 
cs_6_2 -enable-16bit-types -DTY=uint3 -DALIGN=4 -DCHKSTATUS %s | FileCheck %s -check-prefix=CHK_UINT32x3_A4_RO +// RUN: %dxc -E main -T cs_6_2 -enable-16bit-types -DTY=uint3 -DALIGN=16 %s | FileCheck %s -check-prefix=CHK_UINT32x3_A16_RO +// RUN: %dxc -E main -T cs_6_2 -enable-16bit-types -DTY=uint3 -DALIGN=8 -DSRCRW %s | FileCheck %s -check-prefix=CHK_UINT32x3_A8_RW +// RUN: %dxc -E main -T cs_6_2 -enable-16bit-types -DTY=uint3 -DALIGN=32 -DSRCRW -DCHKSTATUS %s | FileCheck %s -check-prefix=CHK_UINT32x3_A32_RW + +// RUN: %dxc -E main -T cs_6_2 -enable-16bit-types -DTY=uint16_t -DALIGN=2 -DCHKSTATUS %s | FileCheck %s -check-prefix=CHK_UINT16_A2_RO +// RUN: %dxc -E main -T cs_6_2 -enable-16bit-types -DTY=uint16_t -DALIGN=8 %s | FileCheck %s -check-prefix=CHK_UINT16_A8_RO +// RUN: %dxc -E main -T cs_6_2 -enable-16bit-types -DTY=uint16_t -DALIGN=4 -DSRCRW %s | FileCheck %s -check-prefix=CHK_UINT16_A4_RW +// RUN: %dxc -E main -T cs_6_2 -enable-16bit-types -DTY=uint16_t -DALIGN=16 -DSRCRW -DCHKSTATUS %s | FileCheck %s -check-prefix=CHK_UINT16_A16_RW + +// RUN: %dxc -E main -T cs_6_2 -enable-16bit-types -DTY=int64_t -DALIGN=8 -DCHKSTATUS %s | FileCheck %s -check-prefix=CHK_INT64_A8_RO +// RUN: %dxc -E main -T cs_6_2 -enable-16bit-types -DTY=int64_t -DALIGN=32 %s | FileCheck %s -check-prefix=CHK_INT64_A32_RO +// RUN: %dxc -E main -T cs_6_2 -enable-16bit-types -DTY=int64_t -DALIGN=16 -DSRCRW %s | FileCheck %s -check-prefix=CHK_INT64_A16_RW +// RUN: %dxc -E main -T cs_6_2 -enable-16bit-types -DTY=int64_t -DALIGN=64 -DSRCRW -DCHKSTATUS %s | FileCheck %s -check-prefix=CHK_INT64_A64_RW + + +// CHK_FLOAT32_A4_RO: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %srcbuf_texture_rawbuf, i32 %mul, i32 undef, i8 1, i32 4) +// CHK_FLOAT32_A4_RO: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %dstbuf_UAV_rawbuf, i32 %mul, i32 undef, float %{{.*}}, float undef, float undef, float undef, i8 1, i32 4) + +// CHK_FLOAT32_A16_RO: call 
%dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %srcbuf_texture_rawbuf, i32 %mul, i32 undef, i8 1, i32 16) +// CHK_FLOAT32_A16_RO: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %dstbuf_UAV_rawbuf, i32 %mul, i32 undef, float %{{.*}}, float undef, float undef, float undef, i8 1, i32 16) + +// CHK_FLOAT32_A8_RW: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %srcbuf_UAV_rawbuf, i32 %mul, i32 undef, i8 1, i32 8) +// CHK_FLOAT32_A8_RW: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %dstbuf_UAV_rawbuf, i32 %mul, i32 undef, float %{{.*}}, float undef, float undef, float undef, i8 1, i32 8) + +// CHK_FLOAT32_A32_RW: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %srcbuf_UAV_rawbuf, i32 %mul, i32 undef, i8 1, i32 32) +// CHK_FLOAT32_A32_RW: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %dstbuf_UAV_rawbuf, i32 %mul, i32 undef, float %{{.*}}, float undef, float undef, float undef, i8 1, i32 32) + +// CHK_FLOAT32x4_A4_RO: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %srcbuf_texture_rawbuf, i32 %mul, i32 undef, i8 15, i32 4) +// CHK_FLOAT32x4_A4_RO: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %dstbuf_UAV_rawbuf, i32 %mul, i32 undef, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}, i8 15, i32 4) + +// CHK_FLOAT32x4_A16_RO: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %srcbuf_texture_rawbuf, i32 %mul, i32 undef, i8 15, i32 16) +// CHK_FLOAT32x4_A16_RO: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %dstbuf_UAV_rawbuf, i32 %mul, i32 undef, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}, i8 15, i32 16) + +// CHK_FLOAT32x4_A8_RW: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %srcbuf_UAV_rawbuf, i32 %mul, i32 undef, i8 15, i32 8) +// CHK_FLOAT32x4_A8_RW: call void @dx.op.rawBufferStore.f32(i32 140, 
%dx.types.Handle %dstbuf_UAV_rawbuf, i32 %mul, i32 undef, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}, i8 15, i32 8) + +// CHK_FLOAT32x4_A64_RW: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %srcbuf_UAV_rawbuf, i32 %mul, i32 undef, i8 15, i32 64) +// CHK_FLOAT32x4_A64_RW: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %dstbuf_UAV_rawbuf, i32 %mul, i32 undef, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}, i8 15, i32 64) + +// CHK_FLOAT16_A2_RO: call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %srcbuf_texture_rawbuf, i32 %mul, i32 undef, i8 1, i32 2) +// CHK_FLOAT16_A2_RO: call void @dx.op.rawBufferStore.f16(i32 140, %dx.types.Handle %dstbuf_UAV_rawbuf, i32 %mul, i32 undef, half %{{.*}}, half undef, half undef, half undef, i8 1, i32 2) + +// CHK_FLOAT16_A8_RO: call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %srcbuf_texture_rawbuf, i32 %mul, i32 undef, i8 1, i32 8) +// CHK_FLOAT16_A8_RO: call void @dx.op.rawBufferStore.f16(i32 140, %dx.types.Handle %dstbuf_UAV_rawbuf, i32 %mul, i32 undef, half %{{.*}}, half undef, half undef, half undef, i8 1, i32 8) + +// CHK_FLOAT16_A4_RW: call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %srcbuf_UAV_rawbuf, i32 %mul, i32 undef, i8 1, i32 4) +// CHK_FLOAT16_A4_RW: call void @dx.op.rawBufferStore.f16(i32 140, %dx.types.Handle %dstbuf_UAV_rawbuf, i32 %mul, i32 undef, half %{{.*}}, half undef, half undef, half undef, i8 1, i32 4) + +// CHK_FLOAT16_A16_RW: call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %srcbuf_UAV_rawbuf, i32 %mul, i32 undef, i8 1, i32 16) +// CHK_FLOAT16_A16_RW: call void @dx.op.rawBufferStore.f16(i32 140, %dx.types.Handle %dstbuf_UAV_rawbuf, i32 %mul, i32 undef, half %{{.*}}, half undef, half undef, half undef, i8 1, i32 16) + +// CHK_FLOAT64_A8_RO: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle 
%srcbuf_texture_rawbuf, i32 %mul, i32 undef, i8 3, i32 8) +// CHK_FLOAT64_A8_RO: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %dstbuf_UAV_rawbuf, i32 %mul, i32 undef, i32 %{{.*}}, i32 %{{.*}}, i32 undef, i32 undef, i8 3, i32 8) + +// CHK_FLOAT64_A32_RO: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %srcbuf_texture_rawbuf, i32 %mul, i32 undef, i8 3, i32 32) +// CHK_FLOAT64_A32_RO: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %dstbuf_UAV_rawbuf, i32 %mul, i32 undef, i32 %{{.*}}, i32 %{{.*}}, i32 undef, i32 undef, i8 3, i32 32) + +// CHK_FLOAT64_A16_RW: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %srcbuf_UAV_rawbuf, i32 %mul, i32 undef, i8 3, i32 16) +// CHK_FLOAT64_A16_RW: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %dstbuf_UAV_rawbuf, i32 %mul, i32 undef, i32 %{{.*}}, i32 %{{.*}}, i32 undef, i32 undef, i8 3, i32 16) + +// CHK_FLOAT64_A64_RW: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %srcbuf_UAV_rawbuf, i32 %mul, i32 undef, i8 3, i32 64) +// CHK_FLOAT64_A64_RW: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %dstbuf_UAV_rawbuf, i32 %mul, i32 undef, i32 %{{.*}}, i32 %{{.*}}, i32 undef, i32 undef, i8 3, i32 64) + +// CHK_UINT32_A4_RO: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %srcbuf_texture_rawbuf, i32 %mul, i32 undef, i8 1, i32 4) +// CHK_UINT32_A4_RO: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %dstbuf_UAV_rawbuf, i32 %mul, i32 undef, i32 %{{.*}}, i32 undef, i32 undef, i32 undef, i8 1, i32 4) + +// CHK_UINT32_A16_RO: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %srcbuf_texture_rawbuf, i32 %mul, i32 undef, i8 1, i32 16) +// CHK_UINT32_A16_RO: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %dstbuf_UAV_rawbuf, i32 %mul, i32 undef, i32 %{{.*}}, i32 undef, i32 undef, i32 undef, i8 1, i32 16) + +// 
CHK_UINT32_A8_RW: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %srcbuf_UAV_rawbuf, i32 %mul, i32 undef, i8 1, i32 8) +// CHK_UINT32_A8_RW: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %dstbuf_UAV_rawbuf, i32 %mul, i32 undef, i32 %{{.*}}, i32 undef, i32 undef, i32 undef, i8 1, i32 8) + +// CHK_UINT32_A32_RW: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %srcbuf_UAV_rawbuf, i32 %mul, i32 undef, i8 1, i32 32) +// CHK_UINT32_A32_RW: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %dstbuf_UAV_rawbuf, i32 %mul, i32 undef, i32 %{{.*}}, i32 undef, i32 undef, i32 undef, i8 1, i32 32) + +// CHK_UINT32x3_A4_RO: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %srcbuf_texture_rawbuf, i32 %mul, i32 undef, i8 7, i32 4) +// CHK_UINT32x3_A4_RO: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %dstbuf_UAV_rawbuf, i32 %mul, i32 undef, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 undef, i8 7, i32 4) + +// CHK_UINT32x3_A16_RO: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %srcbuf_texture_rawbuf, i32 %mul, i32 undef, i8 7, i32 16) +// CHK_UINT32x3_A16_RO: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %dstbuf_UAV_rawbuf, i32 %mul, i32 undef, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 undef, i8 7, i32 16) + +// CHK_UINT32x3_A8_RW: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %srcbuf_UAV_rawbuf, i32 %mul, i32 undef, i8 7, i32 8) +// CHK_UINT32x3_A8_RW: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %dstbuf_UAV_rawbuf, i32 %mul, i32 undef, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 undef, i8 7, i32 8) + +// CHK_UINT32x3_A32_RW: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %srcbuf_UAV_rawbuf, i32 %mul, i32 undef, i8 7, i32 32) +// CHK_UINT32x3_A32_RW: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %dstbuf_UAV_rawbuf, i32 
%mul, i32 undef, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 undef, i8 7, i32 32) + +// CHK_UINT16_A2_RO: call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %srcbuf_texture_rawbuf, i32 %mul, i32 undef, i8 1, i32 2) +// CHK_UINT16_A2_RO: call void @dx.op.rawBufferStore.i16(i32 140, %dx.types.Handle %dstbuf_UAV_rawbuf, i32 %mul, i32 undef, i16 %{{.*}}, i16 undef, i16 undef, i16 undef, i8 1, i32 2) + +// CHK_UINT16_A8_RO: call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %srcbuf_texture_rawbuf, i32 %mul, i32 undef, i8 1, i32 8) +// CHK_UINT16_A8_RO: call void @dx.op.rawBufferStore.i16(i32 140, %dx.types.Handle %dstbuf_UAV_rawbuf, i32 %mul, i32 undef, i16 %{{.*}}, i16 undef, i16 undef, i16 undef, i8 1, i32 8) + +// CHK_UINT16_A4_RW: call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %srcbuf_UAV_rawbuf, i32 %mul, i32 undef, i8 1, i32 4) +// CHK_UINT16_A4_RW: call void @dx.op.rawBufferStore.i16(i32 140, %dx.types.Handle %dstbuf_UAV_rawbuf, i32 %mul, i32 undef, i16 %{{.*}}, i16 undef, i16 undef, i16 undef, i8 1, i32 4) + +// CHK_UINT16_A16_RW: call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %srcbuf_UAV_rawbuf, i32 %mul, i32 undef, i8 1, i32 16) +// CHK_UINT16_A16_RW: call void @dx.op.rawBufferStore.i16(i32 140, %dx.types.Handle %dstbuf_UAV_rawbuf, i32 %mul, i32 undef, i16 %{{.*}}, i16 undef, i16 undef, i16 undef, i8 1, i32 16) + +// CHK_INT64_A8_RO: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %srcbuf_texture_rawbuf, i32 %mul, i32 undef, i8 3, i32 8) +// CHK_INT64_A8_RO: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %dstbuf_UAV_rawbuf, i32 %mul, i32 undef, i32 %{{.*}}, i32 %{{.*}}, i32 undef, i32 undef, i8 3, i32 8) + +// CHK_INT64_A32_RO: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %srcbuf_texture_rawbuf, i32 %mul, i32 undef, i8 3, i32 32) +// CHK_INT64_A32_RO: call void 
@dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %dstbuf_UAV_rawbuf, i32 %mul, i32 undef, i32 %{{.*}}, i32 %{{.*}}, i32 undef, i32 undef, i8 3, i32 32) + +// CHK_INT64_A16_RW: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %srcbuf_UAV_rawbuf, i32 %mul, i32 undef, i8 3, i32 16) +// CHK_INT64_A16_RW: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %dstbuf_UAV_rawbuf, i32 %mul, i32 undef, i32 %{{.*}}, i32 %{{.*}}, i32 undef, i32 undef, i8 3, i32 16) + +// CHK_INT64_A64_RW: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %srcbuf_UAV_rawbuf, i32 %mul, i32 undef, i8 3, i32 64) +// CHK_INT64_A64_RW: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %dstbuf_UAV_rawbuf, i32 %mul, i32 undef, i32 %{{.*}}, i32 %{{.*}}, i32 undef, i32 undef, i8 3, i32 64) + + +#ifdef SRCRW +RWByteAddressBuffer srcbuf : register(u0); +RWByteAddressBuffer dstbuf : register(u1); +#else +ByteAddressBuffer srcbuf : register(t0); +RWByteAddressBuffer dstbuf : register(u0); +#endif + +[numthreads(1, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + const uint offset = tid.x * ALIGN; +#ifdef CHKSTATUS + uint status = 0; + TY data = srcbuf.AlignedLoad(offset, ALIGN, status); + if (!CheckAccessFullyMapped(status)) return; +#else + TY data = srcbuf.AlignedLoad(offset, ALIGN); +#endif + dstbuf.AlignedStore(offset, ALIGN, data); +} + diff --git a/tools/clang/test/SemaHLSL/aligned_load_buffer_types.hlsl b/tools/clang/test/SemaHLSL/aligned_load_buffer_types.hlsl new file mode 100644 index 0000000000..a6e4dece46 --- /dev/null +++ b/tools/clang/test/SemaHLSL/aligned_load_buffer_types.hlsl @@ -0,0 +1,65 @@ +// RUN: %dxc -E main -T cs_6_2 %s -verify + +// Test that AlignedLoad/AlignedStore only work with ByteAddressBuffer and RWByteAddressBuffer + +ByteAddressBuffer bab; +RWByteAddressBuffer rwbab; + +// Invalid buffer types +Texture2D tex2d; +RWTexture2D rwtex2d; +Buffer typedBuffer; +RWBuffer rwTypedBuffer; 
+StructuredBuffer structuredBuffer; +RWStructuredBuffer rwStructuredBuffer; +AppendStructuredBuffer appendBuffer; +ConsumeStructuredBuffer consumeBuffer; + +[numthreads(1,1,1)] +void main() +{ + uint offset = 0; + uint data; + + // Valid - these should work + data = bab.AlignedLoad(offset, 4); + rwbab.AlignedStore(offset, 4, data); + + // Invalid buffer types - Texture2D + // expected-error@+2 {{no member named 'AlignedLoad' in 'Texture2D >'}} + // expected-error@+1 {{unexpected type name 'uint': expected expression}} + data = tex2d.AlignedLoad(offset, 4); + + // expected-error@+2 {{no member named 'AlignedStore' in 'RWTexture2D >'}} + // expected-error@+1 {{unexpected type name 'uint': expected expression}} + rwtex2d.AlignedStore(offset, 4, data); + + // Invalid buffer types - Buffer (typed) + // expected-error@+2 {{no member named 'AlignedLoad' in 'Buffer >'}} + // expected-error@+1 {{unexpected type name 'uint': expected expression}} + data = typedBuffer.AlignedLoad(offset, 4); + + // expected-error@+2 {{no member named 'AlignedStore' in 'RWBuffer >'}} + // expected-error@+1 {{unexpected type name 'uint': expected expression}} + rwTypedBuffer.AlignedStore(offset, 4, data); + + // Invalid buffer types - StructuredBuffer + // expected-error@+2 {{no member named 'AlignedLoad' in 'StructuredBuffer >'}} + // expected-error@+1 {{unexpected type name 'uint': expected expression}} + data = structuredBuffer.AlignedLoad(offset, 4); + + // expected-error@+2 {{no member named 'AlignedStore' in 'RWStructuredBuffer >'}} + // expected-error@+1 {{unexpected type name 'uint': expected expression}} + rwStructuredBuffer.AlignedStore(offset, 4, data); + + // Invalid buffer types - AppendStructuredBuffer + // expected-error@+2 {{no member named 'AlignedStore' in 'AppendStructuredBuffer >'}} + // expected-error@+1 {{unexpected type name 'uint': expected expression}} + appendBuffer.AlignedStore(offset, 4, data); + + // Invalid buffer types - ConsumeStructuredBuffer + // 
expected-error@+2 {{no member named 'AlignedLoad' in 'ConsumeStructuredBuffer >'}} + // expected-error@+1 {{unexpected type name 'uint': expected expression}} + data = consumeBuffer.AlignedLoad(offset, 4); +} + diff --git a/tools/clang/test/SemaHLSL/aligned_load_errors.hlsl b/tools/clang/test/SemaHLSL/aligned_load_errors.hlsl new file mode 100644 index 0000000000..55746b8519 --- /dev/null +++ b/tools/clang/test/SemaHLSL/aligned_load_errors.hlsl @@ -0,0 +1,82 @@ +// RUN: %dxc -E main -T cs_6_2 -DTY=uint -DALIGN=4 %s -verify +// RUN: %dxc -E main -T cs_6_2 -DTY=uint3 -DALIGN=4 %s -verify +// RUN: %dxc -E main -T cs_6_2 -DTY=float4 -DALIGN=4 %s -verify +// RUN: %dxc -E main -T cs_6_2 -enable-16bit-types -DTY=uint16_t -DALIGN=2 %s -verify +// RUN: %dxc -E main -T cs_6_2 -enable-16bit-types -DTY=int64_t -DALIGN=8 %s -verify +// RUN: %dxc -E main -T cs_6_2 -enable-16bit-types -DTY=double -DALIGN=8 %s -verify + +// Test alignment validation for AlignedLoad/AlignedStore with various types +// ALIGN is set to match the largest scalar type size for each type + +ByteAddressBuffer srcbuf; +RWByteAddressBuffer dstbuf; + +[numthreads(1,1,1)] +void main(uint3 tid : SV_DispatchThreadID) +{ + uint offset = tid.x * ALIGN; + int dynAlign = ALIGN; + TY data; + + // Error: Non-constant alignment value + // expected-error@+1 {{Alignment values require compile-time constant}} + data = srcbuf.AlignedLoad(offset, dynAlign); + + // expected-error@+1 {{Alignment values require compile-time constant}} + dstbuf.AlignedStore(offset, dynAlign, data); + offset += ALIGN; + + // Error: Non-power-of-two alignment (ALIGN - 1) + // When ALIGN=4, this is 3 (not power of 2) + // When ALIGN=2, this is 1 (power of 2, but less than scalar size) + // When ALIGN=8, this is 7 (not power of 2) +#if ALIGN == 2 + // For 16-bit types: ALIGN=2, ALIGN-1=1 + // 1 is power-of-two, but less than scalar size + // expected-error@+1 {{Alignment parameter of 1 bytes must be >= the largest scalar type size 2 bytes}} + data 
= srcbuf.AlignedLoad(offset, ALIGN - 1); + + // expected-error@+1 {{Alignment parameter of 1 bytes must be >= the largest scalar type size 2 bytes}} + dstbuf.AlignedStore(offset, ALIGN - 1, data); +#else + // For 32-bit and 64-bit types: ALIGN-1 is not power-of-two + // expected-error@+1 {{Alignment values require compile-time constant power-of-two}} + data = srcbuf.AlignedLoad(offset, ALIGN - 1); + + // expected-error@+1 {{Alignment values require compile-time constant power-of-two}} + dstbuf.AlignedStore(offset, ALIGN - 1, data); +#endif + offset += ALIGN; + + // Error: Alignment greater than 4096 + // expected-error@+1 {{Alignment values require compile-time constant power-of-two values that are >= largest scalar type size and <= 4096}} + data = srcbuf.AlignedLoad(offset, 4096 * 2); + + // expected-error@+1 {{Alignment values require compile-time constant power-of-two values that are >= largest scalar type size and <= 4096}} + dstbuf.AlignedStore(offset, 4096 * 2, data); + offset += ALIGN; + + // Error: Alignment less than largest scalar type size (ALIGN / 2) + // For ALIGN=4: ALIGN/2=2, error shows "2 bytes must be >= 4 bytes" + // For ALIGN=2: ALIGN/2=1, error shows "1 bytes must be >= 2 bytes" + // For ALIGN=8: ALIGN/2=4, error shows "4 bytes must be >= 8 bytes" +#if ALIGN == 4 + // expected-error@+1 {{Alignment parameter of 2 bytes must be >= the largest scalar type size 4 bytes}} + data = srcbuf.AlignedLoad(offset, ALIGN / 2); + + // expected-error@+1 {{Alignment parameter of 2 bytes must be >= the largest scalar type size 4 bytes}} + dstbuf.AlignedStore(offset, ALIGN / 2, data); +#elif ALIGN == 2 + // expected-error@+1 {{Alignment parameter of 1 bytes must be >= the largest scalar type size 2 bytes}} + data = srcbuf.AlignedLoad(offset, ALIGN / 2); + + // expected-error@+1 {{Alignment parameter of 1 bytes must be >= the largest scalar type size 2 bytes}} + dstbuf.AlignedStore(offset, ALIGN / 2, data); +#elif ALIGN == 8 + // expected-error@+1 {{Alignment 
parameter of 4 bytes must be >= the largest scalar type size 8 bytes}} + data = srcbuf.AlignedLoad(offset, ALIGN / 2); + + // expected-error@+1 {{Alignment parameter of 4 bytes must be >= the largest scalar type size 8 bytes}} + dstbuf.AlignedStore(offset, ALIGN / 2, data); +#endif +} diff --git a/tools/clang/test/SemaHLSL/aligned_load_shader_model.hlsl b/tools/clang/test/SemaHLSL/aligned_load_shader_model.hlsl new file mode 100644 index 0000000000..cdee41cb85 --- /dev/null +++ b/tools/clang/test/SemaHLSL/aligned_load_shader_model.hlsl @@ -0,0 +1,18 @@ +// RUN: %dxc -E main -T cs_6_0 %s -verify +// RUN: %dxc -E main -T cs_6_1 %s -verify + +// Test that AlignedLoad/AlignedStore require SM 6.2+ + +ByteAddressBuffer inputBuffer; +RWByteAddressBuffer outputBuffer; + +[numthreads(1, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) +{ + // expected-error@+1 {{intrinsic AlignedLoad potentially used by ''AlignedLoad'' requires shader model 6.2 or greater}} + uint value = inputBuffer.AlignedLoad(0, 4); + + // expected-error@+1 {{intrinsic AlignedStore potentially used by ''AlignedStore'' requires shader model 6.2 or greater}} + outputBuffer.AlignedStore(0, 4, value); +} + diff --git a/utils/hct/gen_intrin_main.txt b/utils/hct/gen_intrin_main.txt index 8b10f733a8..ed248adfbf 100644 --- a/utils/hct/gen_intrin_main.txt +++ b/utils/hct/gen_intrin_main.txt @@ -952,6 +952,8 @@ $funcT [[]] Load(in uint byteOffset, out uint_only status) : byteaddress_load_s; uint<2> [[]] Load2(in uint byteOffset, out uint_only status) : byteaddress_load_s; uint<3> [[]] Load3(in uint byteOffset, out uint_only status) : byteaddress_load_s; uint<4> [[]] Load4(in uint byteOffset, out uint_only status) : byteaddress_load_s; +$funcT [[ro]] AlignedLoad(in uint byteOffset, in uint alignment) : byteaddress_alignedload; +$funcT [[]] AlignedLoad(in uint byteOffset, in uint alignment, out uint_only status) : byteaddress_alignedload_s; } namespace @@ -966,10 +968,13 @@ $funcT [[]] Load(in uint byteOffset, out 
uint_only status) : byteaddress_load_s; uint<2> [[]] Load2(in uint byteOffset, out uint_only status) : byteaddress_load_s; uint<3> [[]] Load3(in uint byteOffset, out uint_only status) : byteaddress_load_s; uint<4> [[]] Load4(in uint byteOffset, out uint_only status) : byteaddress_load_s; +$funcT [[ro]] AlignedLoad(in uint byteOffset, in uint alignment) : byteaddress_alignedload; +$funcT [[]] AlignedLoad(in uint byteOffset, in uint alignment, out uint_only status) : byteaddress_alignedload_s; void [[]] Store(in uint byteOffset, in $funcT value) : byteaddress_store; void [[]] Store2(in uint byteOffset, in uint<2> value) : byteaddress_store; void [[]] Store3(in uint byteOffset, in uint<3> value) : byteaddress_store; void [[]] Store4(in uint byteOffset, in uint<4> value) : byteaddress_store; +void [[]] AlignedStore(in uint byteOffset, in uint alignment, in $funcT value) : byteaddress_alignedstore; // 64-bit integer interlocks void [[]] InterlockedAdd64(in uint byteOffset, in u64 value); void [[]] InterlockedAdd64(in uint byteOffset, in u64 value, out any_int64 original) : interlockedadd_immediate; diff --git a/utils/hct/hlsl_intrinsic_opcodes.json b/utils/hct/hlsl_intrinsic_opcodes.json index e2d3c6f290..645a621ae3 100644 --- a/utils/hct/hlsl_intrinsic_opcodes.json +++ b/utils/hct/hlsl_intrinsic_opcodes.json @@ -1,6 +1,6 @@ { "IntrinsicOpCodes": { - "Num_Intrinsics": 405, + "Num_Intrinsics": 407, "IOP_AcceptHitAndEndSearch": 0, "IOP_AddUint64": 1, "IOP_AllMemoryBarrier": 2, @@ -405,6 +405,8 @@ "IOP_TriangleObjectPosition": 401, "MOP_CandidateTriangleObjectPosition": 402, "MOP_CommittedTriangleObjectPosition": 403, - "MOP_DxHitObject_TriangleObjectPosition": 404 + "MOP_DxHitObject_TriangleObjectPosition": 404, + "MOP_AlignedLoad": 405, + "MOP_AlignedStore": 406 } }