diff --git a/clang/lib/Basic/Targets/WebAssembly.cpp b/clang/lib/Basic/Targets/WebAssembly.cpp index ba3ff11ad4ea9..46f9bd10f01ec 100644 --- a/clang/lib/Basic/Targets/WebAssembly.cpp +++ b/clang/lib/Basic/Targets/WebAssembly.cpp @@ -123,6 +123,8 @@ void WebAssemblyTargetInfo::getTargetDefines(const LangOptions &Opts, Builder.defineMacro("__wasm_tail_call__"); if (HasWideArithmetic) Builder.defineMacro("__wasm_wide_arithmetic__"); + if (HasLibcallThreadContext) + Builder.defineMacro("__wasm_libcall_thread_context__"); // Note that not all wasm features appear here. For example, // HasCompatctImports diff --git a/clang/lib/Basic/Targets/WebAssembly.h b/clang/lib/Basic/Targets/WebAssembly.h index 808342485cad0..6085197498163 100644 --- a/clang/lib/Basic/Targets/WebAssembly.h +++ b/clang/lib/Basic/Targets/WebAssembly.h @@ -68,6 +68,7 @@ class LLVM_LIBRARY_VISIBILITY WebAssemblyTargetInfo : public TargetInfo { bool HasExtendedConst = false; bool HasFP16 = false; bool HasGC = false; + bool HasLibcallThreadContext = false; bool HasMultiMemory = false; bool HasMultivalue = false; bool HasMutableGlobals = false; @@ -110,6 +111,8 @@ class LLVM_LIBRARY_VISIBILITY WebAssemblyTargetInfo : public TargetInfo { PtrDiffType = SignedLong; IntPtrType = SignedLong; } + if (T.getOS() == llvm::Triple::WASIp3) + HasLibcallThreadContext = true; } StringRef getABI() const override; diff --git a/clang/lib/Driver/ToolChains/WebAssembly.cpp b/clang/lib/Driver/ToolChains/WebAssembly.cpp index e532ef0743cc2..ce0245773b75d 100644 --- a/clang/lib/Driver/ToolChains/WebAssembly.cpp +++ b/clang/lib/Driver/ToolChains/WebAssembly.cpp @@ -88,6 +88,11 @@ static bool WantsPthread(const llvm::Triple &Triple, const ArgList &Args) { return WantsPthread; } +static bool WantsLibcallThreadContext(const llvm::Triple &Triple, + const ArgList &Args) { + return Triple.getOS() == llvm::Triple::WASIp3; +} + void wasm::Linker::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const 
InputInfoList &Inputs, @@ -169,6 +174,9 @@ void wasm::Linker::ConstructJob(Compilation &C, const JobAction &JA, AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs, JA); + if (WantsLibcallThreadContext(ToolChain.getTriple(), Args)) + CmdArgs.push_back("--libcall-thread-context"); + if (WantsPthread(ToolChain.getTriple(), Args)) CmdArgs.push_back("--shared-memory"); diff --git a/clang/test/Preprocessor/wasm-target-features.c b/clang/test/Preprocessor/wasm-target-features.c index 3244fa61c0a4b..d5d0c241d86a3 100644 --- a/clang/test/Preprocessor/wasm-target-features.c +++ b/clang/test/Preprocessor/wasm-target-features.c @@ -217,6 +217,7 @@ // GENERIC-NOT: #define __wasm_simd128__ 1{{$}} // GENERIC-NOT: #define __wasm_tail_call__ 1{{$}} // GENERIC-NOT: #define __wasm_wide_arithmetic__ 1{{$}} +// GENERIC-NOT: #define __wasm_libcall_thread_context__ 1{{$}} // RUN: %clang -E -dM %s -o - 2>&1 \ // RUN: -target wasm32-unknown-unknown -mcpu=bleeding-edge \ @@ -251,3 +252,12 @@ // RUN: | FileCheck %s -check-prefix=BLEEDING-EDGE-NO-SIMD128 // // BLEEDING-EDGE-NO-SIMD128-NOT: #define __wasm_simd128__ 1{{$}} + +// RUN: %clang -E -dM %s -o - 2>&1 \ +// RUN: -target wasm32-wasip3 \ +// RUN: | FileCheck %s -check-prefix=LIBCALL-THREAD-CONTEXT +// RUN: %clang -E -dM %s -o - 2>&1 \ +// RUN: -target wasm64-wasip3 \ +// RUN: | FileCheck %s -check-prefix=LIBCALL-THREAD-CONTEXT + +// LIBCALL-THREAD-CONTEXT: #define __wasm_libcall_thread_context__ 1{{$}} diff --git a/lld/test/wasm/stack-pointer-abi.s b/lld/test/wasm/stack-pointer-abi.s new file mode 100644 index 0000000000000..6bfe0d4a8daa8 --- /dev/null +++ b/lld/test/wasm/stack-pointer-abi.s @@ -0,0 +1,13 @@ +# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown -o %t.o %s +# RUN: wasm-ld --libcall-thread-context -o %t.libcall.wasm %t.o +# RUN: obj2yaml %t.libcall.wasm | FileCheck %s --check-prefix=LIBCALL +# RUN: wasm-ld -o %t.global.wasm %t.o +# RUN: obj2yaml %t.global.wasm | FileCheck %s --check-prefix=GLOBAL + + .globl _start 
+_start: + .functype _start () -> () + end_function + +# LIBCALL: Name: __init_stack_pointer +# GLOBAL: Name: __stack_pointer diff --git a/lld/test/wasm/thread-context-abi-mismatch.s b/lld/test/wasm/thread-context-abi-mismatch.s new file mode 100644 index 0000000000000..069534cbe5762 --- /dev/null +++ b/lld/test/wasm/thread-context-abi-mismatch.s @@ -0,0 +1,22 @@ +# Test that linking object files with mismatched thread context ABIs fails with an error. +# The presence of an import of __stack_pointer from the env module should be treated +# as an indication that the global thread context ABI is being used. + +# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown -o %t.o %s +# RUN: not wasm-ld --libcall-thread-context %t.o -o %t.wasm 2>&1 | FileCheck %s + +# CHECK: object file uses globals for thread context, but --libcall-thread-context was specified + +.globl _start +_start: + .functype _start () -> () + end_function + +.globaltype __stack_pointer, i32 + +.globl use_stack_pointer +use_stack_pointer: + .functype use_stack_pointer () -> () + global.get __stack_pointer + drop + end_function diff --git a/lld/test/wasm/tls-libcall.s b/lld/test/wasm/tls-libcall.s new file mode 100644 index 0000000000000..df8b8f8be0207 --- /dev/null +++ b/lld/test/wasm/tls-libcall.s @@ -0,0 +1,71 @@ +# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown -o %t.o %s +# RUN: wasm-ld --libcall-thread-context --shared-memory -no-gc-sections -o %t.wasm %t.o +# RUN: obj2yaml %t.wasm | FileCheck %s +# RUN: llvm-objdump -d --no-print-imm-hex --no-show-raw-insn %t.wasm | FileCheck %s --check-prefix=DIS + +.globl __wasm_get_tls_base +__wasm_get_tls_base: + .functype __wasm_get_tls_base () -> (i32) + i32.const 0 + end_function + +.globl _start +_start: + .functype _start () -> (i32) + call __wasm_get_tls_base + i32.const tls1@TLSREL + i32.add + i32.load 0 + call __wasm_get_tls_base + i32.const tls2@TLSREL + i32.add + i32.load 0 + i32.add + end_function + +.section .tdata.tls1,"",@ +.globl 
tls1 +tls1: + .int32 1 + .size tls1, 4 + +.section .tdata.tls2,"",@ +.globl tls2 +tls2: + .int32 2 + .size tls2, 4 + +.section .custom_section.target_features,"",@ + .int8 2 + .int8 43 + .int8 11 + .ascii "bulk-memory" + .int8 43 + .int8 7 + .ascii "atomics" + + +# CHECK: GlobalNames: +# CHECK-NEXT: - Index: 0 +# CHECK-NEXT: Name: __init_stack_pointer +# CHECK-NEXT: - Index: 1 +# CHECK-NEXT: Name: __init_tls_base +# CHECK-NEXT: - Index: 2 +# CHECK-NEXT: Name: __tls_size +# CHECK-NEXT: - Index: 3 +# CHECK-NEXT: Name: __tls_align + +# DIS-LABEL: <__wasm_init_memory>: + +# DIS-LABEL: <_start>: +# DIS-EMPTY: +# DIS-NEXT: call 4 +# DIS-NEXT: i32.const 0 +# DIS-NEXT: i32.add +# DIS-NEXT: i32.load 0 +# DIS-NEXT: call 4 +# DIS-NEXT: i32.const 4 +# DIS-NEXT: i32.add +# DIS-NEXT: i32.load 0 +# DIS-NEXT: i32.add +# DIS-NEXT: end diff --git a/lld/wasm/Config.h b/lld/wasm/Config.h index 491bf9233b0cf..71a378a412e9e 100644 --- a/lld/wasm/Config.h +++ b/lld/wasm/Config.h @@ -35,6 +35,7 @@ class Symbol; class DefinedData; class GlobalSymbol; class DefinedFunction; +class UndefinedFunction; class DefinedGlobal; class UndefinedGlobal; class TableSymbol; @@ -64,6 +65,7 @@ struct Config { bool growableTable; bool gcSections; llvm::StringSet<> keepSections; + bool libcallThreadContext; std::optional<std::pair<llvm::StringRef, llvm::StringRef>> memoryImport; std::optional<llvm::StringRef> memoryExport; bool sharedMemory; @@ -252,6 +254,14 @@ struct Ctx { // Used as an address space for function pointers, with each function that // is used as a function pointer being allocated a slot. TableSymbol *indirectFunctionTable; + + // __wasm_set_tls_base + // Function used to set TLS base in libcall thread context modules. + UndefinedFunction *setTLSBase; + + // __wasm_get_tls_base + // Function used to get TLS base in libcall thread context modules.
+ UndefinedFunction *getTLSBase; }; WasmSym sym; diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp index 2cba6ae540526..f2eec055f65b2 100644 --- a/lld/wasm/Driver.cpp +++ b/lld/wasm/Driver.cpp @@ -561,6 +561,7 @@ static void readConfigs(opt::InputArgList &args) { ctx.arg.soName = args.getLastArgValue(OPT_soname); ctx.arg.importTable = args.hasArg(OPT_import_table); ctx.arg.importUndefined = args.hasArg(OPT_import_undefined); + ctx.arg.libcallThreadContext = args.hasArg(OPT_libcall_thread_context); ctx.arg.ltoo = args::getInteger(args, OPT_lto_O, 2); if (ctx.arg.ltoo > 3) error("invalid optimization level for LTO: " + Twine(ctx.arg.ltoo)); @@ -883,6 +884,16 @@ createUndefinedGlobal(StringRef name, llvm::wasm::WasmGlobalType *type) { return sym; } +static UndefinedFunction *createUndefinedFunction(StringRef name, + WasmSignature *signature) { + auto *sym = cast<UndefinedFunction>(symtab->addUndefinedFunction( + name, std::nullopt, std::nullopt, WASM_SYMBOL_UNDEFINED, nullptr, + signature, true)); + ctx.arg.allowUndefinedSymbols.insert(sym->getName()); + sym->isUsedInRegularObj = true; + return sym; +} + static InputGlobal *createGlobal(StringRef name, bool isMutable) { llvm::wasm::WasmGlobal wasmGlobal; bool is64 = ctx.arg.is64.value_or(false); @@ -917,17 +928,26 @@ static void createSyntheticSymbols() { true}; static llvm::wasm::WasmGlobalType mutableGlobalTypeI64 = {WASM_TYPE_I64, true}; + ctx.sym.callCtors = symtab->addSyntheticFunction( "__wasm_call_ctors", WASM_SYMBOL_VISIBILITY_HIDDEN, make<SyntheticFunction>(nullSignature, "__wasm_call_ctors")); bool is64 = ctx.arg.is64.value_or(false); + auto stack_pointer_name = + ctx.arg.libcallThreadContext ? "__init_stack_pointer" : "__stack_pointer"; if (ctx.isPic) { - ctx.sym.stackPointer = - createUndefinedGlobal("__stack_pointer", ctx.arg.is64.value_or(false) - ? &mutableGlobalTypeI64 - : &mutableGlobalTypeI32); + if (ctx.arg.libcallThreadContext) { + ctx.sym.stackPointer = createUndefinedGlobal( + stack_pointer_name, + ctx.arg.is64.value_or(false) ?
&globalTypeI64 : &globalTypeI32); + } else { + ctx.sym.stackPointer = createUndefinedGlobal(stack_pointer_name, + ctx.arg.is64.value_or(false) + ? &mutableGlobalTypeI64 + : &mutableGlobalTypeI32); + } // For PIC code, we import two global variables (__memory_base and // __table_base) from the environment and use these as the offset at // which to load our static data and function table. @@ -940,14 +960,18 @@ static void createSyntheticSymbols() { ctx.sym.tableBase->markLive(); } else { // For non-PIC code - ctx.sym.stackPointer = createGlobalVariable("__stack_pointer", true); + ctx.sym.stackPointer = + createGlobalVariable(stack_pointer_name, !ctx.arg.libcallThreadContext); ctx.sym.stackPointer->markLive(); } if (ctx.arg.sharedMemory) { // TLS symbols are all hidden/dso-local + auto tls_base_name = + ctx.arg.libcallThreadContext ? "__init_tls_base" : "__tls_base"; ctx.sym.tlsBase = - createGlobalVariable("__tls_base", true, WASM_SYMBOL_VISIBILITY_HIDDEN); + createGlobalVariable(tls_base_name, !ctx.arg.libcallThreadContext, + WASM_SYMBOL_VISIBILITY_HIDDEN); ctx.sym.tlsSize = createGlobalVariable("__tls_size", false, WASM_SYMBOL_VISIBILITY_HIDDEN); ctx.sym.tlsAlign = createGlobalVariable("__tls_align", false, @@ -956,6 +980,17 @@ static void createSyntheticSymbols() { "__wasm_init_tls", WASM_SYMBOL_VISIBILITY_HIDDEN, make<SyntheticFunction>(is64 ?
i64ArgSignature : i32ArgSignature, "__wasm_init_tls")); + if (ctx.arg.libcallThreadContext) { + ctx.sym.tlsBase->markLive(); + ctx.sym.tlsSize->markLive(); + ctx.sym.tlsAlign->markLive(); + static WasmSignature setTLSBaseSignature{{}, {ValType::I32}}; + ctx.sym.setTLSBase = + createUndefinedFunction("__wasm_set_tls_base", &setTLSBaseSignature); + static WasmSignature getTLSBaseSignature{{ValType::I32}, {}}; + ctx.sym.getTLSBase = + createUndefinedFunction("__wasm_get_tls_base", &getTLSBaseSignature); + } } } diff --git a/lld/wasm/Options.td b/lld/wasm/Options.td index a009cac7f57ad..144eee33061e1 100644 --- a/lld/wasm/Options.td +++ b/lld/wasm/Options.td @@ -238,6 +238,9 @@ def page_size: JJ<"page-size=">, def initial_memory: JJ<"initial-memory=">, HelpText<"Initial size of the linear memory">; +def libcall_thread_context: FF<"libcall-thread-context">, + HelpText<"Use library calls for thread context access instead of globals.">; + def max_memory: JJ<"max-memory=">, HelpText<"Maximum size of the linear memory">; diff --git a/lld/wasm/SyntheticSections.cpp b/lld/wasm/SyntheticSections.cpp index 9ba43ef0fae0b..d1a01c7ec3f9d 100644 --- a/lld/wasm/SyntheticSections.cpp +++ b/lld/wasm/SyntheticSections.cpp @@ -52,6 +52,16 @@ class SubSection { raw_string_ostream os{body}; }; +void writeGetTLSBase(const Ctx &ctx, raw_ostream &os) { + if (ctx.arg.libcallThreadContext) { + writeU8(os, WASM_OPCODE_CALL, "call"); + writeUleb128(os, ctx.sym.getTLSBase->getFunctionIndex(), "function index"); + } else { + writeU8(os, WASM_OPCODE_GLOBAL_GET, "GLOBAL_GET"); + writeUleb128(os, ctx.sym.tlsBase->getGlobalIndex(), "__tls_base"); + } +} + } // namespace bool DylinkSection::isNeeded() const { @@ -474,11 +484,12 @@ void GlobalSection::generateRelocationCode(raw_ostream &os, bool TLS) const { if (auto *d = dyn_cast<DefinedData>(sym)) { // Get __memory_base - writeU8(os, WASM_OPCODE_GLOBAL_GET, "GLOBAL_GET"); if (sym->isTLS()) - writeUleb128(os, ctx.sym.tlsBase->getGlobalIndex(), "__tls_base"); - else
+ writeGetTLSBase(ctx, os); + else { + writeU8(os, WASM_OPCODE_GLOBAL_GET, "GLOBAL_GET"); writeUleb128(os, ctx.sym.memoryBase->getGlobalIndex(), "__memory_base"); + } // Add the virtual address of the data symbol writePtrConst(os, d->getVA(), is64, "offset"); @@ -519,7 +530,7 @@ void GlobalSection::writeBody() { // the correct runtime value during `__wasm_apply_global_relocs`. if (!ctx.arg.extendedConst && ctx.isPic && !sym->isTLS()) mutable_ = true; - // With multi-theadeding any TLS globals must be mutable since they get + // With multi-threading any TLS globals must be mutable since they get // set during `__wasm_apply_global_tls_relocs` if (ctx.arg.sharedMemory && sym->isTLS()) mutable_ = true; diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp index 038a8b3f5417d..1758953292160 100644 --- a/lld/wasm/Writer.cpp +++ b/lld/wasm/Writer.cpp @@ -123,6 +123,15 @@ class Writer { llvm::SmallDenseMap<StringRef, OutputSegment *> segmentMap; }; +void writeSetTLSBase(const Ctx &ctx, raw_ostream &os) { + if (ctx.arg.libcallThreadContext) { + writeU8(os, WASM_OPCODE_CALL, "call"); + writeUleb128(os, ctx.sym.setTLSBase->getFunctionIndex(), "function index"); + } else { + writeU8(os, WASM_OPCODE_GLOBAL_SET, "GLOBAL_SET"); + writeUleb128(os, ctx.sym.tlsBase->getGlobalIndex(), "__tls_base"); + } +} } // anonymous namespace void Writer::calculateCustomSections() { @@ -635,6 +644,16 @@ void Writer::populateTargetFeatures() { return segment->live && segment->isTLS(); }; tlsUsed = tlsUsed || llvm::any_of(file->segments, isTLS); + + // Ensure that we're not mixing incompatible thread context models + if (ctx.arg.libcallThreadContext && + llvm::any_of(file->getSymbols(), [](const auto &sym) { + return sym && sym->getName() == "__stack_pointer" && + sym->kind() == Symbol::UndefinedGlobalKind && + sym->importModule && sym->importModule == "env"; + })) + error(fileName + ": object file uses globals for thread context, " + "but --libcall-thread-context was specified"); } if (inferFeatures) @@ -680,7 +699,9 @@
void Writer::populateTargetFeatures() { if (feature.Prefix == WASM_FEATURE_PREFIX_DISALLOWED) continue; objectFeatures.insert(feature.Name); - if (disallowed.contains(feature.Name)) + // libcall-thread-context is handled as a special case above + if (disallowed.contains(feature.Name) && + feature.Name != "libcall-thread-context") error(Twine("Target feature '") + feature.Name + "' used in " + fileName + " is disallowed by " + disallowed[feature.Name] + ". Use --no-check-features to suppress."); @@ -1356,9 +1377,9 @@ void Writer::createInitMemoryFunction() { "i32.add"); } - // When we initialize the TLS segment we also set the `__tls_base` - // global. This allows the runtime to use this static copy of the - // TLS data for the first/main thread. + // When we initialize the TLS segment we also set the TLS base. + // This allows the runtime to use this static copy of the TLS data + // for the first/main thread. if (ctx.arg.sharedMemory && s->isTLS()) { if (ctx.isPic) { // Cache the result of the addionion in local 0 @@ -1367,8 +1388,7 @@ void Writer::createInitMemoryFunction() { } else { writePtrConst(os, s->startVA, is64, "destination address"); } - writeU8(os, WASM_OPCODE_GLOBAL_SET, "GLOBAL_SET"); - writeUleb128(os, ctx.sym.tlsBase->getGlobalIndex(), "__tls_base"); + writeSetTLSBase(ctx, os); if (ctx.isPic) { writeU8(os, WASM_OPCODE_LOCAL_GET, "local.tee"); writeUleb128(os, 1, "local 1"); @@ -1641,8 +1661,7 @@ void Writer::createInitTLSFunction() { writeU8(os, WASM_OPCODE_LOCAL_GET, "local.get"); writeUleb128(os, 0, "local index"); - writeU8(os, WASM_OPCODE_GLOBAL_SET, "global.set"); - writeUleb128(os, ctx.sym.tlsBase->getGlobalIndex(), "global index"); + writeSetTLSBase(ctx, os); // FIXME(wvo): this local needs to be I64 in wasm64, or we need an extend // op. 
diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h index 58d6b44a94d4d..6f52281628e46 100644 --- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h +++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h @@ -110,8 +110,9 @@ enum TOF { MO_MEMORY_BASE_REL, // On a symbol operand this indicates that the immediate is the symbol - // address relative the __tls_base wasm global. - // Only applicable to data symbols. + // address relative to the TLS base. This is retrieved through + // __wasm_get_tls_base() when using libcall thread context, and the __tls_base + // global otherwise. Only applicable to data symbols. MO_TLS_BASE_REL, // On a symbol operand this indicates that the immediate is the symbol diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp index edea99e629407..c25972343c96a 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp @@ -236,8 +236,7 @@ MCSymbol *WebAssemblyAsmPrinter::getOrCreateWasmSymbol(StringRef Name) { if (Name == "__stack_pointer" || Name == "__tls_base" || Name == "__memory_base" || Name == "__table_base" || Name == "__tls_size" || Name == "__tls_align") { - bool Mutable = - Name == "__stack_pointer" || Name == "__tls_base"; + bool Mutable = Name == "__stack_pointer" || Name == "__tls_base"; WasmSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL); WasmSym->setGlobalType(wasm::WasmGlobalType{ uint8_t(Subtarget.hasAddr64() ? wasm::WASM_TYPE_I64 @@ -265,6 +264,14 @@ MCSymbol *WebAssemblyAsmPrinter::getOrCreateWasmSymbol(StringRef Name) { wasm::ValType AddrType = Subtarget.hasAddr64() ? 
wasm::ValType::I64 : wasm::ValType::I32; Params.push_back(AddrType); + } else if (Name == "__wasm_get_stack_pointer" || + Name == "__wasm_get_tls_base") { + WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION); + Returns.push_back(wasm::ValType::I32); + } else if (Name == "__wasm_set_stack_pointer" || + Name == "__wasm_set_tls_base") { + WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION); + Params.push_back(wasm::ValType::I32); } else { // Function symbols WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION); WebAssembly::getLibcallSignature(Subtarget, Name, Returns, Params); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp index 5c3127e2d3dc6..e05b23d255894 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp @@ -366,7 +366,14 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) { const TargetRegisterClass *RC = MRI.getRegClass(OldReg); Register NewReg = MRI.createVirtualRegister(RC); auto InsertPt = std::next(MI.getIterator()); - if (UseEmpty[OldReg.virtRegIndex()]) { + // When libcalls are emitted for thread context, the frame base vreg + // has an implicit use in the DW_AT_frame_base debug info, so we + // should not remove it. 
+ bool NeedsRegForDebug = + MFI.isFrameBaseVirtual() && OldReg == MFI.getFrameBaseVreg() && + MF.getFunction().getSubprogram() && + MF.getSubtarget<WebAssemblySubtarget>().hasLibcallThreadContext(); + if (UseEmpty[OldReg.virtRegIndex()] && !NeedsRegForDebug) { unsigned Opc = getDropOpcode(RC); MachineInstr *Drop = BuildMI(MBB, InsertPt, MI.getDebugLoc(), TII->get(Opc)) diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp index e0e72316b7e2c..50f820086c5a6 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp @@ -135,13 +135,19 @@ bool WebAssemblyFrameLowering::needsSPForLocalFrame( any_of(MRI.use_operands(getSPReg(MF)), [](MachineOperand &MO) { return !MO.isImplicit(); }); + // With libcall thread context, we need SP in the prolog when debug + // info is present so we can allocate a local for DWARF to reference. + bool NeedsSPForDebug = + MF.getFunction().getSubprogram() && + MF.getSubtarget<WebAssemblySubtarget>().hasLibcallThreadContext(); + return MFI.getStackSize() || MFI.adjustsStack() || hasFP(MF) || - HasExplicitSPUse; + HasExplicitSPUse || NeedsSPForDebug; } // In function with EH pads, we need to make a copy of the value of -// __stack_pointer global in SP32/64 register, in order to use it when -// restoring __stack_pointer after an exception is caught. +// the stack pointer in the SP32/64 register, in order to use it when +// restoring the stack pointer after an exception is caught. bool WebAssemblyFrameLowering::needsPrologForEH( const MachineFunction &MF) const { auto EHType = MF.getTarget().getMCAsmInfo().getExceptionHandlingType(); @@ -151,15 +157,16 @@ bool WebAssemblyFrameLowering::needsPrologForEH( /// Returns true if this function needs a local user-space stack pointer. /// Unlike a machine stack pointer, the wasm user stack pointer is a global -/// variable, so it is loaded into a register in the prolog.
+/// variable or managed by library calls, so it is loaded +/// into a register in the prolog. bool WebAssemblyFrameLowering::needsSP(const MachineFunction &MF) const { return needsSPForLocalFrame(MF) || needsPrologForEH(MF); } /// Returns true if the local user-space stack pointer needs to be written back -/// to __stack_pointer global by this function (this is not meaningful if -/// needsSP is false). If false, the stack red zone can be used and only a local -/// SP is needed. +/// to the stack pointer global/thread context by this function (this is not +/// meaningful if needsSP is false). If false, the stack red zone can be used +/// and only a local SP is needed. bool WebAssemblyFrameLowering::needsSPWriteback( const MachineFunction &MF) const { auto &MFI = MF.getFrameInfo(); @@ -227,17 +234,25 @@ WebAssemblyFrameLowering::getOpcGlobSet(const MachineFunction &MF) { : WebAssembly::GLOBAL_SET_I32; } -void WebAssemblyFrameLowering::writeSPToGlobal( +void WebAssemblyFrameLowering::writeBackSP( unsigned SrcReg, MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator &InsertStore, const DebugLoc &DL) const { const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); - const char *ES = "__stack_pointer"; - auto *SPSymbol = MF.createExternalSymbolName(ES); + if (MF.getSubtarget<WebAssemblySubtarget>().hasLibcallThreadContext()) { + const char *ES = "__wasm_set_stack_pointer"; + auto *SPSymbol = MF.createExternalSymbolName(ES); + BuildMI(MBB, InsertStore, DL, TII->get(WebAssembly::CALL)) + .addExternalSymbol(SPSymbol) + .addReg(SrcReg); + } else { + const char *ES = "__stack_pointer"; + auto *SPSymbol = MF.createExternalSymbolName(ES); - BuildMI(MBB, InsertStore, DL, TII->get(getOpcGlobSet(MF))) - .addExternalSymbol(SPSymbol) - .addReg(SrcReg); + BuildMI(MBB, InsertStore, DL, TII->get(getOpcGlobSet(MF))) + .addExternalSymbol(SPSymbol) + .addReg(SrcReg); + } } MachineBasicBlock::iterator @@ -251,7 +266,7 @@ WebAssemblyFrameLowering::eliminateCallFramePseudoInstr( if (I->getOpcode()
== TII->getCallFrameDestroyOpcode() && needsSPWriteback(MF)) { DebugLoc DL = I->getDebugLoc(); - writeSPToGlobal(getSPReg(MF), MF, MBB, I, DL); + writeBackSP(getSPReg(MF), MF, MBB, I, DL); } return MBB.erase(I); } @@ -283,10 +298,17 @@ void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF, if (StackSize) SPReg = MRI.createVirtualRegister(PtrRC); - const char *ES = "__stack_pointer"; - auto *SPSymbol = MF.createExternalSymbolName(ES); - BuildMI(MBB, InsertPt, DL, TII->get(getOpcGlobGet(MF)), SPReg) - .addExternalSymbol(SPSymbol); + if (ST.hasLibcallThreadContext()) { + const char *ES = "__wasm_get_stack_pointer"; + auto *SPSymbol = MF.createExternalSymbolName(ES); + BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CALL), SPReg) + .addExternalSymbol(SPSymbol); + } else { + const char *ES = "__stack_pointer"; + auto *SPSymbol = MF.createExternalSymbolName(ES); + BuildMI(MBB, InsertPt, DL, TII->get(getOpcGlobGet(MF)), SPReg) + .addExternalSymbol(SPSymbol); + } bool HasBP = hasBP(MF); if (HasBP) { @@ -322,7 +344,7 @@ void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF, .addReg(getSPReg(MF)); } if (StackSize && needsSPWriteback(MF)) { - writeSPToGlobal(getSPReg(MF), MF, MBB, InsertPt, DL); + writeBackSP(getSPReg(MF), MF, MBB, InsertPt, DL); } } @@ -364,7 +386,7 @@ void WebAssemblyFrameLowering::emitEpilogue(MachineFunction &MF, SPReg = SPFPReg; } - writeSPToGlobal(SPReg, MF, MBB, InsertPt, DL); + writeBackSP(SPReg, MF, MBB, InsertPt, DL); } bool WebAssemblyFrameLowering::isSupportedStackID( @@ -386,6 +408,11 @@ WebAssemblyFrameLowering::getDwarfFrameBase(const MachineFunction &MF) const { unsigned LocalNum = MFI.getFrameBaseLocal(); Loc.Location.WasmLoc = {WebAssembly::TI_LOCAL, LocalNum}; } else { + // There is no __stack_pointer global in libcall thread context mode, so + // TI_GLOBAL_RELOC would produce a bogus relocation. We take care to ensure + // that this code is not reached in that case, but assert here to be sure. 
+ assert(!MF.getSubtarget<WebAssemblySubtarget>().hasLibcallThreadContext()); + // TODO: This should work on a breakpoint at a function with no frame, // but probably won't work for traversing up the stack. Loc.Location.WasmLoc = {WebAssembly::TI_GLOBAL_RELOC, 0}; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h b/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h index 710d5173d64db..f836f4e95a93b 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h @@ -23,7 +23,7 @@ class WebAssemblyFrameLowering final : public TargetFrameLowering { public: /// Size of the red zone for the user stack (leaf functions can use this much /// space below the stack pointer without writing it back to __stack_pointer - /// global). + /// global/__wasm_set_stack_pointer). // TODO: (ABI) Revisit and decide how large it should be. static const size_t RedZoneSize = 128; @@ -47,11 +47,10 @@ class WebAssemblyFrameLowering final : public TargetFrameLowering { bool needsPrologForEH(const MachineFunction &MF) const; - /// Write SP back to __stack_pointer global. - void writeSPToGlobal(unsigned SrcReg, MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator &InsertStore, - const DebugLoc &DL) const; + /// Write SP back to __stack_pointer global, or call __wasm_set_stack_pointer. + void writeBackSP(unsigned SrcReg, MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator &InsertStore, + const DebugLoc &DL) const; // Returns the index of the WebAssembly local to which the stack object // FrameIndex in MF should be allocated, or std::nullopt.
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp index 510be1ca64fa8..c7b57588877b7 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp @@ -337,10 +337,8 @@ void WebAssemblyDAGToDAGISel::Select(SDNode *Node) { MVT PtrVT = TLI.getPointerTy(CurDAG->getDataLayout()); switch (IntNo) { case Intrinsic::wasm_tls_base: { - MachineSDNode *TLSBase = CurDAG->getMachineNode( - GlobalGetIns, DL, PtrVT, MVT::Other, - CurDAG->getTargetExternalSymbol("__tls_base", PtrVT), - Node->getOperand(0)); + MachineSDNode *TLSBase = llvm::WebAssembly::getTLSBase( + *CurDAG, DL, Subtarget, Node->getOperand(0)); ReplaceNode(Node, TLSBase); return; } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index dda91ac19b44a..7f22dc0fed135 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -2087,17 +2087,11 @@ WebAssemblyTargetLowering::LowerGlobalTLSAddress(SDValue Op, model == GlobalValue::LocalDynamicTLSModel || (model == GlobalValue::GeneralDynamicTLSModel && getTargetMachine().shouldAssumeDSOLocal(GV))) { - // For DSO-local TLS variables we use offset from __tls_base + // For DSO-local TLS variables we use offset from __tls_base, or + // __wasm_get_tls_base() if using libcall thread context. MVT PtrVT = getPointerTy(DAG.getDataLayout()); - auto GlobalGet = PtrVT == MVT::i64 ? 
WebAssembly::GLOBAL_GET_I64 - : WebAssembly::GLOBAL_GET_I32; - const char *BaseName = MF.createExternalSymbolName("__tls_base"); - - SDValue BaseAddr( - DAG.getMachineNode(GlobalGet, DL, PtrVT, - DAG.getTargetExternalSymbol(BaseName, PtrVT)), - 0); + SDValue BaseAddr(WebAssembly::getTLSBase(DAG, DL, Subtarget), 0); SDValue TLSOffset = DAG.getTargetGlobalAddress( GV, DL, PtrVT, GA->getOffset(), WebAssemblyII::MO_TLS_BASE_REL); @@ -2283,14 +2277,7 @@ SDValue WebAssemblyTargetLowering::LowerIntrinsic(SDValue Op, } case Intrinsic::thread_pointer: { - MVT PtrVT = getPointerTy(DAG.getDataLayout()); - auto GlobalGet = PtrVT == MVT::i64 ? WebAssembly::GLOBAL_GET_I64 - : WebAssembly::GLOBAL_GET_I32; - const char *TlsBase = MF.createExternalSymbolName("__tls_base"); - return SDValue( - DAG.getMachineNode(GlobalGet, DL, PtrVT, - DAG.getTargetExternalSymbol(TlsBase, PtrVT)), - 0); + return SDValue(WebAssembly::getTLSBase(DAG, DL, Subtarget), 0); } } } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp index 664f0f2e25ffc..cb96e313e5f1a 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp @@ -376,8 +376,8 @@ bool WebAssemblyLateEHPrepare::removeUnnecessaryUnreachables( } // After the stack is unwound due to a thrown exception, the __stack_pointer -// global can point to an invalid address. This inserts instructions that -// restore __stack_pointer global. +// global/__wasm_get_stack_pointer() can point to an invalid address. This +// inserts instructions that restore the stack pointer state. 
bool WebAssemblyLateEHPrepare::restoreStackPointer(MachineFunction &MF) { const auto *FrameLowering = static_cast<const WebAssemblyFrameLowering *>( MF.getSubtarget().getFrameLowering()); @@ -390,11 +390,11 @@ bool WebAssemblyLateEHPrepare::restoreStackPointer(MachineFunction &MF) { continue; Changed = true; - // Insert __stack_pointer restoring instructions at the beginning of each EH + // Insert stack pointer restoring instructions at the beginning of each EH // pad, after the catch instruction. Here it is safe to assume that SP32 - // holds the latest value of __stack_pointer, because the only exception for - // this case is when a function uses the red zone, but that only happens - // with leaf functions, and we don't restore __stack_pointer in leaf + // holds the latest value of the stack pointer, because the only exception + // for this case is when a function uses the red zone, but that only happens + // with leaf functions, and we don't restore the stack pointer in leaf // functions anyway. auto InsertPos = MBB.begin(); // Skip EH_LABELs in the beginning of an EH pad if present.
@@ -404,8 +404,8 @@ bool WebAssemblyLateEHPrepare::restoreStackPointer(MachineFunction &MF) { WebAssembly::isCatch(InsertPos->getOpcode()) && "catch/catch_all should be present in every EH pad at this point"); ++InsertPos; // Skip the catch instruction - FrameLowering->writeSPToGlobal(FrameLowering->getSPReg(MF), MF, MBB, - InsertPos, MBB.begin()->getDebugLoc()); + FrameLowering->writeBackSP(FrameLowering->getSPReg(MF), MF, MBB, InsertPos, + MBB.begin()->getDebugLoc()); } return Changed; } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp index 97f2ed0a828ba..9015ceab87fb7 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp @@ -251,6 +251,10 @@ static void query(const MachineInstr &MI, bool &Read, bool &Write, !strcmp(MI.getOperand(0).getSymbolName(), "__stack_pointer")) StackPointer = true; + if (MI.isCall() && MI.getOperand(0).isSymbol() && + !strcmp(MI.getOperand(0).getSymbolName(), "__wasm_get_stack_pointer")) + StackPointer = true; + // Analyze calls. if (MI.isCall()) { queryCallee(MI, Read, Write, Effects, StackPointer); @@ -287,17 +291,28 @@ static MachineInstr *getVRegDef(unsigned Reg, const MachineInstr *Insert, // generalization of MachineRegisterInfo::hasOneNonDBGUse that uses // LiveIntervals to handle complex cases in optimized code. static bool hasSingleUse(unsigned Reg, MachineRegisterInfo &MRI, - WebAssemblyFunctionInfo &MFI, bool Optimize, + const MachineFunction &MF, bool Optimize, MachineInstr *Def, LiveIntervals *LIS) { + auto &MFI = *MF.getInfo(); + // The frame base always has an implicit DBG use as DW_AT_frame_base. + if (MFI.isFrameBaseVirtual() && MFI.getFrameBaseVreg() == Reg) { + // When using global thread context, the frame base can be encoded + // as an offset from __stack_pointer, so the vreg can be stackified. 
+ // However, when using libcall thread context, we need to keep the frame + // base vreg around if debug info is enabled, because there is no + // global to refer to. + bool NeedsRegForDebug = + MF.getFunction().getSubprogram() && + MF.getSubtarget().hasLibcallThreadContext(); + if (!Optimize || NeedsRegForDebug) + return false; + } if (!Optimize) { // Using "hasOneUse" instead of "hasOneNonDBGUse" here because we don't // want to stackify DBG_VALUE operands - WASM stack locations are less // useful and less widely supported than WASM local locations. if (!MRI.hasOneUse(Reg)) return false; - // The frame base always has an implicit DBG use as DW_AT_frame_base. - if (MFI.isFrameBaseVirtual() && MFI.getFrameBaseVreg() == Reg) - return false; return true; } @@ -918,7 +933,7 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) { bool CanMove = SameBlock && isSafeToMove(Def, &Use, Insert, MFI, MRI, Optimize) && !TreeWalker.isOnStack(Reg); - if (CanMove && hasSingleUse(Reg, MRI, MFI, Optimize, DefI, LIS)) { + if (CanMove && hasSingleUse(Reg, MRI, MF, Optimize, DefI, LIS)) { Insert = moveForSingleUse(Reg, Use, DefI, MBB, Insert, LIS, MFI, MRI); // If we are removing the frame base reg completely, remove the debug @@ -960,7 +975,7 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) { Register UseReg = SubsequentUse->getReg(); // TODO: This single-use restriction could be relaxed by using tees if (DefReg != UseReg || - !hasSingleUse(DefReg, MRI, MFI, Optimize, nullptr, nullptr)) + !hasSingleUse(DefReg, MRI, MF, Optimize, nullptr, nullptr)) break; MFI.stackifyVReg(MRI, DefReg); ++SubsequentDef; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp b/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp index 641eef73044cd..6326b7d76db82 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp @@ -40,6 +40,10 @@ 
WebAssemblySubtarget::initializeSubtargetDependencies(StringRef CPU, ParseSubtargetFeatures(CPU, /*TuneCPU*/ CPU, FS); + // WASIP3 implies using the libcall thread context. + if (TargetTriple.getOS() == Triple::WASIp3) + HasLibcallThreadContext = true; + FeatureBitset Bits = getFeatureBits(); // bulk-memory implies bulk-memory-opt diff --git a/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h b/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h index 798dea25ef5e6..5c6f4cb5b36ff 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h @@ -52,6 +52,7 @@ class WebAssemblySubtarget final : public WebAssemblyGenSubtargetInfo { bool HasExtendedConst = false; bool HasFP16 = false; bool HasGC = false; + bool HasLibcallThreadContext = false; bool HasMultiMemory = false; bool HasMultivalue = false; bool HasMutableGlobals = false; @@ -116,6 +117,7 @@ class WebAssemblySubtarget final : public WebAssemblyGenSubtargetInfo { bool hasExtendedConst() const { return HasExtendedConst; } bool hasFP16() const { return HasFP16; } bool hasGC() const { return HasGC; } + bool hasLibcallThreadContext() const { return HasLibcallThreadContext; } bool hasMultiMemory() const { return HasMultiMemory; } bool hasMultivalue() const { return HasMultivalue; } bool hasMutableGlobals() const { return HasMutableGlobals; } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp index f533e839fe6d0..1361dd99b7072 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp @@ -410,7 +410,7 @@ class CoalesceFeaturesAndStripAtomics final : public ModulePass { // Code compiled without atomics or bulk-memory may have had its atomics or // thread-local data lowered to nonatomic operations or non-thread-local // data. 
In that case, we mark the pseudo-feature "shared-mem" as disallowed - // to tell the linker that it would be unsafe to allow this code ot be used + // to tell the linker that it would be unsafe to allow this code to be used // in a module with shared memory. if (Stripped) { M.addModuleFlag(Module::ModFlagBehavior::Error, "wasm-feature-shared-mem", diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp index 890486778e700..ac8df67fe7557 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp @@ -194,3 +194,26 @@ bool WebAssembly::canLowerReturn(size_t ResultSize, const WebAssemblySubtarget *Subtarget) { return ResultSize <= 1 || canLowerMultivalueReturn(Subtarget); } + +MachineSDNode *WebAssembly::getTLSBase(SelectionDAG &DAG, const SDLoc &DL, + const WebAssemblySubtarget *Subtarget, + SDValue Chain) { + MVT PtrVT = Subtarget->hasAddr64() ? MVT::i64 : MVT::i32; + + unsigned Opcode; + const char *SymName; + if (Subtarget->hasLibcallThreadContext()) { + Opcode = WebAssembly::CALL; + SymName = "__wasm_get_tls_base"; + } else { + Opcode = PtrVT == MVT::i64 ? 
WebAssembly::GLOBAL_GET_I64 + : WebAssembly::GLOBAL_GET_I32; + SymName = "__tls_base"; + } + + SDValue Sym = DAG.getTargetExternalSymbol(SymName, PtrVT); + + if (Chain.getNode()) + return DAG.getMachineNode(Opcode, DL, {PtrVT, MVT::Other}, {Sym, Chain}); + return DAG.getMachineNode(Opcode, DL, PtrVT, Sym); +} diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.h b/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.h index 046b1b5db2a79..0827791d93657 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.h @@ -15,6 +15,7 @@ #ifndef LLVM_LIB_TARGET_WEBASSEMBLY_UTILS_WEBASSEMBLYUTILITIES_H #define LLVM_LIB_TARGET_WEBASSEMBLY_UTILS_WEBASSEMBLYUTILITIES_H +#include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/Support/CommandLine.h" namespace llvm { @@ -27,6 +28,9 @@ class MCSymbolWasm; class TargetRegisterClass; class WebAssemblyFunctionInfo; class WebAssemblySubtarget; +class MachineSDNode; +class SDLoc; +class SelectionDAG; namespace WebAssembly { @@ -73,6 +77,13 @@ bool canLowerMultivalueReturn(const WebAssemblySubtarget *Subtarget); /// memory. 
bool canLowerReturn(size_t ResultSize, const WebAssemblySubtarget *Subtarget); +// Get the TLS base value for the current target +// If using libcall thread context, calls +// __wasm_get_tls_base, otherwise, global.get __tls_base +MachineSDNode *getTLSBase(SelectionDAG &DAG, const SDLoc &DL, + const WebAssemblySubtarget *Subtarget, + const SDValue Chain = SDValue()); + } // end namespace WebAssembly } // end namespace llvm diff --git a/llvm/test/CodeGen/WebAssembly/stack-abi.ll b/llvm/test/CodeGen/WebAssembly/stack-abi.ll new file mode 100644 index 0000000000000..684abb9d80028 --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/stack-abi.ll @@ -0,0 +1,24 @@ +; RUN: llc < %s --mtriple=wasm32-wasip3 -asm-verbose=false | FileCheck --check-prefix=LIBCALL %s +; RUN: llc < %s --mtriple=wasm32-unknown-unknown -asm-verbose=false | FileCheck --check-prefix=GLOBAL %s + +declare void @force_sp_save() +define void @use_stack() #0 { + %1 = alloca i32, align 4 + %2 = alloca ptr, align 4 + store ptr %1, ptr %2, align 4 + call void @force_sp_save() + ret void +} + +; LIBCALL-LABEL: use_stack: +; LIBCALL: call __wasm_get_stack_pointer +; LIBCALL: call __wasm_set_stack_pointer +; LIBCALL-NOT: global.get __stack_pointer +; LIBCALL-NOT: global.set __stack_pointer + +; GLOBAL-LABEL: use_stack: +; GLOBAL: global.get __stack_pointer +; GLOBAL: global.set __stack_pointer +; GLOBAL-NOT: call __wasm_get_stack_pointer +; GLOBAL-NOT: call __wasm_set_stack_pointer + diff --git a/llvm/test/CodeGen/WebAssembly/thread_pointer.ll b/llvm/test/CodeGen/WebAssembly/thread_pointer.ll index 18716988673db..875f0f4c84c39 100644 --- a/llvm/test/CodeGen/WebAssembly/thread_pointer.ll +++ b/llvm/test/CodeGen/WebAssembly/thread_pointer.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=wasm32-unknown-unknown | FileCheck %s --check-prefix=WASM32 ; RUN: llc < %s -mtriple=wasm64-unknown-unknown | FileCheck %s --check-prefix=WASM64 +; RUN: llc < 
%s -mtriple=wasm32-wasip3 | FileCheck %s --check-prefix=WASM32-LIBCALL declare ptr @llvm.thread.pointer() @@ -16,6 +17,13 @@ define ptr @thread_pointer() nounwind { ; WASM64-NEXT: # %bb.0: ; WASM64-NEXT: global.get __tls_base ; WASM64-NEXT: # fallthrough-return +; +; WASM32-LIBCALL-LABEL: thread_pointer: +; WASM32-LIBCALL: .functype thread_pointer () -> (i32) +; WASM32-LIBCALL-NEXT: # %bb.0: +; WASM32-LIBCALL-NEXT: call __wasm_get_tls_base +; WASM32-LIBCALL-NEXT: # fallthrough-return +; %1 = tail call ptr @llvm.thread.pointer() ret ptr %1 } diff --git a/llvm/test/CodeGen/WebAssembly/tls-local-exec.ll b/llvm/test/CodeGen/WebAssembly/tls-local-exec.ll index dc0d40c7973ad..1807ea2263338 100644 --- a/llvm/test/CodeGen/WebAssembly/tls-local-exec.ll +++ b/llvm/test/CodeGen/WebAssembly/tls-local-exec.ll @@ -1,13 +1,16 @@ ; Run the tests with the `localexec` TLS mode specified. ; RUN: sed -e 's/\[\[TLS_MODE\]\]/(localexec)/' %s | llc -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -mattr=+bulk-memory,atomics - | FileCheck --check-prefixes=CHECK,TLS %s ; RUN: sed -e 's/\[\[TLS_MODE\]\]/(localexec)/' %s | llc -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -mattr=+bulk-memory,atomics -fast-isel - | FileCheck --check-prefixes=CHECK,TLS %s +; RUN: sed -e 's/\[\[TLS_MODE\]\]/(localexec)/' %s | llc -mtriple=wasm32-wasip3 -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -mattr=bulk-memory,atomics -fast-isel - | FileCheck --check-prefixes=CHECK,TLS-LIBCALL %s ; Also, run the same tests without a specified TLS mode--this should still emit `localexec` code on non-Emscripten targtes which don't currently support dynamic linking. 
; RUN: sed -e 's/\[\[TLS_MODE\]\]//' %s | llc -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -mattr=+bulk-memory,atomics - | FileCheck --check-prefixes=CHECK,TLS %s ; RUN: sed -e 's/\[\[TLS_MODE\]\]//' %s | llc -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -mattr=+bulk-memory,atomics -fast-isel - | FileCheck --check-prefixes=CHECK,TLS %s +; RUN: sed -e 's/\[\[TLS_MODE\]\]//' %s | llc -mtriple=wasm32-wasip3 -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -mattr=bulk-memory,atomics -fast-isel - | FileCheck --check-prefixes=CHECK,TLS-LIBCALL %s ; Finally, when bulk memory is disabled, no TLS code should be generated. ; RUN: sed -e 's/\[\[TLS_MODE\]\]/(localexec)/' %s | llc -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -mattr=-bulk-memory,atomics - | FileCheck --check-prefixes=CHECK,NO-TLS %s +; RUN: sed -e 's/\[\[TLS_MODE\]\]/(localexec)/' %s | llc -mtriple=wasm32-wasip3 -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -mattr=-bulk-memory,atomics - | FileCheck --check-prefixes=CHECK,NO-TLS %s target triple = "wasm32-unknown-unknown" ; CHECK-LABEL: address_of_tls: @@ -18,6 +21,11 @@ define i32 @address_of_tls() { ; TLS-NEXT: i32.add ; TLS-NEXT: return + ; TLS-LIBCALL-DAG: call __wasm_get_tls_base + ; TLS-LIBCALL-DAG: i32.const tls@TLSREL + ; TLS-LIBCALL-NEXT: i32.add + ; TLS-LIBCALL-NEXT: return + ; NO-TLS-NEXT: i32.const tls ; NO-TLS-NEXT: return %p = call ptr @llvm.threadlocal.address.p0(ptr @tls) @@ -33,6 +41,11 @@ define i32 @address_of_tls_external() { ; TLS-NEXT: i32.add ; TLS-NEXT: return + ; TLS-LIBCALL-DAG: call __wasm_get_tls_base + ; TLS-LIBCALL-DAG: i32.const tls_external@TLSREL + ; TLS-LIBCALL-NEXT: i32.add + ; TLS-LIBCALL-NEXT: return + ; NO-TLS-NEXT: i32.const tls_external ; NO-TLS-NEXT: return %p = call ptr @llvm.threadlocal.address.p0(ptr @tls_external) @@ -48,6 
+61,11 @@ define ptr @ptr_to_tls() { ; TLS-NEXT: i32.add ; TLS-NEXT: return + ; TLS-LIBCALL-DAG: call __wasm_get_tls_base + ; TLS-LIBCALL-DAG: i32.const tls@TLSREL + ; TLS-LIBCALL-NEXT: i32.add + ; TLS-LIBCALL-NEXT: return + ; NO-TLS-NEXT: i32.const tls ; NO-TLS-NEXT: return %p = call ptr @llvm.threadlocal.address.p0(ptr @tls) @@ -63,6 +81,12 @@ define i32 @tls_load() { ; TLS-NEXT: i32.load 0 ; TLS-NEXT: return + ; TLS-LIBCALL-DAG: call __wasm_get_tls_base + ; TLS-LIBCALL-DAG: i32.const tls@TLSREL + ; TLS-LIBCALL-NEXT: i32.add + ; TLS-LIBCALL-NEXT: i32.load 0 + ; TLS-LIBCALL-NEXT: return + ; NO-TLS-NEXT: i32.const 0 ; NO-TLS-NEXT: i32.load tls ; NO-TLS-NEXT: return @@ -80,6 +104,12 @@ define void @tls_store(i32 %x) { ; TLS-NEXT: i32.store 0 ; TLS-NEXT: return + ; TLS-LIBCALL-DAG: call __wasm_get_tls_base + ; TLS-LIBCALL-DAG: i32.const tls@TLSREL + ; TLS-LIBCALL-NEXT: i32.add + ; TLS-LIBCALL-NEXT: i32.store 0 + ; TLS-LIBCALL-NEXT: return + ; NO-TLS-NEXT: i32.const 0 ; NO-TLS-NEXT: i32.store tls ; NO-TLS-NEXT: return @@ -99,6 +129,7 @@ define i32 @tls_size() { ; CHECK: .type tls,@object ; TLS-NEXT: .section .tbss.tls,"T",@ +; TLS-LIBCALL-NEXT: .section .tbss.tls,"T",@ ; NO-TLS-NEXT: .section .bss.tls,"",@ ; CHECK-NEXT: .p2align 2 ; CHECK-NEXT: tls: diff --git a/llvm/test/DebugInfo/WebAssembly/thread-context-abi.ll b/llvm/test/DebugInfo/WebAssembly/thread-context-abi.ll new file mode 100644 index 0000000000000..641529c209559 --- /dev/null +++ b/llvm/test/DebugInfo/WebAssembly/thread-context-abi.ll @@ -0,0 +1,39 @@ +; Ensure that using libcall thread context with an empty function produces a frame base +; that uses a local, and that using the global thread context produces a frame base that +; uses the __stack_pointer global. 
+ +; Test generated via: clang --target=wasm32-unknown-unknown-wasm foo.c -g -O2 +; void foo() {} + +; RUN: llc < %s -filetype=obj -mtriple=wasm32-wasip3 -o - | llvm-dwarfdump - | FileCheck %s --check-prefix=LIBCALL +; RUN: llc < %s -filetype=obj -o - | llvm-dwarfdump - | FileCheck %s --check-prefix=GLOBAL + +target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-i128:128-n32:64-S128-ni:1:10:20" +target triple = "wasm32-unknown-unknown" + +define hidden void @foo() local_unnamed_addr #0 !dbg !9 { + ret void +} + +attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+bulk-memory,+bulk-memory-opt,+call-indirect-overlong,+multivalue,+mutable-globals,+nontrapping-fptoint,+reference-types,+sign-ext" } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2, !3} +!llvm.ident = !{!4} +!llvm.errno.tbaa = !{!5} + +!0 = distinct !DICompileUnit(language: DW_LANG_C11, file: !1, producer: "clang version 23.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "test.c", directory: "") +!2 = !{i32 7, !"Dwarf Version", i32 4} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = !{!"clang version 23.0.0"} +!5 = !{!6, !6, i64 0} +!6 = !{!"int", !7, i64 0} +!7 = !{!"omnipotent char", !8, i64 0} +!8 = !{!"Simple C/C++ TBAA"} +!9 = distinct !DISubprogram(name: "caller", scope: !1, file: !1, line: 2, type: !10, scopeLine: 2, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, keyInstructions: true) +!10 = !DISubroutineType(types: !11) +!11 = !{null} + +; LIBCALL: DW_AT_frame_base (DW_OP_WASM_location 0x0 0x0, DW_OP_stack_value) +; GLOBAL: DW_AT_frame_base (DW_OP_WASM_location 0x3 0x0, DW_OP_stack_value)