storm-lang-0.6.20 (git commit 2cc6afbe0c2836190932a0ac8a737079ac4c7371)

storm-lang-0.6.20/.dir-locals.el:
(("doc" . ((fundamental-mode . ((indent-tabs-mode . nil) (tab-width . 4))))))

storm-lang-0.6.20/.gdbinit:
# Place the following line in the file ~/.gdbinit to load this file.
# add-auto-load-safe-path ~/Projects/storm/.gdbinit
handle SIGSEGV nostop noprint
# Old signals:
handle SIGXFSZ nostop noprint
handle SIGXCPU nostop noprint
# New signals
handle SIG34 nostop noprint
handle SIG35 nostop noprint

storm-lang-0.6.20/.gitignore:
# Compiled source #
###################
*.com
*.class
*.dll
*.exe
*.o
*.so
*.so.*

# Temporary files #
###################
*~

# Packages #
############
# it's better to unpack these files and commit the raw source
# git has its own built in compression methods
*.7z
*.dmg
*.gz
*.iso
*.jar
*.rar
*.tar
*.zip

# Logs and databases #
######################
*.log
*.sql
*.sqlite
*.ncb
*.suo

# OS generated files #
######################
.DS_Store*
ehthumbs.db
Icon?
Thumbs.db
*.aps
*.user
debug
debug64
release
release64
release_*
slow
slow_*
*.ncb
*.suo
Tools/
build
build64
build_*
log/
ReadMe.txt
*.aps
\#*\#

# External code in other git repos.
External/
mymake/

# Generated code.
*/Gen/*
!*/Gen/.gitignore
!Core/Gen/*

# Documentation pack files for Storm.
root/doc
*_doc

# Database files (sqlite)
*.db
html/

storm-lang-0.6.20/.gitmodules:
[submodule "mps"]
	path = mps
	url = ../mps.git
[submodule "Linux/backtrace"]
	path = Linux/backtrace
	url = ../linux/backtrace.git

storm-lang-0.6.20/.myproject:
#Storm has a somewhat unusual notion of release and debug configurations.
#The default debug configuration is actually a release configuration,
#but with some debug flags added. This is because Storm is painfully slow
#without compiler optimizations, and with other debug features enabled,
#such as the debug heap. There is also a mode, called "slow", that enables
#the actual debug mode (i.e. no optimizations etc.).

[project]
#Only build targets containing .mymake-files.
explicitTargets=yes

[project,release,!dist]
input+=Main
input+=Test

[project,release]
execute=no

[build,!slow,!release]
#Always 'release' mode, except when 'slow' is specified here.
all+=release
all+=storm_debug

[build,release]
#When compiling in release mode, use link-time code generation.
all+=release
all+=storm_release

[build,slow]
all+=storm_slow

# Forward build flags:
[build,dist]
all+=dist

[build,nostatic]
all+=nostatic

[build,noskia]
Gui+=noskia

[build,cairogl]
Gui+=usecairogl

[build,nobacktrace]
Utils+=nobacktrace

[build,localmariadb]
SQL+=localmariadb

# Compatibility flag. For various backwards-compatibility fixes.
# Currently, we disable large file support in SQLite, since that causes issues on Ubuntu 18.04 LTS.
# We will phase this out eventually.
[build,compat]
all+=compat

[compat]
define+=STORM_COMPAT

[build]
#Regular libraries linked into Storm and/or any Storm libraries.
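#(Illustrative note, not part of the original file: each "Target+=type" line
# below attaches an extra build type to a target, and mymake merges the
# matching [sections] into that target's configuration. A hypothetical new
# shared library "Foo" would likely need the same three lines as e.g. Gui:
#   Foo+=lib
#   Foo+=stormpp
#   Foo+=sharedlib
# "Foo" is an invented name, used for illustration only.)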
Core+=lib
Code+=lib
Compiler+=lib
OS+=lib
Shared+=lib
Gc+=lib
Utils+=lib
SoundLib+=lib
SoundLib+=extern_lib

#Libraries that need the storm preprocessor to be run.
Compiler+=stormpp
TestLib+=stormpp
Gui+=stormpp
Graphics+=stormpp
Sound+=stormpp
SQL+=stormpp
Crypto+=stormpp

#Libraries linked into shared libraries loaded by Storm at runtime.
TestLib+=sharedlib
Gui+=sharedlib
Graphics+=sharedlib
Sound+=sharedlib
SQL+=sharedlib
Crypto+=sharedlib

#Libraries that require position independent code on some platforms. If you intend to put the compiler itself
#inside a shared library, add 'Compiler+=pic' and 'Gc+=pic' here.
Core+=pic
OS+=pic
Shared+=pic
Utils+=pic

[deps]
TestLib+=CppTypes
Gui+=CppTypes
Compiler+=CppTypes
Sound+=CppTypes
Graphics+=CppTypes
SQL+=CppTypes
Crypto+=CppTypes

[deps,!dist]
#To make sure everything is built.
#When building for Debian we want a bit more control over what we are building.
Compiler+=TestLib
Compiler+=Gui
Compiler+=Graphics
Compiler+=Sound
Compiler+=SQL
Compiler+=Crypto

#Select which GC to use.
[build,mps]
all+=mps

[build,smm]
all+=smm

# Forward 64-bit flag if present.
[build,windows,64]
all+=64

#Add variables that indicate the GC choice. We add them to the entire project, even though
#it is only necessary for Gc, Test and Main.
[mps]
gcSuffix=_mps

[smm]
gcSuffix=_smm

[!extern_lib]
#Global build parameters.
pch=stdafx.h
include+=./
include+=../

[storm_debug]
#Turn on some runtime checks (implemented purely in Storm).
define+=FAST_DEBUG

[storm_debug]
buildDir=build/
execDir=../debug/

[storm_debug,windows,64]
buildDir=build64/
execDir=../debug64/
linkFlags+=/MACHINE:X64

[storm_debug,windows]
#Generate a pdb file.
flags+=/Zi /Fd
linkFlags+=/DEBUG /INCREMENTAL:NO /PDB:.pdb

[unix]
flags=-std=c++11 -pipe
cflags+=-pipe
flags+=

[storm_debug,unix]
#We do not need such aggressive optimizations...
opt+=-O1
#Generate debug information.
flags+=-g
cflags+=-g

[storm_release]
buildDir=release/
execDir=../release/

[storm_release,windows,64]
buildDir=release64/
execDir=../release64/
linkFlags+=/MACHINE:X64

[storm_release,windows]
#Enable link-time code generation. Too slow to use regularly, but gives good performance!
flags+=/GL
linkFlags+=/LTCG

[storm_release,lib,windows]
#Need an extra flag to the linker...
link=lib /LTCG /nologo /OUT:

[storm_release,unix]
#We do not need O3.
opt=-O2

[pic,unix]
#All libraries need to be compiled with the -fPIC flag. At least on X86-64.
flags+=-fPIC
cflags+=-fPIC

[storm_slow]
buildDir=slow/
execDir=../slow/

[nostatic]
define+=NOSTATIC_BUILD

[windows]
#Tell the Win32 API we're working with UTF16.
define+=_UNICODE
define+=UNICODE

[windows,!64]
#Compile asm files on X86.
ext+=asm
noIncludes+=*.asm
compile+=*.asm:1!ml /c /nologo /Fo /safeseh /W3 /Zi

[windows,64]
#Compile asm files on X64.
ext+=asm64
noIncludes+=*.asm64
compile+=*.asm64:1!ml64 /c /nologo /Fo /W3 /Zi /Ta

[unix]
flags+=-Wno-unknown-pragmas
flags+=-Wno-reorder
flags+=-Wno-terminate
flags+=-Wno-unused-parameter
flags+=-Wno-missing-field-initializers
flags+=-Wno-pmf-conversions
flags+=-Wno-switch
flags+=-Wno-parentheses
flags+=-Wno-unused-function
flags+=-Wno-pragmas
#Gives out-of-bounds warnings due to "dynamic arrays" at the end of structs.
flags+=-Wno-stringop-overflow
#We need to align functions to even addresses, otherwise they will be seen as vtable offsets.
flags+=-falign-functions=2
#Do not export all symbols from .so-files. Storm assumes that functions and variables in different
#modules are different variables and may thus contain different values.
This is not the default on UNIX #systems, at least not when using GCC. flags+=-fvisibility=hidden cflags+=-fvisibility=hidden linkFlags+=-pthread # Note: We exclude ".s" since we want to use the preprocessor in general. ext+=S noIncludes+=*.s noIncludes+=*.S compile+=*.S:gcc -g -c -o [stormpp] stormpp=CppTypes stormppUses=--use stormProvides=./ stormUses=../Core/ stormppUsing=--using packagePath=../root/ docName=doc preBuild+= --template ../Core/Gen/CppTypes.cpp --out Gen/CppTypes.cpp --asm --doc preBuildCreates+=Gen/CppTypes.cpp [stormpp,windows,!64] stormppAsmTemplate=../Core/Gen/CppVTables.VS_X86 stormppAsmOut=Gen/CppVTables.asm preBuildCreates+=Gen/CppVTables.asm [stormpp,windows,64] stormppAsmTemplate=../Core/Gen/CppVTables.VS_X64 stormppAsmOut=Gen/CppVTables.asm64 preBuildCreates+=Gen/CppVTables.asm64 [stormpp,unix] stormppAsmTemplate=../Core/Gen/CppVTables.GCC stormppAsmOut=Gen/CppVTables.S preBuildCreates+=Gen/CppVTables.S [sharedlib] packagePath=../root// docName=_doc [sharedlib,!unix] postBuild+=if not exist "" mkdir postBuild+=1!copy [sharedlib,unix] flags+=-fPIC cflags+=-fPIC linkFlags+=-Wl,-z,defs postBuild+=mkdir -p postBuild+=cp [sharedlib,storm_debug] libPrefix=Debug [sharedlib,storm_debug,windows,64] libPrefix=Debug64 [sharedlib,storm_release] libPrefix=Release [sharedlib,storm_release,windows,64] libPrefix=Release64 [sharedlib,storm_slow] libPrefix=Slow [sharedlib,storm_slow,windows,64] libPrefix=Slow64 # No prefix on Dist releases. [sharedlib,storm_release,dist] libPrefix= #No PCH for c-files. [windows] compile+=*.c:1!cl /c /Fo [unix] compile+=*.c:gcc -Wno-unknown-pragmas -Wno-pragmas -std=c99 -O3 -Wno-maybe-uninitialized -c -o storm-lang-0.6.20/Code/000077500000000000000000000000001456562313500145465ustar00rootroot00000000000000storm-lang-0.6.20/Code/.mymake000066400000000000000000000000551456562313500160320ustar00rootroot00000000000000[] #Needed to mark this as a Mymake project. storm-lang-0.6.20/Code/ActiveBlock.cpp000066400000000000000000000002661456562313500174440ustar00rootroot00000000000000#include "stdafx.h" #include "ActiveBlock.h" namespace code { ActiveBlock::ActiveBlock(Block block, Nat activated, Label pos) : block(block), activated(activated), pos(pos) {} } storm-lang-0.6.20/Code/ActiveBlock.h000066400000000000000000000007661456562313500171160ustar00rootroot00000000000000#pragma once #include "Block.h" #include "Label.h" namespace code { STORM_PKG(core.asm); /** * Generic data structure used in various back-ends to keep track of active blocks during code * generation. * * Used by the X64 and Arm64 backends among others. */ class ActiveBlock { STORM_VALUE; public: ActiveBlock(Block block, Nat activated, Label pos); // Which block? Block block; // Which activation ID? Nat activated; // Where does the block start? 
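// (Illustrative note, not from the original source: a back-end would
// typically record one ActiveBlock per block as the block becomes active,
// e.g.:
//   active->push(ActiveBlock(block, activationId, startLabel));
// where 'active', 'activationId' and 'startLabel' are hypothetical locals
// of that back-end, not names declared in this header.)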
Label pos; }; } storm-lang-0.6.20/Code/Arena.cpp000066400000000000000000000024271456562313500163050ustar00rootroot00000000000000#include "stdafx.h" #include "Arena.h" #include "Reg.h" #include "X86/Arena.h" #include "X64/Arena.h" #include "Arm64/Arena.h" #include "Core/Str.h" namespace code { Arena::Arena() {} Ref Arena::external(const wchar *name, const void *ptr) const { return Ref(externalSource(name, ptr)); } RefSource *Arena::externalSource(const wchar *name, const void *ptr) const { RefSource *src = new (this) StrRefSource(name); src->setPtr(ptr); return src; } void Arena::removeFnRegs(RegSet *from) const { from->remove(ptrA); from->remove(ptrB); from->remove(ptrC); } #if defined(X86) && defined(WINDOWS) Arena *arena(EnginePtr e) { return new (e.v) x86::Arena(); } #elif defined(X64) && defined(WINDOWS) Arena *arena(EnginePtr e) { return new (e.v) x64::WindowsArena(); } #elif defined(X64) && defined(POSIX) Arena *arena(EnginePtr e) { return new (e.v) x64::PosixArena(); } #elif defined(ARM64) && defined(POSIX) Arena *arena(EnginePtr e) { return new (e.v) arm64::Arena(); } #else #error "Please note which is the default arena for your platform." #endif Binary *codeBinaryImpl(GcCode *refs) { return (Binary *)refs->refs[0].pointer; } Binary *codeBinary(const void *fn) { // All backends do this. return codeBinaryImpl(runtime::codeRefs((void *)fn)); } } storm-lang-0.6.20/Code/Arena.h000066400000000000000000000103171456562313500157470ustar00rootroot00000000000000#pragma once #include "Core/TObject.h" #include "Core/EnginePtr.h" #include "Output.h" #include "Operand.h" namespace code { STORM_PKG(core.asm); class Listing; class Binary; class RegSet; class TypeDesc; /** * An arena represents a collection of compiled code and external references for some architecture. * * Abstract class, there is one instantiation for each supported platform. */ class Arena : public ObjectOn { STORM_ABSTRACT_CLASS; public: // Create an arena. Arena(); // Create external references. Ref external(const wchar *name, const void *ptr) const; RefSource *externalSource(const wchar *name, const void *ptr) const; /** * Transform and translate code into machine code. */ // Transform the code in preparation for this backend's code generation. This is // backend-specific. 'owner' is the binary object that will be called to handle exceptions. virtual Listing *STORM_FN transform(Listing *src) const ABSTRACT; // Translate a previously transformed listing into machine code for this arena. virtual void STORM_FN output(Listing *src, Output *to) const ABSTRACT; /** * Create output objects for this backend. */ // Create an offset-computing output. virtual LabelOutput *STORM_FN labelOutput() const ABSTRACT; // Create a code-generating output based on sizes computed by a LabelOutput. virtual CodeOutput *STORM_FN codeOutput(Binary *owner, LabelOutput *size) const ABSTRACT; // Remove all registers not preserved during a function call on this platform. This // implementation removes ptrA, ptrB and ptrC, but other Arena implementations may want to // remove others as well. virtual void STORM_FN removeFnRegs(RegSet *from) const; /** * Other backend-specific things. */ // Create a function that calls another function (optionally with a pointer sized parameter) // to figure out which function to actually call. Useful when implementing lazy compilation. // // Calls 'fn' with 'param' (always pointer-sized or empty) to compute the // actual function to call. 
The actual function (as well as the 'function' implemented by // the redirect) takes params as defined by 'params' and returns 'result'. // // These redirect objects are *not* platform independent! virtual Listing *STORM_FN redirect(Bool member, TypeDesc *result, Array *params, Ref fn, Operand param) ABSTRACT; // Create a function that calls another (pre-determined) function and appends an 'EnginePtr' // object as the first parameter to the other function. Calling member functions in this // manner is not supported. virtual Listing *STORM_FN engineRedirect(TypeDesc *result, Array *params, Ref fn, Operand engine) ABSTRACT; /** * Get the location of the first parameter for a function call. Assumes that a member function is called. * * The location is acquired in two steps: first, an implementation asks the ID of the * parameter location by calling the 'firstParamId(TypeDesc *)' function. This returns one * out of several possible integers describing the parameter location. The number of * possible values can be acquired by calling 'firstParamId(null)'. * * The ID can then be passed to 'firstParamLoc' to get an Operand describing the location. * * This scheme is used so that classes like VTableCalls can detect when two functions with * different return values have the same vtable stub. This allows it to re-use the stubs. */ // Get the ID of the location of the first param. virtual Nat STORM_FN firstParamId(MAYBE(TypeDesc *) desc) ABSTRACT; // Access the location of the first parameter in a function size. The returned Operand is // always pointer-sized. virtual Operand STORM_FN firstParamLoc(Nat id) ABSTRACT; // Get a parameter that can safely be used to implement function dispatches. virtual Reg STORM_FN functionDispatchReg() ABSTRACT; }; // Create an arena for this platform. Arena *STORM_FN arena(EnginePtr e); // Extract the Binary associated with a function. This is only valid for code generated with the current backend. // 'fn' is expected to be a pointer to the start of a code allocation. Binary *codeBinary(const void *fn); Binary *codeBinaryImpl(GcCode *refs); } storm-lang-0.6.20/Code/Arm64/000077500000000000000000000000001456562313500154375ustar00rootroot00000000000000storm-lang-0.6.20/Code/Arm64/Arena.cpp000066400000000000000000000117771456562313500172060ustar00rootroot00000000000000#include "stdafx.h" #include "Arena.h" #include "Asm.h" #include "AsmOut.h" #include "Output.h" #include "Code/Listing.h" #include "Code/Output.h" #include "RemoveInvalid.h" #include "Layout.h" #include "Params.h" #include "../Exception.h" #include "Code/PosixEh/StackInfo.h" namespace code { namespace arm64 { Arena::Arena() {} Listing *Arena::transform(Listing *l) const { #if defined(POSIX) && defined(ARM64) code::eh::activatePosixInfo(); #endif // Remove unsupported OP-codes, replacing them with their equivalents. l = code::transform(l, this, new (this) RemoveInvalid()); // Expand variables and function calls as well as function prolog and epilog. 
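// (Presumably the order matters here: RemoveInvalid runs first so that the
// Layout pass below only ever sees operand combinations that the Arm64
// encoder in AsmOut.cpp can emit directly.)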
l = code::transform(l, this, new (this) Layout()); return l; } void Arena::output(Listing *src, Output *to) const { code::arm64::output(src, to); to->finish(); } LabelOutput *Arena::labelOutput() const { return new (this) LabelOutput(8); } CodeOutput *Arena::codeOutput(Binary *owner, LabelOutput *size) const { return new (this) CodeOut(owner, size->offsets, size->size, size->refs); } void Arena::removeFnRegs(RegSet *from) const { for (size_t i = 0; i < fnDirtyCount; i++) from->remove(fnDirtyRegs[i]); } Listing *Arena::redirect(Bool member, TypeDesc *result, Array *params, Ref fn, Operand param) { Listing *l = new (this) Listing(this); // Generate a layout of all parameters so we can properly restore them later. Params *layout = layoutParams(result, params); Result res = layout->result(); // Note: We want to use the 'prolog' and 'epilog' functionality so that exceptions from // 'fn' are able to propagate through this stub properly. *l << prolog(); // Store the registers used for parameters inside variables on the stack. Array *vars = new (this) Array(layout->registerCount(), Var()); for (Nat i = 0; i < layout->registerCount(); i++) { if (layout->registerParam(i) != Param()) { Var &v = vars->at(i); v = l->createVar(l->root(), Size::sLong); *l << mov(v, asSize(layout->registerSrc(i), Size::sLong)); } } // If result is in memory, we need to save/restore x8 as well! Var resVar; if (res.memoryRegister() != noReg) { resVar = l->createVar(l->root(), Size::sPtr); *l << mov(resVar, ptrr(8)); } // Call 'fn' to obtain the actual function to call. if (!param.empty()) *l << fnParam(ptrDesc(engine()), param); *l << fnCall(fn, member, ptrDesc(engine()), ptrA); // Save the output from x0 to another register, otherwise parameters will overwrite it. x17 is good. *l << mov(ptrr(17), ptrA); // Restore the registers. for (Nat i = 0; i < layout->registerCount(); i++) { Var v = vars->at(i); if (v != Var()) *l << mov(asSize(layout->registerSrc(i), Size::sLong), v); } if (res.memoryRegister() != noReg) { *l << mov(ptrr(8), resVar); } // Note: The epilog will preserve all registers in this case since there are no destructors to call! *l << epilog(); *l << jmp(ptrr(17)); return l; } static Reg nextIntReg(Params *params, Nat &id) { while (id > 0) { Reg r = params->registerSrc(--id); if (r == noReg || isVectorReg(r)) continue; if (params->registerParam(id) == Param()) continue; return r; } return noReg; } Listing *Arena::engineRedirect(TypeDesc *result, Array *params, Ref fn, Operand engine) { Listing *l = new (this) Listing(this); // Examine parameters to see what we need to do. Aarch64 is a bit tricky since some // register usage is "aligned" to even numbers. For this reason, we produce two layouts // and "diff" them. Params *called = new (this) Params(); Params *toCall = new (this) Params(); toCall->add(0, Primitive(primitive::pointer, Size::sPtr, Offset())); for (Nat i = 0; i < params->count(); i++) { called->add(i + 1, params->at(i)); toCall->add(i + 1, params->at(i)); } if (toCall->stackCount() > 0 || called->stackCount() > 0) throw new (this) InvalidValue(S("Can not create an engine redirect for this function. ") S("It has too many (integer) parameters.")); // Traverse backwards to ensure we don't overwrite anything. 
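// (Worked example, assuming the standard AAPCS64 integer argument
// registers x0-x7: for a function taking two pointer parameters, 'called'
// assigns them to x0 and x1, while 'toCall' assigns the engine pointer to
// x0 and the two parameters to x1 and x2. Walking backwards therefore
// emits
//   mov x2, x1
//   mov x1, x0
// so no source register is overwritten before it has been copied.)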
Nat calledId = called->registerCount(); Nat toCallId = toCall->registerCount(); while (true) { // Find the next source register: Reg srcReg = nextIntReg(called, calledId); Reg destReg = nextIntReg(toCall, toCallId); if (srcReg == noReg) break; assert(destReg, L"Internal inconsistency when creating a redirect stub!"); *l << mov(destReg, srcReg); } // Now, we can simply put the engine ptr in x0 and jump to the function we need to call. *l << mov(ptrr(0), engine); *l << jmp(fn); return l; } Nat Arena::firstParamId(MAYBE(TypeDesc *) desc) { if (!desc) return 1; return 0; } Operand Arena::firstParamLoc(Nat id) { return ptrr(0); } Reg Arena::functionDispatchReg() { return ptrr(17); // We can also use x16. x17 is nice as we use that elsewhere. } } } storm-lang-0.6.20/Code/Arm64/Arena.h000066400000000000000000000021721456562313500166400ustar00rootroot00000000000000#pragma once #include "../Arena.h" namespace code { namespace arm64 { STORM_PKG(core.asm.arm64); /** * Arena for Arm64 (Aarch64), for UNIX platforms (Windows might be the same) */ class Arena : public code::Arena { STORM_CLASS; public: // Create. STORM_CTOR Arena(); /** * Transform. */ virtual Listing *STORM_FN transform(Listing *src) const; virtual void STORM_FN output(Listing *src, Output *to) const; /** * Outputs. */ virtual LabelOutput *STORM_FN labelOutput() const; virtual CodeOutput *STORM_FN codeOutput(Binary *owner, LabelOutput *size) const; /** * Registers. */ virtual void STORM_FN removeFnRegs(RegSet *from) const; /** * Misc. */ virtual Listing *STORM_FN redirect(Bool member, TypeDesc *result, Array *params, Ref fn, Operand param); virtual Listing *STORM_FN engineRedirect(TypeDesc *result, Array *params, Ref fn, Operand engine); virtual Nat STORM_FN firstParamId(MAYBE(TypeDesc *) desc); virtual Operand STORM_FN firstParamLoc(Nat id); virtual Reg STORM_FN functionDispatchReg(); }; } } storm-lang-0.6.20/Code/Arm64/Asm.cpp000066400000000000000000000264011456562313500166660ustar00rootroot00000000000000#include "stdafx.h" #include "Asm.h" #include "../Listing.h" #include "../Exception.h" namespace code { namespace arm64 { // We map registers as follows: // ptrStack (1) <-> sp // ptrFrame (2) <-> x29 // ptrA (3) <-> x0 // ptrB (4) <-> x1 // ptrC (5) <-> x2 // 0x?30..0x?3F <-> x3..x18 // 0x?40..0x?4F <-> x19..x28,x30,xzr,pc // 0x?50..0x?5F <-> q0..q15 // 0x?60..0x?6F <-> q16..q31 // Arm integer register to storm register. Nat armIntToStorm(Nat arm) { if (arm <= 2) return 0x003 + arm; else if (arm <= 18) return 0x030 + arm - 3; else if (arm <= 28) return 0x040 + arm - 19; else if (arm == 29) return ptrFrame; else if (arm == 30) return 0x04A; else if (arm == 31) return 0x04B; else if (arm == 32) return ptrStack; else if (arm == 33) return 0x04C; else return noReg; } // Storm reg number to Arm int register. Nat stormToArmInt(Reg stormReg) { Nat storm = stormReg & 0xFF; Nat type = storm >> 4; if (storm == 0x01) { return 32; // sp } else if (storm == 0x2) { // Reg. 29 is frame ptr. 
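// (Example of the mapping documented above: storm register 0x35 is in the
// 0x?30..0x?3F band, so it maps to x(0x5 + 3) = x8.)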
return 29; } else if (type == 0x0) { return storm - 0x3; } else if (type == 0x3) { return (storm & 0xF) + 3; } else if (type == 0x4) { if (storm < 0x4A) return storm - 0x40 + 19; else if (storm == 0x4A) return 30; else if (storm == 0x4B) return 31; // xzr else if (storm == 0x4C) return 33; // pc } return -1; } Reg xr(Nat id) { return Reg(armIntToStorm(id) | 0x800); } Reg wr(Nat id) { return Reg(armIntToStorm(id) | 0x400); } Reg ptrr(Nat id) { return Reg(armIntToStorm(id) | 0x000); } Reg dr(Nat id) { return Reg(0x850 + id); } Reg sr(Nat id) { return Reg(0x450 + id); } Reg br(Nat id) { return Reg(0x150 + id); } const Reg pc = Reg(0x04C); const Reg sp = ptrStack; const Reg pzr = Reg(0x04B); const Reg xzr = Reg(0x84B); const Reg zr = Reg(0x44B); Bool isIntReg(Reg r) { Nat cat = r & 0x0F0; return cat == 0x000 || cat == 0x030 || cat == 0x40; } Bool isVectorReg(Reg r) { Nat cat = r & 0x0F0; return cat == 0x050 || cat == 0x060; } Nat intRegNumber(Reg r) { return stormToArmInt(r); } Nat vectorRegNumber(Reg r) { Nat z = Nat(r) & 0xFF; if (z < 0x50 || z > 0x6F) return -1; return z - 0x50; } #define ARM_REG_SPECIAL(NR, NAME) \ if (number == NR) { \ if (size == 0) { \ return S("px") S(NAME); \ } else if (size == 4) { \ return S("w") S(NAME); \ } else if (size == 8) { \ return S("x") S(NAME); \ } else if (size == 1) { \ return S("b") S(NAME); \ } \ } #define ARM_REG_CASE(NR) \ ARM_REG_SPECIAL(NR, #NR) #define ARM_VEC(NR) \ if (number == NR) { \ if (size == 1) { \ return S("b") S(#NR); \ } else if (size == 4) { \ return S("s") S(#NR); \ } else if (size == 8) { \ return S("d") S(#NR); \ } else { \ return S("q") S(#NR) S("(invalid)"); \ } \ } const wchar *nameArm64(Reg r) { Nat size = r >> 8; if (isIntReg(r)) { Nat number = stormToArmInt(r); ARM_REG_CASE(0); ARM_REG_CASE(1); ARM_REG_CASE(2); ARM_REG_CASE(3); ARM_REG_CASE(4); ARM_REG_CASE(5); ARM_REG_CASE(6); ARM_REG_CASE(7); ARM_REG_CASE(8); ARM_REG_CASE(9); ARM_REG_CASE(10); ARM_REG_CASE(11); ARM_REG_CASE(12); ARM_REG_CASE(13); ARM_REG_CASE(14); ARM_REG_CASE(15); ARM_REG_CASE(16); ARM_REG_CASE(17); ARM_REG_CASE(18); ARM_REG_CASE(19); ARM_REG_CASE(20); ARM_REG_CASE(21); ARM_REG_CASE(22); ARM_REG_CASE(23); ARM_REG_CASE(24); ARM_REG_CASE(25); ARM_REG_CASE(26); ARM_REG_CASE(27); ARM_REG_CASE(28); ARM_REG_CASE(29); ARM_REG_CASE(30); ARM_REG_SPECIAL(31, "zr"); if (number == 33) return S("pc"); } else if (isVectorReg(r)) { Nat number = vectorRegNumber(r); ARM_VEC(0); ARM_VEC(1); ARM_VEC(2); ARM_VEC(3); ARM_VEC(4); ARM_VEC(5); ARM_VEC(6); ARM_VEC(7); ARM_VEC(8); ARM_VEC(9); ARM_VEC(10); ARM_VEC(11); ARM_VEC(12); ARM_VEC(13); ARM_VEC(14); ARM_VEC(15); ARM_VEC(16); ARM_VEC(17); ARM_VEC(18); ARM_VEC(19); ARM_VEC(20); ARM_VEC(21); ARM_VEC(22); ARM_VEC(23); ARM_VEC(24); ARM_VEC(25); ARM_VEC(26); ARM_VEC(27); ARM_VEC(28); ARM_VEC(29); ARM_VEC(30); ARM_VEC(31); } return null; } Nat condArm64(CondFlag flag) { switch (flag) { case ifAlways: return 0xE; case ifNever: return 0xF; case ifOverflow: return 0x6; case ifNoOverflow: return 0x7; case ifEqual: return 0x0; case ifNotEqual: return 0x1; // Unsigned compare: case ifBelow: return 0x3; case ifBelowEqual: return 0x9; case ifAboveEqual: return 0x2; case ifAbove: return 0x8; // Singned comparision. case ifLess: return 0xB; case ifLessEqual: return 0xD; case ifGreaterEqual: return 0xA; case ifGreater: return 0xC; // Float comparision. 
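// (These values are the standard AArch64 condition codes. The float
// variants below reuse the unsigned codes for "below"/"below equal" and
// the signed codes for "above"/"above equal", which matches how FCMP sets
// NZCV for ordered and unordered operands.)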
case ifFBelow: return 0x3; case ifFBelowEqual: return 0x9; case ifFAboveEqual: return 0xA; case ifFAbove: return 0xC; } return 0xE; } Reg unusedReg(RegSet *used) { Reg r = unusedRegUnsafe(used); if (r == noReg) throw new (used) InvalidValue(S("We should not run out of registers on ARM64.")); return r; } Reg unusedReg(RegSet *used, Size size) { return asSize(unusedReg(used), size); } Reg unusedRegUnsafe(RegSet *used) { static const Reg candidates[] = { ptrr(0), ptrr(1), ptrr(2), ptrr(3), ptrr(4), ptrr(5), ptrr(6), ptrr(7), ptrr(8), ptrr(9), ptrr(10), ptrr(11), ptrr(12), ptrr(13), ptrr(14), ptrr(15), ptrr(16), ptrr(17), ptrr(19), ptrr(20), ptrr(21), ptrr(22), ptrr(23), ptrr(24), ptrr(25), ptrr(26), ptrr(27), ptrr(28), }; for (Nat i = 0; i < ARRAY_COUNT(candidates); i++) if (!used->has(candidates[i])) return candidates[i]; return noReg; } Reg unusedVectorReg(RegSet *used) { for (Nat i = 0; i < 32; i++) { Reg r = sr(i); if (!used->has(r)) return r; } throw new (used) InvalidValue(S("Out of vector registers!")); } Reg unusedVectorReg(RegSet *used, Size size) { return asSize(unusedVectorReg(used), size); } static const Reg dirtyRegs[] = { ptrr(0), ptrr(1), ptrr(2), ptrr(3), ptrr(4), ptrr(5), ptrr(6), ptrr(7), ptrr(8), ptrr(9), ptrr(10), ptrr(11), ptrr(12), ptrr(13), ptrr(14), ptrr(15), ptrr(16), ptrr(17), dr(0), dr(1), dr(2), dr(3), dr(4), dr(5), dr(6), dr(7), }; const Reg *fnDirtyRegs = dirtyRegs; const size_t fnDirtyCount = ARRAY_COUNT(dirtyRegs); Reg preserveRegInReg(Reg reg, RegSet *used, Listing *dest) { Reg targetReg = noReg; if (isIntReg(reg)) { for (Nat i = 19; i < 29; i++) { if (used->has(ptrr(i))) continue; targetReg = ptrr(i); break; } } else { for (Nat i = 8; i < 16; i++) { if (used->has(dr(i))) continue; targetReg = dr(i); break; } } used->remove(reg); if (targetReg != noReg) { targetReg = asSize(targetReg, size(reg)); used->put(targetReg); *dest << mov(targetReg, reg); return targetReg; } return noReg; } Operand preserveReg(Reg reg, RegSet *used, Listing *dest, Block block) { Reg targetReg = preserveRegInReg(reg, used, dest); if (targetReg != noReg) return targetReg; // Store on the stack. Var to = dest->createVar(block, size(reg)); *dest << mov(to, reg); return to; } // Get a pointer-sized offset into whatever "operand" represents. Operand opPtrOffset(Operand op, Nat offset) { return opOffset(Size::sPtr, op, offset); } Operand opOffset(Size sz, Operand op, Nat offset) { switch (op.type()) { case opRelative: return xRel(sz, op.reg(), op.offset() + Offset(offset)); case opVariable: return xRel(sz, op.var(), op.offset() + Offset(offset)); case opRegister: if (offset == 0) return asSize(op.reg(), sz); assert(false, L"Offset in registers are not supported."); break; default: assert(false, L"Unsupported operand passed to 'opOffset'!"); } return Operand(); } void inlineMemcpy(Listing *dest, Operand to, Operand from, Reg tmpA, Reg tmpB) { Nat size = from.size().size64(); if (size <= 8) { *dest << mov(asSize(tmpA, from.size()), from); *dest << mov(to, asSize(tmpA, from.size())); return; } // Make them pointer-sized. tmpA = asSize(tmpA, Size::sPtr); tmpB = asSize(tmpB, Size::sPtr); Nat offset = 0; while (offset + 16 <= size) { // The backend will make this into a double-load. *dest << mov(tmpA, opPtrOffset(from, offset)); *dest << mov(tmpB, opPtrOffset(from, offset + 8)); // The backend will make this into a double-store. 
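// (The fusion mentioned above happens in loadOut/storeOut in AsmOut.cpp,
// which merge two register moves with adjacent 8-byte offsets into a
// single ldp/stp instruction.)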
*dest << mov(opPtrOffset(to, offset), tmpA); *dest << mov(opPtrOffset(to, offset + 8), tmpB); offset += 16; } // Copy remaining 8 bytes (up to machine alignment, typically OK). if (offset < size) { *dest << mov(tmpA, opPtrOffset(from, offset)); *dest << mov(opPtrOffset(to, offset), tmpA); } } void inlineSlowMemcpy(Listing *dest, Operand to, Operand from, Reg tmpReg) { Nat size = from.size().size64(); if (size <= 8) { *dest << mov(asSize(tmpReg, from.size()), from); *dest << mov(to, asSize(tmpReg, from.size())); return; } tmpReg = asSize(tmpReg, Size::sPtr); Nat offset = 0; while (offset < size) { *dest << mov(tmpReg, opPtrOffset(from, offset)); *dest << mov(opPtrOffset(to, offset), tmpReg); offset += 8; } } Nat encodeBitmask(Word bitmask, bool use64) { if (!use64) { // Pretend that the bitmask was 64-bit by mirroring the existing data. That makes // the algorithm the same for both cases (until we encode the result). bitmask = (bitmask & 0xFFFFFFFF) | (bitmask << 32); } // If it is all ones or all zeroes, we can't encode it (that value is reserved). if (bitmask == 0 || ~bitmask == 0) return 0; // Shift it to the right until we have a one in the least significant position, and a // zero in the most significant position. Nat shift = 0; while ((bitmask & 0x1) != 1 || (bitmask >> 63) != 0) { // This is a rotate right operation. bitmask = ((bitmask & 0x1) << 63) | (bitmask >> 1); shift++; } // Count the number of ones in the sequence. Nat ones = 0; for (Word mask = bitmask; mask & 0x1; mask >>= 1) ones++; // Try different possible pattern lengths. for (Nat length = 2; length <= 64; length *= 2) { if (length <= ones) continue; Word pattern = (Word(1) << ones) - 1; for (Nat offset = length; offset < 64; offset *= 2) pattern |= pattern << offset; if (pattern == bitmask) { // Found it! Encode its representation. Nat immr = length - shift; Nat imms = (Nat(0x80) - (length * 2)) | (ones - 1); imms ^= 0x40; // the N bit is inverted. Note: due to our setup in the start, the // N bit will never be set when we are in 32-bit mode. return ((imms & 0x40) << 6) | (immr << 6) | (imms & 0x3F); } } return 0; } Bool allOnes(Word mask, bool use64) { if (!use64) mask |= mask << 32; return ~mask == 0; } } } storm-lang-0.6.20/Code/Arm64/Asm.h000066400000000000000000000061031456562313500163300ustar00rootroot00000000000000#pragma once #include "Code/Reg.h" #include "Code/Output.h" #include "Code/Operand.h" #include "Code/CondFlag.h" namespace code { class Listing; class TypeDesc; namespace arm64 { STORM_PKG(core.asm.arm64); /** * ARM64 specific registers. * * Since registers are numbered, we don't make constants for all of them. */ Reg xr(Nat id); Reg wr(Nat id); Reg ptrr(Nat id); Reg dr(Nat id); // Doubles Reg sr(Nat id); // Singles Reg br(Nat id); // Bytes extern const Reg pc; // Program counter (for addressing). extern const Reg sp; // Stack pointer (always 64-bit). extern const Reg pzr; // Zero register (ptr). extern const Reg xzr; // Zero register. extern const Reg zr; // Zero register (32-bit). // Check if register is integer register. Bool isIntReg(Reg r); // Check if register is vector register. Bool isVectorReg(Reg r); // Arm integer register number for register. Returns "out-of-bounds" values for pc, etc. Nat intRegNumber(Reg r); // Arm register number for reals. Nat vectorRegNumber(Reg r); // Register name. const wchar *nameArm64(Reg r); // Condition code for ARM. Nat condArm64(CondFlag flag); // Registers clobbered by function calls. 
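// (Defined in Asm.cpp as x0-x17 plus d0-d7. Note that x18 is not listed,
// presumably because AAPCS64 reserves it as the platform register.)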
extern const Reg *fnDirtyRegs; extern const size_t fnDirtyCount; // Get an unused register. Reg unusedReg(RegSet *used); // Get unused register (as above), but specify desired size. Reg unusedReg(RegSet *used, Size size); // Get unused register, don't throw if none is available. Reg unusedRegUnsafe(RegSet *used); // Get unused fp register. Reg unusedVectorReg(RegSet *used); Reg unusedVectorReg(RegSet *used, Size size); // Preserve a register by saving it to a register that is safe through function // calls. Returns new location of the operand. It could be in memory. // Note: The RegSet is *updated* to match new register allocation. Operand preserveReg(Reg reg, RegSet *used, Listing *dest, Block block); // As above, but attempts to preserve a register inside a new register. May fail. Reg preserveRegInReg(Reg reg, RegSet *used, Listing *dest); // Perform a memcpy operation of a fixed size. Uses the two specified registers as // temporaries (ARM has load pair and store pair). Copies up to 7 bytes beyond the specified // location (i.e., copies a multiple of 8 bytes). void inlineMemcpy(Listing *dest, Operand to, Operand from, Reg tmpA, Reg tmpB); // Slower version of the above, only able to use one register. Avoid if possible. void inlineSlowMemcpy(Listing *dest, Operand to, Operand from, Reg tmpReg); // Get a pointer-sized offset into whatever "operand" represents. Operand opPtrOffset(Operand op, Nat offset); Operand opOffset(Size sz, Operand op, Nat offset); // Encode a bitmask. Returns 12-bits N, immr, imms (in that order) if possible. Otherwise, // returns 0 (which is not a valid encoding). 'n' is only used if 64-bit bitmask is required. Nat encodeBitmask(Word bitmask, bool use64); // Check if the word is all ones, taking into account if the value is 64-bit or not. Bool allOnes(Word mask, bool use64); } } storm-lang-0.6.20/Code/Arm64/AsmOut.cpp000066400000000000000000001200171456562313500173540ustar00rootroot00000000000000#include "stdafx.h" #include "AsmOut.h" #include "Asm.h" #include "../OpTable.h" #include "../Exception.h" namespace code { namespace arm64 { // Get register number where 31=sp. static Nat intRegSP(Reg reg) { Nat r = intRegNumber(reg); if (r < 31) return r; if (r == 32) return 31; throw new (runtime::someEngine()) InternalError(S("Can not use this register with this op-code.")); } // Get register number where 31=zr. static Nat intRegZR(Reg reg) { Nat r = intRegNumber(reg); if (r < 32) return r; throw new (runtime::someEngine()) InternalError(S("Can not use this register with this op-code.")); } // Get fp register number. static Nat fpReg(Reg reg) { return vectorRegNumber(reg); } // Good reference for instruction encoding: // https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding?lang=en // Check if value fits in 6-bit signed. static Bool isImm6S(Long value) { return value >= -0x20 && value <= 0x1F; } static void checkImm6S(RootObject *e, Long value) { if (!isImm6S(value)) throw new (e) InvalidValue(TO_S(e, S("Too large signed 6-bit immediate value: ") << value)); } // Check if value fits in 6-bit unsigned. static Bool isImm6U(Word value) { return value <= 0x3F; } static void checkImm6U(RootObject *e, Long value) { if (!isImm6U(value)) throw new (e) InvalidValue(TO_S(e, S("Too large unsigned 6-bit immediate value: ") << value)); } // Check if value fits in 7-bit signed. 
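// (The 7-bit signed range is -64..63; since ldp/stp scale the immediate by
// the access size, this covers a +-512 byte window for 8-byte accesses.)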
static Bool isImm7S(Long value) { return value >= -0x40 && value <= 0x3F; } static void checkImm7S(RootObject *e, Long value) { if (!isImm7S(value)) throw new (e) InvalidValue(TO_S(e, S("Too large signed 7-bit immediate value: ") << value)); } // Check if value fits in 7-bit unsigned. static Bool isImm7U(Word value) { return value <= 0x7F; } static void checkImm7U(RootObject *e, Word value) { if (!isImm7U(value)) throw new (e) InvalidValue(TO_S(e, S("Too large unsigned 7-bit immediate value: ") << value)); } // Check if value fits in 9-bit signed. static Bool isImm9S(Long value) { return value >= -0x100 && value <= 0xFF; } static void checkImm9S(RootObject *e, Long value) { if (!isImm9S(value)) throw new (e) InvalidValue(TO_S(e, S("Too large signed 9-bit immediate value: ") << value)); } // Check if value fits in 9-bit unsigned. static Bool isImm9U(Word value) { return value <= 0x1FF; } static void checkImm9U(RootObject *e, Word value) { if (!isImm9U(value)) throw new (e) InvalidValue(TO_S(e, S("Too large unsigned 9-bit immediate value: ") << value)); } // Check if value fits in 12-bit signed. static Bool isImm12S(Long value) { return value >= -0x800 && value <= 0x7FF; } static void checkImm12S(RootObject *e, Long value) { if (!isImm12S(value)) throw new (e) InvalidValue(TO_S(e, S("Too large signed 12-bit immediate value: ") << value)); } // Check if value fits in 12-bit unsigned. static Bool isImm12U(Word value) { return value <= 0xFFF; } static void checkImm12U(RootObject *e, Word value) { if (!isImm12U(value)) throw new (e) InvalidValue(TO_S(e, S("Too large unsigned 12-bit immediate value: ") << value)); } // Check if value fits in 19-bit signed. static Bool isImm19S(Long value) { return value >= -0x40000 && value <= 0x3FFFF; } static void checkImm19S(RootObject *e, Long value) { if (!isImm19S(value)) throw new (e) InvalidValue(TO_S(e, S("Too large signed 19-bit immediate value: ") << value)); } // Check if value fits in 19-bit unsigned. static Bool isImm19U(Word value) { return value <= 0x7FFFF; } static void checkImm19U(RootObject *e, Long value) { if (!isImm19U(value)) throw new (e) InvalidValue(TO_S(e, S("Too large unsigned 19-bit immediate value: ") << value)); } // Check if value fits in 26-bit signed. static Bool isImm26S(Long value) { return value >= -0x02000000 && value <= 0x03FFFFFF; } static void checkImm26S(RootObject *e, Long value) { if (!isImm26S(value)) throw new (e) InvalidValue(TO_S(e, S("Too large signed 26-bit immediate value: ") << value)); } // Put data instructions. 2 registers, 12-bit unsigned immediate. static inline void putData2(Output *to, Nat op, Nat rDest, Nat rSrc, Word imm) { checkImm12U(to, imm); Nat instr = (op << 22) | rDest | (rSrc << 5) | ((imm & 0xFFF) << 10); to->putInt(instr); } // Put data instructions. 3 registers, and a 6-bit unsigned immediate. Some instructions allow 'rb' to be shifted. static inline void putData3(Output *to, Nat op, Nat rDest, Nat ra, Nat rb, Word imm) { checkImm6U(to, imm); Nat instr = (op << 21) | rDest | (rb << 16) | (ra << 5) | ((imm & 0x3F) << 10); to->putInt(instr); } // Put 3-input data instructions. 4 registers, no immediate (modifier is labelled oO in the // docs, part of OP-code for some instructions it seems). static inline void putData4a(Output *to, Nat op, Bool modifier, Nat rDest, Nat ra, Nat rb, Nat rc) { Nat instr = (op << 21) | rDest | (rc << 10) | (rb << 16) | (ra << 5) | (Nat(modifier) << 15); to->putInt(instr); } // Same as putData4a, except that the 'modifier' is in another place. 
Labelled o1 in the docs. static inline void putData4b(Output *to, Nat op, Bool modifier, Nat rDest, Nat ra, Nat rb, Nat rc) { Nat instr = (op << 21) | rDest | (rc << 16) | (rb << 11) | (ra << 5) | (Nat(modifier) << 10); to->putInt(instr); } // Put a bitmask operation (those that have N, immr, imms in them). The bitmask will be // encoded, and size will be added if needed. static inline void putBitmask(Output *to, Nat op, Nat rSrc, Nat rDest, Bool is64, Word bitmask) { Nat encBitmask = encodeBitmask(bitmask, is64); if (encBitmask == 0) { StrBuf *msg = new (to) StrBuf(); *msg << S("It is not possible to encode the value ") << hex(bitmask) << S(" as a bitmask. It should have been removed by an earlier pass."); throw new (to) InvalidValue(msg->toS()); } Nat instr = (op << 23) | rDest | (rSrc << 5) | (encBitmask << 10); if (is64) instr |= Nat(1) << 31; to->putInt(instr); } // Put instructions for loads and stores: 3 registers and a 7-bit signed immediate. static inline void putLoadStoreS(Output *to, Nat op, Nat base, Nat r1, Nat r2, Long imm) { checkImm7S(to, imm); Nat instr = (op << 22) | r1 | (base << 5) | (r2 << 10) | ((imm & 0x7F) << 15); to->putInt(instr); } // Put instructions for loads and stores: 3 registers and a 7-bit unsigned immediate. static inline void putLoadStoreU(Output *to, Nat op, Nat base, Nat r1, Nat r2, Word imm) { checkImm7U(to, imm); Nat instr = (op << 22) | r1 | (base << 5) | (r2 << 10) | ((imm & 0x7F) << 15); to->putInt(instr); } // Put a "mid-sized" load/store (for negative offsets, mainly): 2 registers and 9-bit immediate. static inline void putLoadStoreMidS(Output *to, Nat op, Nat base, Nat r1, Long imm) { checkImm9S(to, imm); Nat instr = (op << 21) | r1 | (base << 5) | ((0x1FF & imm) << 12); to->putInt(instr); } // Put a "large" load/store (for bytes, mainly): 2 registers and 12-bit immediate. static inline void putLoadStoreLargeS(Output *to, Nat op, Nat base, Nat r1, Long imm) { checkImm12S(to, imm); Nat instr = (op << 22) | r1 | (base << 5) | ((0xFFF & imm) << 10); to->putInt(instr); } // Put a "large" load/store (for bytes, mainly): 2 registers and 12-bit immediate. static inline void putLoadStoreLargeU(Output *to, Nat op, Nat base, Nat r1, Word imm) { checkImm12U(to, imm); Nat instr = (op << 22) | r1 | (base << 5) | ((0xFFF & imm) << 10); to->putInt(instr); } // Put a load/store with 19-bit immediate offset from PC. static inline void putLoadStoreImm(Output *to, Nat op, Nat reg, Long imm) { checkImm19S(to, imm); Nat instr = (op << 24) | reg | ((0x7FFFF & imm) << 5); to->putInt(instr); } // Load a "long" constant into a register. Uses the table of references to store the data. static inline void loadLongConst(Output *to, Nat reg, Ref value) { // Emit "ldr" with literal, make the literal refer to location in the table after the code block. putLoadStoreImm(to, 0x58, reg, 0); to->markGc(GcCodeRef::relativeHereImm19, 4, value); } static inline void loadLongConst(Output *to, Nat reg, RootObject *obj) { // Emit "ldr" with literal, make the literal refer to location in the table after the code block. putLoadStoreImm(to, 0x58, reg, 0); to->markGc(GcCodeRef::relativeHereImm19, 4, (Word)obj); } void nopOut(Output *to, Instr *instr) { // According to ARM manual. to->putInt(0xD503201F); } void prologOut(Output *to, Instr *instr) { Offset stackSize = instr->src().offset(); Int scaled = stackSize.v64() / 8; if (isImm7S(scaled)) { // Small enough: we can do the modifications in the store operation: // - stp x29, x30, [sp, -stackSize]! 
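// (Encoding note: 0x2A6 is the opcode bit pattern for the 64-bit
// pre-indexed "stp Xt1, Xt2, [base, #imm]!" form once shifted into place
// by putLoadStoreS, and register number 31 denotes sp in this instruction
// class.)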
putLoadStoreS(to, 0x2A6, 31, 29, 30, -scaled); // New offset for stack: to->setFrameOffset(stackSize); // The fact that registers have been preserved is done after the if-stmt as it is the same for all cases. } else if (scaled <= 0xFFFF) { // Too large. Load value into register (inter-procedure clobbered x16 is good for this). // - mov x16, # to->putInt(0xD2800000 | ((scaled & 0xFFFF) << 5) | 16); // - sub sp, sp, (x16 << 3) putData3(to, 0x659, 31, 31, 16, 3 << 3 | 3); // flags mean shift 3 steps, treat as unsigned 64-bit // CFA offset is now different: to->setFrameOffset(stackSize); // - stp x29, x30, [sp] putLoadStoreU(to, 0x2A4, 31, 29, 30, 0); } else { // Note: In reality, the stack size is likely a bit smaller since we are limited by // pointer offsets, since they are limited to 14 bytes (=16 KiB). throw new (to) InvalidValue(S("Too large stack size for Arm64!")); } // We have now saved the stack pointer and the return pointer: to->markSaved(xr(29), -stackSize); to->markSaved(xr(30), -(stackSize - Size::sPtr)); // - mov x29, sp # create stack frame putData2(to, 0x244, 29, 31, 0); // Now: use x29 as the frame register instead! to->setFrameRegister(xr(29)); } void epilogOut(Output *to, Instr *instr) { Offset stackSize = instr->src().offset(); Int scaled = stackSize.v64() / 8; if (isImm7S(scaled)) { // We emit: // - ldp x29, x30, [sp], stackSize putLoadStoreS(to, 0x2A3, 31, 29, 30, scaled); } else if (scaled <= 0xFFFF) { // The inverse of the prolog is: // - ldp x29, x30, [sp] putLoadStoreU(to, 0x2A5, 31, 29, 30, 0); // - mov x16, # to->putInt(0xD2800000 | ((scaled & 0xFFFF) << 5) | 16); // - add sp, sp, (x16 << 3) putData3(to, 0x459, 31, 31, 16, 3 << 3 | 3); // flags mean shift 3 steps, treat as unsigned 64-bit } else { throw new (to) InvalidValue(S("Too large stack size for Arm64!")); } // Note: No DWARF metadata since this could be an early return. } bool loadOut(Output *to, Instr *instr, MAYBE(Instr *) next) { Reg baseReg = instr->src().reg(); Int offset = instr->src().offset().v64(); Int opSize = instr->dest().size().size64(); Reg dest1 = instr->dest().reg(); Reg dest2 = noReg; Bool intReg = isIntReg(dest1); // Bytes are special: if (opSize == 1) { if (offset < 0) putLoadStoreMidS(to, intReg ? 0x1C2 : 0x1E2, intRegSP(baseReg), intRegZR(dest1), offset); else putLoadStoreLargeU(to, intReg ? 0x0E5 : 0x0F5, intRegSP(baseReg), intRegZR(dest1), offset); return false; } // Look at "next" to see if we can merge it with this instruction. if (next && next->dest().type() == opRegister && next->src().type() == opRelative) { if (same(next->src().reg(), baseReg) && Int(next->dest().size().size64()) == opSize) { // Note: It is undefined to load the same register multiple times. Also: it // might break semantics when turning: // - ldr x0, [x0] // - ldr x0, [x0+8] // into // - ldp x0, x0, [x0] if (!same(next->dest().reg(), dest1) && isIntReg(next->dest().reg()) == intReg) { // Look at the offsets, if they are next to each other, we can merge them. Int off = next->src().offset().v64(); if (off == offset + opSize && isImm7S(offset / opSize)) { dest2 = next->dest().reg(); } else if (off == offset - opSize && isImm7S(off / opSize)) { // Put the second one first. dest2 = dest1; dest1 = next->dest().reg(); offset = off; } } } } if (offset % opSize) throw new (to) InvalidValue(S("Memory access on Arm must be aligned!")); offset /= opSize; // Interestingly enough, ldp takes a signed offset but ldr takes an unsigned offset. if (dest2 != noReg) { if (intReg) putLoadStoreS(to, opSize == 4 ? 
0x0A5 : 0x2A5, intRegSP(baseReg), intRegZR(dest1), intRegZR(dest2), offset); else putLoadStoreS(to, opSize == 4 ? 0x0B5 : 0x1B5, intRegSP(baseReg), fpReg(dest1), fpReg(dest2), offset); } else if (offset < 0) { // Here: We need to use 'ldur' instead. That one is unscaled. if (intReg) putLoadStoreMidS(to, opSize == 4 ? 0x5C2 : 0x7C2, intRegSP(baseReg), intRegZR(dest1), offset * opSize); else putLoadStoreMidS(to, opSize == 4 ? 0x5E2 : 0x7E2, intRegSP(baseReg), intRegZR(dest1), offset * opSize); } else { if (intReg) putLoadStoreLargeU(to, opSize == 4 ? 0x2E5 : 0x3E5, intRegSP(baseReg), intRegZR(dest1), offset); else putLoadStoreLargeU(to, opSize == 4 ? 0x2F5 : 0x3F5, intRegSP(baseReg), fpReg(dest1), offset); } return dest2 != noReg; } bool storeOut(Output *to, Instr *instr, MAYBE(Instr *) next) { Reg baseReg = instr->dest().reg(); Int offset = instr->dest().offset().v64(); Int opSize = instr->src().size().size64(); Reg src1 = instr->src().reg(); Reg src2 = noReg; Bool intReg = isIntReg(src1); // Bytes are special: if (opSize == 1) { if (offset < 0) putLoadStoreMidS(to, intReg ? 0x1C0 : 0x1E0, intRegSP(baseReg), intRegZR(src1), offset); else putLoadStoreLargeU(to, intReg ? 0x0E4 : 0x0F4, intRegSP(baseReg), intRegZR(src1), offset); return false; } // Look at "next" to see if we can merge it with this instruction. if (next && next->src().type() == opRegister && next->dest().type() == opRelative) { if (same(next->dest().reg(), baseReg) && Int(next->src().size().size64()) == opSize) { // Note: Contrary to the load instruction, it is well-defined to store the same // register multiple times. if (isIntReg(next->src().reg()) == intReg) { // Look at the offsets, if they are next to each other, we can merge them. Int off = next->dest().offset().v64(); if (off == offset + opSize && isImm7S(offset / opSize)) { src2 = next->src().reg(); } else if (off == offset - opSize && isImm7S(off / opSize)) { // Put the second one first. src2 = src1; src1 = next->src().reg(); offset = off; } } } } if (offset % opSize) throw new (to) InvalidValue(S("Memory access on Arm must be aligned!")); offset /= opSize; // Interestingly enough, LDP takes a signed offset while LDR takes an unsigned offset... if (src2 != noReg) { if (intReg) putLoadStoreS(to, opSize == 4 ? 0x0A4 : 0x2A4, intRegSP(baseReg), intRegZR(src1), intRegZR(src2), offset); else putLoadStoreS(to, opSize == 4 ? 0x0B4 : 0x1B4, intRegSP(baseReg), fpReg(src1), fpReg(src2), offset); } else if (offset < 0) { // Here: we need to use 'ldur' instead. if (intReg) putLoadStoreMidS(to, opSize == 4 ? 0x5C0 : 0x7C0, intRegSP(baseReg), intRegZR(src1), offset * opSize); else putLoadStoreMidS(to, opSize == 4 ? 0x5E0 : 0x7E0, intRegSP(baseReg), intRegZR(src1), offset * opSize); } else { if (intReg) putLoadStoreLargeU(to, opSize == 4 ? 0x2E4 : 0x3E4, intRegSP(baseReg), intRegZR(src1), offset); else putLoadStoreLargeU(to, opSize == 4 ? 
0x2F4 : 0x3F4, intRegSP(baseReg), fpReg(src1), offset); } return src2 != noReg; } void regRegMove(Output *to, Reg dest, Reg src) { Bool intSrc = isIntReg(src); Bool intDst = isIntReg(dest); if (intSrc && intDst) { if (src == ptrStack || dest == ptrStack) putData2(to, 0x244, intRegSP(dest), intRegSP(src), 0); else if (size(src).size64() > 4) putData3(to, 0x550, intRegZR(dest), 31, intRegZR(src), 0); else putData3(to, 0x150, intRegZR(dest), 31, intRegZR(src), 0); } else if (!intSrc && !intDst) { if (size(src).size64() > 4) to->putInt(0x1E602000 | (fpReg(src) << 5) | fpReg(dest)); else to->putInt(0x1E202000 | (fpReg(src) << 5) | fpReg(dest)); } else if (intSrc) { if (size(src).size64() > 4) to->putInt(0x9E670000 | (intRegZR(src) << 5) | fpReg(dest)); else to->putInt(0x1E270000 | (intRegZR(src) << 5) | fpReg(dest)); } else if (intDst) { if (size(src).size64() > 4) to->putInt(0x9E660000 | (fpReg(src) << 5) | intRegZR(dest)); else to->putInt(0x1E260000 | (fpReg(src) << 5) | intRegZR(dest)); } } // Special version called directly when more than one mov was found. Returns "true" if we // could merge the two passed to us. We know that "next" is a mov op if it is non-null. bool movOut(Output *to, Instr *instr, MAYBE(Instr *) next) { switch (instr->dest().type()) { case opRegister: // Fall through to next switch statement. break; case opRelative: if (instr->src().type() != opRegister) throw new (to) InvalidValue(TO_S(to, S("Invalid source for store operation on ARM: ") << instr->src())); return storeOut(to, instr, next); default: throw new (to) InvalidValue(TO_S(to, S("Invalid destination for move operation: ") << instr)); } // dest is a register! Reg destReg = instr->dest().reg(); Operand src = instr->src(); switch (src.type()) { case opRegister: regRegMove(to, destReg, src.reg()); return false; case opRelative: return loadOut(to, instr, next); case opReference: // Must be a pointer: Also, dest must be register. loadLongConst(to, intRegZR(destReg), src.ref()); return false; case opObjReference: // Must be a pointer, and dest must be a register. loadLongConst(to, intRegZR(destReg), src.object()); return false; case opConstant: if (src.constant() > 0xFFFF) throw new (to) InvalidValue(TO_S(to, S("Too large immediate to load: ") << src)); // Note: No difference between nat and word version. to->putInt(0xD2800000 | ((0xFFFF & src.constant()) << 5) | intRegZR(destReg)); return false; case opLabel: case opRelativeLbl: { Int offset = to->offset(src.label()) + src.offset().v64() - to->tell(); Bool large = size(destReg).size64() > 4; if (isIntReg(destReg)) { putLoadStoreImm(to, large ? 0x58 : 0x18, intRegZR(destReg), offset / 4); } else { putLoadStoreImm(to, large ? 0x5C : 0x1C, fpReg(destReg), offset / 4); } return false; } default: throw new (to) InvalidValue(TO_S(to, S("Invalid source for move operation: ") << instr)); } } void movOut(Output *to, Instr *instr) { movOut(to, instr, null); } void leaOut(Output *to, Instr *instr) { Operand destOp = instr->dest(); if (destOp.type() != opRegister) throw new (to) InvalidValue(S("Destination of lea should have been transformed to a register.")); Nat dest = intRegZR(destOp.reg()); Operand src = instr->src(); switch (src.type()) { case opRelative: // Note: These are not sign-extended, so we need to be careful about the sign. 
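// (The add/sub immediate forms used below take an unsigned 12-bit
// immediate, so a negative offset is emitted as a sub of its magnitude.)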
if (src.offset().v64() > 0) { // add: putData2(to, 0x244, dest, intRegZR(src.reg()), src.offset().v64()); } else { // sub: putData2(to, 0x344, dest, intRegZR(src.reg()), -src.offset().v64()); } break; case opReference: // This means to load the refSource instead of loading from the pointer. loadLongConst(to, dest, src.refSource()); break; default: throw new (to) InvalidValue(TO_S(to, S("Unsupported source operand for lea: ") << src)); } } void callOut(Output *to, Instr *instr) { // Note: We need to use x17 for temporary values. This is assumed by the code in Gc/CodeArm64.cpp. Nat offset; Operand target = instr->src(); switch (target.type()) { case opReference: // Load addr. into x17. putLoadStoreImm(to, 0x58, 17, 0); // blr x17 to->putInt(0xD63F0220); // Mark it accordingly. to->markGc(GcCodeRef::jump, 8, target.ref()); break; case opRegister: to->putInt(0xD63F0000 | (intRegZR(target.reg()) << 5)); break; case opRelative: // Split into two op-codes: a load and a register call. offset = target.offset().v64() / 8; putLoadStoreLargeU(to, 0x3E5, intRegZR(target.reg()), 17, offset); // blr x17 to->putInt(0xD63F0220); break; default: assert(false, L"Unsupported call target!"); break; } } void retOut(Output *to, Instr *) { to->putInt(0xD65F03C0); } void jmpCondOut(Output *to, CondFlag cond, const Operand &target) { Int offset; switch (target.type()) { case opLabel: offset = Int(to->offset(target.label())) - Int(to->tell()); offset /= 4; checkImm19S(to, offset); to->putInt(0x54000000 | ((Nat(offset) & 0x7FFFF) << 5) | condArm64(cond)); break; default: assert(false, L"Unsupported target for conditional branches."); break; } } void jmpOut(Output *to, Instr *instr) { CondFlag cond = instr->src().condFlag(); Operand target = instr->dest(); Int offset; if (cond == ifNever) return; if (cond != ifAlways) { // Conditional jumps are special, handle them separately. jmpCondOut(to, cond, target); return; } // Note: We need to use x17 for temporary values for long jumps. This is assumed by the // code in Gc/CodeArm64.cpp. switch (target.type()) { case opReference: // Load addr. into x17. putLoadStoreImm(to, 0x58, 17, 0); // br x17 to->putInt(0xD61F0220); // Mark it accordingly. to->markGc(GcCodeRef::jump, 8, target.ref()); break; case opRegister: to->putInt(0xD61F0000 | (intRegZR(target.reg()) << 5)); break; case opRelative: // Split into two op-codes: a load and a register jump. offset = target.offset().v64() / 8; putLoadStoreLargeU(to, 0x3E5, intRegZR(target.reg()), 17, offset); // br x17 to->putInt(0xD61F0220); break; case opLabel: offset = Int(to->offset(target.label())) - Int(to->tell()); offset /= 4; checkImm26S(to, offset); to->putInt(0x14000000 | (Nat(offset) & 0x03FFFFFF)); break; default: assert(false, L"Unsupported jump target!"); break; } } // Generic output of data instructions that use 12-bit immediates or registers. Assumes that // the high bit of the op-code is the size bit. 
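// (For example, addOut below passes opImm=0x044 and opReg=0x058; OR-ing in
// 0x200 resp. 0x400 sets bit 31 of the final instruction, the "sf" size
// bit, once the opcodes are shifted into place by putData2/putData3.)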
static void data12Out(Output *to, Instr *instr, Nat opImm, Nat opReg) { assert(instr->dest().type() == opRegister, L"Destinations for data operations should have been transformed into registers."); Nat dest = intRegSP(instr->dest().reg()); if (instr->src().size().size64() >= 8) { opImm |= 0x200; opReg |= 0x400; } switch (instr->src().type()) { case opRegister: putData3(to, opReg, dest, dest, intRegSP(instr->src().reg()), 0); break; case opConstant: putData2(to, opImm, dest, dest, instr->src().constant()); break; default: assert(false, L"Unsupported source for data operation."); break; } } void addOut(Output *to, Instr *instr) { data12Out(to, instr, 0x044, 0x058); } void subOut(Output *to, Instr *instr) { data12Out(to, instr, 0x144, 0x258); } void cmpOut(Output *to, Instr *instr) { assert(instr->dest().type() == opRegister, L"Src and dest for cmp should have been transformed into registers."); Nat dest = intRegZR(instr->dest().reg()); Nat opImm = 0x1C4; Nat opReg = 0x358; if (instr->src().size().size64() >= 8) { opImm |= 0x200; opReg |= 0x400; } switch (instr->src().type()) { case opRegister: putData3(to, opReg, 31, dest, intRegSP(instr->src().reg()), 0); break; case opConstant: putData2(to, opImm, 31, dest, instr->src().constant()); break; default: assert(false, L"Unsupported source for data operation."); break; } } void setCondOut(Output *to, Instr *instr) { Nat dest = intRegZR(instr->dest().reg()); CondFlag cond = instr->src().condFlag(); // Note: There is no "if never" condition, so we need a special case for that. Since we // need to invert the condition, we just special-case both always and never. if (cond == ifAlways) { to->putInt(0xD2800020 | dest); } else if (cond == ifNever) { to->putInt(0xD2800000 | dest); } else { to->putInt(0x1A9F07E0 | dest | (condArm64(inverse(instr->src().condFlag())) << 12)); } } void mulOut(Output *to, Instr *instr) { // Everything has to be in registers here. Operand src = instr->src(); Operand dest = instr->dest(); Nat op = 0x0D8; if (src.size().size64() >= 8) op |= 0x400; Nat destReg = intRegZR(dest.reg()); putData4a(to, op, false, destReg, destReg, intRegZR(src.reg()), 31); } static void divOut(Output *to, Instr *instr, Bool sign) { Operand src = instr->src(); Operand dest = instr->dest(); Nat op = 0x0D6; if (src.size().size64() >= 8) op |= 0x400; Nat destReg = intRegZR(dest.reg()); putData4b(to, op, sign, destReg, destReg, 0x1, intRegZR(src.reg())); } void idivOut(Output *to, Instr *instr) { divOut(to, instr, true); } void udivOut(Output *to, Instr *instr) { divOut(to, instr, false); } static void clampSize(Output *to, Reg reg, Nat size) { if (size == 1) { // This is AND with an encoded bitmask. to->putInt(0x92401C00 | (intRegSP(reg) << 5) | intRegZR(reg)); } else if (size == 4) { // This is AND with an encoded bitmask. to->putInt(0x92407C00 | (intRegSP(reg) << 5) | intRegZR(reg)); } else { // No need to clamp larger values. } } void icastOut(Output *to, Instr *instr) { Operand src = instr->src(); Operand dst = instr->dest(); Nat srcSize = src.size().size64(); Nat dstSize = dst.size().size64(); if (!isIntReg(dst.reg())) throw new (to) InvalidValue(S("Can not sign extend floating point registers.")); // Source is either register or memory reference. if (src.type() == opRelative) { // Use a suitable load instruction. 
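// (These opcodes correspond to ldrsb (0x0E6) and ldrsw (0x2E6) for 1- and
// 4-byte sources, and a plain 64-bit ldr (0x3E5) otherwise, so the value
// is sign-extended to 64 bits as part of the load itself.)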

		void icastOut(Output *to, Instr *instr) {
			Operand src = instr->src();
			Operand dst = instr->dest();
			Nat srcSize = src.size().size64();
			Nat dstSize = dst.size().size64();

			if (!isIntReg(dst.reg()))
				throw new (to) InvalidValue(S("Cannot sign-extend floating point registers."));

			// Source is either register or memory reference.
			if (src.type() == opRelative) {
				// Use a suitable load instruction.
				Int offset = instr->src().offset().v64();
				if (srcSize == 1) {
					Nat op = 0x0E6;
					putLoadStoreLargeU(to, op, intRegSP(src.reg()), intRegZR(dst.reg()), offset);
				} else if (srcSize == 4) {
					Nat op = 0x2E6;
					putLoadStoreLargeU(to, op, intRegSP(src.reg()), intRegZR(dst.reg()), offset / 4);
				} else {
					// This is a regular load.
					Nat op = 0x3E5;
					putLoadStoreLargeU(to, op, intRegSP(src.reg()), intRegZR(dst.reg()), offset / 8);
				}

				// Maybe clamp to smaller size.
				if (srcSize > dstSize)
					clampSize(to, dst.reg(), dstSize);
			} else if (src.type() == opRegister) {
				// Sign extend to 64 bits:
				if (srcSize == 1) {
					// Insn: sxtb
					Nat op = 0x93401C00 | (intRegZR(src.reg()) << 5) | intRegZR(dst.reg());
					to->putInt(op);
				} else if (srcSize == 4) {
					// Insn: sxtw
					Nat op = 0x93407C00 | (intRegZR(src.reg()) << 5) | intRegZR(dst.reg());
					to->putInt(op);
				}
				clampSize(to, dst.reg(), dstSize);
			}
		}

		void ucastOut(Output *to, Instr *instr) {
			Operand src = instr->src();
			Operand dst = instr->dest();
			Nat srcSize = src.size().size64();
			Nat dstSize = dst.size().size64();
			Bool intDst = isIntReg(dst.reg());

			// Source is either register or memory reference.
			if (src.type() == opRelative) {
				// Use a suitable load instruction.
				Int offset = instr->src().offset().v64();
				if (srcSize == 1) {
					Nat op = intDst ? 0x0E5 : 0x0F5;
					putLoadStoreLargeU(to, op, intRegSP(src.reg()), intRegZR(dst.reg()), offset);
				} else if (srcSize == 4) {
					Nat op = intDst ? 0x2E5 : 0x2F5;
					putLoadStoreLargeU(to, op, intRegSP(src.reg()), intRegZR(dst.reg()), offset / 4);
				} else {
					Nat op = intDst ? 0x3E5 : 0x3F5;
					putLoadStoreLargeU(to, op, intRegSP(src.reg()), intRegZR(dst.reg()), offset / 8);
				}

				// Maybe clamp to smaller size.
				if (srcSize > dstSize)
					clampSize(to, dst.reg(), dstSize);
			} else if (src.type() == opRegister) {
				// Make sure that the upper bits are zero. Just move the register if needed, then
				// clamp as necessary.
				if (!same(src.reg(), dst.reg())) {
					regRegMove(to, dst.reg(), src.reg());
				}
				clampSize(to, dst.reg(), dstSize);
			}
		}

		void borOut(Output *to, Instr *instr) {
			Operand src = instr->src();
			Operand dst = instr->dest();
			Nat dstReg = intRegZR(dst.reg());
			bool is64 = dst.size().size64() > 4;

			if (src.type() == opConstant) {
				Word op = src.constant();
				if (op == 0) {
					// Or with zero is a no-op. Don't do anything.
				} else if (allOnes(op, is64)) {
					// Fill target with all ones. We use "orn <dest>, zr, zr" for this.
					putData3(to, is64 ? 0x551 : 0x151, dstReg, 31, 31, 0);
				} else {
					putBitmask(to, 0x64, dstReg, dstReg, is64, op);
				}
			} else {
				Nat opCode = is64 ? 0x550 : 0x150;
				putData3(to, opCode, dstReg, dstReg, intRegZR(src.reg()), 0);
			}
		}

		void bandOut(Output *to, Instr *instr) {
			Operand src = instr->src();
			Operand dst = instr->dest();
			Nat dstReg = intRegZR(dst.reg());
			bool is64 = dst.size().size64() > 4;

			if (src.type() == opConstant) {
				Word op = src.constant();
				if (op == 0) {
					// And with zero always gives a zero. Simply emit a mov instruction instead
					// (technically an "orr <dest>, zr, zr").
					putData3(to, 0x550, dstReg, 31, 31, 0);
				} else if (allOnes(op, is64)) {
					// And with all ones is a no-op. Don't emit any code.
				} else {
					putBitmask(to, 0x24, dstReg, dstReg, is64, op);
				}
			} else {
				Nat opCode = is64 ? 0x450 : 0x050;
				putData3(to, opCode, dstReg, dstReg, intRegZR(src.reg()), 0);
			}
		}
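
		// For illustration (a sketch of an assumption the code above relies on, not new
		// behavior): putBitmask is expected to encode its constant as an ARM64 "bitmask
		// immediate" (the N:immr:imms fields), which can only express replicated, rotated
		// runs of ones. For example 0xFF and 0x00FF00FF00FF00FF are encodable, while 0xABCD
		// is not; earlier transform passes are assumed to have moved any non-encodable
		// constant into a register. The op-codes 0x24, 0x64, 0xA4 and 0xE4 used with it
		// correspond to the and/orr/eor/ands immediate encodings.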

		void testOut(Output *to, Instr *instr) {
			Operand src = instr->src();
			Operand dst = instr->dest();
			Nat dstReg = intRegZR(dst.reg());
			bool is64 = dst.size().size64() > 4;

			if (src.type() == opConstant) {
				Word op = src.constant();
				putBitmask(to, 0xE4, 31, dstReg, is64, op);
			} else {
				Nat opCode = is64 ? 0x750 : 0x350;
				putData3(to, opCode, 31, dstReg, intRegZR(src.reg()), 0);
			}
		}

		void bxorOut(Output *to, Instr *instr) {
			Operand src = instr->src();
			Operand dst = instr->dest();
			Nat dstReg = intRegZR(dst.reg());
			bool is64 = dst.size().size64() > 4;

			if (src.type() == opConstant) {
				Word op = src.constant();
				if (op == 0) {
					// XOR with a zero is a no-op.
				} else if (allOnes(op, is64)) {
					// XOR with all ones is simply a bitwise negation. Use "orn <dest>, zr, <dest>" instead.
					putData3(to, is64 ? 0x551 : 0x151, dstReg, 31, dstReg, 0);
				} else {
					// EOR with an encoded bitmask.
					putBitmask(to, 0xA4, dstReg, dstReg, is64, op);
				}
			} else {
				Nat opCode = is64 ? 0x650 : 0x250;
				putData3(to, opCode, dstReg, dstReg, intRegZR(src.reg()), 0);
			}
		}

		void bnotOut(Output *to, Instr *instr) {
			Operand dst = instr->dest();
			Nat dstReg = intRegZR(dst.reg());
			bool is64 = dst.size().size64() > 4;

			// This is "orn <dest>, zr, <dest>".
			putData3(to, is64 ? 0x551 : 0x151, dstReg, 31, dstReg, 0);
		}

		void shlOut(Output *to, Instr *instr) {
			Operand src = instr->src();
			Operand dst = instr->dest();
			Nat dstReg = intRegZR(dst.reg());
			bool is64 = dst.size().size64() > 4;

			if (src.type() == opConstant) {
				Nat shift = Nat(src.constant());
				if (shift == 0) {
					// Nothing to do.
				} else if (shift >= Nat(is64 ? 64 : 32)) {
					// Saturated shift. Simply move 0 to the register.
					putData3(to, 0x550, dstReg, 31, 31, 0);
				} else {
					Nat opCode = 0x53000000 | dstReg | (dstReg << 5);
					if (is64)
						opCode |= 0x80400000;
					// immr
					opCode |= ((~shift + 1) & (is64 ? 0x3F : 0x1F)) << 16;
					// imms
					opCode |= (((is64 ? 63 : 31) - shift) & 0x3F) << 10;
					to->putInt(opCode);
				}
			} else {
				putData3(to, is64 ? 0x4D6 : 0x0D6, dstReg, dstReg, intRegZR(src.reg()), 0x08);
			}
		}

		void shrOut(Output *to, Instr *instr) {
			Operand src = instr->src();
			Operand dst = instr->dest();
			Nat dstReg = intRegZR(dst.reg());
			bool is64 = dst.size().size64() > 4;

			if (src.type() == opConstant) {
				Nat shift = Nat(src.constant());
				if (shift == 0) {
					// Nothing to do.
				} else if (shift >= Nat(is64 ? 64 : 32)) {
					// Saturated shift. Simply move 0 to the register.
					putData3(to, 0x550, dstReg, 31, 31, 0);
				} else {
					Nat opCode = 0x53000000 | dstReg | (dstReg << 5);
					if (is64)
						opCode |= 0x80400000;
					// immr
					opCode |= shift << 16;
					// imms
					opCode |= (is64 ? 63 : 31) << 10;
					to->putInt(opCode);
				}
			} else {
				putData3(to, is64 ? 0x4D6 : 0x0D6, dstReg, dstReg, intRegZR(src.reg()), 0x09);
			}
		}
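
		// Worked example (for illustration only): shifts by a constant are emitted as
		// bit-field moves. "lsl x1, x1, #4" is the alias of "ubfm x1, x1, #60, #59", i.e.
		// immr = (-4) & 63 = 60 and imms = 63 - 4 = 59, which the code above assembles as
		// 0x53000000 | 0x80400000 | (60 << 16) | (59 << 10) | (1 << 5) | 1 = 0xD37CEC21.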

		void sarOut(Output *to, Instr *instr) {
			Operand src = instr->src();
			Operand dst = instr->dest();
			Nat dstReg = intRegZR(dst.reg());
			bool is64 = dst.size().size64() > 4;

			if (src.type() == opConstant) {
				Nat bits = is64 ? 64 : 32;
				Nat shift = Nat(src.constant());
				if (shift > bits - 1)
					shift = bits - 1;
				if (shift == 0) {
					// Nothing to do.
				} else {
					Nat opCode = 0x13000000 | dstReg | (dstReg << 5);
					if (is64)
						opCode |= 0x80400000;
					// immr
					opCode |= shift << 16;
					// imms: the index of the highest bit to copy, i.e. bits - 1.
					opCode |= (bits - 1) << 10;
					to->putInt(opCode);
				}
			} else {
				putData3(to, is64 ? 0x4D6 : 0x0D6, dstReg, dstReg, intRegZR(src.reg()), 0x0A);
			}
		}

		void preserveOut(Output *to, Instr *instr) {
			to->markSaved(instr->src().reg(), instr->dest().offset());
		}

		static void fpOut(Output *to, Instr *instr, Nat op) {
			Operand dest = instr->dest();
			Bool is64 = dest.size().size64() > 4;
			Nat baseOp = 0x0F1;
			if (is64)
				baseOp |= 0x2; // sets ftype to 0x1
			Nat destReg = fpReg(dest.reg());
			putData3(to, baseOp, destReg, destReg, fpReg(instr->src().reg()), op);
		}

		void faddOut(Output *to, Instr *instr) {
			fpOut(to, instr, 0x0A);
		}

		void fsubOut(Output *to, Instr *instr) {
			fpOut(to, instr, 0x0E);
		}

		void fnegOut(Output *to, Instr *instr) {
			Operand dest = instr->dest();
			Bool is64 = dest.size().size64() > 4;
			Nat op = 0x1E214000;
			if (is64)
				op |= Nat(1) << 22;
			op |= fpReg(dest.reg());
			op |= fpReg(instr->src().reg()) << 5;
			to->putInt(op);
		}

		void fmulOut(Output *to, Instr *instr) {
			fpOut(to, instr, 0x02);
		}

		void fdivOut(Output *to, Instr *instr) {
			fpOut(to, instr, 0x06);
		}

		void fcmpOut(Output *to, Instr *instr) {
			// Note: This op-code supports comparing to the literal zero. We don't emit that op-code though...
			Operand src = instr->src();
			Operand dest = instr->dest();
			Bool is64 = dest.size().size64() > 4;
			Nat baseOp = 0x0F1;
			if (is64)
				baseOp |= Nat(1) << 1;
			putData3(to, baseOp, 0x0, fpReg(dest.reg()), fpReg(src.reg()), 0x08);
		}

		void fcastOut(Output *to, Instr *instr) {
			Bool in64 = instr->src().size().size64() > 4;
			Bool out64 = instr->dest().size().size64() > 4;
			if (in64 == out64) {
				// Just emit a mov instruction:
				regRegMove(to, instr->dest().reg(), instr->src().reg());
				return;
			}

			// fcvt: both source and destination are floating point registers.
			Nat op = 0x1E224000;
			if (in64)
				op |= Nat(1) << 22;
			if (out64)
				op |= Nat(1) << 15;
			op |= fpReg(instr->dest().reg());
			op |= fpReg(instr->src().reg()) << 5;
			to->putInt(op);
		}

		static void fromFloat(Output *to, Instr *instr, Nat op) {
			Bool in64 = instr->src().size().size64() > 4;
			Bool out64 = instr->dest().size().size64() > 4;
			if (in64)
				op |= Nat(1) << 22;
			if (out64)
				op |= Nat(1) << 31;
			op |= intRegZR(instr->dest().reg());
			op |= fpReg(instr->src().reg()) << 5;
			to->putInt(op);
		}

		void fcastiOut(Output *to, Instr *instr) {
			fromFloat(to, instr, 0x1E380000);
		}

		void fcastuOut(Output *to, Instr *instr) {
			fromFloat(to, instr, 0x1E390000);
		}

		static void toFloat(Output *to, Instr *instr, Nat op) {
			Bool in64 = instr->src().size().size64() > 4;
			Bool out64 = instr->dest().size().size64() > 4;
			if (out64)
				op |= Nat(1) << 22;
			if (in64)
				op |= Nat(1) << 31;
			op |= fpReg(instr->dest().reg());
			op |= intRegZR(instr->src().reg()) << 5;
			to->putInt(op);
		}

		void icastfOut(Output *to, Instr *instr) {
			toFloat(to, instr, 0x1E220000);
		}

		void ucastfOut(Output *to, Instr *instr) {
			toFloat(to, instr, 0x1E230000);
		}

		void datOut(Output *to, Instr *instr) {
			Operand src = instr->src();
			switch (src.type()) {
			case opLabel:
				to->putAddress(src.label());
				break;
			case opReference:
				to->putAddress(src.ref());
				break;
			case opObjReference:
				to->putObject(src.object());
				break;
			case opConstant:
				to->putSize(src.constant(), src.size());
				break;
			default:
				assert(false, L"Unsupported type for 'dat'.");
				break;
			}
		}

		void lblOffsetOut(Output *to, Instr *instr) {
			to->putOffset(instr->src().label());
		}

		void alignOut(Output *to, Instr *instr) {
			to->align(Nat(instr->src().constant()));
		}

		void locationOut(Output *, Instr *) {
			// We don't save location data in the generated code.
		}

		void metaOut(Output *, Instr *) {
			// We don't save metadata in the generated code.
		}
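
		// For illustration (a sketch, assuming an OpTable-style consumer elsewhere in the
		// backend; "table" is a hypothetical name): each OUTPUT(x) entry below maps an IR
		// op-code to its emitter, so producing machine code for one instruction amounts to:
		//
		//   OutputFn fn = table[instr->op()];
		//   if (fn)
		//       (*fn)(to, instr);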

#define OUTPUT(x) { op::x, &x ## Out }

		typedef void (*OutputFn)(Output *to, Instr *instr);

		// Note: "mov" is special: we try to merge mov operations.
		const OpEntry<OutputFn> outputMap[] = {
			OUTPUT(nop),
			OUTPUT(prolog),
			OUTPUT(epilog),
			OUTPUT(mov),
			OUTPUT(lea),
			OUTPUT(call),
			OUTPUT(ret),
			OUTPUT(jmp),
			OUTPUT(sub),
			OUTPUT(add),
			OUTPUT(cmp),
			OUTPUT(setCond),
			OUTPUT(mul),
			OUTPUT(idiv),
			OUTPUT(udiv),
			OUTPUT(icast),
			OUTPUT(ucast),
			OUTPUT(band),
			OUTPUT(bor),
			OUTPUT(bxor),
			OUTPUT(bnot),
			OUTPUT(test),
			OUTPUT(shl),
			OUTPUT(shr),
			OUTPUT(sar),
			OUTPUT(fadd),
			OUTPUT(fsub),
			OUTPUT(fneg),
			OUTPUT(fmul),
			OUTPUT(fdiv),
			OUTPUT(fcmp),
			OUTPUT(fcast),
			OUTPUT(fcasti),
			OUTPUT(fcastu),
			OUTPUT(icastf),
			OUTPUT(ucastf),
			OUTPUT(preserve),
			OUTPUT(dat),
			OUTPUT(lblOffset),
			OUTPUT(align),
			OUTPUT(location),
			OUTPUT(meta),
		};

		bool empty(Array