File loong64-support.patch of Package luajit2
From 8e40aca7b3a919456b15698273e9b00e9250e769 Mon Sep 17 00:00:00 2001
From: Xiaolin Zhao <zhaoxiaolin@loongson.cn>
Date: Sat, 2 Nov 2024 10:19:52 +0100
Subject: [PATCH] Add support for LoongArch64
---
Makefile | 1 +
dynasm/dasm_loongarch64.h | 443 ++++
dynasm/dasm_loongarch64.lua | 979 ++++++++
src/Makefile | 10 +-
src/host/buildvm.c | 2 +
src/host/buildvm_asm.c | 9 +
src/jit/bcsave.lua | 1 +
src/jit/dis_loongarch64.lua | 697 ++++++
src/lib_jit.c | 3 +-
src/lj_arch.h | 32 +
src/lj_asm.c | 4 +
src/lj_asm_loongarch64.h | 1990 +++++++++++++++
src/lj_ccall.c | 152 +-
src/lj_ccall.h | 17 +-
src/lj_ccallback.c | 58 +-
src/lj_emit_loongarch64.h | 306 +++
src/lj_frame.h | 9 +
src/lj_gdbjit.c | 12 +
src/lj_jit.h | 15 +-
src/lj_target.h | 4 +-
src/lj_target_loongarch64.h | 313 +++
src/lj_trace.c | 6 +-
src/lj_vmmath.c | 2 +-
src/vm_loongarch64.dasc | 4625 +++++++++++++++++++++++++++++++++++
24 files changed, 9673 insertions(+), 17 deletions(-)
create mode 100644 dynasm/dasm_loongarch64.h
create mode 100644 dynasm/dasm_loongarch64.lua
create mode 100644 src/jit/dis_loongarch64.lua
create mode 100644 src/lj_asm_loongarch64.h
create mode 100644 src/lj_emit_loongarch64.h
create mode 100644 src/lj_target_loongarch64.h
create mode 100644 src/vm_loongarch64.dasc
diff --git a/Makefile b/Makefile
index f2e4d553b..0c6447df4 100644
--- a/Makefile
+++ b/Makefile
@@ -101,6 +101,7 @@ FILES_JITLIB= bc.lua bcsave.lua dump.lua p.lua v.lua zone.lua \
dis_mips64.lua dis_mips64el.lua \
dis_mips64r6.lua dis_mips64r6el.lua \
dis_riscv.lua dis_riscv64.lua \
+ dis_loongarch64.lua \
vmdef.lua
ifeq (,$(findstring Windows,$(OS)))
diff --git a/dynasm/dasm_loongarch64.h b/dynasm/dasm_loongarch64.h
new file mode 100644
index 000000000..6d232ece4
--- /dev/null
+++ b/dynasm/dasm_loongarch64.h
@@ -0,0 +1,443 @@
+/*
+** DynASM LoongArch encoding engine.
+** Copyright (C) 2005-2022 Mike Pall. All rights reserved.
+** Released under the MIT license. See dynasm.lua for full copyright notice.
+*/
+
+#include <stddef.h>
+#include <stdarg.h>
+#include <string.h>
+#include <stdlib.h>
+
+#define DASM_ARCH "loongarch64"
+
+#ifndef DASM_EXTERN
+#define DASM_EXTERN(a,b,c,d) 0
+#endif
+
+/* Action definitions. */
+enum {
+ DASM_STOP, DASM_SECTION, DASM_ESC, DASM_REL_EXT,
+ /* The following actions need a buffer position. */
+ DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG,
+ /* The following actions also have an argument. */
+ DASM_REL_PC, DASM_LABEL_PC, DASM_IMM, DASM_IMM2,
+ DASM__MAX
+};
+
+/* Maximum number of section buffer positions for a single dasm_put() call. */
+#define DASM_MAXSECPOS 25
+
+/* DynASM encoder status codes. Action list offset or number are or'ed in. */
+#define DASM_S_OK 0x00000000
+#define DASM_S_NOMEM 0x01000000
+#define DASM_S_PHASE 0x02000000
+#define DASM_S_MATCH_SEC 0x03000000
+#define DASM_S_RANGE_I 0x11000000
+#define DASM_S_RANGE_SEC 0x12000000
+#define DASM_S_RANGE_LG 0x13000000
+#define DASM_S_RANGE_PC 0x14000000
+#define DASM_S_RANGE_REL 0x15000000
+#define DASM_S_UNDEF_LG 0x21000000
+#define DASM_S_UNDEF_PC 0x22000000
+
+/* Macros to convert positions (8 bit section + 24 bit index). */
+#define DASM_POS2IDX(pos) ((pos)&0x00ffffff)
+#define DASM_POS2BIAS(pos) ((pos)&0xff000000)
+#define DASM_SEC2POS(sec) ((sec)<<24)
+#define DASM_POS2SEC(pos) ((pos)>>24)
+#define DASM_POS2PTR(D, pos) (D->sections[DASM_POS2SEC(pos)].rbuf + (pos))
+
+/* Action list type. */
+typedef const unsigned int *dasm_ActList;
+
+/* Per-section structure. */
+typedef struct dasm_Section {
+ int *rbuf; /* Biased buffer pointer (negative section bias). */
+ int *buf; /* True buffer pointer. */
+ size_t bsize; /* Buffer size in bytes. */
+ int pos; /* Biased buffer position. */
+ int epos; /* End of biased buffer position - max single put. */
+ int ofs; /* Byte offset into section. */
+} dasm_Section;
+
+/* Core structure holding the DynASM encoding state. */
+struct dasm_State {
+ size_t psize; /* Allocated size of this structure. */
+ dasm_ActList actionlist; /* Current actionlist pointer. */
+ int *lglabels; /* Local/global chain/pos ptrs. */
+ size_t lgsize;
+ int *pclabels; /* PC label chains/pos ptrs. */
+ size_t pcsize;
+ void **globals; /* Array of globals. */
+ dasm_Section *section; /* Pointer to active section. */
+ size_t codesize; /* Total size of all code sections. */
+ int maxsection; /* 0 <= sectionidx < maxsection. */
+ int status; /* Status code. */
+ dasm_Section sections[1]; /* All sections. Alloc-extended. */
+};
+
+/* The size of the core structure depends on the max. number of sections. */
+#define DASM_PSZ(ms) (sizeof(dasm_State)+(ms-1)*sizeof(dasm_Section))
+
+
+/* Initialize DynASM state. */
+void dasm_init(Dst_DECL, int maxsection)
+{
+ dasm_State *D;
+ size_t psz = 0;
+ Dst_REF = NULL;
+ DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection));
+ D = Dst_REF;
+ D->psize = psz;
+ D->lglabels = NULL;
+ D->lgsize = 0;
+ D->pclabels = NULL;
+ D->pcsize = 0;
+ D->globals = NULL;
+ D->maxsection = maxsection;
+ memset((void *)D->sections, 0, maxsection * sizeof(dasm_Section));
+}
+
+/* Free DynASM state. */
+void dasm_free(Dst_DECL)
+{
+ dasm_State *D = Dst_REF;
+ int i;
+ for (i = 0; i < D->maxsection; i++)
+ if (D->sections[i].buf)
+ DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize);
+ if (D->pclabels) DASM_M_FREE(Dst, D->pclabels, D->pcsize);
+ if (D->lglabels) DASM_M_FREE(Dst, D->lglabels, D->lgsize);
+ DASM_M_FREE(Dst, D, D->psize);
+}
+
+/* Setup global label array. Must be called before dasm_setup(). */
+void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl)
+{
+ dasm_State *D = Dst_REF;
+ D->globals = gl;
+ DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int));
+}
+
+/* Grow PC label array. Can be called after dasm_setup(), too. */
+void dasm_growpc(Dst_DECL, unsigned int maxpc)
+{
+ dasm_State *D = Dst_REF;
+ size_t osz = D->pcsize;
+ DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int));
+ memset((void *)(((unsigned char *)D->pclabels)+osz), 0, D->pcsize-osz);
+}
+
+/* Setup encoder. */
+void dasm_setup(Dst_DECL, const void *actionlist)
+{
+ dasm_State *D = Dst_REF;
+ int i;
+ D->actionlist = (dasm_ActList)actionlist;
+ D->status = DASM_S_OK;
+ D->section = &D->sections[0];
+ memset((void *)D->lglabels, 0, D->lgsize);
+ if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize);
+ for (i = 0; i < D->maxsection; i++) {
+ D->sections[i].pos = DASM_SEC2POS(i);
+ D->sections[i].rbuf = D->sections[i].buf - D->sections[i].pos;
+ D->sections[i].ofs = 0;
+ }
+}
+
+
+#ifdef DASM_CHECKS
+#define CK(x, st) \
+ do { if (!(x)) { \
+ D->status = DASM_S_##st|(int)(p-D->actionlist-1); return; } } while (0)
+#define CKPL(kind, st) \
+ do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \
+ D->status = DASM_S_RANGE_##st|(int)(p-D->actionlist-1); return; } } while (0)
+#else
+#define CK(x, st) ((void)0)
+#define CKPL(kind, st) ((void)0)
+#endif
+
+/* Range-check a split 26-bit immediate; return it, or -1 if unencodable. */
+static int dasm_imm2(unsigned int n)
+{
+  /* The original 21-bit and 26-bit branches were identical: (n>>21)==0
+  ** already implies (n>>26)==0, so a single range check suffices. */
+  if ((n >> 26) == 0)
+    return n;
+  return -1;
+}
+
+/* Pass 1: Store actions and args, link branches/labels, estimate offsets. */
+void dasm_put(Dst_DECL, int start, ...)
+{
+ va_list ap;
+ dasm_State *D = Dst_REF;
+ dasm_ActList p = D->actionlist + start;
+ dasm_Section *sec = D->section;
+ int pos = sec->pos, ofs = sec->ofs;
+ int *b;
+
+ if (pos >= sec->epos) {
+ DASM_M_GROW(Dst, int, sec->buf, sec->bsize,
+ sec->bsize + 2*DASM_MAXSECPOS*sizeof(int));
+ sec->rbuf = sec->buf - DASM_POS2BIAS(pos);
+ sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos);
+ }
+
+ b = sec->rbuf;
+ b[pos++] = start;
+
+ va_start(ap, start);
+ while (1) {
+ unsigned int ins = *p++;
+ unsigned int action = (ins >> 16) - 0xff00;
+ if (action >= DASM__MAX) {
+ ofs += 4;
+ } else {
+ int *pl, n = action >= DASM_REL_PC ? va_arg(ap, int) : 0;
+ switch (action) {
+ case DASM_STOP: goto stop;
+ case DASM_SECTION:
+ n = (ins & 255); CK(n < D->maxsection, RANGE_SEC);
+ D->section = &D->sections[n]; goto stop;
+ case DASM_ESC: p++; ofs += 4; break;
+ case DASM_REL_EXT: break;
+ case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break;
+ case DASM_REL_LG:
+ n = (ins & 2047) - 10; pl = D->lglabels + n;
+ /* Bkwd rel or global. */
+ if (n >= 0) { CK(n>=10||*pl<0, RANGE_LG); CKPL(lg, LG); goto putrel; }
+ pl += 10; n = *pl;
+ if (n < 0) n = 0; /* Start new chain for fwd rel if label exists. */
+ goto linkrel;
+ case DASM_REL_PC:
+ pl = D->pclabels + n; CKPL(pc, PC);
+ putrel:
+ n = *pl;
+ if (n < 0) { /* Label exists. Get label pos and store it. */
+ b[pos] = -n;
+ } else {
+ linkrel:
+ b[pos] = n; /* Else link to rel chain, anchored at label. */
+ *pl = pos;
+ }
+ pos++;
+ break;
+ case DASM_LABEL_LG:
+ pl = D->lglabels + (ins & 2047) - 10; CKPL(lg, LG); goto putlabel;
+ case DASM_LABEL_PC:
+ pl = D->pclabels + n; CKPL(pc, PC);
+ putlabel:
+ n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */
+ while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos;
+ }
+ *pl = -pos; /* Label exists now. */
+ b[pos++] = ofs; /* Store pass1 offset estimate. */
+ break;
+ case DASM_IMM:
+#ifdef DASM_CHECKS
+ CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I);
+#endif
+ n >>= ((ins>>10)&31);
+#ifdef DASM_CHECKS
+ if (ins & 0x8000)
+ CK(((n + (1<<(((ins>>5)&31)-1)))>>((ins>>5)&31)) == 0, RANGE_I);
+ else
+ CK((n>>((ins>>5)&31)) == 0, RANGE_I);
+#endif
+ b[pos++] = n;
+ break;
+ case DASM_IMM2:
+ CK(dasm_imm2((unsigned int)n) != -1, RANGE_I);
+ b[pos++] = n;
+ break;
+ }
+ }
+ }
+stop:
+ va_end(ap);
+ sec->pos = pos;
+ sec->ofs = ofs;
+}
+#undef CK
+
+/* Pass 2: Link sections, shrink aligns, fix label offsets. */
+int dasm_link(Dst_DECL, size_t *szp)
+{
+ dasm_State *D = Dst_REF;
+ int secnum;
+ int ofs = 0;
+
+#ifdef DASM_CHECKS
+ *szp = 0;
+ if (D->status != DASM_S_OK) return D->status;
+ {
+ int pc;
+ for (pc = 0; pc*sizeof(int) < D->pcsize; pc++)
+ if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC|pc;
+ }
+#endif
+
+ { /* Handle globals not defined in this translation unit. */
+ int idx;
+ for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) {
+ int n = D->lglabels[idx];
+ /* Undefined label: Collapse rel chain and replace with marker (< 0). */
+ while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; }
+ }
+ }
+
+ /* Combine all code sections. No support for data sections (yet). */
+ for (secnum = 0; secnum < D->maxsection; secnum++) {
+ dasm_Section *sec = D->sections + secnum;
+ int *b = sec->rbuf;
+ int pos = DASM_SEC2POS(secnum);
+ int lastpos = sec->pos;
+
+ while (pos != lastpos) {
+ dasm_ActList p = D->actionlist + b[pos++];
+ while (1) {
+ unsigned int ins = *p++;
+ unsigned int action = (ins >> 16) - 0xff00;
+ switch (action) {
+ case DASM_STOP: case DASM_SECTION: goto stop;
+ case DASM_ESC: p++; break;
+ case DASM_REL_EXT: break;
+ case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break;
+ case DASM_REL_LG: case DASM_REL_PC: pos++; break;
+ case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break;
+ case DASM_IMM: case DASM_IMM2: pos++; break;
+ }
+ }
+ stop: (void)0;
+ }
+ ofs += sec->ofs; /* Next section starts right after current section. */
+ }
+
+ D->codesize = ofs; /* Total size of all code sections */
+ *szp = ofs;
+ return DASM_S_OK;
+}
+
+#ifdef DASM_CHECKS
+#define CK(x, st) \
+ do { if (!(x)) return DASM_S_##st|(int)(p-D->actionlist-1); } while (0)
+#else
+#define CK(x, st) ((void)0)
+#endif
+
+/* Pass 3: Encode sections. */
+int dasm_encode(Dst_DECL, void *buffer)
+{
+  dasm_State *D = Dst_REF;
+  char *base = (char *)buffer;
+  unsigned int *cp = (unsigned int *)buffer;
+  int secnum;
+
+  /* Encode all code sections. No support for data sections (yet). */
+  for (secnum = 0; secnum < D->maxsection; secnum++) {
+    dasm_Section *sec = D->sections + secnum;
+    int *b = sec->buf;
+    int *endb = sec->rbuf + sec->pos;
+
+    while (b != endb) {
+      dasm_ActList p = D->actionlist + *b++;
+      while (1) {
+	unsigned int ins = *p++;
+	unsigned int action = (ins >> 16) - 0xff00;
+	int n = (action >= DASM_ALIGN && action < DASM__MAX) ? *b++ : 0;
+	switch (action) {
+	case DASM_STOP: case DASM_SECTION: goto stop;
+	case DASM_ESC: *cp++ = *p++; break;
+	case DASM_REL_EXT:
+	  n = DASM_EXTERN(Dst, (unsigned char *)cp, (ins & 2047), 1);
+	  goto patchrel;
+	case DASM_ALIGN:
+	  ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0x60000000;
+	  break;
+	case DASM_REL_LG:
+	  if (n < 0) {
+	    n = (int)((ptrdiff_t)D->globals[-n-10] - (ptrdiff_t)cp + 4);
+	    goto patchrel;
+	  }
+	  /* fallthrough */
+	case DASM_REL_PC:
+	  CK(n >= 0, UNDEF_PC);
+	  n = *DASM_POS2PTR(D, n);
+	  if (ins & 2048)
+	    n = (n + (int)(size_t)base) & 0x0fffffff;
+	  else
+	    n = n - (int)((char *)cp - base) + 4;
+	patchrel: {
+	    unsigned int e = 16 + ((ins >> 12) & 15);
+	    CK((n & 3) == 0 &&
+	       ((n + ((ins & 2048) ? 0 : (1<<(e+1)))) >> (e+2)) == 0, RANGE_REL);
+	    if (!(ins & 0xf800)) { /* BEQ, BNE, BLT, BGE, BLTU, BGEU */
+	      cp[-1] |= (((n >> 2) & 0xffff) << 10);
+	    } else if ((ins & 0x5000)) { /* BEQZ, BNEZ, BCEQZ, BCNEZ */
+	      cp[-1] |= (((n >> 2) & 0xffff) << 10) | (((n >> 2) & 0x1f0000) >> 16);
+	    } else if ((ins & 0xa000)) { /* B, BL */
+	      cp[-1] |= (((n >> 2) & 0xffff) << 10) | (((n >> 2) & 0x3ff0000) >> 16);
+	    }
+	  }
+	  break;
+	case DASM_LABEL_LG:
+	  ins &= 2047; if (ins >= 20) D->globals[ins-20] = (void *)(base + n);
+	  break;
+	case DASM_LABEL_PC: break;
+	case DASM_IMM2: {
+	  /* Split 26-bit immediate: imm[15:0] -> ins[25:10], imm[25:16] -> ins[9:0]. */
+	  unsigned int imm2n = dasm_imm2((unsigned int)n);
+	  cp[-1] |= (((imm2n & 0xffff) << 10) | ((imm2n & 0x3ff0000) >> 16));
+	}
+	break;
+
+	case DASM_IMM:
+	  cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31);
+	  break;
+	default: *cp++ = ins; break;
+	}
+      }
+      stop: (void)0;
+    }
+  }
+
+  if (base + D->codesize != (char *)cp) /* Check for phase errors. */
+    return DASM_S_PHASE;
+  return DASM_S_OK;
+}
+#undef CK
+
+/* Get PC label offset. */
+int dasm_getpclabel(Dst_DECL, unsigned int pc)
+{
+ dasm_State *D = Dst_REF;
+ if (pc*sizeof(int) < D->pcsize) {
+ int pos = D->pclabels[pc];
+ if (pos < 0) return *DASM_POS2PTR(D, -pos);
+ if (pos > 0) return -1; /* Undefined. */
+ }
+ return -2; /* Unused or out of range. */
+}
+
+#ifdef DASM_CHECKS
+/* Optional sanity checker to call between isolated encoding steps. */
+int dasm_checkstep(Dst_DECL, int secmatch)
+{
+ dasm_State *D = Dst_REF;
+ if (D->status == DASM_S_OK) {
+ int i;
+ for (i = 1; i <= 9; i++) {
+ if (D->lglabels[i] > 0) { D->status = DASM_S_UNDEF_LG|i; break; }
+ D->lglabels[i] = 0;
+ }
+ }
+ if (D->status == DASM_S_OK && secmatch >= 0 &&
+ D->section != &D->sections[secmatch])
+ D->status = DASM_S_MATCH_SEC|(int)(D->section-D->sections);
+ return D->status;
+}
+#endif
+
diff --git a/dynasm/dasm_loongarch64.lua b/dynasm/dasm_loongarch64.lua
new file mode 100644
index 000000000..ba6bf67e3
--- /dev/null
+++ b/dynasm/dasm_loongarch64.lua
@@ -0,0 +1,979 @@
+------------------------------------------------------------------------------
+-- DynASM LoongArch module.
+--
+-- Copyright (C) 2005-2022 Mike Pall. All rights reserved.
+-- See dynasm.lua for full copyright notice.
+------------------------------------------------------------------------------
+
+-- Module information:
+local _info = {
+ arch = "loongarch64",
+ description = "DynASM LoongArch64 module",
+ version = "1.5.0",
+ vernum = 10500,
+ release = "2021-05-02",
+ author = "Mike Pall",
+ license = "MIT",
+}
+
+-- Exported glue functions for the arch-specific module.
+local _M = { _info = _info }
+
+-- Cache library functions.
+local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs
+local assert, setmetatable = assert, setmetatable
+local _s = string
+local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char
+local match, gmatch = _s.match, _s.gmatch
+local concat, sort = table.concat, table.sort
+local bit = bit or require("bit")
+local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift
+local tohex = bit.tohex
+
+-- Inherited tables and callbacks.
+local g_opt, g_arch
+local wline, werror, wfatal, wwarn
+
+-- Action name list.
+-- CHECK: Keep this in sync with the C code!
+local action_names = {
+ "STOP", "SECTION", "ESC", "REL_EXT",
+ "ALIGN", "REL_LG", "LABEL_LG",
+ "REL_PC", "LABEL_PC", "IMM", "IMM2",
+}
+
+-- Maximum number of section buffer positions for dasm_put().
+-- CHECK: Keep this in sync with the C code!
+local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines.
+
+-- Action name -> action number.
+local map_action = {}
+for n,name in ipairs(action_names) do
+ map_action[name] = n-1
+end
+
+-- Action list buffer.
+local actlist = {}
+
+-- Argument list for next dasm_put(). Start with offset 0 into action list.
+local actargs = { 0 }
+
+-- Current number of section buffer positions for dasm_put().
+local secpos = 1
+
+------------------------------------------------------------------------------
+
+-- Dump action names and numbers.
+local function dumpactions(out)
+ out:write("DynASM encoding engine action codes:\n")
+ for n,name in ipairs(action_names) do
+ local num = map_action[name]
+ out:write(format(" %-10s %02X %d\n", name, num, num))
+ end
+ out:write("\n")
+end
+
+-- Write action list buffer as a huge static C array.
+local function writeactions(out, name)
+ local nn = #actlist
+ if nn == 0 then nn = 1; actlist[0] = map_action.STOP end
+ out:write("static const unsigned int ", name, "[", nn, "] = {\n")
+ for i = 1,nn-1 do
+ assert(out:write("0x", tohex(actlist[i]), ",\n"))
+ end
+ assert(out:write("0x", tohex(actlist[nn]), "\n};\n\n"))
+end
+
+------------------------------------------------------------------------------
+
+-- Add word to action list.
+local function wputxw(n)
+ assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range")
+ actlist[#actlist+1] = n
+end
+
+-- Add action to list with optional arg. Advance buffer pos, too.
+local function waction(action, val, a, num)
+ local w = assert(map_action[action], "bad action name `"..action.."'")
+ wputxw(0xff000000 + w * 0x10000 + (val or 0))
+ if a then actargs[#actargs+1] = a end
+ if a or num then secpos = secpos + (num or 1) end
+end
+
+-- Flush action list (intervening C code or buffer pos overflow).
+local function wflush(term)
+ if #actlist == actargs[1] then return end -- Nothing to flush.
+ if not term then waction("STOP") end -- Terminate action list.
+ wline(format("dasm_put(Dst, %s);", concat(actargs, ", ")), true)
+ actargs = { #actlist } -- Actionlist offset is 1st arg to next dasm_put().
+ secpos = 1 -- The actionlist offset occupies a buffer position, too.
+end
+
+-- Put escaped word.
+local function wputw(n)
+ if n >= 0xff000000 then waction("ESC") end
+ wputxw(n)
+end
+
+-- Reserve position for word.
+local function wpos()
+ local pos = #actlist+1
+ actlist[pos] = ""
+ return pos
+end
+
+-- Store word to reserved position.
+local function wputpos(pos, n)
+ assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range")
+ actlist[pos] = n
+end
+
+------------------------------------------------------------------------------
+
+-- Global label name -> global label number. With auto assignment on 1st use.
+local next_global = 20
+local map_global = setmetatable({}, { __index = function(t, name)
+ if not match(name, "^[%a_][%w_]*$") then werror("bad global label") end
+ local n = next_global
+ if n > 2047 then werror("too many global labels") end
+ next_global = n + 1
+ t[name] = n
+ return n
+end})
+
+-- Dump global labels.
+local function dumpglobals(out, lvl)
+ local t = {}
+ for name, n in pairs(map_global) do t[n] = name end
+ out:write("Global labels:\n")
+ for i=20,next_global-1 do
+ out:write(format(" %s\n", t[i]))
+ end
+ out:write("\n")
+end
+
+-- Write global label enum.
+local function writeglobals(out, prefix)
+ local t = {}
+ for name, n in pairs(map_global) do t[n] = name end
+ out:write("enum {\n")
+ for i=20,next_global-1 do
+ out:write(" ", prefix, t[i], ",\n")
+ end
+ out:write(" ", prefix, "_MAX\n};\n")
+end
+
+-- Write global label names.
+local function writeglobalnames(out, name)
+ local t = {}
+ for name, n in pairs(map_global) do t[n] = name end
+ out:write("static const char *const ", name, "[] = {\n")
+ for i=20,next_global-1 do
+ out:write(" \"", t[i], "\",\n")
+ end
+ out:write(" (const char *)0\n};\n")
+end
+
+------------------------------------------------------------------------------
+
+-- Extern label name -> extern label number. With auto assignment on 1st use.
+local next_extern = 0
+local map_extern_ = {}
+local map_extern = setmetatable({}, { __index = function(t, name)
+ -- No restrictions on the name for now.
+ local n = next_extern
+ if n > 2047 then werror("too many extern labels") end
+ next_extern = n + 1
+ t[name] = n
+ map_extern_[n] = name
+ return n
+end})
+
+-- Dump extern labels.
+local function dumpexterns(out, lvl)
+ out:write("Extern labels:\n")
+ for i=0,next_extern-1 do
+ out:write(format(" %s\n", map_extern_[i]))
+ end
+ out:write("\n")
+end
+
+-- Write extern label names.
+local function writeexternnames(out, name)
+ out:write("static const char *const ", name, "[] = {\n")
+ for i=0,next_extern-1 do
+ out:write(" \"", map_extern_[i], "\",\n")
+ end
+ out:write(" (const char *)0\n};\n")
+end
+
+------------------------------------------------------------------------------
+
+-- Arch-specific maps.
+local map_archdef = { sp="r3", ra="r1" } -- Ext. register name -> int. name.
+
+local map_type = {} -- Type name -> { ctype, reg }
+local ctypenum = 0 -- Type number (for Dt... macros).
+
+-- Reverse defines for registers.
+function _M.revdef(s)
+ if s == "r3" then return "sp"
+ elseif s == "r1" then return "ra" end
+ return s
+end
+
+------------------------------------------------------------------------------
+
+-- Template strings for LoongArch instructions.
+local map_op = {
+ ["clo.w_2"] = "00001000DJ",
+ ["clz.w_2"] = "00001400DJ",
+ ["cto.w_2"] = "00001800DJ",
+ ["ctz.w_2"] = "00001c00DJ",
+ ["clo.d_2"] = "00002000DJ",
+ ["clz.d_2"] = "00002400DJ",
+ ["cto.d_2"] = "00002800DJ",
+ ["ctz.d_2"] = "00002c00DJ",
+ ["revb.2h_2"] = "00003000DJ",
+ ["revb.4h_2"] = "00003400DJ",
+ ["revb.2w_2"] = "00003800DJ",
+ ["revb.d_2"] = "00003c00DJ",
+ ["revh.2w_2"] = "00004000DJ",
+ ["revh.d_2"] = "00004400DJ",
+ ["bitrev.4b_2"] = "00004800DJ",
+ ["bitrev.8b_2"] = "00004c00DJ",
+ ["bitrev.w_2"] = "00005000DJ",
+ ["bitrev.d_2"] = "00005400DJ",
+ ["ext.w.h_2"] = "00005800DJ",
+ ["ext.w.b_2"] = "00005c00DJ",
+
+ ["add.w_3"] = "00100000DJK",
+ ["add.d_3"] = "00108000DJK",
+ ["sub.w_3"] = "00110000DJK",
+ ["sub.d_3"] = "00118000DJK",
+ slt_3 = "00120000DJK",
+ sltu_3 = "00128000DJK",
+ maskeqz_3 = "00130000DJK",
+ masknez_3 = "00138000DJK",
+
+ nor_3 = "00140000DJK",
+ and_3 = "00148000DJK",
+ or_3 = "00150000DJK",
+ xor_3 = "00158000DJK",
+ orn_3 = "00160000DJK",
+ andn_3 = "00168000DJK",
+ ["sll.w_3"] = "00170000DJK",
+ ["srl.w_3"] = "00178000DJK",
+ ["sra.w_3"] = "00180000DJK",
+ ["sll.d_3"] = "00188000DJK",
+ ["srl.d_3"] = "00190000DJK",
+ ["sra.d_3"] = "00198000DJK",
+ ["rotr.w_3"] = "001b0000DJK",
+ ["rotr.d_3"] = "001b8000DJK",
+ ["mul.w_3"] = "001c0000DJK",
+ ["mulh.w_3"] = "001c8000DJK",
+ ["mulh.wu_3"] = "001d0000DJK",
+ ["mul.d_3"] = "001d8000DJK",
+ ["mulh.d_3"] = "001e0000DJK",
+ ["mulh.du_3"] = "001e8000DJK",
+ ["mulw.d.w_3"] = "001f0000DJK",
+ ["mulw.d.wu_3"] = "001f8000DJK",
+
+ ["fabs.h_2"] = "01140000FG",
+ ["fabs.s_2"] = "01140400FG",
+ ["fabs.d_2"] = "01140800FG",
+ ["fneg.h_2"] = "01141000FG",
+ ["fneg.s_2"] = "01141400FG",
+ ["fneg.d_2"] = "01141800FG",
+ ["flogb.h_2"] = "01142000FG",
+ ["flogb.s_2"] = "01142400FG",
+ ["flogb.d_2"] = "01142800FG",
+ ["fclass.h_2"] = "01143000FG",
+ ["fclass.s_2"] = "01143400FG",
+ ["fclass.d_2"] = "01143800FG",
+ ["fsqrt.h_2"] = "01144000FG",
+ ["fsqrt.s_2"] = "01144400FG",
+ ["fsqrt.d_2"] = "01144800FG",
+ ["frecip.h_2"] = "01145000FG",
+ ["frecip.s_2"] = "01145400FG",
+ ["frecip.d_2"] = "01145800FG",
+ ["frsqrt.h_2"] = "01146000FG",
+ ["frsqrt.s_2"] = "01146400FG",
+ ["frsqrt.d_2"] = "01146800FG",
+ ["frecipe.h_2"] = "01147000FG",
+ ["frecipe.s_2"] = "01147400FG",
+ ["frecipe.d_2"] = "01147800FG",
+ ["frsqrte.h_2"] = "01148000FG",
+ ["frsqrte.s_2"] = "01148400FG",
+ ["frsqrte.d_2"] = "01148800FG",
+
+ ["fmov.h_2"] = "01149000FG",
+ ["fmov.s_2"] = "01149400FG",
+ ["fmov.d_2"] = "01149800FG",
+ ["movgr2fr.h_2"] = "0114a000FJ",
+ ["movgr2fr.w_2"] = "0114a400FJ",
+ ["movgr2fr.d_2"] = "0114a800FJ",
+ ["movgr2frh.w_2"] = "0114ac00FJ",
+ ["movfr2gr.h_2"] = "0114b000DG",
+ ["movfr2gr.s_2"] = "0114b400DG",
+ ["movfr2gr.d_2"] = "0114b800DG",
+ ["movfrh2gr.s_2"] = "0114bc00DG",
+ movgr2fcsr_2 = "0114c000SG",
+ movfcsr2gr_2 = "0114c800FR",
+ movfr2cf_2 = "0114d000EG",
+ movcf2fr_2 = "0114d400FA",
+ movgr2cf_2 = "0114d800EG",
+ movcf2gr_2 = "0114dc00DA",
+ ["fcvt.ld.d_2"] = "0114e000FG",
+ ["fcvt.ud.d_2"] = "0114e400FG",
+ ["fcvt.s.d_2"] = "01191800FG",
+ ["fcvt.d.s_2"] = "01192400FG",
+ ["ftintrm.w.s_2"] = "011a0400FG",
+ ["ftintrm.w.d_2"] = "011a0800FG",
+ ["ftintrm.l.s_2"] = "011a2400FG",
+ ["ftintrm.l.d_2"] = "011a2800FG",
+ ["ftintrp.w.s_2"] = "011a4400FG",
+ ["ftintrp.w.d_2"] = "011a4800FG",
+ ["ftintrp.l.s_2"] = "011a6400FG",
+ ["ftintrp.l.d_2"] = "011a6800FG",
+ ["ftintrz.w.s_2"] = "011a8400FG",
+ ["ftintrz.w.d_2"] = "011a8800FG",
+ ["ftintrz.l.s_2"] = "011aa400FG",
+ ["ftintrz.l.d_2"] = "011aa800FG",
+ ["ftintrne.w.s_2"] = "011ac400FG",
+ ["ftintrne.w.d_2"] = "011ac800FG",
+ ["ftintrne.l.s_2"] = "011ae400FG",
+ ["ftintrne.l.d_2"] = "011ae800FG",
+ ["ftint.w.s_2"] = "011b0400FG",
+ ["ftint.w.d_2"] = "011b0800FG",
+ ["ftint.l.s_2"] = "011b2400FG",
+ ["ftint.l.d_2"] = "011b2800FG",
+ ["ffint.s.w_2"] = "011d1000FG",
+ ["ffint.s.l_2"] = "011d1800FG",
+ ["ffint.d.w_2"] = "011d2000FG",
+ ["ffint.d.l_2"] = "011d2800FG",
+ ["frint.s_2"] = "011e4400FG",
+ ["frint.d_2"] = "011e4800FG",
+
+ ["fadd.h_3"] = "01000000FGH",
+ ["fadd.s_3"] = "01008000FGH",
+ ["fadd.d_3"] = "01010000FGH",
+ ["fsub.h_3"] = "01020000FGH",
+ ["fsub.s_3"] = "01028000FGH",
+ ["fsub.d_3"] = "01030000FGH",
+ ["fmul.h_3"] = "01040000FGH",
+ ["fmul.s_3"] = "01048000FGH",
+ ["fmul.d_3"] = "01050000FGH",
+ ["fdiv.h_3"] = "01060000FGH",
+ ["fdiv.s_3"] = "01068000FGH",
+ ["fdiv.d_3"] = "01070000FGH",
+ ["fmax.h_3"] = "01080000FGH",
+ ["fmax.s_3"] = "01088000FGH",
+ ["fmax.d_3"] = "01090000FGH",
+ ["fmin.h_3"] = "010a0000FGH",
+ ["fmin.s_3"] = "010a8000FGH",
+ ["fmin.d_3"] = "010b0000FGH",
+ ["fmaxa.h_3"] = "010c0000FGH",
+ ["fmaxa.s_3"] = "010c8000FGH",
+ ["fmaxa.d_3"] = "010d0000FGH",
+ ["fmina.h_3"] = "010e0000FGH",
+ ["fmina.s_3"] = "010e8000FGH",
+ ["fmina.d_3"] = "010f0000FGH",
+ ["fscaleb.h_3"] = "01100000FGH",
+ ["fscaleb.s_3"] = "01108000FGH",
+ ["fscaleb.d_3"] = "01110000FGH",
+ ["fcopysign.h_3"] = "01120000FGH",
+ ["fcopysign.s_3"] = "01128000FGH",
+ ["fcopysign.d_3"] = "01130000FGH",
+
+ ["fmadd.s_4"] = "08100000FGHi",
+ ["fmadd.d_4"] = "08200000FGHi",
+ ["fnmadd.d_4"] = "08a00000FGHi",
+ ["fmsub.s_4"] = "08500000FGHi",
+ ["fmsub.d_4"] = "08600000FGHi",
+ ["fnmsub.d_4"] = "08e00000FGHi",
+
+ ["alsl.w_4"] = "00040000DJKQ",
+ ["alsl.wu_4"] = "00060000DJKQ",
+ ["alsl.d_4"] = "002c0000DJKQ",
+ ["bytepick.w_4"] = "00080000DJKQ",
+ ["bytepick.d_4"] = "000c0000DJKB",
+
+ ["div.w_3"] = "00200000DJK",
+ ["mod.w_3"] = "00208000DJK",
+ ["div.wu_3"] = "00210000DJK",
+ ["mod.wu_3"] = "00218000DJK",
+ ["div.d_3"] = "00220000DJK",
+ ["mod.d_3"] = "00228000DJK",
+ ["div.du_3"] = "00230000DJK",
+ ["mod.du_3"] = "00238000DJK",
+ ["crc.w.b.w_3"] = "00240000DJK",
+ ["crc.w.h.w_3"] = "00248000DJK",
+ ["crc.w.w.w_3"] = "00250000DJK",
+ ["crc.w.d.w_3"] = "00258000DJK",
+ ["crcc.w.b.w_3"] = "00260000DJK",
+ ["crcc.w.h.w_3"] = "00268000DJK",
+ ["crcc.w.w.w_3"] = "00270000DJK",
+ ["crcc.w.d.w_3"] = "00278000DJK",
+
+ break_1 = "002a0000C",
+ syscall_1 = "002b0000C",
+
+ ["slli.w_3"] = "00408000DJU",
+ ["slli.d_3"] = "00410000DJV",
+ ["srli.w_3"] = "00448000DJU",
+ ["srli.d_3"] = "00450000DJV",
+ ["srai.w_3"] = "00488000DJU",
+ ["srai.d_3"] = "00490000DJV",
+ ["rotri.w_3"] = "004c8000DJU",
+ ["rotri.d_3"] = "004d0000DJV",
+
+ ["bstrins.w_4"] = "00600000DJMU",
+ ["bstrpick.w_4"] = "00608000DJMU",
+ ["bstrins.d_4"] = "00800000DJNV",
+ ["bstrpick.d_4"] = "00c00000DJNV",
+ slti_3 = "02000000DJX",
+ sltui_3 = "02400000DJX",
+ ["addi.w_3"] = "02800000DJX",
+ ["addi.d_3"] = "02c00000DJX",
+ ["lu52i.d_3"] = "03000000DJX",
+ andi_3 = "03400000DJT",
+ ori_3 = "03800000DJT",
+ xori_3 = "03c00000DJT",
+ ["lu12i.w_2"] = "14000000DZ",
+ ["lu32i.d_2"] = "16000000DZ",
+ pcaddi_2 = "18000000DZ",
+ pcalau12i_2 = "1a000000DZ",
+ pcaddu12i_2 = "1c000000DZ",
+ pcaddu18i_2 = "1e000000DZ",
+
+ ["ldx.b_3"] = "38000000DJK",
+ ["ldx.h_3"] = "38040000DJK",
+ ["ldx.w_3"] = "38080000DJK",
+ ["ldx.d_3"] = "380c0000DJK",
+ ["stx.b_3"] = "38100000DJK",
+ ["stx.h_3"] = "38140000DJK",
+ ["stx.w_3"] = "38180000DJK",
+ ["stx.d_3"] = "381c0000DJK",
+ ["ldx.bu_3"] = "38200000DJK",
+ ["ldx.hu_3"] = "38240000DJK",
+ ["ldx.wu_3"] = "38280000DJK",
+ ["fldx.s_3"] = "38300000FJK",
+ ["fldx.d_3"] = "38340000FJK",
+ ["fstx.s_3"] = "38380000FJK",
+ ["fstx.d_3"] = "383c0000FJK",
+ ["fldgt.s_3"] = "38740000FJK",
+ ["fldgt.d_3"] = "38748000FJK",
+ ["fldle.s_3"] = "38750000FJK",
+ ["fldle.d_3"] = "38758000FJK",
+ ["fstgt.s_3"] = "38760000FJK",
+ ["fstgt.d_3"] = "38768000FJK",
+ ["fstle.s_3"] = "38770000FJK",
+ ["fstle.d_3"] = "38778000FJK",
+ ["ldgt.b_3"] = "38780000DJK",
+ ["ldgt.h_3"] = "38788000DJK",
+ ["ldgt.w_3"] = "38790000DJK",
+ ["ldgt.d_3"] = "38798000DJK",
+ ["ldle.b_3"] = "387a0000DJK",
+ ["ldle.h_3"] = "387a8000DJK",
+ ["ldle.w_3"] = "387b0000DJK",
+ ["ldle.d_3"] = "387b8000DJK",
+ ["stgt.b_3"] = "387c0000DJK",
+ ["stgt.h_3"] = "387c8000DJK",
+ ["stgt.w_3"] = "387d0000DJK",
+ ["stgt.d_3"] = "387d8000DJK",
+ ["stle.b_3"] = "387e0000DJK",
+ ["stle.h_3"] = "387e8000DJK",
+ ["stle.w_3"] = "387f0000DJK",
+ ["stle.d_3"] = "387f8000DJK",
+
+ ["ll.w_3"] = "20000000DJW",
+ ["sc.w_3"] = "21000000DJW",
+ ["ll.d_3"] = "22000000DJW",
+ ["sc.d_3"] = "23000000DJW",
+ ["ldptr.w_3"] = "24000000DJW",
+ ["stptr.w_3"] = "25000000DJW",
+ ["ldptr.d_3"] = "26000000DJW",
+ ["stptr.d_3"] = "27000000DJW",
+
+ ["ld.b_2"] = "28000000Do",
+ ["ld.h_2"] = "28400000Do",
+ ["ld.w_2"] = "28800000Do",
+ ["ld.d_2"] = "28c00000Do",
+ ["st.b_2"] = "29000000Do",
+ ["st.h_2"] = "29400000Do",
+ ["st.w_2"] = "29800000Do",
+ ["st.d_2"] = "29c00000Do",
+ ["ld.bu_2"] = "2a000000Do",
+ ["ld.hu_2"] = "2a400000Do",
+ ["ld.wu_2"] = "2a800000Do",
+ ["ldx.d_3"] = "380c0000DJK",
+ ["stx.d_3"] = "381c0000DJK",
+ ["fld.s_2"] = "2b000000Fo",
+ ["fst.s_2"] = "2b400000Fo",
+ ["fld.d_2"] = "2b800000Fo",
+ ["fst.d_2"] = "2bc00000Fo",
+
+ ["fcmp.caf.s_3"] = "0c100000EGH",
+ ["fcmp.saf.s_3"] = "0c108000EGH",
+ ["fcmp.clt.s_3"] = "0c110000EGH",
+ ["fcmp.slt.s_3"] = "0c118000EGH",
+ ["fcmp.ceq.s_3"] = "0c120000EGH",
+ ["fcmp.seq.s_3"] = "0c128000EGH",
+ ["fcmp.cle.s_3"] = "0c130000EGH",
+ ["fcmp.sle.s_3"] = "0c138000EGH",
+ ["fcmp.cun.s_3"] = "0c140000EGH",
+ ["fcmp.sun.s_3"] = "0c148000EGH",
+ ["fcmp.cult.s_3"] = "0c150000EGH",
+ ["fcmp.sult.s_3"] = "0c158000EGH",
+ ["fcmp.cueq.s_3"] = "0c160000EGH",
+ ["fcmp.sueq.s_3"] = "0c168000EGH",
+ ["fcmp.cule.s_3"] = "0c170000EGH",
+ ["fcmp.sule.s_3"] = "0c178000EGH",
+ ["fcmp.cne.s_3"] = "0c180000EGH",
+ ["fcmp.sne.s_3"] = "0c188000EGH",
+ ["fcmp.cor.s_3"] = "0c1a0000EGH",
+ ["fcmp.sor.s_3"] = "0c1a8000EGH",
+ ["fcmp.cune.s_3"] = "0c1c0000EGH",
+ ["fcmp.sune.s_3"] = "0c1c8000EGH",
+ ["fcmp.caf.d_3"] = "0c200000EGH",
+ ["fcmp.saf.d_3"] = "0c208000EGH",
+ ["fcmp.clt.d_3"] = "0c210000EGH",
+ ["fcmp.slt.d_3"] = "0c218000EGH",
+ ["fcmp.ceq.d_3"] = "0c220000EGH",
+ ["fcmp.seq.d_3"] = "0c228000EGH",
+ ["fcmp.cle.d_3"] = "0c230000EGH",
+ ["fcmp.sle.d_3"] = "0c238000EGH",
+ ["fcmp.cun.d_3"] = "0c240000EGH",
+ ["fcmp.sun.d_3"] = "0c248000EGH",
+ ["fcmp.cult.d_3"] = "0c250000EGH",
+ ["fcmp.sult.d_3"] = "0c258000EGH",
+ ["fcmp.cueq.d_3"] = "0c260000EGH",
+ ["fcmp.sueq.d_3"] = "0c268000EGH",
+ ["fcmp.cule.d_3"] = "0c270000EGH",
+ ["fcmp.sule.d_3"] = "0c278000EGH",
+ ["fcmp.cne.d_3"] = "0c280000EGH",
+ ["fcmp.sne.d_3"] = "0c288000EGH",
+ ["fcmp.cor.d_3"] = "0c2a0000EGH",
+ ["fcmp.sor.d_3"] = "0c2a8000EGH",
+ ["fcmp.cune.d_3"] = "0c2c0000EGH",
+ ["fcmp.sune.d_3"] = "0c2c8000EGH",
+
+ fsel_4 = "0d000000FGHI",
+
+ ["addu16i.d_3"] = "10000000DJY",
+ beqz_2 = "40000000JL",
+ bnez_2 = "44000000JL",
+ bceqz_2 = "48000000AL",
+ bcnez_2 = "48000100AL",
+ jirl_3 = "4c000000DJa",
+ b_1 = "50000000P",
+ bl_1 = "54000000P",
+ beq_3 = "58000000JDO",
+ bne_3 = "5c000000JDO",
+ blt_3 = "60000000JDO",
+ bge_3 = "64000000JDO",
+ bltu_3 = "68000000JDO",
+ bgeu_3 = "6c000000JDO",
+}
+
+------------------------------------------------------------------------------
+
+local function parse_gpr(expr)  -- Parse a GPR operand: "rN" or a "typename:rN" override; returns reg number, type (or nil).
+  local tname, ovreg = match(expr, "^([%w_]+):(r[1-3]?[0-9])$")
+  local tp = map_type[tname or expr]
+  if tp then  -- Known .type name: substitute its (possibly overridden) base register.
+    local reg = ovreg or tp.reg
+    if not reg then
+      werror("type `"..(tname or expr).."' needs a register override")
+    end
+    expr = reg
+  end
+  local r = match(expr, "^r([1-3]?[0-9])$")
+  if r then
+    r = tonumber(r)
+    if r <= 31 then return r, tp end  -- Valid registers are r0-r31.
+  end
+  werror("bad register name `"..expr.."'")
+end
+
+local function parse_fpr(expr)  -- Parse an FP register operand "f0".."f31"; returns the register number.
+  local r = match(expr, "^f([1-3]?[0-9])$")
+  if r then
+    r = tonumber(r)
+    if r <= 31 then return r end
+  end
+  werror("bad register name `"..expr.."'")
+end
+
+local function parse_fcsr(expr)  -- Parse an FP control/status register operand "fcsr0".."fcsr3".
+  local r = match(expr, "^fcsr([0-3])$")
+  if r then
+    r = tonumber(r)
+    return r
+  end
+  werror("bad register name `"..expr.."'")
+end
+
+local function parse_fcc(expr)  -- Parse an FP condition-flag register operand "fcc0".."fcc7".
+  local r = match(expr, "^fcc([0-7])$")
+  if r then
+    r = tonumber(r)
+    return r
+  end
+  werror("bad register name `"..expr.."'")
+end
+
+local function parse_imm(imm, bits, shift, scale, signed, action)  -- Encode an immediate: `bits` wide, scaled by 2^`scale`, placed at bit `shift`.
+  local n = tonumber(imm)
+  if n then
+    local m = sar(n, scale)
+    if shl(m, scale) == n then  -- Value must be a multiple of 2^scale.
+      if signed then
+        local s = sar(m, bits-1)
+        if s == 0 or s == 1 then return shl(m, shift)
+        elseif s == -1 then return shl(m + shl(1, bits), shift) end  -- Two's complement encoding of negative values.
+      else
+        if sar(m, bits) == 0 then return shl(m, shift) end
+      end
+    end
+    werror("out of range immediate `"..imm.."'")
+  elseif match(imm, "^[rf]([1-3]?[0-9])$") or
+         match(imm, "^([%w_]+):([rf][1-3]?[0-9])$") then
+    werror("expected immediate operand, got register")
+  else
+    waction(action or "IMM",  -- Non-constant expression: defer encoding to a runtime IMM action.
+            (signed and 32768 or 0)+shl(scale, 10)+shl(bits, 5)+shift, imm)
+    return 0
+  end
+end
+
+local function parse_imm21or26(imm, i)  -- Encode a 21- or 26-bit branch offset split as offs[15:0]<<10 | offs[i-1:16].
+  local n = tonumber(imm)
+  if n then
+    -- signed
+    local m = sar(n, 0)
+    if shl(m, 0) == n then
+      local s = sar(m, i-1)
+      if s == 0 then  -- NOTE(review): both sign branches emit the identical expression; confirm the negative case needs no bias.
+        return shl(sub(m, 1, 16), 10) + shl(sub(m, 17, i), 0)
+      elseif s == -1 then
+        return shl(sub(m, 1, 16), 10) + shl(sub(m, 17, i), 0)  -- NOTE(review): `sub` looks like string.sub applied to a number; verify a numeric bit-slice helper is actually in scope.
+      end
+    end
+    werror("out of range immediate2 `"..imm.."'")
+  else
+    waction("IMM2", 0, imm)  -- Non-constant: defer to the runtime IMM2 action.
+    return 0
+  end
+end
+
+local function parse_disp(disp)  -- Parse a memory operand "imm(reg)" into rj<<5 | si12<<10.
+  local imm, reg = match(disp, "^(.*)%(([%w_:]+)%)$")
+  if imm then
+    local r = shl(parse_gpr(reg), 5)  -- Base register goes into the rj field (bits 5-9).
+    local extname = match(imm, "^extern%s+(%S+)$")
+    if extname then
+      waction("REL_EXT", map_extern[extname], nil, 1)  -- External symbol: relocated at encode time.
+      return r
+    else
+      return r + parse_imm(imm, 12, 10, 0, true)
+    end
+  end
+  local reg, tailr = match(disp, "^([%w_:]+)%s*(.*)$")
+  if reg and tailr ~= "" then  -- "typename->field" style: offset derived from the ctype at runtime.
+    local r, tp = parse_gpr(reg)
+    if tp then
+      waction("IMM", 32768+12*32+10, format(tp.ctypefmt, tailr))
+      return shl(r, 5)
+    end
+  end
+  werror("bad displacement `"..disp.."'")
+end
+
+local function parse_label(label, def)  -- Classify a label operand; returns mode ("PC"|"LG"|"EXT"), value and optional extra.
+  local prefix = sub(label, 1, 2)
+  -- =>label (pc label reference)
+  if prefix == "=>" then
+    return "PC", 0, sub(label, 3)
+  end
+  -- ->name (global label reference)
+  if prefix == "->" then
+    return "LG", map_global[sub(label, 3)]
+  end
+  if def then
+    -- [1-9] (local label definition)
+    if match(label, "^[1-9]$") then
+      return "LG", 10+tonumber(label)
+    end
+  else
+    -- [<>][1-9] (local label reference)
+    local dir, lnum = match(label, "^([<>])([1-9])$")
+    if dir then -- Fwd: 1-9, Bkwd: 11-19.
+      return "LG", lnum + (dir == ">" and 0 or 10)
+    end
+    -- extern label (extern label reference)
+    local extname = match(label, "^extern%s+(%S+)$")
+    if extname then
+      return "EXT", map_extern[extname]
+    end
+  end
+  werror("bad label `"..label.."'")
+end
+
+local function branch_type(op)  -- Classify a branch opcode for REL_* action encoding.
+  local primop = shr(op, 26)  -- Primary opcode: top 6 bits.
+  if primop >= 0x16 and primop <= 0x1b then
+    return 0 -- BEQ, BNE, BLT, BGE, BLTU, BGEU (16-bit offset)
+  elseif primop == 0x10 or primop == 0x11 or primop == 0x12 then
+    return 0x5000 -- BEQZ, BNEZ, BCEQZ, BCNEZ (21-bit offset)
+  elseif band(op, 0xf8000000) == 0x50000000 then return 0xa000 -- B, BL (26-bit offset)
+  else
+    assert(false, "unknown branch type")
+  end
+end
+
+------------------------------------------------------------------------------
+
+-- Handle opcodes defined with template strings.
+map_op[".template__"] = function(params, template, nparams)
+  if not params then return sub(template, 9) end
+  local op = tonumber(sub(template, 1, 8), 16)  -- First 8 chars of the template are the base opcode in hex.
+  local n = 1
+
+  -- Limit number of section buffer positions used by a single dasm_put().
+  -- A single opcode needs a maximum of 2 positions (ins/ext).
+  if secpos+2 > maxsecpos then wflush() end
+  local pos = wpos()
+
+  -- Process each character.
+  for p in gmatch(sub(template, 9), ".") do
+    if p == "D" then
+      op = op + shl(parse_gpr(params[n]), 0); n = n + 1
+    elseif p == "J" then
+      op = op + shl(parse_gpr(params[n]), 5); n = n + 1
+    elseif p == "K" then
+      op = op + shl(parse_gpr(params[n]), 10); n = n + 1
+    elseif p == "F" then
+      op = op + shl(parse_fpr(params[n]), 0); n = n + 1
+    elseif p == "G" then
+      op = op + shl(parse_fpr(params[n]), 5); n = n + 1
+    elseif p == "H" then
+      op = op + shl(parse_fpr(params[n]), 10); n = n + 1
+    elseif p == "i" then
+      op = op + shl(parse_fpr(params[n]), 15); n = n + 1
+    elseif p == "I" then
+      op = op + shl(parse_fcc(params[n]), 15); n = n + 1
+    elseif p == "A" then
+      op = op + shl(parse_fcc(params[n]), 5); n = n + 1
+    elseif p == "E" then
+      op = op + shl(parse_fcc(params[n]), 0); n = n + 1
+    elseif p == "S" then  -- BUG FIX: was `op == "S"` (number vs. string, never true), so fcsr operands hit assert(false).
+      op = op + shl(parse_fcsr(params[n]), 0); n = n + 1
+    elseif p == "R" then  -- BUG FIX: was `op == "R"`, same defect as above.
+      op = op + shl(parse_fcsr(params[n]), 5); n = n + 1
+    elseif p == "U" then
+      op = op + parse_imm(params[n], 5, 10, 0, false); n = n + 1
+    elseif p == "V" then
+      op = op + parse_imm(params[n], 6, 10, 0, false); n = n + 1
+    elseif p == "W" then
+      op = op + parse_imm(params[n], 14, 10, 0, true); n = n + 1
+    elseif p == "X" then
+      op = op + parse_imm(params[n], 12, 10, 0, true); n = n + 1
+    elseif p == "o" then
+      op = op + parse_disp(params[n]); n = n + 1
+    elseif p == "Y" then
+      op = op + parse_imm(params[n], 16, 10, 0, true); n = n + 1
+    elseif p == "Z" then
+      op = op + parse_imm(params[n], 20, 5, 0, true); n = n + 1
+    elseif p == "T" then
+      op = op + parse_imm(params[n], 12, 10, 0, false); n = n + 1
+    elseif p == "C" then
+      op = op + parse_imm(params[n], 15, 0, 0, false); n = n + 1
+    elseif p == "Q" then
+      op = op + parse_imm(params[n], 2, 15, 0, false); n = n + 1
+    elseif p == "B" then
+      op = op + parse_imm(params[n], 3, 15, 0, false); n = n + 1
+    elseif p == "M" then
+      op = op + parse_imm(params[n], 5, 16, 0, false); n = n + 1
+    elseif p == "N" then
+      op = op + parse_imm(params[n], 6, 16, 0, false); n = n + 1
+    elseif p == "O" or p == "L" or p == "P" then  -- Branch targets: emit a relocation action.
+      local mode, m, s = parse_label(params[n], false)
+      local v = branch_type(op)
+      waction("REL_"..mode, m+v, s, 1)
+      n = n + 1
+    elseif p == "a" then
+      op = op + parse_imm(params[n], 16, 10, 0, true); n = n + 1
+    else
+      assert(false)
+    end
+  end
+  wputpos(pos, op)
+end
+
+------------------------------------------------------------------------------
+
+-- Pseudo-opcode to mark the position where the action list is to be emitted.
+map_op[".actionlist_1"] = function(params)
+  if not params then return "cvar" end
+  local name = params[1] -- No syntax check. You get to keep the pieces.
+  wline(function(out) writeactions(out, name) end)  -- Deferred until output time.
+end
+
+-- Pseudo-opcode to mark the position where the global enum is to be emitted.
+map_op[".globals_1"] = function(params)
+  if not params then return "prefix" end
+  local prefix = params[1] -- No syntax check. You get to keep the pieces.
+  wline(function(out) writeglobals(out, prefix) end)
+end
+
+-- Pseudo-opcode to mark the position where the global names are to be emitted.
+map_op[".globalnames_1"] = function(params)
+  if not params then return "cvar" end
+  local name = params[1] -- No syntax check. You get to keep the pieces.
+  wline(function(out) writeglobalnames(out, name) end)
+end
+
+-- Pseudo-opcode to mark the position where the extern names are to be emitted.
+map_op[".externnames_1"] = function(params)
+  if not params then return "cvar" end
+  local name = params[1] -- No syntax check. You get to keep the pieces.
+  wline(function(out) writeexternnames(out, name) end)
+end
+
+------------------------------------------------------------------------------
+
+-- Label pseudo-opcode (converted from trailing colon form).
+map_op[".label_1"] = function(params)
+  if not params then return "[1-9] | ->global | =>pcexpr" end
+  if secpos+1 > maxsecpos then wflush() end
+  local mode, n, s = parse_label(params[1], true)  -- def=true: this defines the label.
+  if mode == "EXT" then werror("bad label definition") end
+  waction("LABEL_"..mode, n, s, 1)
+end
+
+------------------------------------------------------------------------------
+
+-- Pseudo-opcodes for data storage.
+map_op[".long_*"] = function(params)
+  if not params then return "imm..." end
+  for _,p in ipairs(params) do
+    local n = tonumber(p)
+    if not n then werror("bad immediate `"..p.."'") end
+    if n < 0 then n = n + 2^32 end  -- Store negative values in unsigned 32-bit form.
+    wputw(n)
+    if secpos+2 > maxsecpos then wflush() end
+  end
+end
+
+-- Alignment pseudo-opcode.
+map_op[".align_1"] = function(params)
+  if not params then return "numpow2" end
+  if secpos+1 > maxsecpos then wflush() end
+  local align = tonumber(params[1])
+  if align then
+    local x = align
+    -- Must be a power of 2 in the range (2 ... 256).
+    for i=1,8 do
+      x = x / 2
+      if x == 1 then
+        waction("ALIGN", align-1, nil, 1) -- Action byte is 2**n-1.
+        return
+      end
+    end
+  end
+  werror("bad alignment")
+end
+
+------------------------------------------------------------------------------
+
+-- Pseudo-opcode for (primitive) type definitions (map to C types).
+map_op[".type_3"] = function(params, nparams)
+  if not params then
+    return nparams == 2 and "name, ctype" or "name, ctype, reg"
+  end
+  local name, ctype, reg = params[1], params[2], params[3]
+  if not match(name, "^[%a_][%w_]*$") then
+    werror("bad type name `"..name.."'")
+  end
+  local tp = map_type[name]
+  if tp then
+    werror("duplicate type `"..name.."'")
+  end
+  -- Add #type to defines. A bit unclean to put it in map_archdef.
+  map_archdef["#"..name] = "sizeof("..ctype..")"
+  -- Add new type and emit shortcut define.
+  local num = ctypenum + 1
+  map_type[name] = {
+    ctype = ctype,
+    ctypefmt = format("Dt%X(%%s)", num),  -- Format used for field-offset expressions.
+    reg = reg,
+  }
+  wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num, ctype))
+  ctypenum = num
+end
+map_op[".type_2"] = map_op[".type_3"]
+
+-- Dump type definitions.
+local function dumptypes(out, lvl)
+  local t = {}
+  for name in pairs(map_type) do t[#t+1] = name end
+  sort(t)  -- Deterministic listing order.
+  out:write("Type definitions:\n")
+  for _,name in ipairs(t) do
+    local tp = map_type[name]
+    local reg = tp.reg or ""
+    out:write(format("  %-20s %-20s %s\n", name, tp.ctype, reg))
+  end
+  out:write("\n")
+end
+
+------------------------------------------------------------------------------
+
+-- Set the current section.
+function _M.section(num)
+  waction("SECTION", num)
+  wflush(true) -- SECTION is a terminal action.
+end
+
+------------------------------------------------------------------------------
+
+-- Dump architecture description.
+function _M.dumparch(out)
+  out:write(format("DynASM %s version %s, released %s\n\n",
+    _info.arch, _info.version, _info.release))
+  dumpactions(out)
+end
+
+-- Dump all user defined elements.
+function _M.dumpdef(out, lvl)
+  dumptypes(out, lvl)
+  dumpglobals(out, lvl)
+  dumpexterns(out, lvl)
+end
+
+------------------------------------------------------------------------------
+
+-- Pass callbacks from/to the DynASM core.
+function _M.passcb(wl, we, wf, ww)
+  wline, werror, wfatal, wwarn = wl, we, wf, ww
+  return wflush  -- The core calls the returned flush function at section boundaries.
+end
+
+-- Setup the arch-specific module.
+function _M.setup(arch, opt)
+  g_arch, g_opt = arch, opt
+end
+
+-- Merge the core maps and the arch-specific maps.
+function _M.mergemaps(map_coreop, map_def)
+  setmetatable(map_op, { __index = map_coreop })  -- Fall back to core pseudo-opcodes.
+  setmetatable(map_def, { __index = map_archdef })
+  return map_op, map_def
+end
+
+return _M
+
+------------------------------------------------------------------------------
+
diff --git a/src/Makefile b/src/Makefile
index 3d2ba4575..db66fc577 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -53,6 +53,7 @@ CCOPT_arm64=
CCOPT_ppc=
CCOPT_mips=
CCOPT_riscv64=
+CCOPT_loongarch64= -fwrapv
#
#CCDEBUG=
# Uncomment the next line to generate debug information:
@@ -247,6 +248,10 @@ else
ifneq (,$(findstring LJ_TARGET_S390X ,$(TARGET_TESTARCH)))
TARGET_LJARCH= s390x
else
+ifneq (,$(findstring LJ_TARGET_LOONGARCH64 ,$(TARGET_TESTARCH)))
+ TARGET_ARCH= -DLJ_ARCH_ENDIAN=LUAJIT_LE
+ TARGET_LJARCH= loongarch64
+else
ifneq (,$(findstring LJ_TARGET_ARM64 ,$(TARGET_TESTARCH)))
ifneq (,$(findstring __AARCH64EB__ ,$(TARGET_TESTARCH)))
TARGET_ARCH= -D__AARCH64EB__=1
@@ -283,6 +288,7 @@ endif
endif
endif
endif
+endif
ifneq (,$(findstring LJ_TARGET_PS3 1,$(TARGET_TESTARCH)))
TARGET_SYS= PS3
@@ -346,7 +352,9 @@ else
# Find out whether the target toolchain always generates unwind tables.
TARGET_TESTUNWIND=$(shell exec 2>/dev/null; echo 'extern void b(void);int a(void){b();return 0;}' | $(TARGET_CC) -c -x c - -o tmpunwind.o && { grep -qa -e eh_frame -e __unwind_info tmpunwind.o || grep -qU -e eh_frame -e __unwind_info tmpunwind.o; } && echo E; rm -f tmpunwind.o)
ifneq (,$(findstring E,$(TARGET_TESTUNWIND)))
- TARGET_XCFLAGS+= -DLUAJIT_UNWIND_EXTERNAL
+ ifeq (,$(findstring LJ_TARGET_LOONGARCH64 ,$(TARGET_TESTARCH)))
+ TARGET_XCFLAGS+= -DLUAJIT_UNWIND_EXTERNAL
+ endif
endif
endif
ifneq (SunOS,$(TARGET_SYS))
diff --git a/src/host/buildvm.c b/src/host/buildvm.c
index ff4e01e11..70452d44f 100644
--- a/src/host/buildvm.c
+++ b/src/host/buildvm.c
@@ -71,6 +71,8 @@ static int collect_reloc(BuildCtx *ctx, uint8_t *addr, int idx, int type);
#include "../dynasm/dasm_s390x.h"
#elif LJ_TARGET_RISCV64
#include "../dynasm/dasm_riscv.h"
+#elif LJ_TARGET_LOONGARCH64
+#include "../dynasm/dasm_loongarch64.h"
#else
#error "No support for this architecture (yet)"
#endif
diff --git a/src/host/buildvm_asm.c b/src/host/buildvm_asm.c
index 0f32546fc..256591492 100644
--- a/src/host/buildvm_asm.c
+++ b/src/host/buildvm_asm.c
@@ -229,6 +229,15 @@ static void emit_asm_wordreloc(BuildCtx *ctx, uint8_t *p, int n,
ins, sym);
exit(1);
}
+#elif LJ_TARGET_LOONGARCH64
+ if ((ins >> 26) == 21) {
+ fprintf(ctx->fp, "\tbl %s\n", sym);
+ } else {
+ fprintf(stderr,
+ "Error: unsupported opcode %08x for %s symbol relocation.\n",
+ ins, sym);
+ exit(1);
+ }
#else
#error "missing relocation support for this architecture"
#endif
diff --git a/src/jit/bcsave.lua b/src/jit/bcsave.lua
index 81cbb0a11..cf4fc9974 100644
--- a/src/jit/bcsave.lua
+++ b/src/jit/bcsave.lua
@@ -104,6 +104,7 @@ local map_arch = {
mips64r6el = { e = "le", b = 64, m = 8, f = 0xa0000407, },
s390x = { e = "be", b = 64, m = 22, },
riscv64 = { e = "le", b = 64, m = 243, f = 0x00000004, },
+ loongarch64 = { e = "le", b = 64, m = 258, f = 0x3},
}
local map_os = {
diff --git a/src/jit/dis_loongarch64.lua b/src/jit/dis_loongarch64.lua
new file mode 100644
index 000000000..0fe0266be
--- /dev/null
+++ b/src/jit/dis_loongarch64.lua
@@ -0,0 +1,697 @@
+----------------------------------------------------------------------------
+-- LuaJIT LoongArch64 disassembler module.
+--
+-- Copyright (C) 2005-2022 Mike Pall. All rights reserved.
+-- Released under the MIT/X license. See Copyright Notice in luajit.h
+----------------------------------------------------------------------------
+-- This is a helper module used by the LuaJIT machine code dumper module.
+--
+-- It disassembles most LoongArch instructions.
+-- NYI: SIMD instructions.
+------------------------------------------------------------------------------
+
+local type = type
+local byte, format = string.byte, string.format
+local match, gmatch = string.match, string.gmatch
+local concat = table.concat
+local bit = require("bit")
+local band, bor, bnot, tohex = bit.band, bit.bor, bit.bnot, bit.tohex
+local lshift, rshift, arshift = bit.lshift, bit.rshift, bit.arshift
+
+------------------------------------------------------------------------------
+-- Opcode maps
+------------------------------------------------------------------------------
+
+local map_18_0 = { -- Bits 18-20 == 0: one-source bit/byte ops, indexed by bits 10-17.
+  shift = 10, mask = 255,
+  [4] = "clo.wDJ",
+  [5] = "clz.wDJ",
+  [6] = "cto.wDJ",
+  [7] = "ctz.wDJ",
+  [8] = "clo.dDJ",
+  [9] = "clz.dDJ",
+  [10] = "cto.dDJ",
+  [11] = "ctz.dDJ",
+  [12] = "revb.2hDJ",
+  [13] = "revb.4hDJ",
+  [14] = "revb.2wDJ",
+  [15] = "revb.dDJ",
+  [16] = "revh.2wDJ",
+  [17] = "revh.dDJ",
+  [18] = "bitrev.4bDJ",
+  [19] = "bitrev.8bDJ",
+  [20] = "bitrev.wDJ",
+  [21] = "bitrev.dDJ",
+  [22] = "ext.w.hDJ",
+  [23] = "ext.w.bDJ",
+}
+
+local map_18_4 = { -- Bits 18-20 == 4: add/sub/slt family, indexed by bits 15-17.
+  shift = 15, mask = 7,
+  [0] = "add.wDJK",
+  [1] = "add.dDJK",
+  [2] = "sub.wDJK",
+  [3] = "sub.dDJK",
+  [4] = "sltDJK",
+  [5] = "sltuDJK",
+  [6] = "maskeqzDJK",
+  [7] = "masknezDJK",
+}
+
+local map_18_5 = { -- Bits 18-20 == 5: logic and word shifts, indexed by bits 15-17.
+  shift = 15, mask = 7,
+  [0] = "norDJK",
+  [1] = "andDJK",
+  [2] = "orDJK",
+  [3] = "xorDJK",
+  [4] = "ornDJK",
+  [5] = "andnDJK",
+  [6] = "sll.wDJK",
+  [7] = "srl.wDJK",
+}
+
+local map_18_6 = { -- Bits 18-20 == 6: remaining shifts/rotates, indexed by bits 15-17.
+  shift = 15, mask = 7,
+  [0] = "sra.wDJK",
+  [1] = "sll.dDJK",
+  [2] = "srl.dDJK",
+  [3] = "sra.dDJK",
+  [6] = "rotr.wDJK",
+  [7] = "rotr.dDJK",
+}
+
+local map_18_7 = { -- Bits 18-20 == 7: multiply family, indexed by bits 15-17.
+  shift = 15, mask = 7,
+  [0] = "mul.wDJK",
+  [1] = "mulh.wDJK",
+  [2] = "mulh.wuDJK",
+  [3] = "mul.dDJK",
+  [4] = "mulh.dDJK",
+  [5] = "mulh.duDJK",
+  [6] = "mulw.d.wDJK",
+  [7] = "mulw.d.wuDJK",
+}
+
+local map_farith2 = { -- Single-source FP ops (fabs/fneg/fsqrt/...), indexed by bits 10-14.
+  shift = 10, mask = 31,
+  [1] = "fabs.sFG",
+  [2] = "fabs.dFG",
+  [5] = "fneg.sFG",
+  [6] = "fneg.dFG",
+  [9] = "flogb.sFG",
+  [10] = "flogb.dFG",
+  [13] = "fclass.sFG",
+  [14] = "fclass.dFG",
+  [17] = "fsqrt.sFG",
+  [18] = "fsqrt.dFG",
+  [21] = "frecip.sFG",
+  [22] = "frecip.dFG",
+  [25] = "frsqrt.sFG",
+  [26] = "frsqrt.dFG",
+  [29] = "frecipe.sFG",
+  [30] = "frecipe.dFG",
+  [33] = "frsqrte.sFG",
+  [34] = "frsqrte.dFG",
+}
+
+local map_fmov = { -- FP/GPR/fcsr/fcc move ops, indexed by bits 10-14.
+  shift = 10, mask = 31,
+  [5] = "fmov.sFG",
+  [6] = "fmov.dFG",
+  [9] = "movgr2fr.wFJ",
+  [10] = "movgr2fr.dFJ",
+  [11] = "movgr2frh.wFJ",
+  [13] = "movfr2gr.sDG",
+  [14] = "movfr2gr.dDG",
+  [15] = "movfrh2gr.sDG",
+  [16] = "movgr2fcsrSJ",
+  [18] = "movfcsr2grDR",
+  [20] = { shift = 3, mask = 3, [0] = "movfr2cfEG", },
+  [21] = { shift = 8, mask = 3, [0] = "movcf2frFA", },
+  [22] = { shift = 3, mask = 3, [0] = "movgr2cfEJ", },
+  [23] = { shift = 8, mask = 3, [0] = "movcf2grDA", },
+}
+
+local map_fconvert = { -- 15-20: 110010, FP precision conversion.
+  shift = 10, mask = 31,
+  [6] = "fcvt.s.dFG", [9] = "fcvt.d.sFG",
+}
+
+local map_fconvert1 = { -- 15-20: 110100, FP-to-int, round to -inf/+inf.
+  shift = 10, mask = 31,
+  [1] = "ftintrm.w.sFG",
+  [2] = "ftintrm.w.dFG",
+  [9] = "ftintrm.l.sFG",
+  [10] = "ftintrm.l.dFG",
+  [17] = "ftintrp.w.sFG",
+  [18] = "ftintrp.w.dFG",
+  [25] = "ftintrp.l.sFG",
+  [26] = "ftintrp.l.dFG",
+}
+
+local map_fconvert2 = { -- 15-20: 110101, FP-to-int, round to zero/nearest-even.
+  shift = 10, mask = 31,
+  [1] = "ftintrz.w.sFG",
+  [2] = "ftintrz.w.dFG",
+  [9] = "ftintrz.l.sFG",
+  [10] = "ftintrz.l.dFG",
+  [17] = "ftintrne.w.sFG",
+  [18] = "ftintrne.w.dFG",
+  [25] = "ftintrne.l.sFG",
+  [26] = "ftintrne.l.dFG",
+}
+
+local map_fconvert3 = { -- 15-20: 110110, FP-to-int, current rounding mode.
+  shift = 10, mask = 31,
+  [1] = "ftint.w.sFG",
+  [2] = "ftint.w.dFG",
+  [9] = "ftint.l.sFG",
+  [10] = "ftint.l.dFG",
+}
+
+local map_fconvert4 = { -- 15-20: 111010, int-to-FP conversion.
+  shift = 10, mask = 31,
+  [4] = "ffint.s.wFG",
+  [6] = "ffint.s.lFG",
+  [8] = "ffint.d.wFG",
+  [10] = "ffint.d.lFG",
+}
+
+local map_fconvert5 = { -- 15-20: 111100, round to integral FP value.
+  shift = 10, mask = 31,
+  [17] = "frint.sFG",
+  [18] = "frint.dFG",
+}
+
+local map_farith = { -- Bits 22-25 == 4: FP arithmetic, indexed by bits 15-21.
+  shift = 15, mask = 127,
+  [1] = "fadd.sFGH",
+  [2] = "fadd.dFGH",
+  [5] = "fsub.sFGH",
+  [6] = "fsub.dFGH",
+  [9] = "fmul.sFGH",
+  [10] = "fmul.dFGH",
+  [13] = "fdiv.sFGH",
+  [14] = "fdiv.dFGH",
+  [17] = "fmax.sFGH",
+  [18] = "fmax.dFGH",
+  [21] = "fmin.sFGH",
+  [22] = "fmin.dFGH",
+  [25] = "fmaxa.sFGH",
+  [26] = "fmaxa.dFGH",
+  [29] = "fmina.sFGH",
+  [30] = "fmina.dFGH",
+  [33] = "fscaleb.sFGH",
+  [34] = "fscaleb.dFGH",
+  [37] = "fcopysign.sFGH",
+  [38] = "fcopysign.dFGH",
+  [40] = map_farith2, [41] = map_fmov,
+  [50] = map_fconvert, [52] = map_fconvert1,
+  [53] = map_fconvert2, [54] = map_fconvert3,
+  [58] = map_fconvert4, [60] = map_fconvert5,
+}
+
+local map_21_0 = { -- Bit 21 == 0: dispatch on bits 18-20 into the map_18_* subtables.
+  shift = 18, mask = 7,
+  [0] = map_18_0,
+  [1] = { shift = 17, mask = 1, [0] = "alsl.wDJKQ", "alsl.wuDJKQ", },
+  [2] = {shift = 17, mask = 1, [0] = "bytepick.wDJKQ", },
+  [3] = "bytepick.dDJKB",
+  [4] = map_18_4,
+  [5] = map_18_5,
+  [6] = map_18_6,
+  [7] = map_18_7,
+}
+
+local map_21_1 = { --21st:1, 22nd:0, 15-20
+  shift = 21, mask = 1,
+  [1] = {
+    shift = 18, mask = 7,
+    [0] = {
+      shift = 15, mask = 7,
+      [0] = "div.wDJK",
+      [1] = "mod.wDJK",
+      [2] = "div.wuDJK",
+      [3] = "mod.wuDJK",
+      [4] = "div.dDJK",
+      [5] = "mod.dDJK",
+      [6] = "div.duDJK",
+      [7] = "mod.duDJK",
+    },
+    [1] = {
+      shift = 15, mask = 7,  -- BUG FIX: was shift = 18, which re-read the outer index; crc ops differ in bits 15-17.
+      [0] = "crc.w.b.wDJK",
+      [1] = "crc.w.h.wDJK",
+      [2] = "crc.w.w.wDJK",
+      [3] = "crc.w.d.wDJK",
+      [4] = "crcc.w.b.wDJK",
+      [5] = "crcc.w.h.wDJK",
+      [6] = "crcc.w.w.wDJK",
+      [7] = "crcc.w.d.wDJK",
+    },
+    [2] = {
+      shift = 15, mask = 7,
+      [4] = "breakC", [6] = "syscallC",  -- BUG FIX: were unquoted globals (nil), so break/syscall decoded as unknown.
+    },
+    [3] = { shift = 17, mask = 1, [0] = "alsl.dDJKQ", },
+  },
+}
+
+local map_22_0 = { -- Bits 22-25 == 0: dispatch on bit 21.
+  shift = 21, mask = 1,
+  [0] = map_21_0,
+  [1] = map_21_1,
+}
+
+local map_shift = { -- 22nd:1, 21st:0 -- immediate shifts/rotates, indexed by bits 16-20.
+  shift = 16, mask = 31,
+  [0] = { shift = 15, mask = 1, [1] = "slli.wDJU", },
+  [1] = "slli.dDJV",
+  [4] = { shift = 15, mask = 1, [1] = "srli.wDJU", },
+  [5] = "srli.dDJV",
+  [8] = { shift = 15, mask = 1, [1] = "srai.wDJU", },
+  [9] = "srai.dDJV",
+  [12] = { shift = 15, mask = 1, [1] = "rotri.wDJU", },
+  [13] = "rotri.dDJV",
+}
+
+local map_22_1 = { -- 22nd:1 -- shifts or 32-bit bit-string insert/extract.
+  shift = 21, mask = 1,
+  [0] = map_shift,
+  [1] = { shift = 15, mask = 1, [0] = "bstrins.wDJMU", [1] = "bstrpick.wDJMU", },
+}
+
+local map_26_0 = { -- Primary opcode 0: dispatch on bits 22-25.
+  shift = 22, mask = 15,
+  [0] = map_22_0,
+  [1] = map_22_1,
+  [2] = "bstrins.dDJNV",
+  [3] = "bstrpick.dDJNV",
+  [4] = map_farith,
+  [8] = "sltiDJX",
+  [9] = "sltuiDJX",
+  [10] = "addi.wDJX",
+  [11] = "addi.dDJX",
+  [12] = "lu52i.dDJX",
+  [13] = "andiDJT",
+  [14] = "oriDJT",
+  [15] = "xoriDJT",
+}
+
+local map_long_i_5 = { -- Long immediate fixed-point arithmetic.
+  shift = 25, mask = 1,
+  [0] = "lu12i.wDZ",
+  [1] = "lu32i.dDZ",
+}
+
+local map_long_i_6 = { -- PC-relative address generation (si20 immediates).
+  shift = 25, mask = 1,
+  [0] = "pcaddiDZ",
+  [1] = "pcalau12iDZ",
+}
+
+local map_long_i_7 = { -- PC-relative address generation, wide offsets.
+  shift = 25, mask = 1,
+  [0] = "pcaddu12iDZ",
+  [1] = "pcaddu18iDZ",
+}
+
+local map_ldst0_14 = { -- Register-indexed and bound-checked loads/stores, bits 15-25.
+  shift = 15, mask = 2047,
+  [0] = "ldx.bDJK", [8] = "ldx.hDJK", [16] = "ldx.wDJK",
+  [24] = "ldx.dDJK", [32] = "stx.bDJK", [40] = "stx.hDJK",
+  [48] = "stx.wDJK", [56] = "stx.dDJK", [64] = "ldx.buDJK",
+  [72] = "ldx.huDJK", [80] = "ldx.wuDJK", [96] = "fldx.sFJK",
+  [104] = "fldx.dFJK", [112] = "fstx.sFJK", [120] = "fstx.dFJK",
+  [232] = "fldgt.sFJK", [233] = "fldgt.dFJK", [234] = "fldle.sFJK",
+  [235] = "fldle.dFJK", [236] = "fstgt.sFJK", [237] = "fstgt.dFJK",
+  [238] = "fstle.sFJK", [239] = "fstle.dFJK", [240] = "ldgt.bDJK",
+  [241] = "ldgt.hDJK", [242] = "ldgt.wDJK", [243] = "ldgt.dDJK",
+  [244] = "ldle.bDJK", [245] = "ldle.hDJK", [246] = "ldle.wDJK",
+  [247] = "ldle.dDJK", [248] = "stgt.bDJK", [249] = "stgt.hDJK",
+  [250] = "stgt.wDJK", [251] = "stgt.dDJK", [252] = "stle.bDJK",
+  [253] = "stle.hDJK", [254] = "stle.wDJK", [255] = "stle.dDJK",
+}
+
+local map_ldst1_8 = { -- Load-linked/store-conditional (si14<<2 offset), bits 24-25.
+  shift = 24, mask = 3,
+  [0] = "ll.wDJW",
+  [1] = "sc.wDJW",
+  [2] = "ll.dDJW",
+  [3] = "sc.dDJW",
+}
+
+local map_ldst1_9 = { -- ldptr/stptr (si14<<2 offset), bits 24-25.
+  shift = 24, mask = 3,
+  [0] = "ldptr.wDJW",
+  [1] = "stptr.wDJW",
+  [2] = "ldptr.dDJW",
+  [3] = "stptr.dDJW",
+}
+
+local map_ldst1_10 = { -- si12 loads/stores, bits 22-25. NOTE(review): mixes "DJX" and "Do" operand styles; likely should be uniform -- verify intended output format.
+  shift = 22, mask = 15,
+  [0] = "ld.bDJX",
+  [1] = "ld.hDJX",
+  [2] = "ld.wDo",
+  [3] = "ld.dDo",
+  [4] = "st.bDo",
+  [5] = "st.hDo",
+  [6] = "st.wDo",
+  [7] = "st.dDo",
+  [8] = "ld.buDo",
+  [9] = "ld.huDo",
+  [10] = "ld.wuDJX",
+  [12] = "fld.sFo",
+  [13] = "fst.sFo",
+  [14] = "fld.dFo",
+  [15] = "fst.dFo",
+}
+
+local map_fcmp0 = { -- fcmp.cond.s: condition code in bits 15-19.
+  shift = 15, mask = 31,
+  [0] = "fcmp.caf.sEGH",
+  [1] = "fcmp.saf.sEGH",
+  [2] = "fcmp.clt.sEGH",
+  [3] = "fcmp.slt.sEGH",
+  [4] = "fcmp.ceq.sEGH",
+  [5] = "fcmp.seq.sEGH",
+  [6] = "fcmp.cle.sEGH",
+  [7] = "fcmp.sle.sEGH",
+  [8] = "fcmp.cun.sEGH",
+  [9] = "fcmp.sun.sEGH",
+  [10] = "fcmp.cult.sEGH",
+  [11] ="fcmp.sult.sEGH",
+  [12] = "fcmp.cueq.sEGH",
+  [13] = "fcmp.sueq.sEGH",
+  [14] = "fcmp.cule.sEGH",
+  [15] = "fcmp.sule.sEGH",
+  [16] = "fcmp.cne.sEGH",
+  [17] = "fcmp.sne.sEGH",
+  [20] = "fcmp.cor.sEGH",
+  [21] = "fcmp.sor.sEGH",
+  [24] = "fcmp.cune.sEGH",
+  [25] = "fcmp.sune.sEGH",
+}
+
+local map_fcmp1 = { -- fcmp.cond.d: condition code in bits 15-19.
+  shift = 15, mask = 31,
+  [0] = "fcmp.caf.dEGH",
+  [1] = "fcmp.saf.dEGH",
+  [2] = "fcmp.clt.dEGH",
+  [3] = "fcmp.slt.dEGH",
+  [4] = "fcmp.ceq.dEGH",
+  [5] = "fcmp.seq.dEGH",
+  [6] = "fcmp.cle.dEGH",
+  [7] = "fcmp.sle.dEGH",
+  [8] = "fcmp.cun.dEGH",
+  [9] = "fcmp.sun.dEGH",
+  [10] = "fcmp.cult.dEGH",
+  [11] = "fcmp.sult.dEGH",
+  [12] = "fcmp.cueq.dEGH",
+  [13] = "fcmp.sueq.dEGH",
+  [14] = "fcmp.cule.dEGH",
+  [15] = "fcmp.sule.dEGH",
+  [16] = "fcmp.cne.dEGH",
+  [17] = "fcmp.sne.dEGH",
+  [20] = "fcmp.cor.dEGH",
+  [21] = "fcmp.sor.dEGH",
+  [24] = "fcmp.cune.dEGH",
+  [25] = "fcmp.sune.dEGH",
+}
+
+local map_fcmp = { -- Primary opcode 3: fcmp.s/.d and fsel, dispatch on bits 20-25.
+  shift = 20, mask = 63,
+  [1] = { shift = 3, mask = 3, [0] = map_fcmp0, },
+  [2] = { shift = 3, mask = 3, [0] = map_fcmp1, },
+  [16] = { shift = 18, mask = 3, [0] = "fselFGHI", },
+}
+
+local map_fp = { -- Primary opcode 2: fused multiply-add family, indexed by bits 20-23.
+  shift = 20, mask = 15,
+  [1] = "fmadd.sFGHi",
+  [2] = "fmadd.dFGHi",
+  [4] = "fmsub.sFGHi",
+  [5] = "fmsub.dFGHi",
+  [9] = "fnmadd.sFGHi", [10] = "fnmadd.dFGHi",
+  [13] = "fnmsub.sFGHi", [14] = "fnmsub.dFGHi",
+}
+
+local map_init = { -- Top-level dispatch on the 6-bit primary opcode (bits 26-31).
+  shift = 26, mask = 63,
+  [0] = map_26_0,
+  [2] = map_fp,
+  [3] = map_fcmp,
+  [4] = "addu16i.dDJY",
+  [5] = map_long_i_5,
+  [6] = map_long_i_6,
+  [7] = map_long_i_7,
+  [8] = map_ldst1_8,
+  [9] = map_ldst1_9,
+  [10] = map_ldst1_10,
+  [14] = map_ldst0_14,
+  [16] = "beqzJL",
+  [17] = "bnezJL",
+  [18] = { shift = 8, mask = 3, [0] = "bceqzAL", "bcnezAL", },
+  [19] = "jirlDJa",
+  [20] = "bP",
+  [21] = "blP",
+  [22] = "beqJDO",
+  [23] = "bneJDO",
+  [24] = "bltJDO",
+  [25] = "bgeJDO",
+  [26] = "bltuJDO",
+  [27] = "bgeuJDO",
+}
+
+------------------------------------------------------------------------------
+
+local map_gpr = { -- GPR names; only ra (r1) and sp (r3) use their ABI aliases.
+  [0] = "r0", "ra", "r2", "sp", "r4", "r5", "r6", "r7",
+  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
+  "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
+  "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31",
+}
+
+------------------------------------------------------------------------------
+
+-- Output a nicely formatted line with an opcode and operands.
+local function putop(ctx, text, operands)
+  local pos = ctx.pos
+  local extra = ""
+  if ctx.rel then
+    local sym = ctx.symtab[ctx.rel]  -- Annotate targets that match a known symbol.
+    if sym then extra = "\t->"..sym end
+  end
+  if ctx.hexdump > 0 then
+    ctx.out(format("%08x %s %-7s %s%s\n",
+      ctx.addr+pos, tohex(ctx.op), text, concat(operands, ", "), extra))
+  else
+    ctx.out(format("%08x %-7s %s%s\n",
+      ctx.addr+pos, text, concat(operands, ", "), extra))
+  end
+  ctx.pos = pos + 4  -- Fixed 4-byte instruction width.
+end
+
+-- Fallback for unknown opcodes.
+local function unknown(ctx)
+  return putop(ctx, ".long", { "0x"..tohex(ctx.op) })
+end
+
+local function get_le(ctx)  -- Fetch the next 32-bit instruction word, little-endian.
+  local pos = ctx.pos
+  local b0, b1, b2, b3 = byte(ctx.code, pos+1, pos+4)
+  return bor(lshift(b3, 24), lshift(b2, 16), lshift(b1, 8), b0)
+end
+
+-- Decode a `bits`-wide immediate field; returns the signed value or nil if malformed.
+local function decode_si_imm(imm, bits, scale, signed, mask)
+  local n = tonumber(imm)
+  if n then
+    local m = arshift(n, scale)
+    if lshift(m, scale) == n then  -- Must be a multiple of 2^scale.
+      if signed then
+        local s = arshift(band(m, mask), bits-1)
+        if s == 0 then
+          return m
+        elseif s == 1 then
+          return -(band(bnot(m), mask)+1)  -- Negative: undo two's complement within `mask`.
+        end
+      else
+        if arshift(m, bits) == 0 then
+          return m
+        end
+      end
+    end
+  end
+end
+
+-- Disassemble a single instruction.
+local function disass_ins(ctx)
+  local op = ctx:get()
+  local operands = {}
+  local last = nil  -- Most recently emitted operand (used by the "o" displacement format).
+  ctx.op = op
+  ctx.rel = nil
+
+  local opat = ctx.map_pri[rshift(op, 26)]
+  while type(opat) ~= "string" do  -- Walk nested dispatch tables until a template string is found.
+    if not opat then return unknown(ctx) end
+    opat = opat[band(rshift(op, opat.shift), opat.mask)]
+  end
+  local name, pat = match(opat, "^([a-z0-9_.]*)(.*)")
+  local altname, pat2 = match(pat, "|([a-z0-9_.|]*)(.*)")
+  if altname then pat = pat2 end
+
+  for p in gmatch(pat, ".") do
+    local x = nil
+    if p == "D" then
+      x = map_gpr[band(rshift(op, 0), 31)]
+    elseif p == "J" then
+      x = map_gpr[band(rshift(op, 5), 31)]
+    elseif p == "K" then
+      x = map_gpr[band(rshift(op, 10), 31)]
+    elseif p == "F" then
+      x = "f"..band(rshift(op, 0), 31)
+    elseif p == "G" then
+      x = "f"..band(rshift(op, 5), 31)
+    elseif p == "H" then
+      x = "f"..band(rshift(op, 10), 31)
+    elseif p == "i" then
+      x = "f"..band(rshift(op, 15), 31)
+    elseif p == "S" then
+      x = "fcsr"..band(rshift(op, 0), 31)
+    elseif p == "R" then
+      x = "fcsr"..band(rshift(op, 5), 31)
+    elseif p == "E" then
+      x = "fcc"..band(rshift(op, 0), 7)
+    elseif p == "A" then
+      x = "fcc"..band(rshift(op, 5), 7)
+    elseif p == "I" then
+      x = "fcc"..band(rshift(op, 15), 7)
+    elseif p == "Q" then -- sa2
+      x = band(rshift(op, 15), 3)
+      ctx.rel = x
+      x = format("%d", x)
+    elseif p == "B" then -- sa3
+      x = band(rshift(op, 15), 7)
+      ctx.rel = x
+      x = format("%d", x)
+    elseif p == "M" then -- msbw
+      x = band(rshift(op, 16), 31)
+      ctx.rel = x
+      x = format("%d(0x%x)", x, x)
+    elseif p == "N" then -- msbd
+      x = band(rshift(op, 16), 63)
+      ctx.rel = x
+      x = format("%d(0x%x)", x, x)
+    elseif p == "U" then -- ui5
+      x = band(rshift(op, 10), 31)
+      ctx.rel = x
+      x = format("%d(0x%x)", x, x)
+    elseif p == "V" then -- ui6
+      x = band(rshift(op, 10), 63)
+      ctx.rel = x
+      x = format("%d(0x%x)", x, x)
+    elseif p == "T" then -- ui12
+      x = band(rshift(op, 10), 4095)
+      ctx.rel = x
+      x = format("%d(0x%x)", x, x)
+    elseif p == "W" then -- si14
+      x = band(rshift(op, 10), 16383)
+      x = decode_si_imm(x, 14, 0, true, 0x3fff)
+      ctx.rel = x
+      x = format("%d(0x%04x)", x, band(x, 0x3fff))
+    elseif p == "X" then -- si12
+      x = band(rshift(op, 10), 4095)
+      x = decode_si_imm(x, 12, 0, true, 0xfff)
+      ctx.rel = x
+      x = format("%d(0x%03x)", x, band(x, 0xfff))
+    elseif p == "o" then -- si12 displacement with base register rj
+      local disp = decode_si_imm(band(rshift(op, 10), 0xfff), 12, 0, true, 0xfff)  -- BUG FIX: sign-extend si12 (was printed as a raw 12-bit field).
+      operands[#operands] = format("%s, %d(%s)", last, disp, map_gpr[band(rshift(op, 5), 31)])  -- BUG FIX: include the base register, which was dropped.
+    elseif p == "Y" then -- si16
+      x = band(rshift(op, 10), 65535)
+      x = decode_si_imm(x, 16, 0, true, 0xffff)
+      ctx.rel = x
+      x = format("%d(0x%04x)", x, band(x, 0xffff))
+    elseif p == "Z" then -- si20
+      x = band(rshift(op, 10), 1048575)
+      x = decode_si_imm(x, 20, 0, true, 0xfffff)
+      ctx.rel = x
+      x = format("%d(0x%05x)", x, band(x, 0xfffff))
+    elseif p == "C" then -- code
+      x = band(rshift(op, 0), 32767)
+    elseif p == "O" then -- offs[15:0]
+      x = band(rshift(op, 10), 65535)
+      x = decode_si_imm(x, 16, 0, true, 0xffff)
+      ctx.rel = x
+      x = format("%d(0x%04x)", x, band(x, 0xffff))
+    elseif p == "L" then -- offs[15:0] + offs[20:16]
+      x = lshift(band(op, 31), 16) + band(rshift(op, 10), 65535)
+      x = decode_si_imm(x, 21, 0, true, 0x1fffff)
+      ctx.rel = x
+      x = format("%d(0x%06x)", x, band(x, 0x1fffff))
+    elseif p == "P" then -- offs[15:0] + offs[25:16]
+      x = lshift(band(op, 1023), 16) + band(rshift(op, 10), 65535)
+      x = decode_si_imm(x, 26, 0, true, 0x3ffffff)
+      ctx.rel = x
+      x = format("%d(0x%07x)", x, band(x, 0x3ffffff))
+    elseif p == "a" then
+      x = band(rshift(op, 10), 65535)
+      x = decode_si_imm(x, 16, 0, true, 0xffff)
+      ctx.rel = x
+      x = format("%d(0x%04x)", x, band(x, 0xffff))
+    else
+      assert(false)
+    end
+    if x then operands[#operands+1] = x; last = x end
+  end
+
+  return putop(ctx, name, operands)
+end
+
+------------------------------------------------------------------------------
+
+-- Disassemble a block of code.
+local function disass_block(ctx, ofs, len)
+  if not ofs then ofs = 0 end
+  local stop = len and ofs+len or #ctx.code
+  stop = stop - stop % 4  -- Clamp start/stop to whole 4-byte instructions.
+  ctx.pos = ofs - ofs % 4
+  ctx.rel = nil
+  while ctx.pos < stop do disass_ins(ctx) end
+end
+
+-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
+local function create(code, addr, out)
+  local ctx = {}
+  ctx.code = code
+  ctx.addr = addr or 0
+  ctx.out = out or io.write
+  ctx.symtab = {}
+  ctx.disass = disass_block
+  ctx.hexdump = 8
+  ctx.get = get_le  -- LoongArch64 instructions are always little-endian.
+  ctx.map_pri = map_init
+  return ctx
+end
+
+-- Simple API: disassemble code (a string) at address and output via out.
+local function disass(code, addr, out)
+  create(code, addr, out):disass()
+end
+
+-- Return register name for RID.
+local function regname(r)
+  if r < 32 then return map_gpr[r] end  -- RIDs 0-31 are GPRs.
+  return "f"..(r-32)  -- RIDs 32-63 are FPRs.
+end
+
+-- Public module functions.
+return {
+  create = create,
+  disass = disass,
+  regname = regname
+}
+
diff --git a/src/lib_jit.c b/src/lib_jit.c
index 296bb7698..7663805a4 100644
--- a/src/lib_jit.c
+++ b/src/lib_jit.c
@@ -847,7 +847,8 @@ static uint32_t jit_cpudetect(void)
#endif
#elif LJ_TARGET_S390X
/* No optional CPU features to detect (for now). */
-
+#elif LJ_TARGET_LOONGARCH64
+ /* No optional CPU features to detect (for now). */
#elif LJ_TARGET_RISCV64
#if LJ_HASJIT
diff --git a/src/lj_arch.h b/src/lj_arch.h
index b85d29e16..c82f7464a 100644
--- a/src/lj_arch.h
+++ b/src/lj_arch.h
@@ -35,6 +35,8 @@
#define LUAJIT_ARCH_s390x 8
#define LUAJIT_ARCH_RISCV64 9
#define LUAJIT_ARCH_riscv64 9
+#define LUAJIT_ARCH_LOONGARCH64 10
+#define LUAJIT_ARCH_loongarch64 10
/* Target OS. */
#define LUAJIT_OS_OTHER 0
@@ -73,6 +75,8 @@
#define LUAJIT_TARGET LUAJIT_ARCH_MIPS32
#elif (defined(__riscv) || defined(__riscv__)) && __riscv_xlen == 64
#define LUAJIT_TARGET LUAJIT_ARCH_RISCV64
+#elif defined(__loongarch64)
+#define LUAJIT_TARGET LUAJIT_ARCH_LOONGARCH64
#else
#error "Architecture not supported (in this version), see: https://luajit.org/status.html#architectures"
#endif
@@ -489,6 +493,20 @@
#define LJ_TARGET_MASKROT 1
#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
+#elif LUAJIT_TARGET == LUAJIT_ARCH_LOONGARCH64
+#define LJ_ARCH_NAME "loongarch64"
+#define LJ_ARCH_BITS 64
+#define LJ_ARCH_ENDIAN LUAJIT_LE
+#define LJ_TARGET_LOONGARCH64 1
+#define LJ_TARGET_GC64 1
+#define LJ_TARGET_EHRETREG 4
+#define LJ_TARGET_EHRAREG 1
+#define LJ_TARGET_JUMPRANGE 27 /* +-2^27 = +-128MB */
+#define LJ_TARGET_MASKSHIFT 1
+#define LJ_TARGET_MASKROT 1
+#define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */
+#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
+
#else
#error "No target architecture defined"
#endif
@@ -519,6 +537,16 @@
#error "Need at least GCC 4.8 or newer"
#endif
#endif
+#elif LJ_TARGET_LOONGARCH64
+#if __clang__
+#if ((__clang_major__ < 8) || ((__clang_major__ == 8) && __clang_minor__ < 0)) && !defined(__NX_TOOLCHAIN_MAJOR__)
+#error "Need at least Clang 8.0 or newer"
+#endif
+#else
+#if (__GNUC__ < 8) || ((__GNUC__ == 8) && __GNUC_MINOR__ < 3)
+#error "Need at least GCC 8.3 or newer"
+#endif
+#endif
#elif !LJ_TARGET_PS3
#if __clang__
#if ((__clang_major__ < 3) || ((__clang_major__ == 3) && __clang_minor__ < 5))
@@ -576,6 +604,10 @@
#if !defined(__riscv_float_abi_double)
#error "Only RISC-V 64 double float supported for now"
#endif
+#elif LJ_TARGET_LOONGARCH64
+#if !(defined(_ABILP64) && _LOONGARCH_SIM == _ABILP64)
+#error "Only LOONGARCH lp64d ABI is supported"
+#endif
#endif
#endif
diff --git a/src/lj_asm.c b/src/lj_asm.c
index d0b679dc4..114df7c4b 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -229,6 +229,8 @@ static Reg rset_pickrandom(ASMState *as, RegSet rs)
#include "lj_emit_mips.h"
#elif LJ_TARGET_RISCV64
#include "lj_emit_riscv.h"
+#elif LJ_TARGET_LOONGARCH64
+#include "lj_emit_loongarch64.h"
#else
#error "Missing instruction emitter for target CPU"
#endif
@@ -1714,6 +1716,8 @@ static void asm_loop(ASMState *as)
#include "lj_asm_s390x.h"
#elif LJ_TARGET_RISCV64
#include "lj_asm_riscv64.h"
+#elif LJ_TARGET_LOONGARCH64
+#include "lj_asm_loongarch64.h"
#else
#error "Missing assembler for target CPU"
#endif
diff --git a/src/lj_asm_loongarch64.h b/src/lj_asm_loongarch64.h
new file mode 100644
index 000000000..3ab9e4f30
--- /dev/null
+++ b/src/lj_asm_loongarch64.h
@@ -0,0 +1,1990 @@
+/*
+** LoongArch IR assembler (SSA IR -> machine code).
+** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+/* -- Register allocator extensions --------------------------------------- */
+
+/* Allocate a register with a hint. */
+static Reg ra_hintalloc(ASMState *as, IRRef ref, Reg hint, RegSet allow)
+{
+  Reg r = IR(ref)->r;
+  if (ra_noreg(r)) {
+    if (!ra_hashint(r) && !iscrossref(as, ref))
+      ra_sethint(IR(ref)->r, hint); /* Propagate register hint. */
+    r = ra_allocref(as, ref, allow);
+  }
+  ra_noweak(as, r);  /* Register is in active use: drop weak status. */
+  return r;
+}
+
+/* Allocate two source registers for three-operand instructions. */
+static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow)
+{
+  IRIns *irl = IR(ir->op1), *irr = IR(ir->op2);
+  Reg left = irl->r, right = irr->r;
+  if (ra_hasreg(left)) {  /* Left already assigned: only allocate right. */
+    ra_noweak(as, left);
+    if (ra_noreg(right))
+      right = ra_allocref(as, ir->op2, rset_exclude(allow, left));
+    else
+      ra_noweak(as, right);
+  } else if (ra_hasreg(right)) {  /* Right already assigned. */
+    ra_noweak(as, right);
+    left = ra_allocref(as, ir->op1, rset_exclude(allow, right));
+  } else if (ra_hashint(right)) {  /* Prefer honoring right's hint first. */
+    right = ra_allocref(as, ir->op2, allow);
+    left = ra_alloc1(as, ir->op1, rset_exclude(allow, right));
+  } else {
+    left = ra_allocref(as, ir->op1, allow);
+    right = ra_alloc1(as, ir->op2, rset_exclude(allow, left));
+  }
+  return left | (right << 8);  /* Pack: left in bits 0-7, right in bits 8-15. */
+}
+
+/* -- Guard handling ------------------------------------------------------ */
+
+/* Setup exit stub after the end of each trace. */
+static void asm_exitstub_setup(ASMState *as)
+{
+  MCode *mxp = as->mctop;
+  if (as->mcp == mxp)
+    --as->mcp;
+  /* Stub: st.w TMP, sp, 0; li TMP, traceno; jirl ->vm_exit_handler. */
+  *--mxp = LOONGI_JIRL | RID_R0 | LOONGF_J(RID_R20) | 0<<10;  /* jirl r0, r20, 0 */
+  emit_dj32i(as, RID_TMP, RID_ZERO, as->T->traceno);  /* NOTE(review): emits at as->mcp, not mxp -- verify intent. */
+  *--mxp = *as->mcp;  /* Copy the just-emitted traceno load into the stub. */
+  *--mxp = LOONGI_LU52I_D | RID_R20 | LOONGF_J(RID_R20)  /* Handler addr bits 63..52. */
+	   | LOONGF_I((((uintptr_t)(void *)lj_vm_exit_handler)>>52)&0xfff);
+  *--mxp = LOONGI_LU32I_D | RID_R20  /* Handler addr bits 51..32. */
+	   | LOONGF_I20((((uintptr_t)(void *)lj_vm_exit_handler)>>32)&0xfffff);
+  *--mxp = LOONGI_ORI | RID_R20 | LOONGF_J(RID_R20)  /* Handler addr bits 11..0. */
+	   | LOONGF_I(((uintptr_t)(void *)lj_vm_exit_handler)&0xfff);
+  *--mxp = LOONGI_LU12I_W | RID_R20  /* Handler addr bits 31..12. */
+	   | LOONGF_I20((((uintptr_t)(void *)lj_vm_exit_handler)&0xfffff000)>>12);
+  *--mxp = LOONGI_ST_W | LOONGF_D(RID_TMP) | LOONGF_J(RID_SP);  /* Spill TMP at sp+0. */
+  as->mctop = mxp;
+}
+
+/* Keep this in-sync with exitstub_trace_addr(). */
+#define asm_exitstub_addr(as) ((as)->mctop)
+
+/* Emit conditional branch to exit for guard. */
+static void asm_guard(ASMState *as, LOONGIns loongi, Reg rj, Reg rd)
+{
+  MCode *target = asm_exitstub_addr(as);
+  MCode *p = as->mcp;
+  if (LJ_UNLIKELY(p == as->invmcp)) {  /* Guard at loop-end: invert instead. */
+    as->invmcp = NULL;
+    as->loopinv = 1;
+    as->mcp = p;
+    loongi = loongi ^ ((loongi>>28) == 4 ? 0x00000100u : 0x04000000u); /* Invert cond: BEQ<->BNE resp. BGE<->BLT. */
+    target = p - 1; /* Patch target later in asm_loop_fixup. */
+  }
+  emit_branch(as, loongi, rj, rd, target);
+  emit_dji(as, LOONGI_ADDI_D, RID_TMP, RID_ZERO, as->snapno);  /* Pass snapshot number to the exit stub in TMP. */
+}
+
+static void asm_guard21(ASMState *as, LOONGIns loongi, Reg rj)
+{
+  MCode *target = asm_exitstub_addr(as);
+  MCode *p = as->mcp;
+  if (LJ_UNLIKELY(p == as->invmcp)) {  /* Guard at loop-end: invert instead. */
+    as->invmcp = NULL;
+    as->loopinv = 1;
+    as->mcp = p;
+    loongi = loongi ^ ((loongi>>28) == 4 ? 0x00000100u : 0x04000000u); /* Invert cond: BCEQZ<->BCNEZ. */
+    target = p - 1; /* Patch target later in asm_loop_fixup. */
+  }
+  emit_branch21(as, loongi, rj, target);  /* 21-bit-offset FP-condition branch. */
+  emit_dji(as, LOONGI_ADDI_D, RID_TMP, RID_ZERO, as->snapno);  /* Pass snapshot number to the exit stub in TMP. */
+}
+
+/* -- Operand fusion ------------------------------------------------------ */
+
+/* Limit linear search to this distance. Avoids O(n^2) behavior. */
+#define CONFLICT_SEARCH_LIM 31
+
+/* Check if there's no conflicting instruction between curins and ref. */
+static int noconflict(ASMState *as, IRRef ref, IROp conflict)  /* 1 = safe to fuse. */
+{
+  IRIns *ir = as->ir;
+  IRRef i = as->curins;
+  if (i > ref + CONFLICT_SEARCH_LIM)
+    return 0;  /* Give up, ref is too far away. */
+  while (--i > ref)
+    if (ir[i].o == conflict)
+      return 0;  /* Conflict found. */
+  return 1;  /* Ok, no conflict. */
+}
+
+/* Fuse the array base of colocated arrays. */
+static int32_t asm_fuseabase(ASMState *as, IRRef ref)
+{
+  IRIns *ir = IR(ref);
+  if (ir->o == IR_TNEW && ir->op1 <= LJ_MAX_COLOSIZE &&
+      !neverfuse(as) && noconflict(as, ref, IR_NEWREF))
+    return (int32_t)sizeof(GCtab);  /* Colocated array data follows the GCtab header. */
+  return 0;
+}
+
+/* Fuse array/hash/upvalue reference into register+offset operand. */
+static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow)
+{
+  IRIns *ir = IR(ref);
+  if (ra_noreg(ir->r)) {
+    if (ir->o == IR_AREF) {  /* Array ref: fold constant index into offset. */
+      if (mayfuse(as, ref)) {
+	if (irref_isk(ir->op2)) {
+	  IRRef tab = IR(ir->op1)->op1;
+	  int32_t ofs = asm_fuseabase(as, tab);
+	  IRRef refa = ofs ? tab : ir->op1;
+	  ofs += 8*IR(ir->op2)->i;  /* 8 bytes per array slot. */
+	  if (checki16(ofs)) {  /* NOTE(review): checki16 vs. 12-bit ld/st immediates elsewhere -- confirm consumers handle |ofs| > 2047. */
+	    *ofsp = ofs;
+	    return ra_alloc1(as, refa, allow);
+	  }
+	}
+      }
+    } else if (ir->o == IR_HREFK) {  /* Hash slot ref with constant key slot. */
+      if (mayfuse(as, ref)) {
+	int32_t ofs = (int32_t)(IR(ir->op2)->op2 * sizeof(Node));
+	if (checki16(ofs)) {
+	  *ofsp = ofs;
+	  return ra_alloc1(as, ir->op1, allow);
+	}
+      }
+    } else if (ir->o == IR_UREFC) {  /* Closed upvalue: address is a constant. */
+      if (irref_isk(ir->op1)) {
+	GCfunc *fn = ir_kfunc(IR(ir->op1));
+	intptr_t ofs = (intptr_t)&gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.tv;
+	intptr_t jgl = (intptr_t)J2G(as->J);
+	if ((uintptr_t)(ofs-jgl) < 65536) {  /* Close to global_State: address off JGL. */
+	  *ofsp = ofs-jgl-32768;  /* JGL is biased by 32768. */
+	  return RID_JGL;
+	} else {
+	  *ofsp = (int16_t)ofs;
+	  return ra_allock(as, ofs-(int16_t)ofs, allow);  /* High part in a register. */
+	}
+      }
+    } else if (ir->o == IR_TMPREF) {  /* g->tmptv, addressed off JGL. */
+      *ofsp = (int32_t)(offsetof(global_State, tmptv)-32768);
+      return RID_JGL;
+    }
+  }
+  *ofsp = 0;
+  return ra_alloc1(as, ref, allow);  /* No fusion: plain base register. */
+}
+
+/* Fuse XLOAD/XSTORE reference into load/store operand. */
+static void asm_fusexref(ASMState *as, LOONGIns loongi, Reg rd, IRRef ref,
+			 RegSet allow, int32_t ofs)
+{
+  IRIns *ir = IR(ref);
+  Reg base;
+  if (ra_noreg(ir->r) && canfuse(as, ir)) {
+    intptr_t ofs2;
+    if (ir->o == IR_ADD) {  /* Fold constant ADD operand into the offset. */
+      if (irref_isk(ir->op2) && (ofs2 = ofs + get_kval(as, ir->op2),
+				 checki12(ofs2))) {
+	ref = ir->op1;
+	ofs = (int32_t)ofs2;
+      }
+    } else if (ir->o == IR_STRREF) {  /* String data: skip the GCstr header. */
+      ofs2 = 4096;  /* Out-of-range sentinel: fails checki12 unless replaced below. */
+      lj_assertA(ofs == 0, "bad usage");
+      ofs = (int32_t)sizeof(GCstr);
+      if (irref_isk(ir->op2)) {
+	ofs2 = ofs + get_kval(as, ir->op2);
+	ref = ir->op1;
+      } else if (irref_isk(ir->op1)) {
+	ofs2 = ofs + get_kval(as, ir->op1);
+	ref = ir->op2;
+      }
+      if (!checki12(ofs2)) {
+	/* NYI: Fuse ADD with constant. */
+	Reg right, left = ra_alloc2(as, ir, allow);
+	right = (left >> 8); left &= 255;
+	emit_dji(as, loongi, rd, RID_TMP, ofs&0xfff);  /* Mask to the si12 field. */
+	emit_djk(as, LOONGI_ADD_D, RID_TMP, left, right);
+	return;
+      }
+      ofs = ofs2;
+    }
+  }
+  base = ra_alloc1(as, ref, allow);
+  emit_dji(as, loongi, rd, base, ofs&0xfff);  /* Mask to the si12 field. */
+}
+
+/* Fuse FP multiply-add/sub. */
+
+static int asm_fusemadd(ASMState *as, IRIns *ir, LOONGIns loongi, LOONGIns loongir)
+{
+  IRRef lref = ir->op1, rref = ir->op2;
+  IRIns *irm;
+  /* loongi is used when the MUL is op1, loongir when it is op2 (operands swapped). */
+  if (lref != rref &&
+      ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) &&
+	ra_noreg(irm->r)) ||
+       (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) &&
+	(rref = lref, loongi = loongir, ra_noreg(irm->r))))) {
+    Reg dest = ra_dest(as, ir, RSET_FPR);
+    Reg add = ra_hintalloc(as, rref, dest, RSET_FPR);
+    Reg left = ra_alloc2(as, irm, rset_exclude(rset_exclude(RSET_FPR, dest), add));
+    Reg right = (left >> 8); left &= 255;  /* Unpack ra_alloc2 result. */
+    emit_djka(as, loongi, (dest & 0x1f), (left & 0x1f), (right & 0x1f), (add & 0x1f));
+    return 1;  /* Fused. */
+  }
+  return 0;  /* Caller must emit separate MUL and ADD/SUB. */
+}
+/* -- Calls --------------------------------------------------------------- */
+
+/* Generate a call to a C function. */
+static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
+{
+  uint32_t n, nargs = CCI_XNARGS(ci);
+  int32_t ofs = 0;  /* Stack offset for overflow arguments. */
+  Reg gpr, fpr = REGARG_FIRSTFPR;
+  if ((void *)ci->func)
+    emit_call(as, (void *)ci->func);
+  for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++)
+    as->cost[gpr] = REGCOST(~0u, ASMREF_L);  /* Pin argument GPRs. */
+  gpr = REGARG_FIRSTGPR;
+  for (n = 0; n < nargs; n++) { /* Setup args. */
+    IRRef ref = args[n];
+    if (ref) {
+      IRIns *ir = IR(ref);
+      /* FP args use FPRs, except trailing varargs which go in GPRs. */
+      if (irt_isfp(ir->t) && (n == 0 || !(ci->flags & CCI_VARARG))) {
+	if (fpr <= REGARG_LASTFPR) {
+	  lj_assertA(rset_test(as->freeset, fpr),
+		     "reg %d not free", fpr); /* Must have been evicted. */
+	  ra_leftov(as, fpr, ref);
+	  fpr++;
+	} else if (gpr <= REGARG_LASTGPR) {
+	  lj_assertA(rset_test(as->freeset, gpr),
+		     "reg %d not free", gpr); /* Must have been evicted. */
+	  ra_leftov(as, gpr, ref);
+	  gpr++;
+	} else {  /* Out of registers: pass on the stack. */
+	  Reg r = ra_alloc1(as, ref, RSET_FPR);
+	  emit_spstore(as, ir, r, ofs);
+	  ofs += 8;
+	}
+      } else {
+	if (gpr <= REGARG_LASTGPR) {
+	  lj_assertA(rset_test(as->freeset, gpr),
+		     "reg %d not free", gpr); /* Must have been evicted. */
+	  ra_leftov(as, gpr, ref);
+	  gpr++;
+	} else {  /* Out of registers: pass on the stack. */
+	  Reg r = ra_alloc1(as, ref, RSET_GPR);
+	  emit_spstore(as, ir, r, ofs);
+	  ofs += 8;
+	}
+      }
+    }
+  }
+}
+
+/* Setup result reg/sp for call. Evict scratch regs. */
+static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
+{
+  RegSet drop = RSET_SCRATCH;
+  int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t));
+  if (ra_hasreg(ir->r))
+    rset_clear(drop, ir->r);  /* Dest reg handled below. */
+  if (hiop && ra_hasreg((ir+1)->r))
+    rset_clear(drop, (ir+1)->r);  /* Dest reg handled below. */
+  ra_evictset(as, drop);  /* Evictions must be performed first. */
+  if (ra_used(ir)) {
+    lj_assertA(!irt_ispri(ir->t), "PRI dest");
+    if (irt_isfp(ir->t)) {
+      if ((ci->flags & CCI_CASTU64)) {  /* Result returned in a GPR: move to FPR. */
+	Reg dest = ra_dest(as, ir, RSET_FPR);
+	emit_dj(as, irt_isnum(ir->t) ? LOONGI_MOVGR2FR_D : LOONGI_MOVGR2FR_W,
+		dest, RID_RET);
+      } else {
+	ra_destreg(as, ir, RID_FPRET);
+      }
+    } else if (hiop) {  /* Two-register result. */
+      ra_destpair(as, ir);
+    } else {
+      ra_destreg(as, ir, RID_RET);
+    }
+  }
+}
+
+static void asm_callx(ASMState *as, IRIns *ir)
+{
+  IRRef args[CCI_NARGS_MAX*2];
+  CCallInfo ci;
+  IRRef func;
+  IRIns *irf;
+  ci.flags = asm_callx_flags(as, ir);
+  asm_collectargs(as, ir, &ci, args);
+  asm_setupresult(as, ir, &ci);
+  func = ir->op2; irf = IR(func);
+  if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); }  /* Skip CARG chain head. */
+  if (irref_isk(func)) {  /* Call to constant address. */
+    ci.func = (ASMFunction)(void *)get_kval(as, func);
+  } else {  /* Need specific register for indirect calls. */
+    Reg freg = ra_alloc1(as, func, RSET_RANGE(RID_R12, RID_MAX_GPR)-RSET_FIXED);
+    *--as->mcp = LOONGI_JIRL | LOONGF_D(RID_RA) | LOONGF_J(freg);  /* jirl ra, freg, 0 */
+    ci.func = (ASMFunction)(void *)0;  /* Suppress emit_call in asm_gencall. */
+  }
+  asm_gencall(as, &ci, args);
+}
+
+static void asm_callround(ASMState *as, IRIns *ir, IRCallID id)
+{
+  /* The modified regs must match with the *.dasc implementation. */
+  RegSet drop = RID2RSET(RID_R12)|RID2RSET(RID_R13)|RID2RSET(RID_F0)|
+		RID2RSET(RID_F4)|RID2RSET(RID_F9)|RID2RSET(RID_F22)
+		|RID2RSET(RID_F23);
+  if (ra_hasreg(ir->r)) rset_clear(drop, ir->r);  /* Dest handled by ra_destreg. */
+  ra_evictset(as, drop);
+  ra_destreg(as, ir, RID_FPRET);  /* Result comes back in the FP return reg. */
+  emit_call(as, (void *)lj_ir_callinfo[id].func);
+  ra_leftov(as, REGARG_FIRSTFPR, ir->op1);  /* Single FP argument. */
+}
+
+/* -- Returns ------------------------------------------------------------- */
+
+/* Return to lower frame. Guard that it goes to the right spot. */
+static void asm_retf(ASMState *as, IRIns *ir)
+{
+  Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
+  void *pc = ir_kptr(IR(ir->op2));
+  int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1));  /* Slots freed by return. */
+  as->topslot -= (BCReg)delta;
+  if ((int32_t)as->topslot < 0) as->topslot = 0;
+  irt_setmark(IR(REF_BASE)->t);  /* Children must not coalesce with BASE reg. */
+  emit_setgl(as, base, jit_base);
+  emit_addptr(as, base, -8*delta);
+  Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, base));
+  asm_guard(as, LOONGI_BNE, tmp,  /* Guard: saved PC must equal the expected pc. */
+	    ra_allock(as, igcptr(pc), rset_exclude(rset_exclude(RSET_GPR, base), tmp)));
+  emit_dji(as, LOONGI_LD_D, tmp, base, -8&0xfff);  /* Load frame link at base-8. */
+}
+
+/* -- Buffer operations --------------------------------------------------- */
+
+#if LJ_HASBUFFER
+static void asm_bufhdr_write(ASMState *as, Reg sb)
+{
+  Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));
+  IRIns irgc;
+  irgc.ot = IRT(0, IRT_PGC);  /* GC type. */
+  emit_storeofs(as, &irgc, RID_TMP, sb, offsetof(SBuf, L));  /* Write back sb->L. */
+  emit_djml(as, LOONGI_BSTRINS_D, RID_TMP, tmp,  /* Merge flag bits from old L into cur_L. */
+	    lj_fls(SBUF_MASK_FLAG), 0);
+  emit_getgl(as, RID_TMP, cur_L);
+  emit_loadofs(as, &irgc, tmp, sb, offsetof(SBuf, L));  /* Old sb->L (carries flags). */
+}
+#endif
+
+/* -- Type conversions ---------------------------------------------------- */
+
+static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
+{
+  /* Convert to int, back to double, and guard that the round-trip is exact. */
+  Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left));
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  asm_guard21(as, LOONGI_BCEQZ, 0);  /* Exit if compare (FCC0) is false. */
+  emit_djk(as, LOONGI_FCMP_CEQ_D, 0, tmp, left);
+  emit_dj(as, LOONGI_FFINT_D_W, tmp, tmp);
+  emit_dj(as, LOONGI_MOVFR2GR_S, dest, tmp);
+  emit_dj(as, LOONGI_FTINT_W_D, tmp, left);
+}
+
+static void asm_tobit(ASMState *as, IRIns *ir)
+{
+  /* Add the bias constant supplied as op2, then read the low 32 bits. */
+  RegSet allow = RSET_FPR;
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  Reg left = ra_alloc1(as, ir->op1, allow);
+  Reg right = ra_alloc1(as, ir->op2, rset_clear(allow, left));
+  Reg tmp = ra_scratch(as, rset_clear(allow, right));
+  emit_dj(as, LOONGI_MOVFR2GR_S, dest, tmp);
+  emit_djk(as, LOONGI_FADD_D, tmp, left, right);
+}
+
+static void asm_conv(ASMState *as, IRIns *ir)
+{
+  IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);  /* Source type. */
+  int stfp = (st == IRT_NUM || st == IRT_FLOAT);
+  int st64 = (st == IRT_I64 || st == IRT_U64 || st == IRT_P64);
+  IRRef lref = ir->op1;
+  lj_assertA(irt_type(ir->t) != st, "inconsistent types for CONV");
+  /* Use GPR to pass floating-point arguments */
+  if (irt_isfp(ir->t) && ir->r >= RID_R4 && ir->r <= RID_R11) {
+    Reg dest = ra_dest(as, ir, RSET_GPR);
+    Reg ftmp = ra_scratch(as, RSET_FPR);
+    if (stfp) { /* FP to FP conversion. */
+      emit_dj(as, st == IRT_NUM ? LOONGI_MOVFR2GR_S : LOONGI_MOVFR2GR_D, dest, ftmp);
+      emit_dj(as, st == IRT_NUM ? LOONGI_FCVT_S_D : LOONGI_FCVT_D_S,
+	      ftmp, ra_alloc1(as, lref, RSET_FPR));
+    } else if (st == IRT_U32) { /* U32 to FP conversion. */
+      /* y = (x ^ 0x80000000) + 2147483648.0 */
+      Reg left = ra_alloc1(as, lref, RSET_GPR);
+      Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, ftmp));
+      if (irt_isfloat(ir->t)) {
+	emit_dj(as, LOONGI_MOVFR2GR_S, dest, ftmp);
+	emit_dj(as, LOONGI_FCVT_S_D, ftmp, ftmp);
+      } else {
+	emit_dj(as, LOONGI_MOVFR2GR_D, dest, ftmp);
+      }
+      /* Must perform arithmetic with doubles to keep the precision. */
+      emit_djk(as, LOONGI_FADD_D, ftmp, ftmp, tmp);
+      emit_dj(as, LOONGI_FFINT_D_W, ftmp, ftmp);
+      emit_lsptr(as, LOONGI_FLD_D, (tmp & 0x1f),
+		 (void *)&as->J->k64[LJ_K64_2P31], RSET_GPR);
+      emit_dj(as, LOONGI_MOVGR2FR_W, ftmp, RID_TMP);
+      emit_djk(as, LOONGI_XOR, RID_TMP, RID_TMP, left);
+      emit_dji(as, LOONGI_ADDU16I_D, RID_TMP, RID_R0, 0x8000);  /* TMP = 0x80000000. */
+    } else if(st == IRT_U64) { /* U64 to FP conversion. */
+      /* if (x >= 1u<<63) y = (double)(int64_t)(x&(1u<<63)-1) + pow(2.0, 63) */
+      Reg left = ra_alloc1(as, lref, RSET_GPR);
+      Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, ftmp));
+      MCLabel l_end = emit_label(as);
+      if (irt_isfloat(ir->t)) {
+	emit_dj(as, LOONGI_MOVFR2GR_S, dest, ftmp);
+	emit_djk(as, LOONGI_FADD_S, ftmp, ftmp, tmp);
+	emit_lsptr(as, LOONGI_FLD_S, (tmp & 0x1f), (void *)&as->J->k32[LJ_K32_2P63],
+		   rset_exclude(RSET_GPR, left));
+	emit_branch(as, LOONGI_BGE, left, RID_ZERO, l_end);  /* Skip 2^63 fixup if x < 2^63. */
+	emit_dj(as, LOONGI_FFINT_S_L, ftmp, ftmp);
+      } else {
+	emit_dj(as, LOONGI_MOVFR2GR_D, dest, ftmp);
+	emit_djk(as, LOONGI_FADD_D, ftmp, ftmp, tmp);
+	emit_lsptr(as, LOONGI_FLD_D, (tmp & 0x1f), (void *)&as->J->k64[LJ_K64_2P63],
+		   rset_exclude(RSET_GPR, left));
+	emit_branch(as, LOONGI_BGE, left, RID_ZERO, l_end);  /* Skip 2^63 fixup if x < 2^63. */
+	emit_dj(as, LOONGI_FFINT_D_L, ftmp, ftmp);
+      }
+      emit_dj(as, LOONGI_MOVGR2FR_D, ftmp, RID_TMP);
+      emit_djml(as, LOONGI_BSTRPICK_D, RID_TMP, left, 62, 0);  /* Clear sign bit 63. */
+    } else {  /* Integer to FP conversion. */
+      Reg left = ra_alloc1(as, lref, RSET_GPR);
+      LOONGIns loongi = irt_isfloat(ir->t) ?
+	(st64 ? LOONGI_FFINT_S_L : LOONGI_FFINT_S_W) :
+	(st64 ? LOONGI_FFINT_D_L : LOONGI_FFINT_D_W);
+      emit_dj(as, st64 ? LOONGI_MOVFR2GR_D : LOONGI_MOVFR2GR_S, dest, ftmp);
+      emit_dj(as, loongi, ftmp, ftmp);
+      emit_dj(as, st64 ? LOONGI_MOVGR2FR_D : LOONGI_MOVGR2FR_W, ftmp, left);
+    }
+  } else if (irt_isfp(ir->t)) {  /* Same conversions, result stays in an FPR. */
+    Reg dest = ra_dest(as, ir, RSET_FPR);
+    if (stfp) { /* FP to FP conversion. */
+      emit_dj(as, st == IRT_NUM ? LOONGI_FCVT_S_D : LOONGI_FCVT_D_S,
+	      dest, ra_alloc1(as, lref, RSET_FPR));
+    } else if (st == IRT_U32) { /* U32 to FP conversion. */
+      /* y = (x ^ 0x80000000) + 2147483648.0 */
+      Reg left = ra_alloc1(as, lref, RSET_GPR);
+      Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, dest));
+      if (irt_isfloat(ir->t))
+	emit_dj(as, LOONGI_FCVT_S_D, dest, dest);
+      /* Must perform arithmetic with doubles to keep the precision. */
+      emit_djk(as, LOONGI_FADD_D, dest, dest, tmp);
+      emit_dj(as, LOONGI_FFINT_D_W, dest, dest);
+      emit_lsptr(as, LOONGI_FLD_D, (tmp & 0x1f),
+		 (void *)&as->J->k64[LJ_K64_2P31], RSET_GPR);
+      emit_dj(as, LOONGI_MOVGR2FR_W, dest, RID_TMP);
+      emit_djk(as, LOONGI_XOR, RID_TMP, RID_TMP, left);
+      emit_dji(as, LOONGI_ADDU16I_D, RID_TMP, RID_R0, 0x8000);  /* TMP = 0x80000000. */
+    } else if(st == IRT_U64) { /* U64 to FP conversion. */
+      /* if (x >= 1u<<63) y = (double)(int64_t)(x&(1u<<63)-1) + pow(2.0, 63) */
+      Reg left = ra_alloc1(as, lref, RSET_GPR);
+      Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, dest));
+      MCLabel l_end = emit_label(as);
+      if (irt_isfloat(ir->t)) {
+	emit_djk(as, LOONGI_FADD_S, dest, dest, tmp);
+	emit_lsptr(as, LOONGI_FLD_S, (tmp & 0x1f), (void *)&as->J->k32[LJ_K32_2P63],
+		   rset_exclude(RSET_GPR, left));
+	emit_branch(as, LOONGI_BGE, left, RID_ZERO, l_end);  /* Skip 2^63 fixup if x < 2^63. */
+	emit_dj(as, LOONGI_FFINT_S_L, dest, dest);
+      } else {
+	emit_djk(as, LOONGI_FADD_D, dest, dest, tmp);
+	emit_lsptr(as, LOONGI_FLD_D, (tmp & 0x1f), (void *)&as->J->k64[LJ_K64_2P63],
+		   rset_exclude(RSET_GPR, left));
+	emit_branch(as, LOONGI_BGE, left, RID_ZERO, l_end);  /* Skip 2^63 fixup if x < 2^63. */
+	emit_dj(as, LOONGI_FFINT_D_L, dest, dest);
+      }
+      emit_dj(as, LOONGI_MOVGR2FR_D, dest, RID_TMP);
+      emit_djml(as, LOONGI_BSTRPICK_D, RID_TMP, left, 62, 0);  /* Clear sign bit 63. */
+    } else {  /* Integer to FP conversion. */
+      Reg left = ra_alloc1(as, lref, RSET_GPR);
+      LOONGIns loongi = irt_isfloat(ir->t) ?
+	(st64 ? LOONGI_FFINT_S_L : LOONGI_FFINT_S_W) :
+	(st64 ? LOONGI_FFINT_D_L : LOONGI_FFINT_D_W);
+      emit_dj(as, loongi, dest, dest);
+      emit_dj(as, st64 ? LOONGI_MOVGR2FR_D : LOONGI_MOVGR2FR_W, dest, left);
+    }
+  } else if (stfp) { /* FP to integer conversion. */
+    if (irt_isguard(ir->t)) {
+      /* Checked conversions are only supported from number to int. */
+      lj_assertA(irt_isint(ir->t) && st == IRT_NUM,
+		 "bad type for checked CONV");
+      asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
+    } else {
+      Reg dest = ra_dest(as, ir, RSET_GPR);
+      Reg left = ra_alloc1(as, lref, RSET_FPR);
+      Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left));
+      if (irt_isu32(ir->t)) { /* FP to U32 conversion. */
+	/* y = (int)floor(x - 2147483648.0) ^ 0x80000000 */
+	emit_djk(as, LOONGI_XOR, dest, dest, RID_TMP);
+	emit_dji(as, LOONGI_ADDU16I_D, RID_TMP, RID_R0, 0x8000);  /* TMP = 0x80000000. */
+	emit_dj(as, LOONGI_MOVFR2GR_S, dest, tmp);
+	emit_dj(as, st == IRT_FLOAT ? LOONGI_FTINTRM_W_S : LOONGI_FTINTRM_W_D,
+		tmp, tmp);
+	emit_djk(as, st == IRT_FLOAT ? LOONGI_FSUB_S : LOONGI_FSUB_D,
+		 tmp, left, tmp);
+	if (st == IRT_FLOAT)
+	  emit_lsptr(as, LOONGI_FLD_S, (tmp & 0x1f),
+		     (void *)&as->J->k32[LJ_K32_2P31], RSET_GPR);
+	else
+	  emit_lsptr(as, LOONGI_FLD_D, (tmp & 0x1f),
+		     (void *)&as->J->k64[LJ_K64_2P31], RSET_GPR);
+      } else if (irt_isu64(ir->t)) { /* FP to U64 conversion. */
+	MCLabel l_end;
+	emit_dj(as, LOONGI_MOVFR2GR_D, dest, tmp);
+	l_end = emit_label(as);
+	/* For inputs >= 2^63 add -2^64 and convert again. */
+	if (st == IRT_NUM) {
+	  emit_dj(as, LOONGI_FTINTRZ_L_D, tmp, tmp);
+	  emit_djk(as, LOONGI_FADD_D, tmp, left, tmp);
+	  emit_lsptr(as, LOONGI_FLD_D, (tmp & 0x1f),
+		     (void *)&as->J->k64[LJ_K64_M2P64],
+		     rset_exclude(RSET_GPR, dest));
+	  emit_branch21(as, LOONGI_BCNEZ, 0, l_end);  /* x < 2^63: first result stands. */
+	  emit_dj(as, LOONGI_FTINTRZ_L_D, tmp, left);
+	  emit_djk(as, LOONGI_FCMP_CLT_D, 0, left, tmp);
+	  emit_lsptr(as, LOONGI_FLD_D, (tmp & 0x1f),
+		     (void *)&as->J->k64[LJ_K64_2P63],
+		     rset_exclude(RSET_GPR, dest));
+	} else {
+	  emit_dj(as, LOONGI_FTINTRZ_L_S, tmp, tmp);
+	  emit_djk(as, LOONGI_FADD_S, tmp, left, tmp);
+	  emit_lsptr(as, LOONGI_FLD_S, (tmp & 0x1f),
+		     (void *)&as->J->k32[LJ_K32_M2P64],
+		     rset_exclude(RSET_GPR, dest));
+	  emit_branch21(as, LOONGI_BCNEZ, 0, l_end);  /* x < 2^63: first result stands. */
+	  emit_dj(as, LOONGI_FTINTRZ_L_S, tmp, left);
+	  emit_djk(as, LOONGI_FCMP_CLT_S, 0, left, tmp);
+	  emit_lsptr(as, LOONGI_FLD_S, (tmp & 0x1f),
+		     (void *)&as->J->k32[LJ_K32_2P63],
+		     rset_exclude(RSET_GPR, dest));
+	}
+      } else {  /* Truncating FP to (u)int32/int64 conversion. */
+	LOONGIns loongi = irt_is64(ir->t) ?
+	  (st == IRT_NUM ? LOONGI_FTINTRZ_L_D : LOONGI_FTINTRZ_L_S) :
+	  (st == IRT_NUM ? LOONGI_FTINTRZ_W_D : LOONGI_FTINTRZ_W_S);
+	emit_dj(as, irt_is64(ir->t) ? LOONGI_MOVFR2GR_D : LOONGI_MOVFR2GR_S, dest, left);
+	emit_dj(as, loongi, left, left);
+      }
+    }
+  } else {
+    Reg dest = ra_dest(as, ir, RSET_GPR);
+    if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */
+      Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
+      lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t), "bad type for CONV EXT");
+      if ((ir->op2 & IRCONV_SEXT)) { /* Sign-extend. */
+	emit_dj(as, st == IRT_I8 ? LOONGI_EXT_W_B : LOONGI_EXT_W_H, dest, left);
+      } else { /* Zero-extend. */
+	int msbd = st == IRT_U8 ? 7 : 15;
+	emit_djml(as, LOONGI_BSTRPICK_D, dest, left, msbd, 0);
+      }
+    } else { /* 32/64 bit integer conversions. */
+      if (irt_is64(ir->t)) {
+	if (st64) {
+	  /* 64/64 bit no-op (cast)*/
+	  ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */
+	} else {
+	  Reg left = ra_alloc1(as, lref, RSET_GPR);
+	  if ((ir->op2 & IRCONV_SEXT)) { /* 32 to 64 bit sign extension. */
+	    emit_dju(as, LOONGI_SLLI_W, dest, left, 0);  /* slli.w sign-extends to 64 bit. */
+	  } else { /* 32 to 64 bit zero extension. */
+	    emit_djml(as, LOONGI_BSTRPICK_D, dest, left, 31, 0);
+	  }
+	}
+      } else {
+	if (st64 && !(ir->op2 & IRCONV_NONE)) {
+	  /* This is either a 32 bit reg/reg mov which zeroes the hiword
+	  ** or a load of the loword from a 64 bit address.
+	  */
+	  Reg left = ra_alloc1(as, lref, RSET_GPR);
+	  emit_djml(as, LOONGI_BSTRPICK_D, dest, left, 31, 0);
+	} else { /* 32/32 bit no-op (cast). */
+	  ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */
+	}
+      }
+    }
+  }
+}
+
+static void asm_strto(ASMState *as, IRIns *ir)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
+  IRRef args[2];
+  int32_t ofs = SPOFS_TMP;
+  RegSet drop = RSET_SCRATCH;
+  if (ra_hasreg(ir->r)) rset_set(drop, ir->r);  /* Spill dest reg (if any). */
+  ra_evictset(as, drop);
+  if (ir->s) ofs = sps_scale(ir->s);  /* Result goes to the IR's own spill slot. */
+  asm_guard(as, LOONGI_BEQ, RID_RET, RID_ZERO);  /* Test return status. */
+  args[0] = ir->op1;  /* GCstr *str */
+  args[1] = ASMREF_TMP1;  /* TValue *n */
+  asm_gencall(as, ci, args);
+  /* Store the result to the spill slot or temp slots. */
+  Reg tmp = ra_releasetmp(as, ASMREF_TMP1);
+  emit_addk(as, tmp, RID_SP, ofs, RSET_GPR);  /* TMP1 = sp + ofs. */
+}
+
+/* -- Memory references --------------------------------------------------- */
+
+/* Store tagged value for ref at base+ofs. */
+static void asm_tvstore64(ASMState *as, Reg base, int32_t ofs, IRRef ref)
+{
+  RegSet allow = rset_exclude(RSET_GPR, base);
+  IRIns *ir = IR(ref);
+  lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t),
+	     "store of IR type %d", irt_type(ir->t));
+  if (irref_isk(ref)) {  /* Constant: store the precomputed 64-bit TValue. */
+    TValue k;
+    lj_ir_kvalue(as->J->L, &k, ir);
+    Reg ku64 = ra_allock(as, (int64_t)k.u64, allow);
+    rset_clear(allow, ku64);
+    if (checki12(ofs)) {
+      emit_dji(as, LOONGI_ST_D, ku64, base, ofs&0xfff);
+    } else {
+      emit_djk(as, LOONGI_STX_D, ku64, base, ra_allock(as, ofs, allow));
+    }
+  } else {
+    Reg src = ra_alloc1(as, ref, allow);
+    rset_clear(allow, src);
+    Reg type = ra_allock(as, (int64_t)irt_toitype(ir->t) << 47, allow);  /* NaN-tag in bits 47+. */
+    emit_dji(as, LOONGI_ST_D, RID_TMP, base, ofs&0xfff);
+    if (irt_isinteger(ir->t)) {  /* Tag the zero-extended 32-bit payload. */
+      emit_djk(as, LOONGI_ADD_D, RID_TMP, RID_TMP, type);
+      emit_djml(as, LOONGI_BSTRPICK_D, RID_TMP, src, 31, 0);
+    } else {
+      emit_djk(as, LOONGI_ADD_D, RID_TMP, src, type);
+    }
+  }
+}
+
+/* Get pointer to TValue. */
+static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode) /* NOTE(review): marked 'todo-new' by the port author -- verify mode handling. */
+{
+  int32_t tmpofs = (int32_t)(offsetof(global_State, tmptv)-32768);  /* JGL is biased by 32768. */
+  RegSet allow = RSET_GPR;
+  if ((mode & IRTMPREF_IN1)) {
+    IRIns *ir = IR(ref);
+    if (irt_isnum(ir->t)) {
+      if ((mode & IRTMPREF_OUT1)) {  /* Copy the number to g->tmptv. */
+	Reg src = ra_alloc1(as, ref, RSET_FPR);
+	emit_addk(as, dest, RID_JGL, tmpofs, allow);
+	emit_lso(as, LOONGI_ST_D, src, RID_JGL, tmpofs, allow);
+      } else if (irref_isk(ref)) {
+	/* Use the number constant itself as a TValue. */
+	ra_allockreg(as, igcptr(ir_knum(ir)), dest);
+      } else {
+	emit_dji(as, LOONGI_ADDI_D, dest, RID_SP, ra_spill(as, ir)&0xfff);  /* Point at spill slot. */
+      }
+    } else {
+      /* Otherwise use g->tmptv to hold the TValue. */
+      asm_tvstore64(as, dest, 0, ref);
+      emit_addk(as, dest, RID_JGL, tmpofs, RSET_GPR);
+    }
+  } else {
+    emit_addk(as, dest, RID_JGL, tmpofs, RSET_GPR);  /* Just return &g->tmptv. */
+  }
+}
+
+static void asm_aref(ASMState *as, IRIns *ir)
+{
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  Reg idx, base;
+  if (irref_isk(ir->op2)) {  /* Constant index: fold into an immediate add. */
+    IRRef tab = IR(ir->op1)->op1;
+    int32_t ofs = asm_fuseabase(as, tab);
+    IRRef refa = ofs ? tab : ir->op1;
+    ofs += 8*IR(ir->op2)->i;  /* 8 bytes per array slot. */
+    if (checki12(ofs)) {
+      base = ra_alloc1(as, refa, RSET_GPR);
+      emit_dji(as, LOONGI_ADDI_D, dest, base, ofs&0xfff);
+      return;
+    }
+  }
+  base = ra_alloc1(as, ir->op1, RSET_GPR);
+  idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base));
+  emit_djk(as, LOONGI_ADD_D, dest, RID_TMP, base);  /* dest = base + idx*8 */
+  emit_dju(as, LOONGI_SLLI_D, RID_TMP, idx, 3);
+}
+
+/* Inlined hash lookup. Specialized for key type and for const keys.
+** The equivalent C code is:
+** Node *n = hashkey(t, key);
+** do {
+** if (lj_obj_equal(&n->key, key)) return &n->val;
+** } while ((n = nextnode(n)));
+** return niltv(L);
+*/
+static void asm_href(ASMState *as, IRIns *ir, IROp merge)
+{
+  RegSet allow = RSET_GPR;
+  int destused = ra_used(ir);
+  Reg dest = ra_dest(as, ir, allow);
+  Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest));
+  Reg key = RID_NONE, type = RID_NONE, tmpnum = RID_NONE, tmp1, tmp2;
+  Reg cmp64 = RID_NONE;
+  IRRef refkey = ir->op2;
+  IRIns *irkey = IR(refkey);
+  int isk = irref_isk(refkey);
+  IRType1 kt = irkey->t;
+  uint32_t khash;
+  MCLabel l_end, l_loop, l_next;
+  rset_clear(allow, tab);
+  tmp1 = ra_scratch(as, allow);
+  rset_clear(allow, tmp1);
+  tmp2 = ra_scratch(as, allow);
+  rset_clear(allow, tmp2);
+
+  if (irt_isnum(kt)) {  /* Number keys are compared in FP registers. */
+    key = ra_alloc1(as, refkey, RSET_FPR);
+    tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key));
+  } else {
+    /* Allocate cmp64 register used for 64-bit comparisons */
+    if (!isk && irt_isaddr(kt)) {
+      cmp64 = tmp2;
+    } else {
+      int64_t k;
+      if (isk && irt_isaddr(kt)) {
+	k = ((int64_t)irt_toitype(kt) << 47) | irkey[1].tv.u64;  /* Tagged GC pointer. */
+      } else {
+	lj_assertA(irt_ispri(kt) && !irt_isnil(kt), "bad HREF key type");
+	k = ~((int64_t)~irt_toitype(kt) << 47);  /* Canonical primitive TValue. */
+      }
+      cmp64 = ra_allock(as, k, allow);
+      rset_clear(allow, cmp64);
+    }
+    if (!irt_ispri(kt)) {
+      key = ra_alloc1(as, refkey, allow);
+      rset_clear(allow, key);
+    }
+  }
+
+  /* Key not found in chain: jump to exit (if merged) or load niltv. */
+  l_end = emit_label(as);
+  as->invmcp = NULL;
+  if (merge == IR_NE)
+    asm_guard(as, LOONGI_BEQ, RID_ZERO, RID_ZERO);  /* Unconditional exit. */
+  else if (destused)
+    emit_loada(as, dest, niltvg(J2G(as->J)));
+
+  /* Follow hash chain until the end. */
+  l_loop = --as->mcp;  /* Placeholder, patched to a BNE below. */
+  emit_move(as, dest, tmp1);
+  emit_dji(as, LOONGI_LD_D, tmp1, dest, (int32_t)offsetof(Node, next)&0xfff);
+  l_next = emit_label(as);
+
+  /* Type and value comparison. */
+  if (merge == IR_EQ) { /* Must match asm_guard(). */
+    l_end = asm_exitstub_addr(as);
+  }
+  if (irt_isnum(kt)) {
+    emit_branch21(as, LOONGI_BCNEZ, 0, l_end);  /* Keys compare equal: done. */
+    emit_dj32i(as, RID_TMP, RID_ZERO, as->snapno);
+    emit_djk(as, LOONGI_FCMP_CEQ_D, 0, tmpnum, key);
+    emit_branch(as, LOONGI_BEQ, tmp1, RID_ZERO, l_next);  /* Not a number: next node. */
+    emit_dju(as, LOONGI_SLTUI, tmp1, tmp1, ((int32_t)LJ_TISNUM)&0xfff);
+    emit_dju(as, LOONGI_SRAI_D, tmp1, tmp1, 47);  /* Extract the type tag. */
+    emit_dj(as, LOONGI_MOVGR2FR_D, tmpnum, tmp1)
+
+/* HREFK: load the key of a hash node at a constant slot offset and
+** guard that it matches the IR constant key.
+** NOTE: code is emitted bottom-up; the guard below executes last.
+*/
+static void asm_hrefk(ASMState *as, IRIns *ir)
+{
+  IRIns *kslot = IR(ir->op2);
+  IRIns *irkey = IR(kslot->op1);
+  int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node));
+  int32_t kofs = ofs + (int32_t)offsetof(Node, key);
+  Reg dest = (ra_used(ir)||ofs > 32736) ? ra_dest(as, ir, RSET_GPR) : RID_NONE;
+  Reg node = ra_alloc1(as, ir->op1, RSET_GPR);
+  RegSet allow = rset_exclude(RSET_GPR, node);
+  Reg idx = node;
+  Reg key = ra_scratch(as, allow);
+  int64_t k;
+  lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot");
+  if (ofs > 32736) {
+    /* Offset too large to fold into the key load: add node+ofs into dest. */
+    idx = dest;
+    rset_clear(allow, dest);
+    kofs = (int32_t)offsetof(Node, key);
+  } else if (ra_hasreg(dest)) {
+    emit_addk(as, dest, node, ofs, allow);
+  }
+  /* Build the expected tagged 64-bit key value (type tag in bits 47+). */
+  if (irt_ispri(irkey->t)) {
+    lj_assertA(!irt_isnil(irkey->t), "bad HREFK key type");
+    k = ~((int64_t)~irt_toitype(irkey->t) << 47);
+  } else if (irt_isnum(irkey->t)) {
+    k = (int64_t)ir_knum(irkey)->u64;
+  } else {
+    k = ((int64_t)irt_toitype(irkey->t) << 47) | (int64_t)ir_kgc(irkey);
+  }
+  asm_guard(as, LOONGI_BNE, key, ra_allock(as, k, allow));
+  emit_lso(as, LOONGI_LD_D, key, idx, kofs, allow);
+  if (ofs > 32736)
+    emit_djk(as, LOONGI_ADD_D, dest, node, ra_allock(as, ofs, allow));
+}
+
+/* UREF: compute a pointer to an upvalue's value slot.
+** UREFC additionally guards that the upvalue is closed.
+*/
+static void asm_uref(ASMState *as, IRIns *ir)
+{
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  if (irref_isk(ir->op1)) {
+    /* Constant function: the upvalue address is known at assembly time. */
+    GCfunc *fn = ir_kfunc(IR(ir->op1));
+    MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
+    emit_lsptr(as, LOONGI_LD_D, dest, v, RSET_GPR);
+  } else {
+    Reg uv = ra_scratch(as, RSET_GPR);
+    Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
+    if (ir->o == IR_UREFC) {
+      Reg tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR, dest), uv));
+      /* Guard uv->closed != 0, then dest = &uv->tv (emitted bottom-up). */
+      asm_guard(as, LOONGI_BEQ, tmp, RID_ZERO);
+      emit_dji(as, LOONGI_ADDI_D, dest, uv, ((int32_t)offsetof(GCupval, tv))&0xfff);
+      emit_dji(as, LOONGI_LD_BU, tmp, uv, ((int32_t)offsetof(GCupval, closed))&0xfff);
+    } else {
+      emit_dji(as, LOONGI_LD_D, dest, uv, ((int32_t)offsetof(GCupval, v))&0xfff);
+    }
+    emit_lso(as, LOONGI_LD_D, uv, func, (int32_t)offsetof(GCfuncL, uvptr) +
+             (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8), RSET_GPR);
+  }
+}
+
+static void asm_fref(ASMState *as, IRIns *ir)
+{
+  /* FREF is always fused into the consuming load/store; nothing to emit. */
+  UNUSED(ir); UNUSED(as);
+  lj_assertA(!ra_used(ir), "unfused FREF");
+}
+
+/* STRREF: compute the address of a character inside a string object. */
+static void asm_strref(ASMState *as, IRIns *ir)
+{
+  RegSet allow = RSET_GPR;
+  Reg dest = ra_dest(as, ir, allow);
+  Reg base = ra_alloc1(as, ir->op1, allow);
+  IRIns *irr = IR(ir->op2);
+  int32_t ofs = sizeof(GCstr);  /* String data follows the GCstr header. */
+  rset_clear(allow, base);
+  if (irref_isk(ir->op2) && checki12(ofs + irr->i)) {
+    /* Constant index that fits si12: a single ADDI.D suffices. */
+    emit_dji(as, LOONGI_ADDI_D, dest, base, (ofs + irr->i)&0xfff);
+  } else {
+    /* Emitted bottom-up: base+index runs first, then +sizeof(GCstr). */
+    emit_dji(as, LOONGI_ADDI_D, dest, dest, ofs&0xfff);
+    emit_djk(as, LOONGI_ADD_D, dest, base, ra_alloc1(as, ir->op2, allow));
+  }
+}
+
+/* -- Loads and stores ---------------------------------------------------- */
+
+/* Select the load instruction matching the IR type of an FLOAD/XLOAD.
+** (Stale "fallthrough" comments after unconditional returns removed.)
+*/
+static LOONGIns asm_fxloadins(ASMState *as, IRIns *ir)
+{
+  UNUSED(as);
+  switch (irt_type(ir->t)) {
+  case IRT_I8:
+    return LOONGI_LD_B;
+  case IRT_U8:
+    return LOONGI_LD_BU;
+  case IRT_I16:
+    return LOONGI_LD_H;
+  case IRT_U16:
+    return LOONGI_LD_HU;
+  case IRT_NUM:
+    lj_assertA(!LJ_SOFTFP32, "unsplit FP op");
+    return LOONGI_FLD_D;
+  case IRT_FLOAT:
+    return LOONGI_FLD_S;
+  default:
+    /* All remaining types are integer- or pointer-sized. */
+    return irt_is64(ir->t) ? LOONGI_LD_D : LOONGI_LD_W;
+  }
+}
+
+/* Select the store instruction matching the IR type of an FSTORE/XSTORE. */
+static LOONGIns asm_fxstoreins(ASMState *as, IRIns *ir)
+{
+  UNUSED(as);
+  switch (irt_type(ir->t)) {
+  case IRT_I8: case IRT_U8: return LOONGI_ST_B;
+  case IRT_I16: case IRT_U16: return LOONGI_ST_H;
+  case IRT_NUM:
+    lj_assertA(!LJ_SOFTFP32, "unsplit FP op");
+    if (!LJ_SOFTFP) return LOONGI_FST_D;
+    /* fallthrough */
+  case IRT_FLOAT: return LOONGI_FST_S;
+  default: return (LJ_64 && irt_is64(ir->t)) ? LOONGI_ST_D : LOONGI_ST_W;
+  }
+}
+
+/* FLOAD: load a field from a GC object, or from GG_State for REF_NIL. */
+static void asm_fload(ASMState *as, IRIns *ir)
+{
+  RegSet allow = RSET_GPR;
+  Reg idx, dest = ra_dest(as, ir, allow);
+  rset_clear(allow, dest);
+  LOONGIns loongi = asm_fxloadins(as, ir);
+  int32_t ofs;
+  if (ir->op1 == REF_NIL) { /* FLOAD from GG_State with offset. */
+    idx = ra_allock(as, (int64_t)J2GG(as->J), allow);
+    ofs = (int32_t)(ir->op2<<2);
+  } else {
+    idx = ra_alloc1(as, ir->op1, allow);
+    if (ir->op2 == IRFL_TAB_ARRAY) {
+      ofs = asm_fuseabase(as, ir->op1);
+      if (ofs) { /* Turn the t->array load into an add for colocated arrays. */
+        emit_dji(as, LOONGI_ADDI_D, dest, idx, ofs);
+        return;
+      }
+    }
+    ofs = field_ofs[ir->op2];  /* Field offset table indexed by IRFL. */
+    lj_assertA(!irt_isfp(ir->t), "bad FP FLOAD");
+  }
+  rset_clear(allow, idx);
+  emit_lso(as, loongi, dest, idx, ofs, allow);
+}
+
+/* FSTORE: store a value into an object field. Sunk stores emit nothing. */
+static void asm_fstore(ASMState *as, IRIns *ir)
+{
+  if (ir->r == RID_SINK)
+    return;
+  Reg src = ra_alloc1(as, ir->op2, RSET_GPR);
+  IRIns *irf = IR(ir->op1);
+  Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src));
+  int32_t ofs = field_ofs[irf->op2];
+  lj_assertA(!irt_isfp(ir->t), "bad FP FSTORE");
+  emit_dji(as, asm_fxstoreins(as, ir), src, idx, ofs&0xfff);
+}
+
+/* XLOAD: load from an arbitrary (possibly fused) address expression. */
+static void asm_xload(ASMState *as, IRIns *ir)
+{
+  Reg dest = ra_dest(as, ir, (irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR);
+  lj_assertA(LJ_TARGET_UNALIGNED || !(ir->op2 & IRXLOAD_UNALIGNED),
+             "unaligned XLOAD");
+  asm_fusexref(as, asm_fxloadins(as, ir), dest, ir->op1, RSET_GPR, 0);
+}
+
+/* XSTORE: store to an arbitrary (possibly fused) address expression. */
+static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs)
+{
+  if (ir->r == RID_SINK)  /* Sunk store: emit nothing. */
+    return;
+  Reg src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
+  asm_fusexref(as, asm_fxstoreins(as, ir), src, ir->op1,
+               rset_exclude(RSET_GPR, src), ofs);
+}
+
+#define asm_xstore(as, ir) asm_xstore_(as, ir, 0)
+
+/* AHUVLOAD: load a TValue from array/hash/upvalue slot and type-check it.
+** Code is emitted bottom-up: the LD.D of the tagged value executes first,
+** then the type guard, then the extension/untagging into dest.
+*/
+static void asm_ahuvload(ASMState *as, IRIns *ir)
+{
+  Reg dest = RID_NONE, type, idx;
+  RegSet allow = RSET_GPR;
+  int32_t ofs = 0;
+  IRType1 t = ir->t;
+
+  type = ra_scratch(as, allow);
+  rset_clear(allow, type);
+
+  if (ra_used(ir)) {
+    lj_assertA((irt_isnum(ir->t)) || irt_isint(ir->t) || irt_isaddr(ir->t),
+               "bad load type %d", irt_type(ir->t));
+    dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : allow);
+    rset_clear(allow, dest);
+    if (irt_isaddr(t))
+      emit_djml(as, LOONGI_BSTRPICK_D, dest, dest, 46, 0);  /* Strip tag. */
+    else if (irt_isint(t))
+      emit_dju(as, LOONGI_SLLI_W, dest, dest, 0);  /* Sign-extend to 64 bit. */
+  }
+  idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
+  if (ir->o == IR_VLOAD) ofs += 8 * ir->op2;  /* VLOAD selects a slot. */
+  rset_clear(allow, idx);
+  if (irt_isnum(t)) {
+    /* Numbers: guard tag < LJ_TISNUM, i.e. the value is a double. */
+    Reg tmp2 = ra_scratch(as, allow);
+    asm_guard(as, LOONGI_BEQ, tmp2, RID_ZERO);
+    emit_dju(as, LOONGI_SLTUI, tmp2, type, ((int32_t)LJ_TISNUM)&0xfff);
+  } else {
+    /* Other types: guard for an exact tag match. */
+    asm_guard(as, LOONGI_BNE, type,
+              ra_allock(as, (int32_t)irt_toitype(t), allow));
+  }
+  if (ra_hasreg(dest)) {
+    if (irt_isnum(t)) {
+      emit_lso(as, LOONGI_FLD_D, dest, idx, ofs, allow);
+      dest = type;
+    }
+  } else {
+    dest = type;
+  }
+  emit_dju(as, LOONGI_SRAI_D, type, dest, 47);  /* Extract the type tag. */
+  emit_lso(as, LOONGI_LD_D, dest, idx, ofs, allow);
+}
+
+/* AHUSTORE: store a TValue to array/hash/upvalue. Sunk stores emit nothing. */
+static void asm_ahustore(ASMState *as, IRIns *ir)
+{
+  RegSet allow = RSET_GPR;
+  Reg idx, src = RID_NONE, type = RID_NONE;
+  int32_t ofs = 0;
+  if (ir->r == RID_SINK)
+    return;
+  if (irt_isnum(ir->t)) {
+    src = ra_alloc1(as, ir->op2, RSET_FPR);
+    idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
+    emit_lso(as, LOONGI_FST_D, src, idx, ofs, allow);
+  } else {
+    Reg tmp = RID_TMP;
+    if (irt_ispri(ir->t)) {
+      /* Primitive types store a constant tagged value. */
+      tmp = ra_allock(as, ~((int64_t)~irt_toitype(ir->t) << 47), allow);
+      rset_clear(allow, tmp);
+    } else {
+      src = ra_alloc1(as, ir->op2, allow);
+      rset_clear(allow, src);
+      type = ra_allock(as, (int64_t)irt_toitype(ir->t) << 47, allow);
+      rset_clear(allow, type);
+    }
+    idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
+    emit_lso(as, LOONGI_ST_D, tmp, idx, ofs, allow);
+    if (ra_hasreg(src)) {
+      /* Merge payload and tag into RID_TMP (emitted bottom-up). */
+      if (irt_isinteger(ir->t)) {
+        emit_djk(as, LOONGI_ADD_D, tmp, tmp, type);
+        emit_djml(as, LOONGI_BSTRPICK_D, tmp, src, 31, 0);  /* Zero-extend. */
+      } else {
+        emit_djk(as, LOONGI_ADD_D, tmp, src, type);
+      }
+    }
+  }
+}
+
+/* SLOAD: load a stack slot relative to BASE, with optional type check
+** and optional int<->number conversion (emitted bottom-up).
+*/
+static void asm_sload(ASMState *as, IRIns *ir)
+{
+  Reg dest = RID_NONE, type = RID_NONE, base;
+  RegSet allow = RSET_GPR;
+  IRType1 t = ir->t;
+  int32_t ofs = 8*((int32_t)ir->op1-2);  /* Slot offset relative to BASE. */
+  lj_assertA(!(ir->op2 & IRSLOAD_PARENT),
+             "bad parent SLOAD"); /* Handled by asm_head_side(). */
+  lj_assertA(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK),
+             "inconsistent SLOAD variant");
+  if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) {
+    dest = ra_scratch(as, RSET_FPR);
+    asm_tointg(as, ir, dest);
+    t.irt = IRT_NUM; /* Continue with a regular number type check. */
+  } else if (ra_used(ir)) {
+    lj_assertA((irt_isnum(ir->t)) ||
+               irt_isint(ir->t) || irt_isaddr(ir->t),
+               "bad SLOAD type %d", irt_type(ir->t));
+    dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : allow);
+    rset_clear(allow, dest);
+    base = ra_alloc1(as, REF_BASE, allow);
+    rset_clear(allow, base);
+    if (ir->op2 & IRSLOAD_CONVERT) {
+      if (irt_isint(t)) {
+        /* num->int: truncate the double, then move to a GPR. */
+        Reg tmp = ra_scratch(as, RSET_FPR);
+        emit_dj(as, LOONGI_MOVFR2GR_S, dest, tmp);
+        emit_dj(as, LOONGI_FTINTRZ_W_D, tmp, tmp);
+        dest = tmp;
+        t.irt = IRT_NUM; /* Check for original type. */
+      } else {
+        /* int->num: move to an FPR, then convert. */
+        Reg tmp = ra_scratch(as, RSET_GPR);
+        emit_dj(as, LOONGI_FFINT_D_W, dest, dest);
+        emit_dj(as, LOONGI_MOVGR2FR_W, dest, tmp);
+        dest = tmp;
+        t.irt = IRT_INT; /* Check for original type. */
+      }
+    } else if (irt_isaddr(t)) {
+      /* Clear type from pointers. */
+      emit_djml(as, LOONGI_BSTRPICK_D, dest, dest, 46, 0);
+    } else if (irt_isint(t) && (ir->op2 & IRSLOAD_TYPECHECK)) {
+      /* Sign-extend integers. */
+      emit_dju(as, LOONGI_SLLI_W, dest, dest, 0);
+    }
+    goto dotypecheck;
+  }
+  base = ra_alloc1(as, REF_BASE, allow);
+  rset_clear(allow, base);
+dotypecheck:
+  if ((ir->op2 & IRSLOAD_TYPECHECK)) {
+    /* Reuse dest for the tagged value if it is a GPR. */
+    if (dest < RID_MAX_GPR) {
+      type = dest;
+    } else {
+      type = ra_scratch(as, allow);
+    }
+    rset_clear(allow, type);
+    Reg tmp1 = ra_scratch(as, allow);
+    if (irt_ispri(t)) {
+      asm_guard(as, LOONGI_BNE, type,
+                ra_allock(as, ~((int64_t)~irt_toitype(t) << 47) , allow));
+    } else if ((ir->op2 & IRSLOAD_KEYINDEX)) {
+      /* Guard the upper 32 bits hold the key-index marker. */
+      asm_guard(as, LOONGI_BNE, tmp1,
+                ra_allock(as, (int32_t)LJ_KEYINDEX, allow));
+      emit_dju(as, LOONGI_SRAI_D, tmp1, type, 32);
+    } else {
+      if (irt_isnum(t)) {
+        /* Numbers: guard tag < LJ_TISNUM. */
+        asm_guard(as, LOONGI_BEQ, tmp1, RID_ZERO);
+        emit_dji(as, LOONGI_SLTUI, tmp1, tmp1, LJ_TISNUM&0xfff);
+        if (ra_hasreg(dest)) {
+          emit_lso(as, LOONGI_FLD_D, dest, base, ofs, allow);
+        }
+      } else {
+        asm_guard(as, LOONGI_BNE, tmp1,
+                  ra_allock(as, (int32_t)irt_toitype(t), allow));
+      }
+      emit_dju(as, LOONGI_SRAI_D, tmp1, type, 47);  /* Extract type tag. */
+    }
+    emit_lso(as, LOONGI_LD_D, type, base, ofs, allow);
+  } else if (ra_hasreg(dest)) {
+    if (irt_isnum(t)) {
+      emit_lso(as, LOONGI_FLD_D, dest, base, ofs, allow);
+    } else {
+      emit_lso(as, irt_isint(t) ? LOONGI_LD_W : LOONGI_LD_D, dest, base, ofs, allow);
+    }
+  }
+}
+
+/* -- Allocations --------------------------------------------------------- */
+
+#if LJ_HASFFI
+/* CNEW/CNEWI: allocate a cdata object via lj_mem_newgco() (or via
+** lj_cdata_newv() for VLA/VLS/aligned cdata) and initialize it.
+*/
+static void asm_cnew(ASMState *as, IRIns *ir)
+{
+  CTState *cts = ctype_ctsG(J2G(as->J));
+  CTypeID id = (CTypeID)IR(ir->op1)->i;
+  CTSize sz;
+  CTInfo info = lj_ctype_info(cts, id, &sz);
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
+  IRRef args[4];
+  RegSet drop = RSET_SCRATCH;
+  lj_assertA(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL),
+             "bad CNEW/CNEWI operands");
+
+  as->gcsteps++;
+  if (ra_hasreg(ir->r))
+    rset_clear(drop, ir->r); /* Dest reg handled below. */
+  ra_evictset(as, drop);
+  if (ra_used(ir))
+    ra_destreg(as, ir, RID_RET); /* GCcdata * */
+
+  /* Initialize immutable cdata object. */
+  if (ir->o == IR_CNEWI) {
+    RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
+    emit_dji(as, sz == 8 ? LOONGI_ST_D : LOONGI_ST_W, ra_alloc1(as, ir->op2, allow),
+             RID_RET, (sizeof(GCcdata))&0xfff);
+    lj_assertA(sz == 4 || sz == 8, "bad CNEWI size %d", sz);
+  } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */
+    ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
+    args[0] = ASMREF_L; /* lua_State *L */
+    args[1] = ir->op1; /* CTypeID id */
+    args[2] = ir->op2; /* CTSize sz */
+    args[3] = ASMREF_TMP1; /* CTSize align */
+    asm_gencall(as, ci, args);
+    emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
+    return;
+  }
+
+  /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
+  emit_dji(as, LOONGI_ST_B, RID_RET+1, RID_RET, (offsetof(GCcdata, gct))&0xfff);
+  emit_dji(as, LOONGI_ST_H, RID_TMP, RID_RET, (offsetof(GCcdata, ctypeid))&0xfff);
+  emit_dji(as, LOONGI_ADDI_D, RID_RET+1, RID_ZERO, ~LJ_TCDATA&0xfff);
+  emit_dj32i(as, RID_TMP, RID_ZERO, id);
+  args[0] = ASMREF_L; /* lua_State *L */
+  args[1] = ASMREF_TMP1; /* MSize size */
+  asm_gencall(as, ci, args);
+  ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), ra_releasetmp(as, ASMREF_TMP1));
+}
+#endif
+
+/* -- Write barriers ------------------------------------------------------ */
+
+/* TBAR: table write barrier. If the table is black, turn it gray again
+** and link it onto g->gc.grayagain (emitted bottom-up).
+*/
+static void asm_tbar(ASMState *as, IRIns *ir)
+{
+  Reg tab = ra_alloc1(as, ir->op1, RSET_GPR);
+  Reg mark = ra_scratch(as, rset_exclude(RSET_GPR, tab));
+  Reg link = RID_TMP;
+  MCLabel l_end = emit_label(as);
+  emit_dji(as, LOONGI_ST_D, link, tab, ((int32_t)offsetof(GCtab, gclist))&0xfff);
+  emit_dji(as, LOONGI_ST_B, mark, tab, ((int32_t)offsetof(GCtab, marked))&0xfff);
+  emit_setgl(as, tab, gc.grayagain); /* Make tab gray again. */
+  emit_getgl(as, link, gc.grayagain);
+  emit_branch(as, LOONGI_BEQ, RID_TMP, RID_ZERO, l_end); /* Not black: skip. */
+  emit_djk(as, LOONGI_XOR, mark, mark, RID_TMP); /* Clear black bit -> gray. */
+  emit_dju(as, LOONGI_ANDI, RID_TMP, mark, LJ_GC_BLACK);
+  emit_dji(as, LOONGI_LD_BU, mark, tab, ((int32_t)offsetof(GCtab, marked))&0xfff);
+}
+
+/* OBAR: write barrier for a closed upvalue. Calls lj_gc_barrieruv() only
+** if the upvalue is black and the stored value is white (emitted bottom-up).
+*/
+static void asm_obar(ASMState *as, IRIns *ir)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv];
+  IRRef args[2];
+  MCLabel l_end;
+  Reg obj, val, tmp;
+  /* No need for other object barriers (yet). */
+  lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type"); /* Closed upvalue. */
+  ra_evictset(as, RSET_SCRATCH);
+  l_end = emit_label(as);
+  args[0] = ASMREF_TMP1; /* global_State *g */
+  args[1] = ir->op1; /* TValue *tv */
+  asm_gencall(as, ci, args);
+  obj = IR(ir->op1)->r;
+  tmp = ra_scratch(as, rset_exclude(RSET_GPR, obj));
+  emit_branch(as, LOONGI_BEQ, tmp, RID_ZERO, l_end); /* Upvalue not black: skip. */
+  emit_addk(as, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768, RSET_GPR);
+  emit_branch(as, LOONGI_BEQ, RID_TMP, RID_ZERO, l_end); /* Value not white: skip. */
+  emit_dju(as, LOONGI_ANDI, tmp, tmp, LJ_GC_BLACK);
+  emit_dju(as, LOONGI_ANDI, RID_TMP, RID_TMP, LJ_GC_WHITES);
+  val = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, obj));
+  emit_dji(as, LOONGI_LD_BU, tmp, obj,
+           ((int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv))&0xfff);
+  emit_dji(as, LOONGI_LD_BU, RID_TMP, val, ((int32_t)offsetof(GChead, marked))&0xfff);
+}
+
+/* -- Arithmetic and logic operations ------------------------------------- */
+
+/* Binary FP arithmetic: dest = left <op> right. */
+static void asm_fparith(ASMState *as, IRIns *ir, LOONGIns loongi)
+{
+  Reg dest = ra_dest(as, ir, RSET_FPR);
+  Reg pair = ra_alloc2(as, ir, RSET_FPR);
+  Reg lhs = (pair & 255), rhs = (pair >> 8);
+  emit_djk(as, loongi, dest, lhs, rhs);
+}
+
+/* Unary FP operation: dest = <op> op1. */
+static void asm_fpunary(ASMState *as, IRIns *ir, LOONGIns loongi)
+{
+  Reg d = ra_dest(as, ir, RSET_FPR);
+  emit_dj(as, loongi, d, ra_hintalloc(as, ir->op1, d, RSET_FPR));
+}
+
+/* FPMATH: floor/ceil/trunc via lj_vm helpers, sqrt inline, rest via calls. */
+static void asm_fpmath(ASMState *as, IRIns *ir)
+{
+  IRFPMathOp fpm = (IRFPMathOp)ir->op2;
+  if (fpm <= IRFPM_TRUNC)
+    asm_callround(as, ir, IRCALL_lj_vm_floor + fpm);
+  else if (fpm == IRFPM_SQRT)
+    asm_fpunary(as, ir, LOONGI_FSQRT_D);
+  else
+    asm_callid(as, ir, IRCALL_lj_vm_floor + fpm);
+}
+
+/* ADD: FP add (fused with multiply if possible) or integer add with
+** immediate form when the constant fits si12.
+*/
+static void asm_add(ASMState *as, IRIns *ir)
+{
+  IRType1 t = ir->t;
+  if (irt_isnum(t)) {
+    if (!asm_fusemadd(as, ir, LOONGI_FMADD_D, LOONGI_FMADD_D))
+      asm_fparith(as, ir, LOONGI_FADD_D);
+    return;
+  } else {
+    Reg dest = ra_dest(as, ir, RSET_GPR);
+    Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
+    if (irref_isk(ir->op2)) {
+      intptr_t k = get_kval(as, ir->op2);
+      if (LOONGF_S_OK(k, 12)) { /* Constant fits si12. */
+        if (irt_is64(t)) {
+          emit_dji(as, LOONGI_ADDI_D, dest, left, k&0xfff);
+        } else {
+          emit_dji(as, LOONGI_ADDI_W, dest, left, k&0xfff);
+        }
+        return;
+      }
+    }
+    Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
+    emit_djk(as, irt_is64(t) ? LOONGI_ADD_D : LOONGI_ADD_W, dest,
+             left, right);
+  }
+}
+
+/* SUB: FP subtract (fused with multiply if possible) or integer subtract. */
+static void asm_sub(ASMState *as, IRIns *ir)
+{
+  if (irt_isnum(ir->t)) {
+    if (!asm_fusemadd(as, ir, LOONGI_FMSUB_D, LOONGI_FNMSUB_D))
+      asm_fparith(as, ir, LOONGI_FSUB_D);
+    return;
+  } else {
+    Reg dest = ra_dest(as, ir, RSET_GPR);
+    Reg right, left = ra_alloc2(as, ir, RSET_GPR);
+    right = (left >> 8); left &= 255;
+    emit_djk(as, irt_is64(ir->t) ? LOONGI_SUB_D : LOONGI_SUB_W, dest,
+             left, right);
+  }
+}
+
+/* MUL: FP uses FMUL.D; integers use MUL.W or MUL.D depending on width. */
+static void asm_mul(ASMState *as, IRIns *ir)
+{
+  if (irt_isnum(ir->t)) {
+    asm_fparith(as, ir, LOONGI_FMUL_D);
+  } else {
+    Reg dest = ra_dest(as, ir, RSET_GPR);
+    Reg pair = ra_alloc2(as, ir, RSET_GPR);
+    Reg lhs = (pair & 255), rhs = (pair >> 8);
+    emit_djk(as, irt_is64(ir->t) ? LOONGI_MUL_D : LOONGI_MUL_W,
+             dest, lhs, rhs);
+  }
+}
+
+/* DIV: FP division. */
+static void asm_fpdiv(ASMState *as, IRIns *ir)
+{
+  asm_fparith(as, ir, LOONGI_FDIV_D);
+}
+
+/* NEG: FP negate, or integer negate via subtraction from zero. */
+static void asm_neg(ASMState *as, IRIns *ir)
+{
+  if (irt_isnum(ir->t)) {
+    asm_fpunary(as, ir, LOONGI_FNEG_D);
+  } else {
+    Reg dest = ra_dest(as, ir, RSET_GPR);
+    Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
+    emit_djk(as, irt_is64(ir->t) ? LOONGI_SUB_D : LOONGI_SUB_W, dest,
+             RID_ZERO, left);
+  }
+}
+
+#define asm_abs(as, ir) asm_fpunary(as, ir, LOONGI_FABS_D)
+
+/* ADDOV/SUBOV: 32-bit add/subtract with a guard on signed overflow. */
+static void asm_arithov(ASMState *as, IRIns *ir)
+{
+  RegSet allow = RSET_GPR;
+  Reg right, left, tmp, tmp2, dest = ra_dest(as, ir, allow);
+  rset_clear(allow, dest);
+  lj_assertA(!irt_is64(ir->t), "bad usage");
+  tmp2 = ra_scratch(as, allow);
+  rset_clear(allow, tmp2);
+  if (irref_isk(ir->op2)) {
+    int k = IR(ir->op2)->i;
+    if (ir->o == IR_SUBOV) k = -k;  /* Fold SUBOV into an add of -k. */
+    if (LOONGF_S_OK(k, 12)) { /* (dest < left) == (k >= 0 ? 1 : 0) */
+      left = ra_alloc1(as, ir->op1, allow);
+      asm_guard(as, k >= 0 ? LOONGI_BNE : LOONGI_BEQ, tmp2, RID_ZERO);
+      emit_djk(as, LOONGI_SLT, tmp2, dest, dest == left ? tmp2 : left);
+      emit_dji(as, LOONGI_ADDI_D, dest, left, k&0xfff);
+      if (dest == left) emit_move(as, tmp2, left);  /* Preserve operand. */
+      return;
+    }
+  }
+  left = ra_alloc2(as, ir, allow);
+  right = (left >> 8); left &= 255;
+  rset_clear(allow, right);
+  rset_clear(allow, left);
+  tmp = ra_scratch(as, allow);
+  /* Emitted bottom-up: compute the result, then the overflow predicate. */
+  asm_guard(as, LOONGI_BLT, tmp2, RID_ZERO);
+  emit_djk(as, LOONGI_AND, tmp2, RID_TMP, tmp);
+  if (ir->o == IR_ADDOV) { /* ((dest^left) & (dest^right)) < 0 */
+    emit_djk(as, LOONGI_XOR, RID_TMP, dest, dest == right ? RID_TMP : right);
+  } else { /* ((dest^left) & (dest^~right)) < 0 */
+    emit_djk(as, LOONGI_XOR, RID_TMP, RID_TMP, dest);
+    emit_djk(as, LOONGI_NOR, RID_TMP, dest == right ? RID_TMP : right, RID_ZERO);
+  }
+  emit_djk(as, LOONGI_XOR, tmp, dest, dest == left ? RID_TMP : left);
+  emit_djk(as, ir->o == IR_ADDOV ? LOONGI_ADD_W : LOONGI_SUB_W, dest, left, right);
+  if (dest == left || dest == right)
+    emit_move(as, RID_TMP, dest == left ? left : right);  /* Save operand. */
+}
+
+#define asm_addov(as, ir) asm_arithov(as, ir)
+#define asm_subov(as, ir) asm_arithov(as, ir)
+
+/* MULOV: 32-bit multiply with overflow guard. The high word (MULH.W)
+** must equal the sign-extension of the low result, else exit.
+*/
+static void asm_mulov(ASMState *as, IRIns *ir)
+{
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  Reg tmp, tmp2, right, left = ra_alloc2(as, ir, RSET_GPR);
+  right = (left >> 8); left &= 255;
+  tmp = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_GPR, left),
+                   right), dest));
+  tmp2 = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(rset_exclude(RSET_GPR, left),
+                    right), dest), tmp));
+  asm_guard(as, LOONGI_BNE, tmp2, tmp);
+  emit_dju(as, LOONGI_SRAI_W, tmp2, dest, 31);
+  emit_djk(as, LOONGI_MUL_W, dest, left, right); /* dest: [31:0]+signextend */
+  emit_djk(as, LOONGI_MULH_W, tmp, left, right); /* tmp: [63:32] */
+}
+
+/* BNOT: bitwise not via NOR with zero. Fuses BNOT(BOR(a,b)) into one NOR. */
+static void asm_bnot(ASMState *as, IRIns *ir)
+{
+  Reg left, right, dest = ra_dest(as, ir, RSET_GPR);
+  IRIns *irl = IR(ir->op1);
+  if (mayfuse(as, ir->op1) && irl->o == IR_BOR) {
+    left = ra_alloc2(as, irl, RSET_GPR);
+    right = (left >> 8); left &= 255;
+  } else {
+    left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
+    right = RID_ZERO;  /* NOR x, zero == ~x */
+  }
+  emit_djk(as, LOONGI_NOR, dest, left, right);
+}
+
+/* BSWAP: byte-swap via REVB (bytes within halfwords) plus a halfword
+** swap: REVH.D for 64 bit, ROTRI.W 16 for 32 bit.
+*/
+static void asm_bswap(ASMState *as, IRIns *ir)
+{
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
+  if (irt_is64(ir->t)) {
+    emit_dj(as, LOONGI_REVH_D, dest, RID_TMP);
+    emit_dj(as, LOONGI_REVB_4H, RID_TMP, left);
+  } else {
+    emit_dju(as, LOONGI_ROTRI_W, dest, RID_TMP, 16);
+    emit_dj(as, LOONGI_REVB_2H, RID_TMP, left);
+  }
+}
+
+/* AND/OR/XOR, using the immediate encoding when the constant fits ui12. */
+static void asm_bitop(ASMState *as, IRIns *ir, LOONGIns loongi, LOONGIns loongik)
+{
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
+  if (irref_isk(ir->op2)) {
+    intptr_t k = get_kval(as, ir->op2);
+    if (checku12(k)) {
+      emit_dji(as, loongik, dest, left, k&0xfff);
+      return;
+    }
+  }
+  right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
+  emit_djk(as, loongi, dest, left, right);
+}
+
+#define asm_band(as, ir) asm_bitop(as, ir, LOONGI_AND, LOONGI_ANDI)
+#define asm_bor(as, ir) asm_bitop(as, ir, LOONGI_OR, LOONGI_ORI)
+#define asm_bxor(as, ir) asm_bitop(as, ir, LOONGI_XOR, LOONGI_XORI)
+
+/* Shifts/rotates; constant shift counts use the immediate encodings. */
+static void asm_bitshift(ASMState *as, IRIns *ir, LOONGIns loongi, LOONGIns loongik)
+{
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
+  uint32_t shmask = irt_is64(ir->t) ? 63 : 31;  /* Mask count to width. */
+  if (irref_isk(ir->op2)) { /* Constant shifts. */
+    uint32_t shift = (uint32_t)(IR(ir->op2)->i & shmask);
+    emit_dju(as, loongik, dest, left, shift);
+  } else {
+    Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
+    emit_djk(as, loongi, dest, left, right); /* Shift amount is in rs. */
+  }
+}
+
+#define asm_bshl(as, ir) (irt_is64(ir->t) ? \
+  asm_bitshift(as, ir, LOONGI_SLL_D, LOONGI_SLLI_D) : \
+  asm_bitshift(as, ir, LOONGI_SLL_W, LOONGI_SLLI_W))
+#define asm_bshr(as, ir) (irt_is64(ir->t) ? \
+  asm_bitshift(as, ir, LOONGI_SRL_D, LOONGI_SRLI_D) : \
+  asm_bitshift(as, ir, LOONGI_SRL_W, LOONGI_SRLI_W))
+#define asm_bsar(as, ir) (irt_is64(ir->t) ? \
+  asm_bitshift(as, ir, LOONGI_SRA_D, LOONGI_SRAI_D) : \
+  asm_bitshift(as, ir, LOONGI_SRA_W, LOONGI_SRAI_W))
+#define asm_brol(as, ir) lj_assertA(0, "unexpected BROL")
+#define asm_bror(as, ir) (irt_is64(ir->t) ? \
+  asm_bitshift(as, ir, LOONGI_ROTR_D, LOONGI_ROTRI_D) : \
+  asm_bitshift(as, ir, LOONGI_ROTR_W, LOONGI_ROTRI_W))
+
+/* MIN/MAX: FP uses FMIN.D/FMAX.D; integers build a conditional select
+** from SLT + MASKEQZ/MASKNEZ + OR (emitted bottom-up).
+*/
+static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
+{
+  if (irt_isnum(ir->t)) {
+    Reg dest = ra_dest(as, ir, RSET_FPR);
+    Reg right, left = ra_alloc2(as, ir, RSET_FPR);
+    right = (left >> 8); left &= 255;
+    emit_djk(as, ismax ? LOONGI_FMAX_D : LOONGI_FMIN_D, dest, left, right);
+  } else {
+    Reg dest = ra_dest(as, ir, RSET_GPR);
+    Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
+    Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
+    emit_djk(as, LOONGI_OR, dest, dest, RID_TMP);  /* Merge both halves. */
+    if (dest != right) {
+      emit_djk(as, LOONGI_MASKEQZ, RID_TMP, right, RID_TMP);
+      emit_djk(as, LOONGI_MASKNEZ, dest, left, RID_TMP);
+    } else {
+      emit_djk(as, LOONGI_MASKNEZ, RID_TMP, left, RID_TMP);
+      emit_djk(as, LOONGI_MASKEQZ, dest, right, RID_TMP);
+    }
+    emit_djk(as, LOONGI_SLT, RID_TMP,
+             ismax ? left : right, ismax ? right : left);
+  }
+}
+
+#define asm_min(as, ir) asm_min_max(as, ir, 0)
+#define asm_max(as, ir) asm_min_max(as, ir, 1)
+
+/* -- Comparisons --------------------------------------------------------- */
+
+/* FP comparisons. */
+/* FP comparisons: set the FP condition flag with an FCMP variant, then
+** guard on BCEQZ/BCNEZ depending on the (possibly inverted) IR opcode.
+*/
+static void asm_fpcomp(ASMState *as, IRIns *ir)
+{
+  IROp op = ir->o;
+  Reg right, left = ra_alloc2(as, ir, RSET_FPR);
+  right = (left >> 8); left &= 255;
+  asm_guard21(as, (op&1) ? LOONGI_BCNEZ : LOONGI_BCEQZ, 0);
+  switch (op) {
+  case IR_LT: case IR_UGE:
+    emit_djk(as, LOONGI_FCMP_CLT_D, 0, left, right);
+    break;
+  case IR_GE: case IR_ULT:
+    /* Unordered-or-less-than handles NaN for the inverted cases. */
+    emit_djk(as, LOONGI_FCMP_CULT_D, 0, left, right);
+    break;
+  case IR_LE: case IR_UGT: case IR_ABC:
+    emit_djk(as, LOONGI_FCMP_CLE_D, 0, left, right);
+    break;
+  case IR_ULE: case IR_GT:
+    emit_djk(as, LOONGI_FCMP_CULE_D, 0, left, right);
+    break;
+  case IR_EQ: case IR_NE:
+    emit_djk(as, LOONGI_FCMP_CEQ_D, 0, left, right);
+    break;
+  default:
+    break;
+  }
+}
+
+/* Integer comparisons. */
+/* Integer comparisons via SLT/SLTU plus a guard on the flag register. */
+static void asm_intcomp(ASMState *as, IRIns *ir)
+{
+  /* ORDER IR: LT GE LE GT  ULT UGE ULE UGT. */
+  /*           00 01 10 11  100 101 110 111  */
+  IROp op = ir->o;
+  RegSet allow = RSET_GPR;
+  Reg tmp, right, left = ra_alloc1(as, ir->op1, allow);
+  rset_clear(allow, left);
+  if (op == IR_ABC) op = IR_UGT;  /* Array bounds check is unsigned >. */
+  if ((op&4) == 0 && irref_isk(ir->op2) && get_kval(as, ir->op2) == 0) {
+    /* Signed comparison against zero: branch directly on the sign. */
+    switch (op) {
+    case IR_GT: asm_guard(as, LOONGI_BGE, RID_ZERO, left); break;
+    case IR_LE: asm_guard(as, LOONGI_BLT, RID_ZERO, left); break;
+    case IR_GE: asm_guard(as, LOONGI_BLT, left, RID_ZERO); break;
+    case IR_LT: asm_guard(as, LOONGI_BGE, left, RID_ZERO); break;
+    default: break;
+    }
+    return;
+  }
+  tmp = ra_scratch(as, allow);
+  rset_clear(allow, tmp);
+  if (irref_isk(ir->op2)) {
+    intptr_t k = get_kval(as, ir->op2);
+    if ((op&2)) k++;  /* x <= k  -->  x < k+1 (LE/GT cases). */
+    if (checki12(k)) {
+      asm_guard(as, (op&1) ? LOONGI_BNE : LOONGI_BEQ, tmp, RID_ZERO);
+      emit_dji(as, (op&4) ? LOONGI_SLTUI : LOONGI_SLTI, tmp, left, k&0xfff);
+      return;
+    }
+  }
+  right = ra_alloc1(as, ir->op2, allow);
+  asm_guard(as, ((op^(op>>1))&1) ? LOONGI_BNE : LOONGI_BEQ, tmp, RID_ZERO);
+  emit_djk(as, (op&4) ? LOONGI_SLTU : LOONGI_SLT,
+           tmp, (op&2) ? right : left, (op&2) ? left : right);
+}
+
+/* Dispatch a comparison to the FP or integer backend by operand type. */
+static void asm_comp(ASMState *as, IRIns *ir)
+{
+  (irt_isnum(ir->t) ? asm_fpcomp : asm_intcomp)(as, ir);
+}
+
+/* EQ/NE: FP goes through asm_fpcomp; integers/pointers guard directly. */
+static void asm_equal(ASMState *as, IRIns *ir)
+{
+  if (irt_isnum(ir->t)) {
+    asm_fpcomp(as, ir);
+  } else {
+    Reg right, left = ra_alloc2(as, ir, RSET_GPR);
+    right = (left >> 8); left &= 255;
+    /* Guard with the inverted branch: exit when the comparison fails. */
+    asm_guard(as, (ir->o & 1) ? LOONGI_BEQ : LOONGI_BNE, left, right);
+  }
+}
+
+/* -- Split register ops -------------------------------------------------- */
+
+/* Hiword op of a split 64 bit op. Previous op must be the loword op. */
+/* Hiword op of a split 64 bit op. Previous op must be the loword op.
+** On this 64-bit port only call results need handling: mark the loword
+** result register as used so it is not dead-code eliminated.
+*/
+static void asm_hiop(ASMState *as, IRIns *ir)
+{
+  /* HIOP is marked as a store because it needs its own DCE logic. */
+  int uselo = ra_used(ir-1), usehi = ra_used(ir);  /* Loword/hiword used? */
+  if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
+  if (!usehi) return;  /* Skip unused hiword op for all remaining ops. */
+  switch ((ir-1)->o) {
+  case IR_CALLN:
+  case IR_CALLL:
+  case IR_CALLS:
+  case IR_CALLXS:
+    if (!uselo)
+      ra_allocref(as, ir->op1, RID2RSET(RID_RETLO));  /* Mark lo op as used. */
+    break;
+  default: lj_assertA(0, "bad HIOP for op %d", (ir-1)->o); break;
+  }
+}
+
+/* -- Profiling ----------------------------------------------------------- */
+
+/* PROF: exit the trace if the profiler hook bit is set in g->hookmask. */
+static void asm_prof(ASMState *as, IRIns *ir)
+{
+  UNUSED(ir);
+  Reg tmp = ra_scratch(as, RSET_GPR);
+  asm_guard(as, LOONGI_BNE, tmp, RID_ZERO);
+  emit_dju(as, LOONGI_ANDI, tmp, tmp, HOOK_PROFILE);
+  emit_lsglptr2(as, LOONGI_LD_BU, tmp,
+                (int32_t)offsetof(global_State, hookmask));
+}
+
+/* -- Stack handling ------------------------------------------------------ */
+
+/* Check Lua stack size for overflow. Use exit handler as fallback. */
+/* Check Lua stack size for overflow. Use exit handler as fallback. */
+static void asm_stack_check(ASMState *as, BCReg topslot,
+                            IRIns *irp, RegSet allow, ExitNo exitno)
+{
+  /* Try to get an unused temp register, otherwise spill/restore RID_RET*. */
+  Reg tmp, pbase = irp ? (ra_hasreg(irp->r) ? irp->r : RID_TMP) : RID_BASE;
+  ExitNo oldsnap = as->snapno;
+  rset_clear(allow, pbase);
+  /* Temporarily redirect the guard to the requested exit number. */
+  as->snapno = exitno;
+  asm_guard(as, LOONGI_BNE, RID_R20, RID_ZERO);
+  as->snapno = oldsnap;
+  if (allow) {
+    tmp = rset_pickbot(allow);
+    ra_modified(as, tmp);
+  } else { /* allow == RSET_EMPTY */
+    tmp = RID_RET;
+    emit_dji(as, LOONGI_LD_D, tmp, RID_SP, 0);  /* Restore tmp1 register. */
+  }
+  lj_assertA(checki12(8*topslot), "slot offset %d does not fit in si12", 8*topslot);
+  /* RID_R20 = (maxstack - base) < 8*topslot ? 1 : 0 (emitted bottom-up). */
+  emit_dji(as, LOONGI_SLTUI, RID_R20, RID_R20, (int32_t)(8*topslot)&0xfff);
+  emit_djk(as, LOONGI_SUB_D, RID_R20, tmp, pbase);
+  emit_dji(as, LOONGI_LD_D, tmp, tmp, offsetof(lua_State, maxstack));
+  if (pbase == RID_TMP)
+    emit_getgl(as, RID_TMP, jit_base);
+  emit_getgl(as, tmp, cur_L);
+  if (allow == RSET_EMPTY)  /* Spill temp register. */
+    emit_dji(as, LOONGI_ST_D, tmp, RID_SP, 0);
+}
+
+/* Restore Lua stack from on-trace state. */
+static void asm_stack_restore(ASMState *as, SnapShot *snap)
+{
+  SnapEntry *map = &as->T->snapmap[snap->mapofs];
+#ifdef LUA_USE_ASSERT
+  SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1-LJ_FR2];
+#endif
+  MSize n, nent = snap->nent;
+  /* Store the value of all modified slots to the Lua stack. */
+  for (n = 0; n < nent; n++) {
+    SnapEntry sn = map[n];
+    BCReg s = snap_slot(sn);
+    int32_t ofs = 8*((int32_t)s-1-LJ_FR2);  /* Slot offset from BASE. */
+    IRRef ref = snap_ref(sn);
+    IRIns *ir = IR(ref);
+    if ((sn & SNAP_NORESTORE))
+      continue;
+    if (irt_isnum(ir->t)) {
+      Reg src = ra_alloc1(as, ref, RSET_FPR);
+      emit_dji(as, LOONGI_FST_D, src, RID_BASE, ofs&0xfff);
+    } else {
+      if ((sn & SNAP_KEYINDEX)) {
+        /* Store the value tagged with the key-index marker in the
+        ** upper 32 bits; the offset goes through RID_R20 (STX.D).
+        */
+        RegSet allow = rset_exclude(RSET_GPR, RID_BASE);
+        int64_t kki = (int64_t)LJ_KEYINDEX << 32;
+        if (irref_isk(ref)) {
+          emit_djk(as, LOONGI_STX_D,
+                   ra_allock(as, kki | (int64_t)(uint32_t)ir->i, allow),
+                   RID_BASE, RID_R20);
+          emit_d16i(as, RID_R20, ofs);
+        } else {
+          Reg src = ra_alloc1(as, ref, allow);
+          Reg rki = ra_allock(as, kki, rset_exclude(allow, src));
+          emit_djk(as, LOONGI_STX_D, RID_TMP, RID_BASE, RID_R20);
+          emit_d16i(as, RID_R20, ofs);
+          emit_djk(as, LOONGI_ADD_D, RID_TMP, src, rki);
+        }
+      } else {
+        asm_tvstore64(as, RID_BASE, ofs, ref);
+      }
+    }
+    checkmclim(as);
+  }
+  lj_assertA(map + nent == flinks, "inconsistent frames in snapshot");
+}
+
+/* -- GC handling --------------------------------------------------------- */
+
+/* Marker to prevent patching the GC check exit. */
+#define LOONG_NOPATCH_GC_CHECK LOONGI_OR
+
+/* Check GC threshold and do one or more GC steps. */
+static void asm_gc_check(ASMState *as)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit];
+  IRRef args[2];
+  MCLabel l_end;
+  Reg tmp1, tmp2;
+  ra_evictset(as, RSET_SCRATCH);
+  l_end = emit_label(as);
+  /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */
+  asm_guard(as, LOONGI_BNE, RID_RET, RID_ZERO); /* Assumes asm_snap_prep() already done. */
+  *--as->mcp = LOONG_NOPATCH_GC_CHECK;  /* Marker: do not patch this exit. */
+  args[0] = ASMREF_TMP1; /* global_State *g */
+  args[1] = ASMREF_TMP2; /* MSize steps */
+  asm_gencall(as, ci, args);
+  tmp1 = ra_releasetmp(as, ASMREF_TMP1);
+  tmp2 = ra_releasetmp(as, ASMREF_TMP2);
+  ra_allockreg(as, (int64_t)(J2G(as->J)), tmp1);
+  emit_loadi(as, tmp2, as->gcsteps);
+  /* Jump around GC step if GC total < GC threshold. */
+  emit_branch(as, LOONGI_BLTU, RID_TMP, tmp2, l_end);
+  emit_getgl(as, tmp2, gc.threshold);
+  emit_getgl(as, RID_TMP, gc.total);
+  as->gcsteps = 0;
+  checkmclim(as);
+}
+
+/* -- Loop handling ------------------------------------------------------- */
+
+/* Fixup the loop branch. */
+static void asm_loop_fixup(ASMState *as)
+{
+ MCode *p = as->mctop;
+ MCode *target = as->mcp;
+ if (as->loopinv) { /* Inverted loop branch? */
+ /* asm_guard* already inverted the bceqz/bcnez/beq/bne/blt/bge, and patched the final b. */
+ uint32_t mask = (p[-2] & 0xfc000000) == 0x48000000 ? 0x1fffffu : 0xffffu; /* 0x48000000 = bceqz/bcnez group: 21 bit offset, else 16 bit. */
+ ptrdiff_t delta = target - (p - 2); /* Displacement in instruction words. */
+ if (mask == 0x1fffffu) { /* BCEQZ BCNEZ*/
+ p[-2] = p[-2] | LOONGF_I((uint32_t)delta & 0xffffu) | (((uint32_t)delta & 0x1f0000u) >> 16);
+ } else { /* BEQ BNE BLE BGE BLTU BGEU*/
+ p[-2] |= LOONGF_I(delta & 0xffffu);
+ }
+ if (p[-1] == 0) /* Fill a cleared trailing slot with a NOP. */
+ p[-1] = LOONGI_NOP;
+ } else {
+ /* b */
+ ptrdiff_t delta = target - (p - 1);
+ p[-1] = LOONGI_B | LOONGF_I(delta & 0xffffu) | ((delta & 0x3ff0000) >> 16); /* offs26 split: low 16 bits at 10, high 10 bits at 0. */
+ }
+}
+
+/* Fixup the tail of the loop. */
+static void asm_loop_tail_fixup(ASMState *as)
+{
+ UNUSED(as); /* Nothing to do. */
+}
+
+/* -- Head of trace ------------------------------------------------------- */
+
+/* Coalesce BASE register for a root trace. */
+static void asm_head_root_base(ASMState *as)
+{
+ IRIns *ir = IR(REF_BASE);
+ Reg r = ir->r;
+ if (ra_hasreg(r)) {
+ ra_free(as, r);
+ if (rset_test(as->modset, r) || irt_ismarked(ir->t))
+ ir->r = RID_INIT; /* No inheritance for modified BASE register. */
+ if (r != RID_BASE)
+ emit_move(as, r, RID_BASE); /* Copy interpreter BASE into the allocated reg. */
+ }
+}
+
+/* Coalesce BASE register for a side trace. */
+static Reg asm_head_side_base(ASMState *as, IRIns *irp)
+{
+ IRIns *ir = IR(REF_BASE);
+ Reg r = ir->r;
+ if (ra_hasreg(r)) {
+ ra_free(as, r);
+ if (rset_test(as->modset, r) || irt_ismarked(ir->t))
+ ir->r = RID_INIT; /* No inheritance for modified BASE register. */
+ if (irp->r == r) {
+ return r; /* Same BASE register already coalesced. */
+ } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) {
+ emit_move(as, r, irp->r); /* Move from coalesced parent reg. */
+ return irp->r;
+ } else {
+ emit_getgl(as, r, jit_base); /* Otherwise reload BASE. */
+ }
+ }
+ return RID_NONE;
+}
+
+/* -- Tail of trace ------------------------------------------------------- */
+
+/* Fixup the tail code. */
+static void asm_tail_fixup(ASMState *as, TraceNo lnk)
+{
+ MCode *target = lnk ? traceref(as->J,lnk)->mcode : (MCode *)lj_vm_exit_interp;
+ int32_t spadj = as->T->spadjust;
+ MCode *p = as->mctop - 1; /* Slot reserved by asm_tail_prep(). */
+ ptrdiff_t delta; /* Hoisted: keep declarations before statements (C89 style). */
+ if (spadj == 0) {
+ p[-1] = LOONGI_NOP; /* No stack adjustment needed. */
+ } else {
+ p[-1] = LOONGI_ADDI_D|LOONGF_D(RID_SP)|LOONGF_J(RID_SP)|LOONGF_I(spadj); /* NOTE(review): assumes spadj fits the signed 12 bit immediate -- confirm. */
+ }
+ delta = target - p; /* Branch displacement in instruction words. */
+ *p = LOONGI_B | LOONGF_I((uintptr_t)delta&0xffffu) | (((uintptr_t)delta&0x3ff0000u) >> 16);
+}
+
+/* Prepare tail of code. */
+static void asm_tail_prep(ASMState *as)
+{
+ MCode *p = as->mctop - 1; /* Leave room for exit branch. */
+ if (as->loopref) {
+ as->invmcp = as->mcp = p; /* Loop branch may later be inverted in place. */
+ } else {
+ as->mcp = p-1; /* Leave room for stack pointer adjustment. */
+ as->invmcp = NULL;
+ }
+ *p = LOONGI_NOP; /* Prevent load/store merging. */
+}
+
+/* -- Trace setup --------------------------------------------------------- */
+
+/* Ensure there are enough stack slots for call arguments. */
+static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
+{
+ IRRef args[CCI_NARGS_MAX*2];
+ uint32_t i, nargs = CCI_XNARGS(ci);
+ int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
+ asm_collectargs(as, ir, ci, args);
+ for (i = 0; i < nargs; i++) {
+ if (args[i] && irt_isfp(IR(args[i])->t)) { /* FP arg: FPR first, then GPR, then stack. */
+ if (nfpr > 0)
+ nfpr--;
+ else if (ngpr > 0)
+ ngpr--;
+ else
+ nslots += 2; /* One 64 bit stacked arg = two spill slots. */
+ } else { /* Integer/pointer arg: GPR, then stack. */
+ if (ngpr > 0)
+ ngpr--;
+ else
+ nslots += 2;
+ }
+ }
+ if (nslots > as->evenspill) /* Leave room for args in stack slots. */
+ as->evenspill = nslots;
+ return REGSP_HINT(RID_RET);
+}
+
+static void asm_sparejump_setup(ASMState *as) /* Reserve 4 spare instruction slots at the very end of the mcode area. */
+{
+ MCode *mxp = as->mctop;
+ if ((char *)mxp == (char *)as->J->mcarea + as->J->szmcarea) { /* Only at the area's true end. */
+ mxp -= 4*1;
+ as->mctop = mxp;
+ }
+}
+
+static void asm_setup_target(ASMState *as) /* Per-trace setup: spare jumps + exit stubs. */
+{
+ asm_sparejump_setup(as);
+ asm_exitstub_setup(as);
+}
+
+/* -- Trace patching ------------------------------------------------------ */
+
+/* Patch exit jumps of existing machine code to a new target. */
+void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
+{
+ MCode *p = T->mcode;
+ MCode *pe = (MCode *)((char *)p + T->szmcode);
+ MCode *px = exitstub_trace_addr(T, exitno);
+ MCode *cstart = NULL;
+ MCode *mcarea = lj_mcode_patch(J, p, 0);
+
+ MCode exitload = LOONGI_ADDI_D | LOONGF_D(RID_TMP) | LOONGF_J(RID_ZERO) | LOONGF_I(exitno&0xfff); /* "li TMP, exitno" marks an exit site. */
+
+ for (; p < pe; p++) {
+ if (*p == exitload) { /* Found the exit-number load; the branch follows in p[1]. */
+ /* Look for exitstub branch, replace with branch to target. */
+ ptrdiff_t delta = target - p - 1; /* New displacement, in instruction words. */
+ MCode ins = p[1];
+ if (((ins ^ ((px-p-1)<<10)) & 0x3fffc00) == 0 && /* offs16 field currently targets px? */
+ ((ins & 0xfc000000u) == LOONGI_BEQ ||
+ (ins & 0xfc000000u) == LOONGI_BNE ||
+ (ins & 0xfc000000u) == LOONGI_BLT ||
+ (ins & 0xfc000000u) == LOONGI_BGE ||
+ (ins & 0xfc000000u) == LOONGI_BLTU)) {
+ /* Patch beq/bne/blt/bge, if within range. */
+ if (p[-1] == LOONG_NOPATCH_GC_CHECK) { /* Never patch the GC check exit. */
+ /* nothing */
+ } else if (LOONGF_S_OK(delta, 16)) {
+ p[1] = (ins & 0xfc0003ffu) | LOONGF_I(delta & 0xffff);
+ *p = LOONGI_NOP;
+ if (!cstart) cstart = p + 1;
+ }
+ } else if (((ins ^ ((((px-p-1)&0xffff)<<10) + (((px-p-1)>>10)&0x1f))) & 0x3fffc1f) == 0 && /* offs21 fields target px? */
+ ((ins & 0xfc000000u) == LOONGI_BCEQZ ||
+ (ins & 0xfc000100u) == LOONGI_BCNEZ)) {
+ /* Patch bceqz/bcnez, if within range. */
+ if (p[-1] == LOONG_NOPATCH_GC_CHECK) { /* Never patch the GC check exit. */
+ /* nothing */
+ } else if (LOONGF_S_OK(delta, 21)) {
+ p[1] = (ins & 0xfc0003e0u) | LOONGF_I(delta & 0xffff) | ((delta & 0x1f0000) >> 16);
+ *p = LOONGI_NOP;
+ if (!cstart) cstart = p + 1;
+ }
+ } else if (((ins ^ ((((px-p-1)&0xffff)<<10) + (((px-p-1)>>10)&0x3f))) & 0x3ffffff) == 0 && /* offs26 fields target px? */
+ ((ins & 0xfc000000u) == LOONGI_B)) {
+ /* Patch b. */
+ lj_assertJ(LOONGF_S_OK(delta, 26), "branch target out of range");
+ p[1] = (ins & 0xfc000000u) | LOONGF_I(delta & 0xffff) | ((delta & 0x3ff0000) >> 16);
+ *p = LOONGI_NOP;
+ if (!cstart) cstart = p + 1;
+ } else if (p+2 == pe){ /* Last exitload: use the trailing spare slot for a long b. */
+ if (p[2] == LOONGI_NOP) {
+ ptrdiff_t delta = target - &p[2];
+ lj_assertJ(LOONGF_S_OK(delta, 26), "branch target out of range");
+ p[2] = LOONGI_B | LOONGF_I(delta & 0xffff) | ((delta & 0x3ff0000) >> 16);
+ *p = LOONGI_NOP;
+ if (!cstart) cstart = p + 2;
+ }
+ }
+ }
+ }
+ if (cstart) lj_mcode_sync(cstart, px+1); /* Flush I-cache over the patched range. */
+ lj_mcode_patch(J, mcarea, 1);
+}
diff --git a/src/lj_ccall.c b/src/lj_ccall.c
index e4bed4f84..db4abacdc 100644
--- a/src/lj_ccall.c
+++ b/src/lj_ccall.c
@@ -778,6 +778,95 @@
} \
}
+#elif LJ_TARGET_LOONGARCH64
+/* -- LoongArch lp64 calling conventions ---------------------------------------- */
+
+#define CCALL_HANDLE_STRUCTRET \
+ /* Return structs of size > 16 by reference. */ \
+ cc->retref = !(sz <= 16); \
+ if (cc->retref) cc->gpr[ngpr++] = (GPRArg)dp;
+
+#define CCALL_HANDLE_STRUCTRET2 \
+ unsigned int cl = ccall_classify_struct(cts, ctr); \
+ if ((cl & 4) && (cl >> 8) <= 2) { \
+ CTSize i = (cl >> 8) - 1; \
+ do { ((float *)dp)[i] = cc->fpr[i].f; } while (i--); \
+ } else { \
+ if (cl > 1) { \
+ sp = (uint8_t *)&cc->fpr[0]; \
+ if ((cl >> 8) > 2) \
+ sp = (uint8_t *)&cc->gpr[0]; \
+ } \
+ memcpy(dp, sp, ctr->size); \
+ } \
+
+#define CCALL_HANDLE_COMPLEXRET \
+ /* Complex values are returned in 1 or 2 FPRs. */ \
+ cc->retref = 0;
+
+#define CCALL_HANDLE_COMPLEXRET2 \
+ if (ctr->size == 2*sizeof(float)) { /* Copy complex float from FPRs. */ \
+ ((float *)dp)[0] = cc->fpr[0].f; \
+ ((float *)dp)[1] = cc->fpr[1].f; \
+ } else { /* Copy complex double from FPRs. */ \
+ ((double *)dp)[0] = cc->fpr[0].d; \
+ ((double *)dp)[1] = cc->fpr[1].d; \
+ }
+
+#define CCALL_HANDLE_COMPLEXARG \
+ /* Pass complex double by reference. */ \
+ if (sz == 4*sizeof(double)) { \
+ rp = cdataptr(lj_cdata_new(cts, did, sz)); \
+ sz = CTSIZE_PTR; \
+ } else if (sz == 2*sizeof(float)) { \
+ isfp = 2; \
+ sz = 2*CTSIZE_PTR; \
+ } else { \
+ isfp = 1; \
+ sz = 2*CTSIZE_PTR; \
+ }
+
+#define CCALL_HANDLE_RET \
+ if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
+ sp = (uint8_t *)&cc->fpr[0].f;
+
+#define CCALL_HANDLE_STRUCTARG \
+ /* Pass structs of size >16 by reference. */ \
+ unsigned int cl = ccall_classify_struct(cts, d); \
+ nff = cl >> 8; \
+ if (sz > 16) { \
+ rp = cdataptr(lj_cdata_new(cts, did, sz)); \
+ sz = CTSIZE_PTR; \
+ } \
+ /* Pass struct in FPRs. */ \
+ if (cl > 1) { \
+ isfp = (cl & 4) ? 2 : 1; \
+ }
+
+
+#define CCALL_HANDLE_REGARG \
+ if (isfp && (!isva)) { /* Try to pass argument in FPRs. */ \
+ int n2 = ctype_isvector(d->info) ? 1 : \
+ isfp == 1 ? n : 2; \
+ if (nfpr + n2 <= CCALL_NARG_FPR && nff <= 2) { \
+ dp = &cc->fpr[nfpr]; \
+ nfpr += n2; \
+ goto done; \
+ } else { \
+ if (ngpr + n2 <= maxgpr) { \
+ dp = &cc->gpr[ngpr]; \
+ ngpr += n2; \
+ goto done; \
+ } \
+ } \
+ } else { /* Try to pass argument in GPRs. */ \
+ if (ngpr + n <= maxgpr) { \
+ dp = &cc->gpr[ngpr]; \
+ ngpr += n; \
+ goto done; \
+ } \
+ }
+
#else
#error "Missing calling convention definitions for this architecture"
#endif
@@ -1183,6 +1272,53 @@ static unsigned int ccall_classify_struct(CTState *cts, CType *ct)
#endif
+/* -- LoongArch64 ABI struct classification ---------------------------- */
+
+#if LJ_TARGET_LOONGARCH64
+
+static unsigned int ccall_classify_struct(CTState *cts, CType *ct) /* Returns (nmembers << 8) + elemsize for a homogeneous FP aggregate, 1 if GPR-sized, else 0. */
+{
+ CTSize sz = ct->size;
+ unsigned int r = 0, n = 0, isu = (ct->info & CTF_UNION);
+ while (ct->sib) {
+ CType *sct;
+ ct = ctype_get(cts, ct->sib);
+ if (ctype_isfield(ct->info)) {
+ sct = ctype_rawchild(cts, ct);
+ if (ctype_isfp(sct->info)) {
+ r |= sct->size; /* Accumulate FP member size (4 = float, 8 = double). */
+ if (!isu) n++; else if (n == 0) n = 1; /* Unions count overlapping members once. */
+ } else if (ctype_iscomplex(sct->info)) {
+ r |= (sct->size >> 1); /* Complex = two FP members of half its size. */
+ if (!isu) n += 2; else if (n < 2) n = 2;
+ } else if (ctype_isstruct(sct->info)) {
+ goto substruct;
+ } else {
+ goto noth;
+ }
+ } else if (ctype_isbitfield(ct->info)) {
+ goto noth;
+ } else if (ctype_isxattrib(ct->info, CTA_SUBTYPE)) {
+ sct = ctype_rawchild(cts, ct);
+ substruct:
+ if (sct->size > 0) {
+ unsigned int s = ccall_classify_struct(cts, sct); /* Recurse into nested struct. */
+ if (s <= 1) goto noth;
+ r |= (s & 255);
+ if (!isu) n += (s >> 8); else if (n < (s >>8)) n = (s >> 8);
+ }
+ }
+ }
+ if ((r == 4 || r == 8) && n <= 4) /* All FP members have the same size? */
+ return r + (n << 8);
+noth: /* Not a homogeneous float/double aggregate. */
+ return (sz <= 16); /* Return structs of size <= 16 in GPRs. */
+}
+
+
+#endif
+
+
/* -- Common C call handling ---------------------------------------------- */
/* Infer the destination CTypeID for a vararg argument. */
@@ -1232,7 +1368,9 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
#if LJ_TARGET_RISCV64
int nff = 0;
#endif
-
+#if LJ_TARGET_LOONGARCH64
+ int nff = 0;
+#endif
/* Clear unused regs to get some determinism in case of misdeclaration. */
memset(cc->gpr, 0, sizeof(cc->gpr));
#if CCALL_NUM_FPR
@@ -1426,7 +1564,7 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
if (isfp && d->size == sizeof(float))
((uint32_t *)dp)[1] = 0xffffffffu; /* Float NaN boxing */
#endif
-#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) || LJ_TARGET_RISCV64
+#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) || LJ_TARGET_RISCV64 || LJ_TARGET_LOONGARCH64
if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info)
#if LJ_TARGET_MIPS64
|| (isfp && nsp == 0)
@@ -1474,6 +1612,14 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
((uint64_t *)dp)[i] = 0xffffffff00000000ul | ((uint32_t *)dp)[i];
} while (i--);
}
+#elif LJ_TARGET_LOONGARCH64
+ if (isfp == 2 && nff <= 2) {
+ /* Split complex float into separate registers. */
+ CTSize i = (sz >> 2) - 1;
+ do {
+ ((uint64_t *)dp)[i] = ((uint32_t *)dp)[i];
+ } while (i--);
+ }
#else
UNUSED(isfp);
#endif
@@ -1483,7 +1629,7 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
if ((int32_t)nsp < 0) nsp = 0;
#endif
-#if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP) || LJ_TARGET_RISCV64
+#if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP) || LJ_TARGET_RISCV64 || LJ_TARGET_LOONGARCH64
cc->nfpr = nfpr; /* Required for vararg functions. */
#endif
cc->nsp = (nsp + CTSIZE_PTR-1) & ~(CTSIZE_PTR-1);
diff --git a/src/lj_ccall.h b/src/lj_ccall.h
index 609effa0b..ad2e1e9fe 100644
--- a/src/lj_ccall.h
+++ b/src/lj_ccall.h
@@ -172,6 +172,21 @@ typedef union FPRArg {
struct { LJ_ENDIAN_LOHI(float f; , float g;) };
} FPRArg;
+#elif LJ_TARGET_LOONGARCH64
+
+#define CCALL_NARG_GPR 8
+#define CCALL_NARG_FPR 8
+#define CCALL_NRET_GPR 2
+#define CCALL_NRET_FPR 2
+#define CCALL_SPS_EXTRA 3
+#define CCALL_SPS_FREE 1
+
+typedef intptr_t GPRArg;
+typedef union FPRArg {
+ double d;
+ struct { LJ_ENDIAN_LOHI(float f; , float g;) };
+} FPRArg;
+
#else
#error "Missing calling convention definitions for this architecture"
#endif
@@ -219,7 +234,7 @@ typedef LJ_ALIGN(CCALL_ALIGN_CALLSTATE) struct CCallState {
uint8_t resx87; /* Result on x87 stack: 1:float, 2:double. */
#elif LJ_TARGET_ARM64
void *retp; /* Aggregate return pointer in x8. */
-#elif LJ_TARGET_PPC || LJ_TARGET_RISCV64
+#elif LJ_TARGET_PPC || LJ_TARGET_RISCV64 || LJ_TARGET_LOONGARCH64
uint8_t nfpr; /* Number of arguments in FPRs. */
#endif
#if LJ_32
diff --git a/src/lj_ccallback.c b/src/lj_ccallback.c
index ef9c13ffc..a563cf1e1 100644
--- a/src/lj_ccallback.c
+++ b/src/lj_ccallback.c
@@ -95,6 +95,10 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs)
#define CALLBACK_MCODE_HEAD 68
+#elif LJ_TARGET_LOONGARCH64
+
+#define CALLBACK_MCODE_HEAD 52
+
#else
/* Missing support for this architecture. */
@@ -330,6 +334,33 @@ static void *callback_mcode_init(global_State *g, uint32_t *page)
}
return p;
}
+#elif LJ_TARGET_LOONGARCH64
+static void *callback_mcode_init(global_State *g, uint32_t *page) /* Lay out the callback page: head + one 2-insn stub per slot. */
+{
+ uint32_t *p = page;
+ uintptr_t target = (uintptr_t)(void *)lj_vm_ffi_callback;
+ uintptr_t ug = (uintptr_t)(void *)g;
+ MSize slot;
+ *p++ = LOONGI_LU12I_W | LOONGF_D(RID_R18) | LOONGF_I20((target >> 12) & 0xfffff); /* r18 = lj_vm_ffi_callback (full 64 bit build sequence). */
+ *p++ = LOONGI_LU12I_W | LOONGF_D(RID_R17) | LOONGF_I20((ug >> 12) & 0xfffff); /* r17 = g. */
+ *p++ = LOONGI_ORI | LOONGF_D(RID_R18) | LOONGF_J(RID_R18) | LOONGF_I(target & 0xfff);
+ *p++ = LOONGI_ORI | LOONGF_D(RID_R17) | LOONGF_J(RID_R17) | LOONGF_I(ug & 0xfff);
+ *p++ = LOONGI_LU32I_D | LOONGF_D(RID_R18) | LOONGF_I20((target >> 32) & 0xfffff);
+ *p++ = LOONGI_LU32I_D | LOONGF_D(RID_R17) | LOONGF_I20((ug >> 32) & 0xfffff);
+ *p++ = LOONGI_LU52I_D | LOONGF_D(RID_R18) | LOONGF_J(RID_R18) | LOONGF_I((target >> 52) & 0xfff);
+ *p++ = LOONGI_LU52I_D | LOONGF_D(RID_R17) | LOONGF_J(RID_R17) | LOONGF_I((ug >> 52) & 0xfff);
+ *p++ = LOONGI_NOP; /* Padding so the head is CALLBACK_MCODE_HEAD = 13*4 = 52 bytes. */
+ *p++ = LOONGI_NOP;
+ *p++ = LOONGI_NOP;
+ *p++ = LOONGI_NOP;
+ *p++ = LOONGI_JIRL | LOONGF_D(RID_R0) | LOONGF_J(RID_R18) | LOONGF_I(0); /* Jump to lj_vm_ffi_callback. */
+ for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) {
+ *p++ = LOONGI_ORI | LOONGF_D(RID_R19) | LOONGF_J(RID_R0) | LOONGF_I(slot & 0xfff); /* Stub: r19 = slot number... */
+ *p = LOONGI_B | LOONGF_I((page-p) & 0xffff) | (((page-p) >> 16) & 0x3ff); /* ...then branch back to the head. */
+ p++;
+ }
+ return p;
+}
#else
/* Missing support for this architecture. */
#define callback_mcode_init(g, p) (p)
@@ -617,6 +648,31 @@ void lj_ccallback_mcode_free(CTState *cts)
if (ngpr < maxgpr) { sp = &cts->cb.gpr[ngpr++]; goto done; } \
}
+#elif LJ_TARGET_LOONGARCH64
+
+#define CALLBACK_HANDLE_REGARG \
+ if (isfp) { \
+ if (nfpr + n <= CCALL_NARG_FPR) { \
+ sp = &cts->cb.fpr[nfpr]; \
+ nfpr += n; \
+ goto done; \
+ } else if (ngpr + n <= maxgpr) { \
+ sp = &cts->cb.gpr[ngpr]; \
+ ngpr += n; \
+ goto done; \
+ } \
+ } else { \
+ if (ngpr + n <= maxgpr) { \
+ sp = &cts->cb.gpr[ngpr]; \
+ ngpr += n; \
+ goto done; \
+ } \
+ }
+
+#define CALLBACK_HANDLE_RET \
+ if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
+ ((float *)dp)[1] = *(float *)dp;
+
#elif LJ_TARGET_RISCV64
#define CALLBACK_HANDLE_REGARG \
@@ -797,7 +853,7 @@ static void callback_conv_result(CTState *cts, lua_State *L, TValue *o)
*(int64_t *)dp = (int64_t)*(int32_t *)dp;
}
#endif
-#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) || LJ_TARGET_RISCV64
+#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) || LJ_TARGET_RISCV64 || LJ_TARGET_LOONGARCH64
/* Always sign-extend results to 64 bits. Even a soft-fp 'float'. */
if (ctr->size <= 4 &&
(LJ_ABI_SOFTFP || ctype_isinteger_or_bool(ctr->info)))
diff --git a/src/lj_emit_loongarch64.h b/src/lj_emit_loongarch64.h
new file mode 100644
index 000000000..74a293cc2
--- /dev/null
+++ b/src/lj_emit_loongarch64.h
@@ -0,0 +1,306 @@
+/*
+** LoongArch instruction emitter.
+** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+static intptr_t get_k64val(ASMState *as, IRRef ref) /* Raw 64 bit value of a constant IR instruction. */
+{
+ IRIns *ir = IR(ref);
+ if (ir->o == IR_KINT64) {
+ return (intptr_t)ir_kint64(ir)->u64;
+ } else if (ir->o == IR_KGC) {
+ return (intptr_t)ir_kgc(ir);
+ } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) {
+ return (intptr_t)ir_kptr(ir);
+ } else {
+ lj_assertA(ir->o == IR_KINT || ir->o == IR_KNULL,
+ "bad 64 bit const IR op %d", ir->o);
+ return ir->i; /* Sign-extended. */
+ }
+}
+
+#define get_kval(as, ref) get_k64val(as, ref)
+
+/* -- Emit basic instructions --------------------------------------------- */
+
+static void emit_djk(ASMState *as, LOONGIns loongi, Reg rd, Reg rj, Reg rk) /* 3R format. NB: code is emitted backwards (prepended at as->mcp). */
+{
+ *--as->mcp = loongi | LOONGF_D(rd & 0x1f) | LOONGF_J(rj & 0x1f) | LOONGF_K(rk & 0x1f);
+}
+
+#define emit_dj(as, loongi, rd, rj) emit_djk(as, loongi, rd, rj, 0)
+
+static void emit_di(ASMState *as, LOONGIns loongi, Reg rd, int32_t i) /* 1RI20 format (lu12i.w, lu32i.d, ...). */
+{
+ *--as->mcp = loongi | LOONGF_D(rd & 0x1f) | LOONGF_I20(i & 0xfffff);
+}
+
+static void emit_dji(ASMState *as, LOONGIns loongi, Reg rd, Reg rj, int32_t i) /* 2RI12/2RI16 format; caller must pre-mask i. */
+{
+ *--as->mcp = loongi | LOONGF_D(rd & 0x1f) | LOONGF_J(rj & 0x1f) | LOONGF_I(i);
+}
+
+static void emit_dju(ASMState *as, LOONGIns loongi, Reg rd, Reg rj, uint32_t u) /* Same as emit_dji for unsigned immediates. */
+{
+ *--as->mcp = loongi | LOONGF_D(rd & 0x1f) | LOONGF_J(rj & 0x1f) | LOONGF_I(u);
+}
+
+#define checki12(x) LOONGF_S_OK(x, 12)
+#define checku12(x) ((x) == ((x) & 0xfff))
+
+static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow);
+static void ra_allockreg(ASMState *as, intptr_t k, Reg r);
+static Reg ra_scratch(ASMState *as, RegSet allow);
+
+static void emit_dj32i(ASMState *as, Reg rd, Reg rj, int32_t i) /* rd = rj + 32 bit constant: addi.d, or lu12i.w/ori/add.d via scratch R20. */
+{
+ if (checki12(i)) {
+ *--as->mcp = LOONGI_ADDI_D | LOONGF_D(rd) | LOONGF_J(rj) | LOONGF_I(i&0xfff);
+ } else {
+ emit_djk(as, LOONGI_ADD_D, rd, RID_R20, rj); /* Reverse emission: this executes last. */
+ emit_dju(as, LOONGI_ORI, RID_R20, RID_R20, i&0xfff);
+ emit_di(as, LOONGI_LU12I_W, RID_R20, (i>>12)&0xfffff);
+ }
+}
+
+static void emit_d16i(ASMState *as, Reg rd, int32_t i) /* rd = sign-extended 16 bit constant: addu16i.d then srai.d (reverse emit order). */
+{
+ emit_dji(as, LOONGI_SRAI_D, rd, rd, 16);
+ emit_dji(as, LOONGI_ADDU16I_D, rd, RID_ZERO, (i&0xffff));
+}
+
+static void emit_djml(ASMState *as, LOONGIns loongi, Reg rd, Reg rj, uint32_t m, uint32_t l) /* Bit-field format (bstrpick/bstrins): msb/lsb operands. */
+{
+ *--as->mcp = loongi | LOONGF_D(rd & 0x1f) | LOONGF_J(rj & 0x1f) | LOONGF_I(l & 0x3f) | LOONGF_M(m & 0x3f);
+}
+
+static void emit_djka(ASMState *as, LOONGIns loongi, Reg rd, Reg rj, Reg rk, Reg ra) /* 4R format (fsel etc.). */
+{
+ *--as->mcp = loongi | LOONGF_D(rd & 0x1f) | LOONGF_J(rj & 0x1f) | LOONGF_K(rk & 0x1f) | LOONGF_A(ra & 0x1f);
+}
+
+static void emit_b_bl(ASMState *as, LOONGIns loongi, uint32_t i) /* I26 format: low 16 bits at 10, high 10 bits at 0. */
+{
+ *--as->mcp = loongi | LOONGF_I(i & 0xffff) | ((i >> 16) & 0x3ff);
+}
+
+
+/* -- Emit loads/stores --------------------------------------------------- */
+
+/* Prefer rematerialization of BASE/L from global_State over spills. */
+#define emit_canremat(ref) ((ref) <= REF_BASE)
+
+
+/* Load a 32 bit constant into a GPR. */
+static void emit_loadi(ASMState *as, Reg r, int32_t i)
+{
+ emit_dj32i(as, r, RID_ZERO, i);
+}
+
+/* Load a 64 bit constant into a GPR. */
+static void emit_loadu64(ASMState *as, Reg r, uint64_t u64)
+{
+ if (checki32((int64_t)u64)) { /* Sign-extends: a 2-insn sequence suffices. */
+ emit_dj32i(as, r, RID_ZERO, (int32_t)u64);
+ } else { /* Full sequence; executes lu12i.w, ori, lu32i.d, lu52i.d (reverse emission). */
+ *--as->mcp = LOONGI_LU52I_D | LOONGF_D(r) | LOONGF_J(r) | LOONGF_I((u64>>52)&0xfff);
+ *--as->mcp = LOONGI_LU32I_D | LOONGF_D(r) | LOONGF_I20((u64>>32)&0xfffff);
+ *--as->mcp = LOONGI_ORI | LOONGF_D(r) | LOONGF_J(r) | LOONGF_I(u64&0xfff);
+ *--as->mcp = LOONGI_LU12I_W | LOONGF_D(r) | LOONGF_I20((u64>>12)&0xfffff);
+ }
+}
+
+#define emit_loada(as, r, addr) emit_loadu64(as, (r), u64ptr((addr)))
+
+/* Get/set from constant pointer. */
+static void emit_lsptr(ASMState *as, LOONGIns loongi, Reg r, void *p, RegSet allow)
+{
+ intptr_t jgl = (intptr_t)(J2G(as->J));
+ intptr_t i = (intptr_t)(p);
+ Reg base;
+ if ((uint32_t)(i-jgl) < 65536) { /* Address near global_State: address JGL-relative. */
+ i = i-jgl-32768; /* RID_JGL holds g + 32768. */
+ base = RID_JGL;
+ } else {
+ base = ra_allock(as, i-(int16_t)i, allow); /* High part in base; if checki12(i) holds, (int16_t)i == i and base is 0. */
+ }
+ if (checki12(i)) {
+ emit_dji(as, loongi, r, base, i&0xfff);
+ }
+ else {
+ /* ld.d->ldx.d, fld.d->fldx.d, ld.s->fldx.s */
+ if (loongi == LOONGI_LD_D)
+ loongi = LOONGI_LDX_D;
+ else if (loongi == LOONGI_FLD_D)
+ loongi = LOONGI_FLDX_D;
+ else if (loongi == LOONGI_FLD_S)
+ loongi = LOONGI_FLDX_S;
+ emit_djk(as, loongi, r, base, RID_R20);
+
+ /* move i to a GPR */
+ emit_d16i(as, RID_R20, i); /* Sign-extended low 16 bits of the offset in scratch R20. */
+ }
+}
+
+/* Load 64 bit IR constant into register. */
+static void emit_loadk64(ASMState *as, Reg r, IRIns *ir)
+{
+ const uint64_t *k = &ir_k64(ir)->u64;
+ Reg r64 = r;
+ if (rset_test(RSET_FPR, r)) {
+ r64 = RID_TMP;
+ emit_dj(as, LOONGI_MOVGR2FR_D, r, r64); /* Runs after the load below (reverse emission): FPR r = GPR r64. */
+ }
+ if ((uint32_t)((intptr_t)k-(intptr_t)J2G(as->J)) < 65536)
+ emit_lsptr(as, LOONGI_LD_D, r64, (void *)k, 0); /* Constant lives near global_State: cheap JGL-relative load. */
+ else
+ emit_loadu64(as, r64, *k);
+}
+
+/* Get/set global_State fields. */
+static void emit_lsglptr2(ASMState *as, LOONGIns loongi, Reg r, int32_t ofs)
+{
+ emit_djk(as, loongi, r, RID_JGL, RID_R20); /* Indexed access: RID_JGL = g + 32768. */
+ emit_loadi(as, RID_R20, (ofs-32768));
+}
+
+#define emit_getgl(as, r, field) \
+ emit_lsglptr2(as, LOONGI_LDX_D, (r), (int32_t)offsetof(global_State, field))
+#define emit_setgl(as, r, field) \
+ emit_lsglptr2(as, LOONGI_STX_D, (r), (int32_t)offsetof(global_State, field))
+
+/* Trace number is determined from per-trace exit stubs. */
+#define emit_setvmstate(as, i) UNUSED(i)
+
+/* -- Emit control-flow instructions -------------------------------------- */
+
+/* Label for internal jumps. */
+typedef MCode *MCLabel;
+
+/* Return label pointing to current PC. */
+#define emit_label(as) ((as)->mcp)
+
+static void emit_branch(ASMState *as, LOONGIns loongi, Reg rj, Reg rd, MCode *target)
+{
+ MCode *p = as->mcp;
+ ptrdiff_t delta = target - (p - 1); /* Displacement from the slot this branch will occupy. */
+ lj_assertA(((delta + 0x8000) >> 16) == 0, "branch target out of range"); /* Signed 16 bit range check. */
+ /* 2RI16 conditional branches: beq/bne/blt/bge/bltu/bgeu. */
+ *--p = loongi | LOONGF_D(rd) | LOONGF_J(rj) | LOONGF_I(((uint32_t)delta & 0xffffu));
+ as->mcp = p;
+}
+
+static void emit_branch21(ASMState *as, LOONGIns loongi, Reg rj, MCode *target)
+{
+ MCode *p = as->mcp;
+ ptrdiff_t delta = target - (p - 1);
+ lj_assertA(((delta + 0x100000) >> 21) == 0, "branch target out of range"); /* Signed 21 bit range check. */
+ *--p = loongi | LOONGF_J(rj) | LOONGF_I(((uint32_t)delta & 0xffffu))
+ | (((uint32_t)delta & 0x1f0000u)>>16); /*BEQZ BNEZ BCEQZ BCNEZ*/
+ as->mcp = p;
+}
+
+static void emit_jmp(ASMState *as, MCode *target)
+{
+ MCode *p = as->mcp;
+ ptrdiff_t delta = target - (p - 1); /* In instruction words. */
+ emit_b_bl(as, LOONGI_B, (delta&0x3ffffff)); /*offs 26*/
+}
+
+#define emit_move(as, dst, src) \
+ emit_djk(as, LOONGI_OR, (dst), (src), RID_ZERO)
+
+static void emit_call(ASMState *as, void *target)
+{
+ MCode *p = --as->mcp;
+ ptrdiff_t delta = (char *)target - (char *)p; /* Byte offset; bl takes it >>2 in words. */
+ if (LOONGF_S_OK(delta>>2, 26)) {
+ *p = LOONGI_BL | LOONGF_I((delta>>2) & 0xffff) | (((delta>>2) >> 16) & 0x3ff);
+ } else { /* Target out of range: need indirect call. */
+ Reg r = ra_allock(as, (intptr_t)target, RSET_RANGE(RID_R12, RID_R19+1)); /* Address in r12..r19, jirl with ra as link. */
+ *p = LOONGI_JIRL | LOONGF_D(RID_RA) | LOONGF_J(r) | LOONGF_I(0);
+ }
+}
+
+/* -- Emit generic operations --------------------------------------------- */
+
+/* Generic move between two regs. */
+static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
+{
+ if (dst < RID_MAX_GPR && src >= RID_MIN_FPR) /* FPR -> GPR. */
+ emit_dj(as, irt_isnum(ir->t) ? LOONGI_MOVFR2GR_D : LOONGI_MOVFR2GR_S, dst, src);
+ else if (dst < RID_MAX_GPR) /* GPR -> GPR. */
+ emit_move(as, dst, src);
+ else /* FPR -> FPR. */
+ emit_dj(as, irt_isnum(ir->t) ? LOONGI_FMOV_D : LOONGI_FMOV_S, dst, src);
+}
+
+/* Emit an arithmetic operation with a constant operand. */
+static void emit_addk(ASMState *as, Reg dest, Reg src, int32_t i, RegSet allow)
+{
+ if (checki12(i)) {
+ emit_dji(as, LOONGI_ADDI_D, dest, src, i&0xfff);
+ } else {
+ Reg src2 = ra_allock(as, i, allow); /* Constant in a register, then add.d. */
+ emit_djk(as, LOONGI_ADD_D, dest, src, src2);
+ }
+}
+
+static void emit_lso(ASMState *as, LOONGIns loongi, Reg dest, Reg src, int64_t i, RegSet allow) /* Load/store with (small) offset; falls back to indexed form. */
+{
+ if (checki12(i)) {
+ emit_dji(as, loongi, dest, src, i&0xfff);
+ } else {
+ LOONGIns loongk = LOONGI_NOP;
+ switch (loongi) { /* Map the 12 bit-offset opcode to its indexed (register-offset) form. */
+ case LOONGI_LD_D: loongk = LOONGI_LDX_D; break;
+ case LOONGI_LD_W: loongk = LOONGI_LDX_W; break;
+ case LOONGI_ST_D: loongk = LOONGI_STX_D; break;
+ case LOONGI_FLD_D: loongk = LOONGI_FLDX_D; break;
+ case LOONGI_FST_D: loongk = LOONGI_FSTX_D; break;
+ case LOONGI_LD_B: loongk = LOONGI_LDX_B; break;
+ case LOONGI_LD_BU: loongk = LOONGI_LDX_BU; break;
+ case LOONGI_LD_H: loongk = LOONGI_LDX_H; break;
+ case LOONGI_LD_HU: loongk = LOONGI_LDX_HU; break;
+ case LOONGI_FLD_S: loongk = LOONGI_FLDX_S; break;
+ default: break;
+ }
+ /* NOTE(review): offset is materialized via emit_d16i, so only sign-extended 16 bit offsets are supported -- confirm all callers stay in range. */
+ Reg src2 = ra_scratch(as, allow);
+ emit_djk(as, loongk, dest, src, src2);
+ emit_d16i(as, src2, i);
+ }
+}
+
+/* Generic load of register with base and (small) offset address. */
+static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
+{
+ if (r < RID_MAX_GPR) {
+ emit_djk(as, irt_is64(ir->t) ? LOONGI_LDX_D : LOONGI_LDX_W, r, base, RID_R20);
+ } else {
+ emit_djk(as, irt_isnum(ir->t) ? LOONGI_FLDX_D : LOONGI_FLDX_S, r, base, RID_R20);
+ }
+ emit_d16i(as, RID_R20, ofs); /* Offset in scratch R20 (runs first: reverse emission). */
+}
+
+/* Generic store of register with base and (small) offset address. */
+static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
+{
+ if (r < RID_MAX_GPR) {
+ emit_djk(as, irt_is64(ir->t) ? LOONGI_STX_D : LOONGI_STX_W, r, base, RID_R20);
+ } else {
+ emit_djk(as, irt_isnum(ir->t) ? LOONGI_FSTX_D : LOONGI_FSTX_S, (r&31), base, RID_R20);
+ }
+ emit_d16i(as, RID_R20, ofs); /* Offset in scratch R20 (runs first: reverse emission). */
+}
+
+/* Add offset to pointer. */
+static void emit_addptr(ASMState *as, Reg r, int32_t ofs)
+{
+ if (ofs) {
+ emit_addk(as, r, r, ofs, rset_exclude(RSET_GPR, r));
+ }
+}
+
+
+#define emit_spsub(as, ofs) emit_addptr(as, RID_SP, -(ofs))
diff --git a/src/lj_frame.h b/src/lj_frame.h
index 440e83c36..853572c61 100644
--- a/src/lj_frame.h
+++ b/src/lj_frame.h
@@ -155,6 +155,15 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */
#define CFRAME_SIZE (10*8)
#define CFRAME_SIZE_JIT (CFRAME_SIZE + 9*16 + 4*8)
#define CFRAME_SHIFT_MULTRES 0
+#elif LJ_TARGET_LOONGARCH64
+#define CFRAME_OFS_ERRF 196
+#define CFRAME_OFS_NRES 192
+#define CFRAME_OFS_PREV 184
+#define CFRAME_OFS_L 176
+#define CFRAME_OFS_PC 168
+#define CFRAME_SIZE 200
+#define CFRAME_OFS_MULTRES 0
+#define CFRAME_SHIFT_MULTRES 3
#else
#define CFRAME_OFS_PREV (4*8)
#if LJ_GC64
diff --git a/src/lj_gdbjit.c b/src/lj_gdbjit.c
index f1a208bd4..f0ffe32bd 100644
--- a/src/lj_gdbjit.c
+++ b/src/lj_gdbjit.c
@@ -309,6 +309,9 @@ enum {
#elif LJ_TARGET_RISCV64
DW_REG_SP = 2,
DW_REG_RA = 1,
+#elif LJ_TARGET_LOONGARCH64
+ DW_REG_SP = 3,
+ DW_REG_RA = 1,
#else
#error "Unsupported target architecture"
#endif
@@ -388,6 +391,8 @@ static const ELFheader elfhdr_template = {
.machine = 8,
#elif LJ_TARGET_RISCV64
.machine = 243,
+#elif LJ_TARGET_LOONGARCH64
+ .machine = 258,
#else
#error "Unsupported target architecture"
#endif
@@ -606,6 +611,13 @@ static void LJ_FASTCALL gdbjit_ehframe(GDBJITctx *ctx)
DB(DW_CFA_offset|32|9); DUV(29);
DB(DW_CFA_offset|32|8); DUV(30);
}
+#elif LJ_TARGET_LOONGARCH64
+ {
+ int i;
+ DB(DW_CFA_offset|30); DUV(2);
+ for (i = 31; i >= 23; i--) { DB(DW_CFA_offset|i); DUV(3+(31-i)); }
+ for (i = 31; i >= 24; i--) { DB(DW_CFA_offset|32|i); DUV(43-i); }
+ }
#else
#error "Unsupported target architecture"
#endif
diff --git a/src/lj_jit.h b/src/lj_jit.h
index 4c3a5dbdf..cc6e243b4 100644
--- a/src/lj_jit.h
+++ b/src/lj_jit.h
@@ -106,6 +106,10 @@ struct riscv_hwprobe {
#endif
+//#elif LJ_TARGET_LOONGARCH64
+//#define JIT_F_GS464V (JIT_F_CPU << 0)
+//#define JIT_F_CPUSTRING "\6GS464V"
+
#else
#define JIT_F_CPUSTRING ""
@@ -407,7 +411,7 @@ enum {
LJ_K64_M2P64_31 = LJ_K64_M2P64,
#endif
#endif
-#if LJ_TARGET_MIPS
+#if LJ_TARGET_MIPS || LJ_TARGET_LOONGARCH64
LJ_K64_2P31, /* 2^31 */
#if LJ_64
LJ_K64_2P63, /* 2^63 */
@@ -416,7 +420,7 @@ enum {
#endif
LJ_K64__MAX,
};
-#define LJ_K64__USED (LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS)
+#define LJ_K64__USED (LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS || LJ_TARGET_LOONGARCH64)
enum {
#if LJ_TARGET_X86ORX64
@@ -426,16 +430,17 @@ enum {
LJ_K32_2P52_2P31, /* 2^52 + 2^31 */
LJ_K32_2P52, /* 2^52 */
#endif
-#if LJ_TARGET_PPC || LJ_TARGET_MIPS
+#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_LOONGARCH64
LJ_K32_2P31, /* 2^31 */
#endif
-#if LJ_TARGET_MIPS64
+#if LJ_TARGET_MIPS64 || LJ_TARGET_LOONGARCH64
LJ_K32_2P63, /* 2^63 */
LJ_K32_M2P64, /* -2^64 */
#endif
LJ_K32__MAX
};
-#define LJ_K32__USED (LJ_TARGET_X86ORX64 || LJ_TARGET_PPC || LJ_TARGET_MIPS)
+#define LJ_K32__USED \
+ (LJ_TARGET_X86ORX64 || LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_LOONGARCH64)
/* Get 16 byte aligned pointer to SIMD constant. */
#define LJ_KSIMD(J, n) \
diff --git a/src/lj_target.h b/src/lj_target.h
index a79f5d6a0..5a3490ca3 100644
--- a/src/lj_target.h
+++ b/src/lj_target.h
@@ -55,7 +55,7 @@ typedef uint32_t RegSP;
/* Bitset for registers. 32 registers suffice for most architectures.
** Note that one set holds bits for both GPRs and FPRs.
*/
-#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 || LJ_TARGET_RISCV64
+#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 || LJ_TARGET_RISCV64 || LJ_TARGET_LOONGARCH64
typedef uint64_t RegSet;
#define RSET_BITS 6
#define rset_picktop_(rs) ((Reg)lj_fls64(rs))
@@ -147,6 +147,8 @@ typedef uint32_t RegCost;
#include "lj_target_s390x.h"
#elif LJ_TARGET_RISCV64
#include "lj_target_riscv.h"
+#elif LJ_TARGET_LOONGARCH64
+#include "lj_target_loongarch64.h"
#else
#error "Missing include for target CPU"
#endif
diff --git a/src/lj_target_loongarch64.h b/src/lj_target_loongarch64.h
new file mode 100644
index 000000000..100f5e876
--- /dev/null
+++ b/src/lj_target_loongarch64.h
@@ -0,0 +1,313 @@
+/*
+** Definitions for LoongArch CPUs.
+** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#ifndef _LJ_TARGET_LOONGARCH_H
+#define _LJ_TARGET_LOONGARCH_H
+
+/* -- Registers IDs ------------------------------------------------------- */
+
+#define GPRDEF(_) \
+ _(R0) _(RA) _(R2) _(SP) _(R4) _(R5) _(R6) _(R7) \
+ _(R8) _(R9) _(R10) _(R11) _(R12) _(R13) _(R14) _(R15) \
+ _(R16) _(R17) _(R18) _(R19) _(R20) _(X) _(R22) _(R23) \
+ _(R24) _(R25) _(R26) _(R27) _(R28) _(R29) _(R30) _(R31)
+#define FPRDEF(_) \
+ _(F0) _(F1) _(F2) _(F3) _(F4) _(F5) _(F6) _(F7) \
+ _(F8) _(F9) _(F10) _(F11) _(F12) _(F13) _(F14) _(F15) \
+ _(F16) _(F17) _(F18) _(F19) _(F20) _(F21) _(F22) _(F23) \
+ _(F24) _(F25) _(F26) _(F27) _(F28) _(F29) _(F30) _(F31)
+#define VRIDDEF(_)
+
+#define RIDENUM(name) RID_##name,
+
+enum {
+ GPRDEF(RIDENUM) /* General-purpose registers (GPRs). */
+ FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */
+ RID_MAX,
+ RID_ZERO = RID_R0,
+ RID_TMP = RID_RA,
+
+ /* Calling conventions. */
+ RID_RET = RID_R4,
+
+ RID_RETHI = RID_R5,
+ RID_RETLO = RID_R4,
+
+ RID_FPRET = RID_F0,
+
+ /* These definitions must match with the *.dasc file(s): */
+ RID_BASE = RID_R23, /* Interpreter BASE. */
+ RID_LPC = RID_R25, /* Interpreter PC. */
+ RID_DISPATCH = RID_R26, /* Interpreter DISPATCH table. */
+ RID_LREG = RID_R27, /* Interpreter L. */
+ RID_JGL = RID_R22, /* On-trace: global_State + 32768. */
+
+ /* Register ranges [min, max) and number of registers. */
+ RID_MIN_GPR = RID_R0,
+ RID_MAX_GPR = RID_R31+1,
+ RID_MIN_FPR = RID_MAX_GPR,
+ RID_MAX_FPR = RID_F31+1,
+ RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR,
+ RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR
+};
+
+#define RID_NUM_KREF RID_NUM_GPR
+#define RID_MIN_KREF RID_R0
+
+/* -- Register sets ------------------------------------------------------- */
+
+/* Make use of all registers, except ZERO, TMP, R2, SP, JGL, R20 and X. */
+#define RSET_FIXED \
+ (RID2RSET(RID_ZERO)|RID2RSET(RID_TMP)|RID2RSET(RID_R2)|\
+ RID2RSET(RID_SP)|RID2RSET(RID_JGL)|RID2RSET(RID_R20)|\
+ RID2RSET(RID_X))
+#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) - RSET_FIXED)
+#define RSET_FPR RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR)
+#define RSET_ALL (RSET_GPR|RSET_FPR)
+#define RSET_INIT RSET_ALL
+
+/* Scratch (caller-save) registers. */
+#define RSET_SCRATCH_GPR RSET_RANGE(RID_R4, RID_R19+1)
+#define RSET_SCRATCH_FPR RSET_RANGE(RID_F0, RID_F23+1)
+#define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR)
+#define REGARG_FIRSTGPR RID_R4
+#define REGARG_LASTGPR RID_R11
+#define REGARG_NUMGPR 8
+#define REGARG_FIRSTFPR RID_F0
+#define REGARG_LASTFPR RID_F7
+#define REGARG_NUMFPR 8
+
+/* -- Spill slots --------------------------------------------------------- */
+
+/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs.
+**
+** SPS_FIXED: Available fixed spill slots in interpreter frame.
+** This definition must match with the *.dasc file(s).
+**
+** SPS_FIRST: First spill slot for general use.
+*/
+#define SPS_FIXED 4
+#define SPS_FIRST 4
+
+#define SPOFS_TMP 0
+
+#define sps_scale(slot) (4 * (int32_t)(slot))
+#define sps_align(slot) (((slot) - SPS_FIXED + 3) & ~3)
+
+/* -- Exit state ---------------------------------------------------------- */
+
+/* This definition must match with the *.dasc file(s). */
+typedef struct {
+ lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */
+ intptr_t gpr[RID_NUM_GPR]; /* General-purpose registers. */
+ int32_t spill[256]; /* Spill slots. */
+} ExitState;
+
+/* Highest exit + 1 indicates stack check. */
+#define EXITSTATE_CHECKEXIT 1
+
+/* Return the address of a per-trace exit stub. */
+static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p)
+{
+ while (*p == 0x03400000) p++; /* Skip LOONGI_NOP. */
+ return p;
+}
+/* Avoid dependence on lj_jit.h if only including lj_target.h. */
+#define exitstub_trace_addr(T, exitno) \
+ exitstub_trace_addr_((MCode *)((char *)(T)->mcode + (T)->szmcode))
+
+/* -- Instructions -------------------------------------------------------- */
+
+/* Instruction fields. */
+#define LOONGF_D(r) (r)
+#define LOONGF_J(r) ((r) << 5)
+#define LOONGF_K(r) ((r) << 10)
+#define LOONGF_A(r) ((r) << 15)
+#define LOONGF_I(n) ((n) << 10)
+#define LOONGF_I20(n) ((n) << 5)
+#define LOONGF_M(n) ((n) << 16)
+
+/* Check for valid field range. */
+#define LOONGF_S_OK(x, b) ((((x) + (1 << (b-1))) >> (b)) == 0)
+
+typedef enum LOONGIns {
+/* Integer instructions. */
+ LOONGI_MOVE = 0x00150000,
+ LOONGI_NOP = 0x03400000,
+
+ LOONGI_AND = 0x00148000,
+ LOONGI_ANDI = 0x03400000,
+ LOONGI_OR = 0x00150000,
+ LOONGI_ORI = 0x03800000,
+ LOONGI_XOR = 0x00158000,
+ LOONGI_XORI = 0x03c00000,
+ LOONGI_NOR = 0x00140000,
+
+ LOONGI_SLT = 0x00120000,
+ LOONGI_SLTU = 0x00128000,
+ LOONGI_SLTI = 0x02000000,
+ LOONGI_SLTUI = 0x02400000,
+
+ LOONGI_ADD_W = 0x00100000,
+ LOONGI_ADDI_W = 0x02800000,
+ LOONGI_SUB_W = 0x00110000,
+ LOONGI_MUL_W = 0x001c0000,
+ LOONGI_MULH_W = 0x001c8000,
+ LOONGI_DIV_W = 0x00200000,
+ LOONGI_DIV_WU = 0x00210000,
+
+ LOONGI_SLLI_W = 0x00408000,
+ LOONGI_SRLI_W = 0x00448000,
+ LOONGI_SRAI_W = 0x00488000,
+ LOONGI_ROTRI_W = 0x004c8000,
+ LOONGI_ROTRI_D = 0x004d0000,
+ LOONGI_SLL_W = 0x00170000,
+ LOONGI_SRL_W = 0x00178000,
+ LOONGI_SRA_W = 0x00180000,
+ LOONGI_ROTR_W = 0x001b0000,
+ LOONGI_ROTR_D = 0x001b8000,
+
+ LOONGI_EXT_W_B = 0x00005c00,
+ LOONGI_EXT_W_H = 0x00005800,
+ LOONGI_REVB_2H = 0x00003000,
+ LOONGI_REVB_4H = 0x00003400,
+
+ LOONGI_ALSL_W = 0x00040000,
+ LOONGI_ALSL_D = 0x002c0000,
+
+ LOONGI_B = 0x50000000,
+ LOONGI_BL = 0x54000000,
+ LOONGI_JIRL = 0x4c000000,
+
+ LOONGI_BEQ = 0x58000000,
+ LOONGI_BNE = 0x5c000000,
+ LOONGI_BLT = 0x60000000,
+ LOONGI_BGE = 0x64000000,
+ LOONGI_BGEU = 0x6c000000,
+ LOONGI_BLTU = 0x68000000,
+ LOONGI_BCEQZ = 0x48000000,
+ LOONGI_BCNEZ = 0x48000100,
+
+ /* Load/store instructions. */
+ LOONGI_LD_W = 0x28800000,
+ LOONGI_LD_D = 0x28c00000,
+ LOONGI_ST_W = 0x29800000,
+ LOONGI_ST_D = 0x29c00000,
+ LOONGI_LD_B = 0x28000000,
+ LOONGI_ST_B = 0x29000000,
+ LOONGI_LD_H = 0x28400000,
+ LOONGI_ST_H = 0x29400000,
+ LOONGI_LD_BU = 0x2a000000,
+ LOONGI_LD_HU = 0x2a400000,
+ LOONGI_LDX_B = 0x38000000,
+ LOONGI_LDX_BU = 0x38200000,
+ LOONGI_LDX_H = 0x38040000,
+ LOONGI_LDX_HU = 0x38240000,
+ LOONGI_LDX_D = 0x380c0000,
+ LOONGI_STX_D = 0x381c0000,
+ LOONGI_LDX_W = 0x38080000,
+ LOONGI_STX_W = 0x38180000,
+ LOONGI_STX_B = 0x38100000,
+ LOONGI_STX_H = 0x38140000,
+ LOONGI_FLD_S = 0x2b000000,
+ LOONGI_FST_S = 0x2b400000,
+ LOONGI_FLD_D = 0x2b800000,
+ LOONGI_FST_D = 0x2bc00000,
+ LOONGI_FLDX_D = 0x38340000,
+ LOONGI_FLDX_S = 0x38300000,
+ LOONGI_FSTX_D = 0x383c0000,
+ LOONGI_FSTX_S = 0x38380000,
+
+ LOONGI_ADD_D = 0x00108000,
+ LOONGI_ADDI_D = 0x02c00000,
+ LOONGI_ADDU16I_D = 0x10000000,
+ LOONGI_LU12I_W = 0x14000000,
+ LOONGI_LU32I_D = 0x16000000,
+ LOONGI_LU52I_D = 0x3000000,
+ LOONGI_SUB_D = 0x00118000,
+ LOONGI_DIV_D = 0x00220000,
+ LOONGI_DIV_DU = 0x00230000,
+ LOONGI_MUL_D = 0x001d8000,
+
+ LOONGI_SLLI_D = 0x00410000,
+ LOONGI_SRLI_D = 0x00450000,
+ LOONGI_SLL_D = 0x00188000,
+ LOONGI_SRL_D = 0x00190000,
+ LOONGI_SRAI_D = 0x00490000,
+ LOONGI_SRA_D = 0x00198000,
+ LOONGI_REVH_D = 0x00004400,
+
+ /* Extract/insert instructions. */
+ LOONGI_BSTRPICK_D = 0x00c00000,
+ LOONGI_BSTRINS_D = 0x00800000,
+
+ LOONGI_MASKEQZ = 0x00130000,
+ LOONGI_MASKNEZ = 0x00138000,
+
+ /* FP instructions. */
+ LOONGI_FRINT_S = 0x011e4400,
+ LOONGI_FRINT_D = 0x011e4800,
+ LOONGI_FTINTRM_L_D = 0x011a2800,
+ LOONGI_FTINTRP_L_D = 0x011a6800,
+ LOONGI_FTINTRNE_L_D = 0x011ae800,
+
+ LOONGI_FMOV_S = 0x01149400,
+ LOONGI_FMOV_D = 0x01149800,
+
+ LOONGI_FABS_D = 0x01140800,
+ LOONGI_FNEG_D = 0x01141800,
+
+ LOONGI_FADD_D = 0x01010000,
+ LOONGI_FSUB_D = 0x01030000,
+ LOONGI_FMUL_D = 0x01050000,
+ LOONGI_FDIV_D = 0x01070000,
+ LOONGI_FSQRT_D = 0x01144800,
+
+ LOONGI_FMIN_D = 0x010b0000,
+ LOONGI_FMAX_D = 0x01090000,
+
+ LOONGI_FADD_S = 0x01008000,
+ LOONGI_FSUB_S = 0x01028000,
+
+ LOONGI_FMADD_S = 0x08100000,
+ LOONGI_FMADD_D = 0x08200000,
+ LOONGI_FNMADD_D = 0x08a00000,
+ LOONGI_FMSUB_S = 0x08500000,
+ LOONGI_FMSUB_D = 0x08600000,
+ LOONGI_FNMSUB_D = 0x08e00000,
+
+ LOONGI_FCVT_D_S = 0x01192400,
+ LOONGI_FTINT_W_S = 0x011b0400,
+ LOONGI_FCVT_S_D = 0x01191800,
+ LOONGI_FTINT_W_D = 0x011b0800,
+ LOONGI_FFINT_S_W = 0x011d1000,
+ LOONGI_FFINT_D_W = 0x011d2000,
+ LOONGI_FFINT_S_L = 0x011d1800,
+ LOONGI_FFINT_D_L = 0x011d2800,
+
+ LOONGI_FTINTRZ_W_S = 0x011a8400,
+ LOONGI_FTINTRZ_W_D = 0x011a8800,
+ LOONGI_FTINTRZ_L_S = 0x011aa400,
+ LOONGI_FTINTRZ_L_D = 0x011aa800,
+ LOONGI_FTINTRM_W_S = 0x011a0400,
+ LOONGI_FTINTRM_W_D = 0x011a0800,
+
+ LOONGI_MOVFR2GR_S = 0x0114b400,
+ LOONGI_MOVGR2FR_W = 0x0114a400,
+ LOONGI_MOVGR2FR_D = 0x0114a800,
+ LOONGI_MOVFR2GR_D = 0x0114b800,
+
+ LOONGI_FCMP_CEQ_D = 0x0c220000,
+ LOONGI_FCMP_CLT_S = 0x0c110000,
+ LOONGI_FCMP_CLT_D = 0x0c210000,
+ LOONGI_FCMP_CLE_D = 0x0c230000,
+ LOONGI_FCMP_CULE_D = 0x0c270000,
+ LOONGI_FCMP_CULT_D = 0x0c250000,
+ LOONGI_FCMP_CNE_D = 0x0c280000,
+ LOONGI_FSEL = 0x0d000000,
+} LOONGIns;
+
+#endif
+
diff --git a/src/lj_trace.c b/src/lj_trace.c
index 6dc77e286..4756d4b2b 100644
--- a/src/lj_trace.c
+++ b/src/lj_trace.c
@@ -334,17 +334,17 @@ void lj_trace_initstate(global_State *g)
J->k64[LJ_K64_2P64].u64 = U64x(43f00000,00000000);
J->k32[LJ_K32_M2P64_31] = LJ_64 ? 0xdf800000 : 0xcf000000;
#endif
-#if LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS64
+#if LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS64 || LJ_TARGET_LOONGARCH64
J->k64[LJ_K64_M2P64].u64 = U64x(c3f00000,00000000);
#endif
#if LJ_TARGET_PPC
J->k32[LJ_K32_2P52_2P31] = 0x59800004;
J->k32[LJ_K32_2P52] = 0x59800000;
#endif
-#if LJ_TARGET_PPC || LJ_TARGET_MIPS
+#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_LOONGARCH64
J->k32[LJ_K32_2P31] = 0x4f000000;
#endif
-#if LJ_TARGET_MIPS
+#if LJ_TARGET_MIPS || LJ_TARGET_LOONGARCH64
J->k64[LJ_K64_2P31].u64 = U64x(41e00000,00000000);
#if LJ_64
J->k64[LJ_K64_2P63].u64 = U64x(43e00000,00000000);
diff --git a/src/lj_vmmath.c b/src/lj_vmmath.c
index 1ee32d018..7a045f41b 100644
--- a/src/lj_vmmath.c
+++ b/src/lj_vmmath.c
@@ -70,7 +70,7 @@ double lj_vm_foldarith(double x, double y, int op)
/* -- Helper functions for generated machine code ------------------------- */
#if (LJ_HASJIT && !(LJ_TARGET_ARM || LJ_TARGET_ARM64 || LJ_TARGET_PPC)) || LJ_TARGET_MIPS \
- || LJ_TARGET_RISCV64
+ || LJ_TARGET_RISCV64 || LJ_TARGET_LOONGARCH64
int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b)
{
uint32_t y, ua, ub;
diff --git a/src/vm_loongarch64.dasc b/src/vm_loongarch64.dasc
new file mode 100644
index 000000000..9458e7e2f
--- /dev/null
+++ b/src/vm_loongarch64.dasc
@@ -0,0 +1,4625 @@
+|// Low-level VM code for LoongArch CPUs.
+|// Bytecode interpreter, fast functions and helper functions.
+|// Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h
+|
+|.arch loongarch64
+|.section code_op, code_sub
+|
+|.actionlist build_actionlist
+|.globals GLOB_
+|.globalnames globnames
+|.externnames extnames
+|
+|//-----------------------------------------------------------------------
+|
+|// Fixed register assignments for the interpreter.
+|// Don't use: r0 = 0, r1 = ra, r2 = tp, r3 = sp, r21 = reserved
+|
+|
+|// The following must be C callee-save (but BASE is often refetched).
+|.define BASE, r23 // Base of current Lua stack frame.
+|.define KBASE, r24 // Constants of current Lua function.
+|.define PC, r25 // Next PC.
+|.define DISPATCH, r26 // Opcode dispatch table.
+|.define LREG, r27 // Register holding lua_State (also in SAVE_L).
+|.define MULTRES, r28 // Size of multi-result: (nresults+1)*8.
+|
+|.define JGL, r22 // On-trace: global_State + 32768.
+|
+|// Constants for type-comparisons, stores and conversions. C callee-save.
+|.define TISNIL, r22
+|.define TISNUM, r29
+|.define TOBIT, f30 // 2^52 + 2^51.
+|
+|// The following temporaries are not saved across C calls, except for RA.
+|.define RA, r30 // Callee-save.
+|.define RB, r8
+|.define RC, r9
+|.define RD, r10
+|.define INS, r11
+|
+|.define TMP0, r12
+|.define TMP1, r13
+|.define TMP2, r14
+|.define TMP3, r15
+|.define TMP4, r17
+|
+|// LoongArch LP64 calling convention.
+|.define CARG1, r4
+|.define CARG2, r5
+|.define CARG3, r6
+|.define CARG4, r7
+|.define CARG5, r8
+|.define CARG6, r9
+|.define CARG7, r10
+|.define CARG8, r11
+|
+|.define CRET1, r4
+|.define CRET2, r5
+|
+|.define FARG1, f0
+|.define FARG2, f1
+|.define FARG3, f2
+|.define FARG4, f3
+|.define FARG5, f4
+|.define FARG6, f5
+|.define FARG7, f6
+|.define FARG8, f7
+|
+|.define FRET1, f0
+|.define FRET2, f1
+|
+|.define FTMP0, f8
+|.define FTMP1, f9
+|.define FTMP2, f10
+|.define FTMP3, f22
+|.define FTMP4, f23
+|
+|.define FCC0, fcc0
+|.define FCC1, fcc1
+|
+|// Stack layout while in interpreter. Must match with lj_frame.h.
+|// LoongArch64 hard-float.
+|
+|.define CFRAME_SPACE, 200 // Delta for sp.
+|
+|//----- 16 byte aligned, <-- sp entering interpreter
+|.define SAVE_ERRF, 196 // 32 bit values.
+|.define SAVE_NRES, 192
+|.define SAVE_CFRAME, 184 // 64 bit values.
+|.define SAVE_L, 176
+|.define SAVE_PC, 168
+|//----- 16 byte aligned
+|.define SAVE_GPR_, 80 // .. 80+11*8: 64 bit GPR saves.
+|.define SAVE_FPR_, 16 // .. 16+8*8: 64 bit FPR saves.
+|
+|
+|.define TMPD, 0
+|//----- 16 byte aligned
+|
+|.define TMPD_OFS, 0
+|
+|//-----------------------------------------------------------------------
+|
+|.macro saveregs
+| addi.d sp, sp, -CFRAME_SPACE
+| st.d ra, SAVE_GPR_+10*8(sp)
+| st.d r22, SAVE_GPR_+9*8(sp)
+| st.d r31, SAVE_GPR_+8*8(sp)
+| fst.d f31, SAVE_FPR_+7*8(sp)
+| st.d r30, SAVE_GPR_+7*8(sp)
+| fst.d f30, SAVE_FPR_+6*8(sp)
+| st.d r29, SAVE_GPR_+6*8(sp)
+| fst.d f29, SAVE_FPR_+5*8(sp)
+| st.d r28, SAVE_GPR_+5*8(sp)
+| fst.d f28, SAVE_FPR_+4*8(sp)
+| st.d r27, SAVE_GPR_+4*8(sp)
+| fst.d f27, SAVE_FPR_+3*8(sp)
+| st.d r26, SAVE_GPR_+3*8(sp)
+| fst.d f26, SAVE_FPR_+2*8(sp)
+| st.d r25, SAVE_GPR_+2*8(sp)
+| fst.d f25, SAVE_FPR_+1*8(sp)
+| st.d r24, SAVE_GPR_+1*8(sp)
+| fst.d f24, SAVE_FPR_+0*8(sp)
+| st.d r23, SAVE_GPR_+0*8(sp)
+|.endmacro
+|
+|.macro restoreregs_ret
+| ld.d ra, SAVE_GPR_+10*8(sp)
+| ld.d r22, SAVE_GPR_+9*8(sp)
+| ld.d r31, SAVE_GPR_+8*8(sp)
+| ld.d r30, SAVE_GPR_+7*8(sp)
+| fld.d f31, SAVE_FPR_+7*8(sp)
+| ld.d r29, SAVE_GPR_+6*8(sp)
+| fld.d f30, SAVE_FPR_+6*8(sp)
+| ld.d r28, SAVE_GPR_+5*8(sp)
+| fld.d f29, SAVE_FPR_+5*8(sp)
+| ld.d r27, SAVE_GPR_+4*8(sp)
+| fld.d f28, SAVE_FPR_+4*8(sp)
+| ld.d r26, SAVE_GPR_+3*8(sp)
+| fld.d f27, SAVE_FPR_+3*8(sp)
+| ld.d r25, SAVE_GPR_+2*8(sp)
+| fld.d f26, SAVE_FPR_+2*8(sp)
+| ld.d r24, SAVE_GPR_+1*8(sp)
+| fld.d f25, SAVE_FPR_+1*8(sp)
+| ld.d r23, SAVE_GPR_+0*8(sp)
+| fld.d f24, SAVE_FPR_+0*8(sp)
+| addi.d sp, sp, CFRAME_SPACE
+| jirl r0, ra, 0
+|.endmacro
+|
+|//-----------------------------------------------------------------------
+|
+|.macro .STXW, a, b, c
+| addu16i.d r20, r0, c
+| srai.d r20, r20, 16
+| stx.w a, b, r20
+|.endmacro
+|
+|.macro .STXD, a, b, c
+| addu16i.d r20, r0, c
+| srai.d r20, r20, 16
+| stx.d a, b, r20
+|.endmacro
+|
+|.macro .LDXW, a, b, c
+| addu16i.d r20, r0, c
+| srai.d r20, r20, 16
+| ldx.w a, b, r20
+|.endmacro
+|
+|.macro .LDXD, a, b, c
+| addu16i.d r20, r0, c
+| srai.d r20, r20, 16
+| ldx.d a, b, r20
+|.endmacro
+|
+|.macro .LDXBU, a, b, c
+| addu16i.d r20, r0, c
+| srai.d r20, r20, 16
+| ldx.bu a, b, r20
+|.endmacro
+|
+|.macro .ADD16I, a, b, c
+| addu16i.d r20, r0, c
+| srai.d r20, r20, 16
+| add.d a, b, r20
+|.endmacro
+|
+|// Type definitions. Some of these are only used for documentation.
+|.type L, lua_State, LREG
+|.type GL, global_State
+|.type TVALUE, TValue
+|.type GCOBJ, GCobj
+|.type STR, GCstr
+|.type TAB, GCtab
+|.type LFUNC, GCfuncL
+|.type CFUNC, GCfuncC
+|.type PROTO, GCproto
+|.type UPVAL, GCupval
+|.type NODE, Node
+|.type NARGS8, int
+|.type TRACE, GCtrace
+|.type SBUF, SBuf
+|
+|//-----------------------------------------------------------------------
+|
+|// Trap for not-yet-implemented parts.
+|.macro NYI; break 0; .endmacro
+|
+|//-----------------------------------------------------------------------
+|
+|// Access to frame relative to BASE.
+|.define FRAME_PC, -8
+|.define FRAME_FUNC, -16
+|
+|//-----------------------------------------------------------------------
+|
+|// Endian-specific defines. LoongArch is little endian.
+|.define OFS_RD, 2
+|.define OFS_RA, 1
+|.define OFS_OP, 0
+|
+|// Instruction decode.
+|.macro decode_BC4b, dst; slli.w dst, dst, 2; .endmacro
+|.macro decode_BC8b, dst; slli.w dst, dst, 3; .endmacro
+|.macro decode_OP, dst, ins; andi dst, ins, 0xff; .endmacro
+|.macro decode_RA, dst, ins; bstrpick.d dst, ins, 15, 8; decode_BC8b dst; .endmacro
+|.macro decode_RB, dst, ins; bstrpick.d dst, ins, 31, 24; decode_BC8b dst; .endmacro
+|.macro decode_RC, dst, ins; bstrpick.d dst, ins, 23, 16; decode_BC8b dst; .endmacro
+|.macro decode_RD, dst, ins; bstrpick.d dst, ins, 31, 16; decode_BC8b dst; .endmacro
+|.macro decode_RDtoRC8, dst, src; andi dst, src, 0x7f8; .endmacro
+|
+|// Instruction fetch.
+|.macro ins_NEXT1
+| ld.w INS, 0(PC)
+| addi.d PC, PC, 4
+|.endmacro
+|// Instruction decode+dispatch.
+|.macro ins_NEXT2
+| decode_OP TMP1, INS
+| decode_BC8b TMP1
+| add.d TMP0, DISPATCH, TMP1
+| ld.d TMP4, 0(TMP0)
+| decode_RD RD, INS
+| decode_RA RA, INS
+| jirl r0, TMP4, 0
+|.endmacro
+|.macro ins_NEXT
+| ins_NEXT1
+| ins_NEXT2
+|.endmacro
+|
+|// Instruction footer.
+|.if 1
+| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use.
+| .define ins_next, ins_NEXT
+| .define ins_next_, ins_NEXT
+| .define ins_next1, ins_NEXT1
+| .define ins_next2, ins_NEXT2
+|.else
+| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch.
+| // Affects only certain kinds of benchmarks (and only with -j off).
+| .macro ins_next
+| b ->ins_next
+| .endmacro
+| .macro ins_next1
+| .endmacro
+| .macro ins_next2
+| b ->ins_next
+| .endmacro
+| .macro ins_next_
+| ->ins_next:
+| ins_NEXT
+| .endmacro
+|.endif
+|
+|// Call decode and dispatch.
+|.macro ins_callt
+| // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC
+| ld.d PC, LFUNC:RB->pc
+| ld.w INS, 0(PC)
+| addi.d PC, PC, 4
+| decode_OP TMP1, INS
+| decode_RA RA, INS
+| decode_BC8b TMP1
+| add.d TMP0, DISPATCH, TMP1
+| ld.d TMP0, 0(TMP0)
+| add.d RA, RA, BASE
+| jirl r0, TMP0, 0
+|.endmacro
+|
+|.macro ins_call
+| // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, PC = caller PC
+| st.d PC, FRAME_PC(BASE)
+| ins_callt
+|.endmacro
+|
+|//-----------------------------------------------------------------------
+|
+|.macro branch_RD
+| srli.w TMP0, RD, 1
+| addu16i.d TMP4, r0, -0x2 // -BCBIAS_J*4
+| add.w TMP0, TMP0, TMP4 // (jump - 0x8000)<<2
+| add.d PC, PC, TMP0
+|.endmacro
+|
+|// Assumes DISPATCH is relative to GL.
+#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field))
+#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field))
+|
+#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto))
+|
+|.macro hotcheck, delta, target
+| srli.d TMP1, PC, 1
+| andi TMP1, TMP1, 126
+| add.d TMP1, TMP1, DISPATCH
+| ld.hu TMP2, GG_DISP2HOT(TMP1)
+| addi.w TMP2, TMP2, -delta
+| st.h TMP2, GG_DISP2HOT(TMP1)
+| blt TMP2, r0, target
+|.endmacro
+|
+|.macro hotloop
+| hotcheck HOTCOUNT_LOOP, ->vm_hotloop
+|.endmacro
+|
+|.macro hotcall
+| hotcheck HOTCOUNT_CALL, ->vm_hotcall
+|.endmacro
+|
+|// Set current VM state. Uses TMP0.
+|.macro li_vmstate, st; addi.w TMP0, r0, ~LJ_VMST_..st; .endmacro
+|.macro st_vmstate; .STXW TMP0, DISPATCH, DISPATCH_GL(vmstate); .endmacro
+|
+|// Move table write barrier back. Overwrites mark and tmp.
+|.macro barrierback, tab, mark, tmp, target
+| .LDXD tmp, DISPATCH, DISPATCH_GL(gc.grayagain)
+| andi mark, mark, ~LJ_GC_BLACK & 255 // black2gray(tab)
+| .STXD tab, DISPATCH, DISPATCH_GL(gc.grayagain)
+| st.b mark, tab->marked
+| st.d tmp, tab->gclist
+| b target
+|.endmacro
+|
+|// Clear type tag. Isolate lowest 47 bits of reg.
+|.macro cleartp, reg; bstrpick.d reg, reg, 46, 0; .endmacro
+|.macro cleartp, dst, reg; bstrpick.d dst, reg, 46, 0; .endmacro
+|
+|// Set type tag: Merge 17 type bits into bits [47, 63] of dst.
+|.macro settp, dst, tp; bstrins.d dst, tp, 63, 47; .endmacro
+|
+|// Extract (negative) type tag.
+|.macro gettp, dst, src; srai.d dst, src, 47; .endmacro
+|
+|// Macros to check the TValue type and extract the GCobj. Branch on failure.
+|.macro checktp, reg, tp, target
+| gettp TMP4, reg
+| addi.d TMP4, TMP4, tp
+| cleartp reg
+| bnez TMP4, target
+|.endmacro
+|.macro checktp, dst, reg, tp, target
+| gettp TMP4, reg
+| addi.d TMP4, TMP4, tp
+| cleartp dst, reg
+| bnez TMP4, target
+|.endmacro
+|.macro checkstr, reg, target; checktp reg, -LJ_TSTR, target; .endmacro
+|.macro checktab, reg, target; checktp reg, -LJ_TTAB, target; .endmacro
+|.macro checkfunc, reg, target; checktp reg, -LJ_TFUNC, target; .endmacro
+|.macro checkint, reg, target
+| gettp TMP4, reg
+| bne TMP4, TISNUM, target
+|.endmacro
+|.macro checknum, reg, target
+| gettp TMP4, reg
+| sltui TMP4, TMP4, LJ_TISNUM
+| beqz TMP4, target
+|.endmacro
+|
+|.macro mov_false, reg
+| addi.d reg, r0, 0x0001
+| slli.d reg, reg, 47
+| nor reg, reg, r0
+|.endmacro
+|.macro mov_true, reg
+| addi.d reg, r0, 0x0001
+| slli.d reg, reg, 48
+| nor reg, reg, r0
+|.endmacro
+|
+|//-----------------------------------------------------------------------
+
+/* Generate subroutines used by opcodes and other parts of the VM. */
+/* The .code_sub section should be last to help static branch prediction. */
+static void build_subroutines(BuildCtx *ctx)
+{
+ |.code_sub
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Return handling ----------------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |->vm_returnp:
+ | // See vm_return. Also: TMP2 = previous base.
+ | andi TMP0, PC, FRAME_P
+ |
+ | // Return from pcall or xpcall fast func.
+ | mov_true TMP1
+ | beqz TMP0, ->cont_dispatch
+ | ld.d PC, FRAME_PC(TMP2) // Fetch PC of previous frame.
+ | or BASE, TMP2, r0 // Restore caller base.
+ | // Prepending may overwrite the pcall frame, so do it at the end.
+ | st.d TMP1, -8(RA) // Prepend true to results.
+ | addi.d RA, RA, -8
+ |
+ |->vm_returnc:
+ | addi.w RD, RD, 8 // RD = (nresults+1)*8.
+ | andi TMP0, PC, FRAME_TYPE
+ | addi.w CRET1, r0, LUA_YIELD
+ | beqz RD, ->vm_unwind_c_eh
+ | or MULTRES, RD, r0
+ | beqz TMP0, ->BC_RET_Z // Handle regular return to Lua.
+ |
+ |->vm_return:
+ | // BASE = base, RA = resultptr, RD/MULTRES = (nresults+1)*8, PC = return
+ | // TMP0 = PC & FRAME_TYPE
  | addi.w TMP2, r0, -8 // TMP2 = -8, sign-extended to a 64 bit mask.
+ | xori TMP0, TMP0, FRAME_C
+ | and TMP2, PC, TMP2
+ | sub.d TMP2, BASE, TMP2 // TMP2 = previous base.
+ | bnez TMP0, ->vm_returnp
+ |
+ | addi.w TMP1, RD, -8
+ | st.d TMP2, L->base
+ | li_vmstate C
+ | ld.w TMP2, SAVE_NRES(sp)
+ | addi.d BASE, BASE, -16
+ | st_vmstate
+ | slli.w TMP2, TMP2, 3
+ | beqz TMP1, >2
+ |1:
+ | addi.w TMP1, TMP1, -8
+ | ld.d CRET1, 0(RA)
+ | addi.d RA, RA, 8
+ | st.d CRET1, 0(BASE)
+ | addi.d BASE, BASE, 8
+ | bnez TMP1, <1
+ |
+ |2:
+ | bne TMP2, RD, >6
+ |3:
+ | st.d BASE, L->top // Store new top.
+ |
+ |->vm_leave_cp:
+ | ld.d TMP0, SAVE_CFRAME(sp) // Restore previous C frame.
+ | or CRET1, r0, r0 // Ok return status for vm_pcall.
+ | st.d TMP0, L->cframe
+ |
+ |->vm_leave_unw:
+ | restoreregs_ret
+ |
+ |6:
+ | ld.d TMP1, L->maxstack
+ | slt TMP0, TMP2, RD
+ | // More results wanted. Check stack size and fill up results with nil.
+ | slt TMP1, BASE, TMP1
+ | bnez TMP0, >7
+ | beqz TMP1, >8
+ | st.d TISNIL, 0(BASE)
+ | addi.w RD, RD, 8
+ | addi.d BASE, BASE, 8
+ | b <2
+ |
+ |7: // Less results wanted.
+ | sub.w TMP0, RD, TMP2
+ | sub.d TMP0, BASE, TMP0 // Either keep top or shrink it.
+ | maskeqz TMP0, TMP0, TMP2 // LUA_MULTRET+1 case?
+ | masknez BASE, BASE, TMP2
+ | or BASE, BASE, TMP0
+ | b <3
+ |
+ |8: // Corner case: need to grow stack for filling up results.
+ | // This can happen if:
+ | // - A C function grows the stack (a lot).
+ | // - The GC shrinks the stack in between.
+ | // - A return back from a lua_call() with (high) nresults adjustment.
+ |
+ | st.d BASE, L->top // Save current top held in BASE (yes).
+ | or MULTRES, RD, r0
+ | srli.w CARG2, TMP2, 3
+ | or CARG1, L, r0
+ | bl extern lj_state_growstack // (lua_State *L, int n)
+ | ld.w TMP2, SAVE_NRES(sp)
+ | ld.d BASE, L->top // Need the (realloced) L->top in BASE.
+ | or RD, MULTRES, r0
+ | slli.w TMP2, TMP2, 3
+ | b <2
+ |
+ |->vm_unwind_c: // Unwind C stack, return from vm_pcall.
+ | // (void *cframe, int errcode)
+ | or sp, CARG1, r0
+ | or CRET1, CARG2, r0
+ |->vm_unwind_c_eh: // Landing pad for external unwinder.
+ | ld.d L, SAVE_L(sp)
+ | addi.w TMP0, r0, ~LJ_VMST_C
+ | ld.d GL:TMP1, L->glref
+ | st.w TMP0, GL:TMP1->vmstate
+ | b ->vm_leave_unw
+ |
+ |->vm_unwind_ff: // Unwind C stack, return from ff pcall.
+ | // (void *cframe)
+ | addi.d TMP3, r0, CFRAME_RAWMASK
+ | and sp, CARG1, TMP3
+ |->vm_unwind_ff_eh: // Landing pad for external unwinder.
+ | ld.d L, SAVE_L(sp)
+ | addu16i.d TMP3, r0, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
+ | addi.d TISNIL, r0, LJ_TNIL
+ | addi.d TISNUM, r0, LJ_TISNUM
+ | ld.d BASE, L->base
+ | ld.d DISPATCH, L->glref // Setup pointer to dispatch table.
+ | movgr2fr.w TOBIT, TMP3
+ | mov_false TMP1
+ | li_vmstate INTERP
+ | ld.d PC, FRAME_PC(BASE) // Fetch PC of previous frame.
+ | fcvt.d.s TOBIT, TOBIT
+ | addi.d RA, BASE, -8 // Results start at BASE-8.
+ | .ADD16I DISPATCH, DISPATCH, GG_G2DISP
+ | st.d TMP1, 0(RA) // Prepend false to error message.
+ | st_vmstate
+ | addi.d RD, r0, 16 // 2 results: false + error message.
+ | b ->vm_returnc
+ |
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Grow stack for calls -----------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |->vm_growstack_c: // Grow stack for C function.
+ | addi.d CARG2, r0, LUA_MINSTACK
+ | b >2
+ |
+ |->vm_growstack_l: // Grow stack for Lua function.
+ | // BASE = new base, RA = BASE+framesize*8, RC = nargs*8, PC = first PC
+ | add.d RC, BASE, RC
+ | sub.d RA, RA, BASE
+ | st.d BASE, L->base
+ | addi.d PC, PC, 4 // Must point after first instruction.
+ | st.d RC, L->top
+ | srli.w CARG2, RA, 3
+ |2:
+ | // L->base = new base, L->top = top
+ | st.d PC, SAVE_PC(sp)
+ | or CARG1, L, r0
+ | bl extern lj_state_growstack // (lua_State *L, int n)
+ | ld.d BASE, L->base
+ | ld.d RC, L->top
+ | ld.d LFUNC:RB, FRAME_FUNC(BASE)
+ | sub.d RC, RC, BASE
+ | cleartp LFUNC:RB
+ | // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC
+ | ins_callt // Just retry the call.
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Entry points into the assembler VM ---------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |->vm_resume: // Setup C frame and resume thread.
+ | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0)
+ | saveregs
+ | or L, CARG1, r0
+ | ld.d DISPATCH, L->glref // Setup pointer to dispatch table.
+ | or BASE, CARG2, r0
+ | ld.bu TMP1, L->status
+ | st.d L, SAVE_L(sp)
+ | addi.d PC, r0, FRAME_CP
+ | addi.d TMP0, sp, CFRAME_RESUME
+ | .ADD16I DISPATCH, DISPATCH, GG_G2DISP
+ | st.w r0, SAVE_NRES(sp)
+ | st.w r0, SAVE_ERRF(sp)
+ | st.d CARG1, SAVE_PC(sp) // Any value outside of bytecode is ok.
+ | st.d r0, SAVE_CFRAME(sp)
+ | st.d TMP0, L->cframe
+ | beqz TMP1, >3
+ |
+ | // Resume after yield (like a return).
+ | .STXD L, DISPATCH, DISPATCH_GL(cur_L)
+ | or RA, BASE, r0
+ | ld.d BASE, L->base
+ | ld.d TMP1, L->top
+ | ld.d PC, FRAME_PC(BASE)
+ | addu16i.d TMP3, r0, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
+ | sub.d RD, TMP1, BASE
+ | movgr2fr.w TOBIT, TMP3
+ | st.b r0, L->status
+ | fcvt.d.s TOBIT, TOBIT
+ | li_vmstate INTERP
+ | addi.d RD, RD, 8
+ | st_vmstate
+ | or MULTRES, RD, r0
+ | andi TMP0, PC, FRAME_TYPE
+ | addi.d TISNIL, r0, LJ_TNIL
+ | addi.d TISNUM, r0, LJ_TISNUM
+ | beqz TMP0, ->BC_RET_Z
+ | b ->vm_return
+ |
+ |->vm_pcall: // Setup protected C frame and enter VM.
+ | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef)
+ | saveregs
+ | st.w CARG4, SAVE_ERRF(sp)
+ | addi.d PC, r0, FRAME_CP
+ | b >1
+ |
+ |->vm_call: // Setup C frame and enter VM.
+ | // (lua_State *L, TValue *base, int nres1)
+ | saveregs
+ | addi.d PC, r0, FRAME_C
+ |
+ |1: // Entry point for vm_pcall above (PC = ftype).
+ | ld.d TMP1, L:CARG1->cframe
+ | or L, CARG1, r0
+ | st.w CARG3, SAVE_NRES(sp)
+ | ld.d DISPATCH, L->glref // Setup pointer to dispatch table.
+ | st.d CARG1, SAVE_L(sp)
+ | or BASE, CARG2, r0
+ | .ADD16I DISPATCH, DISPATCH, GG_G2DISP
+ | st.d CARG1, SAVE_PC(sp) // Any value outside of bytecode is ok.
+ | st.d TMP1, SAVE_CFRAME(sp)
+ | st.d sp, L->cframe // Add our C frame to cframe chain.
+ |
+ |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
+ | .STXD L, DISPATCH, DISPATCH_GL(cur_L)
+ | ld.d TMP2, L->base // TMP2 = old base (used in vmeta_call).
+ | addu16i.d TMP3, r0, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
+ | ld.d TMP1, L->top
+ | movgr2fr.w TOBIT, TMP3
+ | add.d PC, PC, BASE
+ | sub.d NARGS8:RC, TMP1, BASE
+ | addi.d TISNUM, r0, LJ_TISNUM
+ | sub.d PC, PC, TMP2 // PC = frame delta + frame type
+ | fcvt.d.s TOBIT, TOBIT
+ | li_vmstate INTERP
+ | addi.d TISNIL, r0, LJ_TNIL
+ | st_vmstate
+ |
+ |->vm_call_dispatch:
+ | // TMP2 = old base, BASE = new base, RC = nargs*8, PC = caller PC
+ | ld.d LFUNC:RB, FRAME_FUNC(BASE)
+ | checkfunc LFUNC:RB, ->vmeta_call
+ |
+ |->vm_call_dispatch_f:
+ | ins_call
+ | // BASE = new base, RB = func, RC = nargs*8, PC = caller PC
+ |
+ |->vm_cpcall: // Setup protected C frame, call C.
+ | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp)
+ | saveregs
+ | or L, CARG1, r0
+ | ld.d TMP0, L:CARG1->stack
+ | st.d CARG1, SAVE_L(sp)
+ | ld.d TMP1, L->top
+ | ld.d DISPATCH, L->glref // Setup pointer to dispatch table.
+ | st.d CARG1, SAVE_PC(sp) // Any value outside of bytecode is ok.
+ | sub.d TMP0, TMP0, TMP1 // Compute -savestack(L, L->top).
+ | ld.d TMP1, L->cframe
+ | .ADD16I DISPATCH, DISPATCH, GG_G2DISP
+ | st.w TMP0, SAVE_NRES(sp) // Neg. delta means cframe w/o frame.
+ | st.w r0, SAVE_ERRF(sp) // No error function.
+ | st.d TMP1, SAVE_CFRAME(sp)
+ | st.d sp, L->cframe // Add our C frame to cframe chain.
+ | .STXD L, DISPATCH, DISPATCH_GL(cur_L)
+ | jirl r1, CARG4, 0 // (lua_State *L, lua_CFunction func, void *ud)
+ | or BASE, CRET1, r0
+ | addi.d PC, r0, FRAME_CP
+ | bnez CRET1, <3 // Else continue with the call.
+ | b ->vm_leave_cp // No base? Just remove C frame.
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Metamethod handling ------------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |//-- Continuation dispatch ----------------------------------------------
+ |
+ |->cont_dispatch:
+ | // BASE = meta base, RA = resultptr, RD = (nresults+1)*8
+ | ld.d TMP0, -32(BASE) // Continuation.
+ | or RB, BASE, r0
+ | or BASE, TMP2, r0 // Restore caller BASE.
+ | ld.d LFUNC:TMP1, FRAME_FUNC(TMP2)
+ |.if FFI
+ | sltui TMP3, TMP0, 2
+ |.endif
+ | ld.d PC, -24(RB) // Restore PC from [cont|PC].
+ | cleartp LFUNC:TMP1
+ | add.d TMP2, RA, RD
+ | ld.d TMP1, LFUNC:TMP1->pc
+ | st.d TISNIL, -8(TMP2) // Ensure one valid arg.
+ |.if FFI
+ | bnez TMP3, >1
+ |.endif
+ | // BASE = base, RA = resultptr, RB = meta base
+ | ld.d KBASE, PC2PROTO(k)(TMP1)
+ | jirl r0, TMP0, 0 // Jump to continuation.
+ |
+ |.if FFI
+ |1:
+ | addi.d TMP1, RB, -32
+ | bnez TMP0, ->cont_ffi_callback // cont = 1: return from FFI callback.
+ | // cont = 0: tailcall from C function.
+ | sub.d RC, TMP1, BASE
+ | b ->vm_call_tail
+ |.endif
+ |
+ |->cont_cat: // RA = resultptr, RB = meta base
+ | ld.w INS, -4(PC)
+ | addi.d CARG2, RB, -32  // CARG2 = top slot of concat stack.
+ | ld.d TMP0, 0(RA)  // Metamethod result.
+ | decode_RB MULTRES, INS
+ | decode_RA RA, INS
+ | add.d TMP1, BASE, MULTRES  // TMP1 = bottom of remaining concat args.
+ | st.d BASE, L->base
+ | sub.d CARG3, CARG2, TMP1
+ | st.d TMP0, 0(CARG2)  // Store result back on the concat stack.
+ | bne TMP1, CARG2, ->BC_CAT_Z  // More elements left to concatenate?
+ | add.d RA, BASE, RA
+ | st.d TMP0, 0(RA)  // Done: copy final result to destination slot.
+ | b ->cont_nop
+ |
+ |//-- Table indexing metamethods -----------------------------------------
+ |
+ |->vmeta_tgets1:
+ | .ADD16I CARG3, DISPATCH, DISPATCH_GL(tmptv)  // Use tmptv as temporary key TValue.
+ | addi.d TMP0, r0, LJ_TSTR
+ | settp STR:RC, TMP0  // Tag the string key.
+ | st.d STR:RC, 0(CARG3)
+ | b >1
+ |
+ |->vmeta_tgets:
+ | .ADD16I CARG2, DISPATCH, DISPATCH_GL(tmptv)  // tmptv holds the table operand.
+ | addi.d TMP0, r0, LJ_TTAB
+ | addi.d TMP1, r0, LJ_TSTR
+ | settp TAB:RB, TMP0
+ | .ADD16I CARG3, DISPATCH, DISPATCH_GL(tmptv2)  // tmptv2 holds the string key.
+ | st.d TAB:RB, 0(CARG2)
+ | settp STR:RC, TMP1
+ | st.d STR:RC, 0(CARG3)
+ | b >1
+ |
+ |->vmeta_tgetb: // TMP0 = index
+ | .ADD16I CARG3, DISPATCH, DISPATCH_GL(tmptv)
+ | settp TMP0, TISNUM  // Tag the byte index as an integer key.
+ | st.d TMP0, 0(CARG3)  // Fall through to vmeta_tgetv.
+ |
+ |->vmeta_tgetv:
+ |1:
+ | st.d BASE, L->base
+ | or CARG1, L, r0
+ | st.d PC, SAVE_PC(sp)
+ | bl extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k)
+ | // Returns TValue * (finished) or NULL (metamethod).
+ | beqz CRET1, >3
+ | ld.d TMP0, 0(CRET1)
+ | st.d TMP0, 0(RA)  // Copy looked-up value to destination slot.
+ | ins_next
+ |
+ |3: // Call __index metamethod.
+ | // BASE = base, L->top = new base, stack = cont/func/t/k
+ | addi.d TMP1, BASE, -FRAME_CONT
+ | addi.d NARGS8:RC, r0, 16 // 2 args for func(t, k).
+ | ld.d BASE, L->top
+ | st.d PC, -24(BASE) // [cont|PC]
+ | sub.d PC, BASE, TMP1  // PC = frame delta tagged as continuation frame.
+ | ld.d LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
+ | cleartp LFUNC:RB
+ | b ->vm_call_dispatch_f
+ |
+ |->vmeta_tgetr:
+ | bl extern lj_tab_getinth // (GCtab *t, int32_t key)
+ | // Returns cTValue * or NULL.
+ | or TMP1, TISNIL, r0  // Default result is nil if the slot is absent.
+ | beqz CRET1, ->BC_TGETR_Z
+ | ld.d TMP1, 0(CRET1)
+ | b ->BC_TGETR_Z
+ |
+ |//-----------------------------------------------------------------------
+ |
+ |->vmeta_tsets1:
+ | .ADD16I CARG3, DISPATCH, DISPATCH_GL(tmptv)  // Use tmptv as temporary key TValue.
+ | addi.d TMP0, r0, LJ_TSTR
+ | settp STR:RC, TMP0  // Tag the string key.
+ | st.d STR:RC, 0(CARG3)
+ | b >1
+ |
+ |->vmeta_tsets:
+ | .ADD16I CARG2, DISPATCH, DISPATCH_GL(tmptv)  // tmptv holds the table operand.
+ | addi.d TMP0, r0, LJ_TTAB
+ | addi.d TMP1, r0, LJ_TSTR
+ | settp TAB:RB, TMP0
+ | .ADD16I CARG3, DISPATCH, DISPATCH_GL(tmptv2)  // tmptv2 holds the string key.
+ | st.d TAB:RB, 0(CARG2)
+ | settp STR:RC, TMP1
+ | st.d STR:RC, 0(CARG3)
+ | b >1
+ |
+ |->vmeta_tsetb: // TMP0 = index
+ | .ADD16I CARG3, DISPATCH, DISPATCH_GL(tmptv)
+ | settp TMP0, TISNUM  // Tag the byte index as an integer key.
+ | st.d TMP0, 0(CARG3)  // Fall through to vmeta_tsetv.
+ |
+ |->vmeta_tsetv:
+ |1:
+ | st.d BASE, L->base
+ | or CARG1, L, r0
+ | st.d PC, SAVE_PC(sp)
+ | bl extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k)
+ | // Returns TValue * (finished) or NULL (metamethod).
+ | ld.d TMP2, 0(RA)  // Load value to store (needed on both paths).
+ | beqz CRET1, >3
+ | // NOBARRIER: lj_meta_tset ensures the table is not black.
+ | st.d TMP2, 0(CRET1)
+ | ins_next
+ |
+ |3: // Call __newindex metamethod.
+ | // BASE = base, L->top = new base, stack = cont/func/t/k/(v)
+ | addi.d TMP1, BASE, -FRAME_CONT
+ | ld.d BASE, L->top
+ | st.d PC, -24(BASE) // [cont|PC]
+ | sub.d PC, BASE, TMP1  // PC = frame delta tagged as continuation frame.
+ | ld.d LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
+ | addi.d NARGS8:RC, r0, 24 // 3 args for func(t, k, v)
+ | cleartp LFUNC:RB
+ | st.d TMP2, 16(BASE) // Copy value to third argument.
+ | b ->vm_call_dispatch_f
+ |
+ |->vmeta_tsetr:
+ | st.d BASE, L->base
+ | or CARG1, L, r0
+ | st.d PC, SAVE_PC(sp)  // Anchor PC: the C call may reallocate/throw.
+ | bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
+ | // Returns TValue *.
+ | b ->BC_TSETR_Z
+ |
+ |//-- Comparison metamethods ---------------------------------------------
+ |
+ |->vmeta_comp:
+ | // RA/RD point to o1/o2.
+ | or CARG2, RA, r0
+ | or CARG3, RD, r0
+ | addi.d PC, PC, -4  // Rewind PC to the comparison instruction.
+ | st.d BASE, L->base
+ | or CARG1, L, r0
+ | decode_OP CARG4, INS
+ | st.d PC, SAVE_PC(sp)
+ | bl extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op)
+ | // Returns 0/1 or TValue * (metamethod).
+ |3:
+ | sltui TMP1, CRET1, 2  // Result >= 2 means a metamethod base pointer.
+ | beqz TMP1, ->vmeta_binop
+ | sub.w TMP2, r0, CRET1  // TMP2 = all-ones mask if true (1), zero if false (0).
+ |4:
+ | ld.hu RD, OFS_RD(PC)
+ | addi.d PC, PC, 4
+ | addu16i.d TMP1, r0, -0x2 // -BCBIAS_J*4
+ | slli.w RD, RD, 2
+ | add.w RD, RD, TMP1  // RD = signed branch offset in bytes.
+ | and RD, RD, TMP2  // Take the jump only if the condition holds.
+ | add.d PC, PC, RD
+ |->cont_nop:
+ | ins_next
+ |
+ |->cont_ra: // RA = resultptr
+ | ld.bu TMP1, -4+OFS_RA(PC)  // Re-read RA operand of the original instruction.
+ | ld.d TMP2, 0(RA)
+ | slli.w TMP1, TMP1, 3
+ | add.d TMP1, BASE, TMP1
+ | st.d TMP2, 0(TMP1)  // Store metamethod result into slot RA.
+ | b ->cont_nop
+ |
+ |->cont_condt: // RA = resultptr
+ | ld.d TMP0, 0(RA)
+ | gettp TMP0, TMP0
+ | sltui TMP1, TMP0, LJ_TISTRUECOND  // 1 if result is truthy.
+ | sub.w TMP2, r0, TMP1 // Branch if result is true.
+ | b <4
+ |
+ |->cont_condf: // RA = resultptr
+ | ld.d TMP0, 0(RA)
+ | gettp TMP0, TMP0
+ | sltui TMP1, TMP0, LJ_TISTRUECOND  // 1 if result is truthy.
+ | addi.w TMP2, TMP1, -1 // Branch if result is false.
+ | b <4
+ |
+ |->vmeta_equal:
+ | // CARG1/CARG2 point to o1/o2. TMP0 is set to 0/1.
+ | cleartp LFUNC:CARG3, CARG2  // Strip tags: lj_meta_equal takes GCobj pointers.
+ | cleartp LFUNC:CARG2, CARG1
+ | or CARG4, TMP0, r0  // ne flag.
+ | addi.d PC, PC, -4  // Rewind PC to the comparison instruction.
+ | st.d BASE, L->base
+ | or CARG1, L, r0
+ | st.d PC, SAVE_PC(sp)
+ | bl extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne)
+ | // Returns 0/1 or TValue * (metamethod).
+ | b <3
+ |
+ |->vmeta_equal_cd:
+ |.if FFI
+ | or CARG2, INS, r0  // Pass the whole instruction; C side decodes operands.
+ | addi.d PC, PC, -4
+ | st.d BASE, L->base
+ | or CARG1, L, r0
+ | st.d PC, SAVE_PC(sp)
+ | bl extern lj_meta_equal_cd // (lua_State *L, BCIns op)
+ | // Returns 0/1 or TValue * (metamethod).
+ | b <3
+ |.endif
+ |
+ |->vmeta_istype:
+ | addi.d PC, PC, -4
+ | st.d BASE, L->base
+ | or CARG1, L, r0
+ | srli.w CARG2, RA, 3  // Convert byte offsets back to slot/type numbers.
+ | srli.w CARG3, RD, 3
+ | st.d PC, SAVE_PC(sp)
+ | bl extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
+ | b ->cont_nop
+ |
+ |//-- Arithmetic metamethods ---------------------------------------------
+ |
+ |->vmeta_unm:
+ | or RC, RB, r0  // Unary minus: both operands are the same.
+ |
+ |->vmeta_arith:
+ | st.d BASE, L->base
+ | or CARG2, RA, r0
+ | st.d PC, SAVE_PC(sp)
+ | or CARG3, RB, r0
+ | or CARG4, RC, r0
+ | decode_OP CARG5, INS
+ | or CARG1, L, r0
+ | bl extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
+ | // Returns NULL (finished) or TValue * (metamethod).
+ | beqz CRET1, ->cont_nop
+ |
+ | // Call metamethod for binary op.
+ |->vmeta_binop:
+ | // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2
+ | sub.d TMP1, CRET1, BASE  // Frame delta.
+ | st.d PC, -24(CRET1) // [cont|PC]
+ | or TMP2, BASE, r0  // vm_call_dispatch expects old base in TMP2.
+ | addi.d PC, TMP1, FRAME_CONT  // Tag delta as continuation frame.
+ | or BASE, CRET1, r0
+ | addi.d NARGS8:RC, r0, 16 // 2 args for func(o1, o2).
+ | b ->vm_call_dispatch
+ |
+ |->vmeta_len:
+ | // CARG2 already set by BC_LEN.
+#if LJ_52
+ | or MULTRES, CARG1, r0  // Preserve CARG1 across the C call for the retry.
+#endif
+ | st.d BASE, L->base
+ | or CARG1, L, r0
+ | st.d PC, SAVE_PC(sp)
+ | bl extern lj_meta_len // (lua_State *L, TValue *o)
+ | // Returns NULL (retry) or TValue * (metamethod base).
+#if LJ_52
+ | bnez CRET1, ->vmeta_binop // Binop call for compatibility.
+ | or CARG1, MULTRES, r0  // Restore CARG1 and retry raw length.
+ | b ->BC_LEN_Z
+#else
+ | b ->vmeta_binop // Binop call for compatibility.
+#endif
+ |
+ |//-- Call metamethod ----------------------------------------------------
+ |
+ |->vmeta_call: // Resolve and call __call metamethod.
+ | // TMP2 = old base, BASE = new base, RC = nargs*8
+ | st.d TMP2, L->base // This is the callers base!
+ | addi.d CARG2, BASE, -16  // CARG2 = the called (non-function) value.
+ | st.d PC, SAVE_PC(sp)
+ | add.d CARG3, BASE, RC
+ | or CARG1, L, r0
+ | or MULTRES, NARGS8:RC, r0  // Save nargs*8 across the C call.
+ | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top)
+ | ld.d LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
+ | addi.d NARGS8:RC, MULTRES, 8 // Got one more argument now.
+ | cleartp LFUNC:RB
+ | ins_call
+ |
+ |->vmeta_callt: // Resolve __call for BC_CALLT.
+ | // BASE = old base, RA = new base, RC = nargs*8
+ | st.d BASE, L->base
+ | addi.d CARG2, RA, -16  // CARG2 = the called (non-function) value.
+ | st.d PC, SAVE_PC(sp)
+ | add.d CARG3, RA, RC
+ | or CARG1, L, r0
+ | or MULTRES, NARGS8:RC, r0  // Save nargs*8 across the C call.
+ | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top)
+ | ld.d RB, FRAME_FUNC(RA) // Guaranteed to be a function here.
+ | ld.d TMP1, FRAME_PC(BASE)
+ | addi.d NARGS8:RC, MULTRES, 8 // Got one more argument now.
+ | cleartp LFUNC:CARG3, RB
+ | b ->BC_CALLT_Z
+ |
+ |//-- Argument coercion for 'for' statement ------------------------------
+ |
+ |->vmeta_for:
+ | st.d BASE, L->base
+ | or CARG2, RA, r0
+ | st.d PC, SAVE_PC(sp)
+ | or MULTRES, INS, r0  // Preserve INS across the C call.
+ | or CARG1, L, r0
+ | bl extern lj_meta_for // (lua_State *L, TValue *base)
+ |.if JIT
+ | decode_OP TMP0, MULTRES
+ | addi.d TMP1, r0, BC_JFORI
+ |.endif
+ | decode_RA RA, MULTRES
+ | decode_RD RD, MULTRES
+ |.if JIT
+ | beq TMP0, TMP1, =>BC_JFORI  // Re-dispatch to the JIT variant if patched.
+ |.endif
+ | b =>BC_FORI
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Fast functions -----------------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |.macro .ffunc, name  // Fast function entry, no arg checks.
+ |->ff_ .. name:
+ |.endmacro
+ |
+ |.macro .ffunc_1, name  // Fast function with at least 1 argument.
+ |->ff_ .. name:
+ | ld.d CARG1, 0(BASE)  // Load before the check: slot is always readable.
+ | beqz NARGS8:RC, ->fff_fallback
+ |.endmacro
+ |
+ |.macro .ffunc_2, name  // Fast function with at least 2 arguments.
+ |->ff_ .. name:
+ | sltui TMP0, NARGS8:RC, 16
+ | ld.d CARG1, 0(BASE)
+ | ld.d CARG2, 8(BASE)
+ | bnez TMP0, ->fff_fallback
+ |.endmacro
+ |
+ |.macro .ffunc_n, name  // Fast function with 1 number argument.
+ |->ff_ .. name:
+ | ld.d CARG1, 0(BASE)
+ | fld.d FARG1, 0(BASE)  // Also load as double for the FP path.
+ | beqz NARGS8:RC, ->fff_fallback
+ | checknum CARG1, ->fff_fallback
+ |.endmacro
+ |
+ |.macro .ffunc_nn, name  // Fast function with 2 number arguments.
+ |->ff_ .. name:
+ | ld.d CARG1, 0(BASE)
+ | ld.d CARG2, 8(BASE)
+ | sltui TMP0, NARGS8:RC, 16
+ | gettp TMP1, CARG1
+ | bnez TMP0, ->fff_fallback
+ | gettp TMP2, CARG2
+ | sltui TMP1, TMP1, LJ_TISNUM  // 1 if arg1 is a number.
+ | sltui TMP2, TMP2, LJ_TISNUM  // 1 if arg2 is a number.
+ | fld.d FARG1, 0(BASE)
+ | and TMP1, TMP1, TMP2
+ | fld.d FARG2, 8(BASE)
+ | beqz TMP1, ->fff_fallback
+ |.endmacro
+ |
+ |// Inlined GC threshold check.
+ |.macro ffgccheck
+ | .LDXD TMP0, DISPATCH, DISPATCH_GL(gc.total)
+ | .LDXD TMP1, DISPATCH, DISPATCH_GL(gc.threshold)
+ | bltu TMP0, TMP1, >1  // Below threshold: skip the GC step.
+ | bl ->fff_gcstep
+ |1:
+ |.endmacro
+ |
+ |//-- Base library: checks -----------------------------------------------
+ |.ffunc_1 assert
+ | gettp TMP1, CARG1
+ |// ld.d PC, FRAME_PC(BASE)
+ | sltui TMP1, TMP1, LJ_TISTRUECOND
+ | addi.d RA, BASE, -16
+ | beqz TMP1, ->fff_fallback
+ | ld.d PC, FRAME_PC(BASE)
+ | addi.w RD, NARGS8:RC, 8 // Compute (nresults+1)*8.
+ | addi.d TMP1, BASE, 8
+ | add.d TMP2, RA, RD
+ | st.d CARG1, 0(RA)
+ | beq BASE, TMP2, ->fff_res // Done if exactly 1 argument.
+ |1:
+ | ld.d TMP0, 0(TMP1)
+ | st.d TMP0, -16(TMP1)
+ | or TMP3, TMP1, r0
+ | addi.d TMP1, TMP1, 8
+ | bne TMP3, TMP2, <1
+ | b ->fff_res
+ |
+ |.ffunc_1 type
+ | gettp TMP0, CARG1
+ | addi.w TMP1, r0, ~LJ_TISNUM
+ | sltu TMP2, TISNUM, TMP0
+ | nor TMP3, TMP0, r0
+ | masknez TMP1, TMP1, TMP2
+ | maskeqz TMP3, TMP3, TMP2
+ | or TMP3, TMP3, TMP1
+ | slli.d TMP3, TMP3, 3
+ | add.d TMP3, CFUNC:RB, TMP3
+ | ld.d CARG1, CFUNC:TMP3->upvalue
+ | b ->fff_restv
+ |
+ |//-- Base library: getters and setters ---------------------------------
+ |
+ |.ffunc_1 getmetatable
+ | gettp TMP2, CARG1
+ | addi.d TMP0, TMP2, -LJ_TTAB
+ | addi.d TMP1, TMP2, -LJ_TUDATA
+ | maskeqz TMP0, TMP1, TMP0  // TMP0 = 0 if table, else (tag - LJ_TUDATA).
+ | cleartp TAB:CARG1
+ | bnez TMP0, >6  // Neither table nor userdata: use per-type base metatable.
+ |1: // Field metatable must be at same offset for GCtab and GCudata!
+ | ld.d TAB:RB, TAB:CARG1->metatable
+ |2:
+ | .LDXD STR:RC, DISPATCH, DISPATCH_GL(gcroot[GCROOT_MMNAME+MM_metatable])
+ | addi.d CARG1, r0, LJ_TNIL
+ | beqz TAB:RB, ->fff_restv  // No metatable: return nil.
+ | ld.w TMP0, TAB:RB->hmask
+ | ld.w TMP1, STR:RC->sid
+ | ld.d NODE:TMP2, TAB:RB->node
+ | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask
+ | slli.d TMP0, TMP1, 5
+ | slli.d TMP1, TMP1, 3
+ | sub.d TMP1, TMP0, TMP1  // idx*24 (sizeof(Node)) via idx*32 - idx*8.
+ | add.d NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8)
+ | addi.w CARG4, r0, LJ_TSTR
+ | settp STR:RC, CARG4 // Tagged key to look for.
+ |3: // Rearranged logic, because we expect _not_ to find the key.
+ | ld.d TMP0, NODE:TMP2->key
+ | ld.d CARG1, NODE:TMP2->val
+ | ld.d NODE:TMP2, NODE:TMP2->next
+ | addi.d TMP3, r0, LJ_TTAB
+ | beq RC, TMP0, >5  // Found __metatable key?
+ | bnez NODE:TMP2, <3  // Follow the hash chain.
+ |4:
+ | or CARG1, RB, r0
+ | settp CARG1, TMP3  // Return the metatable itself, tagged as table.
+ | b ->fff_restv // Not found, keep default result.
+ |5:
+ | bne CARG1, TISNIL, ->fff_restv  // __metatable value shadows the metatable.
+ | b <4 // Ditto for nil value.
+ |
+ |6:
+ | sltui TMP3, TMP2, LJ_TISNUM  // Collapse all numbers onto LJ_TISNUM.
+ | maskeqz TMP0, TISNUM, TMP3
+ | masknez TMP3, TMP2, TMP3
+ | or TMP2, TMP0, TMP3
+ | slli.d TMP2, TMP2, 3
+ | sub.d TMP0, DISPATCH, TMP2
+ | .LDXD TAB:RB, TMP0, DISPATCH_GL(gcroot[GCROOT_BASEMT])-8
+ | b <2
+ |
+ |.ffunc_2 setmetatable
+ | // Fast path: no mt for table yet and not clearing the mt.
+ | checktp TMP1, CARG1, -LJ_TTAB, ->fff_fallback
+ | gettp TMP3, CARG2
+ | ld.d TAB:TMP0, TAB:TMP1->metatable
+ | ld.bu TMP2, TAB:TMP1->marked
+ | addi.d TMP3, TMP3, -LJ_TTAB  // 0 if new mt is a table.
+ | cleartp TAB:CARG2
+ | or TMP3, TMP3, TAB:TMP0  // Nonzero if wrong type OR mt already set.
+ | bnez TMP3, ->fff_fallback
+ | andi TMP3, TMP2, LJ_GC_BLACK // isblack(table)
+ | st.d TAB:CARG2, TAB:TMP1->metatable
+ | beqz TMP3, ->fff_restv
+ | barrierback TAB:TMP1, TMP2, TMP0, ->fff_restv  // Write barrier for black table.
+ |
+ |.ffunc rawget
+ | ld.d CARG2, 0(BASE)
+ | sltui TMP0, NARGS8:RC, 16  // Need at least 2 args.
+ | gettp TMP1, CARG2
+ | cleartp CARG2
+ | addi.d TMP1, TMP1, -LJ_TTAB  // First arg must be a table.
+ | or TMP0, TMP0, TMP1
+ | addi.d CARG3, BASE, 8
+ | bnez TMP0, ->fff_fallback
+ | or CARG1, L, r0
+ | bl extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
+ | // Returns cTValue *.
+ | ld.d CARG1, 0(CRET1)
+ | b ->fff_restv
+ |
+ |//-- Base library: conversions ------------------------------------------
+ |
+ |.ffunc tonumber
+ | // Only handles the number case inline (without a base argument).
+ | ld.d CARG1, 0(BASE)
+ | xori TMP0, NARGS8:RC, 8 // Exactly one number argument.
+ | gettp TMP1, CARG1
+ | sltu TMP1, TISNUM, TMP1  // 1 if not a number.
+ | or TMP0, TMP0, TMP1
+ | bnez TMP0, ->fff_fallback // No args or CARG1 is not number
+ | b ->fff_restv  // Numbers convert to themselves.
+ |
+ |.ffunc_1 tostring
+ | // Only handles the string or number case inline.
+ | gettp TMP0, CARG1
+ | addi.d TMP1, TMP0, -LJ_TSTR
+ | // A __tostring method in the string base metatable is ignored.
+ | beqz TMP1, ->fff_restv // String key?
+ | // Handle numbers inline, unless a number base metatable is present.
+ | .LDXD TMP1, DISPATCH, DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])
+ | sltu TMP0, TISNUM, TMP0  // 1 if not a number.
+ | st.d BASE, L->base // Add frame since C call can throw.
+ | or TMP0, TMP0, TMP1  // Bail if non-number or number basemt present.
+ | bnez TMP0, ->fff_fallback
+ | st.d PC, SAVE_PC(sp) // Redundant (but a defined value).
+ | ffgccheck
+ | or CARG1, L, r0
+ | or CARG2, BASE, r0
+ | bl extern lj_strfmt_number // (lua_State *L, cTValue *o)
+ | // Returns GCstr *.
+ | addi.d TMP1, r0, LJ_TSTR
+ |// ld.d BASE, L->base
+ | settp CARG1, TMP1  // Tag the new string.
+ | b ->fff_restv
+ |
+ |//-- Base library: iterators -------------------------------------------
+ |
+ |.ffunc_1 next
+ | checktp CARG1, -LJ_TTAB, ->fff_fallback
+ | add.d TMP0, BASE, NARGS8:RC
+ | ld.d PC, FRAME_PC(BASE)
+ | st.d TISNIL, 0(TMP0) // Set missing 2nd arg to nil.
+ | addi.d CARG2, BASE, 8
+ | addi.d CARG3, BASE, -16  // Output slots are below BASE.
+ | bl extern lj_tab_next // (GCtab *t, cTValue *key, TValue *o)
+ | // Returns 1=found, 0=end, -1=error.
+ |// addi.d RA, BASE, -16
+ | addi.d RD, r0, (2+1)*8
+ | blt r0, CRET1, ->fff_res // Found key/value.
+ | or TMP1, CRET1, r0
+ | or CARG1, TISNIL, r0
+ | beqz TMP1, ->fff_restv // End of traversal: return nil.
+ | ld.d CFUNC:RB, FRAME_FUNC(BASE)  // Restore state for the fallback.
+ | addi.w RC, r0, 2*8
+ | cleartp CFUNC:RB
+ | b ->fff_fallback // Invalid key.
+ |
+ |.ffunc_1 pairs
+ | checktp TAB:TMP1, CARG1, -LJ_TTAB, ->fff_fallback
+ | ld.d PC, FRAME_PC(BASE)
+#if LJ_52
+ | ld.d TAB:TMP2, TAB:TMP1->metatable
+ | ld.d TMP0, CFUNC:RB->upvalue[0]  // upvalue[0] = the 'next' function.
+ | addi.d RA, BASE, -16
+ | bnez TAB:TMP2, ->fff_fallback  // __pairs metamethod may exist in 5.2 mode.
+#else
+ | ld.d TMP0, CFUNC:RB->upvalue[0]  // upvalue[0] = the 'next' function.
+ | addi.d RA, BASE, -16
+#endif
+ | st.d TISNIL, 0(BASE)  // Return next, t, nil.
+ | st.d CARG1, -8(BASE)
+ | st.d TMP0, 0(RA)
+ | addi.d RD, r0, (3+1)*8
+ | b ->fff_res
+ |
+ |.ffunc_2 ipairs_aux
+ | checktab CARG1, ->fff_fallback
+ | checkint CARG2, ->fff_fallback
+ | ld.w TMP0, TAB:CARG1->asize
+ | ld.d TMP1, TAB:CARG1->array
+ | ld.d PC, FRAME_PC(BASE)
+ | slli.w TMP2, CARG2, 0  // Sign-extend key to 32 bits.
+ | addi.w TMP2, TMP2, 1  // Next index.
+ | sltu TMP3, TMP2, TMP0
+ | addi.d RA, BASE, -16
+ | bstrpick.d TMP0, TMP2, 31, 0  // Zero-extend before tagging.
+ | settp TMP0, TISNUM
+ | st.d TMP0, 0(RA)  // Store new integer key.
+ | beqz TMP3, >2 // Not in array part?
+ | slli.d TMP3, TMP2, 3
+ | add.d TMP3, TMP1, TMP3
+ | ld.d TMP1, 0(TMP3)
+ |1:
+ | addi.d RD, r0, (0+1)*8
+ | beq TMP1, TISNIL, ->fff_res // End of iteration, return 0 results.
+ | st.d TMP1, -8(BASE)  // Store value.
+ | addi.d RD, r0, (2+1)*8
+ | b ->fff_res
+ |2: // Check for empty hash part first. Otherwise call C function.
+ | ld.w TMP0, TAB:CARG1->hmask
+ | addi.d RD, r0, (0+1)*8
+ | beqz TMP0, ->fff_res
+ | or CARG2, TMP2, r0
+ | bl extern lj_tab_getinth // (GCtab *t, int32_t key)
+ | // Returns cTValue * or NULL.
+ | addi.d RD, r0, (0+1)*8
+ | beqz CRET1, ->fff_res
+ | ld.d TMP1, 0(CRET1)
+ | b <1
+ |
+ |.ffunc_1 ipairs
+ | checktp TAB:TMP1, CARG1, -LJ_TTAB, ->fff_fallback
+ | ld.d PC, FRAME_PC(BASE)
+#if LJ_52
+ | ld.d TAB:TMP2, TAB:TMP1->metatable
+#endif
+ | ld.d CFUNC:TMP0, CFUNC:RB->upvalue[0]  // upvalue[0] = ipairs_aux.
+ | addi.d RA, BASE, -16
+#if LJ_52
+ | bnez TAB:TMP2, ->fff_fallback  // __ipairs metamethod may exist in 5.2 mode.
+#endif
+ | slli.d TMP1, TISNUM, 47  // Tagged integer 0.
+ | st.d CARG1, -8(BASE)
+ | st.d TMP1, 0(BASE)
+ | st.d CFUNC:TMP0, 0(RA)  // Return ipairs_aux, t, 0.
+ | addi.d RD, r0, (3+1)*8
+ | b ->fff_res
+ |
+ |//-- Base library: catch errors ----------------------------------------
+ |
+ |.ffunc pcall
+ | addi.d NARGS8:RC, NARGS8:RC, -8
+ | .LDXBU TMP3, DISPATCH, DISPATCH_GL(hookmask)
+ | or TMP2, BASE, r0
+ | blt NARGS8:RC, r0, ->fff_fallback  // No function argument at all.
+ | addi.d BASE, BASE, 16
+ | // Remember active hook before pcall.
+ | srli.w TMP3, TMP3, HOOK_ACTIVE_SHIFT  // Extract HOOK_ACTIVE bit from hookmask.
+ | andi TMP3, TMP3, 1
+ | addi.d PC, TMP3, 16+FRAME_PCALL  // Encode hook state into the frame link.
+ | beqz NARGS8:RC, ->vm_call_dispatch
+ |1:  // Shift call arguments down one slot, top to bottom.
+ | add.d TMP0, BASE, NARGS8:RC
+ |2:
+ | ld.d TMP1, -16(TMP0)
+ | st.d TMP1, -8(TMP0)
+ | addi.d TMP0, TMP0, -8
+ | bne TMP0, BASE, <2
+ | b ->vm_call_dispatch
+ |
+ |.ffunc xpcall
+ | addi.d NARGS8:TMP0, NARGS8:RC, -16
+ | ld.d CARG1, 0(BASE)
+ | ld.d CARG2, 8(BASE)
+ | .LDXBU TMP1, DISPATCH, DISPATCH_GL(hookmask)
+ | blt NARGS8:TMP0, r0, ->fff_fallback  // Need func + traceback.
+ | gettp TMP2, CARG2
+ | addi.d TMP2, TMP2, -LJ_TFUNC
+ | bnez TMP2, ->fff_fallback // Traceback must be a function.
+ | or TMP2, BASE, r0
+ | or NARGS8:RC, NARGS8:TMP0, r0
+ | addi.d BASE, BASE, 24
+ | // Remember active hook before pcall.
+ | srli.w TMP3, TMP1, HOOK_ACTIVE_SHIFT  // Fix: hookmask was loaded into TMP1, not TMP3.
+ | st.d CARG2, 0(TMP2) // Swap function and traceback.
+ | andi TMP3, TMP3, 1
+ | st.d CARG1, 8(TMP2)
+ | addi.d PC, TMP3, 24+FRAME_PCALL  // Encode hook state into the frame link.
+ | beqz NARGS8:RC, ->vm_call_dispatch
+ | b <1
+ |
+ |//-- Coroutine library --------------------------------------------------
+ |
+ |.macro coroutine_resume_wrap, resume
+ |.if resume
+ |.ffunc_1 coroutine_resume
+ | checktp CARG1, CARG1, -LJ_TTHREAD, ->fff_fallback
+ |.else
+ |.ffunc coroutine_wrap_aux
+ | ld.d L:CARG1, CFUNC:RB->upvalue[0].gcr  // Coroutine stashed in the closure upvalue.
+ | cleartp L:CARG1
+ |.endif
+ | ld.bu TMP0, L:CARG1->status
+ | ld.d TMP1, L:CARG1->cframe
+ | ld.d CARG2, L:CARG1->top
+ | ld.d TMP2, L:CARG1->base
+ | addi.w CARG4, TMP0, -LUA_YIELD  // 0 if yielded, negative if not started.
+ | add.d CARG3, CARG2, TMP0
+ | addi.d TMP3, CARG2, 8
+ | masknez CARG2, CARG2, CARG4  // Keep co->top if status == LUA_YIELD.
+ | maskeqz TMP3, TMP3, CARG4  // Else use co->top + 1 (skip function slot).
+ | or CARG2, TMP3, CARG2
+ | blt r0, CARG4, ->fff_fallback // st > LUA_YIELD?
+ | xor TMP2, TMP2, CARG3
+ | or CARG4, TMP2, TMP0  // 0 only if base == top && st == 0 (dead).
+ | bnez TMP1, ->fff_fallback // cframe != 0?
+ | ld.d TMP0, L:CARG1->maxstack
+ | ld.d PC, FRAME_PC(BASE)
+ | beqz CARG4, ->fff_fallback // base == top && st == 0?
+ | add.d TMP2, CARG2, NARGS8:RC
+ | sltu CARG4, TMP0, TMP2
+ | st.d BASE, L->base
+ | st.d PC, SAVE_PC(sp)
+ | bnez CARG4, ->fff_fallback // Stack overflow?
+ |1:
+ |.if resume
+ | addi.d BASE, BASE, 8 // Keep resumed thread in stack for GC.
+ | addi.d NARGS8:RC, NARGS8:RC, -8
+ | addi.d TMP2, TMP2, -8
+ |.endif
+ | st.d TMP2, L:CARG1->top
+ | st.d BASE, L->top
+ | add.d TMP1, BASE, NARGS8:RC
+ | or CARG3, CARG2, r0
+ |2: // Move args to coroutine.
+ | ld.d TMP0, 0(BASE)
+ | sltu TMP3, BASE, TMP1
+ | addi.d BASE, BASE, 8
+ | beqz TMP3, >3
+ | st.d TMP0, 0(CARG3)
+ | addi.d CARG3, CARG3, 8
+ | b <2
+ |3:
+ | or L:RA, L:CARG1, r0  // Save the coroutine across vm_resume.
+ | bl ->vm_resume // (lua_State *L, TValue *base, 0, 0)
+ | // Returns thread status.
+ |4:
+ | ld.d TMP2, L:RA->base
+ | sltui TMP1, CRET1, LUA_YIELD+1  // 1 on success (LUA_OK/LUA_YIELD).
+ | ld.d TMP3, L:RA->top
+ | li_vmstate INTERP
+ | ld.d BASE, L->base
+ | .STXD L, DISPATCH, DISPATCH_GL(cur_L)  // We are the current lua_State again.
+ | st_vmstate
+ | sub.d RD, TMP3, TMP2  // RD = number of result bytes.
+ | beqz TMP1, >8  // Coroutine raised an error?
+ | ld.d TMP0, L->maxstack
+ | add.d TMP1, BASE, RD
+ | beqz RD, >6 // No results?
+ | add.d TMP3, TMP2, RD
+ | bltu TMP0, TMP1, >9 // Need to grow stack?
+ | st.d TMP2, L:RA->top // Clear coroutine stack.
+ | or TMP1, BASE, r0
+ |5: // Move results from coroutine.
+ | ld.d TMP0, 0(TMP2)
+ | addi.d TMP2, TMP2, 8
+ | st.d TMP0, 0(TMP1)
+ | addi.d TMP1, TMP1, 8
+ | bltu TMP2, TMP3, <5
+ |6:
+ |.if resume
+ | mov_true TMP1
+ | addi.d RD, RD, 16
+ |7:
+ | st.d TMP1, -8(BASE) // Prepend true/false to results.
+ | addi.d RA, BASE, -8
+ |.else
+ | or RA, BASE, r0
+ | addi.d RD, RD, 8
+ |.endif
+ | andi TMP0, PC, FRAME_TYPE
+ | st.d PC, SAVE_PC(sp)
+ | or MULTRES, RD, r0
+ | beqz TMP0, ->BC_RET_Z
+ | b ->vm_return
+ |
+ |8: // Coroutine returned with error (at co->top-1).
+ |.if resume
+ | addi.d TMP3, TMP3, -8
+ | mov_false TMP1  // resume returns false, errmsg.
+ | addi.w RD, r0, (2+1)*8
+ | ld.d TMP0, 0(TMP3)
+ | st.d TMP3, L:RA->top // Remove error from coroutine stack.
+ | st.d TMP0, 0(BASE) // Copy error message.
+ | b <7
+ |.else
+ | or CARG1, L, r0
+ | or CARG2, L:RA, r0
+ | bl extern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co)
+ |.endif
+ |
+ |9: // Handle stack expansion on return from yield.
+ | or CARG1, L, r0
+ | srli.w CARG2, RD, 3
+ | bl extern lj_state_growstack // (lua_State *L, int n)
+ | addi.d CRET1, r0, 0  // Pretend success and redo the result copy.
+ | b <4
+ |.endmacro
+ |
+ | coroutine_resume_wrap 1 // coroutine.resume
+ | coroutine_resume_wrap 0 // coroutine.wrap
+ |
+ |.ffunc coroutine_yield
+ | ld.d TMP0, L->cframe
+ | add.d TMP1, BASE, NARGS8:RC
+ | addi.w CRET1, r0, LUA_YIELD
+ | st.d BASE, L->base
+ | andi TMP0, TMP0, CFRAME_RESUME  // Only yieldable if entered via resume.
+ | st.d TMP1, L->top
+ | beqz TMP0, ->fff_fallback
+ | st.d r0, L->cframe
+ | st.b CRET1, L->status
+ | b ->vm_leave_unw
+ |
+ |//-- Math library -------------------------------------------------------
+ |
+ |.macro math_round, func  // floor/ceil: integers pass through unchanged.
+ |->ff_math_ .. func:
+ | ld.d CARG1, 0(BASE)
+ | gettp TMP0, CARG1
+ | beqz NARGS8:RC, ->fff_fallback
+ | beq TMP0, TISNUM, ->fff_restv  // Integer: already rounded.
+ | fld.d FARG1, 0(BASE)
+ | bgeu TMP0, TISNUM, ->fff_fallback  // Not a number.
+ | bl ->vm_ .. func
+ | b ->fff_resn
+ |.endmacro
+ |
+ | math_round floor
+ | math_round ceil
+ |
+ |.ffunc_1 math_abs
+ | gettp CARG2, CARG1
+ | addi.d TMP2, CARG2, -LJ_TISNUM
+ | slli.w TMP1, CARG1, 0  // Sign-extend the low 32 bits.
+ | bnez TMP2, >1  // Not an integer?
+ | srai.w TMP0, TMP1, 31 // Extract sign. int
+ | xor TMP1, TMP1, TMP0
+ | sub.d CARG1, TMP1, TMP0  // Branchless abs: (x ^ sign) - sign.
+ | slli.d TMP3, CARG1, 32
+ | settp CARG1, TISNUM
+ | bge TMP3, r0, ->fff_restv  // Fits in int32 (not abs(INT_MIN))?
+ | ori CARG1, r0, 0x41e // 2^31 as a double.
+ | slli.w CARG1, CARG1, 4 // 0x41e0
+ | slli.d CARG1, CARG1, 48  // 0x41e0000000000000 = 2^31.
+ | b ->fff_restv
+ |1:
+ | sltui TMP2, CARG2, LJ_TISNUM
+ | bstrpick.d CARG1, CARG1, 62, 0  // Double: just clear the sign bit.
+ | beqz TMP2, ->fff_fallback // int
+ |// fallthrough
+ |// fallthrough
+ |
+ |->fff_restv:
+ | // CARG1 = TValue result.
+ | ld.d PC, FRAME_PC(BASE)
+ | st.d CARG1, -16(BASE)  // Store single result below BASE.
+ |->fff_res1:
+ | // RA = results, PC = return.
+ | addi.d RD, r0, (1+1)*8
+ |->fff_res:
+ | // RA = results, RD = (nresults+1)*8, PC = return.
+ | andi TMP0, PC, FRAME_TYPE
+ | or MULTRES, RD, r0
+ | addi.d RA, BASE, -16
+ | bnez TMP0, ->vm_return  // Non-Lua frame: take the generic return path.
+ | ld.w INS, -4(PC)
+ | decode_RB RB, INS  // RB = (wanted nresults)+1, scaled by 8.
+ |5:
+ | sltu TMP2, RD, RB
+ | decode_RA TMP0, INS
+ | bnez TMP2, >6 // More results expected?
+ | // Adjust BASE. KBASE is assumed to be set for the calling frame.
+ | sub.d BASE, RA, TMP0
+ | ins_next
+ |
+ |6: // Fill up results with nil.
+ | add.d TMP1, RA, RD
+ | addi.d RD, RD, 8
+ | st.d TISNIL, -8(TMP1)
+ | b <5
+ |
+ |.macro math_extern, func  // 1-number-arg libm wrapper.
+ | .ffunc_n math_ .. func
+ | bl extern func
+ | b ->fff_resn
+ |.endmacro
+ |
+ |.macro math_extern2, func  // 2-number-arg libm wrapper.
+ | .ffunc_nn math_ .. func
+ | bl extern func
+ | b ->fff_resn
+ |.endmacro
+ |
+ |.ffunc_n math_sqrt
+ | fsqrt.d FRET1, FARG1
+ |->fff_resn:
+ | ld.d PC, FRAME_PC(BASE)
+ | fst.d FRET1, -16(BASE)  // Store FP result below BASE.
+ | b ->fff_res1
+ |
+ |.ffunc math_log
+ | addi.d TMP1, r0, 8
+ | ld.d CARG1, 0(BASE)
+ | fld.d FARG1, 0(BASE)
+ | bne NARGS8:RC, TMP1, ->fff_fallback // Need exactly 1 argument.
+ | checknum CARG1, ->fff_fallback
+ | bl extern log
+ | b ->fff_resn
+ |
+ | math_extern log10
+ | math_extern exp
+ | math_extern sin
+ | math_extern cos
+ | math_extern tan
+ | math_extern asin
+ | math_extern acos
+ | math_extern atan
+ | math_extern sinh
+ | math_extern cosh
+ | math_extern tanh
+ | math_extern2 pow
+ | math_extern2 atan2
+ | math_extern2 fmod
+ |
+ |.ffunc_2 math_ldexp
+ | checknum CARG1, ->fff_fallback
+ | checkint CARG2, ->fff_fallback
+ | fld.d FARG1, 0(BASE)
+ | ld.w CARG1, 8(BASE)  // Integer exp goes in the first GPR arg.
+ | bl extern ldexp // (double x, int exp)
+ | b ->fff_resn
+ |
+ |.ffunc_n math_frexp
+ | ld.d PC, FRAME_PC(BASE)
+ | .ADD16I CARG1, DISPATCH, DISPATCH_GL(tmptv)  // int* out-arg in tmptv.
+ | bl extern frexp
+ | .LDXW TMP1, DISPATCH, DISPATCH_GL(tmptv)
+ | movgr2fr.w FARG2, TMP1
+ | fst.d FRET1, -16(BASE)
+ | ffint.d.w FARG2, FARG2  // Convert the exponent to a double.
+ | fst.d FARG2, -8(BASE)
+ | addi.d RD, r0, (2+1)*8
+ | b ->fff_res
+ |
+ |.ffunc_n math_modf
+ | addi.d CARG1, BASE, -16  // Integer part is written directly to the result slot.
+ | ld.d PC, FRAME_PC(BASE)
+ | bl extern modf
+ | fst.d FRET1, -8(BASE)  // Fractional part.
+ | addi.d RD, r0, (2+1)*8
+ | b ->fff_res
+ |
+ |.macro math_minmax, name, intins, intinsc, fpins
+ | .ffunc_1 name
+ | add.d TMP3, BASE, NARGS8:RC
+ | addi.d TMP2, BASE, 8
+ | checkint CARG1, >4
+ |1: // Handle integers.
+ | ld.d CARG2, 0(TMP2)
+ | beq TMP2, TMP3, ->fff_restv  // All args consumed.
+ | slli.w CARG1, CARG1, 0
+ | checkint CARG2, >3
+ | slli.w CARG2, CARG2, 0
+ | slt TMP0, CARG1, CARG2
+ | intins TMP1, CARG2, TMP0  // Branchless select via mask pair.
+ | intinsc CARG1, CARG1, TMP0
+ | or CARG1, CARG1, TMP1
+ | addi.d TMP2, TMP2, 8
+ | bstrpick.d CARG1, CARG1, 31, 0  // Re-tag intermediate as integer.
+ | settp CARG1, TISNUM
+ | b <1
+ |
+ |3: // Convert intermediate result to number and continue with number loop.
+ | movgr2fr.w FTMP3, CARG1
+ | checknum CARG2, ->fff_fallback
+ | ffint.d.w FTMP3, FTMP3
+ | fld.d FARG1, 0(TMP2)
+ | fmov.d FTMP4, FARG1
+ | b >6
+ |
+ |4:
+ | fld.d FTMP3, 0(BASE)
+ |5: // Handle numbers.
+ | ld.d CARG2, 0(TMP2)
+ | checknum CARG1, ->fff_fallback
+ | fld.d FTMP4, 0(TMP2)
+ | beq TMP2, TMP3, ->fff_resn  // All args consumed.
+ | checknum CARG2, >7
+ |6:
+ | fpins FRET1, FTMP3, FTMP4
+ | fmov.d FTMP3, FRET1  // Running result stays in FTMP3.
+ | addi.d TMP2, TMP2, 8
+ | b <5
+ |
+ |7: // Convert integer to number and continue with number loop.
+ | fld.s FARG1, 0(TMP2)  // Load just the low 32-bit integer payload.
+ | checkint CARG2, ->fff_fallback
+ | ffint.d.w FARG1, FARG1
+ | b <6
+ |.endmacro
+ |
+ | math_minmax math_min, masknez, maskeqz, fmin.d
+ | math_minmax math_max, maskeqz, masknez, fmax.d
+ |
+ |//-- String library -----------------------------------------------------
+ |
+ |.ffunc string_byte // Only handle the 1-arg case here.
+ | ld.d CARG1, 0(BASE)
+ | gettp TMP0, CARG1
+ | xori TMP1, NARGS8:RC, 8
+ | addi.d TMP0, TMP0, -LJ_TSTR
+ | or TMP1, TMP1, TMP0
+ | cleartp STR:CARG1
+ | bnez TMP1, ->fff_fallback // Need exactly 1 string argument.
+ | ld.w TMP0, STR:CARG1->len
+ | ld.d PC, FRAME_PC(BASE)
+ | sltu RD, r0, TMP0  // RD = (len != 0).
+ | ld.bu TMP2, STR:CARG1[1] // Access is always ok (NUL at end).
+ | addi.w RD, RD, 1
+ | slli.w RD, RD, 3 // RD = ((str->len != 0)+1)*8
+ | settp TMP2, TISNUM
+ | st.d TMP2, -16(BASE)
+ | b ->fff_res
+ |
+ |.ffunc string_char // Only handle the 1-arg case here.
+ | ffgccheck
+ | ld.d CARG1, 0(BASE)
+ | gettp TMP0, CARG1
+ | xori TMP1, NARGS8:RC, 8 // Need exactly 1 argument.
+ | addi.d TMP0, TMP0, -LJ_TISNUM // Integer.
+ | addi.d TMP2, r0, 255
+ | slli.w CARG1, CARG1, 0  // Sign-extend; negatives fail the range check below.
+ | or TMP1, TMP1, TMP0
+ | sltu TMP2, TMP2, CARG1 // !(255 < n).
+ | or TMP1, TMP1, TMP2
+ | addi.d CARG3, r0, 1  // Length 1.
+ | bnez TMP1, ->fff_fallback
+ | addi.d CARG2, sp, TMPD_OFS
+ | st.b CARG1, TMPD(sp)  // Stage the byte in the spill slot.
+ |->fff_newstr:
+ | st.d BASE, L->base
+ | st.d PC, SAVE_PC(sp)
+ | or CARG1, L, r0
+ | bl extern lj_str_new // (lua_State *L, char *str, size_t l)
+ | // Returns GCstr *.
+ | ld.d BASE, L->base  // Reload: the allocation may have moved the stack.
+ |->fff_resstr:
+ | addi.d TMP1, r0, LJ_TSTR
+ | settp CRET1, TMP1
+ | b ->fff_restv
+ |
+ |.ffunc string_sub
+ | ffgccheck
+ | ld.d CARG1, 0(BASE)
+ | ld.d CARG2, 8(BASE)
+ | ld.d CARG3, 16(BASE)
+ | addi.d TMP0, NARGS8:RC, -16
+ | gettp TMP1, CARG1
+ | blt TMP0, r0, ->fff_fallback  // Need at least 2 args.
+ | cleartp STR:CARG1, CARG1
+ | addi.w CARG4, r0, -1  // Default end = -1 (last char).
+ | beqz TMP0, >1
+ | slli.w CARG4, CARG3, 0
+ | checkint CARG3, ->fff_fallback
+ |1:
+ | checkint CARG2, ->fff_fallback
+ | addi.d TMP0, TMP1, -LJ_TSTR
+ | slli.w CARG3, CARG2, 0
+ | bnez TMP0, ->fff_fallback  // First arg must be a string.
+ | ld.w CARG2, STR:CARG1->len
+ | // STR:CARG1 = str, CARG2 = str->len, CARG3 = start, CARG4 = end
+ | addi.w TMP0, CARG2, 1
+ | slt TMP3, CARG4, r0
+ | add.w TMP2, CARG4, TMP0
+ | slt TMP1, CARG3, r0
+ | maskeqz TMP2, TMP2, TMP3
+ | masknez CARG4, CARG4, TMP3
+ | or CARG4, TMP2, CARG4 // if (end < 0) end += len+1
+ | add.w TMP2, CARG3, TMP0
+ | maskeqz TMP2, TMP2, TMP1
+ | masknez CARG3, CARG3, TMP1
+ | or CARG3, TMP2, CARG3 // if (start < 0) start += len+1
+ | addi.d TMP3, r0, 1
+ | slt TMP2, CARG4, r0
+ | slt TMP1, r0, CARG3
+ | masknez CARG4, CARG4, TMP2 // if (end < 0) end = 0
+ | maskeqz CARG3, CARG3, TMP1
+ | masknez TMP3, TMP3, TMP1
+ | slt TMP2, CARG2, CARG4
+ | or CARG3, TMP3, CARG3 // if (start < 1) start = 1
+ | masknez CARG4, CARG4, TMP2
+ | maskeqz CARG2, CARG2, TMP2
+ | or CARG4, CARG2, CARG4 // if (end > len) end = len
+ | add.d CARG2, STR:CARG1, CARG3
+ | sub.d CARG3, CARG4, CARG3 // len = end - start
+ | addi.d CARG2, CARG2, sizeof(GCstr)-1  // Pointer to first char (1-based start).
+ | addi.w CARG3, CARG3, 1 // len += 1
+ | bge CARG3, r0, ->fff_newstr
+ |->fff_emptystr: // Return empty string.
+ | addi.d TMP1, r0, LJ_TSTR
+ | .ADD16I STR:CARG1, DISPATCH, DISPATCH_GL(strempty)
+ | settp CARG1, TMP1
+ | b ->fff_restv
+ |
+ |.macro ffstring_op, name  // string.reverse/lower/upper via lj_buf helpers.
+ | .ffunc string_ .. name
+ | ffgccheck
+ | ld.d CARG2, 0(BASE)
+ | beqz NARGS8:RC, ->fff_fallback
+ | checkstr STR:CARG2, ->fff_fallback
+ | .ADD16I SBUF:CARG1, DISPATCH, DISPATCH_GL(tmpbuf)
+ | ld.d TMP0, SBUF:CARG1->b
+ | st.d L, SBUF:CARG1->L
+ | st.d BASE, L->base
+ | st.d TMP0, SBUF:CARG1->w  // Reset buffer write pointer.
+ | st.d PC, SAVE_PC(sp)
+ | bl extern lj_buf_putstr_ .. name
+ |// or SBUF:CARG1, SBUF:CRET1, r0
+ | bl extern lj_buf_tostr
+ | ld.d BASE, L->base
+ | b ->fff_resstr
+ |.endmacro
+ |
+ |ffstring_op reverse
+ |ffstring_op lower
+ |ffstring_op upper
+ |
+ |//-- Bit library --------------------------------------------------------
+ |
+ |->vm_tobit_fb:  // Slow path: convert a double argument to a 32-bit pattern.
+ | fld.d FARG1, 0(BASE)
+ | beqz TMP1, ->fff_fallback  // Not a number at all.
+ | fadd.d FARG1, FARG1, TOBIT  // 2^52+2^51 trick: low word becomes the int bits.
+ | movfr2gr.s CRET1, FARG1
+ | bstrpick.d CRET1, CRET1, 31, 0
+ | jirl r0, ra, 0
+ |
+ |.macro .ffunc_bit, name  // Bit op entry: CRET1 = first arg as zero-extended u32.
+ | .ffunc_1 bit_..name
+ | gettp TMP0, CARG1
+ | bstrpick.d CRET1, CARG1, 31, 0
+ | beq TMP0, TISNUM, >1
+ | sltui TMP1, TMP0, LJ_TISNUM
+ | bl ->vm_tobit_fb
+ |1:
+ |.endmacro
+ |
+ |.macro .ffunc_bit_op, name, bins  // Variadic band/bor/bxor fold loop.
+ | .ffunc_bit name
+ | addi.d TMP2, BASE, 8
+ | add.d TMP3, BASE, NARGS8:RC
+ |1:
+ | ld.d TMP1, 0(TMP2)
+ | beq TMP2, TMP3, ->fff_resi  // All args folded.
+ | gettp TMP0, TMP1
+ | addi.d TMP2, TMP2, 8
+ | bne TMP0, TISNUM, >2
+ | bstrpick.d TMP1, TMP1, 31, 0
+ | bins CRET1, CRET1, TMP1
+ | b <1
+ |2:  // Non-integer arg: inline tobit conversion.
+ | fld.d FARG1, -8(TMP2)
+ | sltui TMP0, TMP0, LJ_TISNUM
+ | fadd.d FARG1, FARG1, TOBIT
+ | beqz TMP0, ->fff_fallback
+ | movfr2gr.s TMP1, FARG1
+ | bstrpick.d TMP1, TMP1, 31, 0
+ | bins CRET1, CRET1, TMP1
+ | b <1
+ |.endmacro
+ |
+ |.ffunc_bit_op band, and
+ |.ffunc_bit_op bor, or
+ |.ffunc_bit_op bxor, xor
+ |
+ |.ffunc_bit bswap
+ | srli.d TMP0, CRET1, 8
+ | srli.d TMP1, CRET1, 24  // byte3 -> bits 7..0.
+ | srli.d TMP2,TMP0, 8
+ | andi TMP3, TMP2, 0xff  // byte2.
+ | slli.d TMP3, TMP3, 8  // byte2 -> bits 15..8.
+ | bstrins.d TMP1, CRET1, 31, 24  // byte0 -> bits 31..24.
+ | bstrins.d TMP3, TMP0, 23, 16  // byte1 -> bits 23..16.
+ | or CRET1, TMP1, TMP3
+ | b ->fff_resi
+ |
+ |.ffunc_bit tobit
+ |->fff_resi:
+ | settp CARG1, TISNUM // CARG1 = CRET1
+ | b ->fff_restv
+ |
+ |.ffunc_bit bnot
+ | nor CRET1, CRET1, r0
+ | bstrpick.d CRET1, CRET1, 31, 0  // Keep the result zero-extended.
+ | b ->fff_resi
+ |
+ |.macro .ffunc_bit_sh, name, shins, shmod
+ | .ffunc_2 bit_..name
+ | gettp TMP0, CARG1
+ | beq TMP0, TISNUM, >1
+ | sltui TMP1, TMP0, LJ_TISNUM
+ | bl ->vm_tobit_fb
+ |// or CARG1, CRET1, r0 // CARG1 = CRET1
+ |1:
+ | gettp TMP0, CARG2
+ | bstrpick.d CARG2, CARG2, 31, 0
+ | bne TMP0, TISNUM, ->fff_fallback  // Shift amount must be an integer.
+ | slli.w CARG1, CARG1, 0
+ |.if shmod == 1
+ | sub.w CARG2, r0, CARG2  // rol = rotr by negated amount.
+ |.endif
+ | shins CRET1, CARG1, CARG2
+ | bstrpick.d CRET1, CRET1, 31, 0
+ | b ->fff_resi
+ |.endmacro
+ |
+ |.ffunc_bit_sh lshift, sll.w, 0
+ |.ffunc_bit_sh rshift, srl.w, 0
+ |.ffunc_bit_sh arshift, sra.w, 0
+ |.ffunc_bit_sh rol, rotr.w, 1
+ |.ffunc_bit_sh ror, rotr.w, 0
+ |
+ |//-----------------------------------------------------------------------
+ |
+ |->fff_fallback: // Call fast function fallback handler.
+ | // BASE = new base, RB = CFUNC, RC = nargs*8
+ | ld.d PC, FRAME_PC(BASE) // Fallback may overwrite PC.
+ | ld.d CARG3, CFUNC:RB->f // Load C fallback function pointer.
+ | add.d TMP1, BASE, NARGS8:RC
+ | st.d BASE, L->base
+ | addi.d TMP0, TMP1, 8*LUA_MINSTACK
+ | ld.d TMP2, L->maxstack
+ | st.d PC, SAVE_PC(sp) // Redundant (but a defined value).
+ | st.d TMP1, L->top
+ | or CARG1, L, r0
+ | bltu TMP2, TMP0, >5 // Need to grow stack.
+ | jirl r1, CARG3, 0 // (lua_State *L)
+ | // Either throws an error, or recovers and returns -1, 0 or nresults+1.
+ | ld.d BASE, L->base
+ | slli.w RD, CRET1, 3 // RD = (nresults+1)*8.
+ | blt r0, CRET1, ->fff_res // Returned nresults+1?
+ |1: // Returned 0 or -1: retry fast path.
+ | ld.d LFUNC:RB, FRAME_FUNC(BASE)
+ | ld.d TMP0, L->top
+ | sub.d NARGS8:RC, TMP0, BASE
+ | cleartp LFUNC:RB
+ | bnez CRET1, ->vm_call_tail // Returned -1?
+ | ins_callt // Returned 0: retry fast path.
+ |
+ |// Reconstruct previous base for vmeta_call during tailcall.
+ |->vm_call_tail:
+ | andi TMP0, PC, FRAME_TYPE
+ | addi.d TMP2, r0, ~FRAME_TYPEP // TODO
+ | and TMP1, PC, TMP2 // Frame delta for non-Lua frames.
+ | bnez TMP0, >3
+ | ld.bu TMP1, OFS_RA(PC) // Lua frame: RA of the calling instruction.
+ | slli.w TMP1, TMP1, 3
+ | addi.w TMP1, TMP1, 16 // Frame delta = RA*8 + 16.
+ |3:
+ | sub.d TMP2, BASE, TMP1
+ | b ->vm_call_dispatch // Resolve again for tailcall.
+ |
+ |5: // Grow stack for fallback handler.
+ | addi.d CARG2, r0, LUA_MINSTACK
+ | or CARG1, L, r0
+ | bl extern lj_state_growstack // (lua_State *L, int n)
+ | ld.d BASE, L->base
+ | addi.d CRET1, r0, 0 // Set zero-flag to force retry.
+ | b <1
+ |
+ |->fff_gcstep: // Call GC step function.
+ | // BASE = new base, RC = nargs*8
+ | or MULTRES, ra, r0 // Save return address (MULTRES is free here).
+ | add.d TMP0, BASE, NARGS8:RC // Calculate L->top.
+ | st.d BASE, L->base
+ | st.d PC, SAVE_PC(sp) // Redundant (but a defined value).
+ | or CARG1, L, r0
+ | st.d TMP0, L->top
+ | bl extern lj_gc_step // (lua_State *L)
+ | ld.d BASE, L->base
+ |// or ra, MULTRES, r0
+ | ld.d TMP0, L->top
+ | ld.d CFUNC:RB, FRAME_FUNC(BASE)
+ | cleartp CFUNC:RB
+ | sub.d NARGS8:RC, TMP0, BASE
+ | jirl r0, MULTRES, 0 // Return to the saved ra.
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Special dispatch targets -------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |->vm_record: // Dispatch target for recording phase.
+ |.if JIT
+ | .LDXBU TMP3, DISPATCH, DISPATCH_GL(hookmask)
+ | andi TMP1, TMP3, HOOK_VMEVENT // No recording while in vmevent.
+ | bnez TMP1, >5
+ | // Decrement the hookcount for consistency, but always do the call.
+ | .LDXW TMP2, DISPATCH, DISPATCH_GL(hookcount)
+ | andi TMP1, TMP3, HOOK_ACTIVE
+ | bnez TMP1, >1
+ | addi.w TMP2, TMP2, -1
+ | andi TMP1, TMP3, LUA_MASKLINE|LUA_MASKCOUNT
+ | beqz TMP1, >1
+ | .STXW TMP2, DISPATCH, DISPATCH_GL(hookcount)
+ | b >1
+ |.endif
+ |
+ |->vm_rethook: // Dispatch target for return hooks.
+ | .LDXBU TMP3, DISPATCH, DISPATCH_GL(hookmask)
+ | andi TMP1, TMP3, HOOK_ACTIVE // Hook already active?
+ | beqz TMP1, >1
+ |5: // Re-dispatch to static ins.
+ | ld.d TMP1, GG_DISP2STATIC(TMP0) // Assumes TMP0 holds DISPATCH+OP*4.
+ | jirl r0, TMP1, 0
+ |
+ |->vm_inshook: // Dispatch target for instr/line hooks.
+ | .LDXBU TMP3, DISPATCH, DISPATCH_GL(hookmask)
+ | .LDXW TMP2, DISPATCH, DISPATCH_GL(hookcount)
+ | andi TMP1, TMP3, HOOK_ACTIVE // Hook already active?
+ | bnez TMP1, <5
+ | andi TMP1, TMP3, LUA_MASKLINE|LUA_MASKCOUNT
+ | addi.w TMP2, TMP2, -1
+ | beqz TMP1, <5
+ | .STXW TMP2, DISPATCH, DISPATCH_GL(hookcount)
+ | beqz TMP2, >1 // Count hook fires when counter hits zero.
+ | andi TMP1, TMP3, LUA_MASKLINE
+ | beqz TMP1, <5
+ |1:
+ | st.w MULTRES, TMPD(sp) // Save MULTRES across the hook call.
+ | or CARG2, PC, r0
+ | st.d BASE, L->base
+ | or CARG1, L, r0
+ | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC.
+ | bl extern lj_dispatch_ins // (lua_State *L, const BCIns *pc)
+ |3:
+ | ld.d BASE, L->base
+ |4: // Re-dispatch to static ins.
+ | ld.w INS, -4(PC) // Re-fetch the current instruction.
+ | decode_OP TMP1, INS
+ | decode_BC8b TMP1
+ | add.d TMP0, DISPATCH, TMP1
+ | decode_RD RD, INS
+ | ld.d TMP1, GG_DISP2STATIC(TMP0)
+ | decode_RA RA, INS
+ | jirl r0, TMP1, 0
+ |
+ |->cont_hook: // Continue from hook yield.
+ | addi.d PC, PC, 4
+ | ld.w MULTRES, -24(RB) // Restore MULTRES for *M ins.
+ | b <4
+ |
+ |->vm_hotloop: // Hot loop counter underflow.
+ |.if JIT
+ | ld.d LFUNC:TMP1, FRAME_FUNC(BASE)
+ | .ADD16I CARG1, DISPATCH, GG_DISP2J
+ | cleartp LFUNC:TMP1
+ | st.d PC, SAVE_PC(sp)
+ | ld.d TMP1, LFUNC:TMP1->pc
+ | or CARG2, PC, r0
+ | .STXD L, DISPATCH, DISPATCH_J(L)
+ | ld.bu TMP1, PC2PROTO(framesize)(TMP1) // Frame size from prototype.
+ | st.d BASE, L->base
+ | slli.d TMP1, TMP1, 3 // framesize*8 bytes.
+ | add.d TMP1, BASE, TMP1
+ | st.d TMP1, L->top
+ | bl extern lj_trace_hot // (jit_State *J, const BCIns *pc)
+ | b <3
+ |.endif
+ |
+ |
+ |->vm_callhook: // Dispatch target for call hooks.
+ | or CARG2, PC, r0
+ |.if JIT
+ | b >1
+ |.endif
+ |
+ |->vm_hotcall: // Hot call counter underflow.
+ |.if JIT
+ | ori CARG2, PC, 1 // Set low bit of pc argument: hot call marker.
+ |1:
+ |.endif
+ | add.d TMP0, BASE, RC
+ | st.d PC, SAVE_PC(sp)
+ | st.d BASE, L->base
+ | sub.d RA, RA, BASE // Save RA as a BASE-relative offset.
+ | st.d TMP0, L->top
+ | or CARG1, L, r0
+ | bl extern lj_dispatch_call // (lua_State *L, const BCIns *pc)
+ | // Returns ASMFunction.
+ | ld.d BASE, L->base
+ | ld.d TMP0, L->top
+ | st.d r0, SAVE_PC(sp) // Invalidate for subsequent line hook.
+ | add.d RA, BASE, RA // Restore absolute RA.
+ | sub.d NARGS8:RC, TMP0, BASE
+ | ld.d LFUNC:RB, FRAME_FUNC(BASE)
+ | cleartp LFUNC:RB
+ | ld.w INS, -4(PC)
+ | jirl r0, CRET1, 0 // Tailcall the returned ASMFunction.
+ |
+ |->cont_stitch: // Trace stitching.
+ |.if JIT
+ | // RA = resultptr, RB = meta base
+ | ld.w INS, -4(PC)
+ | ld.d TRACE:TMP2, -40(RB) // Save previous trace.
+ | decode_RA RC, INS
+ | addi.d TMP1, MULTRES, -8 // TMP1 = remaining result bytes.
+ | cleartp TRACE:TMP2
+ | add.d RC, BASE, RC // Call base.
+ | beqz TMP1, >2
+ |1: // Move results down.
+ | ld.d CARG1, 0(RA)
+ | addi.d TMP1, TMP1, -8
+ | addi.d RA, RA, 8
+ | st.d CARG1, 0(RC)
+ | addi.d RC, RC, 8
+ | bnez TMP1, <1
+ |2:
+ | decode_RA RA, INS
+ | decode_RB RB, INS
+ | add.d RA, RA, RB
+ | add.d RA, BASE, RA // RA = end of wanted results.
+ |3:
+ | sltu TMP1, RC, RA
+ | bnez TMP1, >9 // More results wanted?
+ |
+ | ld.hu TMP3, TRACE:TMP2->traceno
+ | ld.hu RD, TRACE:TMP2->link
+ | beq RD, TMP3, ->cont_nop // Blacklisted.
+ | slli.w RD, RD, 3
+ | bnez RD, =>BC_JLOOP // Jump to stitched trace.
+ |
+ | // Stitch a new trace to the previous trace.
+ | st.w TMP3, DISPATCH_J(exitno)(DISPATCH)
+ | .STXD L, DISPATCH, DISPATCH_J(L)
+ | st.d BASE, L->base
+ | .ADD16I CARG1, DISPATCH, GG_DISP2J
+ | or CARG2, PC, r0
+ | bl extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc)
+ | ld.d BASE, L->base
+ | b ->cont_nop
+ |
+ |9:
+ | st.d TISNIL, 0(RC) // Fill up missing results with nil.
+ | addi.d RC, RC, 8
+ | b <3
+ |.endif
+ |
+ |->vm_profhook: // Dispatch target for profiler hook.
+#if LJ_HASPROFILE
+ | or CARG1, L, r0
+ | or CARG2, PC, r0
+ | st.d BASE, L->base
+ | st.w MULTRES, TMPD(sp) // Save MULTRES across the hook call.
+ | bl extern lj_dispatch_profile // (lua_State *L, const BCIns *pc)
+ | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
+ | addi.d PC, PC, -4
+ | ld.d BASE, L->base
+ | b ->cont_nop
+#endif
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Trace exit handler -------------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |.macro savex_, a, b
+ | fst.d f..a, a*8(sp)
+ | fst.d f..b, b*8(sp)
+ | st.d r..a, 32*8+a*8(sp) // GPRs go above the 32 FPR slots.
+ | st.d r..b, 32*8+b*8(sp)
+ |.endmacro
+ |
+ |->vm_exit_handler:
+ |.if JIT
+ | addi.d sp, sp, -(32*8+32*8) // Room for 32 FPRs + 32 GPRs.
+ | savex_ 0, 2
+ | savex_ 4, 5
+ | savex_ 6, 7
+ | savex_ 8, 9
+ | savex_ 10, 11
+ | savex_ 12, 13
+ | savex_ 14, 15
+ | savex_ 16, 17
+ | savex_ 18, 19
+ | savex_ 20, 21
+ | savex_ 22, 23
+ | savex_ 24, 25
+ | savex_ 26, 27
+ | savex_ 28, 29
+ | savex_ 30, 31
+ | fst.d f1, 1*8(sp) // f1/f3 saved separately (r1/r3 are ra/sp).
+ | fst.d f3, 3*8(sp)
+ | st.d r0, 32*8+1*8(sp) // Clear RID_TMP.
+ | addi.d TMP2, sp, 32*8+32*8 // Recompute original value of sp.
+ | st.d TMP2, 32*8+3*8(sp) // Store sp in RID_SP
+ | li_vmstate EXIT
+ | .ADD16I DISPATCH, JGL, -GG_DISP2G-32768
+ | ld.w TMP1, 0(TMP2) // Load exit number.
+ | st_vmstate
+ | .LDXD L, DISPATCH, DISPATCH_GL(cur_L)
+ | .LDXD BASE, DISPATCH, DISPATCH_GL(jit_base)
+ | .STXD L, DISPATCH, DISPATCH_J(L)
+ | st.w ra, DISPATCH_J(parent)(DISPATCH) // Store trace number.
+ | st.d BASE, L->base
+ | st.w TMP1, DISPATCH_J(exitno)(DISPATCH) // Store exit number.
+ | .ADD16I CARG1, DISPATCH, GG_DISP2J
+ | .STXD r0, DISPATCH, DISPATCH_GL(jit_base)
+ | or CARG2, sp, r0 // ExitState is the register dump on the stack.
+ | bl extern lj_trace_exit // (jit_State *J, ExitState *ex)
+ | // Returns MULTRES (unscaled) or negated error code.
+ | ld.d TMP1, L->cframe
+ | addi.d TMP2, r0, -4
+ | ld.d BASE, L->base
+ | and sp, TMP1, TMP2 // Restore sp from the C frame (mask low bits).
+ | ld.d PC, SAVE_PC(sp) // Get SAVE_PC.
+ | st.d L, SAVE_L(sp) // Set SAVE_L (on-trace resume/yield).
+ | b >1
+ |.endif
+ |
+ |->vm_exit_interp:
+ |.if JIT
+ | // CRET1 = MULTRES or negated error code, BASE, PC and JGL set.
+ | ld.d L, SAVE_L(sp)
+ | .ADD16I DISPATCH, JGL, -GG_DISP2G-32768
+ | st.d BASE, L->base
+ |1:
+ | ld.d LFUNC:RB, FRAME_FUNC(BASE)
+ | sltui TMP0, CRET1, -LUA_ERRERR // TMP0 = 0 for a negated error code.
+ | beqz TMP0, >9
+ | addu16i.d TMP3, r0, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
+ | slli.d MULTRES, CRET1, 3 // MULTRES = nresults*8.
+ | cleartp LFUNC:RB
+ | st.w MULTRES, TMPD(sp)
+ | addi.d TISNIL, r0, LJ_TNIL
+ | addi.d TISNUM, r0, LJ_TISNUM // Setup type comparison constants.
+ | movgr2fr.w TOBIT, TMP3
+ | ld.d TMP1, LFUNC:RB->pc
+ | .STXD r0, DISPATCH, DISPATCH_GL(jit_base)
+ | ld.d KBASE, PC2PROTO(k)(TMP1)
+ | fcvt.d.s TOBIT, TOBIT
+ | // Modified copy of ins_next which handles function header dispatch, too.
+ | ld.w INS, 0(PC)
+ | addi.d PC, PC, 4
+ | addi.d CRET1, CRET1, 17
+ | // Assumes TISNIL == ~LJ_VMST_INTERP == -1
+ | .STXW TISNIL, DISPATCH, DISPATCH_GL(vmstate)
+ | decode_RD RD, INS
+ | beqz CRET1, >5
+ | decode_OP TMP1, INS
+ | decode_BC8b TMP1
+ | add.d TMP0, DISPATCH, TMP1
+ | sltui TMP2, TMP1, BC_FUNCF*8 // TMP2 = 0 for function header ops.
+ | ld.d TMP3, 0(TMP0)
+ | decode_RA RA, INS
+ | beqz TMP2, >2
+ | jirl r0, TMP3, 0
+ |2:
+ | sltui TMP2, TMP1, (BC_FUNCC+2)*8 // Fast function?
+ | ld.d TMP1, FRAME_PC(BASE)
+ | bnez TMP2, >3
+ | // Check frame below fast function.
+ | andi TMP0, TMP1, FRAME_TYPE
+ | bnez TMP0, >3 // Trace stitching continuation?
+ | // Otherwise set KBASE for Lua function below fast function.
+ | ld.w TMP2, -4(TMP1)
+ | decode_RA TMP0, TMP2
+ | sub.d TMP1, BASE, TMP0
+ | ld.d LFUNC:TMP2, -32(TMP1)
+ | cleartp LFUNC:TMP2
+ | ld.d TMP1, LFUNC:TMP2->pc
+ | ld.d KBASE, PC2PROTO(k)(TMP1)
+ |3:
+ | addi.d RC, MULTRES, -8
+ | add.d RA, RA, BASE
+ | jirl r0, TMP3, 0
+ |
+ |5: // Dispatch to static entry of original ins replaced by BC_JLOOP.
+ | .LDXD TMP0, DISPATCH, DISPATCH_J(trace)
+ | add.d TMP0, TMP0, RD
+ | ld.d TRACE:TMP2, 0(TMP0)
+ | ld.w INS, TRACE:TMP2->startins
+ | decode_OP TMP1, INS
+ | decode_BC8b TMP1
+ | add.d TMP0, DISPATCH, TMP1
+ | decode_RD RD, INS
+ | ld.d TMP4, GG_DISP2STATIC(TMP0)
+ | decode_RA RA, INS
+ | jirl r0, TMP4, 0
+ |
+ |9: // Rethrow error from the right C frame.
+ | sub.w CARG2, r0, CRET1 //TODO LA: sub.w no trap
+ | or CARG1, L, r0
+ | bl extern lj_err_trace // (lua_State *L, int errcode)
+ |.endif
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Math helper functions ----------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |// Hard-float round to integer.
+ |// Modifies TMP0, TMP1, FARG1, FARG5, FTMP1, FTMP3, FTMP4
+ |.macro vm_round_hf, func
+ | addu16i.d TMP0, r0, 0x4330 // Hiword of 2^52 (double).
+ | slli.d TMP0, TMP0, 32
+ | movgr2fr.d FARG5, TMP0
+ | fabs.d FTMP4, FARG1 // |x|
+ | movfr2gr.d TMP1, FARG1 // TMP1 = raw bits of x.
+ | fcmp.clt.d FCC0, FTMP4, FARG5
+ | fadd.d FTMP3, FTMP4, FARG5 // (|x| + 2^52) - 2^52
+ | fsub.d FTMP3, FTMP3, FARG5
+ | bceqz FCC0, >1 // Truncate only if |x| < 2^52.
+ | slt TMP1, TMP1, r0 // TMP1 = sign bit of x.
+ |.if "func" == "ceil"
+ | addu16i.d TMP0, r0, 0xbff0 // Hiword of -1 (double).
+ |.else
+ | addu16i.d TMP0, r0, 0x3ff0 // Hiword of +1 (double).
+ |.endif
+ |.if "func" == "trunc"
+ | slli.d TMP0, TMP0, 32
+ | movgr2fr.d FARG5, TMP0
+ | fcmp.clt.d FCC0, FTMP4, FRET1 // |x| < result?
+ | fsub.d FTMP4, FTMP3, FARG5 // If yes, subtract +1.
+ | fsel FTMP1, FTMP3, FTMP4, FCC0
+ | movgr2fr.d FTMP3, TMP1
+ | fneg.d FTMP4, FTMP1
+ | movfr2cf FCC0, FTMP3
+ | fsel FTMP3, FTMP1, FTMP4, FCC0 // Negate result if x was negative.
+ | jirl r0, ra, 0
+ |.else
+ | fneg.d FTMP4, FTMP3
+ | slli.d TMP0, TMP0, 32
+ | movgr2fr.d FARG5, TMP0
+ | movgr2fr.d FTMP1, TMP1
+ | movfr2cf FCC0, FTMP1
+ | fsel FTMP1, FTMP3, FTMP4, FCC0 // Pick sign to match x.
+ |.if "func" == "ceil"
+ | fcmp.clt.d FCC0, FTMP1, FARG1 // x > result?
+ |.else
+ | fcmp.clt.d FCC0, FARG1, FTMP1 // x < result?
+ |.endif
+ | fsub.d FTMP4, FTMP1, FARG5 // If yes, subtract +-1.
+ | fsel FTMP3, FTMP1, FTMP4, FCC0
+ | fmov.d FARG1, FTMP3
+ | jirl r0, ra, 0
+ |.endif
+ |1:
+ | fmov.d FTMP3, FARG1 // |x| >= 2^52: already an integer.
+ | jirl r0, ra, 0
+ |.endmacro
+ |
+ |
+ |->vm_floor:
+ | vm_round_hf floor
+ |->vm_ceil:
+ | vm_round_hf ceil
+ |->vm_trunc:
+ |.if JIT
+ | vm_round_hf trunc
+ |.endif
+ |
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Miscellaneous functions --------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |.define NEXT_TAB, TAB:CARG1
+ |.define NEXT_IDX, CARG2
+ |.define NEXT_ASIZE, CARG3
+ |.define NEXT_NIL, CARG4
+ |.define NEXT_TMP0, TMP0
+ |.define NEXT_TMP1, TMP1
+ |.define NEXT_TMP2, TMP2
+ |.define NEXT_RES_VK, CRET1
+ |.define NEXT_RES_IDX, CRET2
+ |.define NEXT_RES_PTR, sp
+ |.define NEXT_RES_VAL, 0(sp)
+ |.define NEXT_RES_KEY, 8(sp)
+ |
+ |// TValue *lj_vm_next(GCtab *t, uint32_t idx)
+ |// Next idx returned in CRET2.
+ |->vm_next:
+ |.if JIT
+ | ld.w NEXT_ASIZE, NEXT_TAB->asize
+ | ld.d NEXT_TMP0, NEXT_TAB->array
+ | addi.d NEXT_NIL, r0, LJ_TNIL
+ |1: // Traverse array part.
+ | sltu TMP3, NEXT_IDX, NEXT_ASIZE // Still inside the array part?
+ | slli.w NEXT_TMP1, NEXT_IDX, 3
+ | add.d NEXT_TMP1, NEXT_TMP0, NEXT_TMP1
+ | beqz TMP3, >5
+ | addi.d TMP3, r0, LJ_TISNUM
+ | ld.d NEXT_TMP2, 0(NEXT_TMP1)
+ | slli.d TMP3, TMP3, 47 // Integer key tag in bits 47+.
+ | or NEXT_TMP1, NEXT_IDX, TMP3 // Boxed integer key = tag | idx.
+ | addi.w NEXT_IDX, NEXT_IDX, 1
+ | beq NEXT_TMP2, NEXT_NIL, <1 // Skip holes in array part.
+ | st.d NEXT_TMP2, NEXT_RES_VAL
+ | st.d NEXT_TMP1, NEXT_RES_KEY
+ | or NEXT_RES_VK, NEXT_RES_PTR, r0
+ | or NEXT_RES_IDX, NEXT_IDX, r0
+ | jirl r0, ra, 0
+ |
+ |5: // Traverse hash part.
+ | sub.w NEXT_RES_IDX, NEXT_IDX, NEXT_ASIZE
+ | ld.w NEXT_TMP0, NEXT_TAB->hmask
+ | ld.d NODE:NEXT_RES_VK, NEXT_TAB->node
+ | slli.w NEXT_TMP2, NEXT_RES_IDX, 5
+ | slli.w TMP3, NEXT_RES_IDX, 3
+ | sub.w TMP3, NEXT_TMP2, TMP3 // idx*32 - idx*8 = idx*sizeof(Node).
+ | add.d NODE:NEXT_RES_VK, NODE:NEXT_RES_VK, TMP3
+ |6:
+ | sltu TMP3, NEXT_TMP0, NEXT_RES_IDX // Past hmask? End of iteration.
+ | bnez TMP3, >8
+ | ld.d NEXT_TMP2, NODE:NEXT_RES_VK->val
+ | addi.w NEXT_RES_IDX, NEXT_RES_IDX, 1
+ | bne NEXT_TMP2, NEXT_NIL, >9
+ | // Skip holes in hash part.
+ | addi.d NODE:NEXT_RES_VK, NODE:NEXT_RES_VK, sizeof(Node)
+ | b <6
+ |
+ |8: // End of iteration. Set the key to nil (not the value).
+ | st.d NEXT_NIL, NEXT_RES_KEY
+ | or NEXT_RES_VK, NEXT_RES_PTR, r0
+ |9:
+ | add.w NEXT_RES_IDX, NEXT_RES_IDX, NEXT_ASIZE
+ | jirl r0, ra, 0
+ |.endif
+ |
+ |//-----------------------------------------------------------------------
+ |//-- FFI helper functions -----------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |// Handler for callback functions. Callback slot number in r19, g in r17.
+ |->vm_ffi_callback:
+ |.if FFI
+ |.type CTSTATE, CTState, PC
+ | saveregs
+ | ld.d CTSTATE, GL:r17->ctype_state
+ | .ADD16I DISPATCH, r17, GG_G2DISP
+ | st.w r19, CTSTATE->cb.slot
+ | st.d CARG1, CTSTATE->cb.gpr[0] // Spill all C argument registers.
+ | fst.d FARG1, CTSTATE->cb.fpr[0]
+ | st.d CARG2, CTSTATE->cb.gpr[1]
+ | fst.d FARG2, CTSTATE->cb.fpr[1]
+ | st.d CARG3, CTSTATE->cb.gpr[2]
+ | fst.d FARG3, CTSTATE->cb.fpr[2]
+ | st.d CARG4, CTSTATE->cb.gpr[3]
+ | fst.d FARG4, CTSTATE->cb.fpr[3]
+ | st.d CARG5, CTSTATE->cb.gpr[4]
+ | fst.d FARG5, CTSTATE->cb.fpr[4]
+ | st.d CARG6, CTSTATE->cb.gpr[5]
+ | fst.d FARG6, CTSTATE->cb.fpr[5]
+ | st.d CARG7, CTSTATE->cb.gpr[6]
+ | fst.d FARG7, CTSTATE->cb.fpr[6]
+ | st.d CARG8, CTSTATE->cb.gpr[7]
+ | fst.d FARG8, CTSTATE->cb.fpr[7]
+ | addi.d TMP0, sp, CFRAME_SPACE // Stack args start above our frame.
+ | st.d TMP0, CTSTATE->cb.stack
+ | st.d r0, SAVE_PC(sp) // Any value outside of bytecode is ok.
+ | or CARG1, CTSTATE, r0
+ | or CARG2, sp, r0
+ | bl extern lj_ccallback_enter // (CTState *cts, void *cf)
+ | // Returns lua_State *.
+ | ld.d BASE, L:CRET1->base
+ | ld.d RC, L:CRET1->top
+ | or L, CRET1, r0
+ | addu16i.d TMP3, r0, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
+ | ld.d LFUNC:RB, FRAME_FUNC(BASE)
+ | movgr2fr.w TOBIT, TMP3
+ | addi.d TISNIL, r0, LJ_TNIL
+ | addi.d TISNUM, r0, LJ_TISNUM // Setup type comparison constants.
+ | li_vmstate INTERP
+ | sub.w RC, RC, BASE // RC = (top-base) = nargs*8.
+ | cleartp LFUNC:RB
+ | st_vmstate
+ | fcvt.d.s TOBIT, TOBIT
+ | ins_callt
+ |.endif
+ |
+ |->cont_ffi_callback: // Return from FFI callback.
+ |.if FFI
+ | .LDXD CTSTATE, DISPATCH, DISPATCH_GL(ctype_state)
+ | st.d BASE, L->base
+ | st.d RB, L->top
+ | st.d L, CTSTATE->L
+ | or CARG1, CTSTATE, r0
+ | or CARG2, RA, r0
+ | bl extern lj_ccallback_leave // (CTState *cts, TValue *o)
+ | fld.d FRET1, CTSTATE->cb.fpr[0] // Reload C return values.
+ | ld.d CRET1, CTSTATE->cb.gpr[0]
+ | fld.d FRET2, CTSTATE->cb.fpr[1]
+ | ld.d CRET2, CTSTATE->cb.gpr[1]
+ | b ->vm_leave_unw
+ |.endif
+ |
+ |->vm_ffi_call: // Call C function via FFI.
+ | // Caveat: needs special frame unwinding, see below.
+ |.if FFI
+ | .type CCSTATE, CCallState, CARG1
+ | ld.w TMP1, CCSTATE->spadj
+ | ld.bu CARG2, CCSTATE->nsp // Number of stack arg slots.
+ | ld.bu CARG3, CCSTATE->nfpr // Number of FP arg registers.
+ | or TMP2, sp, r0
+ | sub.d sp, sp, TMP1 // Adjust sp for stack args.
+ | st.d ra, -8(TMP2)
+ | slli.w CARG2, CARG2, 3 // Stack arg size = nsp*8 bytes.
+ | st.d r23, -16(TMP2)
+ | st.d CCSTATE, -24(TMP2)
+ | or r23, TMP2, r0 // r23 = saved original sp (callee-saved).
+ | addi.d TMP1, CCSTATE, offsetof(CCallState, stack)
+ | or TMP2, sp, r0
+ | add.d TMP3, TMP1, CARG2
+ | beqz CARG2, >2
+ |1: // Copy stack slots.
+ | ld.d TMP0, 0(TMP1)
+ | addi.d TMP1, TMP1, 8
+ | sltu TMP4, TMP1, TMP3 // More stack args to copy?
+ | st.d TMP0, 0(TMP2)
+ | addi.d TMP2, TMP2, 8
+ | bnez TMP4, <1
+ |2:
+ | beqz CARG3, >3 // Skip FP args if none are used.
+ | fld.d FARG1, CCSTATE->fpr[0]
+ | fld.d FARG2, CCSTATE->fpr[1]
+ | fld.d FARG3, CCSTATE->fpr[2]
+ | fld.d FARG4, CCSTATE->fpr[3]
+ | fld.d FARG5, CCSTATE->fpr[4]
+ | fld.d FARG6, CCSTATE->fpr[5]
+ | fld.d FARG7, CCSTATE->fpr[6]
+ | fld.d FARG8, CCSTATE->fpr[7]
+ |3:
+ | ld.d TMP3, CCSTATE->func
+ | ld.d CARG2, CCSTATE->gpr[1]
+ | ld.d CARG3, CCSTATE->gpr[2]
+ | ld.d CARG4, CCSTATE->gpr[3]
+ | ld.d CARG5, CCSTATE->gpr[4]
+ | ld.d CARG6, CCSTATE->gpr[5]
+ | ld.d CARG7, CCSTATE->gpr[6]
+ | ld.d CARG8, CCSTATE->gpr[7]
+ | ld.d CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1.
+ | jirl r1, TMP3, 0 // Call the C function.
+ | ld.d CCSTATE:TMP1, -24(r23)
+ | ld.d TMP2, -16(r23)
+ | ld.d ra, -8(r23)
+ | st.d CRET1, CCSTATE:TMP1->gpr[0] // Store return values.
+ | st.d CRET2, CCSTATE:TMP1->gpr[1]
+ | fst.d FRET1, CCSTATE:TMP1->fpr[0]
+ | fst.d FRET2, CCSTATE:TMP1->fpr[1]
+ | or sp, r23, r0 // Restore sp and r23.
+ | or r23, TMP2, r0
+ | jirl r0, ra, 0
+ |.endif
+ |// Note: vm_ffi_call must be the last function in this object file!
+ |
+ |//-----------------------------------------------------------------------
+}
+
+/* Generate the code for a single instruction. */
+static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+{
+ int vk = 0;
+ |=>defop:
+
+ switch (op) {
+
+ /* -- Comparison ops ---------------------------------------------------- */
+
+ /* Remember: all ops branch for a true comparison, fall through otherwise. */
+
+ case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
+ | // RA = src1*8, RD = src2*8, JMP with RD = target
+ | add.d RA, BASE, RA
+ | add.d RD, BASE, RD
+ if (op == BC_ISLT || op == BC_ISGE) {
+ | ld.d CARG1, 0(RA)
+ | ld.d CARG2, 0(RD)
+ | gettp CARG3, CARG1
+ | gettp CARG4, CARG2
+ } else {
+ | ld.d CARG2, 0(RA) // LE/GT: swap operands, compare with LT/ULT.
+ | ld.d CARG1, 0(RD)
+ | gettp CARG3, CARG2
+ | gettp CARG4, CARG1
+ }
+ | ld.hu TMP2, OFS_RD(PC) // TMP2=jump
+ | addi.d PC, PC, 4
+ | bne CARG3, TISNUM, >2
+ | decode_BC4b TMP2 // TMP2 = jump<<2.
+ | bne CARG4, TISNUM, >5
+ | slli.w CARG1, CARG1, 0 // Sign-extend int operands.
+ | slli.w CARG2, CARG2, 0
+ | addu16i.d TMP3, r0, -0x2 // -BCBIAS_J*4
+ | slt TMP1, CARG1, CARG2 // TMP1 = (src1 < src2).
+ | add.w TMP2, TMP2, TMP3 // TMP2=(jump-0x8000)<<2
+ if (op == BC_ISLT || op == BC_ISGT) {
+ | maskeqz TMP2, TMP2, TMP1 // Keep branch offset only if true.
+ } else {
+ | masknez TMP2, TMP2,TMP1
+ }
+ |1:
+ | add.d PC, PC, TMP2
+ | ins_next
+ |
+ |2: // RA is not an integer.
+ | sltui TMP1, CARG3, LJ_TISNUM
+ | addu16i.d TMP3, r0, -0x2 // -BCBIAS_J*4
+ | beqz TMP1, ->vmeta_comp
+ | sltui TMP1, CARG4, LJ_TISNUM
+ | decode_BC4b TMP2
+ | beqz TMP1, >4
+ | movgr2fr.d FTMP0, CARG1 // Bitcast boxed numbers to doubles.
+ | movgr2fr.d FTMP2, CARG2
+ |3: // RA and RD are both numbers.
+ if (op == BC_ISLT || op == BC_ISGE) {
+ | fcmp.clt.d FCC0, FTMP0, FTMP2
+ } else {
+ | fcmp.cult.d FCC0, FTMP0, FTMP2 // Unordered-lt: NaN handling for LE/GT.
+ }
+ | add.w TMP2, TMP2, TMP3
+ | movcf2gr TMP3, FCC0
+ if (op == BC_ISLT || op == BC_ISGT) {
+ | maskeqz TMP2, TMP2, TMP3
+ } else {
+ | masknez TMP2, TMP2, TMP3
+ }
+ | b <1
+ |
+ |4: // RA is a number, RD is not a number.
+ | // RA is a number, RD is an integer. Convert RD to a number.
+ | bne CARG4, TISNUM, ->vmeta_comp
+ if (op == BC_ISLT || op == BC_ISGE) {
+ | movgr2fr.w FTMP2, CARG2
+ | movgr2fr.d FTMP0, CARG1
+ | ffint.d.w FTMP2, FTMP2
+ } else {
+ | movgr2fr.w FTMP0, CARG1
+ | movgr2fr.d FTMP2, CARG2
+ | ffint.d.w FTMP0, FTMP0
+ }
+ | b <3
+ |
+ |5: // RA is an integer, RD is not an integer
+ | sltui TMP1, CARG4, LJ_TISNUM
+ | addu16i.d TMP3, r0, -0x2 // -BCBIAS_J*4
+ | beqz TMP1, ->vmeta_comp
+ | // RA is an integer, RD is a number. Convert RA to a number.
+ if (op == BC_ISLT || op == BC_ISGE) {
+ | movgr2fr.w FTMP0, CARG1
+ | movgr2fr.d FTMP2, CARG2
+ | ffint.d.w FTMP0, FTMP0
+ } else {
+ | movgr2fr.w FTMP2, CARG2
+ | movgr2fr.d FTMP0, CARG1
+ | ffint.d.w FTMP2, FTMP2
+ }
+ | b <3
+ break;
+
+ case BC_ISEQV: case BC_ISNEV:
+ vk = op == BC_ISEQV;
+ | // RA = src1*8, RD = src2*8, JMP with RD = target
+ | add.d RA, BASE, RA
+ | add.d RD, BASE, RD
+ | addi.d PC, PC, 4
+ | ld.d CARG1, 0(RA)
+ | ld.d CARG2, 0(RD)
+ | ld.hu TMP2, -4+OFS_RD(PC)
+ | gettp CARG3, CARG1
+ | gettp CARG4, CARG2
+ | sltu TMP0, TISNUM, CARG3 // TMP0 = 1 if src1 tag is not a number.
+ | sltu TMP1, TISNUM, CARG4 // TMP1 = 1 if src2 tag is not a number.
+ | or TMP0, TMP0, TMP1
+ | addu16i.d TMP3, r0, -0x2 // -BCBIAS_J*4
+ if (vk) {
+ | beqz TMP0, ->BC_ISEQN_Z // Both numbers: use numeric compare.
+ } else {
+ | beqz TMP0, ->BC_ISNEN_Z
+ }
+ |// Either or both types are not numbers.
+ |.if FFI
+ | // Check if RA or RD is a cdata.
+ | addi.w TMP0, r0, LJ_TCDATA
+ | beq CARG3, TMP0, ->vmeta_equal_cd
+ | beq CARG4, TMP0, ->vmeta_equal_cd
+ |.endif
+ | addu16i.d TMP3, r0, -0x2 // -BCBIAS_J*4 (NOTE(review): redundant reload).
+ | decode_BC4b TMP2
+ | add.w TMP2, TMP2, TMP3 // (jump-0x8000)<<2
+ | bne CARG1, CARG2, >2
+ | // Tag and value are equal.
+ if (vk) {
+ |->BC_ISEQV_Z:
+ | add.d PC, PC, TMP2 // Take the branch.
+ }
+ |1:
+ | ins_next
+ |
+ |2: // Check if the tags are the same and it's a table or userdata.
+ | xor TMP3, CARG3, CARG4 // Same type?
+ | sltui TMP0, CARG3, LJ_TISTABUD+1 // Table or userdata? TMP0=1
+ | masknez TMP0, TMP0, TMP3 // TMP0=0: not same type, or same type table/userdata
+ | cleartp TAB:TMP1, CARG1
+ if (vk) {
+ | beqz TMP0, <1
+ } else {
+ | beqz TMP0, ->BC_ISEQV_Z // Reuse code from opposite instruction.
+ }
+ | // Different tables or userdatas. Need to check __eq metamethod.
+ | // Field metatable must be at same offset for GCtab and GCudata!
+ | ld.d TAB:TMP3, TAB:TMP1->metatable
+ if (vk) {
+ | beqz TAB:TMP3, <1 // No metatable?
+ | ld.bu TMP3, TAB:TMP3->nomm
+ | andi TMP3, TMP3, 1<<MM_eq
+ | addi.w TMP0, r0, 0 // ne = 0
+ | bnez TMP3, <1 // Or 'no __eq' flag set?
+ } else {
+ | beqz TAB:TMP3,->BC_ISEQV_Z // No metatable?
+ | ld.bu TMP3, TAB:TMP3->nomm
+ | andi TMP3, TMP3, 1<<MM_eq
+ | addi.w TMP0, r0, 1 // ne = 1
+ | bnez TMP3, ->BC_ISEQV_Z // Or 'no __eq' flag set?
+ }
+ | b ->vmeta_equal // Handle __eq metamethod.
+ break;
+
+ case BC_ISEQS: case BC_ISNES:
+ vk = op == BC_ISEQS;
+ | // RA = src*8, RD = str_const*8 (~), JMP with RD = target
+ | add.d RA, BASE, RA
+ | addi.d PC, PC, 4
+ | ld.d CARG1, 0(RA)
+ | sub.d RD, KBASE, RD
+ | ld.hu TMP2, -4+OFS_RD(PC)
+ | ld.d CARG2, -8(RD) // KBASE-8-str_const*8
+ |.if FFI
+ | gettp CARG3, CARG1
+ | addi.w TMP1, r0, LJ_TCDATA
+ |.endif
+ | addi.w TMP0, r0, LJ_TSTR
+ | decode_BC4b TMP2
+ | settp CARG2, TMP0 // Box the string constant for a direct compare.
+ | addu16i.d TMP3, r0, -0x2 // -BCBIAS_J*4
+ |.if FFI
+ | beq CARG3, TMP1, ->vmeta_equal_cd
+ |.endif
+ | xor TMP0, CARG1, CARG2 // TMP0=0: A==D; TMP0!=0: A!=D
+ | add.w TMP2, TMP2, TMP3
+ if (vk) {
+ | masknez TMP2, TMP2, TMP0 // Branch only on equality.
+ } else {
+ | maskeqz TMP2, TMP2, TMP0 // Branch only on inequality.
+ }
+ | add.d PC, PC, TMP2
+ | ins_next
+ break;
+
+ case BC_ISEQN: case BC_ISNEN:
+ vk = op == BC_ISEQN;
+ | // RA = src*8, RD = num_const*8, JMP with RD = target
+ | add.d RA, BASE, RA
+ | add.d RD, KBASE, RD
+ | ld.d CARG1, 0(RA)
+ | ld.d CARG2, 0(RD)
+ | ld.hu TMP2, OFS_RD(PC)
+ | addi.d PC, PC, 4
+ | gettp CARG3, CARG1
+ | gettp CARG4, CARG2
+ | addu16i.d TMP3, r0, -0x2 // -BCBIAS_J*4
+ if (vk) {
+ |->BC_ISEQN_Z:
+ } else {
+ |->BC_ISNEN_Z:
+ }
+ | decode_BC4b TMP2
+ | bne CARG3, TISNUM, >4
+ | add.w TMP2, TMP2, TMP3
+ | bne CARG4, TISNUM, >6
+ | xor TMP0, CARG1, CARG2 // TMP0=0: A==D; TMP0!=0: A!=D
+ |1:
+ if (vk) {
+ | masknez TMP2, TMP2, TMP0 // EQ: branch only if equal.
+ | add.d PC, PC, TMP2
+ |2:
+ } else {
+ | maskeqz TMP2, TMP2, TMP0 // NE: branch only if not equal.
+ |2:
+ | add.d PC, PC, TMP2
+ }
+ |3:
+ | ins_next
+ |
+ |4: // RA is not an integer.
+ | sltu TMP0, CARG3, TISNUM // TMP0 = 1 if RA is a number.
+ | add.w TMP2, TMP2, TMP3
+ |.if FFI
+ | beqz TMP0, >7
+ |.else
+ | beqz TMP0, <2
+ |.endif
+ | movgr2fr.d FTMP0, CARG1 // Bitcast boxed numbers to doubles.
+ | movgr2fr.d FTMP2, CARG2
+ | bne CARG4, TISNUM, >5
+ |// RA is a number, RD is an integer.
+ | ffint.d.w FTMP2, FTMP2
+ |
+ |5: // RA and RD are both numbers.
+ | fcmp.cune.d FCC0, FTMP0, FTMP2 // Unordered-ne: NaN compares unequal.
+ | movcf2gr TMP0, FCC0
+ | b <1
+ |
+ |6: // RA is an integer, RD is a number.
+ | sltu TMP0, CARG4, TISNUM
+ |.if FFI
+ | beqz TMP0, >8
+ |.else
+ | beqz TMP0, <2
+ |.endif
+ | movgr2fr.w FTMP0, CARG1
+ | movgr2fr.d FTMP2, CARG2
+ | ffint.d.w FTMP0, FTMP0
+ | b <5
+ |
+ |.if FFI
+ |7: // RA not int, not number
+ | addi.w TMP0, r0, LJ_TCDATA
+ | bne CARG3, TMP0, <2
+ | b ->vmeta_equal_cd
+ |
+ |8: // RD not int, not number
+ | addi.w TMP0, r0, LJ_TCDATA
+ | bne CARG4, TMP0, <2
+ | b ->vmeta_equal_cd
+ |.endif
+ break;
+
+ case BC_ISEQP: case BC_ISNEP:
+ vk = op == BC_ISEQP;
+ | // RA = src*8, RD = primitive_type*8 (~), JMP with RD = target
+ | add.d RA, BASE, RA
+ | srli.w TMP0, RD, 3 // Undo *8 scaling of the type operand.
+ | ld.d TMP1, 0(RA)
+ | nor TMP0, TMP0, r0 // ~TMP0: ~0 ~1 ~2
+ | ld.hu TMP2, OFS_RD(PC) // TMP2: RD in next INS, branch target
+ | gettp TMP1, TMP1
+ | addi.d PC, PC, 4
+ | xor TMP0, TMP1, TMP0 // TMP0=0 A=D; TMP0!=0 A!=D
+ |.if FFI
+ | addi.w TMP3, r0, LJ_TCDATA
+ | beq TMP1, TMP3, ->vmeta_equal_cd
+ |.endif
+ | decode_BC4b TMP2
+ | addu16i.d TMP3, r0, -0x2 // -BCBIAS_J*4
+ | add.w TMP2, TMP2, TMP3 // TMP2=(jump-0x8000)<<2
+ if (vk) {
+ | masknez TMP2, TMP2, TMP0 // Branch only on equality.
+ } else {
+ | maskeqz TMP2, TMP2, TMP0 // Branch only on inequality.
+ }
+ | add.d PC, PC, TMP2
+ | ins_next
+ break;
+
+ /* -- Unary test and copy ops ------------------------------------------- */
+
+ case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
+ | // RA = dst*8 or unused, RD = src*8, JMP with RD = target
+ | add.d RD, BASE, RD
+ | ld.hu TMP2, OFS_RD(PC)
+ | ld.d TMP0, 0(RD)
+ | addi.d PC, PC, 4
+ | gettp TMP0, TMP0
+ | add.d RA, BASE, RA
+ | sltui TMP0, TMP0, LJ_TISTRUECOND // TMP0=1 true; TMP0=0 false
+ | decode_BC4b TMP2
+ | addu16i.d TMP3, r0, -0x2 // -BCBIAS_J*4
+ | ld.d CRET1, 0(RD) // Keep the source TValue for ISTC/ISFC copy.
+ | add.w TMP2, TMP2, TMP3 // (jump-0x8000)<<2
+ if (op == BC_IST || op == BC_ISTC) {
+ | beqz TMP0, >1
+ if (op == BC_ISTC) {
+ | st.d CRET1, 0(RA) // Copy truthy source to dst.
+ }
+ } else {
+ | bnez TMP0, >1
+ if (op == BC_ISFC) {
+ | st.d CRET1, 0(RA) // Copy falsy source to dst.
+ }
+ }
+ | add.d PC, PC, TMP2
+ |1:
+ | ins_next
+ break;
+
+ case BC_ISTYPE:
+ | // RA = src*8, RD = -type*8
+ | add.d TMP0, BASE, RA
+ | srli.w TMP1, RD, 3
+ | ld.d TMP0, 0(TMP0)
+ | gettp TMP0, TMP0
+ | add.d TMP0, TMP0, TMP1 // if itype of RA == type, then TMP0=0
+ | bnez TMP0, ->vmeta_istype
+ | ins_next
+ break;
+ case BC_ISNUM:
+ | // RA = src*8, RD = -(TISNUM-1)*8
+ | add.d TMP0, BASE, RA
+ | ld.d TMP0, 0(TMP0)
+ | checknum TMP0, ->vmeta_istype
+ | ins_next
+ break;
+
+ /* -- Unary ops --------------------------------------------------------- */
+
+ case BC_MOV:
+ | // RA = dst*8, RD = src*8
+ | add.d RD, BASE, RD
+ | add.d RA, BASE, RA
+ | ld.d TMP0, 0(RD)
+ | ins_next1
+ | st.d TMP0, 0(RA) // Copy whole TValue.
+ | ins_next2
+ break;
+ case BC_NOT:
+ | // RA = dst*8, RD = src*8
+ | add.d RD, BASE, RD
+ | add.d RA, BASE, RA
+ | ld.d TMP0, 0(RD)
+ | addi.d TMP1, r0, LJ_TTRUE
+ | ins_next1
+ | gettp TMP0, TMP0
+ | sltu TMP0, TMP1, TMP0 // TMP0 = 1 if operand is nil or false.
+ | addi.w TMP0, TMP0, 1
+ | slli.d TMP0, TMP0, 47
+ | nor TMP0, TMP0, r0 // Build LJ_TTRUE/LJ_TFALSE tagged value.
+ | st.d TMP0, 0(RA)
+ | ins_next2
+ break;
+ case BC_UNM:
+ | // RA = dst*8, RD = src*8
+ | add.d RB, BASE, RD
+ | add.d RA, BASE, RA
+ | ld.d TMP0, 0(RB)
+ | addu16i.d TMP1, r0, 0x8000 // 2^31: the only overflowing negation.
+ | gettp CARG3, TMP0
+ | bne CARG3, TISNUM, >1
+ | sub.w TMP0, r0, TMP0 // Negate (32-bit).
+ | beq TMP0, TMP1, ->vmeta_unm // Meta handler deals with -2^31.
+ | bstrpick.d TMP0, TMP0, 31, 0
+ | settp TMP0, TISNUM
+ | b >2
+ |1:
+ | sltui TMP3, CARG3, LJ_TISNUM
+ | slli.d TMP1, TMP1, 32 // TMP1 = sign bit of a double.
+ | beqz TMP3, ->vmeta_unm
+ | xor TMP0, TMP0, TMP1 // sign => ~sign
+ |2:
+ | st.d TMP0, 0(RA)
+ | ins_next
+ break;
+ case BC_LEN:
+ | // RA = dst*8, RD = src*8
+ | add.d CARG2, BASE, RD
+ | ld.d TMP0, 0(CARG2)
+ | add.d RA, BASE, RA
+ | gettp TMP1, TMP0
+ | addi.d TMP2, TMP1, -LJ_TSTR
+ | cleartp STR:CARG1, TMP0
+ | bnez TMP2, >2
+ | ld.w CARG1, STR:CARG1->len // String: length is stored in the object.
+ |1:
+ | settp CARG1, TISNUM
+ | st.d CARG1, 0(RA)
+ | ins_next
+ |2:
+ | addi.d TMP2, TMP1, -LJ_TTAB
+ | bnez TMP2, ->vmeta_len
+#if LJ_52
+ | ld.d TAB:TMP2, TAB:CARG1->metatable
+ | bnez TAB:TMP2, >9
+ |3:
+#endif
+ |->BC_LEN_Z:
+ | bl extern lj_tab_len // (GCtab *t)
+ | // Returns uint32_t (but less than 2^31).
+ | b <1
+#if LJ_52
+ |9:
+ | ld.bu TMP0, TAB:TMP2->nomm
+ | andi TMP0, TMP0, 1<<MM_len
+ | bnez TMP0, <3 // 'no __len' flag set: done.
+ | b ->vmeta_len
+#endif
+ break;
+
+ /* -- Binary ops -------------------------------------------------------- */
+
+ |.macro fpmod, a, b, c
+ | fdiv.d FARG1, b, c
+ | bl ->vm_floor // floor(b/c)
+ | fmul.d a, FRET1, c
+ | fsub.d a, b, a // b - floor(b/c)*c
+ |.endmacro
+ |
+ |.macro ins_arithpre
+ ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
+ | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8
+ ||if (vk == 1) {
+ | // RA = dst*8, RB = num_const*8, RC = src1*8
+ | decode_RB RC, INS
+ | decode_RDtoRC8 RB, RD
+ ||} else {
+ | // RA = dst*8, RB = src1*8, RC = num_const*8
+ | decode_RB RB, INS
+ | decode_RDtoRC8 RC, RD
+ ||}
+ ||switch (vk) {
+ ||case 0: // suffix is VN
+ | add.d RB, BASE, RB
+ | add.d RC, KBASE, RC
+ || break;
+ ||case 1: // suffix is NV
+ | add.d RC, BASE, RC
+ | add.d RB, KBASE, RB
+ || break;
+ ||default: // CAT or suffix is VV
+ | add.d RB, BASE, RB
+ | add.d RC, BASE, RC
+ || break;
+ ||}
+ |.endmacro
+ |
+ |.macro ins_arithfp, fpins, itype1, itype2
+ | fld.d FTMP0, 0(RB)
+ | sltu itype1, itype1, TISNUM // itype1 = 1 if operand 1 is a number.
+ | sltu itype2, itype2, TISNUM // itype2 = 1 if operand 2 is a number.
+ | fld.d FTMP2, 0(RC)
+ | and itype1, itype1, itype2
+ | add.d RA, BASE, RA
+ | beqz itype1, ->vmeta_arith
+ | fpins FRET1, FTMP0, FTMP2
+ | ins_next1
+ | fst.d FRET1, 0(RA)
+ | ins_next2
+ |.endmacro
+ |
+ |.macro ins_arithead, itype1, itype2, tval1, tval2
+ | ld.d tval1, 0(RB)
+ | ld.d tval2, 0(RC)
+ | // Check for two integers.
+ | gettp itype1, tval1
+ | gettp itype2, tval2
+ |.endmacro
+ |
+ |.macro ins_arithdn, intins, fpins
+ | ins_arithpre
+ | ins_arithead TMP0, TMP1, CARG1, CARG2
+ | bne TMP0, TISNUM, >1
+ | bne TMP1, TISNUM, >1
+ | slli.w CARG3, CARG1, 0 // Sign-extend int operands.
+ | slli.w CARG4, CARG2, 0
+ |.if "intins" == "add.w"
+ | intins CRET1, CARG3, CARG4
+ | xor TMP1, CRET1, CARG3 // ((y^a) & (y^b)) < 0: overflow.
+ | xor TMP2, CRET1, CARG4
+ | and TMP1, TMP1, TMP2
+ | add.d RA, BASE, RA
+ | blt TMP1, r0, ->vmeta_arith
+ |.elif "intins" == "sub.w"
+ | intins CRET1, CARG3, CARG4
+ | xor TMP1, CRET1, CARG3 // ((y^a) & (a^b)) < 0: overflow.
+ | xor TMP2, CARG3, CARG4
+ | and TMP1, TMP1, TMP2
+ | add.d RA, BASE, RA
+ | blt TMP1, r0, ->vmeta_arith
+ |.elif "intins" == "mulw.d.w"
+ | mul.w CRET1, CARG3, CARG4
+ | mulh.w TMP2, CARG3, CARG4
+ | srai.w TMP1, CRET1, 31 // 63-32bit not all 0 or 1: overflow.
+ | add.d RA, BASE, RA
+ | bne TMP1, TMP2, ->vmeta_arith
+ |.endif
+ | bstrpick.d CRET1, CRET1, 31, 0
+ | settp CRET1, TISNUM
+ | st.d CRET1, 0(RA)
+ | ins_next
+ |1: // Check for two numbers.
+ | ins_arithfp, fpins, TMP0, TMP1
+ |.endmacro
+ |
+ |.macro ins_arithdiv, fpins
+ | ins_arithpre
+ | ins_arithead TMP0, TMP1, CARG1, CARG2
+ | ins_arithfp, fpins, TMP0, TMP1
+ |.endmacro
+ |
+ |.macro ins_arithmod, fpins
+ | ins_arithpre
+ | ins_arithead TMP0, TMP1, CARG1, CARG2
+ | bne TMP0, TISNUM, >1
+ | bne TMP1, TISNUM, >1
+ | slli.w CARG1, CARG1, 0
+ | slli.w CARG2, CARG2, 0
+ | add.d RA, BASE, RA
+ | beqz CARG2, ->vmeta_arith // Zero divisor: punt to meta handler.
+ | bl extern lj_vm_modi
+ | bstrpick.d CRET1, CRET1, 31, 0
+ | settp CRET1, TISNUM
+ | st.d CRET1, 0(RA)
+ | ins_next
+ |1: // Check for two numbers.
+ | ins_arithfp, fpins, TMP0, TMP1
+ |.endmacro
+
+ case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
+ | ins_arithdn add.w, fadd.d
+ break;
+ case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
+ | ins_arithdn sub.w, fsub.d
+ break;
+ case BC_MULVN: case BC_MULNV: case BC_MULVV:
+ | ins_arithdn mulw.d.w, fmul.d
+ break;
+ case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
+ | ins_arithdiv fdiv.d
+ break;
+ case BC_MODVN: case BC_MODNV: case BC_MODVV:
+ | ins_arithmod fpmod
+ break;
+ case BC_POW:
+ | ins_arithpre
+ | ld.d CARG1, 0(RB)
+ | ld.d CARG2, 0(RC)
+ | gettp TMP0, CARG1
+ | gettp TMP1, CARG2
+ | sltui TMP0, TMP0, LJ_TISNUM
+ | sltui TMP1, TMP1, LJ_TISNUM
+ | and TMP0, TMP0, TMP1
+ | add.d RA, BASE, RA
+ | beqz TMP0, ->vmeta_arith
+ | fld.d FARG1, 0(RB)
+ | fld.d FARG2, 0(RC)
+ | bl extern pow
+ | ins_next1
+ | fst.d FRET1, 0(RA)
+ | ins_next2
+ break;
+
+ case BC_CAT:
+ | // RA = dst*8, RB = src_start*8, RC = src_end*8
+ | decode_RB RB, INS
+ | decode_RDtoRC8 RC, RD
+ | sub.d CARG3, RC, RB
+ | st.d BASE, L->base
+ | add.d CARG2, BASE, RC
+ | or MULTRES, RB, r0
+ |->BC_CAT_Z:
+ | srli.w CARG3, CARG3, 3
+ | st.d PC, SAVE_PC(sp)
+ | or CARG1, L, r0
+ | bl extern lj_meta_cat // (lua_State *L, TValue *top, int left)
+ | // Returns NULL (finished) or TValue * (metamethod).
+ | ld.d BASE, L->base
+ | bnez CRET1, ->vmeta_binop
+ | add.d RB, BASE, MULTRES
+ | ld.d TMP0, 0(RB)
+ | add.d RA, BASE, RA
+ | st.d TMP0, 0(RA)
+ | ins_next
+ break;
+
+ /* -- Constant ops ------------------------------------------------------ */
+
+ case BC_KSTR:
+ | // RA = dst*8, RD = str_const*8 (~)
+ | sub.d TMP1, KBASE, RD
+ | addi.w TMP2, r0, LJ_TSTR
+ | ld.d TMP0, -8(TMP1) // KBASE-8-str_const*8
+ | add.d RA, BASE, RA
+ | settp TMP0, TMP2
+ | st.d TMP0, 0(RA)
+ | ins_next
+ break;
+ case BC_KCDATA:
+ |.if FFI
+ | // RA = dst*8, RD = cdata_const*8 (~)
+ | sub.d TMP1, KBASE, RD
+ | ld.d TMP0, -8(TMP1) // KBASE-8-cdata_const*8
+ | addi.w TMP2, r0, LJ_TCDATA
+ | add.d RA, BASE, RA
+ | settp TMP0, TMP2
+ | st.d TMP0, 0(RA)
+ | ins_next
+ |.endif
+ break;
+ case BC_KSHORT:
+ | // RA = dst*8, RD = int16_literal*8
+ | srai.w RD, INS, 16
+ | add.d RA, BASE, RA
+ | bstrpick.d RD, RD, 31, 0
+ | settp RD, TISNUM
+ | st.d RD, 0(RA)
+ | ins_next
+ break;
+ case BC_KNUM:
+ | // RA = dst*8, RD = num_const*8
+ | add.d RD, KBASE, RD
+ | add.d RA, BASE, RA
+ | ld.d TMP0, 0(RD)
+ | st.d TMP0, 0(RA)
+ | ins_next
+ break;
+ case BC_KPRI:
+ | // RA = dst*8, RD = primitive_type*8 (~)
+ | add.d RA, BASE, RA
+ | slli.d TMP0, RD, 44 // 44+3
+ | nor TMP0, TMP0, r0
+ | st.d TMP0, 0(RA)
+ | ins_next
+ break;
+ case BC_KNIL:
+ | // RA = base*8, RD = end*8
+ | add.d RA, BASE, RA
+ | st.d TISNIL, 0(RA)
+ | addi.d RA, RA, 8
+ | add.d RD, BASE, RD
+ |1:
+ | st.d TISNIL, 0(RA)
+ | slt TMP0, RA, RD
+ | addi.d RA, RA, 8
+ | bnez TMP0, <1
+ | ins_next
+ break;
+
+ /* -- Upvalue and function ops ------------------------------------------ */
+
+ case BC_UGET:
+ | // RA = dst*8, RD = uvnum*8
+ | ld.d LFUNC:TMP0, FRAME_FUNC(BASE)
+ | add.d RA, BASE, RA
+ | cleartp LFUNC:TMP0
+ | add.d RD, RD, LFUNC:TMP0
+ | ld.d UPVAL:TMP0, LFUNC:RD->uvptr
+ | ld.d TMP1, UPVAL:TMP0->v
+ | ld.d TMP2, 0(TMP1)
+ | ins_next1
+ | st.d TMP2, 0(RA)
+ | ins_next2
+ break;
+ case BC_USETV:
+ | // RA = uvnum*8, RD = src*8
+ | ld.d LFUNC:TMP0, FRAME_FUNC(BASE)
+ | add.d RD, BASE, RD
+ | cleartp LFUNC:TMP0
+ | add.d RA, RA, LFUNC:TMP0
+ | ld.d UPVAL:TMP0, LFUNC:RA->uvptr
+ | ld.d CRET1, 0(RD)
+ | ld.bu TMP3, UPVAL:TMP0->marked
+ | ld.d CARG2, UPVAL:TMP0->v
+ | andi TMP3, TMP3, LJ_GC_BLACK // isblack(uv)
+ | ld.bu TMP0, UPVAL:TMP0->closed
+ | gettp TMP2, CRET1
+ | st.d CRET1, 0(CARG2)
+ | or TMP3, TMP3, TMP0
+ | addi.d TMP0, r0, LJ_GC_BLACK|1
+ | addi.d TMP2, TMP2, -(LJ_TNUMX+1)
+ | beq TMP3, TMP0, >2 // Upvalue is closed and black?
+ |1:
+ | ins_next
+ |
+ |2: // Check if new value is collectable.
+ | sltui TMP0, TMP2, LJ_TISGCV - (LJ_TNUMX+1)
+ | cleartp GCOBJ:CRET1, CRET1
+ | beqz TMP0, <1 // tvisgcv(v)
+ | ld.bu TMP3, GCOBJ:CRET1->gch.marked
+ | andi TMP3, TMP3, LJ_GC_WHITES // iswhite(v)
+ | beqz TMP3, <1
+ | // Crossed a write barrier. Move the barrier forward.
+ | .ADD16I CARG1, DISPATCH, GG_DISP2G
+ | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv)
+ | b <1
+ break;
+ case BC_USETS:
+ | // RA = uvnum*8, RD = str_const*8 (~)
+ | ld.d LFUNC:TMP0, FRAME_FUNC(BASE)
+ | sub.d TMP1, KBASE, RD
+ | cleartp LFUNC:TMP0
+ | add.d RA, RA, LFUNC:TMP0
+ | ld.d UPVAL:TMP0, LFUNC:RA->uvptr
+ | ld.d STR:TMP1, -8(TMP1) // KBASE-8-str_const*8
+ | ld.bu TMP2, UPVAL:TMP0->marked
+ | ld.d CARG2, UPVAL:TMP0->v
+ | ld.bu TMP3, STR:TMP1->marked
+ | andi TMP4, TMP2, LJ_GC_BLACK // isblack(uv)
+ | ld.bu TMP2, UPVAL:TMP0->closed
+ | addi.d TMP0, r0, LJ_TSTR
+ | settp TMP1, TMP0
+ | st.d TMP1, 0(CARG2)
+ | bnez TMP4, >2
+ |1:
+ | ins_next
+ |
+ |2: // Check if string is white and ensure upvalue is closed.
+ | beqz TMP2, <1
+ | andi TMP0, TMP3, LJ_GC_WHITES // iswhite(str)
+ | beqz TMP0, <1
+ | // Crossed a write barrier. Move the barrier forward.
+ | .ADD16I CARG1, DISPATCH, GG_DISP2G
+ | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv)
+ | b <1
+ break;
+ case BC_USETN:
+ | // RA = uvnum*8, RD = num_const*8
+ | ld.d LFUNC:TMP0, FRAME_FUNC(BASE)
+ | add.d RD, KBASE, RD
+ | cleartp LFUNC:TMP0
+ | add.d TMP0, RA, LFUNC:TMP0
+ | ld.d UPVAL:TMP0, LFUNC:TMP0->uvptr
+ | ld.d TMP1, 0(RD)
+ | ld.d TMP0, UPVAL:TMP0->v
+ | st.d TMP1, 0(TMP0)
+ | ins_next
+ break;
+ case BC_USETP:
+ | // RA = uvnum*8, RD = primitive_type*8 (~)
+ | ld.d LFUNC:TMP0, FRAME_FUNC(BASE)
+ | slli.d TMP2, RD, 44
+ | cleartp LFUNC:TMP0
+ | add.d TMP0, RA, LFUNC:TMP0
+ | nor TMP2, TMP2, r0
+ | ld.d UPVAL:TMP0, LFUNC:TMP0->uvptr
+ | ld.d TMP1, UPVAL:TMP0->v
+ | st.d TMP2, 0(TMP1)
+ | ins_next
+ break;
+
+ case BC_UCLO:
+ | // RA = level*8, RD = target
+ | ld.d TMP2, L->openupval
+ | branch_RD // Do this first since RD is not saved.
+ | st.d BASE, L->base
+ | or CARG1, L, r0
+ | beqz TMP2, >1
+ | add.d CARG2, BASE, RA
+ | bl extern lj_func_closeuv // (lua_State *L, TValue *level)
+ | ld.d BASE, L->base
+ |1:
+ | ins_next
+ break;
+
+ case BC_FNEW:
+ | // RA = dst*8, RD = proto_const*8 (~) (holding function prototype)
+ | sub.d TMP1, KBASE, RD
+ | ld.d CARG3, FRAME_FUNC(BASE)
+ | ld.d CARG2, -8(TMP1) // KBASE-8-proto_const*8
+ | st.d BASE, L->base
+ | st.d PC, SAVE_PC(sp)
+ | cleartp CARG3
+ | or CARG1, L, r0
+ | // (lua_State *L, GCproto *pt, GCfuncL *parent)
+ | bl extern lj_func_newL_gc
+ | // Returns GCfuncL *.
+ | addi.d TMP0, r0, LJ_TFUNC
+ | ld.d BASE, L->base
+ | settp CRET1, TMP0
+ | add.d RA, BASE, RA
+ | st.d CRET1, 0(RA)
+ | ins_next
+ break;
+
+ /* -- Table ops --------------------------------------------------------- */
+
+ case BC_TNEW:
+ case BC_TDUP:
+ | // RA = dst*8, RD = (hbits|asize)*8 | tab_const*8 (~)
+ | .LDXD TMP0, DISPATCH, DISPATCH_GL(gc.total)
+ | .LDXD TMP1, DISPATCH, DISPATCH_GL(gc.threshold)
+ | st.d BASE, L->base
+ | sltu TMP2, TMP0, TMP1
+ | st.d PC, SAVE_PC(sp)
+ | beqz TMP2, >5
+ |1:
+ if (op == BC_TNEW) {
+ | srli.w CARG2, RD, 3
+ | andi CARG2, CARG2, 0x7ff
+ | ori TMP0, r0, 0x801
+ | addi.w TMP2, CARG2, -0x7ff
+ | srli.w CARG3, RD, 14
+ | masknez TMP0, TMP0, TMP2
+ | maskeqz CARG2, CARG2, TMP2
+ | or CARG2, CARG2, TMP0
+ | // (lua_State *L, int32_t asize, uint32_t hbits)
+ | or CARG1, L, r0
+ | bl extern lj_tab_new
+ | // Returns Table *.
+ } else {
+ | sub.d TMP1, KBASE, RD
+ | or CARG1, L, r0
+ | ld.d CARG2, -8(TMP1) // KBASE-8-tab_const*8
+ | bl extern lj_tab_dup // (lua_State *L, Table *kt)
+ | // Returns Table *.
+ }
+ | addi.d TMP0, r0, LJ_TTAB
+ | ld.d BASE, L->base
+ | ins_next1
+ | settp CRET1, TMP0
+ | add.d RA, BASE, RA
+ | st.d CRET1, 0(RA)
+ | ins_next2
+ |5:
+ | or MULTRES, RD, r0
+ | or CARG1, L, r0
+ | bl extern lj_gc_step_fixtop // (lua_State *L)
+ | or RD, MULTRES, r0
+ | b <1
+ break;
+
+ case BC_GGET:
+ | // RA = dst*8, RD = str_const*8 (~)
+ case BC_GSET:
+ | // RA = src*8, RD = str_const*8 (~)
+ | ld.d LFUNC:TMP0, FRAME_FUNC(BASE)
+ | sub.d TMP1, KBASE, RD
+ | ld.d STR:RC, -8(TMP1) // KBASE-8-str_const*8
+ | cleartp LFUNC:TMP0
+ | ld.d TAB:RB, LFUNC:TMP0->env
+ | add.d RA, BASE, RA
+ if (op == BC_GGET) {
+ | b ->BC_TGETS_Z
+ } else {
+ | b ->BC_TSETS_Z
+ }
+ break;
+
+ case BC_TGETV:
+ | // RA = dst*8, RB = table*8, RC = key*8
+ | decode_RB RB, INS
+ | decode_RDtoRC8 RC, RD
+ | add.d CARG2, BASE, RB
+ | add.d CARG3, BASE, RC
+ | ld.d TAB:RB, 0(CARG2)
+ | ld.d TMP2, 0(CARG3)
+ | add.d RA, BASE, RA
+ | checktab TAB:RB, ->vmeta_tgetv
+ | gettp TMP3, TMP2
+ | ld.w TMP0, TAB:RB->asize
+ | bne TMP3, TISNUM, >5 // Integer key?
+ | slli.w TMP2, TMP2, 0
+ | ld.d TMP1, TAB:RB->array
+ | sltu TMP3, TMP2, TMP0 // In array part (keys = [0, asize-1])?
+ | slli.w TMP2, TMP2, 3
+ | beqz TMP3, ->vmeta_tgetv // Integer key and in array part?
+ | add.d TMP2, TMP1, TMP2
+ | ld.d CRET1, 0(TMP2)
+ | beq CRET1, TISNIL, >2
+ |1:
+ | st.d CRET1, 0(RA)
+ | ins_next
+ |
+ |2: // Check for __index if table value is nil.
+ | ld.d TAB:TMP2, TAB:RB->metatable
+ | beqz TAB:TMP2, <1 // No metatable: done.
+ | ld.bu TMP0, TAB:TMP2->nomm
+ | andi TMP0, TMP0, 1<<MM_index
+ | bnez TMP0, <1 // 'no __index' flag set: done.
+ | b ->vmeta_tgetv
+ |
+ |5:
+ | addi.d TMP0, r0, LJ_TSTR
+ | cleartp RC, TMP2
+ | bne TMP3, TMP0, ->vmeta_tgetv // String key?
+ | b ->BC_TGETS_Z
+ break;
+ case BC_TGETS:
+ | // RA = dst*8, RB = table*8, RC = str_const*8 (~)
+ | decode_RB RB, INS
+ | decode_RDtoRC8 RC, RD
+ | add.d CARG2, BASE, RB
+ | sub.d CARG3, KBASE, RC
+ | ld.d TAB:RB, 0(CARG2)
+ | add.d RA, BASE, RA
+ | ld.d STR:RC, -8(CARG3) // KBASE-8-str_const*8
+ | checktab TAB:RB, ->vmeta_tgets1
+ |->BC_TGETS_Z:
+ | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8
+ | ld.w TMP0, TAB:RB->hmask
+ | ld.w TMP1, STR:RC->sid
+ | ld.d NODE:TMP2, TAB:RB->node
+ | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask
+ | slli.w TMP0, TMP1, 5
+ | slli.w TMP1, TMP1, 3
+ | sub.w TMP1, TMP0, TMP1
+ | addi.d TMP3, r0, LJ_TSTR
+ | add.d NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8)
+ | settp STR:RC, TMP3 // Tagged key to look for.
+ |1:
+ | ld.d CARG1, NODE:TMP2->key
+ | ld.d CARG2, NODE:TMP2->val
+ | ld.d NODE:TMP1, NODE:TMP2->next
+ | ld.d TAB:TMP3, TAB:RB->metatable
+ | bne CARG1, RC, >4
+ | beq CARG2, TISNIL, >5 // Key found, but nil value?
+ |3:
+ | st.d CARG2, 0(RA)
+ | ins_next
+ |
+ |4: // Follow hash chain.
+ | or NODE:TMP2, NODE:TMP1, r0
+ | bnez NODE:TMP1, <1
+ | // End of hash chain: key not found, nil result.
+ |
+ |5: // Check for __index if table value is nil.
+ | or CARG2, TISNIL, r0
+ | beqz TAB:TMP3, <3 // No metatable: done.
+ | ld.bu TMP0, TAB:TMP3->nomm
+ | andi TMP0, TMP0, 1<<MM_index
+ | bnez TMP0, <3 // 'no __index' flag set: done.
+ | b ->vmeta_tgets
+ break;
+ case BC_TGETB:
+ | // RA = dst*8, RB = table*8, RC = index*8
+ | decode_RB RB, INS
+ | add.d CARG2, BASE, RB
+ | decode_RDtoRC8 RC, RD
+ | ld.d TAB:RB, 0(CARG2)
+ | add.d RA, BASE, RA
+ | srli.w TMP0, RC, 3
+ | checktab TAB:RB, ->vmeta_tgetb
+ | ld.w TMP1, TAB:RB->asize
+ | ld.d TMP2, TAB:RB->array
+ | sltu TMP1, TMP0, TMP1
+ | add.d RC, TMP2, RC
+ | beqz TMP1, ->vmeta_tgetb
+ | ld.d CRET1, 0(RC)
+ | beq CRET1, TISNIL, >5
+ |1:
+ | st.d CRET1, 0(RA)
+ | ins_next
+ |
+ |5: // Check for __index if table value is nil.
+ | ld.d TAB:TMP2, TAB:RB->metatable
+ | beqz TAB:TMP2, <1 // No metatable: done.
+ | ld.bu TMP1, TAB:TMP2->nomm
+ | andi TMP1, TMP1, 1<<MM_index
+ | bnez TMP1, <1 // 'no __index' flag set: done.
+ | b ->vmeta_tgetb // Caveat: preserve TMP0 and CARG2!
+ break;
+ case BC_TGETR:
+ | // RA = dst*8, RB = table*8, RC = key*8
+ | decode_RB RB, INS
+ | decode_RDtoRC8 RC, RD
+ | add.d RB, BASE, RB
+ | add.d RC, BASE, RC
+ | ld.d TAB:CARG1, 0(RB)
+ | ld.w CARG2, 0(RC)
+ | add.d RA, BASE, RA
+ | cleartp TAB:CARG1
+ | ld.w TMP0, TAB:CARG1->asize
+ | ld.d TMP1, TAB:CARG1->array
+ | sltu TMP0, CARG2, TMP0
+ | slli.w TMP2, CARG2, 3
+ | add.d TMP3, TMP1, TMP2
+ | beqz TMP0, ->vmeta_tgetr // In array part?
+ | ld.d TMP1, 0(TMP3)
+ |->BC_TGETR_Z:
+ | ins_next1
+ | st.d TMP1, 0(RA)
+ | ins_next2
+ break;
+
+ case BC_TSETV:
+ | // RA = src*8, RB = table*8, RC = key*8
+ | decode_RB RB, INS
+ | decode_RDtoRC8 RC, RD
+ | add.d CARG2, BASE, RB
+ | add.d CARG3, BASE, RC
+ | ld.d TAB:RB, 0(CARG2)
+ | ld.d TMP2, 0(CARG3)
+ | add.d RA, BASE, RA
+ | checktab TAB:RB, ->vmeta_tsetv
+ | slli.w RC, TMP2, 0
+ | checkint TMP2, >5
+ | ld.w TMP0, TAB:RB->asize
+ | ld.d TMP1, TAB:RB->array
+ | sltu TMP0, RC, TMP0
+ | slli.w TMP2, RC, 3
+ | beqz TMP0, ->vmeta_tsetv // Integer key and in array part?
+ | add.d TMP1, TMP1, TMP2
+ | ld.bu TMP3, TAB:RB->marked
+ | ld.d TMP0, 0(TMP1)
+ | ld.d CRET1, 0(RA)
+ | beq TMP0, TISNIL, >3
+ |1:
+ | andi TMP2, TMP3, LJ_GC_BLACK // isblack(table)
+ | st.d CRET1, 0(TMP1)
+ | bnez TMP2, >7
+ |2:
+ | ins_next
+ |
+ |3: // Check for __newindex if previous value is nil.
+ | ld.d TAB:TMP2, TAB:RB->metatable
+ | beqz TAB:TMP2, <1 // No metatable: done.
+ | ld.bu TMP2, TAB:TMP2->nomm
+ | andi TMP2, TMP2, 1<<MM_newindex
+ | bnez TMP2, <1 // 'no __newindex' flag set: done.
+ | b ->vmeta_tsetv
+ |5:
+ | gettp TMP0, TMP2
+ | addi.d TMP0, TMP0, -LJ_TSTR
+ | bnez TMP0, ->vmeta_tsetv
+ | cleartp STR:RC, TMP2
+ | b ->BC_TSETS_Z // String key?
+ |
+ |7: // Possible table write barrier for the value. Skip valiswhite check.
+ | barrierback TAB:RB, TMP3, TMP0, <2
+ break;
+ case BC_TSETS:
+ | // RA = src*8, RB = table*8, RC = str_const*8 (~)
+ | decode_RB RB, INS
+ | decode_RDtoRC8 RC, RD
+ | add.d CARG2, BASE, RB
+ | sub.d CARG3, KBASE, RC
+ | ld.d TAB:RB, 0(CARG2)
+ | ld.d RC, -8(CARG3) // KBASE-8-str_const*8
+ | add.d RA, BASE, RA
+ | cleartp STR:RC
+ | checktab TAB:RB, ->vmeta_tsets1
+ |->BC_TSETS_Z:
+ | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = BASE+src*8
+ | ld.w TMP0, TAB:RB->hmask
+ | ld.w TMP1, STR:RC->sid
+ | ld.d NODE:TMP2, TAB:RB->node
+ | st.b r0, TAB:RB->nomm // Clear metamethod cache.
+ | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask
+ | slli.w TMP0, TMP1, 5
+ | slli.w TMP1, TMP1, 3
+ | sub.w TMP1, TMP0, TMP1
+ | addi.d TMP3, r0, LJ_TSTR
+ | add.d NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8)
+ | settp STR:RC, TMP3 // Tagged key to look for.
+ | fld.d FTMP0, 0(RA)
+ |1:
+ | ld.d TMP0, NODE:TMP2->key
+ | ld.d CARG2, NODE:TMP2->val
+ | ld.d NODE:TMP1, NODE:TMP2->next
+ | ld.bu TMP3, TAB:RB->marked
+ | bne TMP0, RC, >5
+ | ld.d TAB:TMP0, TAB:RB->metatable
+ | beq CARG2, TISNIL, >4 // Key found, but nil value?
+ |2:
+ | andi TMP3, TMP3, LJ_GC_BLACK // isblack(table)
+ | fst.d FTMP0, NODE:TMP2->val
+ | bnez TMP3, >7
+ |3:
+ | ins_next
+ |
+ |4: // Check for __newindex if previous value is nil.
+ | beqz TAB:TMP0, <2 // No metatable: done.
+ | ld.bu TMP0, TAB:TMP0->nomm
+ | andi TMP0, TMP0, 1<<MM_newindex
+ | bnez TMP0, <2 // 'no __newindex' flag set: done.
+ | b ->vmeta_tsets
+ |
+ |5: // Follow hash chain.
+ | or NODE:TMP2, NODE:TMP1, r0
+ | bnez NODE:TMP1, <1
+ | // End of hash chain: key not found, add a new one.
+ |
+ | // But check for __newindex first.
+ | ld.d TAB:TMP2, TAB:RB->metatable
+ | .ADD16I CARG3, DISPATCH, DISPATCH_GL(tmptv)
+ | beqz TAB:TMP2, >6 // No metatable: continue.
+ | ld.bu TMP0, TAB:TMP2->nomm
+ | andi TMP0, TMP0, 1<<MM_newindex
+ | beqz TMP0, ->vmeta_tsets // 'no __newindex' flag NOT set: check.
+ |6:
+ | st.d RC, 0(CARG3)
+ | st.d BASE, L->base
+ | or CARG2, TAB:RB, r0
+ | st.d PC, SAVE_PC(sp)
+ | or CARG1, L, r0
+ | bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k)
+ | // Returns TValue *.
+ | ld.d BASE, L->base
+ | fst.d FTMP0, 0(CRET1)
+ | b <3 // No 2nd write barrier needed.
+ |
+ |7: // Possible table write barrier for the value. Skip valiswhite check.
+ | barrierback TAB:RB, TMP3, TMP0, <3
+ break;
+ case BC_TSETB:
+ | // RA = src*8, RB = table*8, RC = index*8
+ | decode_RB RB, INS
+ | decode_RDtoRC8 RC, RD
+ | add.d CARG2, BASE, RB
+ | add.d RA, BASE, RA
+ | ld.d TAB:RB, 0(CARG2)
+ | srli.w TMP0, RC, 3
+ | checktab RB, ->vmeta_tsetb
+ | ld.w TMP1, TAB:RB->asize
+ | ld.d TMP2, TAB:RB->array
+ | sltu TMP1, TMP0, TMP1
+ | add.d RC, TMP2, RC
+ | beqz TMP1, ->vmeta_tsetb
+ | ld.d TMP1, 0(RC)
+ | ld.bu TMP3, TAB:RB->marked
+ | beq TMP1, TISNIL, >5
+ |1:
+ | ld.d CRET1, 0(RA)
+ | andi TMP1, TMP3, LJ_GC_BLACK // isblack(table)
+ | st.d CRET1, 0(RC)
+ | bnez TMP1, >7
+ |2:
+ | ins_next
+ |
+ |5: // Check for __newindex if previous value is nil.
+ | ld.d TAB:TMP2, TAB:RB->metatable
+ | beqz TAB:TMP2, <1 // No metatable: done.
+ | ld.bu TMP1, TAB:TMP2->nomm
+ | andi TMP1, TMP1, 1<<MM_newindex
+ | bnez TMP1, <1 // 'no __newindex' flag set: done.
+ | b ->vmeta_tsetb // Caveat: preserve TMP0 and CARG2!
+ |
+ |7: // Possible table write barrier for the value. Skip valiswhite check.
+ | barrierback TAB:RB, TMP3, TMP0, <2
+ break;
+ case BC_TSETR:
+ | // RA = dst*8, RB = table*8, RC = key*8
+ | decode_RB RB, INS
+ | decode_RDtoRC8 RC, RD
+ | add.d CARG1, BASE, RB
+ | add.d CARG3, BASE, RC
+ | ld.d TAB:CARG2, 0(CARG1)
+ | ld.w CARG3, 0(CARG3)
+ | cleartp TAB:CARG2
+ | ld.bu TMP3, TAB:CARG2->marked
+ | ld.w TMP0, TAB:CARG2->asize
+ | ld.d TMP1, TAB:CARG2->array
+ | andi TMP2, TMP3, LJ_GC_BLACK // isblack(table)
+ | add.d RA, BASE, RA
+ | bnez TMP2, >7
+ |2:
+ | sltu TMP0, CARG3, TMP0
+ | slli.w TMP2, CARG3, 3
+ | add.d CRET1, TMP1, TMP2
+ | beqz TMP0, ->vmeta_tsetr // In array part?
+ |->BC_TSETR_Z:
+ | ld.d TMP1, 0(RA)
+ | ins_next1
+ | st.d TMP1, 0(CRET1)
+ | ins_next2
+ |
+ |7: // Possible table write barrier for the value. Skip valiswhite check.
+ | barrierback TAB:CARG2, TMP3, CRET1, <2
+ break;
+
+ case BC_TSETM:
+ | // RA = base*8 (table at base-1), RD = num_const*8 (start index)
+ | add.d RA, BASE, RA
+ |1:
+ | add.d TMP3, KBASE, RD
+ | ld.d TAB:CARG2, -8(RA) // Guaranteed to be a table.
+ | addi.w TMP0, MULTRES, -8
+ | ld.w TMP3, 0(TMP3) // Integer constant is in lo-word.
+ | srli.w CARG3, TMP0, 3
+ | beqz TMP0, >4 // Nothing to copy?
+ | cleartp TAB:CARG2
+ | add.w CARG3, CARG3, TMP3
+ | ld.w TMP2, TAB:CARG2->asize
+ | slli.w TMP1, TMP3, 3
+ | ld.bu TMP3, TAB:CARG2->marked
+ | ld.d CARG1, TAB:CARG2->array
+ | sltu TMP4, TMP2, CARG3
+ | add.d TMP2, RA, TMP0
+ | bnez TMP4, >5
+ | add.d TMP1, TMP1, CARG1
+ | andi TMP0, TMP3, LJ_GC_BLACK // isblack(table)
+ |3: // Copy result slots to table.
+ | ld.d CRET1, 0(RA)
+ | addi.d RA, RA, 8
+ | sltu TMP4, RA, TMP2
+ | st.d CRET1, 0(TMP1)
+ | addi.d TMP1, TMP1, 8
+ | bnez TMP4, <3
+ | bnez TMP0, >7
+ |4:
+ | ins_next
+ |
+ |5: // Need to resize array part.
+ | st.d BASE, L->base
+ | st.d PC, SAVE_PC(sp)
+ | or BASE, RD, r0
+ | or CARG1, L, r0
+ | bl extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize)
+ | // Must not reallocate the stack.
+ | or RD, BASE, r0
+ | ld.d BASE, L->base // Reload BASE for lack of a saved register.
+ | b <1
+ |
+ |7: // Possible table write barrier for any value. Skip valiswhite check.
+ | barrierback TAB:CARG2, TMP3, TMP0, <4
+ break;
+
+ /* -- Calls and vararg handling ----------------------------------------- */
+
+ case BC_CALLM:
+ | // RA = base*8, (RB = (nresults+1)*8,) RC = extra_nargs*8
+ | decode_RDtoRC8 NARGS8:RC, RD
+ | add.w NARGS8:RC, NARGS8:RC, MULTRES
+ | b ->BC_CALL_Z
+ break;
+ case BC_CALL:
+ | // RA = base*8, (RB = (nresults+1)*8,) RC = (nargs+1)*8
+ | decode_RDtoRC8 NARGS8:RC, RD
+ |->BC_CALL_Z:
+ | or TMP2, BASE, r0
+ | add.d BASE, BASE, RA
+ | ld.d LFUNC:RB, 0(BASE)
+ | addi.d BASE, BASE, 16
+ | addi.w NARGS8:RC, NARGS8:RC, -8
+ | checkfunc RB, ->vmeta_call
+ | ins_call
+ break;
+
+ case BC_CALLMT:
+ | // RA = base*8, (RB = 0,) RC = extra_nargs*8
+ | add.w NARGS8:RD, NARGS8:RD, MULTRES
+ | b ->BC_CALLT_Z1
+ break;
+ case BC_CALLT:
+ | // RA = base*8, (RB = 0,) RC = (nargs+1)*8
+ |->BC_CALLT_Z1:
+ | add.d RA, BASE, RA
+ | ld.d LFUNC:RB, 0(RA)
+ | or NARGS8:RC, RD, r0
+ | ld.d TMP1, FRAME_PC(BASE)
+ | addi.d RA, RA, 16
+ | addi.w NARGS8:RC, NARGS8:RC, -8
+ | checktp CARG3, LFUNC:RB, -LJ_TFUNC, ->vmeta_callt
+ |->BC_CALLT_Z:
+ | andi TMP0, TMP1, FRAME_TYPE // Caveat: preserve TMP0 until the 'or'.
+ | ld.bu TMP3, LFUNC:CARG3->ffid
+ | xori TMP2, TMP1, FRAME_VARG
+ | bnez TMP0, >7
+ |1:
+ | st.d LFUNC:RB, FRAME_FUNC(BASE) // Copy function down, but keep PC.
+ | sltui CARG4, TMP3, 2 // (> FF_C) Calling a fast function?
+ | or TMP2, BASE, r0
+ | or RB, CARG3, r0
+ | or TMP3, NARGS8:RC, r0
+ | beqz NARGS8:RC, >3
+ |2:
+ | ld.d CRET1, 0(RA)
+ | addi.d RA, RA, 8
+ | addi.w TMP3, TMP3, -8
+ | st.d CRET1, 0(TMP2)
+ | addi.d TMP2, TMP2, 8
+ | bnez TMP3, <2
+ |3:
+ | or TMP0, TMP0, CARG4
+ | beqz TMP0, >5
+ |4:
+ | ins_callt
+ |
+ |5: // Tailcall to a fast function with a Lua frame below.
+ | ld.w INS, -4(TMP1)
+ | decode_RA RA, INS
+ | sub.d TMP1, BASE, RA
+ | ld.d TMP1, -32(TMP1)
+ | cleartp LFUNC:TMP1
+ | ld.d TMP1, LFUNC:TMP1->pc
+ | ld.d KBASE, PC2PROTO(k)(TMP1) // Need to prepare KBASE.
+ | b <4
+ |
+ |7: // Tailcall from a vararg function.
+ | andi CARG4, TMP2, FRAME_TYPEP
+ | sub.d TMP2, BASE, TMP2 // Relocate BASE down.
+ | bnez CARG4, <1 // Vararg frame below?
+ | or BASE, TMP2, r0
+ | ld.d TMP1, FRAME_PC(TMP2)
+ | andi TMP0, TMP1, FRAME_TYPE
+ | b <1
+ break;
+
+ case BC_ITERC:
+ | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 ((2+1)*8))
+ | or TMP2, BASE, r0 // Save old BASE for vmeta_call.
+ | add.d BASE, BASE, RA
+ | ld.d RB, -24(BASE) // Callable, state, control var in slots A-3..A-1.
+ | ld.d CARG1, -16(BASE)
+ | ld.d CARG2, -8(BASE)
+ | addi.d NARGS8:RC, r0, 16 // Iterators get 2 arguments.
+ | st.d RB, 0(BASE) // Copy callable.
+ | st.d CARG1, 16(BASE) // Copy state.
+ | st.d CARG2, 24(BASE) // Copy control var.
+ | addi.d BASE, BASE, 16
+ | checkfunc RB, ->vmeta_call
+ | ins_call
+ break;
+
+ case BC_ITERN:
+ | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8)
+ |.if JIT
+ | hotloop
+ |.endif
+ |->vm_IITERN:
+ | add.d RA, BASE, RA
+ | ld.d TAB:RB, -16(RA)
+ | ld.w RC, -8(RA) // Get index from control var.
+ | cleartp TAB:RB
+ | addi.d PC, PC, 4
+ | ld.w TMP0, TAB:RB->asize
+ | ld.d TMP1, TAB:RB->array
+ | slli.d CARG3, TISNUM, 47
+ |1: // Traverse array part.
+ | sltu TMP2, RC, TMP0
+ | slli.w TMP3, RC, 3
+ | beqz TMP2, >5 // Index points after array part?
+ | add.d TMP3, TMP1, TMP3
+ | ld.d CARG1, 0(TMP3)
+ | ld.hu RD, -4+OFS_RD(PC) // ITERL RD
+ | or TMP2, RC, CARG3
+ | addi.w RC, RC, 1
+ | beq CARG1, TISNIL, <1 // Skip holes in array part.
+ | st.d TMP2, 0(RA)
+ | st.d CARG1, 8(RA)
+ | addu16i.d TMP3, r0, -0x2 // -BCBIAS_J*4
+ | decode_BC4b RD
+ | add.d RD, RD, TMP3
+ | st.w RC, -8(RA) // Update control var.
+ | add.d PC, PC, RD
+ |3:
+ | ins_next
+ |
+ |5: // Traverse hash part.
+ | ld.w TMP1, TAB:RB->hmask
+ | sub.w RC, RC, TMP0
+ | ld.d TMP2, TAB:RB->node
+ |6:
+ | sltu CARG1, TMP1, RC // End of iteration? Branch to ITERL+1.
+ | slli.w TMP3, RC, 5
+ | bnez CARG1, <3
+ | slli.w RB, RC, 3
+ | sub.w TMP3, TMP3, RB
+ | add.d NODE:TMP3, TMP3, TMP2 // node = tab->node + (idx*32-idx*8)
+ | ld.d CARG1, 0(NODE:TMP3)
+ | ld.hu RD, -4+OFS_RD(PC) // ITERL RD
+ | addi.w RC, RC, 1
+ | beq CARG1, TISNIL, <6 // Skip holes in hash part.
+ | ld.d CARG2, NODE:TMP3->key
+ | addu16i.d TMP3, r0, -0x2 // -BCBIAS_J*4
+ | st.d CARG1, 8(RA)
+ | add.w RC, RC, TMP0
+ | decode_BC4b RD
+ | add.w RD, RD, TMP3
+ | st.d CARG2, 0(RA)
+ | add.d PC, PC, RD
+ | st.w RC, -8(RA) // Update control var.
+ | b <3
+ break;
+
+ case BC_ISNEXT:
+ | // RA = base*8, RD = target (points to ITERN)
+ | add.d RA, BASE, RA
+ | srli.w TMP0, RD, 1
+ | ld.d CFUNC:CARG1, -24(RA)
+ | add.d TMP0, PC, TMP0
+ | ld.d CARG2, -16(RA)
+ | ld.d CARG3, -8(RA)
+ | addu16i.d TMP2, r0, -0x2 // -BCBIAS_J*4
+ | checkfunc CFUNC:CARG1, >5
+ | gettp CARG2, CARG2
+ | addi.d CARG2, CARG2, -LJ_TTAB
+ | ld.bu TMP1, CFUNC:CARG1->ffid
+ | addi.d CARG3, CARG3, -LJ_TNIL
+ | or TMP3, CARG2, CARG3
+ | addi.d TMP1, TMP1, -FF_next_N
+ | or TMP3, TMP3, TMP1
+ | addu16i.d TMP1, r0, 0xfffe // LJ_KEYINDEX >> 16
+ | bnez TMP3, >5
+ | add.d PC, TMP0, TMP2
+ | slli.d TMP1, TMP1, 16
+ | addu16i.d TMP1, TMP1, 0x7fff // LJ_KEYINDEX & 0xffff
+ | slli.d TMP1, TMP1, 16
+ | st.d TMP1, -8(RA)
+ |1:
+ | ins_next
+ |5: // Despecialize bytecode if any of the checks fail.
+ | addi.d TMP3, r0, BC_JMP
+ | addi.d TMP1, r0, BC_ITERC
+ | st.b TMP3, -4+OFS_OP(PC)
+ | add.d PC, TMP0, TMP2
+ |.if JIT
+ | ld.b TMP0, OFS_OP(PC)
+ | addi.d TMP3, r0, BC_ITERN
+ | ld.hu TMP2, OFS_RD(PC)
+ | bne TMP0, TMP3, >6
+ |.endif
+ | st.b TMP1, OFS_OP(PC)
+ | b <1
+ |.if JIT
+ |6: // Unpatch JLOOP.
+ | .LDXD TMP0, DISPATCH, DISPATCH_J(trace)
+ | slli.w TMP2, TMP2, 3
+ | add.d TMP0, TMP0, TMP2
+ | ld.d TRACE:TMP2, 0(TMP0)
+ | ld.w TMP0, TRACE:TMP2->startins
+ | addi.d TMP3, r0, -256
+ | and TMP0, TMP0, TMP3
+ | or TMP0, TMP0, TMP1
+ | st.w TMP0, 0(PC)
+ | b <1
+ |.endif
+ break;
+
+ case BC_VARG:
+ | // RA = base*8, RB = (nresults+1)*8, RC = numparams*8
+ | ld.d TMP0, FRAME_PC(BASE)
+ | decode_RDtoRC8 RC, RD
+ | decode_RB RB, INS
+ | add.d RC, BASE, RC
+ | add.d RA, BASE, RA
+ | addi.d RC, RC, FRAME_VARG
+ | add.d TMP2, RA, RB
+ | addi.d TMP3, BASE, -16 // TMP3 = vtop
+ | sub.d RC, RC, TMP0 // RC = vbase
+ | // Note: RC may now be even _above_ BASE if nargs was < numparams.
+ | sub.d TMP1, TMP3, RC
+ | beqz RB, >5 // Copy all varargs?
+ | addi.d TMP2, TMP2, -16
+ |1: // Copy vararg slots to destination slots.
+ | ld.d CARG1, 0(RC)
+ | sltu TMP0, RC, TMP3
+ | addi.d RC, RC, 8
+ | maskeqz CARG1, CARG1, TMP0
+ | masknez TMP0, TISNIL, TMP0
+ | or CARG1, CARG1, TMP0
+ | st.d CARG1, 0(RA)
+ | sltu TMP0, RA, TMP2
+ | addi.d RA, RA, 8
+ | bnez TMP0, <1
+ |3:
+ | ins_next
+ |
+ |5: // Copy all varargs.
+ | ld.d TMP0, L->maxstack
+ | addi.d MULTRES, r0, 8 // MULTRES = (0+1)*8
+ | bge r0, TMP1, <3 // No vararg slots?
+ | add.d TMP2, RA, TMP1
+ | sltu TMP2, TMP0, TMP2
+ | addi.d MULTRES, TMP1, 8
+ | bnez TMP2, >7
+ |6:
+ | ld.d CRET1, 0(RC)
+ | addi.d RC, RC, 8
+ | st.d CRET1, 0(RA)
+ | sltu TMP0, RC, TMP3
+ | addi.d RA, RA, 8
+ | bnez TMP0, <6 // More vararg slots?
+ | b <3
+ |
+ |7: // Grow stack for varargs.
+ | st.d RA, L->top
+ | sub.d RA, RA, BASE
+ | st.d BASE, L->base
+ | sub.d BASE, RC, BASE // Need delta, because BASE may change.
+ | st.d PC, SAVE_PC(sp)
+ | srli.w CARG2, TMP1, 3
+ | or CARG1, L, r0
+ | bl extern lj_state_growstack // (lua_State *L, int n)
+ | or RC, BASE, r0
+ | ld.d BASE, L->base
+ | add.d RA, BASE, RA
+ | add.d RC, BASE, RC
+ | addi.d TMP3, BASE, -16
+ | b <6
+ break;
+
+ /* -- Returns ----------------------------------------------------------- */
+
+ case BC_RETM:
+ | // RA = results*8, RD = extra_nresults*8
+ | add.w RD, RD, MULTRES
+ | b ->BC_RET_Z1
+ break;
+
+ case BC_RET:
+ | // RA = results*8, RD = (nresults+1)*8
+ |->BC_RET_Z1:
+ | ld.d PC, FRAME_PC(BASE)
+ | add.d RA, BASE, RA
+ | or MULTRES, RD, r0
+ |1:
+ | andi TMP0, PC, FRAME_TYPE
+ | xori TMP1, PC, FRAME_VARG
+ | bnez TMP0, ->BC_RETV_Z
+ |
+ |->BC_RET_Z:
+ | // BASE = base, RA = resultptr, RD = (nresults+1)*8, PC = return
+ | ld.w INS, -4(PC)
+ | addi.d TMP2, BASE, -16
+ | addi.d RC, RD, -8
+ | decode_RA TMP0, INS
+ | decode_RB RB, INS
+ | add.d TMP3, TMP2, RB
+ | sub.d BASE, TMP2, TMP0
+ | beqz RC, >3
+ |2:
+ | ld.d CRET1, 0(RA)
+ | addi.d RA, RA, 8
+ | addi.d RC, RC, -8
+ | st.d CRET1, 0(TMP2)
+ | addi.d TMP2, TMP2, 8
+ | bnez RC, <2
+ |3:
+ | addi.d TMP3, TMP3, -8
+ |5:
+ | sltu TMP0, TMP2, TMP3
+ | ld.d LFUNC:TMP1, FRAME_FUNC(BASE)
+ | bnez TMP0, >6
+ | cleartp LFUNC:TMP1
+ | ld.d TMP1, LFUNC:TMP1->pc
+ | ld.d KBASE, PC2PROTO(k)(TMP1)
+ | ins_next
+ |
+ |6: // Fill up results with nil.
+ | st.d TISNIL, 0(TMP2)
+ | addi.d TMP2, TMP2, 8
+ | b <5
+ |
+ |->BC_RETV_Z: // Non-standard return case.
+ | andi TMP2, TMP1, FRAME_TYPEP
+ | bnez TMP2, ->vm_return
+ | // Return from vararg function: relocate BASE down.
+ | sub.d BASE, BASE, TMP1
+ | ld.d PC, FRAME_PC(BASE)
+ | b <1
+ break;
+
+ case BC_RET0: case BC_RET1:
+ | // RA = results*8, RD = (nresults+1)*8
+ | ld.d PC, FRAME_PC(BASE)
+ | add.d RA, BASE, RA
+ | or MULTRES, RD, r0
+ | andi TMP0, PC, FRAME_TYPE
+ | xori TMP1, PC, FRAME_VARG
+ | bnez TMP0, ->BC_RETV_Z
+ | ld.w INS, -4(PC)
+ | addi.d TMP2, BASE, -16
+ if (op == BC_RET1) {
+ | ld.d CRET1, 0(RA)
+ }
+ | decode_RB RB, INS
+ | decode_RA RA, INS
+ | sub.d BASE, TMP2, RA
+ if (op == BC_RET1) {
+ | st.d CRET1, 0(TMP2)
+ }
+ |5:
+ | sltu TMP0, RD, RB
+ | ld.d TMP1, FRAME_FUNC(BASE)
+ | bnez TMP0, >6
+ | cleartp LFUNC:TMP1
+ | ld.d TMP1, LFUNC:TMP1->pc
+ | ins_next1
+ | ld.d KBASE, PC2PROTO(k)(TMP1)
+ | ins_next2
+ |
+ |6: // Fill up results with nil.
+ | addi.d TMP2, TMP2, 8
+ | addi.d RD, RD, 8
+ if (op == BC_RET1) {
+ | st.d TISNIL, 0(TMP2)
+ } else {
+ | st.d TISNIL, -8(TMP2)
+ }
+ | b <5
+ break;
+
+ /* -- Loops and branches ------------------------------------------------ */
+
+ case BC_FORL:
+ |.if JIT
+ | hotloop
+ |.endif
+ | // Fall through. Assumes BC_IFORL follows.
+ break;
+
+ case BC_JFORI:
+ case BC_JFORL:
+#if !LJ_HASJIT
+ break;
+#endif
+ case BC_FORI:
+ case BC_IFORL:
+ | // RA = base*8, RD = target (after end of loop or start of loop)
+ vk = (op == BC_IFORL || op == BC_JFORL);
+ | add.d RA, BASE, RA
+ | ld.d CARG1, FORL_IDX*8(RA) // CARG1 = IDX
+ | ld.d CARG2, FORL_STEP*8(RA) // CARG2 = STEP
+ | ld.d CARG3, FORL_STOP*8(RA) // CARG3 = STOP
+ | gettp CARG4, CARG1
+ | gettp CARG5, CARG2
+ | gettp CARG6, CARG3
+ if (op != BC_JFORL) {
+ | srli.w RD, RD, 1
+ | addu16i.d TMP2, r0, -0x2 // -BCBIAS_J<<2
+ | add.d TMP2, RD, TMP2
+ }
+ | bne CARG4, TISNUM, >3
+ | slli.w CARG4, CARG1, 0 // start
+ | slli.w CARG3, CARG3, 0 // stop
+ if (!vk) { // init
+ | bne CARG6, TISNUM, ->vmeta_for
+ | bne CARG5, TISNUM, ->vmeta_for
+ | bstrpick.d TMP0, CARG2, 31, 31 // sign
+ | slt CARG2, CARG3, CARG4
+ | slt TMP1, CARG4, CARG3
+ | maskeqz TMP1, TMP1, TMP0
+ | masknez CARG2, CARG2, TMP0
+ | or CARG2, CARG2, TMP1 // CARG2=0: +,start <= stop or -,start >= stop
+ } else {
+ | slli.w CARG5, CARG2, 0 // step
+ | add.w CARG1, CARG4, CARG5 // start + step
+ | xor TMP3, CARG1, CARG4 // y^a
+ | xor TMP1, CARG1, CARG5 // y^b
+ | and TMP3, TMP3, TMP1
+ | slt TMP1, CARG1, CARG3 // start+step < stop ?
+ | slt CARG3, CARG3, CARG1 // stop < start+step ?
+ | slt TMP0, CARG5, r0 // step < 0 ?
+ | slt TMP3, TMP3, r0 // ((y^a) & (y^b)) < 0: overflow.
+ | maskeqz TMP1, TMP1, TMP0
+ | masknez CARG3, CARG3, TMP0
+ | or CARG3, CARG3, TMP1
+ | or CARG2, CARG3, TMP3 // CARG2=1: overflow; CARG2=0: continue
+ | bstrpick.d CARG1, CARG1, 31, 0
+ | settp CARG1, TISNUM
+ | st.d CARG1, FORL_IDX*8(RA)
+ }
+ |1:
+ if (op == BC_FORI) {
+ | maskeqz TMP2, TMP2, CARG2 // CARG2!=0: jump out the loop; CARG2==0: next INS
+ | add.d PC, PC, TMP2
+ } else if (op == BC_JFORI) {
+ | add.d PC, PC, TMP2
+ | ld.hu RD, -4+OFS_RD(PC)
+ } else if (op == BC_IFORL) {
+ | masknez TMP2, TMP2, CARG2 // CARG2!=0: next INS; CARG2==0: jump back
+ | add.d PC, PC, TMP2
+ }
+ | ins_next1
+ | st.d CARG1, FORL_EXT*8(RA)
+ |2:
+ if (op == BC_JFORI) {
+ | decode_BC8b RD
+ | beqz CARG2, =>BC_JLOOP // CARG2 == 0: execute the loop
+ } else if (op == BC_JFORL) {
+ | beqz CARG2, =>BC_JLOOP
+ }
+ | ins_next2
+ |
+ |3: // FP loop.
+ | fld.d FTMP0, FORL_IDX*8(RA) // start
+ | fld.d FTMP1, FORL_STOP*8(RA) // stop
+ | ld.d TMP0, FORL_STEP*8(RA) // step
+ | slt TMP0, TMP0, r0 // step < 0 ?
+ | movgr2fr.d FTMP2, TMP0
+ if (!vk) {
+ | sltui TMP3, CARG4, LJ_TISNUM // start is number ?
+ | sltui TMP0, CARG5, LJ_TISNUM // step is number ?
+ | sltui TMP1, CARG6, LJ_TISNUM // stop is number ?
+ | and TMP3, TMP3, TMP1
+ | and TMP0, TMP0, TMP3
+ | beqz TMP0, ->vmeta_for // if start or step or stop isn't number
+ | fcmp.clt.d FCC0, FTMP0, FTMP1 // start < stop ?
+ | fcmp.clt.d FCC1, FTMP1, FTMP0 // stop < start ?
+ | movcf2fr FTMP3, FCC0
+ | movcf2fr FTMP4, FCC1
+ | movfr2cf FCC0, FTMP2
+ | fsel FTMP2, FTMP4, FTMP3, FCC0
+ | movfr2gr.d CARG2, FTMP2 // CARG2=0:+,start<stop or -,start>stop
+ | b <1
+ } else {
+ | fld.d FTMP3, FORL_STEP*8(RA)
+ | fadd.d FTMP0, FTMP0, FTMP3 // start + step
+ | fcmp.clt.d FCC0, FTMP0, FTMP1 // start + step < stop ?
+ | fcmp.clt.d FCC1, FTMP1, FTMP0
+ | movcf2fr FTMP3, FCC0
+ | movcf2fr FTMP4, FCC1
+ | movfr2cf FCC0, FTMP2
+ | fsel FTMP2, FTMP4, FTMP3, FCC0
+ | movfr2gr.d CARG2, FTMP2
+ if (op == BC_IFORL) {
+ | masknez TMP2, TMP2, CARG2
+ | add.d PC, PC, TMP2
+ }
+ | fst.d FTMP0, FORL_IDX*8(RA)
+ | ins_next1
+ | fst.d FTMP0, FORL_EXT*8(RA)
+ | b <2
+ }
+ break;
+
+ case BC_ITERL:
+ |.if JIT
+ | hotloop
+ |.endif
+ | // Fall through. Assumes BC_IITERL follows.
+ break;
+
+ case BC_JITERL:
+#if !LJ_HASJIT
+ break;
+#endif
+ case BC_IITERL:
+ | // RA = base*8, RD = target
+ | add.d RA, BASE, RA
+ | ld.d TMP1, 0(RA)
+ | beq TMP1, TISNIL, >1 // Stop if iterator returned nil.
+ if (op == BC_JITERL) {
+ | st.d TMP1,-8(RA)
+ | b =>BC_JLOOP
+ } else {
+ | branch_RD // Otherwise save control var + branch.
+ | st.d TMP1, -8(RA)
+ }
+ |1:
+ | ins_next
+ break;
+
+ case BC_LOOP:
+ | // RA = base*8, RD = target (loop extent)
+ | // Note: RA/RD is only used by trace recorder to determine scope/extent
+ | // This opcode does NOT jump, it's only purpose is to detect a hot loop.
+ |.if JIT
+ | hotloop
+ |.endif
+ | // Fall through. Assumes BC_ILOOP follows.
+ break;
+
+ case BC_ILOOP:
+ | // RA = base*8, RD = target (loop extent)
+ | ins_next
+ break;
+
+ case BC_JLOOP:
+ |.if JIT
+ | // RA = base*8 (ignored), RD = traceno*8
+ | .LDXD TMP0, DISPATCH, DISPATCH_J(trace)
+ | add.d TMP0, TMP0, RD
+ | // Traces on LOONGARCH don't store the trace number, so use 0.
+ | .STXD r0, DISPATCH, DISPATCH_GL(vmstate)
+ | ld.d TRACE:TMP1, 0(TMP0)
+ | .STXD BASE, DISPATCH, DISPATCH_GL(jit_base) // store Current JIT code L->base
+ | ld.d TMP1, TRACE:TMP1->mcode
+ | .ADD16I JGL, DISPATCH, GG_DISP2G+32768
+ | .STXD L, DISPATCH, DISPATCH_GL(tmpbuf.L)
+ | jirl r0, TMP1, 0
+ |.endif
+ break;
+
+ case BC_JMP:
+ | // RA = base*8 (only used by trace recorder), RD = target
+ | branch_RD // PC + (jump - 0x8000)<<2
+ | ins_next
+ break;
+
+ /* -- Function headers -------------------------------------------------- */
+
+ case BC_FUNCF:
+ |.if JIT
+ | hotcall
+ |.endif
+ case BC_FUNCV: /* NYI: compiled vararg functions. */
+ | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow.
+ break;
+
+ case BC_JFUNCF:
+#if !LJ_HASJIT
+ break;
+#endif
+ case BC_IFUNCF:
+ | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8
+ | ld.d TMP2, L->maxstack
+ | ld.bu TMP1, -4+PC2PROTO(numparams)(PC)
+ | ld.d KBASE, -4+PC2PROTO(k)(PC)
+ | sltu TMP0, TMP2, RA
+ | slli.w TMP1, TMP1, 3 // numparams*8
+ | bnez TMP0, ->vm_growstack_l
+ |2:
+ | sltu TMP0, NARGS8:RC, TMP1 // Check for missing parameters.
+ | bnez TMP0, >3
+ if (op == BC_JFUNCF) {
+ | decode_RD RD, INS
+ | b =>BC_JLOOP
+ } else {
+ | ins_next
+ }
+ |
+ |3: // Clear missing parameters.
+ | add.d TMP0, BASE, NARGS8:RC
+ | st.d TISNIL, 0(TMP0)
+ | addi.w NARGS8:RC, NARGS8:RC, 8
+ | b <2
+ break;
+
+ case BC_JFUNCV:
+#if !LJ_HASJIT
+ break;
+#endif
+ | NYI // NYI: compiled vararg functions
+ break; /* NYI: compiled vararg functions. */
+
+ case BC_IFUNCV:
+ | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8
+ | addi.w TMP0, r0, LJ_TFUNC
+ | add.d TMP1, BASE, RC
+ | ld.d TMP2, L->maxstack
+ | settp LFUNC:RB, TMP0
+ | add.d TMP0, RA, RC
+ | st.d LFUNC:RB, 0(TMP1) // Store (tagged) copy of LFUNC.
+ | addi.d TMP3, RC, 16+FRAME_VARG
+ | sltu TMP0, TMP0, TMP2
+ | ld.d KBASE, -4+PC2PROTO(k)(PC)
+ | st.d TMP3, 8(TMP1) // Store delta + FRAME_VARG.
+ | beqz TMP0, ->vm_growstack_l
+ | ld.bu TMP2, -4+PC2PROTO(numparams)(PC)
+ | or RA, BASE, r0
+ | or RC, TMP1, r0
+ | ins_next1
+ | addi.d BASE, TMP1, 16
+ | beqz TMP2, >2
+ |1:
+ | ld.d TMP0, 0(RA)
+ | sltu CARG2, RA, RC // Less args than parameters?
+ | or CARG1, TMP0, r0
+ | addi.d RA, RA, 8
+ | addi.d TMP1, TMP1, 8
+ | addi.w TMP2, TMP2, -1
+ | beqz CARG2, >3
+ | masknez TMP3, CARG1, CARG2 // Clear old fixarg slot (help the GC).
+ | maskeqz CARG1, TISNIL, CARG2
+ | or CARG1, CARG1, TMP3
+ | st.d CARG1, -8(RA)
+ | st.d TMP0, 8(TMP1)
+ | bnez TMP2, <1
+ |2:
+ | ins_next2
+ |3:
+ | maskeqz TMP0, TMP0, CARG2 // Clear missing fixargs.
+ | masknez TMP3, TISNIL, CARG2
+ | or TMP0, TMP0, TMP3
+ | st.d TMP0, 8(TMP1)
+ | bnez TMP2, <1
+ | b <2
+ break;
+
+ case BC_FUNCC:
+ case BC_FUNCCW:
+ | // BASE = new base, RA = BASE+framesize*8, RB = CFUNC, RC = nargs*8
+ if (op == BC_FUNCC) {
+ | ld.d CARG4, CFUNC:RB->f
+ } else {
+ | .LDXD CARG4, DISPATCH, DISPATCH_GL(wrapf)
+ }
+ | add.d TMP1, RA, NARGS8:RC
+ | ld.d TMP2, L->maxstack
+ | add.d RC, BASE, NARGS8:RC
+ | st.d BASE, L->base // base of currently executing function
+ | st.d RC, L->top
+ | sltu TMP3, TMP2, TMP1
+ | li_vmstate C // addi.w TMP0, r0, ~LJ_VMST_C
+ if (op == BC_FUNCCW) {
+ | ld.d CARG2, CFUNC:RB->f
+ }
+ | or CARG1, L, r0
+ | bnez TMP3, ->vm_growstack_c // Need to grow stack.
+ | st_vmstate // .STXW TMP0, DISPATCH, DISPATCH_GL(vmstate)
+ | jirl r1, CARG4, 0 // (lua_State *L [, lua_CFunction f])
+ | // Returns nresults.
+ | ld.d BASE, L->base
+ | ld.d TMP1, L->top
+ | .STXD L, DISPATCH, DISPATCH_GL(cur_L)
+ | slli.w RD, CRET1, 3
+ | li_vmstate INTERP
+ | ld.d PC, FRAME_PC(BASE) // Fetch PC of caller.
+ | sub.d RA, TMP1, RD // RA = L->top - nresults*8
+ | st_vmstate
+ | b ->vm_returnc
+ break;
+
+ /* ---------------------------------------------------------------------- */
+
+ default:
+ fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]);
+ exit(2);
+ break;
+ }
+}
+
+static int build_backend(BuildCtx *ctx)
+{
+ int op;
+
+ dasm_growpc(Dst, BC__MAX);  /* Reserve one DynASM pc label per bytecode op. */
+
+ build_subroutines(ctx);  /* Emit the VM subroutines before the op handlers. */
+
+ |.code_op
+ for (op = 0; op < BC__MAX; op++)
+ build_ins(ctx, (BCOp)op, op);  /* Emit the handler for each bytecode op. */
+
+ return BC__MAX;  /* Number of pc labels consumed. */
+}
+
+/* Emit pseudo frame-info for all assembler functions. */
+static void emit_asm_debug(BuildCtx *ctx)
+{
+ int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code);
+ int i;
+ switch (ctx->mode) {
+ case BUILD_elfasm:
+ fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n");
+ fprintf(ctx->fp,
+ ".Lframe0:\n"
+ "\t.4byte .LECIE0-.LSCIE0\n"
+ ".LSCIE0:\n"
+ "\t.4byte 0xffffffff\n"
+ "\t.byte 0x1\n"
+ "\t.string \"\"\n"
+ "\t.uleb128 0x1\n"
+ "\t.sleb128 -4\n"
+ "\t.byte 1\n" /* Return address is in ra. */
+ "\t.byte 0xc\n\t.uleb128 3\n\t.uleb128 0\n" /* def_cfa sp 0 */
+ "\t.align 3\n"
+ ".LECIE0:\n\n");
+ fprintf(ctx->fp,
+ ".LSFDE0:\n"
+ "\t.4byte .LEFDE0-.LASFDE0\n"
+ ".LASFDE0:\n"
+ "\t.4byte .Lframe0\n"
+ "\t.8byte .Lbegin\n"
+ "\t.8byte %d\n"
+ "\t.byte 0xe\n\t.uleb128 %d\n"
+ "\t.byte 0x81\n\t.uleb128 2*5\n" /* offset ra */
+ "\t.byte 0x96\n\t.uleb128 2*6\n", /* offset fp */
+ fcofs, CFRAME_SIZE);
+ for (i = 31; i >= 23; i--) /* offset r31-r23 */
+ fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 2*(31-i+7));
+ for (i = 31; i >= 24; i--) /* offset f31-f24 */
+ fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 2*(31-i+16));
+ fprintf(ctx->fp,
+ "\t.align 3\n"
+ ".LEFDE0:\n\n");
+#if LJ_HASFFI
+ fprintf(ctx->fp,
+ ".LSFDE1:\n"
+ "\t.4byte .LEFDE1-.LASFDE1\n"
+ ".LASFDE1:\n"
+ "\t.4byte .Lframe0\n"
+ "\t.8byte lj_vm_ffi_call\n" /* 64-bit address, must match FDE0 above. */
+ "\t.8byte %d\n"
+ "\t.byte 0x81\n\t.uleb128 2*5\n" /* offset ra */
+ "\t.byte 0x96\n\t.uleb128 2*6\n" /* offset fp */
+ "\t.align 3\n"
+ ".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
+#endif
+#if !LJ_NO_UNWIND
+ fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n");
+ fprintf(ctx->fp,
+ ".Lframe1:\n"
+ "\t.4byte .LECIE1-.LSCIE1\n"
+ ".LSCIE1:\n"
+ "\t.4byte 0\n"
+ "\t.byte 0x1\n"
+ "\t.string \"zPR\"\n"
+ "\t.uleb128 0x1\n"
+ "\t.sleb128 -4\n"
+ "\t.byte 1\n" /* Return address is in ra. */
+ "\t.uleb128 6\n" /* augmentation length */
+ "\t.byte 0x1b\n"
+ "\t.4byte lj_err_unwind_dwarf-.\n"
+ "\t.byte 0x1b\n"
+ "\t.byte 0xc\n\t.uleb128 3\n\t.uleb128 0\n" /* def_cfa sp 0 */
+ "\t.align 2\n"
+ ".LECIE1:\n\n");
+ fprintf(ctx->fp,
+ ".LSFDE2:\n"
+ "\t.4byte .LEFDE2-.LASFDE2\n"
+ ".LASFDE2:\n"
+ "\t.4byte .LASFDE2-.Lframe1\n"
+ "\t.4byte .Lbegin-.\n"
+ "\t.4byte %d\n"
+ "\t.uleb128 0\n" /* augmentation length */
+ "\t.byte 0x81\n\t.uleb128 2*5\n" /* offset ra */
+ "\t.byte 0x96\n\t.uleb128 2*6\n", /* offset fp */
+ fcofs);
+ for (i = 31; i >= 23; i--) /* offset r31-r23 */
+ fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 2*(31-i+7));
+ for (i = 31; i >= 24; i--) /* offset f31-f24 */
+ fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 2*(31-i+16));
+ fprintf(ctx->fp,
+ "\t.align 2\n"
+ ".LEFDE2:\n\n");
+#if LJ_HASFFI
+ fprintf(ctx->fp,
+ ".Lframe2:\n"
+ "\t.4byte .LECIE2-.LSCIE2\n"
+ ".LSCIE2:\n"
+ "\t.4byte 0\n"
+ "\t.byte 0x1\n"
+ "\t.string \"zR\"\n"
+ "\t.uleb128 0x1\n"
+ "\t.sleb128 -4\n"
+ "\t.byte 1\n" /* Return address is in ra. */
+ "\t.uleb128 1\n" /* augmentation length */
+ "\t.byte 0x1b\n"
+ "\t.byte 0xc\n\t.uleb128 3\n\t.uleb128 0\n" /* def_cfa sp 0 */
+ "\t.align 2\n"
+ ".LECIE2:\n\n");
+ fprintf(ctx->fp,
+ ".LSFDE3:\n"
+ "\t.4byte .LEFDE3-.LASFDE3\n"
+ ".LASFDE3:\n"
+ "\t.4byte .LASFDE3- .Lframe2\n"
+ "\t.4byte lj_vm_ffi_call-.\n"
+ "\t.4byte %d\n"
+ "\t.uleb128 0\n" /* augmentation length */
+ "\t.byte 0x81\n\t.uleb128 2*5\n" /* offset ra */
+ "\t.byte 0x96\n\t.uleb128 2*6\n" /* offset fp */
+ "\t.align 2\n"
+ ".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
+#endif
+#endif
+#if !LJ_NO_UNWIND
+ /* NYI */
+#endif
+ break;
+ default:
+ break;
+ }
+}
+