File luajit-s390x.patch of Package luajit.28820
diff -urpN luajit-2.1.0~beta3+git.1624618403.e9577376.orig/dynasm/dasm_s390x.h luajit-2.1.0~beta3+git.1624618403.e9577376/dynasm/dasm_s390x.h
--- luajit-2.1.0~beta3+git.1624618403.e9577376.orig/dynasm/dasm_s390x.h 1969-12-31 18:00:00.000000000 -0600
+++ luajit-2.1.0~beta3+git.1624618403.e9577376/dynasm/dasm_s390x.h 2021-10-05 12:36:23.421160526 -0500
@@ -0,0 +1,546 @@
+/*
+** DynASM s390x encoding engine.
+** Copyright (C) 2005-2016 Mike Pall. All rights reserved.
+** Released under the MIT license. See dynasm.lua for full copyright notice.
+*/
+
+#include <stddef.h>
+#include <stdarg.h>
+#include <string.h>
+#include <stdlib.h>
+
+#define DASM_ARCH "s390x"
+
+#ifndef DASM_EXTERN
+#define DASM_EXTERN(a,b,c,d) 0
+#endif
+
+/* Action definitions. */
+enum { /* CHECK: Keep in sync with action_names in dasm_s390x.lua. */
+ DASM_STOP, DASM_SECTION, DASM_ESC, DASM_REL_EXT,
+ /* The following actions need a buffer position. */
+ DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG,
+ /* The following actions also have an argument. */
+ DASM_REL_PC, DASM_LABEL_PC,
+ DASM_DISP12, DASM_DISP20,
+ DASM_IMM8, DASM_IMM16, DASM_IMM32,
+ DASM_LEN8R,DASM_LEN4HR,DASM_LEN4LR,
+ DASM__MAX /* Action-list halfwords >= DASM__MAX are instruction data. */
+};
+
+/* Maximum number of section buffer positions for a single dasm_put() call. */
+#define DASM_MAXSECPOS 25
+
+/* DynASM encoder status codes. Action list offset or number are or'ed in. */
+#define DASM_S_OK 0x00000000
+#define DASM_S_NOMEM 0x01000000
+#define DASM_S_PHASE 0x02000000
+#define DASM_S_MATCH_SEC 0x03000000
+#define DASM_S_RANGE_I 0x11000000
+#define DASM_S_RANGE_SEC 0x12000000
+#define DASM_S_RANGE_LG 0x13000000
+#define DASM_S_RANGE_PC 0x14000000
+#define DASM_S_RANGE_REL 0x15000000
+#define DASM_S_UNDEF_LG 0x21000000
+#define DASM_S_UNDEF_PC 0x22000000
+
+/* Macros to convert positions (8 bit section + 24 bit index). */
+#define DASM_POS2IDX(pos) ((pos)&0x00ffffff)
+#define DASM_POS2BIAS(pos) ((pos)&0xff000000)
+#define DASM_SEC2POS(sec) ((sec)<<24)
+#define DASM_POS2SEC(pos) ((pos)>>24)
+#define DASM_POS2PTR(D, pos) (D->sections[DASM_POS2SEC(pos)].rbuf + (pos))
+
+/* Action list type. */
+typedef const unsigned short *dasm_ActList;
+
+/* Per-section structure. */
+typedef struct dasm_Section {
+ int *rbuf; /* Biased buffer pointer (negative section bias). */
+ int *buf; /* True buffer pointer. */
+ size_t bsize; /* Buffer size in bytes. */
+ int pos; /* Biased buffer position of the next free slot. */
+ int epos; /* End of biased buffer position - max single put. */
+ int ofs; /* Byte offset into section (pass 1 estimate). */
+} dasm_Section;
+
+/* Core structure holding the DynASM encoding state. */
+struct dasm_State {
+ size_t psize; /* Allocated size of this structure. */
+ dasm_ActList actionlist; /* Current actionlist pointer. */
+ int *lglabels; /* Local/global chain/pos ptrs. */
+ size_t lgsize; /* Size of lglabels in bytes. */
+ int *pclabels; /* PC label chains/pos ptrs. */
+ size_t pcsize; /* Size of pclabels in bytes. */
+ void **globals; /* Array of globals (bias -10). */
+ dasm_Section *section; /* Pointer to active section. */
+ size_t codesize; /* Total size of all code sections. */
+ int maxsection; /* 0 <= sectionidx < maxsection. */
+ int status; /* Status code. */
+ dasm_Section sections[1]; /* All sections. Alloc-extended. */
+};
+
+/* The size of the core structure depends on the max. number of sections. */
+#define DASM_PSZ(ms) (sizeof(dasm_State)+(ms-1)*sizeof(dasm_Section))
+
+
+/* Initialize DynASM state. */
+void dasm_init(Dst_DECL, int maxsection)
+{
+  dasm_State *st;
+  size_t alloc = 0;
+  int secidx;
+  Dst_REF = NULL;
+  DASM_M_GROW(Dst, struct dasm_State, Dst_REF, alloc, DASM_PSZ(maxsection));
+  st = Dst_REF;
+  st->psize = alloc;
+  st->maxsection = maxsection;
+  st->globals = NULL;
+  st->lglabels = NULL; st->lgsize = 0;  /* Label tables start out empty. */
+  st->pclabels = NULL; st->pcsize = 0;
+  /* No section owns a buffer yet. */
+  for (secidx = 0; secidx < maxsection; secidx++) {
+    dasm_Section *sec = &st->sections[secidx];
+    sec->buf = NULL;  /* Pass 3 needs this. */
+    sec->rbuf = sec->buf - DASM_SEC2POS(secidx);
+    sec->bsize = 0;
+    sec->epos = 0;  /* Not the real limit; recalculated after a resize. */
+  }
+}
+
+/* Free DynASM state. */
+void dasm_free(Dst_DECL)
+{
+  dasm_State *st = Dst_REF;
+  int secidx;
+  /* Section buffers first, then the label tables, then the state itself. */
+  for (secidx = 0; secidx < st->maxsection; secidx++) {
+    dasm_Section *sec = &st->sections[secidx];
+    if (sec->buf != NULL) DASM_M_FREE(Dst, sec->buf, sec->bsize);
+  }
+  if (st->pclabels != NULL) DASM_M_FREE(Dst, st->pclabels, st->pcsize);
+  if (st->lglabels != NULL) DASM_M_FREE(Dst, st->lglabels, st->lgsize);
+  DASM_M_FREE(Dst, st, st->psize);
+}
+
+/* Setup global label array. Must be called before dasm_setup(). */
+void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl)
+{
+  dasm_State *st = Dst_REF;
+  DASM_M_GROW(Dst, int, st->lglabels, st->lgsize, (10 + maxgl) * sizeof(int));
+  st->globals = gl - 10;  /* Negative bias compensates for the local labels. */
+}
+
+/* Grow PC label array. Can be called after dasm_setup(), too. */
+void dasm_growpc(Dst_DECL, unsigned int maxpc)
+{
+  dasm_State *st = Dst_REF;
+  const size_t oldsz = st->pcsize;  /* Size in bytes before growing. */
+  DASM_M_GROW(Dst, int, st->pclabels, st->pcsize, maxpc * sizeof(int));
+  memset((unsigned char *)st->pclabels + oldsz, 0, st->pcsize - oldsz);
+}
+
+/* Setup encoder. */
+void dasm_setup(Dst_DECL, const void *actionlist)
+{
+  dasm_State *st = Dst_REF;
+  int secidx;
+  st->status = DASM_S_OK;
+  st->actionlist = (dasm_ActList)actionlist;
+  st->section = &st->sections[0];
+  memset(st->lglabels, 0, st->lgsize);  /* Clear all local/global labels. */
+  if (st->pclabels != NULL) memset(st->pclabels, 0, st->pcsize);
+  for (secidx = 0; secidx < st->maxsection; secidx++) {
+    dasm_Section *sec = &st->sections[secidx];
+    sec->pos = DASM_SEC2POS(secidx);  /* Rewind to the section's bias. */
+    sec->ofs = 0;
+  }
+}
+
+
+#ifdef DASM_CHECKS
+#define CK(x, st) \
+ do { if (!(x)) { \
+ D->status = DASM_S_##st|(p-D->actionlist-1); return; } } while (0)
+#define CKPL(kind, st) \
+ do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \
+ D->status = DASM_S_RANGE_##st|(p-D->actionlist-1); return; } } while (0)
+#else /* !DASM_CHECKS: both checks expand to nothing. */
+#define CK(x, st) ((void)0)
+#define CKPL(kind, st) ((void)0)
+#endif /* DASM_CHECKS */
+
+/* Pass 1: Store actions and args, link branches/labels, estimate offsets. */
+void dasm_put(Dst_DECL, int start, ...)
+{
+ va_list ap;
+ dasm_State *D = Dst_REF;
+ dasm_ActList p = D->actionlist + start;
+ dasm_Section *sec = D->section;
+ int pos = sec->pos, ofs = sec->ofs;
+ int *b;
+
+ if (pos >= sec->epos) { /* Grow the section buffer before storing anything. */
+ DASM_M_GROW(Dst, int, sec->buf, sec->bsize,
+ sec->bsize + 2 * DASM_MAXSECPOS * sizeof(int));
+ sec->rbuf = sec->buf - DASM_POS2BIAS(pos);
+ sec->epos =
+ (int)sec->bsize / sizeof(int) - DASM_MAXSECPOS + DASM_POS2BIAS(pos);
+ }
+
+ b = sec->rbuf;
+ b[pos++] = start; /* First slot of every put is the action list offset. */
+
+ va_start(ap, start);
+ while (1) {
+ unsigned short ins = *p++;
+ unsigned short action = ins;
+ if (action >= DASM__MAX) { /* Plain instruction halfword: 2 bytes of code. */
+ ofs += 2;
+ continue;
+ }
+
+ int *pl, n = action >= DASM_REL_PC ? va_arg(ap, int) : 0;
+ switch (action) {
+ case DASM_STOP:
+ goto stop;
+ case DASM_SECTION:
+ n = *p++ & 255;
+ CK(n < D->maxsection, RANGE_SEC);
+ D->section = &D->sections[n];
+ goto stop;
+ case DASM_ESC:
+ p++;
+ ofs += 2;
+ break;
+ case DASM_REL_EXT:
+ p++;
+ ofs += 4;
+ break;
+ case DASM_ALIGN:
+ ofs += *p++; /* Worst case padding; shrunk again in pass 2. */
+ b[pos++] = ofs;
+ break;
+ case DASM_REL_LG:
+ if (p[-2] >> 12 == 0xc) { /* RIL instruction needs 32-bit immediate. */
+ ofs += 2;
+ }
+ n = *p++ - 10;
+ pl = D->lglabels + n;
+ /* Bkwd rel or global. */
+ if (n >= 0) {
+ CK(n >= 10 || *pl < 0, RANGE_LG);
+ CKPL(lg, LG);
+ goto putrel;
+ }
+ pl += 10;
+ n = *pl;
+ if (n < 0)
+ n = 0; /* Start new chain for fwd rel if label exists. */
+ goto linkrel;
+ case DASM_REL_PC: /* NOTE(review): passes 2/3 skip an inline halfword here, pass 1 does not. */
+ if (p[-2] >> 12 == 0xc) { /* RIL instruction needs 32-bit immediate. */
+ ofs += 2;
+ }
+ pl = D->pclabels + n;
+ CKPL(pc, PC);
+ putrel:
+ n = *pl;
+ if (n < 0) { /* Label exists. Get label pos and store it. */
+ b[pos] = -n;
+ } else {
+ linkrel:
+ b[pos] = n; /* Else link to rel chain, anchored at label. */
+ *pl = pos;
+ }
+ ofs += 2;
+ pos++;
+ break;
+ case DASM_LABEL_LG:
+ pl = D->lglabels + *p++ - 10;
+ CKPL(lg, LG);
+ goto putlabel;
+ case DASM_LABEL_PC: /* NOTE(review): pass 2 skips an inline halfword here, pass 1 does not. */
+ pl = D->pclabels + n;
+ CKPL(pc, PC);
+ putlabel:
+ n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */
+ while (n > 0) {
+ int *pb = DASM_POS2PTR(D, n);
+ n = *pb;
+ *pb = pos;
+ }
+ *pl = -pos; /* Label exists now. */
+ b[pos++] = ofs; /* Store pass1 offset estimate. */
+ break;
+ case DASM_IMM8: /* Merged into the preceding halfword in pass 3: no extra bytes. */
+ b[pos++] = n;
+ break;
+ case DASM_IMM16:
+ CK(((short)n) == n || ((unsigned short)n) == n, RANGE_I); /* TODO: is this the right way to handle unsigned immediates? */
+ ofs += 2;
+ b[pos++] = n;
+ break;
+ case DASM_IMM32:
+ ofs += 4;
+ b[pos++] = n;
+ break;
+ case DASM_DISP20:
+ CK(-(1 << 19) <= n && n < (1 << 19), RANGE_I);
+ b[pos++] = n;
+ break;
+ case DASM_DISP12:
+ CK((n >> 12) == 0, RANGE_I);
+ b[pos++] = n;
+ break;
+ case DASM_LEN8R: /* Pass 3 stores n-1 in an 8-bit field. */
+ CK(n >= 1 && n <= 256, RANGE_I);
+ b[pos++] = n;
+ break;
+ case DASM_LEN4HR: /* Pass 3 stores n-1 in a 4-bit field, so n is 1..16. */
+ case DASM_LEN4LR:
+ CK(n >= 1 && n <= 16, RANGE_I);
+ b[pos++] = n;
+ break;
+ }
+ }
+stop:
+ va_end(ap);
+ sec->pos = pos;
+ sec->ofs = ofs;
+}
+
+#undef CK
+
+/* Pass 2: Link sections, shrink aligns, fix label offsets. */
+int dasm_link(Dst_DECL, size_t * szp)
+{
+ dasm_State *D = Dst_REF;
+ int secnum;
+ int ofs = 0; /* Running byte offset across all sections. */
+
+#ifdef DASM_CHECKS
+ *szp = 0;
+ if (D->status != DASM_S_OK)
+ return D->status;
+ {
+ int pc;
+ for (pc = 0; pc * sizeof(int) < D->pcsize; pc++)
+ if (D->pclabels[pc] > 0) /* Still heads a chain of unresolved references. */
+ return DASM_S_UNDEF_PC | pc;
+ }
+#endif
+
+ { /* Handle globals not defined in this translation unit. */
+ int idx;
+ for (idx = 20; idx * sizeof(int) < D->lgsize; idx++) { /* Globals start at 20. */
+ int n = D->lglabels[idx];
+ /* Undefined label: Collapse rel chain and replace with marker (< 0). */
+ while (n > 0) {
+ int *pb = DASM_POS2PTR(D, n);
+ n = *pb;
+ *pb = -idx;
+ }
+ }
+ }
+
+ /* Combine all code sections. No support for data sections (yet). */
+ for (secnum = 0; secnum < D->maxsection; secnum++) {
+ dasm_Section *sec = D->sections + secnum;
+ int *b = sec->rbuf;
+ int pos = DASM_SEC2POS(secnum);
+ int lastpos = sec->pos;
+
+ while (pos != lastpos) {
+ dasm_ActList p = D->actionlist + b[pos++]; /* Each put starts with its action list offset. */
+ while (1) {
+ unsigned short ins = *p++;
+ unsigned short action = ins;
+ switch (action) {
+ case DASM_STOP:
+ case DASM_SECTION:
+ goto stop;
+ case DASM_ESC:
+ p++;
+ break;
+ case DASM_REL_EXT:
+ p++;
+ break;
+ case DASM_ALIGN:
+ ofs -= (b[pos++] + ofs) & *p++; /* Shrink the pass 1 worst-case padding. */
+ break;
+ case DASM_REL_LG:
+ case DASM_REL_PC:
+ p++; /* Skip the inline halfword. */
+ pos++;
+ break;
+ case DASM_LABEL_LG:
+ case DASM_LABEL_PC:
+ p++; /* Skip the inline halfword. */
+ b[pos++] += ofs; /* Adjust the stored pass 1 offset estimate. */
+ break;
+ case DASM_IMM8:
+ case DASM_IMM16:
+ case DASM_IMM32:
+ case DASM_DISP20:
+ case DASM_DISP12:
+ case DASM_LEN8R:
+ case DASM_LEN4HR:
+ case DASM_LEN4LR:
+ pos++; /* Only skip the stored argument. */
+ break;
+ } /* No default: plain instruction halfwords need no work here. */
+ }
+ stop:(void)0;
+ }
+ ofs += sec->ofs; /* Next section starts right after current section. */
+ }
+
+ D->codesize = ofs; /* Total size of all code sections */
+ *szp = ofs;
+ return DASM_S_OK;
+}
+
+#ifdef DASM_CHECKS
+#define CK(x, st) \
+ do { if (!(x)) return DASM_S_##st|(p-D->actionlist-1); } while (0)
+#else
+#define CK(x, st) ((void)0)
+#endif
+
+/* Pass 3: Encode sections. */
+int dasm_encode(Dst_DECL, void *buffer)
+{
+ dasm_State *D = Dst_REF;
+ char *base = (char *)buffer;
+ unsigned short *cp = (unsigned short *)buffer; /* Output cursor, in halfwords. */
+ int secnum;
+
+ /* Encode all code sections. No support for data sections (yet). */
+ for (secnum = 0; secnum < D->maxsection; secnum++) {
+ dasm_Section *sec = D->sections + secnum;
+ int *b = sec->buf;
+ int *endb = sec->rbuf + sec->pos;
+
+ while (b != endb) {
+ dasm_ActList p = D->actionlist + *b++;
+ while (1) {
+ unsigned short ins = *p++;
+ unsigned short action = ins;
+ int n = (action >= DASM_ALIGN && action < DASM__MAX) ? *b++ : 0; /* Stored buffer slot. */
+ switch (action) {
+ case DASM_STOP:
+ case DASM_SECTION:
+ goto stop;
+ case DASM_ESC:
+ *cp++ = *p++; /* Emit the escaped halfword verbatim. */
+ break;
+ case DASM_REL_EXT:
+ n = DASM_EXTERN(Dst, (unsigned char *)cp, *p++, 1) - 4;
+ goto patchrel;
+ case DASM_ALIGN:
+ ins = *p++;
+ /* TODO: emit 4-byte noprs instead of 2-byte nops where possible. */
+ while ((((char *)cp - base) & ins))
+ *cp++ = 0x0700; /* nop */
+ break;
+ case DASM_REL_LG:
+ CK(n >= 0, UNDEF_LG); /* fallthrough. NOTE(review): n < 0 is only caught with DASM_CHECKS. */
+ case DASM_REL_PC:
+ CK(n >= 0, UNDEF_PC);
+ n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base);
+ p++; /* skip argument */
+ patchrel:
+ /* Offsets are halfword aligned (so need to be halved). */
+ n += 2; /* Offset is relative to start of instruction. */
+ if (cp[-1] >> 12 == 0xc) { /* Same RIL test as in pass 1: 32-bit offset. */
+ *cp++ = n >> 17;
+ } else {
+ CK(-(1 << 16) <= n && n < (1 << 16) && (n & 1) == 0, RANGE_LG);
+ }
+ *cp++ = n >> 1;
+ break;
+ case DASM_LABEL_LG:
+ ins = *p++;
+ if (ins >= 20) /* Global label: record its address. */
+ D->globals[ins - 10] = (void *)(base + n);
+ break;
+ case DASM_LABEL_PC:
+ break;
+ case DASM_IMM8:
+ cp[-1] |= n & 0xff;
+ break;
+ case DASM_IMM16:
+ *cp++ = n;
+ break;
+ case DASM_IMM32:
+ *cp++ = n >> 16; /* High halfword first. */
+ *cp++ = n;
+ break;
+ case DASM_DISP20:
+ cp[-2] |= n & 0xfff; /* Low 12 bits. */
+ cp[-1] |= (n >> 4) & 0xff00; /* High 8 bits. */
+ break;
+ case DASM_DISP12:
+ cp[-1] |= n & 0xfff;
+ break;
+ case DASM_LEN8R:
+ cp[-1] |= (n - 1) & 0xff; /* Length is stored as n-1. */
+ break;
+ case DASM_LEN4HR:
+ cp[-1] |= ((n - 1) << 4) & 0xf0; /* n-1 in the high nibble. */
+ break;
+ case DASM_LEN4LR:
+ cp[-1] |= (n - 1) & 0x0f; /* n-1 in the low nibble. */
+ break;
+ default:
+ *cp++ = ins; /* Plain instruction halfword. */
+ break;
+ }
+ }
+ stop:(void)0;
+ }
+ }
+
+ if (base + D->codesize != (char *)cp) /* Check for phase errors. */
+ return DASM_S_PHASE;
+ return DASM_S_OK;
+}
+
+#undef CK
+
+/* Get PC label offset. */
+int dasm_getpclabel(Dst_DECL, unsigned int pc)
+{
+  dasm_State *st = Dst_REF;
+  int lbl;
+  if (pc * sizeof(int) >= st->pcsize)
+    return -2;  /* Out of range. */
+  lbl = st->pclabels[pc];
+  /* Negative entry: the label is defined and its slot holds the offset. */
+  if (lbl < 0)
+    return *DASM_POS2PTR(st, -lbl);
+  return lbl > 0 ? -1 : -2;  /* Undefined : unused. */
+}
+
+#ifdef DASM_CHECKS
+/* Optional sanity checker to call between isolated encoding steps. */
+int dasm_checkstep(Dst_DECL, int secmatch)
+{
+  dasm_State *st = Dst_REF;
+  int lbl;
+  /* Local labels 1..9: a positive entry is reported as undefined and */
+  /* ends the scan; every other entry is reset to zero. */
+  for (lbl = 1; st->status == DASM_S_OK && lbl <= 9; lbl++) {
+    if (st->lglabels[lbl] > 0)
+      st->status = DASM_S_UNDEF_LG | lbl;
+    else
+      st->lglabels[lbl] = 0;
+  }
+  /* With secmatch >= 0, also require that section to be the active one. */
+  if (st->status == DASM_S_OK && secmatch >= 0 &&
+      st->section != &st->sections[secmatch])
+    st->status = DASM_S_MATCH_SEC | (st->section - st->sections);
+  return st->status;
+}
+#endif
diff -urpN luajit-2.1.0~beta3+git.1624618403.e9577376.orig/dynasm/dasm_s390x.lua luajit-2.1.0~beta3+git.1624618403.e9577376/dynasm/dasm_s390x.lua
--- luajit-2.1.0~beta3+git.1624618403.e9577376.orig/dynasm/dasm_s390x.lua 1969-12-31 18:00:00.000000000 -0600
+++ luajit-2.1.0~beta3+git.1624618403.e9577376/dynasm/dasm_s390x.lua 2021-10-05 12:36:23.461160737 -0500
@@ -0,0 +1,1633 @@
+------------------------------------------------------------------------------
+-- DynASM s390x module.
+--
+-- Copyright (C) 2005-2016 Mike Pall. All rights reserved.
+-- See dynasm.lua for full copyright notice.
+------------------------------------------------------------------------------
+
+-- Module information:
+local _info = {
+ arch = "s390x",
+ description = "DynASM s390x module",
+ version = "1.4.0",
+ vernum = 10400,
+ release = "2015-10-18",
+ author = "Mike Pall",
+ license = "MIT",
+}
+
+-- Exported glue functions for the arch-specific module.
+local _M = { _info = _info }
+
+-- Cache library functions.
+local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs
+local assert, setmetatable, rawget = assert, setmetatable, rawget
+local _s = string
+local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char
+local match, gmatch, gsub = _s.match, _s.gmatch, _s.gsub
+local concat, sort, insert = table.concat, table.sort, table.insert
+local bit = bit or require("bit")
+local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift
+local ror, tohex = bit.ror, bit.tohex
+
+-- Inherited tables and callbacks.
+local g_opt, g_arch
+local wline, werror, wfatal, wwarn
+
+-- Action name list.
+-- CHECK: Keep this in sync with the C code!
+local action_names = {
+ "STOP", "SECTION", "ESC", "REL_EXT",
+ "ALIGN", "REL_LG", "LABEL_LG",
+ "REL_PC", "LABEL_PC", "DISP12", "DISP20", "IMM8", "IMM16", "IMM32", "LEN8R","LEN4HR","LEN4LR",
+}
+
+-- Maximum number of section buffer positions for dasm_put().
+-- CHECK: Keep this in sync with the C code!
+local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines.
+
+-- Action name -> action number.
+-- Action numbers are zero-based; max_action ends up as the action count.
+local map_action = {}
+local max_action = #action_names
+for idx = 1, max_action do
+  map_action[action_names[idx]] = idx - 1
+end
+
+-- Action list buffer.
+local actlist = {}
+
+-- Argument list for next dasm_put(). Start with offset 0 into action list.
+local actargs = { 0 }
+
+-- Current number of section buffer positions for dasm_put().
+local secpos = 1
+
+------------------------------------------------------------------------------
+
+-- Dump action names and numbers.
+local function dumpactions(out)
+  out:write("DynASM encoding engine action codes:\n")
+  for _, aname in ipairs(action_names) do
+    local code = map_action[aname] -- Printed in hex and in decimal.
+    out:write(format(" %-10s %02X %d\n", aname, code, code))
+  end
+  out:write("\n")
+end
+
+local function havearg(a)
+  -- Actions that are followed by one inline halfword in the action list
+  -- (the C engine reads it with *p++). ALIGN belongs here, too.
+  local inline = { ESC = true, SECTION = true, REL_EXT = true,
+                   ALIGN = true, REL_LG = true, LABEL_LG = true }
+  return inline[a] == true
+end
+
+-- Write action list buffer as a huge static C array.
+local function writeactions(out, name)
+  local nn = #actlist
+  if nn == 0 then nn = 1; actlist[1] = map_action.STOP end -- Empty list: lone STOP.
+  out:write("static const unsigned short ", name, "[", nn, "] = {")
+  local esc = false -- True if the next halfword is an inline action argument.
+  for i = 1, nn do
+    assert(out:write("\n 0x", sub(tohex(actlist[i]), 5, 8)))
+    if i ~= nn then assert(out:write(",")) end
+    local aname = action_names[actlist[i]+1] -- nil for instruction halfwords.
+    if not esc and aname then -- Annotate action codes with their name.
+      assert(out:write(" /* ", aname, " */"))
+      esc = havearg(aname)
+    else
+      esc = false
+    end
+  end
+  assert(out:write("\n};\n\n"))
+end
+
+------------------------------------------------------------------------------
+
+-- Add halfword to action list.
+local function wputxhw(n)
+  assert(n >= 0 and n <= 0xffff, "halfword out of range") -- Must fit 16 bits.
+  insert(actlist, n)
+end
+
+-- Add action to list with optional arg. Advance buffer pos, too.
+local function waction(action, val, a, num)
+  -- Emit the action code first, then its optional inline halfword.
+  wputxhw((assert(map_action[action], "bad action name `"..action.."'")))
+  if val then wputxhw(val) end
+  if a then insert(actargs, a) end -- Becomes an argument of dasm_put().
+  if val or a or num then secpos = secpos + (num or 1) end
+end
+
+-- Flush action list (intervening C code or buffer pos overflow).
+local function wflush(term)
+  if actargs[1] == #actlist then return end -- No actions since the last flush.
+  if not term then waction("STOP") end -- Terminate the action list.
+  local args = concat(actargs, ", ")
+  wline(format("dasm_put(Dst, %s);", args), true)
+  -- The next dasm_put() starts at the current end; its offset uses one position.
+  actargs, secpos = { #actlist }, 1
+end
+
+-- Put escaped halfword.
+local function wputhw(n)
+ if n <= max_action then waction("ESC") end -- Could be mistaken for an action code.
+ wputxhw(n)
+end
+
+-- Reserve position for halfword.
+local function wpos()
+  local slot = #actlist + 1
+  actlist[slot] = "" -- Placeholder entry.
+  return slot
+end
+
+------------------------------------------------------------------------------
+
+-- Global label name -> global label number. With auto assignment on 1st use.
+local next_global = 20 -- Number handed out to the next new global label.
+local map_global = setmetatable({}, { __index = function(t, name)
+ if not match(name, "^[%a_][%w_]*$") then werror("bad global label") end
+ local n = next_global
+ if n > 2047 then werror("too many global labels") end
+ next_global = n + 1
+ t[name] = n -- Cache it, so __index only runs on the first use of a name.
+ return n
+end})
+
+-- Dump global labels.
+local function dumpglobals(out, lvl)
+  local by_num = {} -- Label number -> label name.
+  for gname, num in pairs(map_global) do by_num[num] = gname end
+  out:write("Global labels:\n")
+  for num = 20, next_global-1 do
+    out:write(format(" %s\n", by_num[num]))
+  end
+  out:write("\n")
+end
+
+-- Write global label enum.
+local function writeglobals(out, prefix)
+  local by_num = {} -- Label number -> label name.
+  for gname, num in pairs(map_global) do by_num[num] = gname end
+  out:write("enum {\n")
+  for num = 20, next_global-1 do
+    out:write(" ", prefix, by_num[num], ",\n")
+  end
+  out:write(" ", prefix, "_MAX\n};\n")
+end
+
+-- Write global label names.
+local function writeglobalnames(out, name)
+  local by_num = {} -- Label number -> label name.
+  for gname, num in pairs(map_global) do by_num[num] = gname end
+  out:write("static const char *const ", name, "[] = {\n")
+  for num = 20, next_global-1 do
+    out:write(" \"", by_num[num], "\",\n")
+  end
+  out:write(" (const char *)0\n};\n")
+end
+
+------------------------------------------------------------------------------
+
+-- Extern label name -> extern label number. With auto assignment on 1st use.
+local next_extern = 0 -- Number handed out to the next new extern label.
+local map_extern_ = {} -- Reverse map: extern number -> name.
+local map_extern = setmetatable({}, { __index = function(t, name)
+ -- No restrictions on the name for now.
+ local n = next_extern
+ if n > 2047 then werror("too many extern labels") end
+ next_extern = n + 1
+ t[name] = n -- Cache it, so __index only runs on the first use of a name.
+ map_extern_[n] = name
+ return n
+end})
+
+-- Dump extern labels.
+local function dumpexterns(out, lvl)
+  out:write("Extern labels:\n")
+  for num = 0, next_extern-1 do -- In order of assignment.
+    out:write(format(" %s\n", map_extern_[num]))
+  end
+  out:write("\n")
+end
+
+-- Write extern label names.
+local function writeexternnames(out, name)
+  out:write("static const char *const ", name, "[] = {\n")
+  for num = 0, next_extern-1 do -- In order of assignment.
+    out:write(" \"", map_extern_[num], "\",\n")
+  end
+  out:write(" (const char *)0\n};\n") -- Terminating null entry.
+end
+
+------------------------------------------------------------------------------
+
+-- Arch-specific maps.
+-- Ext. register name -> int. name.
+local map_archdef = { sp = "r15" }
+
+-- Int. register name -> ext. name.
+local map_reg_rev = { r15 = "sp" }
+
+local map_type = {} -- Type name -> { ctype, reg }
+local ctypenum = 0 -- Type number (for Dt... macros).
+
+-- Reverse defines for registers.
+function _M.revdef(s)
+ return map_reg_rev[s] or s -- Names without a reverse mapping pass through.
+end
+
+local map_cond = {
+ o = 1, h = 2, nle = 3, l = 4,
+ nhe = 5, lh = 6, ne = 7, e = 8,
+ nlh = 9, he = 10, nl = 11, le = 12,
+ nh = 13, no = 14, [""] = 15,
+}
+
+------------------------------------------------------------------------------
+
+local function parse_reg(expr)
+ if not expr then werror("expected register name") end
+ local tname, ovreg = match(expr, "^([%w_]+):(r1?%d)$") -- "type:rN" override form.
+ local tp = map_type[tname or expr]
+ if tp then
+ local reg = ovreg or tp.reg -- An explicit override wins over the type's register.
+ if not reg then
+ werror("type `"..(tname or expr).."' needs a register override")
+ end
+ expr = reg
+ end
+ local r = match(expr, "^[rf](1?%d)$") -- "r" or "f" followed by a number.
+ if r then
+ r = tonumber(r)
+ if r <= 15 then return r, tp end -- The pattern alone would also admit 16-19.
+ end
+ werror("bad register name `"..expr.."'")
+end
+
+local parse_ctx = {}
+
+local loadenv = setfenv and function(s) -- Variant used when setfenv exists.
+ local code = loadstring(s, "")
+ if code then setfenv(code, parse_ctx) end -- Run the chunk with parse_ctx as env.
+ return code
+end or function(s) -- Otherwise parse_ctx is passed as the env argument of load().
+ return load(s, "", nil, parse_ctx)
+end
+
+-- Try to parse simple arithmetic, too, since some basic ops are aliases.
+local function parse_number(n)
+  local direct = tonumber(n)
+  if direct then return direct end
+  -- Not a plain number: try to evaluate the text as an expression.
+  local chunk = loadenv("return "..n)
+  if not chunk then return nil end
+  local ok, value = pcall(chunk)
+  if ok then return value end
+  return nil
+end
+
+local function is_uint12(num)
+  return 0 <= num and num < 0x1000 -- 0 .. 2^12-1
+end
+
+local function is_int20(num)
+  return -0x80000 <= num and num < 0x80000 -- -2^19 .. 2^19-1
+end
+
+local function is_int32(num)
+  return -0x80000000 <= num and num < 0x80000000 -- -2^31 .. 2^31-1
+end
+
+local function is_uint16(num)
+  return 0 <= num and num < 0x10000 -- 0 .. 0xffff inclusive.
+end
+
+local function is_int16(num)
+  return -0x8000 <= num and num < 0x8000 -- -2^15 .. 2^15-1
+end
+
+local function is_int8(num)
+  return -0x80 <= num and num < 0x80 -- -2^7 .. 2^7-1
+end
+
+local function is_uint8(num)
+  return 0 <= num and num < 0x100 -- 0 .. 2^8-1
+end
+
+-- Split a memory operand of the form d(b) or d(x,b) into d, x and b.
+-- If x is not specified then it is 0.
+local function split_memop(arg)
+ local reg = "[%w_:]+"
+ local d, x, b = match(arg, "^(.*)%(%s*("..reg..")%s*,%s*("..reg..")%s*%)$") -- d(x,b)
+ if d then
+ return d, parse_reg(x), parse_reg(b)
+ end
+ local d, b = match(arg, "^(.*)%(%s*("..reg..")%s*%)$") -- d(b)
+ if d then
+ return d, 0, parse_reg(b)
+ end
+ -- "(r1,r2)" without a displacement (GAS rejects this). NOTE(review): the first pattern already matches it with d == "", so this looks unreachable.
+ local x, b = match(arg,"%(%s*("..reg..")%s*,%s*("..reg..")%s*%)$")
+ if b then
+ return 0, parse_reg(x), parse_reg(b)
+ end
+ -- Accept a lone integer as a displacement. TODO: allow expressions/variables here? This currently interacts badly with the other rules.
+ local d = match(arg,"^(-?[%d]+)$")
+ if d then
+ return d, 0, 0 -- d is returned as a string here, like in the cases above.
+ end
+ local reg, tailr = match(arg, "^([%w_:]+)%s*(.*)$") -- Typed operand: name plus tail.
+ if reg then
+ local r, tp = parse_reg(reg)
+ if tp then
+ return format(tp.ctypefmt, tailr), 0, r -- Displacement is built from the type's format.
+ end
+ end
+ werror("bad memory operand: "..arg)
+ return nil
+end
+
+-- Parse memory operand of the form d(x, b) where 0 <= d < 4096 and b and x
+-- are GPRs.
+-- If the fourth return value is not-nil then it needs to be called to
+-- insert an action.
+-- Encoded as: xbddd
+local function parse_mem_bx(arg)
+  local d, x, b = split_memop(arg)
+  local dval = tonumber(d)
+  if dval then -- Constant displacement: range-check it right here.
+    if not is_uint12(dval) then
+      werror("displacement out of range: "..dval)
+    end
+    return dval, x, b, nil
+  end
+  if match(d, "^%s*[rf]1?%d%s*$") then -- Only a bare register name is rejected.
+    werror("expected immediate operand, got register")
+  end
+  return 0, x, b, function() waction("DISP12", nil, d) end -- Deferred to DISP12.
+end
+
+-- Parse memory operand of the form d(b) where 0 <= d < 4096 and b is a GPR.
+-- Encoded as: bddd
+local function parse_mem_b(arg)
+  -- Reuse the d(x,b) parser, then insist that no index register was given.
+  local disp, index, base, action = parse_mem_bx(arg)
+  if index ~= 0 then werror("unexpected index register") end
+  -- action is nil or a function that emits the DISP12 action.
+  return disp, base, action
+end
+
+-- Parse memory operand of the form d(x, b) where -(2^20)/2 <= d < (2^20)/2
+-- and b and x are GPRs.
+-- Encoded as: xblllhh (ls are the low-bits of d, and hs are the high bits).
+local function parse_mem_bxy(arg)
+  local d, x, b = split_memop(arg)
+  local dval = tonumber(d)
+  if dval then -- Constant displacement: range-check it right here.
+    if not is_int20(dval) then
+      werror("displacement out of range: "..dval)
+    end
+    return dval, x, b, nil
+  end
+  if match(d, "^%s*[rf]1?%d%s*$") then -- Only a bare register name is rejected.
+    werror("expected immediate operand, got register")
+  end
+  return 0, x, b, function() waction("DISP20", nil, d) end -- Deferred to DISP20.
+end
+
+-- Parse memory operand of the form d(b) where -(2^20)/2 <= d < (2^20)/2 and
+-- b is a GPR.
+-- Encoded as: blllhh (ls are the low-bits of d, and hs are the high bits).
+local function parse_mem_by(arg)
+  -- Reuse the d(x,b) parser, then insist that no index register was given.
+  local disp, index, base, action = parse_mem_bxy(arg)
+  if index ~= 0 then werror("unexpected index register") end
+  -- action is nil or a function that emits the DISP20 action.
+  return disp, base, action
+end
+
+-- Parse memory operand of the form d(l, b) where 0 <= d < 4096, 1 <= l <= 256,
+-- and b is a GPR.
+local function parse_mem_lb(arg)
+  local reg = "r1?[0-9]"
+  local d, l, b = match(arg, "^(.*)%s*%(%s*(.*)%s*,%s*("..reg..")%s*%)$")
+  if not d then
+    -- TODO: handle values without registers?
+    -- TODO: handle registers without a displacement?
+    werror("bad memory operand: "..arg)
+    return nil
+  end
+  local dval = tonumber(d)
+  local dact = nil -- Set when the displacement must be emitted as an action.
+  if dval then
+    if not is_uint12(dval) then
+      werror("displacement out of range: "..dval)
+    end
+  else
+    dval = 0
+    dact = function() waction("DISP12", nil, d) end
+  end
+  local lval = tonumber(l)
+  local lact = nil -- Set when the length must be emitted as an action.
+  if lval then
+    if lval < 1 or lval > 256 then
+      werror("length out of range: "..lval)
+    end
+    lval = lval - 1 -- The length is encoded as l-1.
+  else
+    lval = 0
+    lact = function() waction("LEN8R", nil, l) end
+  end
+  return dval, lval, parse_reg(b), dact, lact -- d, encoded l, b, d action, l action.
+end
+
+local function parse_mem_l2b(arg, high_l)
+  local reg = "r1?[0-9]"
+  local d, l, b = match(arg, "^(.*)%s*%(%s*(.*)%s*,%s*("..reg..")%s*%)$")
+  if not d then
+    -- TODO: handle values without registers?
+    -- TODO: handle registers without a displacement?
+    werror("bad memory operand: "..arg)
+    return nil
+  end
+  local dval = tonumber(d)
+  local dact = nil -- Set when the displacement must be emitted as an action.
+  if dval then
+    if not is_uint12(dval) then
+      werror("displacement out of range: "..dval)
+    end
+  else
+    dval = 0
+    dact = function() waction("DISP12", nil, d) end
+  end
+  local lval = tonumber(l)
+  local lact = nil -- Set when the length must be emitted as an action.
+  if lval then
+    if lval < 1 or lval > 16 then -- l-1 has to fit the 4-bit length field.
+      werror("length out of range: "..lval)
+    end
+    lval = lval - 1 -- The length is encoded as l-1.
+  else
+    lval = 0
+    if high_l then -- LEN4HR fills the high nibble, LEN4LR the low nibble.
+      lact = function() waction("LEN4HR", nil, l) end
+    else
+      lact = function() waction("LEN4LR", nil, l) end
+    end
+  end
+  return dval, lval, parse_reg(b), dact, lact -- d, encoded l, b, d action, l action.
+end
+
+local function parse_imm32(imm)
+  local imm_val = tonumber(imm)
+  if imm_val then -- Constant: emit it directly as two halfwords, high first.
+    if not is_int32(imm_val) then
+      werror("immediate value out of range: "..imm_val)
+    end
+    wputhw(band(shr(imm_val, 16), 0xffff))
+    wputhw(band(imm_val, 0xffff))
+  elseif match(imm, "^[rfv]([1-3]?[0-9])$") or
+         match(imm, "^([%w_]+):(r1?[0-9])$") then
+    werror("expected immediate operand, got register")
+  else
+    waction("IMM32", nil, imm) -- Anything else is left to an IMM32 action.
+  end
+end
+
+local function parse_imm16(imm)
+  local imm_val = tonumber(imm)
+  if imm_val then -- Constant: accepted if it fits as signed or as unsigned.
+    if not is_int16(imm_val) and not is_uint16(imm_val) then
+      werror("immediate value out of range: "..imm_val)
+    end
+    wputhw(band(imm_val, 0xffff))
+  elseif match(imm, "^[rfv]([1-3]?[0-9])$") or
+         match(imm, "^([%w_]+):(r1?[0-9])$") then
+    werror("expected immediate operand, got register")
+  else
+    waction("IMM16", nil, imm) -- Anything else is left to an IMM16 action.
+  end
+end
+
+local function parse_imm8(imm)
+  local imm_val = tonumber(imm)
+  if imm_val then -- Constant: accepted if it fits as signed or as unsigned.
+    if not is_int8(imm_val) and not is_uint8(imm_val) then
+      werror("Immediate value out of range: "..imm_val)
+    end
+    return imm_val, nil
+  end
+  return 0, function() waction("IMM8", nil, imm) end -- Value plus deferred action.
+end
+
+local function parse_mask(mask)
+  local m3 = parse_number(mask)
+  if m3 then -- Unparsable input yields nil, as before.
+    if m3 == 0 or m3 == 1 or (m3 >= 3 and m3 <= 7) then
+      return m3
+    else
+      werror("Mask value should be 0,1 or 3-7: "..tostring(m3))
+    end
+  end
+end
+
+local function parse_mask2(mask)
+  local m4 = parse_number(mask)
+  -- parse_number() returns nil for unparsable input; report that through
+  -- werror() instead of failing on a comparison of nil with a number.
+  local valid = type(m4) == "number" and m4 >= 0 and m4 <= 1
+  if valid then return m4 end
+  werror("Mask value should be 0 or 1: "..tostring(m4 or mask))
+end
+
+local function parse_label(label, def)
+ local prefix = sub(label, 1, 2)
+ -- =>label (pc label reference)
+ if prefix == "=>" then
+ return "PC", 0, sub(label, 3) -- The third value is the label expression text.
+ end
+ -- ->name (global label reference)
+ if prefix == "->" then
+ return "LG", map_global[sub(label, 3)] -- Assigns a number on first use.
+ end
+ if def then
+ -- [1-9] (local label definition)
+ if match(label, "^[1-9]$") then
+ return "LG", 10+tonumber(label)
+ end
+ else
+ -- [<>][1-9] (local label reference)
+ local dir, lnum = match(label, "^([<>])([1-9])$")
+ if dir then -- Fwd: 1-9, Bkwd: 11-19.
+ return "LG", lnum + (dir == ">" and 0 or 10) -- lnum is a string; + coerces it.
+ end
+ -- extern label (extern label reference)
+ local extname = match(label, "^extern%s+(%S+)$")
+ if extname then
+ return "EXT", map_extern[extname] -- Assigns a number on first use.
+ end
+ end
+ werror("bad label `"..label.."'")
+end
+
+------------------------------------------------------------------------------
+
+local map_op, op_template
+
+local function op_alias(opname, f)
+  return function(params, nparams)
+    if not params then return "-> "..sub(opname, 1, -3) end -- Name minus last 2 chars.
+    f(params, nparams) -- Let the alias adjust the parameters first.
+    op_template(params, map_op[opname], nparams)
+  end
+end
+
+-- Template strings for s390x instructions.
+map_op = {
+ a_2 = "00005a000000RX-a",
+ ad_2 = "00006a000000RX-a",
+ adb_2 = "ed000000001aRXE",
+ adbr_2 = "0000b31a0000RRE",
+ adr_2 = "000000002a00RR",
+ ae_2 = "00007a000000RX-a",
+ aeb_2 = "ed000000000aRXE",
+ aebr_2 = "0000b30a0000RRE",
+ aer_2 = "000000003a00RR",
+ afi_2 = "c20900000000RIL-a",
+ ag_2 = "e30000000008RXY-a",
+ agf_2 = "e30000000018RXY-a",
+ agfi_2 = "c20800000000RIL-a",
+ agfr_2 = "0000b9180000RRE",
+ aghi_2 = "0000a70b0000RI-a",
+ agr_2 = "0000b9080000RRE",
+ ah_2 = "00004a000000RX-a",
+ ahi_2 = "0000a70a0000RI-a",
+ ahy_2 = "e3000000007aRXY-a",
+ aih_2 = "cc0800000000RIL-a",
+ al_2 = "00005e000000RX-a",
+ alc_2 = "e30000000098RXY-a",
+ alcg_2 = "e30000000088RXY-a",
+ alcgr_2 = "0000b9880000RRE",
+ alcr_2 = "0000b9980000RRE",
+ alfi_2 = "c20b00000000RIL-a",
+ alg_2 = "e3000000000aRXY-a",
+ algf_2 = "e3000000001aRXY-a",
+ algfi_2 = "c20a00000000RIL-a",
+ algfr_2 = "0000b91a0000RRE",
+ algr_2 = "0000b90a0000RRE",
+ alr_2 = "000000001e00RR",
+ alsih_2 = "cc0a00000000RIL-a",
+ alsihn_2 = "cc0b00000000RIL-a",
+ aly_2 = "e3000000005eRXY-a",
+ ap_2 = "fa0000000000SS-b",
+ ar_2 = "000000001a00RR",
+ au_2 = "00007e000000RX-a",
+ aur_2 = "000000003e00RR",
+ aw_2 = "00006e000000RX-a",
+ awr_2 = "000000002e00RR",
+ axbr_2 = "0000b34a0000RRE",
+ axr_2 = "000000003600RR",
+ ay_2 = "e3000000005aRXY-a",
+ bakr_2 = "0000b2400000RRE",
+ bal_2 = "000045000000RX-a",
+ balr_2 = "000000000500RR",
+ bas_2 = "00004d000000RX-a",
+ basr_2 = "000000000d00RR",
+ bassm_2 = "000000000c00RR",
+ -- bc takes an explicit condition mask; the per-condition b<cond> forms are generated after this table.
+ bc_2 = "000047000000RX-b",
+ bcr_2 = "000000000700RR",
+ bct_2 = "000046000000RX-a",
+ bctg_2 = "e30000000046RXY-a",
+ bctgr_2 = "0000b9460000RRE",
+ bctr_2 = "000000000600RR",
+ bras_2 = "0000a7050000RI-b",
+ brasl_2 = "c00500000000RIL-b",
+ brc_2 = "0000a7040000RI-c",
+ -- brcl takes an explicit condition mask; the per-condition jg<cond> forms are generated after this table.
+ brcl_2 = "c00400000000RIL-c",
+ brct_2 = "0000a7060000RI-b",
+ brctg_2 = "0000a7070000RI-b",
+ brcth_2 = "cc0600000000RIL-b",
+ brxh_3 = "000084000000RSI",
+ brxhg_3 = "ec0000000044RIE-e",
+ bsa_2 = "0000b25a0000RRE",
+ bsg_2 = "0000b2580000RRE",
+ bsm_2 = "000000000b00RR",
+ bxh_3 = "000086000000RS-a",
+ bxhg_3 = "eb0000000044RSY-a",
+ bxle_3 = "000087000000RS-a",
+ bxleg_3 = "eb0000000045RSY-a",
+ c_2 = "000059000000RX-a",
+ cd_2 = "000069000000RX-a",
+ cdb_2 = "ed0000000019RXE",
+ cdbr_2 = "0000b3190000RRE",
+ cdfbr_2 = "0000b3950000RRE",
+ cdfbra_4 = "0000b3950000RRF-e",
+ cdfr_2 = "0000b3b50000RRE",
+ cdftr_2 = "0000b9510000RRE",
+ cdgbr_2 = "0000b3a50000RRE",
+ cdgbra_4 = "0000b3a50000RRF-e",
+ cdgr_2 = "0000b3c50000RRE",
+ cdgtr_2 = "0000b3f10000RRE",
+ cdr_2 = "000000002900RR",
+ cds_3 = "0000bb000000RS-a",
+ cdsg_3 = "eb000000003eRSY-a",
+ cdstr_2 = "0000b3f30000RRE",
+ cdsy_3 = "eb0000000031RSY-a",
+ cdtr_2 = "0000b3e40000RRE",
+ cdutr_2 = "0000b3f20000RRE",
+ ce_2 = "000079000000RX-a",
+ ceb_2 = "ed0000000009RXE",
+ cebr_2 = "0000b3090000RRE",
+ cedtr_2 = "0000b3f40000RRE",
+ cefbr_2 = "0000b3940000RRE",
+ cefbra_4 = "0000b3940000RRF-e",
+ cefr_2 = "0000b3b40000RRE",
+ cegbr_2 = "0000b3a40000RRE",
+ cegbra_4 = "0000b3a40000RRF-e",
+ cegr_2 = "0000b3c40000RRE",
+ cer_2 = "000000003900RR",
+ cextr_2 = "0000b3fc0000RRE",
+ cfdbr_3 = "0000b3990000RRF-e",
+ cfdbra_4 = "0000b3990000RRF-e",
+ cfebr_3 = "0000b3980000RRF-e",
+ cfebra_4 = "0000b3980000RRF-e",
+ cfi_2 = "c20d00000000RIL-a",
+ cfxbr_3 = "0000b39a0000RRF-e",
+ cfxbra_4 = "0000b39a0000RRF-e",
+ cg_2 = "e30000000020RXY-a",
+ cgdbr_3 = "0000b3a90000RRF-e",
+ cgdbra_4 = "0000b3a90000RRF-e",
+ cgebr_3 = "0000b3a80000RRF-e",
+ cgebra_4 = "0000b3a80000RRF-e",
+ cgf_2 = "e30000000030RXY-a",
+ cgfi_2 = "c20c00000000RIL-a",
+ cgfr_2 = "0000b9300000RRE",
+ cgfrl_2 = "c60c00000000RIL-b",
+ cgh_2 = "e30000000034RXY-a",
+ cghi_2 = "0000a70f0000RI-a",
+ cghrl_2 = "c60400000000RIL-b",
+ cgr_2 = "0000b9200000RRE",
+ cgrl_2 = "c60800000000RIL-b",
+ cgxbr_3 = "0000b3aa0000RRF-e",
+ cgxbra_4 = "0000b3aa0000RRF-e",
+ ch_2 = "000049000000RX-a",
+ chf_2 = "e300000000cdRXY-a",
+ chhr_2 = "0000b9cd0000RRE",
+ chi_2 = "0000a70e0000RI-a",
+ chlr_2 = "0000b9dd0000RRE",
+ chrl_2 = "c60500000000RIL-b",
+ chy_2 = "e30000000079RXY-a",
+ cih_2 = "cc0d00000000RIL-a",
+ cksm_2 = "0000b2410000RRE",
+ cl_2 = "000055000000RX-a",
+ clc_2 = "d50000000000SS-a",
+ clcl_2 = "000000000f00RR",
+ clcle_3 = "0000a9000000RS-a",
+ clclu_3 = "eb000000008fRSY-a",
+ clfi_2 = "c20f00000000RIL-a",
+ clg_2 = "e30000000021RXY-a",
+ clgf_2 = "e30000000031RXY-a",
+ clgfi_2 = "c20e00000000RIL-a",
+ clgfr_2 = "0000b9310000RRE",
+ clgfrl_2 = "c60e00000000RIL-b",
+ clghrl_2 = "c60600000000RIL-b",
+ clgr_2 = "0000b9210000RRE",
+ clgrl_2 = "c60a00000000RIL-b",
+ clhf_2 = "e300000000cfRXY-a",
+ clhhr_2 = "0000b9cf0000RRE",
+ clhlr_2 = "0000b9df0000RRE",
+ clhrl_2 = "c60700000000RIL-b",
+ cli_2 = "000095000000SI",
+ clih_2 = "cc0f00000000RIL-a",
+ clm_3 = "0000bd000000RS-b",
+ clmh_3 = "eb0000000020RSY-b",
+ clmy_3 = "eb0000000021RSY-b",
+ clr_2 = "000000001500RR",
+ clrl_2 = "c60f00000000RIL-b",
+ clst_2 = "0000b25d0000RRE",
+ cly_2 = "e30000000055RXY-a",
+ cmpsc_2 = "0000b2630000RRE",
+ cpya_2 = "0000b24d0000RRE",
+ cr_2 = "000000001900RR",
+ crl_2 = "c60d00000000RIL-b",
+ cs_3 = "0000ba000000RS-a",
+ csg_3 = "eb0000000030RSY-a",
+ csp_2 = "0000b2500000RRE",
+ cspg_2 = "0000b98a0000RRE",
+ csy_3 = "eb0000000014RSY-a",
+ cu41_2 = "0000b9b20000RRE",
+ cu42_2 = "0000b9b30000RRE",
+ cudtr_2 = "0000b3e20000RRE",
+ cuse_2 = "0000b2570000RRE",
+ cuxtr_2 = "0000b3ea0000RRE",
+ cvb_2 = "00004f000000RX-a",
+ cvbg_2 = "e3000000000eRXY-a",
+ cvby_2 = "e30000000006RXY-a",
+ cvd_2 = "00004e000000RX-a",
+ cvdg_2 = "e3000000002eRXY-a",
+ cvdy_2 = "e30000000026RXY-a",
+ cxbr_2 = "0000b3490000RRE",
+ cxfbr_2 = "0000b3960000RRE",
+ cxfbra_4 = "0000b3960000RRF-e",
+ cxfr_2 = "0000b3b60000RRE",
+ cxftr_2 = "0000b9590000RRE",
+ cxgbr_2 = "0000b3a60000RRE",
+ cxgbra_4 = "0000b3a60000RRF-e",
+ cxgr_2 = "0000b3c60000RRE",
+ cxgtr_2 = "0000b3f90000RRE",
+ cxr_2 = "0000b3690000RRE",
+ cxstr_2 = "0000b3fb0000RRE",
+ cxtr_2 = "0000b3ec0000RRE",
+ cxutr_2 = "0000b3fa0000RRE",
+ cy_2 = "e30000000059RXY-a",
+ d_2 = "00005d000000RX-a",
+ dd_2 = "00006d000000RX-a",
+ ddb_2 = "ed000000001dRXE",
+ ddbr_2 = "0000b31d0000RRE",
+ ddr_2 = "000000002d00RR",
+ de_2 = "00007d000000RX-a",
+ deb_2 = "ed000000000dRXE",
+ debr_2 = "0000b30d0000RRE",
+ der_2 = "000000003d00RR",
+ didbr_4 = "0000b35b0000RRF-b",
+ dl_2 = "e30000000097RXY-a",
+ dlg_2 = "e30000000087RXY-a",
+ dlgr_2 = "0000b9870000RRE",
+ dlr_2 = "0000b9970000RRE",
+ dr_2 = "000000001d00RR",
+ dsg_2 = "e3000000000dRXY-a",
+ dsgf_2 = "e3000000001dRXY-a",
+ dsgfr_2 = "0000b91d0000RRE",
+ dsgr_2 = "0000b90d0000RRE",
+ dxbr_2 = "0000b34d0000RRE",
+ dxr_2 = "0000b22d0000RRE",
+ ear_2 = "0000b24f0000RRE",
+ ecag_3 = "eb000000004cRSY-a",
+ ed_2 = "de0000000000SS-a",
+ edmk_2 = "df0000000000SS-a",
+ eedtr_2 = "0000b3e50000RRE",
+ eextr_2 = "0000b3ed0000RRE",
+ efpc_2 = "0000b38c0000RRE",
+ epair_2 = "0000b99a0000RRE",
+ epar_2 = "0000b2260000RRE",
+ epsw_2 = "0000b98d0000RRE",
+ ereg_2 = "0000b2490000RRE",
+ eregg_2 = "0000b90e0000RRE",
+ esair_2 = "0000b99b0000RRE",
+ esar_2 = "0000b2270000RRE",
+ esdtr_2 = "0000b3e70000RRE",
+ esea_2 = "0000b99d0000RRE",
+ esta_2 = "0000b24a0000RRE",
+ esxtr_2 = "0000b3ef0000RRE",
+ ex_2 = "000044000000RX-a",
+ exrl_2 = "c60000000000RIL-b",
+ fidr_2 = "0000b37f0000RRE",
+ fier_2 = "0000b3770000RRE",
+ fixr_2 = "0000b3670000RRE",
+ flogr_2 = "0000b9830000RRE",
+ hdr_2 = "000000002400RR",
+ her_2 = "000000003400RR",
+ iac_2 = "0000b2240000RRE",
+ ic_2 = "000043000000RX-a",
+ icm_3 = "0000bf000000RS-b",
+ icmh_3 = "eb0000000080RSY-b",
+ icmy_3 = "eb0000000081RSY-b",
+ icy_2 = "e30000000073RXY-a",
+ iihf_2 = "c00800000000RIL-a",
+ iihh_2 = "0000a5000000RI-a",
+ iihl_2 = "0000a5010000RI-a",
+ iilf_2 = "c00900000000RIL-a",
+ iilh_2 = "0000a5020000RI-a",
+ iill_2 = "0000a5030000RI-a",
+ ipm_2 = "0000b2220000RRE",
+ iske_2 = "0000b2290000RRE",
+ ivsk_2 = "0000b2230000RRE",
+ kdbr_2 = "0000b3180000RRE",
+ kdtr_2 = "0000b3e00000RRE",
+ kebr_2 = "0000b3080000RRE",
+ kimd_2 = "0000b93e0000RRE",
+ klmd_2 = "0000b93f0000RRE",
+ km_2 = "0000b92e0000RRE",
+ kmac_2 = "0000b91e0000RRE",
+ kmc_2 = "0000b92f0000RRE",
+ kmf_2 = "0000b92a0000RRE",
+ kmo_2 = "0000b92b0000RRE",
+ kxbr_2 = "0000b3480000RRE",
+ kxtr_2 = "0000b3e80000RRE",
+ l_2 = "000058000000RX-a",
+ la_2 = "000041000000RX-a",
+ laa_3 = "eb00000000f8RSY-a",
+ laag_3 = "eb00000000e8RSY-a",
+ laal_3 = "eb00000000faRSY-a",
+ laalg_3 = "eb00000000eaRSY-a",
+ lae_2 = "000051000000RX-a",
+ laey_2 = "e30000000075RXY-a",
+ lam_3 = "00009a000000RS-a",
+ lamy_3 = "eb000000009aRSY-a",
+ lan_3 = "eb00000000f4RSY-a",
+ lang_3 = "eb00000000e4RSY-a",
+ lao_3 = "eb00000000f6RSY-a",
+ laog_3 = "eb00000000e6RSY-a",
+ larl_2 = "c00000000000RIL-b",
+ lax_3 = "eb00000000f7RSY-a",
+ laxg_3 = "eb00000000e7RSY-a",
+ lay_2 = "e30000000071RXY-a",
+ lb_2 = "e30000000076RXY-a",
+ lbh_2 = "e300000000c0RXY-a",
+ lbr_2 = "0000b9260000RRE",
+ lcdbr_2 = "0000b3130000RRE",
+ lcdfr_2 = "0000b3730000RRE",
+ lcdr_2 = "000000002300RR",
+ lcebr_2 = "0000b3030000RRE",
+ lcer_2 = "000000003300RR",
+ lcgfr_2 = "0000b9130000RRE",
+ lcgr_2 = "0000b9030000RRE",
+ lcr_2 = "000000001300RR",
+ lctl_3 = "0000b7000000RS-a",
+ lctlg_3 = "eb000000002fRSY-a",
+ lcxbr_2 = "0000b3430000RRE",
+ lcxr_2 = "0000b3630000RRE",
+ ld_2 = "000068000000RX-a",
+ ldebr_2 = "0000b3040000RRE",
+ lder_2 = "0000b3240000RRE",
+ ldgr_2 = "0000b3c10000RRE",
+ ldr_2 = "000000002800RR",
+ ldxbr_2 = "0000b3450000RRE",
+ ldxr_2 = "000000002500RR",
+ ldy_2 = "ed0000000065RXY-a",
+ le_2 = "000078000000RX-a",
+ ledbr_2 = "0000b3440000RRE",
+ ledr_2 = "000000003500RR",
+ ler_2 = "000000003800RR",
+ lexbr_2 = "0000b3460000RRE",
+ lexr_2 = "0000b3660000RRE",
+ ley_2 = "ed0000000064RXY-a",
+ lfh_2 = "e300000000caRXY-a",
+ lg_2 = "e30000000004RXY-a",
+ lgb_2 = "e30000000077RXY-a",
+ lgbr_2 = "0000b9060000RRE",
+ lgdr_2 = "0000b3cd0000RRE",
+ lgf_2 = "e30000000014RXY-a",
+ lgfi_2 = "c00100000000RIL-a",
+ lgfr_2 = "0000b9140000RRE",
+ lgfrl_2 = "c40c00000000RIL-b",
+ lgh_2 = "e30000000015RXY-a",
+ lghi_2 = "0000a7090000RI-a",
+ lghr_2 = "0000b9070000RRE",
+ lghrl_2 = "c40400000000RIL-b",
+ lgr_2 = "0000b9040000RRE",
+ lgrl_2 = "c40800000000RIL-b",
+ lh_2 = "000048000000RX-a",
+ lhh_2 = "e300000000c4RXY-a",
+ lhi_2 = "0000a7080000RI-a",
+ lhr_2 = "0000b9270000RRE",
+ lhrl_2 = "c40500000000RIL-b",
+ lhy_2 = "e30000000078RXY-a",
+ llc_2 = "e30000000094RXY-a",
+ llch_2 = "e300000000c2RXY-a",
+ llcr_2 = "0000b9940000RRE",
+ llgc_2 = "e30000000090RXY-a",
+ llgcr_2 = "0000b9840000RRE",
+ llgf_2 = "e30000000016RXY-a",
+ llgfr_2 = "0000b9160000RRE",
+ llgfrl_2 = "c40e00000000RIL-b",
+ llgh_2 = "e30000000091RXY-a",
+ llghr_2 = "0000b9850000RRE",
+ llghrl_2 = "c40600000000RIL-b",
+ llgt_2 = "e30000000017RXY-a",
+ llgtr_2 = "0000b9170000RRE",
+ llh_2 = "e30000000095RXY-a",
+ llhh_2 = "e300000000c6RXY-a",
+ llhr_2 = "0000b9950000RRE",
+ llhrl_2 = "c40200000000RIL-b",
+ llihf_2 = "c00e00000000RIL-a",
+ llihh_2 = "0000a50c0000RI-a",
+ llihl_2 = "0000a50d0000RI-a",
+ llilf_2 = "c00f00000000RIL-a",
+ llilh_2 = "0000a50e0000RI-a",
+ llill_2 = "0000a50f0000RI-a",
+ lm_3 = "000098000000RS-a",
+ lmg_3 = "eb0000000004RSY-a",
+ lmh_3 = "eb0000000096RSY-a",
+ lmy_3 = "eb0000000098RSY-a",
+ lndbr_2 = "0000b3110000RRE",
+ lndfr_2 = "0000b3710000RRE",
+ lndr_2 = "000000002100RR",
+ lnebr_2 = "0000b3010000RRE",
+ lner_2 = "000000003100RR",
+ lngfr_2 = "0000b9110000RRE",
+ lngr_2 = "0000b9010000RRE",
+ lnr_2 = "000000001100RR",
+ lnxbr_2 = "0000b3410000RRE",
+ lnxr_2 = "0000b3610000RRE",
+ loc_3 = "eb00000000f2RSY-b",
+ locg_3 = "eb00000000e2RSY-b",
+ lpdbr_2 = "0000b3100000RRE",
+ lpdfr_2 = "0000b3700000RRE",
+ lpdr_2 = "000000002000RR",
+ lpebr_2 = "0000b3000000RRE",
+ lper_2 = "000000003000RR",
+ lpgfr_2 = "0000b9100000RRE",
+ lpgr_2 = "0000b9000000RRE",
+ lpq_2 = "e3000000008fRXY-a",
+ lpr_2 = "000000001000RR",
+ lpxbr_2 = "0000b3400000RRE",
+ lpxr_2 = "0000b3600000RRE",
+ lr_2 = "000000001800RR",
+ lra_2 = "0000b1000000RX-a",
+ lrag_2 = "e30000000003RXY-a",
+ lray_2 = "e30000000013RXY-a",
+ lrdr_2 = "000000002500RR",
+ lrer_2 = "000000003500RR",
+ lrl_2 = "c40d00000000RIL-b",
+ lrv_2 = "e3000000001eRXY-a",
+ lrvg_2 = "e3000000000fRXY-a",
+ lrvgr_2 = "0000b90f0000RRE",
+ lrvh_2 = "e3000000001fRXY-a",
+ lrvr_2 = "0000b91f0000RRE",
+ lt_2 = "e30000000012RXY-a",
+ ltdbr_2 = "0000b3120000RRE",
+ ltdr_2 = "000000002200RR",
+ ltdtr_2 = "0000b3d60000RRE",
+ ltebr_2 = "0000b3020000RRE",
+ lter_2 = "000000003200RR",
+ ltg_2 = "e30000000002RXY-a",
+ ltgf_2 = "e30000000032RXY-a",
+ ltgfr_2 = "0000b9120000RRE",
+ ltgr_2 = "0000b9020000RRE",
+ ltr_2 = "000000001200RR",
+ ltxbr_2 = "0000b3420000RRE",
+ ltxr_2 = "0000b3620000RRE",
+ ltxtr_2 = "0000b3de0000RRE",
+ lura_2 = "0000b24b0000RRE",
+ lurag_2 = "0000b9050000RRE",
+ lxdbr_2 = "0000b3050000RRE",
+ lxdr_2 = "0000b3250000RRE",
+ lxebr_2 = "0000b3060000RRE",
+ lxer_2 = "0000b3260000RRE",
+ lxr_2 = "0000b3650000RRE",
+ ly_2 = "e30000000058RXY-a",
+ lzdr_2 = "0000b3750000RRE",
+ lzer_2 = "0000b3740000RRE",
+ lzxr_2 = "0000b3760000RRE",
+ m_2 = "00005c000000RX-a",
+ madb_3 = "ed000000001eRXF",
+ maeb_3 = "ed000000000eRXF",
+ maebr_3 = "0000b30e0000RRD",
+ maer_3 = "0000b32e0000RRD",
+ md_2 = "00006c000000RX-a",
+ mdb_2 = "ed000000001cRXE",
+ mdbr_2 = "0000b31c0000RRE",
+ mde_2 = "00007c000000RX-a",
+ mdeb_2 = "ed000000000cRXE",
+ mdebr_2 = "0000b30c0000RRE",
+ mder_2 = "000000003c00RR",
+ mdr_2 = "000000002c00RR",
+ me_2 = "00007c000000RX-a",
+ meeb_2 = "ed0000000017RXE",
+ meebr_2 = "0000b3170000RRE",
+ meer_2 = "0000b3370000RRE",
+ mer_2 = "000000003c00RR",
+ mfy_2 = "e3000000005cRXY-a",
+ mghi_2 = "0000a70d0000RI-a",
+ mh_2 = "00004c000000RX-a",
+ mhi_2 = "0000a70c0000RI-a",
+ mhy_2 = "e3000000007cRXY-a",
+ ml_2 = "e30000000096RXY-a",
+ mlg_2 = "e30000000086RXY-a",
+ mlgr_2 = "0000b9860000RRE",
+ mlr_2 = "0000b9960000RRE",
+ mr_2 = "000000001c00RR",
+ ms_2 = "000071000000RX-a",
+ msfi_2 = "c20100000000RIL-a",
+ msg_2 = "e3000000000cRXY-a",
+ msgf_2 = "e3000000001cRXY-a",
+ msgfi_2 = "c20000000000RIL-a",
+ msgfr_2 = "0000b91c0000RRE",
+ msgr_2 = "0000b90c0000RRE",
+ msr_2 = "0000b2520000RRE",
+ msta_2 = "0000b2470000RRE",
+ msy_2 = "e30000000051RXY-a",
+ mvc_2 = "d20000000000SS-a",
+ mvcin_2 = "e80000000000SS-a",
+ mvcl_2 = "000000000e00RR",
+ mvcle_3 = "0000a8000000RS-a",
+ mvclu_3 = "eb000000008eRSY-a",
+ mvghi_2 = "e54800000000SIL",
+ mvhhi_2 = "e54400000000SIL",
+ mvhi_2 = "e54c00000000SIL",
+ mvi_2 = "000092000000SI",
+ mvn_2 = "d10000000000SS-a",
+ mvpg_2 = "0000b2540000RRE",
+ mvst_2 = "0000b2550000RRE",
+ mvz_2 = "d30000000000SS-a",
+ mxbr_2 = "0000b34c0000RRE",
+ mxd_2 = "000067000000RX-a",
+ mxdb_2 = "ed0000000007RXE",
+ mxdbr_2 = "0000b3070000RRE",
+ mxdr_2 = "000000002700RR",
+ mxr_2 = "000000002600RR",
+ n_2 = "000054000000RX-a",
+ nc_2 = "d40000000000SS-a",
+ ng_2 = "e30000000080RXY-a",
+ ngr_2 = "0000b9800000RRE",
+ ni_2 = "000094000000SI",
+ nihf_2 = "c00a00000000RIL-a",
+ nihh_2 = "0000a5040000RI-a",
+ nihl_2 = "0000a5050000RI-a",
+ nilf_2 = "c00b00000000RIL-a",
+ nilh_2 = "0000a5060000RI-a",
+ nill_2 = "0000a5070000RI-a",
+ nr_2 = "000000001400RR",
+ ny_2 = "e30000000054RXY-a",
+ o_2 = "000056000000RX-a",
+ oc_2 = "d60000000000SS-a",
+ og_2 = "e30000000081RXY-a",
+ ogr_2 = "0000b9810000RRE",
+ oi_2 = "000096000000SI",
+ oihf_2 = "c00c00000000RIL-a",
+ oihh_2 = "0000a5080000RI-a",
+ oihl_2 = "0000a5090000RI-a",
+ oilf_2 = "c00d00000000RIL-a",
+ oilh_2 = "0000a50a0000RI-a",
+ oill_2 = "0000a50b0000RI-a",
+ or_2 = "000000001600RR",
+ oy_2 = "e30000000056RXY-a",
+ palb_2 = "0000b2480000RRE",
+ pcc_2 = "0000b92c0000RRE",
+ pckmo_2 = "0000b9280000RRE",
+ pfd_2 = "e30000000036m", -- NOTE(review): parse_template has no "m" case, so this template ends in werror("unrecognized encoding").
+ pfdrl_2 = "c60200000000RIL-c",
+ pfmf_2 = "0000b9af0000RRE",
+ pgin_2 = "0000b22e0000RRE",
+ pgout_2 = "0000b22f0000RRE",
+ popcnt_2 = "0000b9e10000RRE",
+ pt_2 = "0000b2280000RRE",
+ ptf_2 = "0000b9a20000RRE",
+ pti_2 = "0000b99e0000RRE",
+ rll_3 = "eb000000001dRSY-a",
+ rllg_3 = "eb000000001cRSY-a",
+ rrbe_2 = "0000b22a0000RRE",
+ rrbm_2 = "0000b9ae0000RRE",
+ s_2 = "00005b000000RX-a",
+ sar_2 = "0000b24e0000RRE",
+ sd_2 = "00006b000000RX-a",
+ sdb_2 = "ed000000001bRXE",
+ sdbr_2 = "0000b31b0000RRE",
+ sdr_2 = "000000002b00RR",
+ se_2 = "00007b000000RX-a",
+ seb_2 = "ed000000000bRXE",
+ sebr_2 = "0000b30b0000RRE",
+ ser_2 = "000000003b00RR",
+ sfasr_2 = "0000b3850000RRE",
+ sfpc_2 = "0000b3840000RRE",
+ sg_2 = "e30000000009RXY-a",
+ sgf_2 = "e30000000019RXY-a",
+ sgfr_2 = "0000b9190000RRE",
+ sgr_2 = "0000b9090000RRE",
+ sh_2 = "00004b000000RX-a",
+ shy_2 = "e3000000007bRXY-a",
+ sl_2 = "00005f000000RX-a",
+ sla_2 = "00008b000000RS-a",
+ slag_3 = "eb000000000bRSY-a",
+ slak_3 = "eb00000000ddRSY-a",
+ slb_2 = "e30000000099RXY-a",
+ slbg_2 = "e30000000089RXY-a",
+ slbgr_2 = "0000b9890000RRE",
+ slbr_2 = "0000b9990000RRE",
+ slda_2 = "00008f000000RS-a",
+ sldl_2 = "00008d000000RS-a",
+ slfi_2 = "c20500000000RIL-a",
+ slg_2 = "e3000000000bRXY-a",
+ slgf_2 = "e3000000001bRXY-a",
+ slgfi_2 = "c20400000000RIL-a",
+ slgfr_2 = "0000b91b0000RRE",
+ slgr_2 = "0000b90b0000RRE",
+ sll_2 = "000089000000RS-a",
+ sllg_3 = "eb000000000dRSY-a",
+ sllk_3 = "eb00000000dfRSY-a",
+ slr_2 = "000000001f00RR",
+ sly_2 = "e3000000005fRXY-a",
+ spm_2 = "000000000400RR",
+ sqdb_2 = "ed0000000015RXE",
+ sqdbr_2 = "0000b3150000RRE",
+ sqdr_2 = "0000b2440000RRE",
+ sqeb_2 = "ed0000000014RXE",
+ sqebr_2 = "0000b3140000RRE",
+ sqer_2 = "0000b2450000RRE",
+ sqxbr_2 = "0000b3160000RRE",
+ sqxr_2 = "0000b3360000RRE",
+ sr_2 = "000000001b00RR",
+ sra_2 = "00008a000000RS-a",
+ srag_3 = "eb000000000aRSY-a",
+ srak_3 = "eb00000000dcRSY-a",
+ srda_2 = "00008e000000RS-a",
+ srdl_2 = "00008c000000RS-a",
+ srl_2 = "000088000000RS-a",
+ srlg_3 = "eb000000000cRSY-a",
+ srlk_3 = "eb00000000deRSY-a",
+ srst_2 = "0000b25e0000RRE",
+ srstu_2 = "0000b9be0000RRE",
+ ssair_2 = "0000b99f0000RRE",
+ ssar_2 = "0000b2250000RRE",
+ st_2 = "000050000000RX-a",
+ stam_3 = "00009b000000RS-a",
+ stamy_3 = "eb000000009bRSY-a",
+ stc_2 = "000042000000RX-a",
+ stch_2 = "e300000000c3RXY-a",
+ stcm_3 = "0000be000000RS-b",
+ stcmh_3 = "eb000000002cRSY-b",
+ stcmy_3 = "eb000000002dRSY-b",
+ stctg_3 = "eb0000000025RSY-a",
+ stctl_3 = "0000b6000000RS-a",
+ stcy_2 = "e30000000072RXY-a",
+ std_2 = "000060000000RX-a",
+ stdy_2 = "ed0000000067RXY-a",
+ ste_2 = "000070000000RX-a",
+ stey_2 = "ed0000000066RXY-a",
+ stfh_2 = "e300000000cbRXY-a",
+ stfl_1 = "0000b2b10000S",
+ stg_2 = "e30000000024RXY-a",
+ stgrl_2 = "c40b00000000RIL-b",
+ sth_2 = "000040000000RX-a",
+ sthh_2 = "e300000000c7RXY-a",
+ sthrl_2 = "c40700000000RIL-b",
+ sthy_2 = "e30000000070RXY-a",
+ stm_3 = "000090000000RS-a",
+ stmg_3 = "eb0000000024RSY-a",
+ stmh_3 = "eb0000000026RSY-a",
+ stmy_3 = "eb0000000090RSY-a",
+ stoc_3 = "eb00000000f3RSY-b",
+ stocg_3 = "eb00000000e3RSY-b",
+ stpq_2 = "e3000000008eRXY-a",
+ strl_2 = "c40f00000000RIL-b",
+ strv_2 = "e3000000003eRXY-a",
+ strvg_2 = "e3000000002fRXY-a",
+ strvh_2 = "e3000000003fRXY-a",
+ stura_2 = "0000b2460000RRE",
+ sturg_2 = "0000b9250000RRE",
+ sty_2 = "e30000000050RXY-a",
+ su_2 = "00007f000000RX-a",
+ sur_2 = "000000003f00RR",
+ svc_1 = "000000000a00I",
+ sw_2 = "00006f000000RX-a",
+ swr_2 = "000000002f00RR",
+ sxbr_2 = "0000b34b0000RRE",
+ sxr_2 = "000000003700RR",
+ sy_2 = "e3000000005bRXY-a",
+ tar_2 = "0000b24c0000RRE",
+ tb_2 = "0000b22c0000RRE",
+ thder_2 = "0000b3580000RRE",
+ thdr_2 = "0000b3590000RRE",
+ tm_2 = "000091000000SI",
+ tmhh_2 = "0000a7020000RI-a",
+ tmhl_2 = "0000a7030000RI-a",
+ tmlh_2 = "0000a7000000RI-a",
+ tmll_2 = "0000a7010000RI-a",
+ tmy_2 = "eb0000000051SIY",
+ tr_2 = "dc0000000000SS-a",
+ trace_3 = "000099000000RS-a",
+ tracg_3 = "eb000000000fRSY-a",
+ tre_2 = "0000b2a50000RRE",
+ trt_2 = "dd0000000000SS-a",
+ trtr_2 = "d00000000000SS-a",
+ unpka_2 = "ea0000000000SS-a",
+ unpku_2 = "e20000000000SS-a",
+ x_2 = "000057000000RX-a",
+ xc_2 = "d70000000000SS-a",
+ xg_2 = "e30000000082RXY-a",
+ xgr_2 = "0000b9820000RRE",
+ xi_2 = "000097000000SI",
+ xihf_2 = "c00600000000RIL-a",
+ xilf_2 = "c00700000000RIL-a",
+ xr_2 = "000000001700RR",
+ xy_2 = "e30000000057RXY-a",
+}
+-- Extended mnemonics for branches: for every (name, value) pair in map_cond
+-- add j<name>, jg<name>, b<name> and b<name>r, built from the brc, brcl, bc
+-- and bcr opcodes with the condition value added into the opcode halfwords.
+-- TODO: replace 'B' with correct encoding.
+for name, mask in pairs(map_cond) do
+  local hi = shl(mask, 20) -- Mask position within the 32-bit opcode word.
+  local lo = shl(mask, 4) -- Mask position within a single halfword.
+  map_op["j"..name.."_1"] = "0000"..tohex(0xa7040000+hi).."RI-c" -- brc
+  map_op["jg"..name.."_1"] = tohex(0xc0040000+hi).."0000".."RIL-c" -- brcl
+  map_op["b"..name.."_1"] = "0000"..tohex(0x47000000+hi).."RX-b" -- bc
+  map_op["b"..name.."r_1"] = "0000"..tohex(0x0700+lo).."RR" -- bcr
+end
+------------------------------------------------------------------------------
+-- Handle opcodes defined with template strings.
+-- Encode one instruction from a template: 12 hex digits (three opcode
+-- halfwords) followed by the operand format name. nparams/pos are unused.
+local function parse_template(params, template, nparams, pos)
+  -- Read the template in 16-bit chunks; each format below writes out only
+  -- the halfwords it uses (leading halfword zeroes are not written out).
+  local op0 = tonumber(sub(template, 1, 4), 16)
+  local op1 = tonumber(sub(template, 5, 8), 16)
+  local op2 = tonumber(sub(template, 9, 12), 16)
+  local p = sub(template, 13) -- Format name: selects the operand encoding.
+  if p == "I" then
+    local imm_val, a = parse_imm8(params[1])
+    op2 = op2 + imm_val
+    wputhw(op2)
+    if a then a() end -- a() emits the deferred action, if there is one.
+  elseif p == "RI-a" then
+    op1 = op1 + shl(parse_reg(params[1]), 4)
+    wputhw(op1)
+    parse_imm16(params[2])
+  elseif p == "RI-b" then
+    op1 = op1 + shl(parse_reg(params[1]), 4)
+    wputhw(op1)
+    local mode, n, s = parse_label(params[2])
+    waction("REL_"..mode, n, s)
+  elseif p == "RI-c" then
+    if #params > 1 then -- Optional leading mask operand.
+      op1 = op1 + shl(parse_num(params[1]), 4)
+    end
+    wputhw(op1)
+    local mode, n, s = parse_label(params[#params])
+    waction("REL_"..mode, n, s)
+  elseif p == "RIE-e" then
+    op0 = op0 + shl(parse_reg(params[1]), 4) + parse_reg(params[2])
+    wputhw1(op0) -- NOTE(review): every other format calls wputhw; confirm wputhw1 is defined.
+    local mode, n, s = parse_label(params[3])
+    waction("REL_"..mode, n, s)
+    wputhw(op2)
+  elseif p == "RIL-a" then
+    op0 = op0 + shl(parse_reg(params[1]), 4)
+    wputhw(op0)
+    parse_imm32(params[2])
+  elseif p == "RIL-b" then
+    op0 = op0 + shl(parse_reg(params[1]), 4)
+    wputhw(op0)
+    local mode, n, s = parse_label(params[2])
+    waction("REL_"..mode, n, s)
+  elseif p == "RIL-c" then
+    if #params > 1 then -- Optional leading mask operand.
+      op0 = op0 + shl(parse_num(params[1]), 4)
+    end
+    wputhw(op0)
+    local mode, n, s = parse_label(params[#params])
+    waction("REL_"..mode, n, s)
+  elseif p == "RR" then
+    if #params > 1 then -- With a single operand only the low nibble is filled.
+      op2 = op2 + shl(parse_reg(params[1]), 4)
+    end
+    op2 = op2 + parse_reg(params[#params])
+    wputhw(op2)
+  elseif p == "RRD" then
+    wputhw(op1)
+    op2 = op2 + shl(parse_reg(params[1]), 12) + shl(parse_reg(params[2]), 4) + parse_reg(params[3])
+    wputhw(op2)
+  elseif p == "RRE" then
+    op2 = op2 + shl(parse_reg(params[1]), 4) + parse_reg(params[2])
+    wputhw(op1); wputhw(op2)
+  elseif p == "RRF-b" then
+    wputhw(op1)
+    op2 = op2 + shl(parse_reg(params[1]), 4) + shl(parse_reg(params[2]), 12) + parse_reg(params[3]) + shl(parse_mask(params[4]), 8)
+    wputhw(op2)
+  elseif p == "RRF-e" then
+    wputhw(op1)
+    op2 = op2 + shl(parse_reg(params[1]), 4) + shl(parse_mask(params[2]), 12) + parse_reg(params[3])
+    if params[4] then -- Optional second mask.
+      op2 = op2 + shl(parse_mask2(params[4]), 8)
+    end
+    wputhw(op2)
+  elseif p == "RS-a" then
+    -- Two-operand form is "r1, d(b)"; three-operand form is "r1, r3, d(b)".
+    -- d, b, a are declared here (not inside an if/else arm) so the deferred
+    -- displacement action `a` is still in scope when it is run below.
+    local d, b, a = parse_mem_b(params[3] or params[2])
+    op1 = op1 + shl(parse_reg(params[1]), 4)
+    if params[3] then
+      op1 = op1 + parse_reg(params[2])
+    end
+    op2 = op2 + shl(b, 12) + d
+    wputhw(op1); wputhw(op2)
+    if a then a() end
+  elseif p == "RS-b" then
+    local m = parse_mask(params[2])
+    local d, b, a = parse_mem_b(params[3])
+    op1 = op1 + shl(parse_reg(params[1]), 4) + m
+    op2 = op2 + shl(b, 12) + d
+    wputhw(op1); wputhw(op2)
+    if a then a() end
+  elseif p == "RSI" then
+    op1 = op1 + shl(parse_reg(params[1]), 4) + parse_reg(params[2])
+    wputhw(op1)
+    local mode, n, s = parse_label(params[3])
+    waction("REL_"..mode, n, s)
+  elseif p == "RSY-a" then
+    local d, b, a = parse_mem_by(params[3])
+    op0 = op0 + shl(parse_reg(params[1]), 4) + parse_reg(params[2])
+    op1 = op1 + shl(b, 12) + band(d, 0xfff) -- Low 12 bits of the displacement.
+    op2 = op2 + band(shr(d, 4), 0xff00) -- Bits 12-19 go in the high byte.
+    wputhw(op0); wputhw(op1); wputhw(op2)
+    if a then a() end -- a() emits action.
+  elseif p == "RX-a" then
+    local d, x, b, a = parse_mem_bx(params[2])
+    op1 = op1 + shl(parse_reg(params[1]), 4) + x
+    op2 = op2 + shl(b, 12) + d
+    wputhw(op1); wputhw(op2)
+    if a then a() end
+  elseif p == "RX-b" then
+    local d, x, b, a = parse_mem_bx(params[#params])
+    if #params > 1 then -- Optional leading mask operand.
+      op1 = op1 + shl(parse_num(params[1]), 4)
+    end
+    op1 = op1 + x
+    op2 = op2 + shl(b, 12) + d
+    wputhw(op1); wputhw(op2)
+    if a then a() end
+  elseif p == "RXE" then
+    local d, x, b, a = parse_mem_bx(params[2])
+    op0 = op0 + shl(parse_reg(params[1]), 4) + x
+    op1 = op1 + shl(b, 12) + d
+    wputhw(op0); wputhw(op1)
+    if a then a() end
+    wputhw(op2)
+  elseif p == "RXF" then
+    local d, x, b, a = parse_mem_bx(params[3])
+    op0 = op0 + shl(parse_reg(params[2]), 4) + x
+    op1 = op1 + shl(b, 12) + d
+    wputhw(op0); wputhw(op1)
+    if a then a() end
+    op2 = op2 + shl(parse_reg(params[1]), 12)
+    wputhw(op2)
+  elseif p == "RXY-a" then
+    local d, x, b, a = parse_mem_bxy(params[2])
+    op0 = op0 + shl(parse_reg(params[1]), 4) + x
+    op1 = op1 + shl(b, 12) + band(d, 0xfff) -- Low 12 bits of the displacement.
+    op2 = op2 + band(shr(d, 4), 0xff00) -- Bits 12-19 go in the high byte.
+    wputhw(op0); wputhw(op1); wputhw(op2)
+    if a then a() end
+  elseif p == "S" then
+    wputhw(op1)
+    local d, b, a = parse_mem_b(params[1])
+    op2 = op2 + shl(b, 12) + d
+    wputhw(op2)
+    if a then a() end
+  elseif p == "SI" then
+    local imm_val, a = parse_imm8(params[2])
+    op1 = op1 + imm_val
+    wputhw(op1)
+    if a then a() end
+    local d, b, a = parse_mem_b(params[1])
+    op2 = op2 + shl(b, 12) + d
+    wputhw(op2)
+    if a then a() end
+  elseif p == "SIL" then
+    wputhw(op0)
+    local d, b, a = parse_mem_b(params[1])
+    op1 = op1 + shl(b, 12) + d
+    wputhw(op1)
+    if a then a() end
+    parse_imm16(params[2])
+  elseif p == "SIY" then
+    local imm8, iact = parse_imm8(params[2])
+    op0 = op0 + imm8 -- Immediate is the low byte of the first halfword, as in "SI".
+    wputhw(op0)
+    if iact then iact() end
+    local d, b, a = parse_mem_by(params[1])
+    op1 = op1 + shl(b, 12) + band(d, 0xfff)
+    op2 = op2 + band(shr(d, 4), 0xff00)
+    wputhw(op1); wputhw(op2)
+    if a then a() end
+  elseif p == "SS-a" then
+    local d1, l1, b1, d1a, l1a = parse_mem_lb(params[1])
+    local d2, b2, d2a = parse_mem_b(params[2])
+    op0 = op0 + l1
+    op1 = op1 + shl(b1, 12) + d1
+    op2 = op2 + shl(b2, 12) + d2
+    wputhw(op0)
+    if l1a then l1a() end
+    wputhw(op1)
+    if d1a then d1a() end
+    wputhw(op2)
+    if d2a then d2a() end
+  elseif p == "SS-b" then
+    local high_l = true
+    local d1, l1, b1, d1a, l1a = parse_mem_l2b(params[1], high_l)
+    high_l = false
+    local d2, l2, b2, d2a, l2a = parse_mem_l2b(params[2], high_l)
+    op0 = op0 + shl(l1, 4) + l2
+    op1 = op1 + shl(b1, 12) + d1
+    op2 = op2 + shl(b2, 12) + d2
+    wputhw(op0)
+    if l1a then l1a() end
+    if l2a then l2a() end
+    wputhw(op1)
+    if d1a then d1a() end
+    wputhw(op2)
+    if d2a then d2a() end
+  else
+    werror("unrecognized encoding") -- No branch above matches this format name.
+  end
+end
+
+-- Encode an instruction from its template string; alternatives separated by
+-- "|" are tried in order and the last error is re-raised if all of them fail.
+-- Without params, return the template with its 12 hex opcode digits stripped.
+function op_template(params, template, nparams)
+  if not params then return template:gsub("%x%x%x%x%x%x%x%x%x%x%x%x", "") end
+  -- Limit number of section buffer positions used by a single dasm_put().
+  -- A single opcode needs a maximum of 5 positions.
+  if secpos+5 > maxsecpos then wflush() end
+  local lpos, apos, spos = #actlist, #actargs, secpos
+  local ok, err
+  for t in gmatch(template, "[^|]+") do
+    ok, err = pcall(parse_template, params, t, nparams)
+    if ok then return end
+    -- Roll back everything the failed attempt emitted, however many entries.
+    secpos = spos
+    for i = #actlist, lpos+1, -1 do actlist[i] = nil end
+    for i = #actargs, apos+1, -1 do actargs[i] = nil end
+  end
+  error(err, 0)
+end
+map_op[".template__"] = op_template -- Registered under ".template__"; presumably looked up by the DynASM core for string templates (confirm in dynasm.lua).
+------------------------------------------------------------------------------
+-- Pseudo-opcode to mark the position where the action list is to be emitted.
+map_op[".actionlist_1"] = function(params)
+  if not params then return "cvar" end
+  local cvar = params[1] -- Used as given; nothing here validates the name.
+  wline(function(sink) writeactions(sink, cvar) end) -- wline gets a callback, not text.
+end
+-- Pseudo-opcode to mark the position where the global enum is to be emitted.
+map_op[".globals_1"] = function(params)
+  if not params then return "prefix" end
+  local pfx = params[1] -- Used as given; nothing here validates the prefix.
+  wline(function(sink) writeglobals(sink, pfx) end) -- wline gets a callback, not text.
+end
+-- Pseudo-opcode to mark the position where the global names are to be emitted.
+map_op[".globalnames_1"] = function(params)
+  if not params then return "cvar" end
+  local cvar = params[1] -- Used as given; nothing here validates the name.
+  wline(function(sink) writeglobalnames(sink, cvar) end) -- wline gets a callback, not text.
+end
+-- Pseudo-opcode to mark the position where the extern names are to be emitted.
+map_op[".externnames_1"] = function(params)
+  if not params then return "cvar" end
+  local cvar = params[1] -- Used as given; nothing here validates the name.
+  wline(function(sink) writeexternnames(sink, cvar) end) -- wline gets a callback, not text.
+end
+------------------------------------------------------------------------------
+-- Label pseudo-opcode (converted from trailing colon form).
+map_op[".label_1"] = function(params)
+  if not params then return "[1-9] | ->global | =>pcexpr" end
+  if maxsecpos < secpos+1 then wflush() end -- Flush first if one more position would not fit.
+  local kind, num, expr = parse_label(params[1], true)
+  if kind == "EXT" then werror("bad label definition") end -- An extern cannot be defined here.
+  waction("LABEL_"..kind, num, expr, 1)
+end
+------------------------------------------------------------------------------
+-- Pseudo-opcodes for data storage.
+map_op[".long_*"] = function(params)
+  if not params then return "imm..." end
+  for _, text in ipairs(params) do
+    local value = tonumber(text)
+    if value == nil then werror("bad immediate `"..text.."'") end
+    -- Negative values are stored as value + 2^32.
+    wputw(value < 0 and value + 2^32 or value)
+    if secpos+2 > maxsecpos then wflush() end
+  end
+end
+-- Alignment pseudo-opcode.
+map_op[".align_1"] = function(params)
+  if not params then return "numpow2" end
+  if secpos+1 > maxsecpos then wflush() end
+  local align = tonumber(params[1])
+  -- Must be a power of 2 in the range (2 ... 256).
+  local pow = 2
+  while align and pow <= 256 do
+    if align == pow then
+      -- Action halfword is 2**n-1.
+      waction("ALIGN", align-1, nil, 1)
+      return
+    end
+    pow = pow * 2
+  end
+  -- Not a number, or not one of the accepted powers of two.
+  werror("bad alignment")
+end
+------------------------------------------------------------------------------
+-- Pseudo-opcode for (primitive) type definitions (map to C types).
+map_op[".type_3"] = function(params, nparams)
+  if not params then -- No operands: describe the 2- or 3-operand form.
+    if nparams == 2 then return "name, ctype" end
+    return "name, ctype, reg"
+  end
+  local name, ctype, reg = params[1], params[2], params[3]
+  -- The name must look like an identifier ...
+  if not match(name, "^[%a_][%w_]*$") then
+    werror("bad type name `"..name.."'")
+  end
+  -- ... and must not already be present in map_type.
+  if map_type[name] then
+    werror("duplicate type `"..name.."'")
+  end
+  -- Add #type to defines. A bit unclean to put it in map_archdef.
+  map_archdef["#"..name] = "sizeof("..ctype..")"
+  -- Add new type and emit shortcut define.
+  local num = ctypenum + 1
+  local entry = { ctype = ctype, reg = reg }
+  entry.ctypefmt = format("Dt%X(%%s)", num)
+  map_type[name] = entry
+  wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num, ctype))
+  ctypenum = num
+end
+map_op[".type_2"] = map_op[".type_3"]
+-- Dump type definitions.
+local function dumptypes(out, lvl)
+  local names = {} -- Keys of map_type, sorted below; lvl is not used.
+  for key in pairs(map_type) do names[#names+1] = key end
+  sort(names)
+  out:write("Type definitions:\n")
+  for i = 1, #names do
+    local name = names[i]
+    local tp = map_type[name]
+    out:write(format(" %-20s %-20s %s\n", name, tp.ctype, tp.reg or ""))
+  end
+  out:write("\n")
+end
+------------------------------------------------------------------------------
+-- Set the current section.
+function _M.section(num)
+ waction("SECTION", num) -- Emit a SECTION action carrying num.
+ wflush(true) -- SECTION is a terminal action.
+end
+------------------------------------------------------------------------------
+-- Dump architecture description.
+function _M.dumparch(out)
+ out:write(format("DynASM %s version %s, released %s\n\n", -- Banner line built from _info.
+ _info.arch, _info.version, _info.release))
+ dumpactions(out) -- Then dumpactions writes to the same stream.
+end
+-- Dump all user defined elements.
+function _M.dumpdef(out, lvl)
+ dumptypes(out, lvl) -- Types first, ...
+ dumpglobals(out, lvl) -- ... then globals, ...
+ dumpexterns(out, lvl) -- ... then externs, all to the same stream.
+end
+------------------------------------------------------------------------------
+-- Pass callbacks from/to the DynASM core.
+function _M.passcb(wl, we, wf, ww)
+ wline, werror, wfatal, wwarn = wl, we, wf, ww -- Keep the four callbacks passed in.
+ return wflush -- Hand this module's wflush back to the caller.
+end
+-- Setup the arch-specific module.
+function _M.setup(arch, opt)
+ g_arch, g_opt = arch, opt -- Stored as given; nothing is validated here.
+end
+-- Merge the core maps and the arch-specific maps.
+function _M.mergemaps(map_coreop, map_def)
+ setmetatable(map_op, { __index = map_coreop }) -- Keys missing from map_op fall back to map_coreop.
+ setmetatable(map_def, { __index = map_archdef }) -- Keys missing from map_def fall back to map_archdef.
+ return map_op, map_def
+end
+return _M
+------------------------------------------------------------------------------
diff -urpN luajit-2.1.0~beta3+git.1624618403.e9577376.orig/dynasm/Examples/run.sh luajit-2.1.0~beta3+git.1624618403.e9577376/dynasm/Examples/run.sh
--- luajit-2.1.0~beta3+git.1624618403.e9577376.orig/dynasm/Examples/run.sh 1969-12-31 18:00:00.000000000 -0600
+++ luajit-2.1.0~beta3+git.1624618403.e9577376/dynasm/Examples/run.sh 2021-10-05 12:36:23.413160484 -0500
@@ -0,0 +1,13 @@
+#!/bin/bash
+# set -x
+
+# run test
+lua ../dynasm.lua test_z_inst.c | gcc -DDASM_CHECKS -std=gnu99 -Wall -Werror -g -x c -o test_z_inst -
+./test_z_inst
+ec=$?
+
+# cleanup
+rm -f ./test_z_inst
+
+# exit
+exit $ec
diff -urpN luajit-2.1.0~beta3+git.1624618403.e9577376.orig/dynasm/Examples/test_z_inst.c luajit-2.1.0~beta3+git.1624618403.e9577376/dynasm/Examples/test_z_inst.c
--- luajit-2.1.0~beta3+git.1624618403.e9577376.orig/dynasm/Examples/test_z_inst.c 1969-12-31 18:00:00.000000000 -0600
+++ luajit-2.1.0~beta3+git.1624618403.e9577376/dynasm/Examples/test_z_inst.c 2021-10-05 12:36:23.449160674 -0500
@@ -0,0 +1,487 @@
+#include <assert.h>
+#include <stdio.h>
+#include <sys/mman.h>
+
+#include "../dasm_proto.h"
+#include "../dasm_s390x.h"
+
+// DynASM directives.
+|.arch s390x
+|.actionlist actions
+|.globals lab_
+
+static void add(dasm_State *state)
+{
+ dasm_State ** Dst = &state;
+
+ | ar r2,r3
+ | br r14
+}
+
+/*
+static void add_rrd(dasm_State *state)
+{
+ dasm_State **Dst = &state;
+
+ | lgfi r4 , 0x02
+ | maer r2 , r3 , r4
+ | br r14
+}
+*/
+
+static void sub(dasm_State *state)
+{
+ dasm_State **Dst = &state;
+
+ | sr r2,r3
+ | br r14
+}
+
+static void mul(dasm_State *state)
+{
+ dasm_State **Dst = &state;
+
+ | msr r2 , r3
+ | br r14
+}
+
+static void rx(dasm_State *state)
+{
+ dasm_State **Dst = &state;
+
+ int x = 1;
+ int y = 4095;
+
+ | la r4, 4095(r2, r3)
+ | la r5, 4095(r4)
+ | la r1, x(r5)
+ | la r2, y(r1, r0)
+ | br r14
+}
+
+static void rxy(dasm_State *state)
+{
+ dasm_State **Dst = &state;
+
+ int x = -524287;
+ int y = 524286;
+
+ | lay r4, -524288(r2, r3)
+ | lay r5, 524287(r4)
+ | lay r1, x(r5)
+ | lay r2, y(r1, r0)
+ | br r14
+}
+
+static void lab(dasm_State *state)
+{
+ dasm_State **Dst = &state;
+
+ // r1 = 0; do { r2 += r2; r1 += 1; } while(r1 < r3);
+ | la r1, 0(r0)
+ |1:
+ | agr r2, r2
+ | la r1, 1(r1)
+ | cgr r1, r3
+ | jl <1
+ | br r14
+}
+
+static void labg(dasm_State *state)
+{
+ dasm_State **Dst = &state;
+
+ // r1 = 0; do { r2 += r2; r1 += 1; } while(r1 < r3);
+ | la r1, 0(r0)
+ |1:
+ | agr r2, r2
+ | la r1, 1(r1)
+ | cgr r1, r3
+ | jgl <1
+ | jgnl >1
+ | stg r0, 0(r0)
+ |1:
+ | br r14
+}
+
+static void jmp_fwd(dasm_State *state)
+{
+ dasm_State **Dst = &state;
+
+ // while(r2!=r3){r2 += 2};
+ | j >1
+ |1:
+ | cgr r2 , r3
+ | jne >2
+ | je >3
+ |2:
+ | afi r2, 0x2
+ | j <1
+ |3:
+ | br r14
+
+}
+
+static void add_imm16(dasm_State *state)
+{
+ dasm_State **Dst = &state;
+
+ | ahi r2 , 0xf
+ | br r14
+}
+
+static void add_imm32(dasm_State *state)
+{
+ dasm_State **Dst = &state;
+
+ | afi r2 , 0xe
+ | br r14
+}
+
+static void save(dasm_State *state)
+{
+ dasm_State **Dst = &state;
+ // Prologue/epilogue round trip: returns (arg1 + arg2) * (16 + 32).
+ |.define CFRAME_SPACE, 224 // Delta for sp, 8 byte aligned.
+ |// Register save area.
+ |.define SAVE_GPRS, 264(sp) // Save area for r6-r15 (10*8 bytes).
+ |
+ |// Argument save area, each slot is 8-bytes (32-bit types are sign/zero extended).
+ |.define RESERVED, 232(sp) // Reserved for compiler use.
+ |.define BACKCHAIN, 224(sp)
+ |
+ |// Current stack frame.
+ |.define SAVE_FPR15, 216(sp)
+ |.define SAVE_FPR14, 208(sp)
+ |.define SAVE_FPR13, 200(sp)
+ |.define SAVE_FPR12, 192(sp)
+ |.define SAVE_FPR11, 184(sp)
+ |.define SAVE_FPR10, 176(sp)
+ |.define SAVE_FPR9, 168(sp)
+ |.define SAVE_FPR8, 160(sp)
+ |
+ |// Callee save area.
+ |.define CALLEESAVE, 000(sp)
+ |
+ |.macro saveregs
+ | lay sp, -CFRAME_SPACE(sp) // Allocate stack frame.
+ | stmg r6, r15, SAVE_GPRS // The r15 slot holds the adjusted sp; it is never reloaded.
+ | std f8, SAVE_FPR8 // f8-f15 are callee-saved.
+ | std f9, SAVE_FPR9
+ | std f10, SAVE_FPR10
+ | std f11, SAVE_FPR11
+ | std f12, SAVE_FPR12
+ | std f13, SAVE_FPR13
+ | std f14, SAVE_FPR14
+ | std f15, SAVE_FPR15
+ |.endmacro
+ |
+ |.macro restoreregs
+ | ld f8, SAVE_FPR8 // f8-f15 are callee-saved.
+ | ld f9, SAVE_FPR9
+ | ld f10, SAVE_FPR10
+ | ld f11, SAVE_FPR11
+ | ld f12, SAVE_FPR12
+ | ld f13, SAVE_FPR13
+ | ld f14, SAVE_FPR14
+ | ld f15, SAVE_FPR15
+ | lmg r6, r14, SAVE_GPRS // Reload r6-r14 only: the saved r15 is post-allocation.
+ | la sp, CFRAME_SPACE(sp) // Release the stack frame (restores the caller's sp).
+ |.endmacro
+ |
+ | saveregs
+ | lgfi r7, 0x10 // 16
+ | lgfi r8, 0x20 // 32
+ | agr r2, r3
+ | agr r7, r8
+ | msgr r2, r7
+ | restoreregs
+ | br r14
+}
+
+static void labmul(dasm_State *state)
+{
+ dasm_State **Dst = &state;
+
+ // Multiply using an add function.
+ // Only correct if input is positive.
+ |->mul_func:
+ | stmg r6, r14, 48(sp)
+ | lgr r6, r2
+ | lgr r7, r3
+ | cgfi r7, 0
+ | je >3
+ | cgfi r7, 1
+ | je >2
+ |1:
+ | lgr r3, r6
+ | brasl r14, ->add_func
+ | lay r7, -1(r7)
+ | cgfi r7, 1
+ | jh <1
+ |2:
+ | lmg r6, r14, 48(sp)
+ | br r14
+ |3:
+ | la r2, 0(r0)
+ | j <2
+
+ |->add_func:
+ | agr r2, r3
+ | br r14
+}
+
+static void pc(dasm_State *state) {
+ dasm_State **Dst = &state;
+ int MAX = 10;
+ dasm_growpc(Dst, MAX+1);
+
+ | j =>MAX
+ for (int i = 0; i <= MAX; i++) {
+ |=>i:
+ if (i == 0) {
+ | br r14
+ } else {
+ | aghi r2, i
+ | j =>i-1
+ }
+ }
+}
+
+/*
+static void load_test(dasm_State *state)
+{
+ dasm_State **Dst = &state;
+
+ | ltdr r2 , r3
+ | br r14
+}
+*/
+
+
+static void test_mask(dasm_State *state)
+{
+ dasm_State **Dst = &state;
+ |lay sp , -16(sp) // 16 bytes: the 8-byte store below covers 4(sp)..11(sp).
+ |stg r2, 4(sp)
+ |tm 4(sp),0x04 // Tests bit 0x04 of the most significant byte of arg1.
+ |la sp, 16(sp) // Release the frame; LA does not change the condition code.
+ |je >2
+ |jne >1
+|1:
+ |ar r2,r3
+ |br r14
+|2:
+ |sr r2,r3
+ |br r14
+}
+
+static void ssa(dasm_State *state) {
+ dasm_State **Dst = &state;
+
+ | lay sp, -16(sp)
+ | lay r0, -1(r0)
+ | stg r0, 8(sp)
+ | xc 8(8, sp), 8(sp)
+ | stg r2, 0(sp)
+ | mvc 13(2, sp), 6(sp)
+ | lg r2, 8(sp)
+ | la sp, 16(sp)
+ | br r14
+}
+
+static void ssa_act(dasm_State *state) {
+ dasm_State **Dst = &state;
+
+ int xl = 8;
+ int d1 = 13;
+ int l1 = 2;
+ int d2 = 6;
+
+ | lay sp, -16(sp)
+ | lay r0, -1(r0)
+ | stg r0, 8(sp)
+ | xc 8(xl, sp), 8(sp)
+ | stg r2, 0(sp)
+ | mvc d1(l1, sp), d2(sp)
+ | lg r2, 8(sp)
+ | la sp, 16(sp)
+ | br r14
+}
+
+typedef struct {
+ int a;
+ int b;
+} SimpleStruct;
+
+static void type(dasm_State *state) {
+ dasm_State **Dst = &state;
+
+ | .type SIMPLE, SimpleStruct
+ | lay sp, -8(sp)
+ | stg r2, 0(sp)
+ | xgr r2, r2
+ | l r2, SIMPLE:sp->b
+ | la sp, 8(sp)
+ | br r14
+}
+
+static void sil(dasm_State *state) {
+ dasm_State **Dst = &state;
+
+ | lay sp, -16(sp)
+ | xc 0(16, sp), 0(sp)
+ | mvghi 0(sp), 5
+ | mvhi 8(sp), 7
+ | mvhhi 12(sp), 11
+ | lghi r2, 0
+ | ag r2, 0(sp) // r2 += 5
+ | a r2, 8(sp) // r2 += 7
+ | ah r2, 12(sp) // r2 += 11
+ | la sp, 16(sp)
+ | br r14
+}
+
+static void rrfe_rrd(dasm_State *state) {
+ dasm_State ** Dst = &state;
+
+ | cefbr f0,r2
+ | cefbr f2,r3
+ | cefbr f4,r4
+ | maebr f0 ,f2 ,f4
+ | cfebr r2, 0, f0
+ | br r14
+}
+
+static void rre(dasm_State *state) {
+
+ dasm_State **Dst = &state;
+
+ | lay sp , -8(sp)
+ | cefbr f0 , r2
+ | cefbr f1 , r3
+ | fidr f0 , f1
+ | cfebr r2 ,0,f0
+ | la sp, 8(sp)
+ | br r14
+}
+
+static void rsb(dasm_State *state) {
+ dasm_State **Dst = &state;
+
+ | lay sp, -4(sp)
+ | lghi r3, 0x0706
+ | lghi r4, 0
+ | iill r4, 6
+ | iilh r4, 7
+ | st r4, 0(sp)
+ | lghi r2, 0
+ | clm r3, 5, 0(sp)
+ | jne >1
+ | lghi r2, 1
+ |1:
+ | la sp, 4(sp)
+ | br r14
+}
+
+static void sqrt_rxe(dasm_State *state)
+{
+ dasm_State **Dst = &state;
+
+ | lay sp , -8(sp)
+ | cefbr f0 , r2
+ | stdy f0 , 0(sp)
+ | sqeb f0 ,0(r4,sp)
+ | cfebr r2 ,0, f0
+ | la sp, 8(sp)
+ | br r14
+
+}
+
+static void rxf(dasm_State *state) {
+ dasm_State **Dst = &state;
+
+ | lay sp , -8(sp)
+ | cegbra f1 ,0, r2,0
+ | cegbra f2 ,0,r3,0
+ | ste f2 ,0(sp)
+ | maeb f1, f2, 0(sp)
+ | cfebr r2 ,0, f1
+ | la sp, 8(sp)
+ | br r14
+
+}
+
+typedef struct {
+ int64_t arg1;
+ int64_t arg2;
+ int64_t arg3;
+ void (*fn)(dasm_State *);
+ int64_t want;
+ const char *testname;
+} test_table;
+
+test_table test[] = {
+ { 1, 2, 0, add, 3, "add"},
+ {10, 5, 0, sub, 5, "sub"},
+ { 2, 3, 0, mul, 6, "mul"},
+ { 5, 7, 0, rx, 12298, "rx"},
+ { 5, 7, 0, rxy, 10, "rxy"},
+ { 2, 4, 0, lab, 32, "lab"},
+ { 2, 4, 0, labg, 32, "labg"},
+ { 2, 0, 0, add_imm16, 17, "imm16"},
+ { 2, 0, 0, add_imm32, 16, "imm32"},
+ { 7, 3, 0, save, 480, "save"},
+ { 7, 3, 0, labmul, 21, "labmul0"},
+ { 7, 0, 0, labmul, 0, "labmul1"},
+ { 0, 0, 0, pc, 55, "pc"},
+ { 2,12, 0, jmp_fwd, 12, "jmp_fwd"},
+// { 9,8, 0, add_rrd, 25, "add_rrd"},
+// { 2,4, 0, load_test, 4,"load_test"},
+ {-1, 0, 0, ssa, 65535<<8, "ssa"},
+ {-1, 0, 0, ssa_act, 65535<<8, "ssa_act"},
+ {27, 0, 0, type, 27, "type"},
+ { 0, 0, 0, sil, 23, "sil"},
+ {15, 3,10, rrfe_rrd, 45, "rrfe_rrd"},
+ { 0, 0, 0, rsb, 0, "rsb"},
+ {12,10, 0, rre, 10, "rre"},
+ {16,10, 0, sqrt_rxe, 4,"sqrt_rxe"},
+ {16,10, 0, rxf, 116, "rxf"},
+ { 4, 3, 0, test_mask, 1,"test_mask"}
+};
+
+static void *jitcode(dasm_State **state, size_t *size)
+{
+ int dasm_status = dasm_link(state, size); /* Link sections; *size is used as the mapping length. */
+ assert(dasm_status == DASM_S_OK);
+ void *ret = mmap(0, *size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (ret == MAP_FAILED) { perror("mmap"); exit(1); }
+ dasm_status = dasm_encode(state, ret); /* Emit the machine code into the writable mapping. */
+ assert(dasm_status == DASM_S_OK);
+ dasm_free(state); /* main() re-initializes the state for every test. */
+ if (mprotect(ret, *size, PROT_READ | PROT_EXEC) != 0) { perror("mprotect"); exit(1); }
+ return ret; /* Executable mapping of *size bytes; the caller munmap()s it. */
+}
+
+int main(int argc, char *argv[])
+{
+ dasm_State *state;
+ for (size_t i = 0; i < sizeof(test)/sizeof(test[0]); i++) {
+ dasm_init(&state, 1);
+ void* labels[lab__MAX];
+ dasm_setupglobal(&state, labels, lab__MAX);
+ dasm_setup(&state, actions);
+ test[i].fn(state); /* Assemble the test body into the DynASM state. */
+ size_t size;
+ int64_t (*fptr)(int64_t, int64_t, int64_t) = jitcode(&state, &size);
+ int64_t got = fptr(test[i].arg1, test[i].arg2, test[i].arg3);
+ if (got != test[i].want) {
+ fprintf(stderr, "FAIL: test %s: want %lld, got %lld\n", test[i].testname,
+ (long long)test[i].want, (long long)got); /* Casts keep the format portable. */
+ exit(1);
+ }
+ munmap(fptr, size);
+ }
+ printf("all tests passed\n");
+ return 0;
+}
diff -urpN luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/host/buildvm_asm.c luajit-2.1.0~beta3+git.1624618403.e9577376/src/host/buildvm_asm.c
--- luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/host/buildvm_asm.c 2021-10-05 12:28:22.786623190 -0500
+++ luajit-2.1.0~beta3+git.1624618403.e9577376/src/host/buildvm_asm.c 2021-10-05 12:36:23.421160526 -0500
@@ -87,6 +87,54 @@ err:
}
fprintf(ctx->fp, "\t%s %s\n", opname, sym);
}
+#elif LJ_TARGET_S390X
+/* Emit halfwords piecewise as assembler text, eight .hword values per line. */
+static void emit_asm_halfwords(BuildCtx *ctx, uint8_t *p, int n)
+{
+ const uint16_t *hw = (const uint16_t *)p; /* Read in host byte order. */
+ int k, count = n / 2; /* Bytes -> whole halfwords. */
+ for (k = 0; k < count; k++) {
+ if ((k & 7) != 0)
+ fprintf(ctx->fp, ",0x%hx", hw[k]);
+ else
+ fprintf(ctx->fp, "\t.hword 0x%hx", hw[k]);
+ if ((k & 7) == 7) putc('\n', ctx->fp);
+ }
+ /* Terminate a partially filled final line. */
+ if ((count & 7) != 0) putc('\n', ctx->fp);
+}
+
+/* Emit s390x text relocations: raw halfwords, then the final branch symbolically. */
+static void emit_asm_reloc_text(BuildCtx *ctx, uint8_t *cp, int n,
+ const char *sym)
+{
+ const char *opname = NULL, *argt = ""; /* argt: inserted before argument. */
+ int opcode, arg;
+ if ((n & 1) || n < 2) {
+ fprintf(stderr, "Error: instruction stream length invalid: %d.\n", n);
+ exit(1);
+ }
+ n -= 2; /* The last halfword is the opcode of the relocated instruction. */
+ opcode = *(uint16_t*)(&cp[n]);
+ arg = (opcode>>4) & 0xf; /* Operand nibble, printed before the symbol. */
+ switch (opcode & 0xff0f) {
+ case 0xa705: opname = "bras"; argt = "%r"; break;
+ case 0xc005: opname = "brasl"; argt = "%r"; break;
+ case 0xa704: opname = "brc"; break;
+ case 0xc004: opname = "brcl"; break;
+ default:
+ fprintf(stderr, "Error: unsupported opcode for %s symbol relocation.\n",
+ sym);
+ exit(1);
+ }
+ emit_asm_halfwords(ctx, cp, n);
+ if (strncmp(sym+(*sym == '_'), LABEL_PREFIX, sizeof(LABEL_PREFIX)-1) != 0) {
+ /* Various fixups for external symbols outside of our binary. */
+ fprintf(ctx->fp, "\t%s %s%d, %s@PLT\n", opname, argt, arg, sym);
+ } else {
+ fprintf(ctx->fp, "\t%s %s%d, %s\n", opname, argt, arg, sym);
+ }
+}
#else
/* Emit words piecewise as assembler text. */
static void emit_asm_words(BuildCtx *ctx, uint8_t *p, int n)
@@ -303,6 +351,9 @@ void emit_asm(BuildCtx *ctx)
emit_asm_reloc(ctx, r->type, ctx->relocsym[r->sym]);
}
ofs += n+4;
+#elif LJ_TARGET_S390X
+ emit_asm_reloc_text(ctx, ctx->code+ofs, n, ctx->relocsym[r->sym]);
+ ofs += n+4;
#else
emit_asm_wordreloc(ctx, ctx->code+ofs, n, ctx->relocsym[r->sym]);
ofs += n;
@@ -311,6 +362,8 @@ void emit_asm(BuildCtx *ctx)
}
#if LJ_TARGET_X86ORX64
emit_asm_bytes(ctx, ctx->code+ofs, next-ofs);
+#elif LJ_TARGET_S390X
+ emit_asm_halfwords(ctx, ctx->code+ofs, next-ofs);
#else
emit_asm_words(ctx, ctx->code+ofs, next-ofs);
#endif
diff -urpN luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/host/buildvm.c luajit-2.1.0~beta3+git.1624618403.e9577376/src/host/buildvm.c
--- luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/host/buildvm.c 2021-06-25 05:53:23.000000000 -0500
+++ luajit-2.1.0~beta3+git.1624618403.e9577376/src/host/buildvm.c 2021-10-05 12:36:23.377160294 -0500
@@ -65,6 +65,8 @@ static int collect_reloc(BuildCtx *ctx,
#include "../dynasm/dasm_ppc.h"
#elif LJ_TARGET_MIPS
#include "../dynasm/dasm_mips.h"
+#elif LJ_TARGET_S390X
+#include "../dynasm/dasm_s390x.h"
#else
#error "No support for this architecture (yet)"
#endif
diff -urpN luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/jit/dis_s390x.lua luajit-2.1.0~beta3+git.1624618403.e9577376/src/jit/dis_s390x.lua
--- luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/jit/dis_s390x.lua 1969-12-31 18:00:00.000000000 -0600
+++ luajit-2.1.0~beta3+git.1624618403.e9577376/src/jit/dis_s390x.lua 2021-10-05 12:36:23.445160653 -0500
@@ -0,0 +1 @@
+-- Not yet implemented.
diff -urpN luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/lib_jit.c luajit-2.1.0~beta3+git.1624618403.e9577376/src/lib_jit.c
--- luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/lib_jit.c 2021-06-25 05:53:23.000000000 -0500
+++ luajit-2.1.0~beta3+git.1624618403.e9577376/src/lib_jit.c 2021-10-05 12:58:31.960174477 -0500
@@ -718,6 +718,8 @@ static uint32_t jit_cpudetect(void)
if (x) flags |= JIT_F_MIPSXXR2; /* Either 0x80000000 (R2) or 0 (R1). */
}
#endif
+#elif LJ_TARGET_S390X
+ /* No optional CPU features to detect (for now). */
#else
#error "Missing CPU detection for this architecture"
diff -urpN luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/lj_arch.h luajit-2.1.0~beta3+git.1624618403.e9577376/src/lj_arch.h
--- luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/lj_arch.h 2021-10-05 12:28:22.786623190 -0500
+++ luajit-2.1.0~beta3+git.1624618403.e9577376/src/lj_arch.h 2021-10-05 12:36:23.457160716 -0500
@@ -31,6 +31,8 @@
#define LUAJIT_ARCH_mips32 6
#define LUAJIT_ARCH_MIPS64 7
#define LUAJIT_ARCH_mips64 7
+#define LUAJIT_ARCH_S390X 8
+#define LUAJIT_ARCH_s390x 8
/* Target OS. */
#define LUAJIT_OS_OTHER 0
@@ -59,6 +61,8 @@
#define LUAJIT_TARGET LUAJIT_ARCH_ARM
#elif defined(__aarch64__)
#define LUAJIT_TARGET LUAJIT_ARCH_ARM64
+#elif defined(__s390x__) || defined(__s390x)
+#define LUAJIT_TARGET LUAJIT_ARCH_S390X
#elif defined(__ppc__) || defined(__ppc) || defined(__PPC__) || defined(__PPC) || defined(__powerpc__) || defined(__powerpc) || defined(__POWERPC__) || defined(__POWERPC) || defined(_M_PPC)
#define LUAJIT_TARGET LUAJIT_ARCH_PPC
#elif defined(__mips64__) || defined(__mips64) || defined(__MIPS64__) || defined(__MIPS64)
@@ -428,6 +432,21 @@
#define LJ_ARCH_VERSION 10
#endif
+#elif LUAJIT_TARGET == LUAJIT_ARCH_S390X
+
+#define LJ_ARCH_NAME "s390x"
+#define LJ_ARCH_BITS 64
+#define LJ_ARCH_ENDIAN LUAJIT_BE
+#define LJ_TARGET_S390X 1
+#define LJ_TARGET_EHRETREG 0xe
+#define LJ_TARGET_JUMPRANGE 32 /* +-2^32 = +-4GB (32-bit, halfword aligned) */
+#define LJ_TARGET_MASKSHIFT 1
+#define LJ_TARGET_MASKROT 1
+#define LJ_TARGET_UNALIGNED 1
+#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
+#define LJ_TARGET_GC64 1
+#define LJ_ARCH_NOJIT 1 /* NYI */
+
#else
#error "No target architecture defined"
#endif
diff -urpN luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/lj_asm.c luajit-2.1.0~beta3+git.1624618403.e9577376/src/lj_asm.c
--- luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/lj_asm.c 2021-06-25 05:53:23.000000000 -0500
+++ luajit-2.1.0~beta3+git.1624618403.e9577376/src/lj_asm.c 2021-10-05 12:36:23.389160358 -0500
@@ -1642,6 +1642,8 @@ static void asm_loop(ASMState *as)
#include "lj_asm_ppc.h"
#elif LJ_TARGET_MIPS
#include "lj_asm_mips.h"
+#elif LJ_TARGET_S390X
+#include "lj_asm_s390x.h"
#else
#error "Missing assembler for target CPU"
#endif
diff -urpN luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/lj_ccallback.c luajit-2.1.0~beta3+git.1624618403.e9577376/src/lj_ccallback.c
--- luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/lj_ccallback.c 2021-10-25 16:52:59.684087182 -0500
+++ luajit-2.1.0~beta3+git.1624618403.e9577376/src/lj_ccallback.c 2021-10-25 15:40:46.949027757 -0500
@@ -566,6 +566,15 @@ void lj_ccallback_mcode_free(CTState *ct
if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
((float *)dp)[1] = *(float *)dp;
+#elif LJ_TARGET_S390X
+
+#define CALLBACK_HANDLE_REGARG \
+ if (isfp) { \
+ if (nfpr < CCALL_NARG_FPR) { sp = &cts->cb.fpr[nfpr++]; goto done; } \
+ } else { \
+ if (ngpr < maxgpr) { sp = &cts->cb.gpr[ngpr++]; goto done; } \
+ }
+
#else
#error "Missing calling convention definitions for this architecture"
#endif
diff -urpN luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/lj_ccall.c luajit-2.1.0~beta3+git.1624618403.e9577376/src/lj_ccall.c
--- luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/lj_ccall.c 2021-10-05 12:28:22.790623212 -0500
+++ luajit-2.1.0~beta3+git.1624618403.e9577376/src/lj_ccall.c 2021-10-05 12:36:23.449160674 -0500
@@ -653,6 +653,40 @@
goto done; \
}
+#elif LJ_TARGET_S390X
+/* -- POSIX/s390x calling conventions --------------------------------------- */
+
+#define CCALL_HANDLE_STRUCTRET \
+ cc->retref = 1; /* Return all structs by reference. */ \
+ cc->gpr[ngpr++] = (GPRArg)dp;
+
+#define CCALL_HANDLE_COMPLEXRET \
+ cc->retref = 1; /* Return all complex values by reference. */ \
+ cc->gpr[ngpr++] = (GPRArg)dp;
+
+#define CCALL_HANDLE_COMPLEXRET2 \
+ UNUSED(dp); /* Nothing to do. */
+
+#define CCALL_HANDLE_STRUCTARG \
+ /* Pass structs of size 1, 2, 4 or 8 in a GPR by value. */ \
+ if (!(sz == 1 || sz == 2 || sz == 4 || sz == 8)) { \
+ rp = cdataptr(lj_cdata_new(cts, did, sz)); \
+ sz = CTSIZE_PTR; /* Pass all other structs by reference. */ \
+ }
+
+#define CCALL_HANDLE_COMPLEXARG \
+ /* Pass complex numbers by reference. */ \
+ /* TODO: not sure why this is different to structs. */ \
+ rp = cdataptr(lj_cdata_new(cts, did, sz)); \
+ sz = CTSIZE_PTR;
+
+#define CCALL_HANDLE_REGARG \
+ if (isfp) { \
+ if (nfpr < CCALL_NARG_FPR) { dp = &cc->fpr[nfpr++]; goto done; } \
+ } else { \
+ if (ngpr < maxgpr) { dp = &cc->gpr[ngpr++]; goto done; } \
+ }
+
#else
#error "Missing calling convention definitions for this architecture"
#endif
@@ -1111,7 +1145,7 @@ static int ccall_set_args(lua_State *L,
CTypeID did;
CType *d;
CTSize sz;
- MSize n, isfp = 0, isva = 0;
+ MSize n, isfp = 0, isva = 0, onstack = 0;
void *dp, *rp = NULL;
#if LJ_TARGET_PPC && LJ_ARCH_BITS == 64
int isf32 = 0;
@@ -1154,6 +1188,7 @@ static int ccall_set_args(lua_State *L,
CCALL_HANDLE_REGARG /* Handle register arguments. */
/* Otherwise pass argument on stack. */
+ onstack = 1;
if (CCALL_ALIGN_STACKARG && !rp && (d->info & CTF_ALIGN) > CTALIGN_PTR) {
MSize align = (1u << ctype_align(d->info-CTALIGN_PTR)) -1;
nsp = (nsp + align) & ~align; /* Align argument on stack. */
@@ -1234,6 +1269,16 @@ static int ccall_set_args(lua_State *L,
*(int64_t *)dp = (int64_t)*(int32_t *)dp; /* Sign-extend to 64 bit. */
}
#endif
+#if LJ_TARGET_S390X
+ /* Arguments need to be sign-/zero-extended to 64-bits. */
+ if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info) ||
+ (isfp && onstack)) && d->size <= 4) {
+ if (d->info & CTF_UNSIGNED || isfp)
+ *(uint64_t *)dp = (uint64_t)*(uint32_t *)dp;
+ else
+ *(int64_t *)dp = (int64_t)*(int32_t *)dp;
+ }
+#endif
#if LJ_TARGET_X64 && LJ_ABI_WIN
if (isva) { /* Windows/x64 mirrors varargs in both register sets. */
if (nfpr == ngpr)
diff -urpN luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/lj_ccall.h luajit-2.1.0~beta3+git.1624618403.e9577376/src/lj_ccall.h
--- luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/lj_ccall.h 2021-10-05 12:28:22.790623212 -0500
+++ luajit-2.1.0~beta3+git.1624618403.e9577376/src/lj_ccall.h 2021-10-05 12:36:23.445160653 -0500
@@ -139,6 +139,21 @@ typedef union FPRArg {
struct { LJ_ENDIAN_LOHI(float f; , float g;) };
} FPRArg;
+#elif LJ_TARGET_S390X
+
+#define CCALL_NARG_GPR 5 /* GPR 2,3,4,5,6 */
+#define CCALL_NARG_FPR 4 /* FPR 0,2,4,6 */
+#define CCALL_NRET_GPR 1 /* GPR 2 */
+#define CCALL_NRET_FPR 1 /* FPR 0 */
+#define CCALL_SPS_EXTRA 20 /* 160-byte callee save area (not sure if this is the right place) */
+#define CCALL_SPS_FREE 0
+
+typedef intptr_t GPRArg;
+typedef union FPRArg {
+ double d;
+ float f;
+} FPRArg;
+
#else
#error "Missing calling convention definitions for this architecture"
#endif
diff -urpN luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/lj_err.c luajit-2.1.0~beta3+git.1624618403.e9577376/src/lj_err.c
--- luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/lj_err.c 2021-06-25 05:53:23.000000000 -0500
+++ luajit-2.1.0~beta3+git.1624618403.e9577376/src/lj_err.c 2021-10-05 12:36:23.457160716 -0500
@@ -419,6 +419,9 @@ LJ_FUNCA int lj_err_unwind_dwarf(int ver
if (version != 1)
return _URC_FATAL_PHASE1_ERROR;
cf = (void *)_Unwind_GetCFA(ctx);
+#ifdef LJ_TARGET_S390X
+ cf -= 160; /* CFA points 160 bytes above r15. */
+#endif
L = cframe_L(cf);
if ((actions & _UA_SEARCH_PHASE)) {
#if LJ_UNWIND_EXT
diff -urpN luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/lj_frame.h luajit-2.1.0~beta3+git.1624618403.e9577376/src/lj_frame.h
--- luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/lj_frame.h 2021-10-05 12:28:22.794623233 -0500
+++ luajit-2.1.0~beta3+git.1624618403.e9577376/src/lj_frame.h 2021-10-05 12:36:23.453160695 -0500
@@ -273,6 +273,20 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CAL
#endif
#define CFRAME_OFS_MULTRES 0
#define CFRAME_SHIFT_MULTRES 3
+#elif LJ_TARGET_S390X
+#define CFRAME_OFS_ERRF 280
+#define CFRAME_OFS_NRES 272
+#define CFRAME_OFS_PREV 264
+#define CFRAME_OFS_L 256
+#define CFRAME_OFS_PC 168
+#define CFRAME_OFS_MULTRES 160
+#define CFRAME_SIZE 240
+/*
+** TODO: it would be good if we always decoded param*8 like
+** the RISC architectures do. If so then SHIFT_MULTRES will
+** need to change to 3.
+*/
+#define CFRAME_SHIFT_MULTRES 0
#else
#error "Missing CFRAME_* definitions for this architecture"
#endif
diff -urpN luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/lj_target.h luajit-2.1.0~beta3+git.1624618403.e9577376/src/lj_target.h
--- luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/lj_target.h 2021-06-25 05:53:23.000000000 -0500
+++ luajit-2.1.0~beta3+git.1624618403.e9577376/src/lj_target.h 2021-10-05 12:36:23.445160653 -0500
@@ -144,6 +144,8 @@ typedef uint32_t RegCost;
#include "lj_target_ppc.h"
#elif LJ_TARGET_MIPS
#include "lj_target_mips.h"
+#elif LJ_TARGET_S390X
+#include "lj_target_s390x.h"
#else
#error "Missing include for target CPU"
#endif
diff -urpN luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/lj_target_s390x.h luajit-2.1.0~beta3+git.1624618403.e9577376/src/lj_target_s390x.h
--- luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/lj_target_s390x.h 1969-12-31 18:00:00.000000000 -0600
+++ luajit-2.1.0~beta3+git.1624618403.e9577376/src/lj_target_s390x.h 2021-10-05 12:36:23.445160653 -0500
@@ -0,0 +1,80 @@
+/*
+** Definitions for IBM z/Architecture (s390x) CPUs.
+** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#ifndef _LJ_TARGET_S390X_H
+#define _LJ_TARGET_S390X_H
+
+/* -- Registers IDs ------------------------------------------------------- */
+
+#define GPRDEF(_) \
+ _(R0) _(R1) _(R2) _(R3) _(R4) _(R5) _(R6) _(R7) \
+ _(R8) _(R9) _(R10) _(R11) _(R12) _(R13) _(R14) _(R15)
+#define FPRDEF(_) \
+ _(F0) _(F1) _(F2) _(F3) \
+ _(F4) _(F5) _(F6) _(F7) \
+ _(F8) _(F9) _(F10) _(F11) \
+ _(F12) _(F13) _(F14) _(F15)
+// TODO: VREG?
+
+#define RIDENUM(name) RID_##name,
+
+enum {
+ GPRDEF(RIDENUM) /* General-purpose registers (GPRs). */
+ FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */
+ RID_MAX,
+
+ /* Calling conventions. */
+ RID_SP = RID_R15,
+ RID_RET = RID_R2,
+ RID_FPRET = RID_F0,
+
+ /* These definitions must match with the *.dasc file(s): */
+ RID_BASE = RID_R13, /* Interpreter BASE (vm_s390x.dasc: BASE = r13). */
+ RID_LPC = RID_R9, /* Interpreter PC. */
+ RID_DISPATCH = RID_R10, /* Interpreter DISPATCH table. */
+
+ /* Register ranges [min, max) and number of registers. */
+ RID_MIN_GPR = RID_R0,
+ RID_MIN_FPR = RID_F0,
+ RID_MAX_GPR = RID_MIN_FPR,
+ RID_MAX_FPR = RID_MAX,
+ RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR,
+ RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR,
+};
+
+/* -- Register sets ------------------------------------------------------- */
+
+/* -- Spill slots --------------------------------------------------------- */
+
+/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs.
+**
+** SPS_FIXED: Available fixed spill slots in interpreter frame.
+** This definition must match with the *.dasc file(s).
+**
+** SPS_FIRST: First spill slot for general use. Reserve min. two 32 bit slots.
+*/
+#define SPS_FIXED 2
+#define SPS_FIRST 2
+
+#define SPOFS_TMP 0
+
+#define sps_scale(slot) (4 * (int32_t)(slot))
+#define sps_align(slot) (((slot) - SPS_FIXED + 1) & ~1)
+
+/* -- Exit state ---------------------------------------------------------- */
+
+/* This definition must match with the *.dasc file(s). */
+typedef struct {
+ lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */
+ intptr_t gpr[RID_NUM_GPR]; /* General-purpose registers (pointer-sized on this 64-bit target). */
+ int32_t spill[256]; /* Spill slots. */
+} ExitState;
+
+#define EXITSTUB_SPACING 4
+#define EXITSTUBS_PER_GROUP 32
+
+/* -- Instructions -------------------------------------------------------- */
+
+#endif
diff -urpN luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/Makefile luajit-2.1.0~beta3+git.1624618403.e9577376/src/Makefile
--- luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/Makefile 2021-10-05 12:28:22.786623190 -0500
+++ luajit-2.1.0~beta3+git.1624618403.e9577376/src/Makefile 2021-10-05 13:54:29.633902276 -0500
@@ -245,6 +245,9 @@ else
ifneq (,$(findstring LJ_TARGET_ARM ,$(TARGET_TESTARCH)))
TARGET_LJARCH= arm
else
+ifneq (,$(findstring LJ_TARGET_S390X ,$(TARGET_TESTARCH)))
+ TARGET_LJARCH= s390x
+else
ifneq (,$(findstring LJ_TARGET_ARM64 ,$(TARGET_TESTARCH)))
ifneq (,$(findstring __AARCH64EB__ ,$(TARGET_TESTARCH)))
TARGET_ARCH= -D__AARCH64EB__=1
@@ -273,6 +276,7 @@ else
endif
endif
endif
+endif
endif
endif
endif
diff -urpN luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/vm_s390x.dasc luajit-2.1.0~beta3+git.1624618403.e9577376/src/vm_s390x.dasc
--- luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/vm_s390x.dasc 1969-12-31 18:00:00.000000000 -0600
+++ luajit-2.1.0~beta3+git.1624618403.e9577376/src/vm_s390x.dasc 2021-10-25 16:52:46.620017650 -0500
@@ -0,0 +1,4261 @@
+|// Low-level VM code for IBM z/Architecture (s390x) CPUs in LJ_GC64 mode.
+|// Bytecode interpreter, fast functions and helper functions.
+|// Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h
+|
+|// This assembly targets the instruction set available on z10 (and newer)
+|// machines.
+|
+|// ELF ABI registers:
+|// r0,r1 | | volatile |
+|// r2 | parameter and return value | volatile |
+|// r3-r5 | parameter | volatile |
+|// r6 | parameter | saved |
+|// r7-r11 | | saved |
+|// r12 | GOT pointer (needed?) | saved |
+|// r13 | literal pool (not needed) | saved |
+|// r14 | return address | volatile |
+|// r15 | stack pointer | saved |
+|// f0,f2,f4,f6 | parameter and return value | volatile |
+|// f1,f3,f5,f7 | | volatile |
+|// f8-f15 | | saved |
+|// ar0,ar1 | TLS | volatile |
+|// ar2-ar15 | | volatile |
+|
+|.arch s390x
+|.section code_op, code_sub
+|
+|.actionlist build_actionlist
+|.globals GLOB_
+|.globalnames globnames
+|.externnames extnames
+|
+|//-----------------------------------------------------------------------
+|
+|// Fixed register assignments for the interpreter, callee-saved (see the ABI table above).
+|.define KBASE, r8 // Constants of current Lua function.
+|.define PC, r9 // Next PC.
+|.define DISPATCH, r10 // Opcode dispatch table.
+|.define ITYPE, r11 // Temporary used for type information.
+|.define BASE, r13 // Base of current Lua stack frame.
+|
+|// The following temporaries are not saved across C calls, except for RB.
+|.define RA, r4 // Overlaps CARG3.
+|.define RB, r7 // Must be callee-save.
+|.define RC, r5 // Overlaps CARG4.
+|.define RD, r6 // Overlaps CARG5, so it is overwritten whenever a 5th argument is set up.
+|
+|// Calling conventions. Also used as temporaries.
+|.define CARG1, r2
+|.define CARG2, r3
+|.define CARG3, r4
+|.define CARG4, r5
+|.define CARG5, r6 // Listed as 'saved' in the ABI table above, unlike r2-r5.
+|
+|.define FARG1, f0
+|.define FARG2, f2
+|.define FARG3, f4
+|.define FARG4, f6
+|
+|.define CRET1, r2 // Same register as CARG1.
+|
+|.define TMPR0, r0 // Volatile per the ABI table above.
+|.define TMPR1, r1 // Volatile per the ABI table above.
+|.define OP, r2 // Same register as CARG1/CRET1.
+|
+|// Stack layout while in interpreter. Must match with lj_frame.h.
+|.define CFRAME_SPACE, 240 // Delta for sp, 8 byte aligned.
+|
+|// Register save area.
+|.define SAVE_GPRS, 288(sp) // Save area for r6-r15 (10*8 bytes). Same memory as SAVE_GPRS_P once sp is lowered by CFRAME_SPACE (48+240).
+|.define SAVE_GPRS_P, 48(sp) // Save area for r6-r15 (10*8 bytes) in prologue (before stack frame is allocated).
+|
+|// Argument save area.
+|.define SAVE_ERRF, 280(sp) // Argument 4, in r5.
+|.define SAVE_NRES, 272(sp) // Argument 3, in r4. Size is 4 bytes.
+|.define SAVE_CFRAME, 264(sp) // Argument 2, in r3.
+|.define SAVE_L, 256(sp) // Argument 1, in r2.
+|.define RESERVED, 248(sp) // Reserved for compiler use.
+|.define BACKCHAIN, 240(sp) // <- sp entering interpreter.
+|
+|// Interpreter stack frame.
+|.define SAVE_FPR15, 232(sp)
+|.define SAVE_FPR14, 224(sp)
+|.define SAVE_FPR13, 216(sp)
+|.define SAVE_FPR12, 208(sp)
+|.define SAVE_FPR11, 200(sp)
+|.define SAVE_FPR10, 192(sp)
+|.define SAVE_FPR9, 184(sp)
+|.define SAVE_FPR8, 176(sp) // f8-f15 occupy 8 bytes each, 176..239.
+|.define SAVE_PC, 168(sp)
+|.define SAVE_MULTRES, 160(sp) // Accessed as 4 bytes (st/llgf).
+|.define SAVE_TMP, 160(sp) // Overlaps SAVE_MULTRES
+|.define SAVE_TMP_HI, 164(sp) // Higher-addressed 4 bytes of SAVE_TMP (the low-order word on big-endian), to avoid SAVE_MULTRES.
+|
+|// Callee save area (allocated by interpreter).
+|.define CALLEESAVE, 000(sp) // <- sp in interpreter.
+|
+|.macro saveregs // Prologue: save r6-r15 and f8-f15, allocate the interpreter frame.
+| stmg r6, r15, SAVE_GPRS_P // Store r6-r15 relative to the incoming sp (frame not allocated yet).
+| lay sp, -CFRAME_SPACE(sp) // Allocate stack frame.
+| std f8, SAVE_FPR8 // f8-f15 are callee-saved.
+| std f9, SAVE_FPR9
+| std f10, SAVE_FPR10
+| std f11, SAVE_FPR11
+| std f12, SAVE_FPR12
+| std f13, SAVE_FPR13
+| std f14, SAVE_FPR14
+| std f15, SAVE_FPR15
+|.endmacro
+|
+|.macro restoreregs // Epilogue: reload what saveregs stored.
+| ld f8, SAVE_FPR8 // f8-f15 are callee-saved.
+| ld f9, SAVE_FPR9
+| ld f10, SAVE_FPR10
+| ld f11, SAVE_FPR11
+| ld f12, SAVE_FPR12
+| ld f13, SAVE_FPR13
+| ld f14, SAVE_FPR14
+| ld f15, SAVE_FPR15
+| lmg r6, r15, SAVE_GPRS // Reloads r6-r15. Restores the stack pointer (r15), so it must come last.
+|.endmacro
+|
+|// Type definitions for typed operands such as L:RB->base. Some of these are only used for documentation.
+|.type L, lua_State
+|.type GL, global_State
+|.type TVALUE, TValue
+|.type GCOBJ, GCobj
+|.type STR, GCstr
+|.type TAB, GCtab
+|.type LFUNC, GCfuncL
+|.type CFUNC, GCfuncC
+|.type PROTO, GCproto
+|.type UPVAL, GCupval
+|.type NODE, Node
+|.type NARGS, int
+|.type TRACE, GCtrace
+|.type SBUF, SBuf
+|
+|//-----------------------------------------------------------------------
+|
+|// Instruction headers. ins_NEXT leaves the 16 bit D operand in RD. These split it into RB/RC where needed.
+|.macro ins_A; .endmacro // Nothing to decode.
+|.macro ins_AD; .endmacro // Nothing to decode.
+|.macro ins_AJ; .endmacro // Nothing to decode.
+|.macro ins_ABC; srlg RB, RD, 8; llgcr RC, RD; .endmacro // RB = RD >> 8, RC = low byte of RD.
+|.macro ins_AB_; srlg RB, RD, 8; .endmacro // RB = RD >> 8.
+|.macro ins_A_C; llgcr RC, RD; .endmacro // RC = low byte of RD.
+|.macro ins_AND; lghi TMPR1, -1; xgr RD, TMPR1; .endmacro // RD = ~RD (clobbers TMPR1)
+|
+|// Instruction decode+dispatch. Loads OP, RD and RA from the instruction at PC, advances PC, jumps to the handler.
+|.macro ins_NEXT
+| llgc OP, 3(PC) // Opcode byte.
+| llgh RD, 0(PC) // 16 bit D operand (bytes 0-1).
+| llgc RA, 2(PC) // A operand byte.
+| sllg TMPR1, OP, 3 // Scale the opcode by 8 to index 8 byte table entries.
+| lg TMPR1, 0(TMPR1, DISPATCH) // Load the handler address from the dispatch table.
+| la PC, 4(PC) // Advance PC past this 4 byte instruction.
+| br TMPR1
+|.endmacro
+|
+|// Instruction footer. The '.if 1' below hard-selects replicated dispatch.
+|.if 1
+| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use.
+| .define ins_next, ins_NEXT
+| .define ins_next_, ins_NEXT
+|.else
+| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch.
+| .macro ins_next
+| j ->ins_next // Jump to the single shared dispatch sequence.
+| .endmacro
+| .macro ins_next_ // Defines the shared ->ins_next label and emits the dispatch there.
+| ->ins_next:
+| ins_NEXT
+| .endmacro
+|.endif
+|
+|// Call decode and dispatch. Like ins_NEXT, but starts at LFUNC:RB->pc and does not reload RD.
+|.macro ins_callt
+| // BASE = new base, RB = LFUNC, RD = nargs+1, -8(BASE) = PC
+| lg PC, LFUNC:RB->pc // Continue at the function's pc field.
+| llgc OP, 3(PC) // Opcode byte.
+| llgc RA, 2(PC) // A operand byte.
+| sllg TMPR1, OP, 3 // Scale the opcode by 8 to index 8 byte table entries.
+| la PC, 4(PC) // Advance PC past this 4 byte instruction.
+| lg TMPR1, 0(TMPR1, DISPATCH) // Load the handler address from the dispatch table.
+| br TMPR1
+|.endmacro
+|
+|.macro ins_call // ins_callt preceded by saving the current PC into the new frame.
+| // BASE = new base, RB = LFUNC, RD = nargs+1
+| stg PC, -8(BASE) // Store the caller's PC in the slot at BASE-8.
+| ins_callt
+|.endmacro
+|
+|// Assumes DISPATCH is relative to GL.
+#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field)) /* Displacement, used with DISPATCH as base, of a global_State field. */
+#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field)) /* Displacement, used with DISPATCH as base, of a jit_State field. */
+|
+#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto)) /* Offset of a GCproto field measured back from the end of the GCproto struct. */
+|
+|//-----------------------------------------------------------------------
+|
+|// Macros to clear or set tags. The tag occupies the bits from bit 47 upwards.
+|.macro cleartp, reg
+| nihf reg, 0x7fff // AND the high word with 0x7fff, keeping only the low 47 bits of reg.
+|.endmacro
+|.macro settp, reg, tp
+| oihf reg, tp<<15 // OR tp<<15 into the high word, i.e. tp lands at bit 47 and up.
+|.endmacro
+|.macro settp, dst, reg, tp
+| llihf dst, tp<<15 // dst = tp<<15 in the high word, low word zero.
+| ogr dst, reg // dst |= reg.
+|.endmacro
+|.macro setint, reg
+| settp reg, LJ_TISNUM // Tag reg in place with LJ_TISNUM.
+|.endmacro
+|.macro setint, dst, reg
+| settp dst, reg, LJ_TISNUM // dst = reg tagged with LJ_TISNUM.
+|.endmacro
+|
+|// Macros to test operand types. All of them leave the extracted tag in ITYPE.
+|.macro checktp_nc, reg, tp, target // Like checktp, but leaves reg untouched.
+| srag ITYPE, reg, 47 // ITYPE = reg >> 47 (arithmetic shift).
+| clfi ITYPE, tp // Unsigned 32 bit compare against the expected tag.
+| jne target
+|.endmacro
+|.macro checktp, reg, tp, target
+| srag ITYPE, reg, 47 // ITYPE = reg >> 47 (arithmetic shift).
+| cleartp reg // Strip the tag from reg, also when the check fails.
+| clfi ITYPE, tp // Unsigned 32 bit compare against the expected tag.
+| jne target
+|.endmacro
+|.macro checktptp, src, tp, target // Same instruction sequence as checktp_nc.
+| srag ITYPE, src, 47
+| clfi ITYPE, tp
+| jne target
+|.endmacro
+|.macro checkstr, reg, target; checktp reg, LJ_TSTR, target; .endmacro
+|.macro checktab, reg, target; checktp reg, LJ_TTAB, target; .endmacro
+|.macro checkfunc, reg, target; checktp reg, LJ_TFUNC, target; .endmacro
+|
+|.macro checknumx, reg, target, jump // Compare the tag with LJ_TISNUM. 'jump' is the branch mnemonic to use.
+| srag ITYPE, reg, 47
+| clfi ITYPE, LJ_TISNUM
+| jump target
+|.endmacro
+|.macro checkint, reg, target; checknumx reg, target, jne; .endmacro // Branch if tag != LJ_TISNUM.
+|.macro checkinttp, src, target; checknumx src, target, jne; .endmacro // Same as checkint.
+|.macro checknum, reg, target; checknumx reg, target, jhe; .endmacro // Branch if tag >= LJ_TISNUM (unsigned).
+|.macro checknumtp, src, target; checknumx src, target, jhe; .endmacro // Same as checknum.
+|.macro checknumber, src, target; checknumx src, target, jh; .endmacro // Branch if tag > LJ_TISNUM (unsigned).
+|
+|.macro load_false, reg; lghi reg, -1; iihl reg, 0x7fff; .endmacro // reg = 0xffff7fffffffffff, assumes LJ_TFALSE == ~(1<<47)
+|.macro load_true, reg; lghi reg, -1; iihh reg, 0xfffe; .endmacro // reg = 0xfffeffffffffffff, assumes LJ_TTRUE == ~(2<<47)
+|
+|.define PC_OP, -1(PC) // Opcode byte of the instruction just dispatched (PC was already advanced by 4).
+|.define PC_RA, -2(PC) // A operand byte.
+|.define PC_RB, -4(PC) // B operand byte, the first byte of D.
+|.define PC_RC, -3(PC) // C operand byte, the second byte of D.
+|.define PC_RD, -4(PC) // 16 bit D operand, same bytes as RB and RC together.
+|
+|.macro branchPC, reg // PC += reg*4 - BCBIAS_J*4. Clobbers TMPR1.
+| // Must not clobber condition code.
+| sllg TMPR1, reg, 2 // TMPR1 = reg * 4.
+| lay PC, (-BCBIAS_J*4)(TMPR1, PC) // Address arithmetic only.
+|.endmacro
+|
+|// Set current VM state. Clobbers TMPR1. NOTE(review): stg writes 8 bytes -- confirm the vmstate field is that wide.
+|.macro set_vmstate, st
+| lghi TMPR1, ~LJ_VMST_..st // '..' pastes the argument: ~LJ_VMST_<st>.
+| stg TMPR1, DISPATCH_GL(vmstate)(DISPATCH)
+|.endmacro
+|
+|// Synthesize binary floating-point constants.
+|.macro bfpconst_tobit, reg, tmp // Synthesize 2^52 + 2^51.
+| llihh tmp, 0x4338 // tmp = 0x4338000000000000, the bit pattern of that double.
+| ldgr reg, tmp // Copy the bit pattern from the GPR into the FPR.
+|.endmacro
+|
+|// Move table write barrier back. Overwrites reg.
+|.macro barrierback, tab, reg
+| ni tab->marked, ~LJ_GC_BLACK // black2gray(tab)
+| lg reg, (DISPATCH_GL(gc.grayagain))(DISPATCH) // reg = previous gc.grayagain.
+| stg tab, (DISPATCH_GL(gc.grayagain))(DISPATCH) // gc.grayagain = tab.
+| stg reg, tab->gclist // tab->gclist = previous gc.grayagain.
+|.endmacro
+
+#if !LJ_DUALNUM
+#error "Only dual-number mode supported for s390x target"
+#endif
+
+/* Generate subroutines used by opcodes and other parts of the VM. */
+/* The .code_sub section should be last to help static branch prediction. */
+static void build_subroutines(BuildCtx *ctx)
+{
+ |.code_sub
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Return handling ----------------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |->vm_returnp:
+ | tmll PC, FRAME_P
+ | je ->cont_dispatch
+ |
+ | // Return from pcall or xpcall fast func.
+ | nill PC, -8
+ | sgr BASE, PC // Restore caller base.
+ | lay RA, -8(RA, PC) // Rebase RA and prepend one result.
+ | lg PC, -8(BASE) // Fetch PC of previous frame.
+ | // Prepending may overwrite the pcall frame, so do it at the end.
+ | load_true ITYPE
+ | stg ITYPE, 0(RA, BASE) // Prepend true to results.
+ |
+ |->vm_returnc:
+ | aghi RD, 1 // RD = nresults+1
+ | je ->vm_unwind_yield
+ | st RD, SAVE_MULTRES
+ | tmll PC, FRAME_TYPE
+ | je ->BC_RET_Z // Handle regular return to Lua.
+ |
+ |->vm_return:
+ | // BASE = base, RA = resultofs, RD = nresults+1 (= MULTRES), PC = return
+ | lghi TMPR1, FRAME_C
+ | xgr PC, TMPR1
+ | tmll PC, FRAME_TYPE
+ | jne ->vm_returnp
+ |
+ | // Return to C.
+ | set_vmstate C
+ | nill PC, -8
+ | sgr PC, BASE
+ | lcgr PC, PC // Previous base = BASE - delta.
+ |
+ | aghi RD, -1
+ | je >2
+ |1: // Move results down.
+ | lg RB, 0(BASE, RA)
+ | stg RB, -16(BASE)
+ | la BASE, 8(BASE)
+ | aghi RD, -1
+ | jne <1
+ |2:
+ | lg L:RB, SAVE_L
+ | stg PC, L:RB->base
+ |3:
+ | llgf RD, SAVE_MULTRES
+ | lgf RA, SAVE_NRES // RA = wanted nresults+1
+ |4:
+ | cgr RA, RD
+ | jne >6 // More/less results wanted?
+ |5:
+ | lay BASE, -16(BASE)
+ | stg BASE, L:RB->top
+ |
+ |->vm_leave_cp:
+ | lg RA, SAVE_CFRAME // Restore previous C frame.
+ | stg RA, L:RB->cframe
+ | lghi CRET1, 0 // Ok return status for vm_pcall.
+ |
+ |->vm_leave_unw:
+ | restoreregs
+ | br r14
+ |
+ |6:
+ | jl >7 // Less results wanted?
+ | // More results wanted. Check stack size and fill up results with nil.
+ | cg BASE, L:RB->maxstack
+ | jh >8
+ | lghi TMPR1, LJ_TNIL
+ | stg TMPR1, -16(BASE)
+ | la BASE, 8(BASE)
+ | aghi RD, 1
+ | j <4
+ |
+ |7: // Fewer results wanted.
+ | cghi RA, 0
+ | je <5 // But check for LUA_MULTRET+1.
+ | sgr RA, RD // Negative result!
+ | sllg TMPR1, RA, 3
+ | la BASE, 0(TMPR1, BASE) // Correct top.
+ | j <5
+ |
+ |8: // Corner case: need to grow stack for filling up results.
+ | // This can happen if:
+ | // - A C function grows the stack (a lot).
+ | // - The GC shrinks the stack in between.
+ | // - A return back from a lua_call() with (high) nresults adjustment.
+ | stg BASE, L:RB->top // Save current top held in BASE (yes).
+ | st RD, SAVE_MULTRES // Need to fill only remainder with nil.
+ | lgr CARG2, RA
+ | lgr CARG1, L:RB
+ | brasl r14, extern lj_state_growstack // (lua_State *L, int n)
+ | lg BASE, L:RB->top // Need the (realloced) L->top in BASE.
+ | j <3
+ |
+ |->vm_unwind_yield:
+ | lghi CRET1, LUA_YIELD
+ | j ->vm_unwind_c_eh
+ |
+ |->vm_unwind_c: // Unwind C stack, return from vm_pcall.
+ | // (void *cframe, int errcode)
+ | lgr sp, CARG1
+ | lgfr CARG2, CRET1 // Error return status for vm_pcall.
+ |->vm_unwind_c_eh: // Landing pad for external unwinder.
+ | lg L:RB, SAVE_L
+ | lg GL:RB, L:RB->glref
+ | lghi TMPR1, ~LJ_VMST_C
+ | stg TMPR1, GL:RB->vmstate
+ | j ->vm_leave_unw
+ |
+ |->vm_unwind_ff: // Unwind C stack, return from ff pcall.
+ | // (void *cframe)
+ | nill CARG1, CFRAME_RAWMASK // Assumes high 48-bits set in CFRAME_RAWMASK.
+ | lgr sp, CARG1
+ |->vm_unwind_ff_eh: // Landing pad for external unwinder.
+ | lg L:RB, SAVE_L
+ | lghi RD, 1+1 // Really 1+2 results, incr. later.
+ | lg BASE, L:RB->base
+ | lg DISPATCH, L:RB->glref // Setup pointer to dispatch table.
+ | la DISPATCH, GG_G2DISP(DISPATCH)
+ | lg PC, -8(BASE) // Fetch PC of previous frame.
+ | load_false RA
+ | lg RB, 0(BASE)
+ | stg RA, -16(BASE) // Prepend false to error message.
+ | stg RB, -8(BASE)
+ | lghi RA, -16 // Results start at BASE+RA = BASE-16.
+ | set_vmstate INTERP
+ | j ->vm_returnc // Increments RD/MULTRES and returns.
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Grow stack for calls -----------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |->vm_growstack_c: // Grow stack for C function.
+ | lghi CARG2, LUA_MINSTACK
+ | j >2
+ |
+ |->vm_growstack_v: // Grow stack for vararg Lua function.
+ | aghi RD, -16 // LJ_FR2
+ | j >1
+ |
+ |->vm_growstack_f: // Grow stack for fixarg Lua function.
+ | // BASE = new base, RD = nargs+1, RB = L, PC = first PC
+ | sllg RD, NARGS:RD, 3
+ | lay RD, -8(RD, BASE)
+ |1:
+ | llgc RA, (PC2PROTO(framesize)-4)(PC)
+ | la PC, 4(PC) // Must point after first instruction.
+ | stg BASE, L:RB->base
+ | stg RD, L:RB->top
+ | stg PC, SAVE_PC
+ | lgr CARG2, RA
+ |2:
+ | // RB = L, L->base = new base, L->top = top
+ | lgr CARG1, L:RB
+ | brasl r14, extern lj_state_growstack // (lua_State *L, int n)
+ | lg BASE, L:RB->base
+ | lg RD, L:RB->top
+ | lg LFUNC:RB, -16(BASE)
+ | cleartp LFUNC:RB
+ | sgr RD, BASE
+ | srlg RD, RD, 3
+ | aghi NARGS:RD, 1
+ | // BASE = new base, RB = LFUNC, RD = nargs+1
+ | ins_callt // Just retry the call.
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Entry points into the assembler VM ---------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |->vm_resume: // Setup C frame and resume thread.
+ | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0)
+ | saveregs
+ | lgr L:RB, CARG1
+ | stg CARG1, SAVE_L
+ | lgr RA, CARG2
+ | lghi PC, FRAME_CP
+ | lghi RD, 0
+ | la KBASE, CFRAME_RESUME(sp)
+ | lg DISPATCH, L:RB->glref // Setup pointer to dispatch table.
+ | aghi DISPATCH, GG_G2DISP
+ | stg RD, SAVE_PC // Any value outside of bytecode is ok.
+ | stg RD, SAVE_CFRAME
+ | st RD, SAVE_NRES
+ | stg RD, SAVE_ERRF
+ | stg KBASE, L:RB->cframe
+ | clm RD, 1, L:RB->status
+ | je >2 // Initial resume (like a call).
+ |
+ | // Resume after yield (like a return).
+ | stg L:RB, (DISPATCH_GL(cur_L))(DISPATCH)
+ | set_vmstate INTERP
+ | stc RD, L:RB->status
+ | lg BASE, L:RB->base
+ | lg RD, L:RB->top
+ | sgr RD, RA
+ | srlg RD, RD, 3
+ | aghi RD, 1 // RD = nresults+1
+ | sgr RA, BASE // RA = resultofs
+ | lg PC, -8(BASE)
+ | st RD, SAVE_MULTRES
+ | tmll PC, FRAME_TYPE
+ | je ->BC_RET_Z
+ | j ->vm_return
+ |
+ |->vm_pcall: // Setup protected C frame and enter VM.
+ | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef)
+ | saveregs
+ | lghi PC, FRAME_CP
+ | llgfr CARG4, CARG4
+ | stg CARG4, SAVE_ERRF
+ | j >1
+ |
+ |->vm_call: // Setup C frame and enter VM.
+ | // (lua_State *L, TValue *base, int nres1)
+ | saveregs
+ | lghi PC, FRAME_C
+ |
+ |1: // Entry point for vm_pcall above (PC = ftype).
+ | st CARG3, SAVE_NRES
+ | lgr L:RB, CARG1
+ | stg CARG1, SAVE_L
+ | lgr RA, CARG2 // Caveat: RA = CARG3.
+ |
+ | lg DISPATCH, L:RB->glref // Setup pointer to dispatch table.
+ | lg KBASE, L:RB->cframe // Add our C frame to cframe chain.
+ | stg KBASE, SAVE_CFRAME
+ | stg L:RB, SAVE_PC // Any value outside of bytecode is ok.
+ | aghi DISPATCH, GG_G2DISP
+ | stg sp, L:RB->cframe
+ |
+ |2: // Entry point for vm_resume/vm_cpcall (RA = base, RB = L, PC = ftype).
+ | stg L:RB, DISPATCH_GL(cur_L)(DISPATCH)
+ | set_vmstate INTERP
+ | lg BASE, L:RB->base // BASE = old base (used in vmeta_call).
+ | agr PC, RA
+ | sgr PC, BASE // PC = frame delta + frame type
+ |
+ | lg RD, L:RB->top
+ | sgr RD, RA
+ | srlg NARGS:RD, NARGS:RD, 3
+ | aghi NARGS:RD, 1 // RD = nargs+1
+ |
+ |->vm_call_dispatch:
+ | lg LFUNC:RB, -16(RA)
+ | checkfunc LFUNC:RB, ->vmeta_call // Ensure KBASE defined and != BASE.
+ |
+ |->vm_call_dispatch_f:
+ | lgr BASE, RA
+ | ins_call
+ | // BASE = new base, RB = func, RD = nargs+1, PC = caller PC
+ |
+ |->vm_cpcall: // Setup protected C frame, call C.
+ | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp)
+ | saveregs
+ | lgr L:RB, CARG1
+ | stg L:RB, SAVE_L
+ | stg L:RB, SAVE_PC // Any value outside of bytecode is ok.
+ |
+ | lg KBASE, L:RB->stack // Compute -savestack(L, L->top).
+ | sg KBASE, L:RB->top
+ | lg DISPATCH, L:RB->glref // Setup pointer to dispatch table.
+ | lghi TMPR0, 0
+ | stg TMPR0, SAVE_ERRF // No error function.
+ | st KBASE, SAVE_NRES // Neg. delta means cframe w/o frame.
+ | aghi DISPATCH, GG_G2DISP
+ | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe).
+ |
+ | lg KBASE, L:RB->cframe // Add our C frame to cframe chain.
+ | stg KBASE, SAVE_CFRAME
+ | stg sp, L:RB->cframe
+ | stg L:RB, DISPATCH_GL(cur_L)(DISPATCH)
+ |
+ | basr r14, CARG4 // (lua_State *L, lua_CFunction func, void *ud)
+ | // TValue * (new base) or NULL returned in r2 (CRET1).
+ | cghi CRET1, 0
+ | je ->vm_leave_cp // No base? Just remove C frame.
+ | lgr RA, CRET1
+ | lghi PC, FRAME_CP
+ | j <2 // Else continue with the call.
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Metamethod handling ------------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |//-- Continuation dispatch ----------------------------------------------
+ |
+ |->cont_dispatch:
+ | // BASE = meta base, RA = resultofs, RD = nresults+1 (also in MULTRES)
+ | agr RA, BASE
+ | nill PC, -8
+ | lgr RB, BASE
+ | sgr BASE, PC // Restore caller BASE.
+ | sllg TMPR1, RD, 3
+ | lghi TMPR0, LJ_TNIL
+ | stg TMPR0, -8(RA, TMPR1) // Ensure one valid arg.
+ | lgr RC, RA // ... in [RC]
+ | lg PC, -24(RB) // Restore PC from [cont|PC].
+ | lg RA, -32(RB)
+ |.if FFI
+ | clfi RA, 1
+ | jle >1
+ |.endif
+ | lg LFUNC:KBASE, -16(BASE)
+ | cleartp LFUNC:KBASE
+ | lg KBASE, LFUNC:KBASE->pc
+ | lg KBASE, (PC2PROTO(k))(KBASE)
+ | // BASE = base, RC = result, RB = meta base
+ | br RA // Jump to continuation.
+ |
+ |.if FFI
+ |1:
+ | je ->cont_ffi_callback // cont = 1: return from FFI callback.
+ | // cont = 0: Tail call from C function.
+ | sgr RB, BASE
+ | srl RB, 3
+ | ahi RB, -3
+ | llgfr RD, RB
+ | j ->vm_call_tail
+ |.endif
+ |
+ |->cont_cat: // BASE = base, RC = result, RB = mbase
+ | llgc RA, PC_RB
+ | sllg RA, RA, 3
+ | aghi RB, -32
+ | la RA, 0(RA, BASE)
+ | sgr RA, RB
+ | je ->cont_ra
+ | lcgr RA, RA
+ | srlg RA, RA, 3
+ | lg L:CARG1, SAVE_L
+ | stg BASE, L:CARG1->base
+ | lgfr CARG3, RA // Caveat: RA == CARG3.
+ | lg TMPR0, 0(RC)
+ | stg TMPR0, 0(RB)
+ | lgr CARG2, RB
+ | j ->BC_CAT_Z
+ |
+ |//-- Table indexing metamethods -----------------------------------------
+ |
+ |->vmeta_tgets:
+ | settp STR:RC, LJ_TSTR // STR:RC = GCstr *
+ | stg STR:RC, SAVE_TMP
+ | la RC, SAVE_TMP
+ | llgc TMPR1, PC_OP
+ | cghi TMPR1, BC_GGET
+ | jne >1
+ | settp TAB:RA, TAB:RB, LJ_TTAB // TAB:RB = GCtab *
+ | lay RB, (DISPATCH_GL(tmptv))(DISPATCH) // Store fn->l.env in g->tmptv.
+ | stg TAB:RA, 0(RB)
+ | j >2
+ |
+ |->vmeta_tgetb:
+ | llgc RC, PC_RC
+ | setint RC
+ | stg RC, SAVE_TMP
+ | la RC, SAVE_TMP
+ | j >1
+ |
+ |->vmeta_tgetv:
+ | llgc RC, PC_RC // Reload TValue *k from RC.
+ | sllg RC, RC, 3
+ | la RC, 0(RC, BASE)
+ |1:
+ | llgc RB, PC_RB // Reload TValue *t from RB.
+ | sllg RB, RB, 3
+ | la RB, 0(RB, BASE)
+ |2:
+ | lg L:CARG1, SAVE_L
+ | stg BASE, L:CARG1->base
+ | lgr CARG2, RB
+ | lgr CARG3, RC
+ | lgr L:RB, L:CARG1
+ | stg PC, SAVE_PC
+ | brasl r14, extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k)
+ | // TValue * (finished) or NULL (metamethod) returned in r2 (CRET1).
+ | lg BASE, L:RB->base
+ | ltgr RC, CRET1
+ | je >3
+ |->cont_ra: // BASE = base, RC = result
+ | llgc RA, PC_RA
+ | sllg RA, RA, 3
+ | lg RB, 0(RC)
+ | stg RB, 0(RA, BASE)
+ | ins_next
+ |
+ |3: // Call __index metamethod.
+ | // BASE = base, L->top = new base, stack = cont/func/t/k
+ | lg RA, L:RB->top
+ | stg PC, -24(RA) // [cont|PC]
+ | la PC, FRAME_CONT(RA)
+ | sgr PC, BASE
+ | lg LFUNC:RB, -16(RA) // Guaranteed to be a function here.
+ | lghi NARGS:RD, 2+1 // 2 args for func(t, k).
+ | cleartp LFUNC:RB
+ | j ->vm_call_dispatch_f
+ |
+ |->vmeta_tgetr:
+ | lgr CARG1, TAB:RB
+ | lgfr CARG2, RC
+ | brasl r14, extern lj_tab_getinth // (GCtab *t, int32_t key)
+ | // cTValue * or NULL returned in r2 (CRET1).
+ | llgc RA, PC_RA
+ | ltgr RC, CRET1
+ | jne ->BC_TGETR_Z
+ | lghi ITYPE, LJ_TNIL
+ | j ->BC_TGETR2_Z
+ |
+ |//-----------------------------------------------------------------------
+ |
+ |->vmeta_tsets:
+ | settp STR:RC, LJ_TSTR // STR:RC = GCstr *
+ | stg STR:RC, SAVE_TMP
+ | la RC, SAVE_TMP
+ | llgc TMPR0, PC_OP
+ | cghi TMPR0, BC_GSET
+ | jne >1
+ | settp TAB:RA, TAB:RB, LJ_TTAB // TAB:RB = GCtab *
+ | lay RB, (DISPATCH_GL(tmptv))(DISPATCH) // Store fn->l.env in g->tmptv.
+ | stg TAB:RA, 0(RB)
+ | j >2
+ |
+ |->vmeta_tsetb:
+ | llgc RC, PC_RC
+ | setint RC
+ | stg RC, SAVE_TMP
+ | la RC, SAVE_TMP
+ | j >1
+ |
+ |->vmeta_tsetv:
+ | llgc RC, PC_RC // Reload TValue *k from RC.
+ | sllg RC, RC, 3
+ | la RC, 0(RC, BASE)
+ |1:
+ | llgc RB, PC_RB // Reload TValue *t from RB.
+ | sllg RB, RB, 3
+ | la RB, 0(RB, BASE)
+ |2:
+ | lg L:CARG1, SAVE_L
+ | stg BASE, L:CARG1->base
+ | lgr CARG2, RB
+ | lgr CARG3, RC
+ | lgr L:RB, L:CARG1
+ | stg PC, SAVE_PC
+ | brasl r14, extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k)
+ | // TValue * (finished) or NULL (metamethod) returned in r2 (CRET1).
+ | lg BASE, L:RB->base
+ | ltgr RC, CRET1
+ | je >3
+ | // NOBARRIER: lj_meta_tset ensures the table is not black.
+ | llgc RA, PC_RA
+ | sllg RA, RA, 3
+ | lg RB, 0(RA, BASE)
+ | stg RB, 0(RC)
+ |->cont_nop: // BASE = base, (RC = result)
+ | ins_next
+ |
+ |3: // Call __newindex metamethod.
+ | // BASE = base, L->top = new base, stack = cont/func/t/k/(v)
+ | lg RA, L:RB->top
+ | stg PC, -24(RA) // [cont|PC]
+ | llgc RC, PC_RA
+ | // Copy value to third argument.
+ | sllg RB, RC, 3
+ | lg RB, 0(RB, BASE)
+ | stg RB, 16(RA)
+ | la PC, FRAME_CONT(RA)
+ | sgr PC, BASE
+ | lg LFUNC:RB, -16(RA) // Guaranteed to be a function here.
+ | lghi NARGS:RD, 3+1 // 3 args for func(t, k, v).
+ | cleartp LFUNC:RB
+ | j ->vm_call_dispatch_f
+ |
+ |->vmeta_tsetr:
+ | lg L:CARG1, SAVE_L
+ | lgr CARG2, TAB:RB
+ | stg BASE, L:CARG1->base
+ | lgfr CARG3, RC
+ | stg PC, SAVE_PC
+ | brasl r14, extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
+ | // TValue * returned in r2 (CRET1).
+ | lgr RC, CRET1
+ | llgc RA, PC_RA
+ | j ->BC_TSETR_Z
+ |
+ |//-- Comparison metamethods ---------------------------------------------
+ |
+ |->vmeta_comp:
+ | llgh RD, PC_RD
+ | sllg RD, RD, 3
+ | llgc RA, PC_RA
+ | sllg RA, RA, 3
+ | lg L:RB, SAVE_L
+ | stg BASE, L:RB->base
+ | la CARG2, 0(RA, BASE)
+ | la CARG3, 0(RD, BASE) // Caveat: RA == CARG3
+ | lgr CARG1, L:RB
+ | llgc CARG4, PC_OP
+ | stg PC, SAVE_PC
+ | brasl r14, extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op)
+ | // 0/1 or TValue * (metamethod) returned in r2 (CRET1).
+ |3:
+ | lgr RC, CRET1
+ | lg BASE, L:RB->base
+ | clgfi RC, 1
+ | jh ->vmeta_binop
+ |4:
+ | la PC, 4(PC)
+ | jl >6
+ |5:
+ | llgh RD, PC_RD
+ | branchPC RD
+ |6:
+ | ins_next
+ |
+ |->cont_condt: // BASE = base, RC = result
+ | la PC, 4(PC)
+ | lg ITYPE, 0(RC)
+ | srag ITYPE, ITYPE, 47
+ | lghi TMPR0, LJ_TISTRUECOND
+ | clr ITYPE, TMPR0 // Branch if result is true.
+ | jl <5
+ | j <6
+ |
+ |->cont_condf: // BASE = base, RC = result
+ | lg ITYPE, 0(RC)
+ | srag ITYPE, ITYPE, 47
+ | lghi TMPR0, LJ_TISTRUECOND
+ | clr ITYPE, TMPR0 // Branch if result is false.
+ | j <4
+ |
+ |->vmeta_equal:
+ | cleartp TAB:RD
+ | lay PC, -4(PC)
+ | lgr CARG2, RA
+ | lgfr CARG4, RB
+ | lg L:RB, SAVE_L
+ | stg BASE, L:RB->base
+ | lgr CARG3, RD
+ | lgr CARG1, L:RB
+ | stg PC, SAVE_PC
+ | brasl r14, extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne)
+ | // 0/1 or TValue * (metamethod) returned in r2 (CRET1).
+ | j <3
+ |
+ |->vmeta_equal_cd:
+ |.if FFI
+ | lay PC, -4(PC)
+ | lg L:RB, SAVE_L
+ | stg BASE, L:RB->base
+ | lgr CARG1, L:RB
+ | llgf CARG2, -4(PC)
+ | stg PC, SAVE_PC
+ | brasl r14, extern lj_meta_equal_cd // (lua_State *L, BCIns ins)
+ | // 0/1 or TValue * (metamethod) returned in r2 (CRET1).
+ | j <3
+ |.endif
+ |
+ |->vmeta_istype:
+ | lg L:RB, SAVE_L
+ | stg BASE, L:RB->base
+ | llgfr CARG2, RA
+ | llgfr CARG3, RD // Caveat: CARG3 == RA.
+ | lgr L:CARG1, L:RB
+ | stg PC, SAVE_PC
+ | brasl r14, extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
+ | lg BASE, L:RB->base
+ | j <6
+ |
+ |//-- Arithmetic metamethods ---------------------------------------------
+ |
+ |->vmeta_arith_vno:
+ | llgc RB, PC_RB
+ | llgc RC, PC_RC
+ |->vmeta_arith_vn:
+ | sllg RB, RB, 3
+ | sllg RC, RC, 3
+ | la RB, 0(RB, BASE)
+ | la RC, 0(RC, KBASE)
+ | j >1
+ |
+ |->vmeta_arith_nvo:
+ | llgc RC, PC_RC
+ | llgc RB, PC_RB
+ |->vmeta_arith_nv:
+ | sllg RC, RC, 3
+ | sllg RB, RB, 3
+ | la TMPR1, 0(RC, KBASE)
+ | la RC, 0(RB, BASE)
+ | lgr RB, TMPR1
+ | j >1
+ |
+ |->vmeta_unm:
+ | llgh RD, PC_RD
+ | sllg RD, RD, 3
+ | la RC, 0(RD, BASE)
+ | lgr RB, RC
+ | j >1
+ |
+ |->vmeta_arith_vvo:
+ | llgc RB, PC_RB
+ | llgc RC, PC_RC
+ |->vmeta_arith_vv:
+ | sllg RC, RC, 3
+ | sllg RB, RB, 3
+ | la RB, 0(RB, BASE)
+ | la RC, 0(RC, BASE)
+ |1:
+ | llgc RA, PC_RA
+ | sllg RA, RA, 3
+ | la RA, 0(RA, BASE)
+ | llgc CARG5, PC_OP // Caveat: CARG5 == RD.
+ | lgr CARG2, RA
+ | lgr CARG3, RB // Caveat: CARG3 == RA.
+ | // lgr CARG4, RC // Caveat: CARG4 == RC (nop, so commented out).
+ | lg L:CARG1, SAVE_L
+ | stg BASE, L:CARG1->base
+ | lgr L:RB, L:CARG1
+ | stg PC, SAVE_PC
+ | brasl r14, extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
+ | // NULL (finished) or TValue * (metamethod) returned in r2 (CRET1).
+ | lg BASE, L:RB->base
+ | cghi CRET1, 0
+ | lgr RC, CRET1
+ | je ->cont_nop
+ |
+ | // Call metamethod for binary op.
+ |->vmeta_binop:
+ | // BASE = base, RC = new base, stack = cont/func/o1/o2
+ | lgr RA, RC
+ | sgr RC, BASE
+ | stg PC, -24(RA) // [cont|PC]
+ | la PC, FRAME_CONT(RC)
+ | lghi NARGS:RD, 2+1 // 2 args for func(o1, o2).
+ | j ->vm_call_dispatch
+ |
+ |->vmeta_len:
+ | llgh RD, PC_RD
+ | sllg RD, RD, 3
+ | lg L:RB, SAVE_L
+ | stg BASE, L:RB->base
+ | la CARG2, 0(RD, BASE)
+ | lgr L:CARG1, L:RB
+ | stg PC, SAVE_PC
+ | brasl r14, extern lj_meta_len // (lua_State *L, TValue *o)
+ | // NULL (retry) or TValue * (metamethod) returned in r2 (CRET1).
+ | lgr RC, CRET1
+ | lg BASE, L:RB->base
+#if LJ_52
+ | cghi RC, 0
+ | jne ->vmeta_binop // Binop call for compatibility.
+ | llgh RD, PC_RD
+ | sllg RD, RD, 3
+ | lg TAB:CARG1, 0(RD, BASE)
+ | cleartp TAB:CARG1
+ | j ->BC_LEN_Z
+#else
+ | j ->vmeta_binop // Binop call for compatibility.
+#endif
+ |
+ |//-- Call metamethod ----------------------------------------------------
+ |
+ |->vmeta_call_ra:
+ | la RA, 16(RA, BASE) // RA previously set to RA*8.
+ |->vmeta_call: // Resolve and call __call metamethod.
+ | // BASE = old base, RA = new base, RC = nargs+1, PC = return
+ | stg NARGS:RD, SAVE_TMP // Save RA, RC for us (not sure about this).
+ | lgr RB, RA
+ | lg L:CARG1, SAVE_L
+ | stg BASE, L:CARG1->base
+ | lay CARG2, -16(RA)
+ | sllg RD, RD, 3
+ | lay CARG3, -8(RA, RD) // Caveat: CARG3 == RA.
+ | stg PC, SAVE_PC
+ | brasl r14, extern lj_meta_call // (lua_State *L, TValue *func, TValue *top)
+ | lgr RA, RB
+ | lg L:RB, SAVE_L
+ | lg BASE, L:RB->base
+ | lg NARGS:RD, SAVE_TMP
+ | lg LFUNC:RB, -16(RA)
+ | aghi NARGS:RD, 1 // 32-bit on x64.
+ | // This is fragile. L->base must not move, KBASE must always be defined.
+ | cgr KBASE, BASE // Continue with CALLT if flag set.
+ | je ->BC_CALLT_Z
+ | cleartp LFUNC:RB
+ | lgr BASE, RA
+ | ins_call // Otherwise call resolved metamethod.
+ |
+ |//-- Argument coercion for 'for' statement ------------------------------
+ |
+ |->vmeta_for:
+ | lg L:RB, SAVE_L
+ | stg BASE, L:RB->base
+ | lgr CARG2, RA
+ | lgr CARG1, RB
+ | stg PC, SAVE_PC
+ | brasl r14, extern lj_meta_for // (lua_State *L, TValue *base)
+ | lg BASE, L:RB->base
+ | llgc OP, PC_OP
+ | llgc RA, PC_RA
+ | llgh RD, PC_RD
+ | sllg TMPR1, OP, 3
+ | lg TMPR1, GG_DISP2STATIC(TMPR1, DISPATCH) // Retry FORI or JFORI.
+ | br TMPR1
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Fast functions -----------------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |.macro .ffunc, name
+ |->ff_ .. name:
+ |.endmacro
+ |
+ |.macro .ffunc_1, name
+ |->ff_ .. name:
+ | clfi NARGS:RD, 1+1; jl ->fff_fallback
+ |.endmacro
+ |
+ |.macro .ffunc_2, name
+ |->ff_ .. name:
+ | clfi NARGS:RD, 2+1; jl ->fff_fallback
+ |.endmacro
+ |
+ |.macro .ffunc_n, name, op
+ | .ffunc_1 name
+ | lg TMPR0, 0(BASE)
+ | checknumtp TMPR0, ->fff_fallback
+ | op f0, 0(BASE)
+ |.endmacro
+ |
+ |.macro .ffunc_n, name
+ | .ffunc_n name, ld
+ |.endmacro
+ |
+ |.macro .ffunc_nn, name
+ | .ffunc_2 name
+ | lg TMPR1, 0(BASE)
+ | lg TMPR0, 8(BASE)
+ | ld FARG1, 0(BASE)
+ | ld FARG2, 8(BASE)
+ | checknumtp TMPR1, ->fff_fallback
+ | checknumtp TMPR0, ->fff_fallback
+ |.endmacro
+ |
+ |// Inlined GC threshold check. Caveat: uses label 1.
+ |.macro ffgccheck
+ | lg RB, (DISPATCH_GL(gc.total))(DISPATCH)
+ | clg RB, (DISPATCH_GL(gc.threshold))(DISPATCH)
+ | jl >1
+ | brasl r14, ->fff_gcstep
+ |1:
+ |.endmacro
+ |
+ |//-- Base library: checks -----------------------------------------------
+ |
+ |.ffunc_1 assert
+ | lg RB, 0(BASE)
+ | srag ITYPE, RB, 47
+ | clfi ITYPE, LJ_TISTRUECOND; jhe ->fff_fallback
+ | lg PC, -8(BASE)
+ | st RD, SAVE_MULTRES
+ | lg RB, 0(BASE)
+ | stg RB, -16(BASE)
+ | ahi RD, -2
+ | je >2
+ | lgr RA, BASE
+ |1:
+ | la RA, 8(RA)
+ | lg RB, 0(RA)
+ | stg RB, -16(RA)
+ | brct RD, <1
+ |2:
+ | llgf RD, SAVE_MULTRES
+ | j ->fff_res_
+ |
+ |.ffunc_1 type
+ | lg RC, 0(BASE)
+ | srag RC, RC, 47
+ | lghi RB, LJ_TISNUM
+ | clgr RC, RB
+ | jnl >1
+ | lgr RC, RB
+ |1:
+ | lghi TMPR0, -1
+ | xgr RC, TMPR0
+ |2:
+ | lg CFUNC:RB, -16(BASE)
+ | cleartp CFUNC:RB
+ | sllg RC, RC, 3
+ | lg STR:RC, ((char *)(&((GCfuncC *)0)->upvalue))(RC, CFUNC:RB)
+ | lg PC, -8(BASE)
+ | settp STR:RC, LJ_TSTR
+ | stg STR:RC, -16(BASE)
+ | j ->fff_res1
+ |
+ |//-- Base library: getters and setters ---------------------------------
+ |
+ |.ffunc_1 getmetatable
+ | lg TAB:RB, 0(BASE)
+ | lg PC, -8(BASE)
+ | checktab TAB:RB, >6
+ |1: // Field metatable must be at same offset for GCtab and GCudata!
+ | lg TAB:RB, TAB:RB->metatable
+ |2:
+ | lghi TMPR0, LJ_TNIL
+ | stg TMPR0, -16(BASE)
+ | cghi TAB:RB, 0
+ | je ->fff_res1
+ | settp TAB:RC, TAB:RB, LJ_TTAB
+ | stg TAB:RC, -16(BASE) // Store metatable as default result.
+ | lg STR:RC, (DISPATCH_GL(gcroot)+8*(GCROOT_MMNAME+MM_metatable))(DISPATCH)
+ | llgf RA, TAB:RB->hmask
+ | n RA, STR:RC->hash
+ | settp STR:RC, LJ_TSTR
+ | mghi RA, #NODE
+ | ag NODE:RA, TAB:RB->node
+ |3: // Rearranged logic, because we expect _not_ to find the key.
+ | cg STR:RC, NODE:RA->key
+ | je >5
+ |4:
+ | ltg NODE:RA, NODE:RA->next
+ | jne <3
+ | j ->fff_res1 // Not found, keep default result.
+ |5:
+ | lg RB, NODE:RA->val
+ | cghi RB, LJ_TNIL; je ->fff_res1 // Ditto for nil value.
+ | stg RB, -16(BASE) // Return value of mt.__metatable.
+ | j ->fff_res1
+ |
+ |6:
+ | clfi ITYPE, LJ_TUDATA; je <1
+ | clfi ITYPE, LJ_TISNUM; jh >7
+ | lhi ITYPE, LJ_TISNUM
+ |7:
+ | lhi TMPR0, -1
+ | xr ITYPE, TMPR0 // not ITYPE
+ | llgfr ITYPE, ITYPE
+ | sllg ITYPE, ITYPE, 3
+ | lg TAB:RB, (DISPATCH_GL(gcroot[GCROOT_BASEMT]))(ITYPE, DISPATCH)
+ | j <2
+ |
+ |.ffunc_2 setmetatable
+ | lg TAB:RB, 0(BASE)
+ | lgr TAB:TMPR1, TAB:RB
+ | checktab TAB:RB, ->fff_fallback
+ | // Fast path: no mt for table yet and not clearing the mt.
+ | lghi TMPR0, 0
+ | cg TMPR0, TAB:RB->metatable; jne ->fff_fallback
+ | lg TAB:RA, 8(BASE)
+ | checktab TAB:RA, ->fff_fallback
+ | stg TAB:RA, TAB:RB->metatable
+ | lg PC, -8(BASE)
+ | stg TAB:TMPR1, -16(BASE) // Return original table.
+ | tm TAB:RB->marked, LJ_GC_BLACK // isblack(table)
+ | je >1
+ | // Possible write barrier. Table is black, but skip iswhite(mt) check.
+ | barrierback TAB:RB, RC
+ |1:
+ | j ->fff_res1
+ |
+ |.ffunc_2 rawget
+ | lg TAB:CARG2, 0(BASE)
+ | checktab TAB:CARG2, ->fff_fallback
+ | la CARG3, 8(BASE)
+ | lg CARG1, SAVE_L
+ | brasl r14, extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
+ | // cTValue * returned in r2 (CRET1).
+ | // Copy table slot.
+ | lg RB, 0(CRET1)
+ | lg PC, -8(BASE)
+ | stg RB, -16(BASE)
+ | j ->fff_res1
+ |
+ |//-- Base library: conversions ------------------------------------------
+ |
+ |.ffunc tonumber
+ | // Only handles the number case inline (without a base argument).
+ | clfi NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument.
+ | lg RB, 0(BASE)
+ | checknumber RB, ->fff_fallback
+ | lg PC, -8(BASE)
+ | stg RB, -16(BASE)
+ | j ->fff_res1
+ |
+ |.ffunc_1 tostring
+ | // Only handles the string or number case inline.
+ | lg PC, -8(BASE)
+ | lg STR:RB, 0(BASE)
+ | checktp_nc STR:RB, LJ_TSTR, >3
+ | // A __tostring method in the string base metatable is ignored.
+ |2:
+ | stg STR:RB, -16(BASE)
+ | j ->fff_res1
+ |3: // Handle numbers inline, unless a number base metatable is present.
+ | clfi ITYPE, LJ_TISNUM; jh ->fff_fallback_1
+ | lghi TMPR0, 0
+ | cg TMPR0, (DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM]))(DISPATCH)
+ | jne ->fff_fallback
+ | ffgccheck // Caveat: uses label 1.
+ | lg L:RB, SAVE_L
+ | stg BASE, L:RB->base // Add frame since C call can throw.
+ | stg PC, SAVE_PC // Redundant (but a defined value).
+ | lgr CARG2, BASE // Otherwise: CARG2 == BASE
+ | lgr L:CARG1, L:RB
+ | brasl r14, extern lj_strfmt_number // (lua_State *L, cTValue *o)
+ | // GCstr returned in r2 (CRET1).
+ | lg BASE, L:RB->base
+ | settp STR:RB, CRET1, LJ_TSTR
+ | j <2
+ |
+ |//-- Base library: iterators -------------------------------------------
+ |
+ |.ffunc_1 next
+ | je >2 // Missing 2nd arg?
+ |1:
+ | lg CARG2, 0(BASE)
+ | checktab CARG2, ->fff_fallback
+ | lg L:RB, SAVE_L
+ | stg BASE, L:RB->base // Add frame since C call can throw.
+ | stg BASE, L:RB->top // Dummy frame length is ok.
+ | lg PC, -8(BASE)
+ | la CARG3, 8(BASE)
+ | lgr CARG1, L:RB
+ | stg PC, SAVE_PC // Needed for ITERN fallback.
+ | brasl r14, extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key)
+ | // Flag returned in r2 (CRET1).
+ | lg BASE, L:RB->base
+ | ltr RD, CRET1; je >3 // End of traversal?
+ | // Copy key and value to results.
+ | lg RB, 8(BASE)
+ | lg RD, 16(BASE)
+ | stg RB, -16(BASE)
+ | stg RD, -8(BASE)
+ |->fff_res2:
+ | lghi RD, 1+2
+ | j ->fff_res
+ |2: // Set missing 2nd arg to nil.
+ | lghi TMPR0, LJ_TNIL
+ | stg TMPR0, 8(BASE)
+ | j <1
+ |3: // End of traversal: return nil.
+ | lghi TMPR0, LJ_TNIL
+ | stg TMPR0, -16(BASE)
+ | j ->fff_res1
+ |
+ |.ffunc_1 pairs
+ | lg TAB:RB, 0(BASE)
+ | lgr TMPR1, TAB:RB
+ | checktab TAB:RB, ->fff_fallback
+#if LJ_52
+ | ltg TMPR0, TAB:RB->metatable; jne ->fff_fallback
+#endif
+ | lg CFUNC:RD, -16(BASE)
+ | cleartp CFUNC:RD
+ | lg CFUNC:RD, CFUNC:RD->upvalue[0]
+ | settp CFUNC:RD, LJ_TFUNC
+ | lg PC, -8(BASE)
+ | stg CFUNC:RD, -16(BASE)
+ | stg TMPR1, -8(BASE)
+ | lghi TMPR0, LJ_TNIL
+ | stg TMPR0, 0(BASE)
+ | lghi RD, 1+3
+ | j ->fff_res
+ |
+ |.ffunc_2 ipairs_aux
+ | lg TAB:RB, 0(BASE)
+ | checktab TAB:RB, ->fff_fallback
+ | lg RA, 8(BASE)
+ | checkint RA, ->fff_fallback
+ | lg PC, -8(BASE)
+ | aghi RA, 1
+ | setint ITYPE, RA
+ | stg ITYPE, -16(BASE)
+ | cl RA, TAB:RB->asize; jhe >2 // Not in array part?
+ | lg RD, TAB:RB->array
+ | lgfr TMPR1, RA
+ | sllg TMPR1, TMPR1, 3
+ | la RD, 0(TMPR1, RD)
+ |1:
+ | lg TMPR0, 0(RD)
+ | cghi TMPR0, LJ_TNIL; je ->fff_res0
+ | // Copy array slot.
+ | stg TMPR0, -8(BASE)
+ | j ->fff_res2
+ |2: // Check for empty hash part first. Otherwise call C function.
+ | lt TMPR0, TAB:RB->hmask; je ->fff_res0
+ | lgr CARG1, TAB:RB
+ | lgfr CARG2, RA
+ | brasl r14, extern lj_tab_getinth // (GCtab *t, int32_t key)
+ | // cTValue * or NULL returned in r2 (CRET1).
+ | ltgr RD, CRET1
+ | jne <1
+ |->fff_res0:
+ | lghi RD, 1+0
+ | j ->fff_res
+ |
+ |.ffunc_1 ipairs
+ | lg TAB:RB, 0(BASE)
+ | lgr TMPR1, TAB:RB
+ | checktab TAB:RB, ->fff_fallback
+#if LJ_52
+ | lghi TMPR0, 0
+ | cg TMPR0, TAB:RB->metatable; jne ->fff_fallback
+#endif
+ | lg CFUNC:RD, -16(BASE)
+ | cleartp CFUNC:RD
+ | lg CFUNC:RD, CFUNC:RD->upvalue[0]
+ | settp CFUNC:RD, LJ_TFUNC
+ | lg PC, -8(BASE)
+ | stg CFUNC:RD, -16(BASE)
+ | stg TMPR1, -8(BASE)
+ | llihf RD, LJ_TISNUM<<15
+ | stg RD, 0(BASE)
+ | lghi RD, 1+3
+ | j ->fff_res
+ |
+ |//-- Base library: catch errors ----------------------------------------
+ |
+ |.ffunc_1 pcall
+ | la RA, 16(BASE)
+ | aghi NARGS:RD, -1
+ | lghi PC, 16+FRAME_PCALL
+ |1:
+ | llgc RB, (DISPATCH_GL(hookmask))(DISPATCH)
+ | srlg RB, RB, HOOK_ACTIVE_SHIFT(r0)
+ | nill RB, 1 // High bits already zero (from load).
+ | agr PC, RB // Remember active hook before pcall.
+ | // Note: this does a (harmless) copy of the function to the PC slot, too.
+ | lgr KBASE, RD
+ |2:
+ | sllg TMPR1, KBASE, 3
+ | lg RB, -24(TMPR1, RA)
+ | stg RB, -16(TMPR1, RA)
+ | aghi KBASE, -1
+ | jh <2
+ | j ->vm_call_dispatch
+ |
+ |.ffunc_2 xpcall
+ | lg LFUNC:RA, 8(BASE)
+ | checktp_nc LFUNC:RA, LJ_TFUNC, ->fff_fallback
+ | lg LFUNC:RB, 0(BASE) // Swap function and traceback.
+ | stg LFUNC:RA, 0(BASE)
+ | stg LFUNC:RB, 8(BASE)
+ | la RA, 24(BASE)
+ | aghi NARGS:RD, -2
+ | lghi PC, 24+FRAME_PCALL
+ | j <1
+ |
+ |//-- Coroutine library --------------------------------------------------
+ |
+ |.macro coroutine_resume_wrap, resume
+ |.if resume
+ |.ffunc_1 coroutine_resume
+ | lg L:RB, 0(BASE)
+ | lgr L:TMPR0, L:RB // Save type for checktptp.
+ | cleartp L:RB
+ |.else
+ |.ffunc coroutine_wrap_aux
+ | lg CFUNC:RB, -16(BASE)
+ | cleartp CFUNC:RB
+ | lg L:RB, CFUNC:RB->upvalue[0].gcr
+ | cleartp L:RB
+ |.endif
+ | lg PC, -8(BASE)
+ | stg PC, SAVE_PC
+ | stg L:RB, SAVE_TMP
+ |.if resume
+ | checktptp L:TMPR0, LJ_TTHREAD, ->fff_fallback
+ |.endif
+ | ltg TMPR0, L:RB->cframe; jne ->fff_fallback
+ | cli L:RB->status, LUA_YIELD; jh ->fff_fallback
+ | lg RA, L:RB->top
+ | je >1 // Status != LUA_YIELD (i.e. 0)?
+ | cg RA, L:RB->base // Check for presence of initial func.
+ | je ->fff_fallback
+ | lg PC, -8(RA) // Move initial function up.
+ | stg PC, 0(RA)
+ | la RA, 8(RA)
+ |1:
+ | sllg TMPR1, NARGS:RD, 3
+ |.if resume
+ | lay PC, -16(TMPR1, RA) // Check stack space (-1-thread).
+ |.else
+ | lay PC, -8(TMPR1, RA) // Check stack space (-1).
+ |.endif
+ | clg PC, L:RB->maxstack; jh ->fff_fallback
+ | stg PC, L:RB->top
+ |
+ | lg L:RB, SAVE_L
+ | stg BASE, L:RB->base
+ |.if resume
+ | la BASE, 8(BASE) // Keep resumed thread in stack for GC.
+ |.endif
+ | stg BASE, L:RB->top
+ |.if resume
+ | lay RB, -24(TMPR1, BASE) // RB = end of source for stack move.
+ |.else
+ | lay RB, -16(TMPR1, BASE) // RB = end of source for stack move.
+ |.endif
+ | sgr RB, PC // Relative to PC.
+ |
+ | cgr PC, RA
+ | je >3
+ |2: // Move args to coroutine.
+ | lg RC, 0(RB, PC)
+ | stg RC, -8(PC)
+ | lay PC, -8(PC)
+ | cgr PC, RA
+ | jne <2
+ |3:
+ | lgr CARG2, RA
+ | lg L:CARG1, SAVE_TMP
+ | lghi CARG3, 0
+ | lghi CARG4, 0
+ | brasl r14, ->vm_resume // (lua_State *L, TValue *base, 0, 0)
+ |
+ | lg L:RB, SAVE_L
+ | lg L:PC, SAVE_TMP
+ | lg BASE, L:RB->base
+ | stg L:RB, (DISPATCH_GL(cur_L))(DISPATCH)
+ | set_vmstate INTERP
+ |
+ | clfi CRET1, LUA_YIELD
+ | jh >8
+ |4:
+ | lg RA, L:PC->base
+ | lg KBASE, L:PC->top
+ | stg RA, L:PC->top // Clear coroutine stack.
+ | lgr PC, KBASE
+ | sgr PC, RA
+ | je >6 // No results?
+ | la RD, 0(PC, BASE)
+ | llgfr PC, PC
+ | srlg PC, PC, 3
+ | clg RD, L:RB->maxstack
+ | jh >9 // Need to grow stack?
+ |
+ | lgr RB, BASE
+ | sgr RB, RA
+ |5: // Move results from coroutine.
+ | lg RD, 0(RA)
+ | stg RD, 0(RA, RB)
+ | la RA, 8(RA)
+ | cgr RA, KBASE
+ | jne <5
+ |6:
+ |.if resume
+ | la RD, 2(PC) // nresults+1 = 1 + true + results.
+ | load_true ITYPE // Prepend true to results.
+ | stg ITYPE, -8(BASE)
+ |.else
+ | la RD, 1(PC) // nresults+1 = 1 + results.
+ |.endif
+ |7:
+ | lg PC, SAVE_PC
+ | st RD, SAVE_MULTRES
+ |.if resume
+ | lghi RA, -8
+ |.else
+ | lghi RA, 0
+ |.endif
+ | tmll PC, FRAME_TYPE
+ | je ->BC_RET_Z
+ | j ->vm_return
+ |
+ |8: // Coroutine returned with error (at co->top-1).
+ |.if resume
+ | load_false ITYPE // Prepend false to results.
+ | stg ITYPE, -8(BASE)
+ | lg RA, L:PC->top
+ | aghi RA, -8
+ | stg RA, L:PC->top // Clear error from coroutine stack.
+ | // Copy error message.
+ | lg RD, 0(RA)
+ | stg RD, 0(BASE)
+ | lghi RD, 1+2 // nresults+1 = 1 + false + error.
+ | j <7
+ |.else
+ | lgr CARG2, L:PC
+ | lgr CARG1, L:RB
+ | brasl r14, extern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co)
+ | // Error function does not return.
+ |.endif
+ |
+ |9: // Handle stack expansion on return from yield.
+ | lg L:RA, SAVE_TMP
+ | stg KBASE, L:RA->top // Undo coroutine stack clearing.
+ | lgr CARG2, PC
+ | lgr CARG1, L:RB
+ | brasl r14, extern lj_state_growstack // (lua_State *L, int n)
+ | lg L:PC, SAVE_TMP
+ | lg BASE, L:RB->base
+ | j <4 // Retry the stack move.
+ |.endmacro
+ |
+ | coroutine_resume_wrap 1 // coroutine.resume
+ | coroutine_resume_wrap 0 // coroutine.wrap
+ |
+ |.ffunc coroutine_yield
+ | lg L:RB, SAVE_L
+ | lg TMPR0, L:RB->cframe
+ | tmll TMPR0, CFRAME_RESUME
+ | je ->fff_fallback
+ | stg BASE, L:RB->base
+ | sllg RD, NARGS:RD, 3
+ | lay RD, -8(RD, BASE)
+ | stg RD, L:RB->top
+ | lghi RD, 0
+ | stg RD, L:RB->cframe
+ | lghi CRET1, LUA_YIELD
+ | stc CRET1, L:RB->status
+ | j ->vm_leave_unw
+ |
+ |//-- Math library -------------------------------------------------------
+ |
+ |.ffunc_1 math_abs
+ | lg RB, 0(BASE)
+ | checkint RB, >3
+ | lpr RB, RB; jo >2
+ |->fff_resbit:
+ |->fff_resi:
+ | setint RB
+ |->fff_resRB:
+ | lg PC, -8(BASE)
+ | stg RB, -16(BASE)
+ | j ->fff_res1
+ |2:
+ | llihh RB, 0x41e0 // 2^31
+ | j ->fff_resRB
+ |3:
+ | jh ->fff_fallback
+ | nihh RB, 0x7fff // Clear sign bit.
+ | lg PC, -8(BASE)
+ | stg RB, -16(BASE)
+ | j ->fff_res1
+ |
+ |.ffunc_n math_sqrt, sqdb
+ |->fff_resf0:
+ | lg PC, -8(BASE)
+ | stdy f0, -16(BASE)
+ | // fallthrough
+ |
+ |->fff_res1: // Entry for exactly one result.
+ | lghi RD, 1+1 // RD = nresults+1.
+ |->fff_res: // Entry with RD = nresults+1 already set.
+ | st RD, SAVE_MULTRES
+ |->fff_res_: // Entry with SAVE_MULTRES already stored.
+ | tmll PC, FRAME_TYPE // Any FRAME_TYPE bits set in PC?
+ | jne >7
+ |5:
+ | llgc TMPR1, PC_RB
+ | clgr TMPR1, RD // More results expected?
+ | jh >6
+ | // Adjust BASE. KBASE is assumed to be set for the calling frame.
+ | llgc RA, PC_RA
+ | lcgr RA, RA // Negate so the scaled value can be used as an index.
+ | sllg RA, RA, 3
+ | lay BASE, -16(RA, BASE) // base = base - (RA+2)*8
+ | ins_next
+ |
+ |6: // Fill up results with nil.
+ | sllg TMPR1, RD, 3
+ | lghi TMPR0, LJ_TNIL
+ | stg TMPR0, -24(TMPR1, BASE) // Slot at BASE-16 + (RD-1)*8.
+ | la RD, 1(RD) // One more result present; re-test at label 5.
+ | j <5
+ |
+ |7: // Non-standard return case.
+ | lghi RA, -16 // Results start at BASE+RA = BASE-16.
+ | j ->vm_return
+ |
+ |.macro math_round, func
+ | .ffunc math_ .. func
+ | lg RB, 0(BASE)
+ | ld f0, 0(BASE)
+ | checknumx RB, ->fff_resRB, je
+ | jh ->fff_fallback
+ | brasl r14, ->vm_ .. func
+ | cfdbr RB, 0, f0
+ | jo ->fff_resf0
+ | llgfr RB, RB
+ | j ->fff_resi
+ |.endmacro
+ |
+ | math_round floor
+ | math_round ceil
+ |
+ |.ffunc math_log
+ | chi NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument.
+ | lg TMPR0, 0(BASE)
+ | ld FARG1, 0(BASE)
+ | checknumtp TMPR0, ->fff_fallback
+ | brasl r14, extern log
+ | j ->fff_resf0
+ |
+ |.macro math_extern, func
+ | .ffunc_n math_ .. func
+ | brasl r14, extern func
+ | j ->fff_resf0
+ |.endmacro
+ |
+ |.macro math_extern2, func
+ | .ffunc_nn math_ .. func
+ | brasl r14, extern func
+ | j ->fff_resf0
+ |.endmacro
+ |
+ | math_extern log10
+ | math_extern exp
+ | math_extern sin
+ | math_extern cos
+ | math_extern tan
+ | math_extern asin
+ | math_extern acos
+ | math_extern atan
+ | math_extern sinh
+ | math_extern cosh
+ | math_extern tanh
+ | math_extern2 pow
+ | math_extern2 atan2
+ | math_extern2 fmod
+ |
+ |.ffunc_2 math_ldexp
+ | lg TMPR0, 0(BASE)
+ | ld FARG1, 0(BASE)
+ | lg CARG1, 8(BASE)
+ | checknumtp TMPR0, ->fff_fallback
+ | checkinttp CARG1, ->fff_fallback
+ | lgfr CARG1, CARG1
+ | brasl r14, extern ldexp // (double, int)
+ | j ->fff_resf0
+ |
+ |.ffunc_n math_frexp
+ | la CARG1, SAVE_TMP
+ | brasl r14, extern frexp
+ | llgf RB, SAVE_TMP
+ | lg PC, -8(BASE)
+ | stdy f0, -16(BASE)
+ | setint RB
+ | stg RB, -8(BASE)
+ | lghi RD, 1+2
+ | j ->fff_res
+ |
+ |.ffunc_n math_modf
+ | lay CARG1, -16(BASE)
+ | brasl r14, extern modf // (double, double*)
+ | lg PC, -8(BASE)
+ | stdy f0, -8(BASE)
+ | lghi RD, 1+2
+ | j ->fff_res
+ |
+ |.macro math_minmax, name, cjmp
+ | .ffunc name
+ | lghi RA, 2*8
+ | sllg TMPR1, RD, 3
+ | lg RB, 0(BASE)
+ | ld f0, 0(BASE)
+ | checkint RB, >4
+ |1: // Handle integers.
+ | clgr RA, TMPR1; jhe ->fff_resRB
+ | lg TMPR0, -8(RA, BASE)
+ | checkint TMPR0, >3
+ | cr RB, TMPR0
+ | cjmp >2
+ | lgr RB, TMPR0
+ |2:
+ | aghi RA, 8
+ | j <1
+ |3:
+ | jh ->fff_fallback
+ | // Convert intermediate result to number and continue below.
+ | cdfbr f0, RB
+ | ldgr f1, TMPR0
+ | j >6
+ |4:
+ | jh ->fff_fallback
+ |5: // Handle numbers or integers.
+ | clgr RA, TMPR1; jhe ->fff_resf0
+ | lg RB, -8(RA, BASE)
+ | ldy f1, -8(RA, BASE)
+ | checknumx RB, >6, jl
+ | jh ->fff_fallback
+ | cdfbr f1, RB
+ |6:
+ | cdbr f0, f1
+ | cjmp >7
+ | ldr f0, f1
+ |7:
+ | aghi RA, 8
+ | j <5
+ |.endmacro
+ |
+ | math_minmax math_min, jnh
+ | math_minmax math_max, jnl
+ |
+ |//-- String library -----------------------------------------------------
+ |
+ |.ffunc string_byte // Only handle the 1-arg case here.
+ | chi NARGS:RD, 1+1; jne ->fff_fallback
+ | lg STR:RB, 0(BASE)
+ | checkstr STR:RB, ->fff_fallback
+ | lg PC, -8(BASE)
+ | lt TMPR0, STR:RB->len // 32-bit load-and-test; len is read as 32 bits (cf. llgf in string_sub).
+ | je ->fff_res0 // Return no results for empty string.
+ | llgc RB, STR:RB[1] // First byte after the string header.
+ | j ->fff_resi
+ |
+ |.ffunc string_char // Only handle the 1-arg case here.
+ | ffgccheck
+ | chi NARGS:RD, 1+1; jne ->fff_fallback // *Exactly* 1 arg.
+ | lg RB, 0(BASE)
+ | checkint RB, ->fff_fallback
+ | clfi RB, 255; jh ->fff_fallback // Unsigned compare: only 0..255 handled inline.
+ | strvh RB, SAVE_TMP // Store [c,0].
+ | lghi TMPR1, 1 // String length = 1.
+ | la RD, SAVE_TMP // Points to stack. strvh stored the halfword byte-reversed, so c is the first byte.
+ |->fff_newstr: // In: RD = char *str, TMPR1 = length (moved to CARG2/CARG3 below).
+ | lg L:RB, SAVE_L
+ | stg BASE, L:RB->base
+ | llgfr CARG3, TMPR1 // Zero-extended to size_t.
+ | lgr CARG2, RD
+ | lgr CARG1, L:RB
+ | stg PC, SAVE_PC
+ | brasl r14, extern lj_str_new // (lua_State *L, char *str, size_t l)
+ |->fff_resstr: // In: CRET1 = GCstr *, L:RB = lua_State.
+ | // GCstr * returned in r2 (CRET1).
+ | lgr STR:RD, CRET1
+ | lg BASE, L:RB->base // Reload BASE from L->base after the C call.
+ | lg PC, -8(BASE)
+ | settp STR:RD, LJ_TSTR
+ | stg STR:RD, -16(BASE) // Tagged string is the single result.
+ | j ->fff_res1
+ |
+ |.ffunc string_sub
+ | ffgccheck
+ | lghi TMPR1, -1
+ | clfi NARGS:RD, 1+2; jl ->fff_fallback
+ | jnh >1
+ | lg TMPR1, 16(BASE)
+ | checkint TMPR1, ->fff_fallback
+ |1:
+ | lg STR:RB, 0(BASE)
+ | checkstr STR:RB, ->fff_fallback
+ | lg ITYPE, 8(BASE)
+ | lgfr RA, ITYPE
+ | srag ITYPE, ITYPE, 47
+ | cghi ITYPE, LJ_TISNUM
+ | jne ->fff_fallback
+ | llgf RC, STR:RB->len
+ | clr RC, TMPR1 // len < end? (unsigned compare)
+ | jl >5
+ |2:
+ | cghi RA, 0 // start <= 0?
+ | jle >7
+ |3:
+ | sr TMPR1, RA // start > end?
+ | jnhe ->fff_emptystr
+ | la RD, (#STR-1)(RA, STR:RB)
+ | ahi TMPR1, 1
+ |4:
+ | j ->fff_newstr
+ |
+ |5: // Negative end or overflow.
+ | chi TMPR1, 0
+ | jnl >6
+ | ahi TMPR1, 1
+ | ar TMPR1, RC // end = end+(len+1)
+ | j <2
+ |6: // Overflow.
+ | lr TMPR1, RC // end = len
+ | j <2
+ |
+ |7: // Negative start or underflow.
+ | je >8
+ | agr RA, RC // start = start+(len+1)
+ | aghi RA, 1
+ | jh <3 // start > 0?
+ |8: // Underflow.
+ | lghi RA, 1 // start = 1
+ | j <3
+ |
+ |->fff_emptystr: // Range underflow.
+ | lghi TMPR1, 0
+ | j <4
+ |
+ |.macro ffstring_op, name
+ | .ffunc_1 string_ .. name
+ | ffgccheck
+ | lg STR:CARG2, 0(BASE)
+ | checkstr STR:CARG2, ->fff_fallback
+ | lg L:RB, SAVE_L
+ | lay SBUF:CARG1, (DISPATCH_GL(tmpbuf))(DISPATCH)
+ | stg BASE, L:RB->base
+ | lg RC, SBUF:CARG1->b
+ | stg L:RB, SBUF:CARG1->L
+ | stg RC, SBUF:CARG1->w
+ | stg PC, SAVE_PC
+ | brasl r14, extern lj_buf_putstr_ .. name
+ | // lgr CARG1, CRET1 (nop, CARG1==CRET1)
+ | brasl r14, extern lj_buf_tostr
+ | j ->fff_resstr
+ |.endmacro
+ |
+ |ffstring_op reverse
+ |ffstring_op lower
+ |ffstring_op upper
+ |
+ |//-- Bit library --------------------------------------------------------
+ |
+ |.macro .ffunc_bit, name, kind, fdef
+ | fdef name
+ |.if kind == 2
+ | bfpconst_tobit f1, RB
+ |.endif
+ | lg RB, 0(BASE)
+ | ld f0, 0(BASE)
+ | checkint RB, >1
+ |.if kind > 0
+ | j >2
+ |.else
+ | j ->fff_resbit
+ |.endif
+ |1:
+ | jh ->fff_fallback
+ |.if kind < 2
+ | bfpconst_tobit f1, RB
+ |.endif
+ | adbr f0, f1
+ | lgdr RB, f0
+ | llgfr RB, RB
+ |2:
+ |.endmacro
+ |
+ |.macro .ffunc_bit, name, kind
+ | .ffunc_bit name, kind, .ffunc_1
+ |.endmacro
+ |
+ |.ffunc_bit bit_tobit, 0
+ | j ->fff_resbit
+ |
+ |.macro .ffunc_bit_op, name, ins
+ | .ffunc_bit name, 2
+ | lgr TMPR1, NARGS:RD // Save for fallback.
+ | sllg RD, NARGS:RD, 3
+ | lay RD, -16(RD, BASE)
+ |1:
+ | clgr RD, BASE
+ | jle ->fff_resbit
+ | lg RA, 0(RD)
+ | checkint RA, >2
+ | ins RB, RA
+ | aghi RD, -8
+ | j <1
+ |2:
+ | jh ->fff_fallback_bit_op
+ | ldgr f0, RA
+ | adbr f0, f1
+ | lgdr RA, f0
+ | ins RB, RA
+ | aghi RD, -8
+ | j <1
+ |.endmacro
+ |
+ |.ffunc_bit_op bit_band, nr
+ |.ffunc_bit_op bit_bor, or
+ |.ffunc_bit_op bit_bxor, xr
+ |
+ |.ffunc_bit bit_bswap, 1
+ | lrvr RB, RB
+ | j ->fff_resbit
+ |
+ |.ffunc_bit bit_bnot, 1
+ | xilf RB, -1
+ | j ->fff_resbit
+ |
+ |->fff_fallback_bit_op:
+ | lgr NARGS:RD, TMPR1 // Restore for fallback
+ | j ->fff_fallback
+ |
+ |.macro .ffunc_bit_sh, name, ins
+ | .ffunc_bit name, 1, .ffunc_2
+ | // Note: no inline conversion from number for 2nd argument!
+ | lg RA, 8(BASE)
+ | checkint RA, ->fff_fallback
+ | nill RA, 0x1f // Limit shift to 5-bits.
+ | ins RB, 0(RA)
+ | j ->fff_resbit
+ |.endmacro
+ |
+ |.ffunc_bit_sh bit_lshift, sll
+ |.ffunc_bit_sh bit_rshift, srl
+ |.ffunc_bit_sh bit_arshift, sra
+ |
+ |.ffunc_bit bit_rol, 1, .ffunc_2
+ | // Note: no inline conversion from number for 2nd argument!
+ | lg RA, 8(BASE)
+ | checkint RA, ->fff_fallback
+ | rll RB, RB, 0(RA)
+ | j ->fff_resbit
+ |
+ |.ffunc_bit bit_ror, 1, .ffunc_2
+ | // Note: no inline conversion from number for 2nd argument!
+ | lg RA, 8(BASE)
+ | checkint RA, ->fff_fallback
+ | lcr RA, RA // Right rotate equivalent to negative left rotate.
+ | rll RB, RB, 0(RA)
+ | j ->fff_resbit
+ |
+ |//-----------------------------------------------------------------------
+ |
+ |->fff_fallback_2:
+ | lghi NARGS:RD, 1+2 // Other args are ignored, anyway.
+ | j ->fff_fallback
+ |->fff_fallback_1:
+ | lghi NARGS:RD, 1+1 // Other args are ignored, anyway.
+ |->fff_fallback: // Call fast function fallback handler.
+ | // BASE = new base, RD = nargs+1
+ | lg L:RB, SAVE_L
+ | lg PC, -8(BASE) // Fallback may overwrite PC.
+ | stg PC, SAVE_PC // Redundant (but a defined value).
+ | stg BASE, L:RB->base
+ | sllg RD, NARGS:RD, 3
+ | lay RD, -8(RD, BASE)
+ | la RA, (8*LUA_MINSTACK)(RD) // Ensure enough space for handler.
+ | stg RD, L:RB->top
+ | lg CFUNC:RD, -16(BASE)
+ | cleartp CFUNC:RD
+ | clg RA, L:RB->maxstack
+ | jh >5 // Need to grow stack.
+ | lgr CARG1, L:RB
+ | lg TMPR1, CFUNC:RD->f
+ | basr r14, TMPR1 // (lua_State *L)
+ | lg BASE, L:RB->base
+ | // Either throws an error, or recovers and returns -1, 0 or nresults+1.
+ | lgr RD, CRET1
+ | cghi RD, 0; jh ->fff_res // Returned nresults+1?
+ |1:
+ | lg RA, L:RB->top
+ | sgr RA, BASE
+ | srlg RA, RA, 3
+ | cghi RD, 0
+ | la NARGS:RD, 1(RA)
+ | lg LFUNC:RB, -16(BASE)
+ | jne ->vm_call_tail // Returned -1?
+ | cleartp LFUNC:RB
+ | ins_callt // Returned 0: retry fast path.
+ |
+ |// Reconstruct previous base for vmeta_call during tailcall.
+ |->vm_call_tail:
+ | lgr RA, BASE
+ | tmll PC, FRAME_TYPE
+ | jne >3
+ | llgc RB, PC_RA
+ | lcgr RB, RB
+ | sllg RB, RB, 3
+ | lay BASE, -16(RB, BASE) // base = base - (RB+2)*8
+ | j ->vm_call_dispatch // Resolve again for tailcall.
+ |3:
+ | lgr RB, PC
+ | nill RB, -8
+ | sgr BASE, RB
+ | j ->vm_call_dispatch // Resolve again for tailcall.
+ |
+ |5: // Grow stack for fallback handler.
+ | lghi CARG2, LUA_MINSTACK
+ | lgr CARG1, L:RB
+ | brasl r14, extern lj_state_growstack // (lua_State *L, int n)
+ | lg BASE, L:RB->base
+ | lghi RD, 0 // Simulate a return 0.
+ | j <1 // Dumb retry (goes through ff first).
+ |
+ |->fff_gcstep: // Call GC step function.
+ | // BASE = new base, RD = nargs+1
+ | stg r14, SAVE_TMP // Save return address (the brasl below overwrites r14).
+ | lg L:RB, SAVE_L
+ | stg PC, SAVE_PC // Redundant (but a defined value).
+ | stg BASE, L:RB->base
+ | sllg RD, NARGS:RD, 3
+ | lay RD, -8(RD, BASE) // RD = BASE + (nargs+1)*8 - 8 = BASE + nargs*8.
+ | lgr CARG1, L:RB
+ | stg RD, L:RB->top // Publish L->top before the C call.
+ | brasl r14, extern lj_gc_step // (lua_State *L)
+ | lg BASE, L:RB->base // Reload BASE from L->base after the call.
+ | lg RD, L:RB->top
+ | sgr RD, BASE
+ | srlg RD, RD, 3 // (top - base) / 8 = nargs.
+ | aghi NARGS:RD, 1 // Back to RD = nargs+1 for the caller.
+ | lg r14, SAVE_TMP // Restore return address.
+ | br r14
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Special dispatch targets -------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |->vm_record: // Dispatch target for recording phase.
+ | stg r0, 0
+ | stg r0, 0
+ |
+ |->vm_rethook: // Dispatch target for return hooks.
+ | llgc RD, (DISPATCH_GL(hookmask))(DISPATCH)
+ | tmll RD, HOOK_ACTIVE
+ | jne >5
+ | j >1
+ |
+ |->vm_inshook: // Dispatch target for instr/line hooks.
+ | llgc RD, (DISPATCH_GL(hookmask))(DISPATCH)
+ | tmll RD, HOOK_ACTIVE // Hook already active?
+ | jne >5
+ |
+ | tmll RD, LUA_MASKLINE|LUA_MASKCOUNT
+ | je >5
+ | ly TMPR0, (DISPATCH_GL(hookcount))(DISPATCH)
+ | ahi TMPR0, -1
+ | sty TMPR0, (DISPATCH_GL(hookcount))(DISPATCH)
+ | je >1
+ | tmll RD, LUA_MASKLINE
+ | je >5
+ |1:
+ | lg L:RB, SAVE_L
+ | stg BASE, L:RB->base
+ | lgr CARG2, PC
+ | lgr CARG1, L:RB
+ | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC.
+ | brasl r14, extern lj_dispatch_ins // (lua_State *L, const BCIns *pc)
+ |3:
+ | lg BASE, L:RB->base
+ |4:
+ | llgc RA, PC_RA
+ |5:
+ | llgc OP, PC_OP
+ | sllg TMPR1, OP, 3
+ | llgh RD, PC_RD
+ | lg TMPR1, GG_DISP2STATIC(TMPR1, DISPATCH)
+ | br TMPR1
+ |
+ |->cont_hook: // Continue from hook yield.
+ | stg r0, 0
+ | stg r0, 0
+ |
+ |->vm_hotloop: // Hot loop counter underflow.
+ | stg r0, 0
+ | stg r0, 0
+ |
+ |->vm_callhook: // Dispatch target for call hooks.
+ | stg PC, SAVE_PC
+ |.if JIT
+ | j >1
+ |.endif
+ |
+ |->vm_hotcall: // Hot call counter underflow.
+ |.if JIT
+ | stg PC, SAVE_PC
+ | oill PC, 1 // Marker for hot call.
+ |1:
+ |.endif
+ | sllg RD, NARGS:RD, 3
+ | lay RD, -8(RD, BASE)
+ | lg L:RB, SAVE_L
+ | stg BASE, L:RB->base
+ | stg RD, L:RB->top
+ | lgr CARG2, PC
+ | lgr CARG1, L:RB
+ | brasl r14, extern lj_dispatch_call // (lua_State *L, const BCIns *pc)
+ | // ASMFunction returned in r2 (CRET1).
+ | lghi TMPR0, 0
+ | stg TMPR0, SAVE_PC // Invalidate for subsequent line hook.
+ |.if JIT
+ | nill PC, -2
+ |.endif
+ | lg BASE, L:RB->base
+ | lg RD, L:RB->top
+ | sgr RD, BASE
+ | lgr RB, CRET1
+ | llgc RA, PC_RA
+ | srl RD, 3
+ | ahi NARGS:RD, 1
+ | llgfr RD, RD
+ | br RB
+ |
+ |->cont_stitch: // Trace stitching.
+ | stg r0, 0
+ | stg r0, 0
+ |
+ |->vm_profhook: // Dispatch target for profiler hook.
+ | stg r0, 0
+ | stg r0, 0
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Trace exit handler -------------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |// Called from an exit stub with the exit number on the stack.
+ |// The 16 bit exit number is stored with two (sign-extended) push imm8. NOTE(review): x86 wording -- confirm for s390x.
+ |->vm_exit_handler:
+ | stg r0, 0
+ | stg r0, 0
+ |->vm_exit_interp:
+ | stg r0, 0
+ | stg r0, 0
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Math helper functions ----------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |// FP value rounding. Called by math.floor/math.ceil fast functions.
+ |// Value to round is in f0. May clobber f0-f7 and r0. Return address is r14.
+ |.macro vm_round, name, mask
+ |->name: // In: f0 = value, r14 = return address. Out: f0 = rounded value.
+ | lghi r0, 1
+ | cdfbr f1, r0 // f1 = 1.0 (divisor).
+ | didbr f0, f2, f1, mask // f0=remainder, f2=quotient.
+ | jnle >1 // NOTE(review): presumably the cc for an incomplete remainder -- confirm against the didbr cc table.
+ | ldr f0, f2 // Result is the integer quotient of f0/1.0, rounded per mask.
+ | br r14
+ |1: // partial remainder (sanity check)
+ | stg r0, 0 // Store to absolute address 0; same stub pattern as ->vm_mod (NYI).
+ |.endmacro
+ |
+ | vm_round vm_floor, 7 // Round towards -inf.
+ | vm_round vm_ceil, 6 // Round towards +inf.
+ | vm_round vm_trunc, 5 // Round towards 0.
+ |
+ |// FP modulo x%y. Called by BC_MOD* and vm_arith.
+ |->vm_mod: // NYI.
+ | stg r0, 0
+ | stg r0, 0
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Assertions ---------------------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |->assert_bad_for_arg_type:
+ | stg r0, 0
+ | stg r0, 0
+#ifdef LUA_USE_ASSERT
+#endif
+ |
+ |//-----------------------------------------------------------------------
+ |//-- FFI helper functions -----------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |// Handler for callback functions. Callback slot number in ah/al. NOTE(review): ah/al are x86 registers -- confirm for s390x.
+ |->vm_ffi_callback:
+ | stg r0, 0
+ | stg r0, 0
+ |
+ |->cont_ffi_callback: // Return from FFI callback.
+ | stg r0, 0
+ | stg r0, 0
+ |
+ |->vm_ffi_call: // Call C function via FFI.
+ | // Caveat: needs special frame unwinding, see below.
+ |.if FFI
+ | .type CCSTATE, CCallState, r8
+ | stmg r6, r15, 48(sp)
+ | lgr r13, sp // Use r13 as frame pointer.
+ | lgr CCSTATE, CARG1
+ | lg r7, CCSTATE->func
+ |
+ | // Readjust stack.
+ | sgf sp, CCSTATE->spadj
+ |
+ | // Copy stack slots.
+ | llgc r1, CCSTATE->nsp
+ | chi r1, 0
+ | jh >2
+ |1:
+ | lmg CARG1, CARG5, CCSTATE->gpr[0]
+ | // TODO: conditionally load FPRs?
+ | ld FARG1, CCSTATE->fpr[0]
+ | ld FARG2, CCSTATE->fpr[1]
+ | ld FARG3, CCSTATE->fpr[2]
+ | ld FARG4, CCSTATE->fpr[3]
+ | basr r14, r7
+ |
+ | stg CRET1, CCSTATE->gpr[0]
+ | std f0, CCSTATE->fpr[0]
+ |
+ | lgr sp, r13
+ | lmg r6, r15, 48(sp)
+ | br r14
+ |
+ |2:
+ | sll r1, 3
+ | la r10, (offsetof(CCallState, stack))(CCSTATE) // Source.
+ | la r11, (CCALL_SPS_EXTRA*8)(sp) // Destination.
+ |3:
+ | chi r1, 256
+ | jl >4
+ | mvc 0(256, r11), 0(r10)
+ | la r10, 256(r10)
+ | la r11, 256(r11)
+ | ahi r1, -256
+ | j <3
+ |
+ |4:
+ | ahi r1, -1
+ | jl <1
+ | larl r9, >5
+ | ex r1, 0(r9)
+ | j <1
+ |
+ |5:
+ | // Target of the ex above (the code uses larl + ex, not exrl).
+ | mvc 0(1, r11), 0(r10)
+ |.endif
+ |// Note: vm_ffi_call must be the last function in this object file!
+ |
+ |//-----------------------------------------------------------------------
+}
+
+/* Generate the code for a single instruction. */
+static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+{
+ int vk = 0;
+ (void)vk;
+ |// Note: aligning all instructions does not pay off.
+ |=>defop:
+
+ switch (op) {
+
+ /* -- Comparison ops ---------------------------------------------------- */
+
+ /* Remember: all ops branch for a true comparison, fall through otherwise. */
+
+ |.macro jmp_comp, lt, ge, le, gt, target
+ ||switch (op) {
+ ||case BC_ISLT:
+ | lt target
+ ||break;
+ ||case BC_ISGE:
+ | ge target
+ ||break;
+ ||case BC_ISLE:
+ | le target
+ ||break;
+ ||case BC_ISGT:
+ | gt target
+ ||break;
+ ||default: break; /* Shut up GCC. */
+ ||}
+ |.endmacro
+
+ case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
+ | // RA = src1, RD = src2, JMP with RD = target
+ | ins_AD
+ | sllg RA, RA, 3
+ | sllg RD, RD, 3
+ | ld f0, 0(RA, BASE)
+ | ld f1, 0(RD, BASE)
+ | lg RA, 0(RA, BASE)
+ | lg RD, 0(RD, BASE)
+ | srag ITYPE, RA, 47
+ | srag RB, RD, 47
+ |
+ | clfi ITYPE, LJ_TISNUM; jne >7
+ | clfi RB, LJ_TISNUM; jne >8
+ | // Both are integers.
+ | la PC, 4(PC)
+ | cr RA, RD
+ | jmp_comp jhe, jl, jh, jle, >9
+ |6:
+ | llgh RD, PC_RD
+ | branchPC RD
+ |9:
+ | ins_next
+ |
+ |7: // RA is not an integer.
+ | jh ->vmeta_comp
+ | // RA is a number.
+ | clfi RB, LJ_TISNUM; jl >1; jne ->vmeta_comp
+ | // RA is a number, RD is an integer.
+ | cdfbr f1, RD
+ | j >1
+ |
+ |8: // RA is an integer, RD is not an integer.
+ | jh ->vmeta_comp
+ | // RA is an integer, RD is a number.
+ | cdfbr f0, RA
+ |1:
+ | la PC, 4(PC)
+ | cdbr f0, f1
+ | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't.
+ | jmp_comp jnl, jl, jnle, jle, <9
+ | j <6
+ break;
+
+ case BC_ISEQV: case BC_ISNEV:
+ vk = op == BC_ISEQV;
+ | ins_AD // RA = src1, RD = src2, JMP with RD = target
+ | sllg RD, RD, 3
+ | ld f1, 0(RD, BASE)
+ | lg RD, 0(RD, BASE)
+ | sllg RA, RA, 3
+ | ld f0, 0(RA, BASE)
+ | lg RA, 0(RA, BASE)
+ | la PC, 4(PC)
+ | srag RB, RD, 47
+ | srag ITYPE, RA, 47
+ | clfi RB, LJ_TISNUM; jne >7
+ | clfi ITYPE, LJ_TISNUM; jne >8
+ | cr RD, RA
+ if (vk) {
+ | jne >9
+ } else {
+ | je >9
+ }
+ | llgh RD, PC_RD
+ | branchPC RD
+ |9:
+ | ins_next
+ |
+ |7: // RD is not an integer.
+ | jh >5
+ | // RD is a number.
+ | clfi ITYPE, LJ_TISNUM; jl >1; jne >5
+ | // RD is a number, RA is an integer.
+ | cdfbr f0, RA
+ | j >1
+ |
+ |8: // RD is an integer, RA is not an integer.
+ | jh >5
+ | // RD is an integer, RA is a number.
+ | cdfbr f1, RD
+ | j >1
+ |
+ |1:
+ | cdbr f0, f1
+ |4:
+ iseqne_fp:
+ if (vk) {
+ | jne >2 // Unordered means not equal.
+ } else {
+ | je >1 // Unordered means not equal.
+ }
+ iseqne_end:
+ if (vk) {
+ |1: // EQ: Branch to the target.
+ | llgh RD, PC_RD
+ | branchPC RD
+ |2: // NE: Fallthrough to next instruction.
+ |.if not FFI
+ |3:
+ |.endif
+ } else {
+ |.if not FFI
+ |3:
+ |.endif
+ |2: // NE: Branch to the target.
+ | llgh RD, PC_RD
+ | branchPC RD
+ |1: // EQ: Fallthrough to next instruction.
+ }
+ if (LJ_DUALNUM && (op == BC_ISEQV || op == BC_ISNEV ||
+ op == BC_ISEQN || op == BC_ISNEN)) {
+ | j <9
+ } else {
+ | ins_next
+ }
+ |
+ if (op == BC_ISEQV || op == BC_ISNEV) {
+ |5: // Either or both types are not numbers.
+ |.if FFI
+ | clfi RB, LJ_TCDATA; je ->vmeta_equal_cd
+ | clfi ITYPE, LJ_TCDATA; je ->vmeta_equal_cd
+ |.endif
+ | cgr RA, RD
+ | je <1 // Same GCobjs or pvalues?
+ | cr RB, ITYPE
+ | jne <2 // Not the same type?
+ | clfi RB, LJ_TISTABUD
+ | jh <2 // Different objects and not table/ud?
+ |
+ | // Different tables or userdatas. Need to check __eq metamethod.
+ | // Field metatable must be at same offset for GCtab and GCudata!
+ | cleartp TAB:RA
+ | lg TAB:RB, TAB:RA->metatable
+ | cghi TAB:RB, 0
+ | je <2 // No metatable?
+ | tm TAB:RB->nomm, 1<<MM_eq
+ | jne <2 // Or 'no __eq' flag set?
+ if (vk) {
+ | lghi RB, 0 // ne = 0
+ } else {
+ | lghi RB, 1 // ne = 1
+ }
+ | j ->vmeta_equal // Handle __eq metamethod.
+ } else {
+ |.if FFI
+ |3:
+ | clfi ITYPE, LJ_TCDATA
+ if (LJ_DUALNUM && vk) {
+ | jne <9
+ } else {
+ | jne <2
+ }
+ | j ->vmeta_equal_cd
+ |.endif
+ }
+ break;
+ case BC_ISEQS: case BC_ISNES:
+ vk = op == BC_ISEQS;
+ | ins_AND // RA = src, RD = str const, JMP with RD = target
+ | sllg RA, RA, 3
+ | sllg RD, RD, 3
+ | lg RB, 0(RA, BASE)
+ | la PC, 4(PC)
+ | checkstr RB, >3
+ | cg RB, 0(RD, KBASE)
+ iseqne_test:
+ if (vk) {
+ | jne >2
+ } else {
+ | je >1
+ }
+ goto iseqne_end;
+ case BC_ISEQN: case BC_ISNEN:
+ vk = op == BC_ISEQN;
+ | ins_AD // RA = src, RD = num const, JMP with RD = target
+ | sllg RA, RA, 3
+ | sllg RD, RD, 3
+ | ld f0, 0(RA, BASE)
+ | lg RB, 0(RA, BASE)
+ | ld f1, 0(RD, KBASE)
+ | lg RD, 0(RD, KBASE)
+ | la PC, 4(PC)
+ | checkint RB, >7
+ | checkint RD, >8
+ | cr RB, RD
+ if (vk) {
+ | jne >9
+ } else {
+ | je >9
+ }
+ | llgh RD, PC_RD
+ | branchPC RD
+ |9:
+ | ins_next
+ |
+ |7: // RA is not an integer.
+ | jh >3
+ | // RA is a number.
+ | checkint RD, >1
+ | // RA is a number, RD is an integer.
+ | cdfbr f1, RD
+ | j >1
+ |
+ |8: // RA is an integer, RD is a number.
+ | cdfbr f0, RB
+ | cdbr f0, f1
+ | j >4
+ |1:
+ | cdbr f0, f1
+ |4:
+ goto iseqne_fp;
+ case BC_ISEQP: case BC_ISNEP: /* Slot type tag vs. primitive type test. */
+ vk = op == BC_ISEQP; /* vk is set for the 'equal' variant. */
+ | ins_AND // RA = src, RD = primitive type (~), JMP with RD = target
+ | sllg RA, RA, 3
+ | lg RB, 0(RA, BASE)
+ | srag RB, RB, 47 // Keep only the type tag (arithmetic shift right by 47).
+ | la PC, 4(PC) // Step PC over the 4-byte JMP that follows.
+ | cr RB, RD // Compare the tag with the primitive type in RD.
+ if (!LJ_HASFFI) goto iseqne_test; /* Without FFI the plain equal/not-equal tail is reused. */
+ if (vk) {
+ | jne >3
+ | llgh RD, PC_RD
+ | branchPC RD
+ |2:
+ | ins_next
+ |3:
+ | cghi RB, LJ_TCDATA; jne <2 // Tags differ: only a cdata operand takes the metamethod path.
+ | j ->vmeta_equal_cd
+ } else {
+ | je >2
+ | cghi RB, LJ_TCDATA; je ->vmeta_equal_cd
+ | llgh RD, PC_RD
+ | branchPC RD
+ |2:
+ | ins_next
+ }
+ break;
+
+ /* -- Unary test and copy ops ------------------------------------------- */
+
+ case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF: /* Truthiness test; the *C variants also copy the value. */
+ | ins_AD // RA = dst or unused, RD = src, JMP with RD = target
+ | sllg RD, RD, 3
+ | sllg RA, RA, 3
+ | lg ITYPE, 0(RD, BASE)
+ | la PC, 4(PC) // Step PC over the 4-byte JMP that follows.
+ if (op == BC_ISTC || op == BC_ISFC) {
+ | lgr RB, ITYPE // Keep the full value for the copy below.
+ }
+ | srag ITYPE, ITYPE, 47 // Reduce the value to its type tag.
+ | clfi ITYPE, LJ_TISTRUECOND // Unsigned 32-bit compare of the tag.
+ if (op == BC_IST || op == BC_ISTC) {
+ | jhe >1
+ } else {
+ | jl >1
+ }
+ if (op == BC_ISTC || op == BC_ISFC) {
+ | stg RB, 0(RA, BASE) // Copy variants: store the tested value into slot RA.
+ }
+ | llgh RD, PC_RD
+ | branchPC RD
+ |1: // Fallthrough to the next instruction.
+ | ins_next
+ break;
+
+ case BC_ISTYPE: /* Type assertion: go to vmeta_istype on mismatch. */
+ | ins_AD // RA = src, RD = -type
+ | lghr RD, RD // Sign-extend the 16-bit operand.
+ | sllg RA, RA, 3
+ | lg RB, 0(RA, BASE)
+ | srag RB, RB, 47 // Reduce the value to its type tag.
+ | agr RB, RD // tag + (-type) is zero only when the types match.
+ | jne ->vmeta_istype
+ | ins_next
+ break;
+ case BC_ISNUM: /* Number assertion: go to vmeta_istype if slot RA fails checknumtp. */
+ | ins_AD // RA = src, RD = -(TISNUM-1)
+ | sllg TMPR1, RA, 3 // Slot number -> byte offset (x8), kept in a temporary.
+ | lg TMPR1, 0(TMPR1, BASE)
+ | checknumtp TMPR1, ->vmeta_istype
+ | ins_next
+ break;
+ case BC_MOV: /* Copy the 64-bit value in slot RD to slot RA. */
+ | ins_AD // RA = dst, RD = src
+ | sllg RD, RD, 3 // Slot number -> byte offset (x8).
+ | lg RB, 0(RD, BASE)
+ | sllg RA, RA, 3
+ | stg RB, 0(RA, BASE)
+ | ins_next_
+ break;
+ case BC_NOT: /* Logical not: result is true only for a nil or false operand. */
+ | ins_AD // RA = dst, RD = src
+ | sllg RD, RD, 3
+ | sllg RA, RA, 3
+ | lg RB, 0(RD, BASE)
+ | srag RB, RB, 47 // Reduce the value to its type tag.
+ | load_false RC // Default result, kept for every truthy operand.
+ | clfi RB, LJ_TISTRUECOND // Same unsigned truthiness test as BC_IST/BC_ISF.
+ | jl >1 // Tag below LJ_TISTRUECOND: operand is truthy, keep false.
+ | load_true RC // Operand is nil or false.
+ |1:
+ | stg RC, 0(RA, BASE)
+ | ins_next
+ break;
+ case BC_UNM: /* Unary minus for integers and FP numbers; other types go to vmeta_unm. */
+ | ins_AD // RA = dst, RD = src
+ | sllg RA, RA, 3
+ | sllg RD, RD, 3
+ | lg RB, 0(RD, BASE)
+ | checkint RB, >3
+ | lcr RB, RB; jo >2 // 32-bit negate; branch if it overflowed.
+ |1:
+ | stg RB, 0(RA, BASE)
+ | ins_next
+ |2: // Integer negation overflowed: replace the result with a double.
+ | llihh RB, 0x41e0 // (double)2^31
+ | j <1
+ |3: // Not an integer.
+ | jh ->vmeta_unm // NOTE(review): relies on the condition code left by checkint.
+ | // Toggle sign bit.
+ | llihh TMPR0, 0x8000 // Mask with only the top bit set.
+ | xgr RB, TMPR0
+ | j <1
+ break;
+ case BC_LEN: /* Length operator for strings and tables; other types go to vmeta_len. */
+ | ins_AD // RA = dst, RD = src
+ | sllg RD, RD, 3
+ | lg RD, 0(RD, BASE)
+ | checkstr RD, >2
+ | llgf RD, STR:RD->len // String: load the 32-bit length field.
+ |1: // Common exit: box the length in RD and store it.
+ | sllg RA, RA, 3
+ | setint RD
+ | stg RD, 0(RA, BASE)
+ | ins_next
+ |2: // Not a string.
+ | cghi ITYPE, LJ_TTAB; jne ->vmeta_len // NOTE(review): assumes checkstr left the type tag in ITYPE.
+ | lgr TAB:CARG1, TAB:RD
+#if LJ_52
+ | lg TAB:RB, TAB:RD->metatable
+ | cghi TAB:RB, 0
+ | jne >9
+ |3:
+#endif
+ |->BC_LEN_Z:
+ | brasl r14, extern lj_tab_len // (GCtab *t)
+ | // Length of table returned in r2 (CRET1).
+ | lgr RD, CRET1
+ | llgc RA, PC_RA // Reload the A operand before rejoining the store path.
+ | j <1
+#if LJ_52
+ |9: // Check for __len.
+ | tm TAB:RB->nomm, 1<<MM_len
+ | jne <3 // 'no __len' flag set: use the plain table length.
+ | j ->vmeta_len // 'no __len' flag NOT set: check.
+#endif
+ break;
+
+ /* -- Binary ops -------------------------------------------------------- */
+
+ |.macro ins_arithpre
+ | ins_ABC // Common prologue for arithmetic ops: decode A, B, C ...
+ | sllg RB, RB, 3 // ... and scale each operand to a byte offset (x8).
+ | sllg RC, RC, 3
+ | sllg RA, RA, 3
+ |.endmacro
+ |
+ |.macro ins_arithfp, ins
+ | ins_arithpre // FP arithmetic template; 'ins' is the two-register FP instruction to apply.
+ ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
+ ||switch (vk) {
+ ||case 0:
+ | ld f0, 0(RB, BASE) // vk == 0: left operand from a slot, right from KBASE.
+ | ld f1, 0(RC, KBASE)
+ | lg RB, 0(RB, BASE) // Raw copies of both operands for the type checks.
+ | lg RC, 0(RC, KBASE)
+ | checknumtp RB, ->vmeta_arith_vno
+ | checknumtp RC, ->vmeta_arith_vno
+ | ins f0, f1
+ || break;
+ ||case 1:
+ | ld f1, 0(RB, BASE) // vk == 1: operands swapped, the KBASE value goes to f0.
+ | ld f0, 0(RC, KBASE)
+ | lg RB, 0(RB, BASE)
+ | lg RC, 0(RC, KBASE)
+ | checknumtp RB, ->vmeta_arith_nvo
+ | checknumtp RC, ->vmeta_arith_nvo
+ | ins f0, f1
+ || break;
+ ||default:
+ | ld f0, 0(RB, BASE) // Otherwise: both operands come from slots.
+ | ld f1, 0(RC, BASE)
+ | lg RB, 0(RB, BASE)
+ | lg RC, 0(RC, BASE)
+ | checknumtp RB, ->vmeta_arith_vvo
+ | checknumtp RC, ->vmeta_arith_vvo
+ | ins f0, f1
+ || break;
+ ||}
+ | std f0, 0(RA, BASE) // Result is always left in f0.
+ | ins_next
+ |.endmacro
+ |
+ |.macro ins_arithdn, intins
+ | ins_arithpre // Integer arithmetic template; any failed check or overflow goes to vmeta_arith_*.
+ ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
+ ||switch (vk) {
+ ||case 0:
+ | lg RB, 0(RB, BASE) // vk == 0: left operand from a slot, right from KBASE.
+ | lg RC, 0(RC, KBASE)
+ | checkint RB, ->vmeta_arith_vno
+ | checkint RC, ->vmeta_arith_vno
+ | intins RB, RC; jo ->vmeta_arith_vno // Result in RB; branch on overflow.
+ || break;
+ ||case 1:
+ | lg RB, 0(RB, BASE)
+ | lg RC, 0(RC, KBASE)
+ | checkint RB, ->vmeta_arith_nvo
+ | checkint RC, ->vmeta_arith_nvo
+ | intins RC, RB; jo ->vmeta_arith_nvo // Operands swapped: the KBASE value is the left operand, result in RC.
+ || break;
+ ||default:
+ | lg RB, 0(RB, BASE) // Otherwise: both operands come from slots.
+ | lg RC, 0(RC, BASE)
+ | checkint RB, ->vmeta_arith_vvo
+ | checkint RC, ->vmeta_arith_vvo
+ | intins RB, RC; jo ->vmeta_arith_vvo
+ || break;
+ ||}
+ ||if (vk == 1) {
+ | // setint RC
+ | stg RC, 0(RA, BASE) // Stored without re-tagging (NOTE(review): presumably the upper half still holds the integer tag).
+ ||} else {
+ | // setint RB
+ | stg RB, 0(RA, BASE)
+ ||}
+ | ins_next
+ |.endmacro
+
+ | // RA = dst, RB = src1 or num const, RC = src2 or num const
+ case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: /* Integer add via the ins_arithdn template. */
+ | ins_arithdn ar
+ break;
+ case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: /* Integer subtract via the ins_arithdn template. */
+ | ins_arithdn sr
+ break;
+ case BC_MULVN: case BC_MULNV: case BC_MULVV: /* Integer multiply with an explicit 32-bit range check. */
+ | ins_arithpre
+ | // For multiplication we use msgfr and check if the result
+ | // fits in an int32_t.
+ switch(op) {
+ case BC_MULVN:
+ | lg RB, 0(RB, BASE)
+ | lg RC, 0(RC, KBASE)
+ | checkint RB, ->vmeta_arith_vno
+ | checkint RC, ->vmeta_arith_vno
+ | lgfr RB, RB // Sign-extend the 32-bit operand to 64 bits.
+ | msgfr RB, RC // 64-bit product of RB and the low word of RC.
+ | lgfr RC, RB // Sign-extended low word of the product.
+ | cgr RB, RC; jne ->vmeta_arith_vno // Differs: product does not fit in int32_t.
+ break;
+ case BC_MULNV:
+ | lg RB, 0(RB, BASE)
+ | lg RC, 0(RC, KBASE)
+ | checkint RB, ->vmeta_arith_nvo
+ | checkint RC, ->vmeta_arith_nvo
+ | lgfr RB, RB
+ | msgfr RB, RC
+ | lgfr RC, RB
+ | cgr RB, RC; jne ->vmeta_arith_nvo // Differs: product does not fit in int32_t.
+ break;
+ default:
+ | lg RB, 0(RB, BASE)
+ | lg RC, 0(RC, BASE)
+ | checkint RB, ->vmeta_arith_vvo
+ | checkint RC, ->vmeta_arith_vvo
+ | lgfr RB, RB
+ | msgfr RB, RC
+ | lgfr RC, RB
+ | cgr RB, RC; jne ->vmeta_arith_vvo // Differs: product does not fit in int32_t.
+ break;
+ }
+ | llgfr RB, RB // Zero-extend the 32-bit result before tagging.
+ | setint RB
+ | stg RB, 0(RA, BASE)
+ | ins_next
+ break;
+ case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: /* FP divide via the ins_arithfp template. */
+ | ins_arithfp ddbr
+ break;
+ // TODO: implement fast mod operation.
+ // x86_64 does floating point mod, however it might be better to use integer mod.
+ case BC_MODVN: /* No fast path: always use the fallback. */
+ | j ->vmeta_arith_vno
+ break;
+ case BC_MODNV: /* No fast path: always use the fallback. */
+ | j ->vmeta_arith_nvo
+ break;
+ case BC_MODVV: /* No fast path: always use the fallback. */
+ | j ->vmeta_arith_vvo
+ break;
+ case BC_POW: /* Power operator: calls C pow() when both operands pass checknumtp. */
+ | ins_ABC
+ | sllg RB, RB, 3
+ | sllg RC, RC, 3
+ | ld FARG1, 0(RB, BASE) // Load both arguments before the type checks.
+ | ld FARG2, 0(RC, BASE)
+ | lg TMPR0, 0(RB, BASE)
+ | checknumtp TMPR0, ->vmeta_arith_vvo
+ | lg TMPR0, 0(RC, BASE)
+ | checknumtp TMPR0, ->vmeta_arith_vvo
+ | brasl r14, extern pow // double pow(double x, double y), result in f0.
+ | llgc RA, PC_RA // Load the A operand after the call.
+ | sllg RA, RA, 3
+ | std f0, 0(RA, BASE)
+ | ins_next
+ break;
+
+ case BC_CAT: /* Concatenation of slots RB..RC through lj_meta_cat. */
+ | ins_ABC // RA = dst, RB = src_start, RC = src_end
+ | lg L:CARG1, SAVE_L
+ | stg BASE, L:CARG1->base
+ | lgr CARG3, RC
+ | sgr CARG3, RB // left = src_end - src_start.
+ | sllg RC, RC, 3
+ | la CARG2, 0(RC, BASE) // top = address of slot src_end.
+ |->BC_CAT_Z:
+ | lgr L:RB, L:CARG1
+ | stg PC, SAVE_PC
+ | brasl r14, extern lj_meta_cat // (lua_State *L, TValue *top, int left)
+ | // NULL (finished) or TValue * (metamethod) returned in r2 (CRET1).
+ | lg BASE, L:RB->base // Reload BASE from L->base after the call.
+ | ltgr RC, CRET1 // Copy the result and set the condition code.
+ | jne ->vmeta_binop
+ | llgc RB, PC_RB // Copy result to Stk[RA] from Stk[RB].
+ | sllg RB, RB, 3
+ | llgc RA, PC_RA
+ | sllg RA, RA, 3
+ | lg RC, 0(RB, BASE)
+ | stg RC, 0(RA, BASE)
+ | ins_next
+ break;
+
+ /* -- Constant ops ------------------------------------------------------ */
+
+ case BC_KSTR: /* Store a string constant, tagged LJ_TSTR, into slot RA. */
+ | ins_AND // RA = dst, RD = str const (~)
+ | sllg RD, RD, 3
+ | lg RD, 0(RD, KBASE)
+ | settp RD, LJ_TSTR
+ | sllg RA, RA, 3
+ | stg RD, 0(RA, BASE)
+ | ins_next
+ break;
+ case BC_KCDATA: /* Store a cdata constant, tagged LJ_TCDATA; emits no code unless FFI is enabled. */
+ |.if FFI
+ | ins_AND // RA = dst, RD = cdata const (~)
+ | sllg RD, RD, 3
+ | sllg RA, RA, 3
+ | lg RD, 0(RD, KBASE)
+ | settp RD, LJ_TCDATA
+ | stg RD, 0(RA, BASE)
+ | ins_next
+ |.endif
+ break;
+ case BC_KSHORT: /* Store a 16-bit signed literal as a tagged integer. */
+ | ins_AD // RA = dst, RD = signed int16 literal
+ | // Assumes DUALNUM.
+ | lhr RD, RD // Sign-extend literal to 32-bits.
+ | setint RD
+ | sllg RA, RA, 3
+ | stg RD, 0(RA, BASE)
+ | ins_next
+ break;
+ case BC_KNUM: /* Copy a number constant from KBASE into slot RA. */
+ | ins_AD // RA = dst, RD = num const
+ | sllg RD, RD, 3
+ | ld f0, 0(RD, KBASE) // Moved through an FP register.
+ | sllg RA, RA, 3
+ | std f0, 0(RA, BASE)
+ | ins_next
+ break;
+ case BC_KPRI: /* Store a primitive value built from the type operand. */
+ | ins_AD // RA = dst, RD = primitive type (~)
+ | sllg RA, RA, 3
+ | sllg RD, RD, 47 // Move the operand into the tag position.
+ | lghi TMPR0, -1 // All-ones mask ...
+ | xgr RD, TMPR0 // not
+ | stg RD, 0(RA, BASE)
+ | ins_next
+ break;
+ case BC_KNIL: /* Fill slots dst_start..dst_end with nil. */
+ | ins_AD // RA = dst_start, RD = dst_end
+ | sllg RA, RA, 3
+ | sllg RD, RD, 3
+ | la RA, 8(RA, BASE) // RA = address of the second slot.
+ | la RD, 0(RD, BASE) // RD = address of the last slot.
+ | lghi RB, LJ_TNIL
+ | stg RB, -8(RA) // Sets minimum 2 slots.
+ |1:
+ | stg RB, 0(RA)
+ | la RA, 8(RA)
+ | clgr RA, RD // Unsigned address compare.
+ | jle <1 // Loop while RA <= last slot.
+ | ins_next
+ break;
+
+/* -- Upvalue and function ops ------------------------------------------ */
+
+ case BC_UGET: /* Load the value of an upvalue into slot RA. */
+ | ins_AD // RA = dst, RD = upvalue #
+ | sllg RA, RA, 3
+ | sllg RD, RD, 3
+ | lg LFUNC:RB, -16(BASE) // Function object of the current frame.
+ | cleartp LFUNC:RB
+ | lg UPVAL:RB, (offsetof(GCfuncL, uvptr))(RD, LFUNC:RB) // uvptr[RD]
+ | lg RB, UPVAL:RB->v // Pointer to the upvalue's value.
+ | lg RD, 0(RB)
+ | stg RD, 0(RA, BASE)
+ | ins_next
+ break;
+ case BC_USETV: /* Store slot RD into an upvalue, with a GC write barrier check. */
+#define TV2MARKOFS \
+ ((int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv))
+ | ins_AD // RA = upvalue #, RD = src
+ | lg LFUNC:RB, -16(BASE)
+ | cleartp LFUNC:RB
+ | sllg RA, RA, 3
+ | lg UPVAL:RB, (offsetof(GCfuncL, uvptr))(RA, LFUNC:RB)
+ | tm UPVAL:RB->closed, 0xff // Sets the condition code tested by 'je >1' below.
+ | lg RB, UPVAL:RB->v
+ | sllg TMPR1, RD, 3
+ | lg RA, 0(TMPR1, BASE)
+ | stg RA, 0(RB)
+ | je >1 // 'closed' is zero: no barrier needed.
+ | // Check barrier for closed upvalue.
+ | tmy TV2MARKOFS(RB), LJ_GC_BLACK // isblack(uv)
+ | jne >2
+ |1:
+ | ins_next
+ |
+ |2: // Upvalue is black. Check if new value is collectable and white.
+ | srag RD, RA, 47 // Type tag of the stored value.
+ | ahi RD, -LJ_TISGCV
+ | clfi RD, LJ_TNUMX - LJ_TISGCV // tvisgcv(v)
+ | jle <1
+ | cleartp GCOBJ:RA
+ | tm GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(v)
+ | je <1
+ | // Crossed a write barrier. Move the barrier forward.
+ | lgr CARG2, RB // RB still holds the UPVAL->v pointer.
+ | lay GL:CARG1, GG_DISP2G(DISPATCH)
+ | brasl r14, extern lj_gc_barrieruv // (global_State *g, TValue *tv)
+ | j <1
+ break;
+#undef TV2MARKOFS
+ case BC_USETS: /* Store a string constant into an upvalue, with a GC write barrier check. */
+ | ins_AND // RA = upvalue #, RD = str const (~)
+ | lg LFUNC:RB, -16(BASE)
+ | sllg RA, RA, 3
+ | sllg RD, RD, 3
+ | cleartp LFUNC:RB
+ | lg UPVAL:RB, (offsetof(GCfuncL, uvptr))(RA, LFUNC:RB)
+ | lg STR:RA, 0(RD, KBASE)
+ | lg RD, UPVAL:RB->v // Pointer to the upvalue's value.
+ | settp STR:ITYPE, STR:RA, LJ_TSTR // Tagged copy in ITYPE; RA keeps the untagged pointer.
+ | stg STR:ITYPE, 0(RD)
+ | tm UPVAL:RB->marked, LJ_GC_BLACK // isblack(uv)
+ | jne >2
+ |1:
+ | ins_next
+ |
+ |2: // Check if string is white and ensure upvalue is closed.
+ | tm GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(str)
+ | je <1
+ | tm UPVAL:RB->closed, 0xff
+ | je <1 // 'closed' is zero: no barrier needed.
+ | // Crossed a write barrier. Move the barrier forward.
+ | lgr CARG2, RD
+ | lay GL:CARG1, GG_DISP2G(DISPATCH)
+ | brasl r14, extern lj_gc_barrieruv // (global_State *g, TValue *tv)
+ | j <1
+ break;
+ case BC_USETN: /* Store a number constant into an upvalue (no barrier check). */
+ | ins_AD // RA = upvalue #, RD = num const
+ | lg LFUNC:RB, -16(BASE)
+ | sllg RA, RA, 3
+ | sllg RD, RD, 3
+ | cleartp LFUNC:RB
+ | ld f0, 0(RD, KBASE)
+ | lg UPVAL:RB, (offsetof(GCfuncL, uvptr))(RA, LFUNC:RB)
+ | lg RA, UPVAL:RB->v // Pointer to the upvalue's value.
+ | std f0, 0(RA)
+ | ins_next
+ break;
+ case BC_USETP: /* Store a primitive value into an upvalue (no barrier check). */
+ | ins_AD // RA = upvalue #, RD = primitive type (~)
+ | lg LFUNC:RB, -16(BASE)
+ | sllg RA, RA, 3
+ | cleartp LFUNC:RB
+ | lg UPVAL:RB, (offsetof(GCfuncL, uvptr))(RA, LFUNC:RB)
+ | sllg RD, RD, 47 // Move the operand into the tag position ...
+ | lghi TMPR0, -1
+ | xgr RD, TMPR0 // ... and invert all 64 bits.
+ | lg RA, UPVAL:RB->v
+ | stg RD, 0(RA)
+ | ins_next
+ break;
+ case BC_UCLO: /* Close upvalues from slot RA upward (if any are open), then jump. */
+ | ins_AD // RA = level, RD = target
+ | branchPC RD // Do this first to free RD.
+ | lg L:RB, SAVE_L
+ | ltg TMPR0, L:RB->openupval // Load and test the open upvalue list.
+ | je >1 // Zero: nothing to close.
+ | stg BASE, L:RB->base
+ | sllg RA, RA, 3
+ | la CARG2, 0(RA, BASE)
+ | lgr L:CARG1, L:RB
+ | brasl r14, extern lj_func_closeuv // (lua_State *L, TValue *level)
+ | lg BASE, L:RB->base // Reload BASE from L->base after the call.
+ |1:
+ | ins_next
+ break;
+
+ case BC_FNEW: /* Create a closure from a prototype constant via lj_func_newL_gc. */
+ | ins_AND // RA = dst, RD = proto const (~) (holding function prototype)
+ | lg L:RB, SAVE_L
+ | stg BASE, L:RB->base
+ | lg CARG3, -16(BASE) // Parent: function object of the current frame.
+ | cleartp CARG3
+ | sllg RD, RD, 3
+ | lg CARG2, 0(RD, KBASE) // Fetch GCproto *.
+ | lgr CARG1, L:RB
+ | stg PC, SAVE_PC
+ | // (lua_State *L, GCproto *pt, GCfuncL *parent)
+ | brasl r14, extern lj_func_newL_gc
+ | // GCfuncL * returned in r2 (CRET1).
+ | lg BASE, L:RB->base // Reload BASE from L->base after the call.
+ | llgc RA, PC_RA // Load the A operand after the call.
+ | sllg RA, RA, 3
+ | settp LFUNC:CRET1, LJ_TFUNC
+ | stg LFUNC:CRET1, 0(RA, BASE)
+ | ins_next
+ break;
+ case BC_TNEW: /* Create a new table; runs a GC step first when gc.total >= gc.threshold. */
+ | ins_AD // RA = dst, RD = hbits|asize
+ | lg L:RB, SAVE_L
+ | stg BASE, L:RB->base
+ | lg RA, (DISPATCH_GL(gc.total))(DISPATCH)
+ | clg RA, (DISPATCH_GL(gc.threshold))(DISPATCH)
+ | stg PC, SAVE_PC
+ | jhe >5
+ |1:
+ | srlg CARG3, RD, 11 // hbits = RD >> 11.
+ | llill TMPR0, 0x7ff
+ | nr RD, TMPR0 // asize = RD & 0x7ff.
+ | cr RD, TMPR0
+ | je >3
+ |2:
+ | lgr L:CARG1, L:RB
+ | llgfr CARG2, RD
+ | brasl r14, extern lj_tab_new // (lua_State *L, uint32_t asize, uint32_t hbits)
+ | // Table * returned in r2 (CRET1).
+ | lg BASE, L:RB->base
+ | llgc RA, PC_RA // Load the A operand after the call.
+ | sllg RA, RA, 3
+ | settp TAB:CRET1, LJ_TTAB
+ | stg TAB:CRET1, 0(RA, BASE)
+ | ins_next
+ |3: // Turn 0x7ff into 0x801.
+ | llill RD, 0x801
+ | j <2
+ |5: // GC threshold reached.
+ | lgr L:CARG1, L:RB
+ | brasl r14, extern lj_gc_step_fixtop // (lua_State *L)
+ | llgh RD, PC_RD // Reload the D operand after the call.
+ | j <1
+ break;
+ case BC_TDUP: /* Duplicate a template table; runs a GC step first when gc.total >= gc.threshold. */
+ | ins_AND // RA = dst, RD = table const (~) (holding template table)
+ | lg L:RB, SAVE_L
+ | lg RA, (DISPATCH_GL(gc.total))(DISPATCH)
+ | stg PC, SAVE_PC
+ | clg RA, (DISPATCH_GL(gc.threshold))(DISPATCH) // Condition code is tested by 'jhe >3' below.
+ | stg BASE, L:RB->base
+ | jhe >3
+ |2:
+ | sllg RD, RD, 3
+ | lg TAB:CARG2, 0(RD, KBASE)
+ | lgr L:CARG1, L:RB
+ | brasl r14, extern lj_tab_dup // (lua_State *L, Table *kt)
+ | // Table * returned in r2 (CRET1).
+ | lg BASE, L:RB->base
+ | llgc RA, PC_RA // Load the A operand after the call.
+ | settp TAB:CRET1, LJ_TTAB
+ | sllg RA, RA, 3
+ | stg TAB:CRET1, 0(RA, BASE)
+ | ins_next
+ |3: // GC threshold reached.
+ | lgr L:CARG1, L:RB
+ | brasl r14, extern lj_gc_step_fixtop // (lua_State *L)
+ | llgh RD, PC_RD // Need to reload RD.
+ | lghi TMPR0, -1
+ | xgr RD, TMPR0 // not RD
+ | j <2
+ break;
+
+ case BC_GGET: /* Global get: look up a string key in the function's env table via BC_TGETS_Z. */
+ | ins_AND // RA = dst, RD = str const (~)
+ | lg LFUNC:RB, -16(BASE)
+ | cleartp LFUNC:RB
+ | lg TAB:RB, LFUNC:RB->env
+ | sllg TMPR1, RD, 3
+ | lg STR:RC, 0(TMPR1, KBASE)
+ | j ->BC_TGETS_Z // Enter with RB = table, RC = string key.
+ break;
+ case BC_GSET: /* Global set: store under a string key in the function's env table via BC_TSETS_Z. */
+ | ins_AND // RA = src, RD = str const (~)
+ | lg LFUNC:RB, -16(BASE)
+ | cleartp LFUNC:RB
+ | lg TAB:RB, LFUNC:RB->env
+ | sllg TMPR1, RD, 3
+ | lg STR:RC, 0(TMPR1, KBASE)
+ | j ->BC_TSETS_Z // Enter with RB = table, RC = string key.
+ break;
+
+ case BC_TGETV: /* Table get with a variable key: array fast path, string keys forwarded to BC_TGETS_Z. */
+ | ins_ABC // RA = dst, RB = table, RC = key
+ | sllg RB, RB, 3
+ | lg TAB:RB, 0(RB, BASE)
+ | sllg RC, RC, 3
+ | lg RC, 0(RC, BASE)
+ | checktab TAB:RB, ->vmeta_tgetv
+ |
+ | // Integer key?
+ | checkint RC, >5
+ | cl RC, TAB:RB->asize // Takes care of unordered, too.
+ | jhe ->vmeta_tgetv // Not in array part? Use fallback.
+ | llgfr RC, RC // Zero-extend the 32-bit index.
+ | sllg RC, RC, 3
+ | ag RC, TAB:RB->array // RC = address of the array slot.
+ | // Get array slot.
+ | lg ITYPE, 0(RC)
+ | cghi ITYPE, LJ_TNIL // Avoid overwriting RB in fastpath.
+ | je >2
+ |1:
+ | sllg RA, RA, 3
+ | stg ITYPE, 0(RA, BASE)
+ | ins_next
+ |
+ |2: // Check for __index if table value is nil.
+ | lg TAB:TMPR1, TAB:RB->metatable
+ | cghi TAB:TMPR1, 0
+ | je <1 // No metatable: store the nil.
+ | tm TAB:TMPR1->nomm, 1<<MM_index
+ | je ->vmeta_tgetv // 'no __index' flag NOT set: check.
+ | j <1
+ |
+ |5: // String key?
+ | cghi ITYPE, LJ_TSTR; jne ->vmeta_tgetv // NOTE(review): assumes checkint left the key's type tag in ITYPE.
+ | cleartp STR:RC
+ | j ->BC_TGETS_Z
+ break;
+ case BC_TGETS: /* Table get with a string constant key: walks the hash chain. */
+ | ins_ABC
+ | sllg RB, RB, 3
+ | lg TAB:RB, 0(RB, BASE)
+ | lghi TMPR1, -1
+ | xgr RC, TMPR1 // Invert RC to get the constant index.
+ | sllg RC, RC, 3
+ | lg STR:RC, 0(RC, KBASE)
+ | checktab TAB:RB, ->vmeta_tgets
+ |->BC_TGETS_Z: // RB = GCtab *, RC = GCstr *
+ | l TMPR1, TAB:RB->hmask
+ | n TMPR1, STR:RC->hash // Node index = hmask & hash.
+ | lgfr TMPR1, TMPR1
+ | mghi TMPR1, #NODE // Scale by the node size.
+ | ag NODE:TMPR1, TAB:RB->node
+ | settp ITYPE, STR:RC, LJ_TSTR // Tagged key to compare against node keys.
+ |1:
+ | cg ITYPE, NODE:TMPR1->key
+ | jne >4
+ | // Get node value.
+ | lg ITYPE, NODE:TMPR1->val
+ | cghi ITYPE, LJ_TNIL
+ | je >5 // Key found, but nil value?
+ |2:
+ | sllg RA, RA, 3
+ | stg ITYPE, 0(RA, BASE)
+ | ins_next
+ |
+ |4: // Follow hash chain.
+ | lg NODE:TMPR1, NODE:TMPR1->next
+ | cghi NODE:TMPR1, 0
+ | jne <1
+ | // End of hash chain: key not found, nil result.
+ | lghi ITYPE, LJ_TNIL
+ |
+ |5: // Check for __index if table value is nil.
+ | lg TAB:TMPR1, TAB:RB->metatable
+ | cghi TAB:TMPR1, 0
+ | je <2 // No metatable: done.
+ | tm TAB:TMPR1->nomm, 1<<MM_index
+ | jne <2 // 'no __index' flag set: done.
+ | j ->vmeta_tgets // Caveat: preserve STR:RC.
+ break;
+ case BC_TGETB: /* Table get with a byte literal index: array part only, else vmeta_tgetb. */
+ | ins_ABC // RA = dst, RB = table, RC = byte literal
+ | sllg RB, RB, 3
+ | lg TAB:RB, 0(RB, BASE)
+ | checktab TAB:RB, ->vmeta_tgetb
+ | cl RC, TAB:RB->asize
+ | jhe ->vmeta_tgetb // Not in array part: use fallback.
+ | sllg RC, RC, 3
+ | ag RC, TAB:RB->array // RC = address of the array slot.
+ | // Get array slot.
+ | lg ITYPE, 0(RC)
+ | cghi ITYPE, LJ_TNIL
+ | je >2
+ |1:
+ | sllg RA, RA, 3
+ | stg ITYPE, 0(RA, BASE)
+ | ins_next
+ |
+ |2: // Check for __index if table value is nil.
+ | lg TAB:TMPR1, TAB:RB->metatable
+ | cghi TAB:TMPR1, 0
+ | je <1 // No metatable: store the nil.
+ | tm TAB:TMPR1->nomm, 1<<MM_index
+ | je ->vmeta_tgetb // 'no __index' flag NOT set: check.
+ | j <1
+ break;
+ case BC_TGETR: /* Table get without type or metatable checks in the fast path. */
+ | ins_ABC // RA = dst, RB = table, RC = key
+ | sllg RB, RB, 3
+ | lg TAB:RB, 0(RB, BASE)
+ | cleartp TAB:RB
+ | sllg RC, RC, 3
+ | llgf RC, 4(RC, BASE) // Load low word (big endian).
+ | cl RC, TAB:RB->asize
+ | jhe ->vmeta_tgetr // Not in array part? Use fallback.
+ | sllg RC, RC, 3
+ | ag RC, TAB:RB->array // RC = address of the array slot.
+ | // Get array slot.
+ |->BC_TGETR_Z:
+ | lg ITYPE, 0(RC)
+ |->BC_TGETR2_Z:
+ | sllg RA, RA, 3
+ | stg ITYPE, 0(RA, BASE)
+ | ins_next
+ break;
+
+ case BC_TSETV: /* Table set with a variable key: array fast path, string keys forwarded to BC_TSETS_Z. */
+ | ins_ABC // RA = src, RB = table, RC = key
+ | sllg RB, RB, 3
+ | lg TAB:RB, 0(RB, BASE)
+ | sllg RC, RC, 3
+ | lg RC, 0(RC, BASE)
+ | checktab TAB:RB, ->vmeta_tsetv
+ |
+ | // Integer key?
+ | checkint RC, >5
+ | cl RC, TAB:RB->asize // Takes care of unordered, too.
+ | jhe ->vmeta_tsetv // Not in array part: use fallback.
+ | llgfr RC, RC // Zero-extend the 32-bit index.
+ | sllg RC, RC, 3
+ | ag RC, TAB:RB->array // RC = address of the array slot.
+ | lghi TMPR0, LJ_TNIL
+ | cg TMPR0, 0(RC)
+ | je >3 // Previous value is nil?
+ |1:
+ | tm TAB:RB->marked, LJ_GC_BLACK // isblack(table)
+ | jne >7
+ |2: // Set array slot.
+ | sllg RA, RA, 3
+ | lg RB, 0(RA, BASE)
+ | stg RB, 0(RC)
+ | ins_next
+ |
+ |3: // Check for __newindex if previous value is nil.
+ | lg TAB:TMPR1, TAB:RB->metatable
+ | cghi TAB:TMPR1, 0
+ | je <1 // No metatable: plain store.
+ | tm TAB:TMPR1->nomm, 1<<MM_newindex
+ | je ->vmeta_tsetv // 'no __newindex' flag NOT set: check.
+ | j <1
+ |
+ |5: // String key?
+ | cghi ITYPE, LJ_TSTR; jne ->vmeta_tsetv // NOTE(review): assumes checkint left the key's type tag in ITYPE.
+ | cleartp STR:RC
+ | j ->BC_TSETS_Z
+ |
+ |7: // Possible table write barrier for the value. Skip valiswhite check.
+ | barrierback TAB:RB, TMPR1
+ | j <2
+ break;
+ case BC_TSETS: /* Table set with a string constant key: walks the hash chain, adds the key if missing. */
+ | ins_ABC // RA = src, RB = table, RC = str const (~)
+ | sllg RB, RB, 3
+ | lg TAB:RB, 0(RB, BASE)
+ | lghi TMPR0, -1
+ | xgr RC, TMPR0 // ~RC
+ | sllg RC, RC, 3
+ | lg STR:RC, 0(RC, KBASE)
+ | checktab TAB:RB, ->vmeta_tsets
+ |->BC_TSETS_Z: // RB = GCtab *, RC = GCstr *
+ | l TMPR1, TAB:RB->hmask
+ | n TMPR1, STR:RC->hash // Node index = hmask & hash.
+ | lgfr TMPR1, TMPR1
+ | mghi TMPR1, #NODE // Scale by the node size.
+ | mvi TAB:RB->nomm, 0 // Clear metamethod cache.
+ | ag NODE:TMPR1, TAB:RB->node
+ | settp ITYPE, STR:RC, LJ_TSTR // Tagged key to compare against node keys.
+ |1:
+ | cg ITYPE, NODE:TMPR1->key
+ | jne >5
+ | // Ok, key found. Assumes: offsetof(Node, val) == 0
+ | lghi TMPR0, LJ_TNIL
+ | cg TMPR0, 0(TMPR1)
+ | je >4 // Previous value is nil?
+ |2:
+ | tm TAB:RB->marked, LJ_GC_BLACK // isblack(table)
+ | jne >7
+ |3: // Set node value.
+ | sllg RA, RA, 3
+ | lg ITYPE, 0(RA, BASE)
+ | stg ITYPE, 0(TMPR1)
+ | ins_next
+ |
+ |4: // Check for __newindex if previous value is nil.
+ | lg TAB:ITYPE, TAB:RB->metatable // ITYPE is reused as a scratch register here.
+ | cghi TAB:ITYPE, 0
+ | je <2
+ | tm TAB:ITYPE->nomm, 1<<MM_newindex
+ | je ->vmeta_tsets // 'no __newindex' flag NOT set: check.
+ | j <2
+ |
+ |5: // Follow hash chain.
+ | lg NODE:TMPR1, NODE:TMPR1->next
+ | cghi NODE:TMPR1, 0
+ | jne <1
+ | // End of hash chain: key not found, add a new one.
+ |
+ | // But check for __newindex first.
+ | lg TAB:TMPR1, TAB:RB->metatable
+ | cghi TAB:TMPR1, 0
+ | je >6 // No metatable: continue.
+ | tm TAB:TMPR1->nomm, 1<<MM_newindex
+ | je ->vmeta_tsets // 'no __newindex' flag NOT set: check.
+ |6:
+ | stg ITYPE, SAVE_TMP // Spill the tagged key so it can be passed by address.
+ | lg L:CARG1, SAVE_L
+ | stg BASE, L:CARG1->base
+ | la CARG3, SAVE_TMP
+ | lgr CARG2, TAB:RB
+ | stg PC, SAVE_PC
+ | brasl r14, extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k)
+ | // Handles write barrier for the new key. TValue * returned in r2 (CRET1).
+ | lgr TMPR1, CRET1
+ | lg L:CRET1, SAVE_L
+ | lg BASE, L:CRET1->base
+ | llgc RA, PC_RA // Reload the A operand after the call.
+ | j <2 // Must check write barrier for value.
+ |
+ |7: // Possible table write barrier for the value. Skip valiswhite check.
+ | barrierback TAB:RB, ITYPE
+ | j <3
+ break;
+ case BC_TSETB: /* Table set with a byte literal index: array part only, else vmeta_tsetb. */
+ | ins_ABC // RA = src, RB = table, RC = byte literal
+ | sllg RB, RB, 3
+ | lg TAB:RB, 0(RB, BASE)
+ | checktab TAB:RB, ->vmeta_tsetb
+ | cl RC, TAB:RB->asize
+ | jhe ->vmeta_tsetb // Not in array part: use fallback.
+ | sllg RC, RC, 3
+ | ag RC, TAB:RB->array // RC = address of the array slot.
+ | lghi TMPR0, LJ_TNIL
+ | cg TMPR0, 0(RC)
+ | je >3 // Previous value is nil?
+ |1:
+ | tm TAB:RB->marked, LJ_GC_BLACK // isblack(table)
+ | jne >7
+ |2: // Set array slot.
+ | sllg RA, RA, 3
+ | lg ITYPE, 0(RA, BASE)
+ | stg ITYPE, 0(RC)
+ | ins_next
+ |
+ |3: // Check for __newindex if previous value is nil.
+ | lg TAB:TMPR1, TAB:RB->metatable
+ | cghi TAB:TMPR1, 0
+ | je <1 // No metatable: plain store.
+ | tm TAB:TMPR1->nomm, 1<<MM_newindex
+ | je ->vmeta_tsetb // 'no __newindex' flag NOT set: check.
+ | j <1
+ |
+ |7: // Possible table write barrier for the value. Skip valiswhite check.
+ | barrierback TAB:RB, TMPR1
+ | j <2
+ break;
+ case BC_TSETR: /* Table set without type or metatable checks in the fast path. */
+ | ins_ABC // RA = src, RB = table, RC = key
+ | sllg RB, RB, 3
+ | lg TAB:RB, 0(RB, BASE)
+ | cleartp TAB:RB
+ | sllg RC, RC, 3
+ | lg RC, 0(RC, BASE)
+ | tm TAB:RB->marked, LJ_GC_BLACK // isblack(table)
+ | jne >7
+ |2:
+ | cl RC, TAB:RB->asize // 32-bit unsigned compare of the key's low word.
+ | jhe ->vmeta_tsetr // Not in array part: use fallback.
+ | llgfr RC, RC // Zero-extend the 32-bit index.
+ | sllg RC, RC, 3
+ | ag RC, TAB:RB->array
+ | // Set array slot.
+ |->BC_TSETR_Z:
+ | sllg RA, RA, 3
+ | lg ITYPE, 0(RA, BASE)
+ | stg ITYPE, 0(RC)
+ | ins_next
+ |
+ |7: // Possible table write barrier for the value. Skip valiswhite check.
+ | barrierback TAB:RB, TMPR1
+ | j <2
+ break;
+
+ case BC_TSETM: /* Copy MULTRES-1 result slots into a table's array part, resizing if needed. */
+ | ins_AD // RA = base (table at base-1), RD = num const (start index)
+ |1:
+ | sllg RA, RA, 3
+ | sllg TMPR1, RD, 3
+ | llgf TMPR1, 4(TMPR1, KBASE) // Integer constant is in lo-word.
+ | la RA, 0(RA, BASE)
+ | lg TAB:RB, -8(RA) // Guaranteed to be a table.
+ | cleartp TAB:RB
+ | tm TAB:RB->marked, LJ_GC_BLACK // isblack(table)
+ | jne >7
+ |2:
+ | llgf RD, SAVE_MULTRES
+ | aghi RD, -1 // Number of values to copy.
+ | je >4 // Nothing to copy?
+ | agr RD, TMPR1 // Compute needed size.
+ | clgf RD, TAB:RB->asize
+ | jh >5 // Doesn't fit into array part?
+ | sgr RD, TMPR1 // Back to the plain copy count.
+ | sllg TMPR1, TMPR1, 3
+ | ag TMPR1, TAB:RB->array // TMPR1 = address of the first destination slot.
+ |3: // Copy result slots to table.
+ | lg RB, 0(RA)
+ | la RA, 8(RA)
+ | stg RB, 0(TMPR1)
+ | la TMPR1, 8(TMPR1)
+ | brctg RD, <3 // Decrement RD and loop while non-zero.
+ |4:
+ | ins_next
+ |
+ |5: // Need to resize array part.
+ | lg L:CARG1, SAVE_L
+ | stg BASE, L:CARG1->base
+ | lgr CARG2, TAB:RB
+ | lgfr CARG3, RD
+ | lgr L:RB, L:CARG1
+ | stg PC, SAVE_PC
+ | brasl r14, extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize)
+ | lg BASE, L:RB->base
+ | llgc RA, PC_RA // Restore RA.
+ | llgh RD, PC_RD // Restore RD.
+ | j <1 // Retry.
+ |
+ |7: // Possible table write barrier for any value. Skip valiswhite check.
+ | barrierback TAB:RB, RD
+ | j <2
+ break;
+
+ /* -- Calls and vararg handling ----------------------------------------- */
+
+ case BC_CALL: case BC_CALLM: /* Call the function in slot RA; CALLM adds MULTRES to the argument count. */
+ | ins_A_C // RA = base, (RB = nresults+1,) RC = nargs+1 | extra_nargs
+ | sllg RA, RA, 3
+ | lgr RD, RC
+ if (op == BC_CALLM) {
+ | agf NARGS:RD, SAVE_MULTRES
+ }
+ | lg LFUNC:RB, 0(RA, BASE)
+ | checkfunc LFUNC:RB, ->vmeta_call_ra
+ | la BASE, 16(RA, BASE) // New BASE is 16 bytes above the function slot.
+ | ins_call
+ break;
+
+ case BC_CALLMT: /* Tail call with extra arguments: add MULTRES, then run into BC_CALLT. */
+ | ins_AD // RA = base, RD = extra_nargs
+ | a NARGS:RD, SAVE_MULTRES
+ | // Fall through. Assumes BC_CALLT follows and ins_AD is a no-op.
+ break; /* Ends only the C case; the emitted code continues into the next opcode. */
+ case BC_CALLT: /* Tail call: move the callee and its arguments down over the current frame. */
+ | ins_AD // RA = base, RD = nargs+1
+ | sllg RA, RA, 3
+ | la RA, 16(RA, BASE) // RA = address of the first argument.
+ | lgr KBASE, BASE // Use KBASE for move + vmeta_call hint.
+ | lg LFUNC:RB, -16(RA)
+ | checktp_nc LFUNC:RB, LJ_TFUNC, ->vmeta_call
+ |->BC_CALLT_Z:
+ | lg PC, -8(BASE)
+ | tmll PC, FRAME_TYPE
+ | jne >7
+ |1:
+ | stg LFUNC:RB, -16(BASE) // Copy func+tag down, reloaded below.
+ | st NARGS:RD, SAVE_MULTRES
+ | aghi NARGS:RD, -1
+ | je >3 // No arguments to move.
+ |2: // Move args down.
+ | lg RB, 0(RA)
+ | la RA, 8(RA)
+ | stg RB, 0(KBASE)
+ | la KBASE, 8(KBASE)
+ | brctg NARGS:RD, <2 // Decrement the count and loop while non-zero.
+ |
+ | lg LFUNC:RB, -16(BASE)
+ |3:
+ | cleartp LFUNC:RB
+ | llgf NARGS:RD, SAVE_MULTRES
+ | llgc TMPR1, LFUNC:RB->ffid
+ | cghi TMPR1, 1 // (> FF_C) Calling a fast function?
+ | jh >5
+ |4:
+ | ins_callt
+ |
+ |5: // Tailcall to a fast function.
+ | tmll PC, FRAME_TYPE // Lua frame below?
+ | jne <4
+ | llgc RA, PC_RA
+ | lcgr RA, RA // Negate RA for the downward offset.
+ | sllg RA, RA, 3
+ | lg LFUNC:KBASE, -32(RA, BASE) // Need to prepare KBASE.
+ | cleartp LFUNC:KBASE
+ | lg KBASE, LFUNC:KBASE->pc
+ | lg KBASE, (PC2PROTO(k))(KBASE)
+ | j <4
+ |
+ |7: // Tailcall from a vararg function.
+ | aghi PC, -FRAME_VARG
+ | tmll PC, FRAME_TYPEP
+ | jne >8 // Vararg frame below?
+ | sgr BASE, PC // Need to relocate BASE/KBASE down.
+ | lgr KBASE, BASE
+ | lg PC, -8(BASE)
+ | j <1
+ |8:
+ | aghi PC, FRAME_VARG // Undo the subtraction above.
+ | j <1
+ break;
+
+ case BC_ITERC: /* Generic-for iterator call: copies callable, state and control var, then calls. */
+ | ins_A // RA = base, (RB = nresults+1,) RC = nargs+1 (2+1)
+ | sllg RA, RA, 3
+ | la RA, 16(RA, BASE) // fb = base+2
+ | lg RB, -32(RA) // Copy state. fb[0] = fb[-4].
+ | lg RC, -24(RA) // Copy control var. fb[1] = fb[-3].
+ | stg RB, 0(RA)
+ | stg RC, 8(RA)
+ | lg LFUNC:RB, -40(RA) // Copy callable. fb[-2] = fb[-5]
+ | stg LFUNC:RB, -16(RA)
+ | lghi NARGS:RD, 2+1 // Handle like a regular 2-arg call.
+ | checkfunc LFUNC:RB, ->vmeta_call
+ | lgr BASE, RA // The copied frame becomes the new BASE.
+ | ins_call
+ break;
+
+ case BC_ITERN: /* Specialized table traversal: array part first, then hash part. */
+ | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1))
+ |.if JIT
+ | // NYI: add hotloop, record BC_ITERN.
+ |.endif
+ | sllg RA, RA, 3
+ | lg TAB:RB, -16(RA, BASE)
+ | cleartp TAB:RB
+ | llgf RC, -4(RA, BASE) // Get index from control var.
+ | llgf TMPR1, TAB:RB->asize
+ | la PC, 4(PC) // Step PC over the following 4-byte instruction.
+ | lg ITYPE, TAB:RB->array
+ |1: // Traverse array part.
+ | clr RC, TMPR1; jhe >5 // Index points after array part?
+ | sllg RD, RC, 3 // Warning: won't work if RD==RC!
+ | lg TMPR0, 0(RD, ITYPE)
+ | cghi TMPR0, LJ_TNIL; je >4
+ | // Copy array slot to returned value.
+ | lgr RB, TMPR0
+ | stg RB, 8(RA, BASE)
+ | // Return array index as a numeric key.
+ | setint ITYPE, RC
+ | stg ITYPE, 0(RA, BASE)
+ | ahi RC, 1
+ | sty RC, -4(RA, BASE) // Update control var.
+ |2:
+ | llgh RD, PC_RD // Get target from ITERL.
+ | branchPC RD
+ |3:
+ | ins_next
+ |
+ |4: // Skip holes in array part.
+ | ahi RC, 1
+ | j <1
+ |
+ |5: // Traverse hash part.
+ | sr RC, TMPR1 // Hash index = control index - asize.
+ |6:
+ | cl RC, TAB:RB->hmask; jh <3 // End of iteration? Branch to ITERL+1.
+ | llgfr ITYPE, RC
+ | mghi ITYPE, #NODE // Scale by the node size.
+ | ag NODE:ITYPE, TAB:RB->node
+ | lghi TMPR0, LJ_TNIL
+ | cg TMPR0, NODE:ITYPE->val; je >7
+ | ar TMPR1, RC // Next control index = asize + hash index + 1.
+ | ahi TMPR1, 1
+ | // Copy key and value from hash slot.
+ | lg RB, NODE:ITYPE->key
+ | lg RC, NODE:ITYPE->val
+ | stg RB, 0(RA, BASE)
+ | stg RC, 8(RA, BASE)
+ | sty TMPR1, -4(RA, BASE) // Update control var.
+ | j <2
+ |
+ |7: // Skip holes in hash part.
+ | ahi RC, 1
+ | j <6
+ break;
+
+ case BC_ISNEXT: /* Verify the iterator is next() over a table with a nil control var; else despecialize. */
+ | ins_AD // RA = base, RD = target (points to ITERN)
+ | sllg RA, RA, 3
+ | lg CFUNC:RB, -24(RA, BASE)
+ | checkfunc CFUNC:RB, >5
+ | lg TMPR1, -16(RA, BASE)
+ | checktptp TMPR1, LJ_TTAB, >5
+ | lghi TMPR0, LJ_TNIL
+ | cg TMPR0, -8(RA, BASE); jne >5 // Control var must be nil.
+ | llgc TMPR1, CFUNC:RB->ffid
+ | clfi TMPR1, (uint8_t)FF_next_N; jne >5
+ | branchPC RD
+ | llihl TMPR1, 0x7fff // Build 0xfffe7fff in the upper 32 bits, ...
+ | iihh TMPR1, 0xfffe // ... leaving the lower 32 bits zero.
+ | stg TMPR1, -8(RA, BASE) // Initialize control var.
+ |1:
+ | ins_next
+ |5: // Despecialize bytecode if any of the checks fail.
+ | lghi TMPR0, BC_JMP
+ | stcy TMPR0, PC_OP // Overwrite this instruction's opcode with BC_JMP.
+ | branchPC RD
+ | mvi 3(PC), BC_ITERC // Byte 3 of the target instruction (NOTE(review): presumably its opcode byte on big endian).
+ | j <1
+ break;
+
+ case BC_VARG: /* Copy vararg slots to destination slots, nil-padding or growing the stack as needed. */
+ | ins_ABC // RA = base, RB = nresults+1, RC = numparams
+ | sllg RA, RA, 3
+ | sllg RB, RB, 3
+ | sllg RC, RC, 3
+ | la TMPR1, (16+FRAME_VARG)(RC, BASE)
+ | la RA, 0(RA, BASE)
+ | sg TMPR1, -8(BASE) // Subtract the frame word stored below BASE.
+ | // Note: TMPR1 may now be even _above_ BASE if nargs was < numparams.
+ | cghi RB, 0
+ | je >5 // Copy all varargs?
+ | lay RB, -8(RA, RB) // RB = end of the destination range.
+ | clgr TMPR1, BASE // No vararg slots?
+ | lghi TMPR0, LJ_TNIL
+ | jnl >2
+ |1: // Copy vararg slots to destination slots.
+ | lg RC, -16(TMPR1)
+ | la TMPR1, 8(TMPR1)
+ | stg RC, 0(RA)
+ | la RA, 8(RA)
+ | clgr RA, RB // All destination slots filled?
+ | jnl >3
+ | clgr TMPR1, BASE // No more vararg slots?
+ | jl <1
+ |2: // Fill up remainder with nil.
+ | stg TMPR0, 0(RA)
+ | la RA, 8(RA)
+ | clgr RA, RB
+ | jl <2
+ |3:
+ | ins_next
+ |
+ |5: // Copy all varargs.
+ | lghi TMPR0, 1
+ | st TMPR0, SAVE_MULTRES // MULTRES = 0+1
+ | lgr RC, BASE
+ | slgr RC, TMPR1 // RC = size of the vararg area in bytes.
+ | jno <3 // No vararg slots? (borrow or zero)
+ | llgfr RB, RC
+ | srlg RB, RB, 3 // Bytes -> slot count.
+ | ahi RB, 1
+ | st RB, SAVE_MULTRES // MULTRES = #varargs+1
+ | lg L:RB, SAVE_L
+ | agr RC, RA // End address of the copy destination.
+ | clg RC, L:RB->maxstack
+ | jh >7 // Need to grow stack?
+ |6: // Copy all vararg slots.
+ | lg RC, -16(TMPR1)
+ | la TMPR1, 8(TMPR1)
+ | stg RC, 0(RA)
+ | la RA, 8(RA)
+ | clgr TMPR1, BASE // No more vararg slots?
+ | jl <6
+ | j <3
+ |
+ |7: // Grow stack for varargs.
+ | stg BASE, L:RB->base
+ | stg RA, L:RB->top
+ | stg PC, SAVE_PC
+ | sgr TMPR1, BASE // Need delta, because BASE may change.
+ | st TMPR1, SAVE_TMP_HI
+ | llgf CARG2, SAVE_MULTRES
+ | aghi CARG2, -1 // n = #varargs.
+ | lgr CARG1, L:RB
+ | brasl r14, extern lj_state_growstack // (lua_State *L, int n)
+ | lg BASE, L:RB->base
+ | lgf TMPR1, SAVE_TMP_HI // Reload the saved delta, sign-extended.
+ | lg RA, L:RB->top
+ | agr TMPR1, BASE // Rebase the vararg pointer on the new BASE.
+ | j <6
+ break;
+
+ /* -- Returns ----------------------------------------------------------- */
+
+ case BC_RETM: /* Return with extra results: add MULTRES, then run into BC_RET. */
+ | ins_AD // RA = results, RD = extra_nresults
+ | agf RD, SAVE_MULTRES // MULTRES >=1, so RD >=1.
+ | // Fall through. Assumes BC_RET follows and ins_AD is a no-op.
+ break; /* Ends only the C case; the emitted code continues into the next opcode. */
+
+ case BC_RET: case BC_RET0: case BC_RET1: /* Return from a Lua function with N, 0 or 1 results. */
+ | ins_AD // RA = results, RD = nresults+1
+ if (op != BC_RET0) {
+ | sllg RA, RA, 3
+ }
+ |1:
+ | lg PC, -8(BASE)
+ | st RD, SAVE_MULTRES // Save nresults+1.
+ | tmll PC, FRAME_TYPE // Check frame type marker.
+ | jne >7 // Not returning to a fixarg Lua func?
+ switch (op) {
+ case BC_RET:
+ |->BC_RET_Z:
+ | lgr KBASE, BASE // Use KBASE for result move.
+ | aghi RD, -1
+ | je >3 // No results to move.
+ |2: // Move results down.
+ | lg RB, 0(KBASE, RA)
+ | stg RB, -16(KBASE)
+ | la KBASE, 8(KBASE)
+ | brctg RD, <2 // Decrement RD and loop while non-zero.
+ |3:
+ | llgf RD, SAVE_MULTRES // Note: MULTRES may be >256.
+ | llgc RB, PC_RB
+ |5:
+ | cgr RB, RD // More results expected?
+ | jh >6
+ break;
+ case BC_RET1:
+ | lg RB, 0(BASE, RA) // Move the single result down.
+ | stg RB, -16(BASE)
+ /* fallthrough */
+ case BC_RET0:
+ |5:
+ | llgc TMPR1, PC_RB
+ | cgr TMPR1, RD // More results expected?
+ | jh >6
+ default:
+ break;
+ }
+ | llgc RA, PC_RA
+ | lcgr RA, RA // Negate RA for the downward offset.
+ | sllg RA, RA, 3
+ | lay BASE, -16(RA, BASE) // base = base - (RA+2)*8
+ | lg LFUNC:KBASE, -16(BASE)
+ | cleartp LFUNC:KBASE
+ | lg KBASE, LFUNC:KBASE->pc
+ | lg KBASE, PC2PROTO(k)(KBASE)
+ | ins_next
+ |
+ |6: // Fill up results with nil.
+ | lghi TMPR1, LJ_TNIL
+ if (op == BC_RET) {
+ | stg TMPR1, -16(KBASE) // Note: relies on shifted base.
+ | la KBASE, 8(KBASE)
+ } else {
+ | sllg RC, RD, 3 // RC used as temp.
+ | stg TMPR1, -24(RC, BASE)
+ }
+ | la RD, 1(RD) // RD += 1, then recheck at label 5.
+ | j <5
+ |
+ |7: // Non-standard return case.
+ | lay RB, -FRAME_VARG(PC)
+ | tmll RB, FRAME_TYPEP
+ | jne ->vm_return
+ | // Return from vararg function: relocate BASE down and RA up.
+ | sgr BASE, RB
+ if (op != BC_RET0) {
+ | agr RA, RB
+ }
+ | j <1
+ break;
+
+ /* -- Loops and branches ------------------------------------------------ */
+
+ |.define FOR_IDX, 0(RA) // Loop slots are addressed relative to RA: slot 0 ...
+ |.define FOR_STOP, 8(RA) // ... slot 1 ...
+ |.define FOR_STEP, 16(RA) // ... slot 2 ...
+ |.define FOR_EXT, 24(RA) // ... slot 3.
+
+ case BC_FORL: /* Emits only the hot-loop counter (JIT builds), then runs into BC_IFORL. */
+ |.if JIT
+ | hotloop RB
+ |.endif
+ | // Fall through. Assumes BC_IFORL follows and ins_AJ is a no-op.
+ break; /* Ends only the C case; the emitted code continues into the next opcode. */
+
+ case BC_JFORI:
+ case BC_JFORL:
+#if !LJ_HASJIT
+ break;
+#endif
+ case BC_FORI:
+ case BC_IFORL:
+ vk = (op == BC_IFORL || op == BC_JFORL);
+ | ins_AJ // RA = base, RD = target (after end of loop or start of loop)
+ | sllg RA, RA, 3
+ | la RA, 0(RA, BASE)
+ | lg RB, FOR_IDX
+ | checkint RB, >9
+ | lg TMPR1, FOR_STOP
+ if (!vk) {
+ | checkint TMPR1, ->vmeta_for
+ | lg ITYPE, FOR_STEP
+ | chi ITYPE, 0; jl >5
+ | srag ITYPE, ITYPE, 47
+ | cghi ITYPE, LJ_TISNUM; jne ->vmeta_for
+ } else {
+#ifdef LUA_USE_ASSERT
+ | // lg TMPR1, FOR_STOP
+ | checkinttp TMPR1, ->assert_bad_for_arg_type
+ | lg TMPR0, FOR_STEP
+ | checkinttp TMPR0, ->assert_bad_for_arg_type
+#endif
+ | lg ITYPE, FOR_STEP
+ | chi ITYPE, 0; jl >5
+ | ar RB, ITYPE; jo >1
+ | setint RB
+ | stg RB, FOR_IDX
+ }
+ | cr RB, TMPR1
+ | stg RB, FOR_EXT
+ if (op == BC_FORI) {
+ | jle >7
+ |1:
+ |6:
+ | branchPC RD
+ } else if (op == BC_JFORI) {
+ | branchPC RD
+ | llgh RD, PC_RD
+ | jle =>BC_JLOOP
+ |1:
+ |6:
+ } else if (op == BC_IFORL) {
+ | jh >7
+ |6:
+ | branchPC RD
+ |1:
+ } else {
+ | jle =>BC_JLOOP
+ |1:
+ |6:
+ }
+ |7:
+ | ins_next
+ |
+ |5: // Invert check for negative step.
+ if (!vk) {
+ | srag ITYPE, ITYPE, 47
+ | cghi ITYPE, LJ_TISNUM; jne ->vmeta_for
+ } else {
+ | ar RB, ITYPE; jo <1
+ | setint RB
+ | stg RB, FOR_IDX
+ }
+ | cr RB, TMPR1
+ | stg RB, FOR_EXT
+ if (op == BC_FORI) {
+ | jhe <7
+ } else if (op == BC_JFORI) {
+ | branchPC RD
+ | llgh RD, PC_RD
+ | jhe =>BC_JLOOP
+ } else if (op == BC_IFORL) {
+ | jl <7
+ } else {
+ | jhe =>BC_JLOOP
+ }
+ | j <6
+ |9: // Fallback to FP variant.
+ if (!vk) {
+ | jhe ->vmeta_for
+ }
+ if (!vk) {
+ | lg TMPR0, FOR_STOP
+ | checknumtp TMPR0, ->vmeta_for
+ } else {
+#ifdef LUA_USE_ASSERT
+ | lg TMPR0, FOR_STOP
+ | checknumtp TMPR0, ->assert_bad_for_arg_type
+ | lg TMPR0, FOR_STEP
+ | checknumtp TMPR0, ->assert_bad_for_arg_type
+#endif
+ }
+ | lg RB, FOR_STEP
+ if (!vk) {
+ | checknum RB, ->vmeta_for
+ }
+ | ld f0, FOR_IDX
+ | ld f1, FOR_STOP
+ if (vk) {
+ | adb f0, FOR_STEP
+ | std f0, FOR_IDX
+ }
+ | cghi RB, 0; jl >3
+ | cdbr f1, f0
+ |1:
+ | std f0, FOR_EXT
+ if (op == BC_FORI) {
+ | jnl <7
+ } else if (op == BC_JFORI) {
+ | branchPC RD
+ | llgh RD, PC_RD
+ | jnl =>BC_JLOOP
+ } else if (op == BC_IFORL) {
+ | jl <7
+ } else {
+ | jnl =>BC_JLOOP
+ }
+ | j <6
+ |
+ |3: // Invert comparison if step is negative.
+ | cdbr f0, f1
+ | j <1
+ break;
+
+ case BC_ITERL:
+ |.if JIT
+ | hotloop RB
+ |.endif
+ | // Fall through. Assumes BC_IITERL follows and ins_AJ is a no-op.
+ break;
+
+ case BC_JITERL:
+#if !LJ_HASJIT
+ break;
+#endif
+ case BC_IITERL:
+ | ins_AJ // RA = base, RD = target
+ | sllg RA, RA, 3
+ | la RA, 0(RA, BASE)
+ | lg RB, 0(RA)
+ | cghi RB, LJ_TNIL; je >1 // Stop if iterator returned nil.
+ if (op == BC_JITERL) {
+ | stg RB, -8(RA)
+ | j =>BC_JLOOP
+ } else {
+ | branchPC RD // Otherwise save control var + branch.
+ | stg RB, -8(RA)
+ }
+ |1:
+ | ins_next
+ break;
+
+ case BC_LOOP:
+ | ins_A // RA = base, RD = target (loop extent)
+ | // Note: RA/RD is only used by trace recorder to determine scope/extent
+ | // This opcode does NOT jump, its only purpose is to detect a hot loop.
+ |.if JIT
+ | hotloop RBd
+ |.endif
+ | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op.
+ break;
+
+ case BC_ILOOP:
+ | ins_A // RA = base, RD = target (loop extent)
+ | ins_next
+ break;
+
+ case BC_JLOOP:
+ | stg r0, 0
+ | stg r0, 0
+ break;
+
+ case BC_JMP:
+ | ins_AJ // RA = unused, RD = target
+ | branchPC RD
+ | ins_next
+ break;
+
+ /* -- Function headers -------------------------------------------------- */
+
+ /*
+ ** Reminder: A function may be called with func/args above L->maxstack,
+ ** i.e. occupying EXTRA_STACK slots. And vmeta_call may add one extra slot,
+ ** too. This means all FUNC* ops (including fast functions) must check
+ ** for stack overflow _before_ adding more slots!
+ */
+
+ case BC_FUNCF:
+ |.if JIT
+ | stg r0, 0
+ |.endif
+ case BC_FUNCV: /* NYI: compiled vararg functions. */
+ | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow and ins_AD is a no-op.
+ break;
+
+ case BC_JFUNCF:
+#if !LJ_HASJIT
+ break;
+#endif
+ case BC_IFUNCF:
+ | ins_AD // BASE = new base, RA = framesize, RD = nargs+1
+ | lg KBASE, (PC2PROTO(k)-4)(PC)
+ | lg L:RB, SAVE_L
+ | sllg RA, RA, 3
+ | la RA, 0(RA, BASE) // Top of frame.
+ | clg RA, L:RB->maxstack
+ | jh ->vm_growstack_f
+ | llgc RA, (PC2PROTO(numparams)-4)(PC)
+ | clgr NARGS:RD, RA // Check for missing parameters.
+ | jle >3
+ |2:
+ if (op == BC_JFUNCF) {
+ | llgh RD, PC_RD
+ | j =>BC_JLOOP
+ } else {
+ | ins_next
+ }
+ |
+ |3: // Clear missing parameters.
+ | sllg TMPR1, NARGS:RD, 3
+ | lghi TMPR0, LJ_TNIL
+ |4:
+ | stg TMPR0, -8(TMPR1, BASE)
+ | la TMPR1, 8(TMPR1)
+ | la RD, 1(RD)
+ | clgr RD, RA
+ | jle <4
+ | j <2
+ break;
+
+ case BC_JFUNCV:
+#if !LJ_HASJIT
+ break;
+#endif
+ | stg r0, 0 // NYI: compiled vararg functions
+ break; /* NYI: compiled vararg functions. */
+
+ case BC_IFUNCV:
+ | ins_AD // BASE = new base, RA = framesize, RD = nargs+1
+ | sllg TMPR1, NARGS:RD, 3
+ | la RB, (FRAME_VARG+8)(TMPR1)
+ | la RD, 8(TMPR1, BASE)
+ | lg LFUNC:KBASE, -16(BASE)
+ | stg RB, -8(RD) // Store delta + FRAME_VARG.
+ | stg LFUNC:KBASE, -16(RD) // Store copy of LFUNC.
+ | lg L:RB, SAVE_L
+ | sllg RA, RA, 3
+ | la RA, 0(RA, RD)
+ | cg RA, L:RB->maxstack
+ | jh ->vm_growstack_v // Need to grow stack.
+ | lgr RA, BASE
+ | lgr BASE, RD
+ | llgc RB, (PC2PROTO(numparams)-4)(PC)
+ | cghi RB, 0
+ | je >2
+ | aghi RA, 8
+ | lghi TMPR1, LJ_TNIL
+ |1: // Copy fixarg slots up to new frame.
+ | la RA, 8(RA)
+ | cgr RA, BASE
+ | jnl >3 // Less args than parameters?
+ | lg KBASE, -16(RA)
+ | stg KBASE, 0(RD)
+ | la RD, 8(RD)
+ | stg TMPR1, -16(RA) // Clear old fixarg slot (help the GC).
+ | brctg RB, <1
+ |2:
+ if (op == BC_JFUNCV) {
+ | llgh RD, PC_RD
+ | j =>BC_JLOOP
+ } else {
+ | lg KBASE, (PC2PROTO(k)-4)(PC)
+ | ins_next
+ }
+ |
+ |3: // Clear missing parameters.
+ | stg TMPR1, 0(RD) // TMPR1=LJ_TNIL (-1) here.
+ | la RD, 8(RD)
+ | brctg RB, <3
+ | j <2
+ break;
+
+ case BC_FUNCC:
+ case BC_FUNCCW:
+ | ins_AD // BASE = new base, RD = nargs+1
+ | lg CFUNC:RB, -16(BASE)
+ | cleartp CFUNC:RB
+ | lg KBASE, CFUNC:RB->f
+ | lg L:RB, SAVE_L
+ | sllg RD, NARGS:RD, 3
+ | lay RD, -8(RD,BASE)
+ | stg BASE, L:RB->base
+ | la RA, (8*LUA_MINSTACK)(RD)
+ | clg RA, L:RB->maxstack
+ | stg RD, L:RB->top
+ | lgr CARG1, L:RB
+ if (op != BC_FUNCC) {
+ | lgr CARG2, KBASE
+ }
+ | jh ->vm_growstack_c // Need to grow stack.
+ | set_vmstate C
+ if (op == BC_FUNCC) {
+ | basr r14, KBASE // (lua_State *L)
+ } else {
+ | // (lua_State *L, lua_CFunction f)
+ | lg TMPR1, (DISPATCH_GL(wrapf))(DISPATCH)
+ | basr r14, TMPR1
+ }
+ | // nresults returned in r2 (CRET1).
+ | lgr RD, CRET1
+ | lg BASE, L:RB->base
+ | stg L:RB, (DISPATCH_GL(cur_L))(DISPATCH)
+ | set_vmstate INTERP
+ | sllg TMPR1, RD, 3
+ | la RA, 0(TMPR1, BASE)
+ | lcgr RA, RA
+ | ag RA, L:RB->top // RA = (L->top-(L->base+nresults))*8
+ | lg PC, -8(BASE) // Fetch PC of caller.
+ | j ->vm_returnc
+ break;
+
+ /* ---------------------------------------------------------------------- */
+
+ default:
+ fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]);
+ exit(2);
+ break;
+ }
+}
+
+static int build_backend(BuildCtx *ctx) /* Emits the subroutines, then one build_ins() per opcode; returns BC__MAX. */
+{
+ int op;
+ dasm_growpc(Dst, BC__MAX); /* BC__MAX PC labels; presumably one per opcode for the =>BC_* references -- confirm. */
+ build_subroutines(ctx); /* Subroutines are emitted before any per-opcode code. */
+ |.code_op
+ for (op = 0; op < BC__MAX; op++) /* Every opcode in [0, BC__MAX), in numeric order. */
+ build_ins(ctx, (BCOp)op, op); /* op is passed both as the BCOp and as the int third argument. */
+ return BC__MAX;
+}
+
+/* Emit pseudo frame-info for all assembler functions: writes .debug_frame (and, unless LJ_NO_UNWIND, .eh_frame) assembler text to ctx->fp; only BUILD_elfasm mode emits anything. */
+static void emit_asm_debug(BuildCtx *ctx)
+{
+ int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code); /* Byte offset of the vm_ffi_call global from the start of ctx->code. */
+ switch (ctx->mode) {
+ case BUILD_elfasm:
+ fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n");
+ fprintf(ctx->fp, /* CIE .Lframe0: referenced by both .debug_frame FDEs below. */
+ ".Lframe0:\n"
+ "\t.long .LECIE0-.LSCIE0\n"
+ ".LSCIE0:\n"
+ "\t.long 0xffffffff\n" /* CIE id */
+ "\t.byte 0x1\n" /* version */
+ "\t.string \"\"\n" /* no augmentation */
+ "\t.uleb128 1\n" /* code alignment factor */
+ "\t.sleb128 -8\n" /* data alignment factor */
+ "\t.byte 0xe\n" /* return address register (r14) */
+ "\t.byte 0xc\n\t.uleb128 0xf\n\t.uleb128 160\n" /* def_cfa r15, offset 160 */
+ "\t.align 8\n"
+ ".LECIE0:\n\n");
+ fprintf(ctx->fp, /* FDE 0: starts at .Lbegin and covers fcofs bytes. */
+ ".LSFDE0:\n"
+ "\t.long .LEFDE0-.LASFDE0\n"
+ ".LASFDE0:\n"
+ "\t.long .Lframe0\n"
+ "\t.quad .Lbegin\n"
+ "\t.quad %d\n" /* range length: fcofs */
+ "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset: CFRAME_SIZE+160 */
+ "\t.byte 0x86\n\t.uleb128 0xe\n" /* offset r6 */
+ "\t.byte 0x87\n\t.uleb128 0xd\n" /* offset r7 */
+ "\t.byte 0x88\n\t.uleb128 0xc\n" /* offset r8 */
+ "\t.byte 0x89\n\t.uleb128 0xb\n" /* offset r9 */
+ "\t.byte 0x8a\n\t.uleb128 0xa\n" /* offset r10 */
+ "\t.byte 0x8b\n\t.uleb128 0x9\n" /* offset r11 */
+ "\t.byte 0x8c\n\t.uleb128 0x8\n" /* offset r12 */
+ "\t.byte 0x8d\n\t.uleb128 0x7\n" /* offset r13 */
+ "\t.byte 0x8e\n\t.uleb128 0x6\n" /* offset r14 */
+ "\t.byte 0x8f\n\t.uleb128 0x5\n" /* offset r15 */
+ "\t.align 8\n"
+ ".LEFDE0:\n\n", fcofs, CFRAME_SIZE+160);
+#if LJ_HASFFI
+ fprintf(ctx->fp, /* FDE 1: starts at lj_vm_ffi_call and covers the rest of the code. */
+ ".LSFDE1:\n"
+ "\t.long .LEFDE1-.LASFDE1\n"
+ ".LASFDE1:\n"
+ "\t.long .Lframe0\n"
+ "\t.quad lj_vm_ffi_call\n"
+ "\t.quad %d\n" /* range length: ctx->codesz - fcofs */
+ "\t.byte 0xe\n\t.uleb128 160\n" /* def_cfa_offset */
+ "\t.byte 0xd\n\t.uleb128 0xd\n" /* def_cfa_register r13 (FP) */
+ "\t.byte 0x86\n\t.uleb128 0xe\n" /* offset r6 */
+ "\t.byte 0x87\n\t.uleb128 0xd\n" /* offset r7 */
+ "\t.byte 0x88\n\t.uleb128 0xc\n" /* offset r8 */
+ "\t.byte 0x89\n\t.uleb128 0xb\n" /* offset r9 */
+ "\t.byte 0x8a\n\t.uleb128 0xa\n" /* offset r10 */
+ "\t.byte 0x8b\n\t.uleb128 0x9\n" /* offset r11 */
+ "\t.byte 0x8c\n\t.uleb128 0x8\n" /* offset r12 */
+ "\t.byte 0x8d\n\t.uleb128 0x7\n" /* offset r13 */
+ "\t.byte 0x8e\n\t.uleb128 0x6\n" /* offset r14 */
+ "\t.byte 0x8f\n\t.uleb128 0x5\n" /* offset r15 */
+ "\t.align 8\n"
+ ".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
+#endif /* LJ_HASFFI */
+#if !LJ_NO_UNWIND
+ fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n");
+ fprintf(ctx->fp, /* CIE .Lframe1: .eh_frame CIE with a personality routine, used by FDE 2. */
+ ".Lframe1:\n"
+ "\t.long .LECIE1-.LSCIE1\n"
+ ".LSCIE1:\n"
+ "\t.long 0\n" /* CIE id */
+ "\t.byte 0x1\n" /* version */
+ "\t.string \"zPR\"\n" /* augmentation: personality + FDE pointer encoding */
+ "\t.uleb128 0x1\n" /* code alignment factor */
+ "\t.sleb128 -8\n" /* data alignment factor */
+ "\t.byte 0xe\n" /* return address register (r14) */
+ "\t.uleb128 6\n" /* augmentation length */
+ "\t.byte 0x1b\n" /* pcrel|sdata4 */
+ "\t.long lj_err_unwind_dwarf-.\n" /* personality routine, PC-relative */
+ "\t.byte 0x1b\n" /* pcrel|sdata4 */
+ "\t.byte 0xc\n\t.uleb128 0xf\n\t.uleb128 160\n" /* def_cfa r15, offset 160 */
+ "\t.align 8\n"
+ ".LECIE1:\n\n");
+ fprintf(ctx->fp, /* FDE 2: starts at .Lbegin and covers fcofs bytes. */
+ ".LSFDE2:\n"
+ "\t.long .LEFDE2-.LASFDE2\n"
+ ".LASFDE2:\n"
+ "\t.long .LASFDE2-.Lframe1\n"
+ "\t.long .Lbegin-.\n"
+ "\t.long %d\n" /* range length: fcofs */
+ "\t.uleb128 0\n" /* augmentation length */
+ "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset: CFRAME_SIZE+160 */
+ "\t.byte 0x86\n\t.uleb128 0xe\n" /* offset r6 */
+ "\t.byte 0x87\n\t.uleb128 0xd\n" /* offset r7 */
+ "\t.byte 0x88\n\t.uleb128 0xc\n" /* offset r8 */
+ "\t.byte 0x89\n\t.uleb128 0xb\n" /* offset r9 */
+ "\t.byte 0x8a\n\t.uleb128 0xa\n" /* offset r10 */
+ "\t.byte 0x8b\n\t.uleb128 0x9\n" /* offset r11 */
+ "\t.byte 0x8c\n\t.uleb128 0x8\n" /* offset r12 */
+ "\t.byte 0x8d\n\t.uleb128 0x7\n" /* offset r13 */
+ "\t.byte 0x8e\n\t.uleb128 0x6\n" /* offset r14 */
+ "\t.byte 0x8f\n\t.uleb128 0x5\n" /* offset r15 */
+ "\t.align 8\n"
+ ".LEFDE2:\n\n", fcofs, CFRAME_SIZE+160);
+#if LJ_HASFFI
+ fprintf(ctx->fp, /* CIE .Lframe2: like .Lframe1 but without a personality routine; used by FDE 3. */
+ ".Lframe2:\n"
+ "\t.long .LECIE2-.LSCIE2\n"
+ ".LSCIE2:\n"
+ "\t.long 0\n" /* CIE id */
+ "\t.byte 0x1\n" /* version */
+ "\t.string \"zR\"\n" /* augmentation: FDE pointer encoding only */
+ "\t.uleb128 0x1\n" /* code alignment factor */
+ "\t.sleb128 -8\n" /* data alignment factor */
+ "\t.byte 0xe\n" /* return address register (r14) */
+ "\t.uleb128 1\n" /* augmentation length */
+ "\t.byte 0x1b\n" /* pcrel|sdata4 */
+ "\t.byte 0xc\n\t.uleb128 0xf\n\t.uleb128 160\n" /* def_cfa r15, offset 160 */
+ "\t.align 8\n"
+ ".LECIE2:\n\n");
+ fprintf(ctx->fp, /* FDE 3: starts at lj_vm_ffi_call and covers the rest of the code. */
+ ".LSFDE3:\n"
+ "\t.long .LEFDE3-.LASFDE3\n"
+ ".LASFDE3:\n"
+ "\t.long .LASFDE3-.Lframe2\n"
+ "\t.long lj_vm_ffi_call-.\n"
+ "\t.long %d\n" /* range length: ctx->codesz - fcofs */
+ "\t.uleb128 0\n" /* augmentation length */
+ "\t.byte 0xe\n\t.uleb128 160\n" /* def_cfa_offset */
+ "\t.byte 0xd\n\t.uleb128 0xd\n" /* def_cfa_register r13 (FP) */
+ "\t.byte 0x86\n\t.uleb128 0xe\n" /* offset r6 */
+ "\t.byte 0x87\n\t.uleb128 0xd\n" /* offset r7 */
+ "\t.byte 0x88\n\t.uleb128 0xc\n" /* offset r8 */
+ "\t.byte 0x89\n\t.uleb128 0xb\n" /* offset r9 */
+ "\t.byte 0x8a\n\t.uleb128 0xa\n" /* offset r10 */
+ "\t.byte 0x8b\n\t.uleb128 0x9\n" /* offset r11 */
+ "\t.byte 0x8c\n\t.uleb128 0x8\n" /* offset r12 */
+ "\t.byte 0x8d\n\t.uleb128 0x7\n" /* offset r13 */
+ "\t.byte 0x8e\n\t.uleb128 0x6\n" /* offset r14 */
+ "\t.byte 0x8f\n\t.uleb128 0x5\n" /* offset r15 */
+ "\t.align 8\n"
+ ".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
+#endif /* LJ_HASFFI */
+#endif /* !LJ_NO_UNWIND */
+ break;
+ default: /* No other modes. */
+ break;
+ }
+}