File luajit-s390x.patch of Package luajit.28820

diff -urpN luajit-2.1.0~beta3+git.1624618403.e9577376.orig/dynasm/dasm_s390x.h luajit-2.1.0~beta3+git.1624618403.e9577376/dynasm/dasm_s390x.h
--- luajit-2.1.0~beta3+git.1624618403.e9577376.orig/dynasm/dasm_s390x.h	1969-12-31 18:00:00.000000000 -0600
+++ luajit-2.1.0~beta3+git.1624618403.e9577376/dynasm/dasm_s390x.h	2021-10-05 12:36:23.421160526 -0500
@@ -0,0 +1,546 @@
+** DynASM s390x encoding engine.
+** Copyright (C) 2005-2016 Mike Pall. All rights reserved.
+** Released under the MIT license. See dynasm.lua for full copyright notice.
+#include <stddef.h>
+#include <stdarg.h>
+#include <string.h>
+#include <stdlib.h>
+#define DASM_ARCH		"s390x"
+#ifndef DASM_EXTERN
+#define DASM_EXTERN(a,b,c,d)	0
+/* Action definitions. */
+enum {
+  /* The following actions need a buffer position. */
+  /* The following actions also have an argument. */
+/* Maximum number of section buffer positions for a single dasm_put() call. */
+#define DASM_MAXSECPOS		25
+/* DynASM encoder status codes. Action list offset or number are or'ed in. */
+#define DASM_S_OK		0x00000000
+#define DASM_S_NOMEM		0x01000000
+#define DASM_S_PHASE		0x02000000
+#define DASM_S_MATCH_SEC	0x03000000
+#define DASM_S_RANGE_I		0x11000000
+#define DASM_S_RANGE_SEC	0x12000000
+#define DASM_S_RANGE_LG		0x13000000
+#define DASM_S_RANGE_PC		0x14000000
+#define DASM_S_RANGE_REL	0x15000000
+#define DASM_S_UNDEF_LG		0x21000000
+#define DASM_S_UNDEF_PC		0x22000000
+/* Macros to convert positions (8 bit section + 24 bit index). */
+#define DASM_POS2IDX(pos)	((pos)&0x00ffffff)
+#define DASM_POS2BIAS(pos)	((pos)&0xff000000)
+#define DASM_SEC2POS(sec)	((sec)<<24)
+#define DASM_POS2SEC(pos)	((pos)>>24)
+#define DASM_POS2PTR(D, pos)	(D->sections[DASM_POS2SEC(pos)].rbuf + (pos))
+/* Action list type. */
+typedef const unsigned short *dasm_ActList;
+/* Per-section structure.
+** Positions are biased: the section number occupies the top 8 bits of a
+** position (see DASM_SEC2POS) and rbuf is buf minus that bias, so rbuf[pos]
+** addresses the section buffer directly.
+*/
+typedef struct dasm_Section {
+  int *rbuf;                    /* Biased buffer pointer (negative section bias). */
+  int *buf;                     /* True buffer pointer. */
+  size_t bsize;                 /* Buffer size in bytes. */
+  int pos;                      /* Biased buffer position. */
+  int epos;                     /* End of biased buffer position - max single put. */
+  int ofs;                      /* Byte offset into section. */
+} dasm_Section;
+/* Core structure holding the DynASM encoding state. */
+struct dasm_State {
+  size_t psize;                 /* Allocated size of this structure. */
+  dasm_ActList actionlist;      /* Current actionlist pointer. */
+  int *lglabels;                /* Local/global chain/pos ptrs. */
+  size_t lgsize;                /* Size of lglabels in bytes. */
+  int *pclabels;                /* PC label chains/pos ptrs. */
+  size_t pcsize;                /* Size of pclabels in bytes. */
+  void **globals;               /* Array of globals (bias -10). */
+  dasm_Section *section;        /* Pointer to active section. */
+  size_t codesize;              /* Total size of all code sections. */
+  int maxsection;               /* 0 <= sectionidx < maxsection. */
+  int status;                   /* Status code. */
+  dasm_Section sections[1];     /* All sections. Alloc-extended. */
+/* The size of the core structure depends on the max. number of sections. */
+#define DASM_PSZ(ms)	(sizeof(dasm_State)+(ms-1)*sizeof(dasm_Section))
+/* Initialize DynASM state.
+** Allocates the state (sized for maxsection sections) through DASM_M_GROW,
+** publishes it via Dst_REF and clears the label tables and section buffers.
+** actionlist, section and status are not assigned here; dasm_setup() sets them.
+*/
+void dasm_init(Dst_DECL, int maxsection)
+  dasm_State *D;
+  size_t psz = 0;
+  int i;
+  Dst_REF = NULL;
+  DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection));
+  D = Dst_REF;
+  D->psize = psz;               /* Kept so dasm_free() can hand the size to DASM_M_FREE. */
+  D->lglabels = NULL;
+  D->lgsize = 0;
+  D->pclabels = NULL;
+  D->pcsize = 0;
+  D->globals = NULL;
+  D->maxsection = maxsection;
+  for (i = 0; i < maxsection; i++) {
+    D->sections[i].buf = NULL;  /* Need this for pass3. */
+    D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i);
+    D->sections[i].bsize = 0;
+    D->sections[i].epos = 0;    /* Wrong, but is recalculated after resize. */
+  }
+/* Free DynASM state.
+** Releases every allocated section buffer, then the PC and local/global
+** label tables, and finally the state structure itself.
+*/
+void dasm_free(Dst_DECL)
+  dasm_State *D = Dst_REF;
+  int i;
+  for (i = 0; i < D->maxsection; i++)
+    if (D->sections[i].buf)
+      DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize);
+  if (D->pclabels)
+    DASM_M_FREE(Dst, D->pclabels, D->pcsize);
+  if (D->lglabels)
+    DASM_M_FREE(Dst, D->lglabels, D->lgsize);
+  DASM_M_FREE(Dst, D, D->psize);
+/* Setup global label array. Must be called before dasm_setup().
+** gl is the array that dasm_encode() fills with global label addresses and
+** maxgl is the number of global labels. The local/global label table is
+** grown to hold 10 + maxgl entries.
+*/
+void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl)
+  dasm_State *D = Dst_REF;
+  D->globals = gl - 10;         /* Negative bias to compensate for locals. */
+  DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10 + maxgl) * sizeof(int));
+/* Grow PC label array. Can be called after dasm_setup(), too.
+** maxpc is the requested number of PC labels. Only the bytes between the
+** previous size and the new size are zeroed, so existing entries are kept.
+*/
+void dasm_growpc(Dst_DECL, unsigned int maxpc)
+  dasm_State *D = Dst_REF;
+  size_t osz = D->pcsize;       /* Size before growing, in bytes. */
+  DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc * sizeof(int));
+  memset((void *)(((unsigned char *)D->pclabels) + osz), 0, D->pcsize - osz);
+/* Setup encoder.
+** Installs the action list, resets the status to DASM_S_OK, makes section 0
+** the active section, clears both label tables and rewinds every section to
+** its start position with a byte offset of 0.
+*/
+void dasm_setup(Dst_DECL, const void *actionlist)
+  dasm_State *D = Dst_REF;
+  int i;
+  D->actionlist = (dasm_ActList) actionlist;
+  D->status = DASM_S_OK;
+  D->section = &D->sections[0];
+  memset((void *)D->lglabels, 0, D->lgsize); /* Not NULL-checked: relies on dasm_setupglobal() having run. */
+  if (D->pclabels)
+    memset((void *)D->pclabels, 0, D->pcsize);
+  for (i = 0; i < D->maxsection; i++) {
+    D->sections[i].pos = DASM_SEC2POS(i);
+    D->sections[i].ofs = 0;
+  }
+#define CK(x, st) \
+  do { if (!(x)) { \
+    D->status = DASM_S_##st|(p-D->actionlist-1); return; } } while (0)
+#define CKPL(kind, st) \
+  do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \
+    D->status = DASM_S_RANGE_##st|(p-D->actionlist-1); return; } } while (0)
+#define CK(x, st)	((void)0)
+#define CKPL(kind, st)	((void)0)
+/* Pass 1: Store actions and args, link branches/labels, estimate offsets.
+** start is the action-list index to begin at. One int is taken from the
+** variadic arguments for every action numbered DASM_REL_PC or higher.
+** The start index and the per-action values are appended to the active
+** section's buffer; sec->pos and sec->ofs are written back at the end.
+** NOTE(review): REL_PC and LABEL_PC do not advance p here, whereas
+** dasm_link() skips one action-list halfword for both of them -- confirm
+** against the action list the Lua module emits.
+*/
+void dasm_put(Dst_DECL, int start, ...)
+  va_list ap;
+  dasm_State *D = Dst_REF;
+  dasm_ActList p = D->actionlist + start;
+  dasm_Section *sec = D->section;
+  int pos = sec->pos, ofs = sec->ofs;
+  int *b;
+  if (pos >= sec->epos) {       /* Not enough room left for one more put: grow the buffer. */
+    DASM_M_GROW(Dst, int, sec->buf, sec->bsize,
+                sec->bsize + 2 * DASM_MAXSECPOS * sizeof(int));
+    sec->rbuf = sec->buf - DASM_POS2BIAS(pos);
+    sec->epos =
+      (int)sec->bsize / sizeof(int) - DASM_MAXSECPOS + DASM_POS2BIAS(pos);
+  }
+  b = sec->rbuf;
+  b[pos++] = start;             /* First slot of every put records the action-list index. */
+  va_start(ap, start);
+  while (1) {
+    unsigned short ins = *p++;
+    unsigned short action = ins;
+    if (action >= DASM__MAX) {  /* Not an action: dasm_encode() copies it out as one halfword. */
+      ofs += 2;
+      continue;
+    }
+    int *pl, n = action >= DASM_REL_PC ? va_arg(ap, int) : 0;
+    switch (action) {
+    case DASM_STOP:
+      goto stop;
+    case DASM_SECTION:
+      n = *p++ & 255;
+      CK(n < D->maxsection, RANGE_SEC);
+      D->section = &D->sections[n];
+      goto stop;
+    case DASM_ESC:
+      p++;                      /* Skip the escaped halfword; it is counted as 2 bytes. */
+      ofs += 2;
+      break;
+    case DASM_REL_EXT:
+      p++;
+      ofs += 4;
+      break;
+    case DASM_ALIGN:
+      ofs += *p++;              /* Worst-case padding; dasm_link() shrinks it. */
+      b[pos++] = ofs;
+      break;
+    case DASM_REL_LG:
+      if (p[-2] >> 12 == 0xc) { /* RIL instruction needs 32-bit immediate. */
+        ofs += 2;
+      }
+      n = *p++ - 10;
+      pl = D->lglabels + n;
+      /* Bkwd rel or global. */
+      if (n >= 0) {
+        CK(n >= 10 || *pl < 0, RANGE_LG);
+        CKPL(lg, LG);
+        goto putrel;
+      }
+      pl += 10;
+      n = *pl;
+      if (n < 0)
+        n = 0;                  /* Start new chain for fwd rel if label exists. */
+      goto linkrel;
+    case DASM_REL_PC:
+      if (p[-2] >> 12 == 0xc) { /* RIL instruction needs 32-bit immediate. */
+        ofs += 2;
+      }
+      pl = D->pclabels + n;
+      CKPL(pc, PC);
+    putrel:
+      n = *pl;
+      if (n < 0) {              /* Label exists. Get label pos and store it. */
+        b[pos] = -n;
+      } else {
+      linkrel:
+        b[pos] = n;             /* Else link to rel chain, anchored at label. */
+        *pl = pos;
+      }
+      ofs += 2;
+      pos++;
+      break;
+    case DASM_LABEL_LG:
+      pl = D->lglabels + *p++ - 10;
+      CKPL(lg, LG);
+      goto putlabel;
+    case DASM_LABEL_PC:
+      pl = D->pclabels + n;
+      CKPL(pc, PC);
+    putlabel:
+      n = *pl;                  /* n > 0: Collapse rel chain and replace with label pos. */
+      while (n > 0) {
+        int *pb = DASM_POS2PTR(D, n);
+        n = *pb;
+        *pb = pos;
+      }
+      *pl = -pos;               /* Label exists now. */
+      b[pos++] = ofs;           /* Store pass1 offset estimate. */
+      break;
+    case DASM_IMM8:
+      b[pos++] = n;             /* No range check and no size change: merged into an existing halfword in pass 3. */
+      break;
+    case DASM_IMM16:
+      CK(((short)n) == n || ((unsigned short)n) == n, RANGE_I);     /* TODO: is this the right way to handle unsigned immediates? */
+      ofs += 2;
+      b[pos++] = n;
+      break;
+    case DASM_IMM32:
+      ofs += 4;
+      b[pos++] = n;
+      break;
+    case DASM_DISP20:
+      CK(-(1 << 19) <= n && n < (1 << 19), RANGE_I);
+      b[pos++] = n;
+      break;
+    case DASM_DISP12:
+      CK((n >> 12) == 0, RANGE_I);
+      b[pos++] = n;
+      break;
+    case DASM_LEN8R:
+      CK(n >= 1 && n <= 256, RANGE_I);
+      b[pos++] = n;
+      break;
+    case DASM_LEN4HR:
+    case DASM_LEN4LR:
+      CK(n >= 1 && n <= 128, RANGE_I); /* NOTE(review): pass 3 keeps only 4 bits of n - 1; confirm the upper bound. */
+      b[pos++] = n;
+      break;
+    }
+  }
+  va_end(ap);
+  sec->pos = pos;
+  sec->ofs = ofs;
+#undef CK
+/* Pass 2: Link sections, shrink aligns, fix label offsets.
+** Returns D->status unchanged if pass 1 recorded an error, or
+** DASM_S_UNDEF_PC|pc when a PC label still has a pending reference chain.
+** Otherwise returns DASM_S_OK and stores the total code size in *szp and
+** D->codesize. *szp is set to 0 on entry, so it is 0 on every error return.
+** NOTE(review): this pass skips one action-list halfword for REL_PC and
+** LABEL_PC, while dasm_put() does not advance p for either -- confirm
+** against the action list the Lua module emits.
+*/
+int dasm_link(Dst_DECL, size_t * szp)
+  dasm_State *D = Dst_REF;
+  int secnum;
+  int ofs = 0;
+  *szp = 0;
+  if (D->status != DASM_S_OK)
+    return D->status;
+  {
+    int pc;
+    for (pc = 0; pc * sizeof(int) < D->pcsize; pc++)
+      if (D->pclabels[pc] > 0)  /* Positive entry: reference chain without a definition. */
+        return DASM_S_UNDEF_PC | pc;
+  }
+  {                             /* Handle globals not defined in this translation unit. */
+    int idx;
+    for (idx = 20; idx * sizeof(int) < D->lgsize; idx++) {
+      int n = D->lglabels[idx];
+      /* Undefined label: Collapse rel chain and replace with marker (< 0). */
+      while (n > 0) {
+        int *pb = DASM_POS2PTR(D, n);
+        n = *pb;
+        *pb = -idx;
+      }
+    }
+  }
+  /* Combine all code sections. No support for data sections (yet). */
+  for (secnum = 0; secnum < D->maxsection; secnum++) {
+    dasm_Section *sec = D->sections + secnum;
+    int *b = sec->rbuf;
+    int pos = DASM_SEC2POS(secnum);
+    int lastpos = sec->pos;
+    while (pos != lastpos) {
+      dasm_ActList p = D->actionlist + b[pos++];
+      while (1) {
+        unsigned short ins = *p++;
+        unsigned short action = ins;
+        switch (action) {
+        case DASM_STOP:
+        case DASM_SECTION:
+          goto stop;
+        case DASM_ESC:
+          p++;
+          break;
+        case DASM_REL_EXT:
+          p++;
+          break;
+        case DASM_ALIGN:
+          ofs -= (b[pos++] + ofs) & *p++; /* Remove the padding that turns out to be unnecessary. */
+          break;
+        case DASM_REL_LG:
+        case DASM_REL_PC:
+          p++;
+          pos++;
+          break;
+        case DASM_LABEL_LG:
+        case DASM_LABEL_PC:
+          p++;
+          b[pos++] += ofs;      /* Turn the pass 1 estimate into the linked offset. */
+          break;
+        case DASM_IMM8:
+        case DASM_IMM16:
+        case DASM_IMM32:
+        case DASM_DISP20:
+        case DASM_DISP12:
+        case DASM_LEN8R:
+        case DASM_LEN4HR:
+        case DASM_LEN4LR:
+          pos++;                /* Skip the stored argument; it does not affect offsets. */
+          break;
+        }
+      }
+    stop:(void)0;
+    }
+    ofs += sec->ofs;            /* Next section starts right after current section. */
+  }
+  D->codesize = ofs;            /* Total size of all code sections */
+  *szp = ofs;
+  return DASM_S_OK;
+#define CK(x, st) \
+  do { if (!(x)) return DASM_S_##st|(p-D->actionlist-1); } while (0)
+#define CK(x, st)	((void)0)
+/* Pass 3: Encode sections.
+** Writes the final instruction halfwords of all sections to buffer and
+** stores the addresses of global labels in D->globals. Returns DASM_S_OK,
+** or DASM_S_PHASE when the number of bytes written differs from
+** D->codesize; the CK() checks return an UNDEF_LG/UNDEF_PC/RANGE_LG status
+** when they are compiled in. Writes are not bounds-checked; buffer is
+** presumably sized from the value dasm_link() reported -- confirm in callers.
+** NOTE(review): LABEL_PC does not advance p here although dasm_link()
+** skips one action-list halfword for it -- confirm against the Lua module.
+*/
+int dasm_encode(Dst_DECL, void *buffer)
+  dasm_State *D = Dst_REF;
+  char *base = (char *)buffer;
+  unsigned short *cp = (unsigned short *)buffer;
+  int secnum;
+  /* Encode all code sections. No support for data sections (yet). */
+  for (secnum = 0; secnum < D->maxsection; secnum++) {
+    dasm_Section *sec = D->sections + secnum;
+    int *b = sec->buf;
+    int *endb = sec->rbuf + sec->pos;
+    while (b != endb) {
+      dasm_ActList p = D->actionlist + *b++;
+      while (1) {
+        unsigned short ins = *p++;
+        unsigned short action = ins;
+        int n = (action >= DASM_ALIGN && action < DASM__MAX) ? *b++ : 0;
+        switch (action) {
+        case DASM_STOP:
+        case DASM_SECTION:
+          goto stop;
+        case DASM_ESC:
+          *cp++ = *p++;         /* Copy the escaped halfword verbatim. */
+          break;
+        case DASM_REL_EXT:
+          n = DASM_EXTERN(Dst, (unsigned char *)cp, *p++, 1) - 4;
+          goto patchrel;
+        case DASM_ALIGN:
+          ins = *p++;
+          /* TODO: emit 4-byte noprs instead of 2-byte nops where possible. */
+          while ((((char *)cp - base) & ins))
+            *cp++ = 0x0700;     /* nop */
+          break;
+        case DASM_REL_LG:
+          CK(n >= 0, UNDEF_LG); /* Falls through to REL_PC. */
+        case DASM_REL_PC:
+          CK(n >= 0, UNDEF_PC);
+          n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base);
+          p++;                  /* skip argument */
+        patchrel:
+          /* Offsets are halfword aligned (so need to be halved). */
+          n += 2;               /* Offset is relative to start of instruction. */
+          if (cp[-1] >> 12 == 0xc) { /* Same 0xc test dasm_put() uses for RIL: emit the upper halfword first. */
+            *cp++ = n >> 17;
+          } else {
+            CK(-(1 << 16) <= n && n < (1 << 16) && (n & 1) == 0, RANGE_LG);
+          }
+          *cp++ = n >> 1;
+          break;
+        case DASM_LABEL_LG:
+          ins = *p++;
+          if (ins >= 20)        /* Only global labels (numbered from 20) are exported. */
+            D->globals[ins - 10] = (void *)(base + n);
+          break;
+        case DASM_LABEL_PC:
+          break;
+        case DASM_IMM8:
+          cp[-1] |= n & 0xff;  /* Merge into the low byte of the previous halfword. */
+          break;
+        case DASM_IMM16:
+          *cp++ = n;
+          break;
+        case DASM_IMM32:
+          *cp++ = n >> 16;      /* High halfword first. */
+          *cp++ = n;
+          break;
+        case DASM_DISP20:
+          cp[-2] |= n & 0xfff;  /* Low 12 bits of the displacement. */
+          cp[-1] |= (n >> 4) & 0xff00; /* Bits 12-19 go into the high byte of the last halfword. */
+          break;
+        case DASM_DISP12:
+          cp[-1] |= n & 0xfff;
+          break;
+        case DASM_LEN8R:
+          cp[-1] |= (n - 1) & 0xff; /* Lengths are stored minus one. */
+          break;
+        case DASM_LEN4HR:
+          cp[-1] |= ((n - 1) << 4) & 0xf0;
+          break;
+        case DASM_LEN4LR:
+          cp[-1] |= (n - 1) & 0x0f;
+          break;
+        default:
+          *cp++ = ins;          /* Plain instruction halfword. */
+          break;
+        }
+      }
+    stop:(void)0;
+    }
+  }
+  if (base + D->codesize != (char *)cp) /* Check for phase errors. */
+    return DASM_S_PHASE;
+  return DASM_S_OK;
+#undef CK
+/* Get PC label offset.
+** Returns the offset stored for PC label pc when the label is defined,
+** -1 when it has been referenced but not defined, and -2 when it is unused
+** or pc is outside the PC label table.
+*/
+int dasm_getpclabel(Dst_DECL, unsigned int pc)
+  dasm_State *D = Dst_REF;
+  if (pc * sizeof(int) < D->pcsize) {
+    int pos = D->pclabels[pc];
+    if (pos < 0)                /* Defined: the entry holds the negated buffer position. */
+      return *DASM_POS2PTR(D, -pos);
+    if (pos > 0)
+      return -1;                /* Undefined. */
+  }
+  return -2;                    /* Unused or out of range. */
+/* Optional sanity checker to call between isolated encoding steps.
+** While the status is OK, local labels 1..9 are inspected: a pending
+** forward reference sets DASM_S_UNDEF_LG|i, otherwise the label is reset.
+** If secmatch >= 0 the active section must be section secmatch, else
+** DASM_S_MATCH_SEC is set. Returns the resulting status.
+*/
+int dasm_checkstep(Dst_DECL, int secmatch)
+  dasm_State *D = Dst_REF;
+  if (D->status == DASM_S_OK) {
+    int i;
+    for (i = 1; i <= 9; i++) {
+      if (D->lglabels[i] > 0) { /* Positive entry: a reference chain without a definition. */
+        D->status = DASM_S_UNDEF_LG | i;
+        break;
+      }
+      D->lglabels[i] = 0;
+    }
+  }
+  if (D->status == DASM_S_OK && secmatch >= 0 &&
+      D->section != &D->sections[secmatch])
+    D->status = DASM_S_MATCH_SEC | (D->section - D->sections);
+  return D->status;
diff -urpN luajit-2.1.0~beta3+git.1624618403.e9577376.orig/dynasm/dasm_s390x.lua luajit-2.1.0~beta3+git.1624618403.e9577376/dynasm/dasm_s390x.lua
--- luajit-2.1.0~beta3+git.1624618403.e9577376.orig/dynasm/dasm_s390x.lua	1969-12-31 18:00:00.000000000 -0600
+++ luajit-2.1.0~beta3+git.1624618403.e9577376/dynasm/dasm_s390x.lua	2021-10-05 12:36:23.461160737 -0500
@@ -0,0 +1,1633 @@
+-- DynASM s390x module.
+-- Copyright (C) 2005-2016 Mike Pall. All rights reserved.
+-- See dynasm.lua for full copyright notice.
+-- Module information:
+local _info = {
+  arch =	"s390x",
+  description =	"DynASM s390x module",
+  version =	"1.4.0",
+  vernum =	 10400,
+  release =	"2015-10-18",
+  author =	"Mike Pall",
+  license =	"MIT",
+-- Exported glue functions for the arch-specific module.
+local _M = { _info = _info }
+-- Cache library functions.
+local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs
+local assert, setmetatable, rawget = assert, setmetatable, rawget
+local _s = string
+local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char
+local match, gmatch, gsub = _s.match, _s.gmatch, _s.gsub
+local concat, sort, insert = table.concat, table.sort, table.insert
+local bit = bit or require("bit")
+local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift -- band is used when splitting immediates into halfwords.
+local ror, tohex = bit.ror, bit.tohex
+-- Inherited tables and callbacks.
+local g_opt, g_arch
+local wline, werror, wfatal, wwarn
+-- Action name list.
+-- CHECK: Keep this in sync with the C code!
+local action_names = {
+  "REL_PC", "LABEL_PC", "DISP12", "DISP20", "IMM8", "IMM16", "IMM32", "LEN8R","LEN4HR","LEN4LR",
+-- Maximum number of section buffer positions for dasm_put().
+-- CHECK: Keep this in sync with the C code!
+local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines.
+-- Action name -> action number.
+local map_action = {}
+local max_action = 0
+for n, name in ipairs(action_names) do
+  map_action[name] = n-1
+  max_action = n
+-- Action list buffer.
+local actlist = {}
+-- Argument list for next dasm_put(). Start with offset 0 into action list.
+local actargs = { 0 }
+-- Current number of section buffer positions for dasm_put().
+local secpos = 1
+-- Dump action names and numbers.
+local function dumpactions(out)
+  out:write("DynASM encoding engine action codes:\n")
+  for n, name in ipairs(action_names) do
+    local num = map_action[name]
+    out:write(format("  %-10s %02X  %d\n", name, num, num))
+  end
+  out:write("\n")
+local function havearg(a)
+  return a == "ESC" or
+         a == "SECTION" or
+         a == "REL_LG" or
+         a == "LABEL_LG" or
+         a == "REL_EXT"
+-- Write action list buffer as a huge static C array.
+-- out is the output file handle and name the C identifier of the array.
+-- An empty action list is written as a single STOP action.
+local function writeactions(out, name)
+  local nn = #actlist
+  -- The placeholder must live in slot 1: that is the slot the loop below
+  -- reads, and slot 0 would leave actlist[1] nil.
+  if nn == 0 then nn = 1; actlist[1] = map_action.STOP end
+  out:write("static const unsigned short ", name, "[", nn, "] = {")
+  local esc = false -- also need to escape for action arguments
+  for i = 1, nn do
+    assert(out:write("\n  0x", sub(tohex(actlist[i]), 5, 8)))
+    if i ~= nn then assert(out:write(",")) end
+    local name = action_names[actlist[i]+1]
+    if not esc and name then
+      assert(out:write(" /* ", name, " */"))
+      esc = havearg(name) -- The next halfword is this action's argument, not an action.
+    else
+      esc = false
+    end
+  end
+  assert(out:write("\n};\n\n"))
+-- Add halfword to action list.
+local function wputxhw(n)
+  assert(n >= 0 and n <= 0xffff, "halfword out of range")
+  actlist[#actlist+1] = n
+-- Add action to list with optional arg. Advance buffer pos, too. val (optional) is appended to the action list right after the action; a (optional) is appended to the argument list of the next dasm_put(); num (optional) is the number of buffer positions to advance by (default 1).
+local function waction(action, val, a, num)
+  local w = assert(map_action[action], "bad action name `"..action.."'")
+  wputxhw(w)
+  if val then wputxhw(val) end -- Not sure about this, do we always have one arg?
+  if a then actargs[#actargs+1] = a end
+  if val or a or num then secpos = secpos + (num or 1) end
+-- Flush action list (intervening C code or buffer pos overflow). When term is true no terminating STOP action is appended before the dasm_put() line is written.
+local function wflush(term)
+  if #actlist == actargs[1] then return end -- Nothing to flush.
+  if not term then waction("STOP") end -- Terminate action list.
+  wline(format("dasm_put(Dst, %s);", concat(actargs, ", ")), true)
+  actargs = { #actlist } -- Actionlist offset is 1st arg to next dasm_put().
+  secpos = 1 -- The actionlist offset occupies a buffer position, too.
+-- Put escaped halfword.
+local function wputhw(n)
+  if n <= max_action then waction("ESC") end
+  wputxhw(n)
+-- Reserve position for halfword.
+local function wpos()
+  local pos = #actlist+1
+  actlist[pos] = ""
+  return pos
+-- Global label name -> global label number. With auto assignment on 1st use.
+local next_global = 20
+local map_global = setmetatable({}, { __index = function(t, name)
+  if not match(name, "^[%a_][%w_]*$") then werror("bad global label") end
+  local n = next_global
+  if n > 2047 then werror("too many global labels") end
+  next_global = n + 1
+  t[name] = n
+  return n
+-- Dump global labels.
+local function dumpglobals(out, lvl)
+  local t = {}
+  for name, n in pairs(map_global) do t[n] = name end
+  out:write("Global labels:\n")
+  for i=20, next_global-1 do
+    out:write(format("  %s\n", t[i]))
+  end
+  out:write("\n")
+-- Write global label enum.
+local function writeglobals(out, prefix)
+  local t = {}
+  for name, n in pairs(map_global) do t[n] = name end
+  out:write("enum {\n")
+  for i=20, next_global-1 do
+    out:write("  ", prefix, t[i], ",\n")
+  end
+  out:write("  ", prefix, "_MAX\n};\n")
+-- Write global label names.
+local function writeglobalnames(out, name)
+  local t = {}
+  for name, n in pairs(map_global) do t[n] = name end
+  out:write("static const char *const ", name, "[] = {\n")
+  for i=20, next_global-1 do
+    out:write("  \"", t[i], "\",\n")
+  end
+  out:write("  (const char *)0\n};\n")
+-- Extern label name -> extern label number. With auto assignment on 1st use.
+local next_extern = 0
+local map_extern_ = {}
+local map_extern = setmetatable({}, { __index = function(t, name)
+  -- No restrictions on the name for now.
+  local n = next_extern
+  if n > 2047 then werror("too many extern labels") end
+  next_extern = n + 1
+  t[name] = n
+  map_extern_[n] = name
+  return n
+-- Dump extern labels.
+local function dumpexterns(out, lvl)
+  out:write("Extern labels:\n")
+  for i=0, next_extern-1 do
+    out:write(format("  %s\n", map_extern_[i]))
+  end
+  out:write("\n")
+-- Write extern label names.
+local function writeexternnames(out, name)
+  out:write("static const char *const ", name, "[] = {\n")
+  for i=0, next_extern-1 do
+    out:write("  \"", map_extern_[i], "\",\n")
+  end
+  out:write("  (const char *)0\n};\n")
+-- Arch-specific maps.
+-- Ext. register name -> int. name.
+local map_archdef = { sp = "r15" }
+-- Int. register name -> ext. name.
+local map_reg_rev = { r15 = "sp" }
+local map_type = {}		-- Type name -> { ctype, reg }
+local ctypenum = 0		-- Type number (for Dt... macros).
+-- Reverse defines for registers.
+function _M.revdef(s)
+  return map_reg_rev[s] or s
+local map_cond = {
+  o = 1, h = 2, nle = 3, l = 4,
+  nhe = 5, lh = 6, ne = 7, e = 8,
+  nlh = 9, he = 10, nl = 11, le = 12,
+  nh = 13, no = 14, [""] = 15,
+local function parse_reg(expr) -- Returns the register number (0-15) and, when expr named a type from map_type, that type's entry; raises via werror() otherwise.
+  if not expr then werror("expected register name") end
+  local tname, ovreg = match(expr, "^([%w_]+):(r1?%d)$") -- "type:reg" form: a type name with a register override.
+  local tp = map_type[tname or expr]
+  if tp then
+    local reg = ovreg or tp.reg
+    if not reg then
+      werror("type `"..(tname or expr).."' needs a register override")
+    end
+    expr = reg
+  end
+  local r = match(expr, "^[rf](1?%d)$") -- Accepts both r<n> and f<n> spellings.
+  if r then
+    r = tonumber(r)
+    if r <= 15 then return r, tp end
+  end
+  werror("bad register name `"..expr.."'")
+local parse_ctx = {}
+local loadenv = setfenv and function(s)
+  local code = loadstring(s, "")
+  if code then setfenv(code, parse_ctx) end
+  return code
+end or function(s)
+  return load(s, "", nil, parse_ctx)
+-- Try to parse simple arithmetic, too, since some basic ops are aliases.
+local function parse_number(n)
+  local x = tonumber(n)
+  if x then return x end
+  local code = loadenv("return "..n)
+  if code then
+    local ok, y = pcall(code)
+    if ok then return y end
+  end
+  return nil
+local function is_uint12(num)
+  return 0 <= num and num < 4096
+local function is_int20(num)
+  return -shl(1, 19) <= num and num < shl(1, 19)
+local function is_int32(num)
+  return -2147483648 <= num and num < 2147483648
+local function is_uint16(num)
+  return 0 <= num and num <= 0xffff -- 0xffff itself is a valid unsigned halfword.
+local function is_int16(num)
+  return -32768 <= num and num < 32768
+local function is_int8(num)
+  return -128 <= num and num < 128
+local function is_uint8(num)
+  return 0 <= num and num < 256
+-- Split a memory operand of the form d(b) or d(x,b) into d, x and b.
+-- If x is not specified then it is 0. d is returned as written (text, or the number 0 when it is implied); x and b are register numbers. Raises via werror() when no form matches.
+local function split_memop(arg)
+  local reg = "[%w_:]+"
+  local d, x, b = match(arg, "^(.*)%(%s*("..reg..")%s*,%s*("..reg..")%s*%)$")
+  if d then
+    return d, parse_reg(x), parse_reg(b)
+  end
+  local d, b = match(arg, "^(.*)%(%s*("..reg..")%s*%)$")
+  if d then
+    return d, 0, parse_reg(b)
+  end
+  -- Assume the two registers are passed as "(r1,r2)", and displacement(d) is not specified. TODO: not sure if we want to do this, GAS doesn't. NOTE(review): the first pattern above also accepts "(r1,r2)" with an empty d, so this rule looks unreachable and the empty d is returned instead of 0 -- confirm.
+  local x, b = match(arg,"%(%s*("..reg..")%s*,%s*("..reg..")%s*%)$")
+  if b then
+    return 0, parse_reg(x), parse_reg(b)
+  end
+  -- Accept a lone integer as a displacement. TODO: allow expressions/variables here? Interacts badly with the other rules currently.
+  local d = match(arg,"^(-?[%d]+)$")
+  if d then
+    return d, 0, 0
+  end
+  local reg, tailr = match(arg, "^([%w_:]+)%s*(.*)$") -- Typed operand: a type name (optionally "type:reg") followed by trailing text.
+  if reg then
+    local r, tp = parse_reg(reg)
+    if tp then
+      return format(tp.ctypefmt, tailr), 0, r
+    end
+  end
+  werror("bad memory operand: "..arg)
+  return nil
+-- Parse memory operand of the form d(x, b) where 0 <= d < 4096 and b and x
+-- are GPRs.
+-- Returns d, x, b and an optional closure. If the fourth return value is
+-- not nil then it needs to be called to insert the DISP12 action for a
+-- displacement given as an expression; d is 0 in that case.
+-- Encoded as: xbddd
+local function parse_mem_bx(arg)
+  local d, x, b = split_memop(arg)
+  local dval = tonumber(d)
+  if dval then
+    if not is_uint12(dval) then
+      -- Concatenate the value into the message, as the other werror() calls
+      -- in this module do.
+      werror("displacement out of range: "..dval)
+    end
+    return dval, x, b, nil
+  end
+  -- Reject only text that is exactly a register name. The pattern is
+  -- anchored so that expressions merely starting with "r" or "f" pass.
+  if match(d, "^[rf]1?[0-9]$") then
+    werror("expected immediate operand, got register")
+  end
+  return 0, x, b, function() waction("DISP12", nil, d) end
+-- Parse memory operand of the form d(b) where 0 <= d < 4096 and b is a GPR.
+-- Encoded as: bddd
+local function parse_mem_b(arg)
+  local d, x, b, a = parse_mem_bx(arg)
+  if x ~= 0 then
+    werror("unexpected index register")
+  end
+  return d, b, a
+-- Parse memory operand of the form d(x, b) where -(2^20)/2 <= d < (2^20)/2
+-- and b and x are GPRs.
+-- Returns d, x, b and an optional closure that inserts the DISP20 action
+-- when the displacement is given as an expression; d is 0 in that case.
+-- Encoded as: xblllhh (ls are the low-bits of d, and hs are the high bits).
+local function parse_mem_bxy(arg)
+  local d, x, b = split_memop(arg)
+  local dval = tonumber(d)
+  if dval then
+    if not is_int20(dval) then
+      -- Concatenate the value into the message, as the other werror() calls
+      -- in this module do.
+      werror("displacement out of range: "..dval)
+    end
+    return dval, x, b, nil
+  end
+  -- Reject only text that is exactly a register name. The pattern is
+  -- anchored so that expressions merely starting with "r" or "f" pass.
+  if match(d, "^[rf]1?[0-9]$") then
+    werror("expected immediate operand, got register")
+  end
+  return 0, x, b, function() waction("DISP20", nil, d) end
+-- Parse memory operand of the form d(b) where -(2^20)/2 <= d < (2^20)/2 and
+-- b is a GPR.
+-- Encoded as: blllhh (ls are the low-bits of d, and hs are the high bits).
+local function parse_mem_by(arg)
+  local d, x, b, a = parse_mem_bxy(arg)
+  if x ~= 0 then
+    werror("unexpected index register")
+  end
+  return d, b, a
+-- Parse memory operand of the form d(l, b) where 0 <= d < 4096, 1 <= l <= 256,
+-- and b is a GPR.
+-- Returns d, l - 1, b and two optional closures that insert the DISP12 and
+-- LEN8R actions when d or l are given as expressions (the corresponding
+-- numeric result is 0 in that case).
+local function parse_mem_lb(arg)
+  local reg = "r1?[0-9]"
+  local d, l, b = match(arg, "^(.*)%s*%(%s*(.*)%s*,%s*("..reg..")%s*%)$")
+  if not d then
+    -- TODO: handle values without registers?
+    -- TODO: handle registers without a displacement?
+    werror("bad memory operand: "..arg)
+    return nil
+  end
+  local dval = tonumber(d)
+  local dact = nil
+  if dval then
+    if not is_uint12(dval) then
+      -- Concatenate the value into the message, as the other werror() calls
+      -- in this module do.
+      werror("displacement out of range: "..dval)
+    end
+  else
+    dval = 0
+    dact = function() waction("DISP12", nil, d) end
+  end
+  local lval = tonumber(l)
+  local lact = nil
+  if lval then
+    if lval < 1 or lval > 256 then
+      -- Report the offending length, not the displacement.
+      werror("length out of range: "..lval)
+    end
+    lval = lval - 1 -- The length is encoded minus one.
+  else
+    lval = 0
+    lact = function() waction("LEN8R", nil, l) end
+  end
+  return dval, lval, parse_reg(b), dact, lact
+-- Parse memory operand of the form d(l, b) where 0 <= d < 4096 and b is a
+-- GPR, for instructions with two 4-bit length fields. high_l selects the
+-- LEN4HR action instead of LEN4LR for a length given as an expression.
+-- Returns d, l - 1, b and two optional closures that insert the
+-- displacement and length actions.
+-- NOTE(review): lengths up to 128 are accepted although the LEN4HR/LEN4LR
+-- encoders keep only 4 bits of l - 1 -- confirm the intended upper bound.
+local function parse_mem_l2b(arg, high_l)
+  local reg = "r1?[0-9]"
+  local d, l, b = match(arg, "^(.*)%s*%(%s*(.*)%s*,%s*("..reg..")%s*%)$")
+  if not d then
+    -- TODO: handle values without registers?
+    -- TODO: handle registers without a displacement?
+    werror("bad memory operand: "..arg)
+    return nil
+  end
+  local dval = tonumber(d)
+  local dact = nil
+  if dval then
+    if not is_uint12(dval) then
+      -- Concatenate the value into the message, as the other werror() calls
+      -- in this module do.
+      werror("displacement out of range: "..dval)
+    end
+  else
+    dval = 0
+    dact = function() waction("DISP12", nil, d) end
+  end
+  local lval = tonumber(l)
+  local lact = nil
+  if lval then
+    if lval < 1 or lval > 128 then
+      -- Report the offending length, not the displacement.
+      werror("length out of range: "..lval)
+    end
+    lval = lval - 1 -- The length is encoded minus one.
+  else
+    lval = 0
+    if high_l then
+      lact = function() waction("LEN4HR", nil, l) end
+    else
+      lact = function() waction("LEN4LR", nil, l) end
+    end
+  end
+  return dval, lval, parse_reg(b), dact, lact
+-- Emit a 32-bit immediate operand.
+-- A numeric literal is range-checked and written as two halfwords (high
+-- half first); register names are rejected; any other text is passed on as
+-- an expression through an IMM32 action.
+local function parse_imm32(imm)
+  local imm_val = tonumber(imm)
+  if imm_val then
+    if not is_int32(imm_val) then
+      -- Concatenate the value into the message, as the other werror() calls
+      -- in this module do.
+      werror("immediate value out of range: "..imm_val)
+    end
+    wputhw(band(shr(imm_val, 16), 0xffff))
+    wputhw(band(imm_val, 0xffff))
+  elseif match(imm, "^[rfv]([1-3]?[0-9])$") or
+         match(imm, "^([%w_]+):(r1?[0-9])$") then
+    werror("expected immediate operand, got register")
+  else
+    waction("IMM32", nil, imm) -- if we get label
+  end
+-- Emit a 16-bit immediate operand.
+-- A numeric literal must fit either the signed or the unsigned 16-bit range
+-- and is written as one halfword; register names are rejected; any other
+-- text is passed on as an expression through an IMM16 action.
+local function parse_imm16(imm)
+  local imm_val = tonumber(imm)
+  if imm_val then
+    if not is_int16(imm_val) and not is_uint16(imm_val) then
+      -- Concatenate the value into the message, as the other werror() calls
+      -- in this module do.
+      werror("immediate value out of range: "..imm_val)
+    end
+    wputhw(band(imm_val, 0xffff))
+  elseif match(imm, "^[rfv]([1-3]?[0-9])$") or
+         match(imm, "^([%w_]+):(r1?[0-9])$") then
+    werror("expected immediate operand, got register")
+  else
+    waction("IMM16", nil, imm)
+  end
+-- Parse an 8-bit immediate operand.
+-- Returns the value and nil for a numeric literal in the signed or unsigned
+-- 8-bit range; otherwise returns 0 and a closure that inserts an IMM8
+-- action carrying the operand text.
+local function parse_imm8(imm)
+  local imm_val = tonumber(imm)
+  if imm_val then
+    if not is_int8(imm_val) and not is_uint8(imm_val) then
+      -- Concatenate the value into the message, as the other werror() calls
+      -- in this module do.
+      werror("Immediate value out of range: "..imm_val)
+    end
+    return imm_val, nil
+  end
+  return 0, function() waction("IMM8", nil, imm) end
+-- Parse a mask operand. Returns the mask when it evaluates to 0, 1 or 3-7
+-- and raises an error for any other value. Returns nothing when the operand
+-- cannot be evaluated at all.
+local function parse_mask(mask)
+  local m3 = parse_number(mask)
+  if m3 then
+    if ((m3 == 1) or (m3 == 0) or ( m3 >=3 and m3 <=7)) then
+      return m3
+    else
+      -- Concatenate the value into the message, as the other werror() calls
+      -- in this module do.
+      werror("Mask value should be 0,1 or 3-7: "..tostring(m3))
+    end
+  end
+-- Parse a mask operand that must evaluate to 0 or 1 and return it.
+local function parse_mask2(mask)
+  local m4 = parse_number(mask)
+  -- parse_number() returns nil for text it cannot evaluate; guard the
+  -- comparison so that case reports a proper assembler error.
+  if type(m4) == "number" and m4 >= 0 and m4 <= 1 then
+    return m4
+  else
+    werror("Mask value should be 0 or 1: "..tostring(mask))
+  end
+local function parse_label(label, def) -- Returns a mode ("PC", "LG" or "EXT"), a label number and, for PC labels only, the label expression text. def is true when parsing a label definition rather than a reference.
+  local prefix = sub(label, 1, 2)
+  -- =>label (pc label reference)
+  if prefix == "=>" then
+    return "PC", 0, sub(label, 3)
+  end
+  -- ->name (global label reference)
+  if prefix == "->" then
+    return "LG", map_global[sub(label, 3)]
+  end
+  if def then
+    -- [1-9] (local label definition)
+    if match(label, "^[1-9]$") then
+      return "LG", 10+tonumber(label)
+    end
+  else
+    -- [<>][1-9] (local label reference)
+    local dir, lnum = match(label, "^([<>])([1-9])$")
+    if dir then -- Fwd: 1-9, Bkwd: 11-19.
+      return "LG", lnum + (dir == ">" and 0 or 10) -- lnum is a string here; the addition converts it to a number.
+    end
+    -- extern label (extern label reference)
+    local extname = match(label, "^extern%s+(%S+)$")
+    if extname then
+      return "EXT", map_extern[extname]
+    end
+  end
+  werror("bad label `"..label.."'")
+local map_op, op_template
+local function op_alias(opname, f)
+  return function(params, nparams)
+    if not params then return "-> "..opname:sub(1, -3) end
+    f(params, nparams)
+    op_template(params, map_op[opname], nparams)
+  end
+-- Template strings for s390x instructions.
+map_op = {
+  a_2 =		"00005a000000RX-a",
+  ad_2 =	"00006a000000RX-a",
+  adb_2 =	"ed000000001aRXE",
+  adbr_2 =	"0000b31a0000RRE",
+  adr_2 =	"000000002a00RR",
+  ae_2 =	"00007a000000RX-a",
+  aeb_2 =	"ed000000000aRXE",
+  aebr_2 =	"0000b30a0000RRE",
+  aer_2 =	"000000003a00RR",
+  afi_2 =	"c20900000000RIL-a",
+  ag_2 =	"e30000000008RXY-a",
+  agf_2 =	"e30000000018RXY-a",
+  agfi_2 =	"c20800000000RIL-a",
+  agfr_2 =	"0000b9180000RRE",
+  aghi_2 =	"0000a70b0000RI-a",
+  agr_2 =	"0000b9080000RRE",
+  ah_2 =	"00004a000000RX-a",
+  ahi_2 =	"0000a70a0000RI-a",
+  ahy_2 =	"e3000000007aRXY-a",
+  aih_2 =	"cc0800000000RIL-a",
+  al_2 =	"00005e000000RX-a",
+  alc_2 =	"e30000000098RXY-a",
+  alcg_2 =	"e30000000088RXY-a",
+  alcgr_2 =	"0000b9880000RRE",
+  alcr_2 =	"0000b9980000RRE",
+  alfi_2 =	"c20b00000000RIL-a",
+  alg_2 =	"e3000000000aRXY-a",
+  algf_2 =	"e3000000001aRXY-a",
+  algfi_2 =	"c20a00000000RIL-a",
+  algfr_2 =	"0000b91a0000RRE",
+  algr_2 =	"0000b90a0000RRE",
+  alr_2 =	"000000001e00RR",
+  alsih_2 =	"cc0a00000000RIL-a",
+  alsihn_2 =	"cc0b00000000RIL-a",
+  aly_2 =	"e3000000005eRXY-a",
+  ap_2 =	"fa0000000000SS-b",
+  ar_2 =	"000000001a00RR",
+  au_2 =	"00007e000000RX-a",
+  aur_2 =	"000000003e00RR",
+  aw_2 =	"00006e000000RX-a",
+  awr_2 =	"000000002e00RR",
+  axbr_2 =	"0000b34a0000RRE",
+  axr_2 =	"000000003600RR",
+  ay_2 =	"e3000000005aRXY-a",
+  bakr_2 =	"0000b2400000RRE",
+  bal_2 =	"000045000000RX-a",
+  balr_2 =	"000000000500RR",
+  bas_2 =	"00004d000000RX-a",
+  basr_2 =	"000000000d00RR",
+  bassm_2 =	"000000000c00RR",
+  bc_2 =	"000047000000RX-b",
+  bcr_2 =	"000000000700RR",
+  bct_2 =	"000046000000RX-a",
+  bctg_2 =	"e30000000046RXY-a",
+  bctgr_2 =	"0000b9460000RRE",
+  bctr_2 =	"000000000600RR",
+  bras_2 =	"0000a7050000RI-b",
+  brasl_2 =	"c00500000000RIL-b",
+  brc_2 =	"0000a7040000RI-c",
+  brcl_2 =	"c00400000000RIL-c",
+  brct_2 =	"0000a7060000RI-b",
+  brctg_2 =	"0000a7070000RI-b",
+  brcth_2 =	"cc0600000000RIL-b",
+  brxh_3 =	"000084000000RSI",
+  brxhg_3 =	"ec0000000044RIE-e",
+  bsa_2 =	"0000b25a0000RRE",
+  bsg_2 =	"0000b2580000RRE",
+  bsm_2 =	"000000000b00RR",
+  bxh_3 =	"000086000000RS-a",
+  bxhg_3 =	"eb0000000044RSY-a",
+  bxle_3 =	"000087000000RS-a",
+  bxleg_3 =	"eb0000000045RSY-a",
+  c_2 =		"000059000000RX-a",
+  cd_2 =	"000069000000RX-a",
+  cdb_2 =	"ed0000000019RXE",
+  cdbr_2 =	"0000b3190000RRE",
+  cdfbr_2 =	"0000b3950000RRE",
+  cdfbra_4 =	"0000b3950000RRF-e",
+  cdfr_2 =	"0000b3b50000RRE",
+  cdftr_2 =	"0000b9510000RRE",
+  cdgbr_2 =	"0000b3a50000RRE",
+  cdgbra_4 =	"0000b3a50000RRF-e",
+  cdgr_2 =	"0000b3c50000RRE",
+  cdgtr_2 =	"0000b3f10000RRE",
+  cdr_2 =	"000000002900RR",
+  cds_3 =	"0000bb000000RS-a",
+  cdsg_3 =	"eb000000003eRSY-a",
+  cdstr_2 =	"0000b3f30000RRE",
+  cdsy_3 =	"eb0000000031RSY-a",
+  cdtr_2 =	"0000b3e40000RRE",
+  cdutr_2 =	"0000b3f20000RRE",
+  ce_2 =	"000079000000RX-a",
+  ceb_2 =	"ed0000000009RXE",
+  cebr_2 =	"0000b3090000RRE",
+  cedtr_2 =	"0000b3f40000RRE",
+  cefbr_2 =	"0000b3940000RRE",
+  cefbra_4 =	"0000b3940000RRF-e",
+  cefr_2 =	"0000b3b40000RRE",
+  cegbr_2 =	"0000b3a40000RRE",
+  cegbra_4 =	"0000b3a40000RRF-e",
+  cegr_2 =	"0000b3c40000RRE",
+  cer_2 =	"000000003900RR",
+  cextr_2 =	"0000b3fc0000RRE",
+  cfdbr_3 =	"0000b3990000RRF-e",
+  cfdbra_4 =	"0000b3990000RRF-e",
+  cfebr_3 =	"0000b3980000RRF-e",
+  cfebra_4 =	"0000b3980000RRF-e",
+  cfi_2 =	"c20d00000000RIL-a",
+  cfxbr_3 =	"0000b39a0000RRF-e",
+  cfxbra_4 =	"0000b39a0000RRF-e",
+  cg_2 =	"e30000000020RXY-a",
+  cgdbr_3 =	"0000b3a90000RRF-e",
+  cgdbra_4 =	"0000b3a90000RRF-e",
+  cgebr_3 =	"0000b3a80000RRF-e",
+  cgebra_4 =	"0000b3a80000RRF-e",
+  cgf_2 =	"e30000000030RXY-a",
+  cgfi_2 =	"c20c00000000RIL-a",
+  cgfr_2 =	"0000b9300000RRE",
+  cgfrl_2 =	"c60c00000000RIL-b",
+  cgh_2 =	"e30000000034RXY-a",
+  cghi_2 =	"0000a70f0000RI-a",
+  cghrl_2 =	"c60400000000RIL-b",
+  cgr_2 =	"0000b9200000RRE",
+  cgrl_2 =	"c60800000000RIL-b",
+  cgxbr_3 =	"0000b3aa0000RRF-e",
+  cgxbra_4 =	"0000b3aa0000RRF-e",
+  ch_2 =	"000049000000RX-a",
+  chf_2 =	"e300000000cdRXY-a",
+  chhr_2 =	"0000b9cd0000RRE",
+  chi_2 =	"0000a70e0000RI-a",
+  chlr_2 =	"0000b9dd0000RRE",
+  chrl_2 =	"c60500000000RIL-b",
+  chy_2 =	"e30000000079RXY-a",
+  cih_2 =	"cc0d00000000RIL-a",
+  cksm_2 =	"0000b2410000RRE",
+  cl_2 =	"000055000000RX-a",
+  clc_2 =	"d50000000000SS-a",
+  clcl_2 =	"000000000f00RR",
+  clcle_3 =	"0000a9000000RS-a",
+  clclu_3 =	"eb000000008fRSY-a",
+  clfi_2 =	"c20f00000000RIL-a",
+  clg_2 =	"e30000000021RXY-a",
+  clgf_2 =	"e30000000031RXY-a",
+  clgfi_2 =	"c20e00000000RIL-a",
+  clgfr_2 =	"0000b9310000RRE",
+  clgfrl_2 =	"c60e00000000RIL-b",
+  clghrl_2 =	"c60600000000RIL-b",
+  clgr_2 =	"0000b9210000RRE",
+  clgrl_2 =	"c60a00000000RIL-b",
+  clhf_2 =	"e300000000cfRXY-a",
+  clhhr_2 =	"0000b9cf0000RRE",
+  clhlr_2 =	"0000b9df0000RRE",
+  clhrl_2 =	"c60700000000RIL-b",
+  cli_2 =	"000095000000SI",
+  clih_2 =	"cc0f00000000RIL-a",
+  clm_3 =	"0000bd000000RS-b",
+  clmh_3 =	"eb0000000020RSY-b",
+  clmy_3 =	"eb0000000021RSY-b",
+  clr_2 =	"000000001500RR",
+  clrl_2 =	"c60f00000000RIL-b",
+  clst_2 =	"0000b25d0000RRE",
+  cly_2 =	"e30000000055RXY-a",
+  cmpsc_2 =	"0000b2630000RRE",
+  cpya_2 =	"0000b24d0000RRE",
+  cr_2 =	"000000001900RR",
+  crl_2 =	"c60d00000000RIL-b",
+  cs_3 =	"0000ba000000RS-a",
+  csg_3 =	"eb0000000030RSY-a",
+  csp_2 =	"0000b2500000RRE",
+  cspg_2 =	"0000b98a0000RRE",
+  csy_3 =	"eb0000000014RSY-a",
+  cu41_2 =	"0000b9b20000RRE",
+  cu42_2 =	"0000b9b30000RRE",
+  cudtr_2 =	"0000b3e20000RRE",
+  cuse_2 =	"0000b2570000RRE",
+  cuxtr_2 =	"0000b3ea0000RRE",
+  cvb_2 =	"00004f000000RX-a",
+  cvbg_2 =	"e3000000000eRXY-a",
+  cvby_2 =	"e30000000006RXY-a",
+  cvd_2 =	"00004e000000RX-a",
+  cvdg_2 =	"e3000000002eRXY-a",
+  cvdy_2 =	"e30000000026RXY-a",
+  cxbr_2 =	"0000b3490000RRE",
+  cxfbr_2 =	"0000b3960000RRE",
+  cxfbra_4 =	"0000b3960000RRF-e",
+  cxfr_2 =	"0000b3b60000RRE",
+  cxftr_2 =	"0000b9590000RRE",
+  cxgbr_2 =	"0000b3a60000RRE",
+  cxgbra_4 =	"0000b3a60000RRF-e",
+  cxgr_2 =	"0000b3c60000RRE",
+  cxgtr_2 =	"0000b3f90000RRE",
+  cxr_2 =	"0000b3690000RRE",
+  cxstr_2 =	"0000b3fb0000RRE",
+  cxtr_2 =	"0000b3ec0000RRE",
+  cxutr_2 =	"0000b3fa0000RRE",
+  cy_2 =	"e30000000059RXY-a",
+  d_2 =		"00005d000000RX-a",
+  dd_2 =	"00006d000000RX-a",
+  ddb_2 =	"ed000000001dRXE",
+  ddbr_2 =	"0000b31d0000RRE",
+  ddr_2 =	"000000002d00RR",
+  de_2 =	"00007d000000RX-a",
+  deb_2 =	"ed000000000dRXE",
+  debr_2 =	"0000b30d0000RRE",
+  der_2 =	"000000003d00RR",
+  didbr_4 =	"0000b35b0000RRF-b",
+  dl_2 =	"e30000000097RXY-a",
+  dlg_2 =	"e30000000087RXY-a",
+  dlgr_2 =	"0000b9870000RRE",
+  dlr_2 =	"0000b9970000RRE",
+  dr_2 =	"000000001d00RR",
+  dsg_2 =	"e3000000000dRXY-a",
+  dsgf_2 =	"e3000000001dRXY-a",
+  dsgfr_2 =	"0000b91d0000RRE",
+  dsgr_2 =	"0000b90d0000RRE",
+  dxbr_2 =	"0000b34d0000RRE",
+  dxr_2 =	"0000b22d0000RRE",
+  ear_2 =	"0000b24f0000RRE",
+  ecag_3 =	"eb000000004cRSY-a",
+  ed_2 =	"de0000000000SS-a",
+  edmk_2 =	"df0000000000SS-a",
+  eedtr_2 =	"0000b3e50000RRE",
+  eextr_2 =	"0000b3ed0000RRE",
+  efpc_2 =	"0000b38c0000RRE",
+  epair_2 =	"0000b99a0000RRE",
+  epar_2 =	"0000b2260000RRE",
+  epsw_2 =	"0000b98d0000RRE",
+  ereg_2 =	"0000b2490000RRE",
+  eregg_2 =	"0000b90e0000RRE",
+  esair_2 =	"0000b99b0000RRE",
+  esar_2 =	"0000b2270000RRE",
+  esdtr_2 =	"0000b3e70000RRE",
+  esea_2 =	"0000b99d0000RRE",
+  esta_2 =	"0000b24a0000RRE",
+  esxtr_2 =	"0000b3ef0000RRE",
+  ex_2 =	"000044000000RX-a",
+  exrl_2 =	"c60000000000RIL-b",
+  fidr_2 =	"0000b37f0000RRE",
+  fier_2 =	"0000b3770000RRE",
+  fixr_2 =	"0000b3670000RRE",
+  flogr_2 =	"0000b9830000RRE",
+  hdr_2 =	"000000002400RR",
+  her_2 =	"000000003400RR",
+  iac_2 =	"0000b2240000RRE",
+  ic_2 =	"000043000000RX-a",
+  icm_3 =	"0000bf000000RS-b",
+  icmh_3 =	"eb0000000080RSY-b",
+  icmy_3 =	"eb0000000081RSY-b",
+  icy_2 =	"e30000000073RXY-a",
+  iihf_2 =	"c00800000000RIL-a",
+  iihh_2 =	"0000a5000000RI-a",
+  iihl_2 =	"0000a5010000RI-a",
+  iilf_2 =	"c00900000000RIL-a",
+  iilh_2 =	"0000a5020000RI-a",
+  iill_2 =	"0000a5030000RI-a",
+  ipm_2 =	"0000b2220000RRE",
+  iske_2 =	"0000b2290000RRE",
+  ivsk_2 =	"0000b2230000RRE",
+  kdbr_2 =	"0000b3180000RRE",
+  kdtr_2 =	"0000b3e00000RRE",
+  kebr_2 =	"0000b3080000RRE",
+  kimd_2 =	"0000b93e0000RRE",
+  klmd_2 =	"0000b93f0000RRE",
+  km_2 =	"0000b92e0000RRE",
+  kmac_2 =	"0000b91e0000RRE",
+  kmc_2 =	"0000b92f0000RRE",
+  kmf_2 =	"0000b92a0000RRE",
+  kmo_2 =	"0000b92b0000RRE",
+  kxbr_2 =	"0000b3480000RRE",
+  kxtr_2 =	"0000b3e80000RRE",
+  l_2 =		"000058000000RX-a",
+  la_2 =	"000041000000RX-a",
+  laa_3 =	"eb00000000f8RSY-a",
+  laag_3 =	"eb00000000e8RSY-a",
+  laal_3 =	"eb00000000faRSY-a",
+  laalg_3 =	"eb00000000eaRSY-a",
+  lae_2 =	"000051000000RX-a",
+  laey_2 =	"e30000000075RXY-a",
+  lam_3 =	"00009a000000RS-a",
+  lamy_3 =	"eb000000009aRSY-a",
+  lan_3 =	"eb00000000f4RSY-a",
+  lang_3 =	"eb00000000e4RSY-a",
+  lao_3 =	"eb00000000f6RSY-a",
+  laog_3 =	"eb00000000e6RSY-a",
+  larl_2 =	"c00000000000RIL-b",
+  lax_3 =	"eb00000000f7RSY-a",
+  laxg_3 =	"eb00000000e7RSY-a",
+  lay_2 =	"e30000000071RXY-a",
+  lb_2 =	"e30000000076RXY-a",
+  lbh_2 =	"e300000000c0RXY-a",
+  lbr_2 =	"0000b9260000RRE",
+  lcdbr_2 =	"0000b3130000RRE",
+  lcdfr_2 =	"0000b3730000RRE",
+  lcdr_2 =	"000000002300RR",
+  lcebr_2 =	"0000b3030000RRE",
+  lcer_2 =	"000000003300RR",
+  lcgfr_2 =	"0000b9130000RRE",
+  lcgr_2 =	"0000b9030000RRE",
+  lcr_2 =	"000000001300RR",
+  lctl_3 =	"0000b7000000RS-a",
+  lctlg_3 =	"eb000000002fRSY-a",
+  lcxbr_2 =	"0000b3430000RRE",
+  lcxr_2 =	"0000b3630000RRE",
+  ld_2 =	"000068000000RX-a",
+  ldebr_2 =	"0000b3040000RRE",
+  lder_2 =	"0000b3240000RRE",
+  ldgr_2 =	"0000b3c10000RRE",
+  ldr_2 =	"000000002800RR",
+  ldxbr_2 =	"0000b3450000RRE",
+  ldxr_2 =	"000000002500RR",
+  ldy_2 =	"ed0000000065RXY-a",
+  le_2 =	"000078000000RX-a",
+  ledbr_2 =	"0000b3440000RRE",
+  ledr_2 =	"000000003500RR",
+  ler_2 =	"000000003800RR",
+  lexbr_2 =	"0000b3460000RRE",
+  lexr_2 =	"0000b3660000RRE",
+  ley_2 =	"ed0000000064RXY-a",
+  lfh_2 =	"e300000000caRXY-a",
+  lg_2 =	"e30000000004RXY-a",
+  lgb_2 =	"e30000000077RXY-a",
+  lgbr_2 =	"0000b9060000RRE",
+  lgdr_2 =	"0000b3cd0000RRE",
+  lgf_2 =	"e30000000014RXY-a",
+  lgfi_2 =	"c00100000000RIL-a",
+  lgfr_2 =	"0000b9140000RRE",
+  lgfrl_2 =	"c40c00000000RIL-b",
+  lgh_2 =	"e30000000015RXY-a",
+  lghi_2 =	"0000a7090000RI-a",
+  lghr_2 =	"0000b9070000RRE",
+  lghrl_2 =	"c40400000000RIL-b",
+  lgr_2 =	"0000b9040000RRE",
+  lgrl_2 =	"c40800000000RIL-b",
+  lh_2 =	"000048000000RX-a",
+  lhh_2 =	"e300000000c4RXY-a",
+  lhi_2 =	"0000a7080000RI-a",
+  lhr_2 =	"0000b9270000RRE",
+  lhrl_2 =	"c40500000000RIL-b",
+  lhy_2 =	"e30000000078RXY-a",
+  llc_2 =	"e30000000094RXY-a",
+  llch_2 =	"e300000000c2RXY-a",
+  llcr_2 =	"0000b9940000RRE",
+  llgc_2 =	"e30000000090RXY-a",
+  llgcr_2 =	"0000b9840000RRE",
+  llgf_2 =	"e30000000016RXY-a",
+  llgfr_2 =	"0000b9160000RRE",
+  llgfrl_2 =	"c40e00000000RIL-b",
+  llgh_2 =	"e30000000091RXY-a",
+  llghr_2 =	"0000b9850000RRE",
+  llghrl_2 =	"c40600000000RIL-b",
+  llgt_2 =	"e30000000017RXY-a",
+  llgtr_2 =	"0000b9170000RRE",
+  llh_2 =	"e30000000095RXY-a",
+  llhh_2 =	"e300000000c6RXY-a",
+  llhr_2 =	"0000b9950000RRE",
+  llhrl_2 =	"c40200000000RIL-b",
+  llihf_2 =	"c00e00000000RIL-a",
+  llihh_2 =	"0000a50c0000RI-a",
+  llihl_2 =	"0000a50d0000RI-a",
+  llilf_2 =	"c00f00000000RIL-a",
+  llilh_2 =	"0000a50e0000RI-a",
+  llill_2 =	"0000a50f0000RI-a",
+  lm_3 =	"000098000000RS-a",
+  lmg_3 =	"eb0000000004RSY-a",
+  lmh_3 =	"eb0000000096RSY-a",
+  lmy_3 =	"eb0000000098RSY-a",
+  lndbr_2 =	"0000b3110000RRE",
+  lndfr_2 =	"0000b3710000RRE",
+  lndr_2 =	"000000002100RR",
+  lnebr_2 =	"0000b3010000RRE",
+  lner_2 =	"000000003100RR",
+  lngfr_2 =	"0000b9110000RRE",
+  lngr_2 =	"0000b9010000RRE",
+  lnr_2 =	"000000001100RR",
+  lnxbr_2 =	"0000b3410000RRE",
+  lnxr_2 =	"0000b3610000RRE",
+  loc_3 =	"eb00000000f2RSY-b",
+  locg_3 =	"eb00000000e2RSY-b",
+  lpdbr_2 =	"0000b3100000RRE",
+  lpdfr_2 =	"0000b3700000RRE",
+  lpdr_2 =	"000000002000RR",
+  lpebr_2 =	"0000b3000000RRE",
+  lper_2 =	"000000003000RR",
+  lpgfr_2 =	"0000b9100000RRE",
+  lpgr_2 =	"0000b9000000RRE",
+  lpq_2 =	"e3000000008fRXY-a",
+  lpr_2 =	"000000001000RR",
+  lpxbr_2 =	"0000b3400000RRE",
+  lpxr_2 =	"0000b3600000RRE",
+  lr_2 =	"000000001800RR",
+  lra_2 =	"0000b1000000RX-a",
+  lrag_2 =	"e30000000003RXY-a",
+  lray_2 =	"e30000000013RXY-a",
+  lrdr_2 =	"000000002500RR",
+  lrer_2 =	"000000003500RR",
+  lrl_2 =	"c40d00000000RIL-b",
+  lrv_2 =	"e3000000001eRXY-a",
+  lrvg_2 =	"e3000000000fRXY-a",
+  lrvgr_2 =	"0000b90f0000RRE",
+  lrvh_2 =	"e3000000001fRXY-a",
+  lrvr_2 =	"0000b91f0000RRE",
+  lt_2 =	"e30000000012RXY-a",
+  ltdbr_2 =	"0000b3120000RRE",
+  ltdr_2 =	"000000002200RR",
+  ltdtr_2 =	"0000b3d60000RRE",
+  ltebr_2 =	"0000b3020000RRE",
+  lter_2 =	"000000003200RR",
+  ltg_2 =	"e30000000002RXY-a",
+  ltgf_2 =	"e30000000032RXY-a",
+  ltgfr_2 =	"0000b9120000RRE",
+  ltgr_2 =	"0000b9020000RRE",
+  ltr_2 =	"000000001200RR",
+  ltxbr_2 =	"0000b3420000RRE",
+  ltxr_2 =	"0000b3620000RRE",
+  ltxtr_2 =	"0000b3de0000RRE",
+  lura_2 =	"0000b24b0000RRE",
+  lurag_2 =	"0000b9050000RRE",
+  lxdbr_2 =	"0000b3050000RRE",
+  lxdr_2 =	"0000b3250000RRE",
+  lxebr_2 =	"0000b3060000RRE",
+  lxer_2 =	"0000b3260000RRE",
+  lxr_2 =	"0000b3650000RRE",
+  ly_2 =	"e30000000058RXY-a",
+  lzdr_2 =	"0000b3750000RRE",
+  lzer_2 =	"0000b3740000RRE",
+  lzxr_2 =	"0000b3760000RRE",
+  m_2 =		"00005c000000RX-a",
+  madb_3 =	"ed000000001eRXF",
+  maeb_3 =	"ed000000000eRXF",
+  maebr_3 =	"0000b30e0000RRD",
+  maer_3 =	"0000b32e0000RRD",
+  md_2 =	"00006c000000RX-a",
+  mdb_2 =	"ed000000001cRXE",
+  mdbr_2 =	"0000b31c0000RRE",
+  mde_2 =	"00007c000000RX-a",
+  mdeb_2 =	"ed000000000cRXE",
+  mdebr_2 =	"0000b30c0000RRE",
+  mder_2 =	"000000003c00RR",
+  mdr_2 =	"000000002c00RR",
+  me_2 =	"00007c000000RX-a",
+  meeb_2 =	"ed0000000017RXE",
+  meebr_2 =	"0000b3170000RRE",
+  meer_2 =	"0000b3370000RRE",
+  mer_2 =	"000000003c00RR",
+  mfy_2 =	"e3000000005cRXY-a",
+  mghi_2 =	"0000a70d0000RI-a",
+  mh_2 =	"00004c000000RX-a",
+  mhi_2 =	"0000a70c0000RI-a",
+  mhy_2 =	"e3000000007cRXY-a",
+  ml_2 =	"e30000000096RXY-a",
+  mlg_2 =	"e30000000086RXY-a",
+  mlgr_2 =	"0000b9860000RRE",
+  mlr_2 =	"0000b9960000RRE",
+  mr_2 =	"000000001c00RR",
+  ms_2 =	"000071000000RX-a",
+  msfi_2 =	"c20100000000RIL-a",
+  msg_2 =	"e3000000000cRXY-a",
+  msgf_2 =	"e3000000001cRXY-a",
+  msgfi_2 =	"c20000000000RIL-a",
+  msgfr_2 =	"0000b91c0000RRE",
+  msgr_2 =	"0000b90c0000RRE",
+  msr_2 =	"0000b2520000RRE",
+  msta_2 =	"0000b2470000RRE",
+  msy_2 =	"e30000000051RXY-a",
+  mvc_2 =	"d20000000000SS-a",
+  mvcin_2 =	"e80000000000SS-a",
+  mvcl_2 =	"000000000e00RR",
+  mvcle_3 =	"0000a8000000RS-a",
+  mvclu_3 =	"eb000000008eRSY-a",
+  mvghi_2 =	"e54800000000SIL",
+  mvhhi_2 =	"e54400000000SIL",
+  mvhi_2 =	"e54c00000000SIL",
+  mvi_2 =	"000092000000SI",
+  mvn_2 =	"d10000000000SS-a",
+  mvpg_2 =	"0000b2540000RRE",
+  mvst_2 =	"0000b2550000RRE",
+  mvz_2 =	"d30000000000SS-a",
+  mxbr_2 =	"0000b34c0000RRE",
+  mxd_2 =	"000067000000RX-a",
+  mxdb_2 =	"ed0000000007RXE",
+  mxdbr_2 =	"0000b3070000RRE",
+  mxdr_2 =	"000000002700RR",
+  mxr_2 =	"000000002600RR",
+  n_2 =		"000054000000RX-a",
+  nc_2 =	"d40000000000SS-a",
+  ng_2 =	"e30000000080RXY-a",
+  ngr_2 =	"0000b9800000RRE",
+  ni_2 =	"000094000000SI",
+  nihf_2 =	"c00a00000000RIL-a",
+  nihh_2 =	"0000a5040000RI-a",
+  nihl_2 =	"0000a5050000RI-a",
+  nilf_2 =	"c00b00000000RIL-a",
+  nilh_2 =	"0000a5060000RI-a",
+  nill_2 =	"0000a5070000RI-a",
+  nr_2 =	"000000001400RR",
+  ny_2 =	"e30000000054RXY-a",
+  o_2 =		"000056000000RX-a",
+  oc_2 =	"d60000000000SS-a",
+  og_2 =	"e30000000081RXY-a",
+  ogr_2 =	"0000b9810000RRE",
+  oi_2 =	"000096000000SI",
+  oihf_2 =	"c00c00000000RIL-a",
+  oihh_2 =	"0000a5080000RI-a",
+  oihl_2 =	"0000a5090000RI-a",
+  oilf_2 =	"c00d00000000RIL-a",
+  oilh_2 =	"0000a50a0000RI-a",
+  oill_2 =	"0000a50b0000RI-a",
+  or_2 =	"000000001600RR",
+  oy_2 =	"e30000000056RXY-a",
+  palb_2 =	"0000b2480000RRE",
+  pcc_2 =	"0000b92c0000RRE",
+  pckmo_2 =	"0000b9280000RRE",
+  pfd_2 =	"e30000000036m",
+  pfdrl_2 =	"c60200000000RIL-c",
+  pfmf_2 =	"0000b9af0000RRE",
+  pgin_2 =	"0000b22e0000RRE",
+  pgout_2 =	"0000b22f0000RRE",
+  popcnt_2 =	"0000b9e10000RRE",
+  pt_2 =	"0000b2280000RRE",
+  ptf_2 =	"0000b9a20000RRE",
+  pti_2 =	"0000b99e0000RRE",
+  rll_3 =	"eb000000001dRSY-a",
+  rllg_3 =	"eb000000001cRSY-a",
+  rrbe_2 =	"0000b22a0000RRE",
+  rrbm_2 =	"0000b9ae0000RRE",
+  s_2 =		"00005b000000RX-a",
+  sar_2 =	"0000b24e0000RRE",
+  sd_2 =	"00006b000000RX-a",
+  sdb_2 =	"ed000000001bRXE",
+  sdbr_2 =	"0000b31b0000RRE",
+  sdr_2 =	"000000002b00RR",
+  se_2 =	"00007b000000RX-a",
+  seb_2 =	"ed000000000bRXE",
+  sebr_2 =	"0000b30b0000RRE",
+  ser_2 =	"000000003b00RR",
+  sfasr_2 =	"0000b3850000RRE",
+  sfpc_2 =	"0000b3840000RRE",
+  sg_2 =	"e30000000009RXY-a",
+  sgf_2 =	"e30000000019RXY-a",
+  sgfr_2 =	"0000b9190000RRE",
+  sgr_2 =	"0000b9090000RRE",
+  sh_2 =	"00004b000000RX-a",
+  shy_2 =	"e3000000007bRXY-a",
+  sl_2 =	"00005f000000RX-a",
+  sla_2 =	"00008b000000RS-a",
+  slag_3 =	"eb000000000bRSY-a",
+  slak_3 =	"eb00000000ddRSY-a",
+  slb_2 =	"e30000000099RXY-a",
+  slbg_2 =	"e30000000089RXY-a",
+  slbgr_2 =	"0000b9890000RRE",
+  slbr_2 =	"0000b9990000RRE",
+  slda_2 =	"00008f000000RS-a",
+  sldl_2 =	"00008d000000RS-a",
+  slfi_2 =	"c20500000000RIL-a",
+  slg_2 =	"e3000000000bRXY-a",
+  slgf_2 =	"e3000000001bRXY-a",
+  slgfi_2 =	"c20400000000RIL-a",
+  slgfr_2 =	"0000b91b0000RRE",
+  slgr_2 =	"0000b90b0000RRE",
+  sll_2 =	"000089000000RS-a",
+  sllg_3 =	"eb000000000dRSY-a",
+  sllk_3 =	"eb00000000dfRSY-a",
+  slr_2 =	"000000001f00RR",
+  sly_2 =	"e3000000005fRXY-a",
+  spm_2 =	"000000000400RR",
+  sqdb_2 =	"ed0000000015RXE",
+  sqdbr_2 =	"0000b3150000RRE",
+  sqdr_2 =	"0000b2440000RRE",
+  sqeb_2 =	"ed0000000014RXE",
+  sqebr_2 =	"0000b3140000RRE",
+  sqer_2 =	"0000b2450000RRE",
+  sqxbr_2 =	"0000b3160000RRE",
+  sqxr_2 =	"0000b3360000RRE",
+  sr_2 =	"000000001b00RR",
+  sra_2 =	"00008a000000RS-a",
+  srag_3 =	"eb000000000aRSY-a",
+  srak_3 =	"eb00000000dcRSY-a",
+  srda_2 =	"00008e000000RS-a",
+  srdl_2 =	"00008c000000RS-a",
+  srl_2 =	"000088000000RS-a",
+  srlg_3 =	"eb000000000cRSY-a",
+  srlk_3 =	"eb00000000deRSY-a",
+  srst_2 =	"0000b25e0000RRE",
+  srstu_2 =	"0000b9be0000RRE",
+  ssair_2 =	"0000b99f0000RRE",
+  ssar_2 =	"0000b2250000RRE",
+  st_2 =	"000050000000RX-a",
+  stam_3 =	"00009b000000RS-a",
+  stamy_3 =	"eb000000009bRSY-a",
+  stc_2 =	"000042000000RX-a",
+  stch_2 =	"e300000000c3RXY-a",
+  stcm_3 =	"0000be000000RS-b",
+  stcmh_3 =	"eb000000002cRSY-b",
+  stcmy_3 =	"eb000000002dRSY-b",
+  stctg_3 =	"eb0000000025RSY-a",
+  stctl_3 =	"0000b6000000RS-a",
+  stcy_2 =	"e30000000072RXY-a",
+  std_2 =	"000060000000RX-a",
+  stdy_2 =	"ed0000000067RXY-a",
+  ste_2 =	"000070000000RX-a",
+  stey_2 =	"ed0000000066RXY-a",
+  stfh_2 =	"e300000000cbRXY-a",
+  stfl_1 =	"0000b2b10000S",
+  stg_2 =	"e30000000024RXY-a",
+  stgrl_2 =	"c40b00000000RIL-b",
+  sth_2 =	"000040000000RX-a",
+  sthh_2 =	"e300000000c7RXY-a",
+  sthrl_2 =	"c40700000000RIL-b",
+  sthy_2 =	"e30000000070RXY-a",
+  stm_3 =	"000090000000RS-a",
+  stmg_3 =	"eb0000000024RSY-a",
+  stmh_3 =	"eb0000000026RSY-a",
+  stmy_3 =	"eb0000000090RSY-a",
+  stoc_3 =	"eb00000000f3RSY-b",
+  stocg_3 =	"eb00000000e3RSY-b",
+  stpq_2 =	"e3000000008eRXY-a",
+  strl_2 =	"c40f00000000RIL-b",
+  strv_2 =	"e3000000003eRXY-a",
+  strvg_2 =	"e3000000002fRXY-a",
+  strvh_2 =	"e3000000003fRXY-a",
+  stura_2 =	"0000b2460000RRE",
+  sturg_2 =	"0000b9250000RRE",
+  sty_2 =	"e30000000050RXY-a",
+  su_2 =	"00007f000000RX-a",
+  sur_2 =	"000000003f00RR",
+  svc_1 =	"000000000a00I",
+  sw_2 =	"00006f000000RX-a",
+  swr_2 =	"000000002f00RR",
+  sxbr_2 =	"0000b34b0000RRE",
+  sxr_2 =	"000000003700RR",
+  sy_2 =	"e3000000005bRXY-a",
+  tar_2 =	"0000b24c0000RRE",
+  tb_2 =	"0000b22c0000RRE",
+  thder_2 =	"0000b3580000RRE",
+  thdr_2 =	"0000b3590000RRE",
+  tm_2 =	"000091000000SI",
+  tmhh_2 =	"0000a7020000RI-a",
+  tmhl_2 =	"0000a7030000RI-a",
+  tmlh_2 =	"0000a7000000RI-a",
+  tmll_2 =	"0000a7010000RI-a",
+  tmy_2 =	"eb0000000051SIY",
+  tr_2 =	"dc0000000000SS-a",
+  trace_3 =	"000099000000RS-a",
+  tracg_3 =	"eb000000000fRSY-a",
+  tre_2 =	"0000b2a50000RRE",
+  trt_2 =	"dd0000000000SS-a",
+  trtr_2 =	"d00000000000SS-a",
+  unpka_2 =	"ea0000000000SS-a",
+  unpku_2 =	"e20000000000SS-a",
+  x_2 =		"000057000000RX-a",
+  xc_2 =	"d70000000000SS-a",
+  xg_2 =	"e30000000082RXY-a",
+  xgr_2 =	"0000b9820000RRE",
+  xi_2 =	"000097000000SI",
+  xihf_2 =	"c00600000000RIL-a",
+  xilf_2 =	"c00700000000RIL-a",
+  xr_2 =	"000000001700RR",
+  xy_2 =	"e30000000057RXY-a",
+for cond, c in pairs(map_cond) do
+  -- Extended mnemonics for branches: for every condition name in map_cond,
+  -- register one-operand templates with the condition value already merged
+  -- into the opcode halfword.
+  -- TODO: replace 'B' with correct encoding.
+  local m_hi, m_lo = shl(c, 20), shl(c, 4)
+  -- brc
+  map_op["j"..cond.."_1"] = "0000"..tohex(0xa7040000+m_hi).."RI-c"
+  -- brcl
+  map_op["jg"..cond.."_1"] = tohex(0xc0040000+m_hi).."0000".."RIL-c"
+  -- bc
+  map_op["b"..cond.."_1"] = "0000"..tohex(0x47000000+m_hi).."RX-b"
+  -- bcr
+  map_op["b"..cond.."r_1"] = "0000"..tohex(0x0700+m_lo).."RR"
+-- Handle opcodes defined with template strings.
+--
+-- params   : operand strings of the instruction being assembled.
+-- template : 12 hex digits (three 16-bit halfwords of opcode bits) followed
+--            by the instruction-format name that selects the encoder below.
+-- nparams, pos : accepted but not read in this function.
+--
+-- Each encoder merges the parsed operands into the halfwords, writes them
+-- with wputhw and runs any deferred action returned by the operand parsers.
+-- An unknown format name is reported through werror.
+local function parse_template(params, template, nparams, pos)
+  -- Read the template in 16-bit chunks.
+  -- Leading halfword zeroes should not be written out.
+  local op0 = tonumber(sub(template, 1, 4), 16)
+  local op1 = tonumber(sub(template, 5, 8), 16)
+  local op2 = tonumber(sub(template, 9, 12), 16)
+  -- Process each character.
+  local p = sub(template, 13)
+  if p == "I" then
+    local imm_val, a = parse_imm8(params[1])
+    -- Mask to one byte so a negative immediate cannot borrow from the opcode.
+    op2 = op2 + band(imm_val, 0xff)
+    wputhw(op2)
+    if a then a() end
+  elseif p == "RI-a" then
+    op1 = op1 + shl(parse_reg(params[1]), 4)
+    wputhw(op1)
+    parse_imm16(params[2])
+  elseif p == "RI-b" then
+    op1 = op1 + shl(parse_reg(params[1]), 4)
+    wputhw(op1)
+    local mode, n, s = parse_label(params[2])
+    waction("REL_"..mode, n, s)
+  elseif p == "RI-c" then
+    -- The mask operand is optional; the label is always the last operand.
+    if #params > 1 then
+      op1 = op1 + shl(parse_num(params[1]), 4)
+    end
+    wputhw(op1)
+    local mode, n, s = parse_label(params[#params])
+    waction("REL_"..mode, n, s)
+  elseif p == "RIE-e" then
+    op0 = op0 + shl(parse_reg(params[1]), 4) + parse_reg(params[2])
+    -- NOTE(review): every other encoder uses wputhw; confirm that wputhw1 is
+    -- defined elsewhere in this file and is intended here.
+    wputhw1(op0)
+    local mode, n, s = parse_label(params[3])
+    waction("REL_"..mode, n, s)
+    wputhw(op2)
+  elseif p == "RIL-a" then
+    op0 = op0 + shl(parse_reg(params[1]), 4)
+    wputhw(op0);
+    parse_imm32(params[2])
+  elseif p == "RIL-b" then
+    op0 = op0 + shl(parse_reg(params[1]), 4)
+    wputhw(op0)
+    local mode, n, s = parse_label(params[2])
+    waction("REL_"..mode, n, s)
+  elseif p == "RIL-c" then
+    -- The mask operand is optional; the label is always the last operand.
+    if #params > 1 then
+      op0 = op0 + shl(parse_num(params[1]), 4)
+    end
+    wputhw(op0)
+    local mode, n, s = parse_label(params[#params])
+    waction("REL_"..mode, n, s)
+  elseif p == "RR" then
+    -- With a single operand only the low register field is filled in.
+    if #params > 1 then
+      op2 = op2 + shl(parse_reg(params[1]), 4)
+    end
+    op2 = op2 + parse_reg(params[#params])
+    wputhw(op2)
+  elseif p == "RRD" then
+    wputhw(op1)
+    op2 = op2 + shl(parse_reg(params[1]), 12) + shl(parse_reg(params[2]), 4) + parse_reg(params[3])
+    wputhw(op2)
+  elseif p == "RRE" then
+    op2 = op2 + shl(parse_reg(params[1]), 4) + parse_reg(params[2])
+    wputhw(op1); wputhw(op2)
+  elseif p == "RRF-b" then
+    wputhw(op1)
+    op2 = op2 + shl(parse_reg(params[1]), 4) + shl(parse_reg(params[2]), 12) + parse_reg(params[3]) + shl(parse_mask(params[4]), 8)
+    wputhw(op2)
+  elseif p == "RRF-e" then
+    wputhw(op1)
+    op2 = op2 + shl(parse_reg(params[1]), 4) + shl(parse_mask(params[2]), 12) + parse_reg(params[3])
+    -- The fourth operand (second mask) is optional.
+    if params[4] then
+      op2 = op2 + shl(parse_mask2(params[4]), 8)
+    end
+    wputhw(op2)
+  elseif p == "RS-a" then
+    -- d/b/a are declared before the if/else so the deferred displacement
+    -- action `a` is still in scope once the halfwords have been written.
+    local d, b, a
+    if (params[3]) then
+      d, b, a = parse_mem_b(params[3])
+      op1 = op1 + shl(parse_reg(params[1]), 4) + parse_reg(params[2])
+      op2 = op2 + shl(b, 12) + d
+    else
+      d, b, a = parse_mem_b(params[2])
+      op1 = op1 + shl(parse_reg(params[1]), 4)
+      op2 = op2 + shl(b, 12) + d
+    end
+    wputhw(op1); wputhw(op2)
+    if a then a() end
+  elseif p == "RS-b" then
+    local m = parse_mask(params[2])
+    local d, b, a = parse_mem_b(params[3])
+    op1 = op1 + shl(parse_reg(params[1]), 4) + m
+    op2 = op2 + shl(b, 12) + d
+    wputhw(op1); wputhw(op2)
+    if a then a() end
+  elseif p == "RSI" then
+    op1 = op1 + shl(parse_reg(params[1]), 4) + parse_reg(params[2])
+    wputhw(op1)
+    local mode, n, s = parse_label(params[3])
+    waction("REL_"..mode, n, s)
+  elseif p == "RSY-a" then
+    local d, b, a = parse_mem_by(params[3])
+    op0 = op0 + shl(parse_reg(params[1]), 4) + parse_reg(params[2])
+    -- Low 12 displacement bits go into op1, the remaining high bits into the
+    -- upper byte of op2.
+    op1 = op1 + shl(b, 12) + band(d, 0xfff)
+    op2 = op2 + band(shr(d, 4), 0xff00)
+    wputhw(op0); wputhw(op1); wputhw(op2)
+    if a then a() end -- a() emits action.
+  elseif p == "RX-a" then
+    local d, x, b, a = parse_mem_bx(params[2])
+    op1 = op1 + shl(parse_reg(params[1]), 4) + x
+    op2 = op2 + shl(b, 12) + d
+    wputhw(op1); wputhw(op2)
+    if a then a() end
+  elseif p == "RX-b" then
+    local d, x, b, a = parse_mem_bx(params[#params])
+    -- The mask operand is optional; the memory operand is always last.
+    if #params > 1 then
+      op1 = op1 + shl(parse_num(params[1]), 4)
+    end
+    op1 = op1 + x
+    op2 = op2 + shl(b, 12) + d
+    wputhw(op1); wputhw(op2)
+    if a then a() end
+  elseif p == "RXE" then
+    local d, x, b, a = parse_mem_bx(params[2])
+    op0 = op0 + shl(parse_reg(params[1]), 4) + x
+    op1 = op1 + shl(b, 12) + d
+    wputhw(op0); wputhw(op1)
+    if a then a() end
+    wputhw(op2);
+  elseif p == "RXF" then
+    local d, x, b, a = parse_mem_bx(params[3])
+    op0 = op0 + shl(parse_reg(params[2]), 4) + x
+    op1 = op1 + shl(b, 12) + d
+    wputhw(op0); wputhw(op1)
+    if a then a() end
+    op2 = op2 + shl(parse_reg(params[1]), 12)
+    wputhw(op2)
+  elseif p == "RXY-a" then
+    local d, x, b, a = parse_mem_bxy(params[2])
+    op0 = op0 + shl(parse_reg(params[1]), 4) + x
+    op1 = op1 + shl(b, 12) + band(d, 0xfff)
+    op2 = op2 + band(shr(d, 4), 0xff00)
+    wputhw(op0); wputhw(op1); wputhw(op2)
+    if a then a() end
+  elseif p == "S" then
+    wputhw(op1);
+    local d, b, a = parse_mem_b(params[1])
+    op2 = op2 + shl(b, 12) + d
+    wputhw(op2)
+    if a then a() end
+  elseif p == "SI" then
+    local imm_val, iact = parse_imm8(params[2])
+    -- Mask to one byte so a negative immediate cannot borrow from the opcode.
+    op1 = op1 + band(imm_val, 0xff)
+    wputhw(op1)
+    if iact then iact() end
+    local d, b, a = parse_mem_b(params[1])
+    op2 = op2 + shl(b, 12) + d
+    wputhw(op2)
+    if a then a() end
+  elseif p == "SIL" then
+    wputhw(op0)
+    local d, b, a = parse_mem_b(params[1])
+    op1 = op1 + shl(b, 12) + d
+    wputhw(op1)
+    if a then a() end
+    parse_imm16(params[2])
+  elseif p == "SIY" then
+    local imm8, iact = parse_imm8(params[2])
+    -- The immediate occupies the low byte of the first halfword, next to the
+    -- opcode byte (same layout as SI); shifting it by 8 would overwrite the
+    -- opcode.
+    op0 = op0 + band(imm8, 0xff)
+    wputhw(op0)
+    if iact then iact() end
+    local d, b, a = parse_mem_by(params[1])
+    op1 = op1 + shl(b, 12) + band(d, 0xfff)
+    op2 = op2 + band(shr(d, 4), 0xff00)
+    wputhw(op1); wputhw(op2)
+    if a then a() end
+  elseif p == "SS-a" then
+    local d1, l1, b1, d1a, l1a = parse_mem_lb(params[1])
+    local d2, b2, d2a = parse_mem_b(params[2])
+    op0 = op0 + l1
+    op1 = op1 + shl(b1, 12) + d1
+    op2 = op2 + shl(b2, 12) + d2
+    -- Each deferred action is run right after the halfword it belongs to.
+    wputhw(op0)
+    if l1a then l1a() end
+    wputhw(op1)
+    if d1a then d1a() end
+    wputhw(op2)
+    if d2a then d2a() end
+  elseif p == "SS-b" then
+    local high_l = true
+    local d1, l1, b1, d1a, l1a = parse_mem_l2b(params[1], high_l)
+    high_l = false
+    local d2, l2, b2, d2a, l2a = parse_mem_l2b(params[2], high_l)
+    op0 = op0 + shl(l1, 4) + l2
+    op1 = op1 + shl(b1, 12) + d1
+    op2 = op2 + shl(b2, 12) + d2
+    wputhw(op0)
+    if l1a then l1a() end
+    if l2a then l2a() end
+    wputhw(op1)
+    if d1a then d1a() end
+    wputhw(op2)
+    if d2a then d2a() end
+  else
+    werror("unrecognized encoding")
+  end
+-- Assemble one instruction from a template string. `template` may hold
+-- several alternatives separated by "|"; they are tried in order until one
+-- encodes without raising. Called without params, returns the result of
+-- stripping the first run of 12 hex digits from the template.
+function op_template(params, template, nparams)
+  if not params then return template:gsub("%x%x%x%x%x%x%x%x%x%x%x%x", "") end
+  -- Limit number of section buffer positions used by a single dasm_put().
+  -- A single opcode needs a maximum of 5 positions.
+  if secpos+5 > maxsecpos then wflush() end
+  -- Remember the output state so a failed alternative can be undone.
+  local act0, arg0, sec0 = #actlist, #actargs, secpos
+  local failure
+  for alt in gmatch(template, "[^|]+") do
+    local ok, msg = pcall(parse_template, params, alt, nparams)
+    if ok then return end
+    failure = msg
+    secpos = sec0
+    for i = 1, 3 do
+      actlist[act0+i] = nil
+      actargs[arg0+i] = nil
+    end
+  end
+  -- No alternative matched: re-raise the last error without position info.
+  error(failure, 0)
+map_op[".template__"] = op_template
+-- Pseudo-opcode to mark the position where the action list is to be emitted.
+-- Without params it returns its operand description ("cvar").
+map_op[".actionlist_1"] = function(params)
+  if params then
+    local cvar = params[1] -- No syntax check. You get to keep the pieces.
+    wline(function(out) writeactions(out, cvar) end)
+  else
+    return "cvar"
+  end
+-- Pseudo-opcode to mark the position where the global enum is to be emitted.
+-- Without params it returns its operand description ("prefix").
+map_op[".globals_1"] = function(params)
+  if params then
+    local enum_prefix = params[1] -- No syntax check. You get to keep the pieces.
+    wline(function(out) writeglobals(out, enum_prefix) end)
+  else
+    return "prefix"
+  end
+-- Pseudo-opcode to mark the position where the global names are to be emitted.
+-- Without params it returns its operand description ("cvar").
+map_op[".globalnames_1"] = function(params)
+  if params then
+    local cvar = params[1] -- No syntax check. You get to keep the pieces.
+    wline(function(out) writeglobalnames(out, cvar) end)
+  else
+    return "cvar"
+  end
+-- Pseudo-opcode to mark the position where the extern names are to be emitted.
+-- Without params it returns its operand description ("cvar").
+map_op[".externnames_1"] = function(params)
+  if params then
+    local cvar = params[1] -- No syntax check. You get to keep the pieces.
+    wline(function(out) writeexternnames(out, cvar) end)
+  else
+    return "cvar"
+  end
+-- Label pseudo-opcode (converted from trailing colon form).
+-- Parses the operand as a label definition and queues the matching LABEL_*
+-- action; extern labels cannot be defined and are rejected.
+map_op[".label_1"] = function(params)
+  if not params then return "[1-9] | ->global | =>pcexpr" end
+  if secpos+1 > maxsecpos then wflush() end
+  local kind, index, expr = parse_label(params[1], true)
+  if kind == "EXT" then
+    werror("bad label definition")
+  end
+  waction("LABEL_"..kind, index, expr, 1)
+-- Pseudo-opcodes for data storage.
+-- Each operand must convert with tonumber(); negative values are wrapped by
+-- adding 2^32 before being written with wputw.
+map_op[".long_*"] = function(params)
+  if not params then return "imm..." end
+  for _, word in ipairs(params) do
+    local value = tonumber(word)
+    if value == nil then werror("bad immediate `"..word.."'") end
+    if value < 0 then
+      value = value + 2^32
+    end
+    wputw(value)
+    -- Flush once the section position budget is nearly used up.
+    if secpos+2 > maxsecpos then wflush() end
+  end
+-- Alignment pseudo-opcode.
+-- Accepts a numeric operand that reaches exactly 1 after one to eight
+-- halvings and queues an ALIGN action for it; anything else is reported
+-- through werror.
+map_op[".align_1"] = function(params)
+  if not params then return "numpow2" end
+  if secpos+1 > maxsecpos then wflush() end
+  local align = tonumber(params[1])
+  -- Must be a power of 2 in the range (2 ... 256).
+  local half = align and align / 2
+  local step = 1
+  while half and step <= 8 do
+    if half == 1 then
+      waction("ALIGN", align-1, nil, 1) -- Action halfword is 2**n-1.
+      return
+    end
+    half = half / 2
+    step = step + 1
+  end
+  werror("bad alignment")
+-- Pseudo-opcode for (primitive) type definitions (map to C types).
+-- params = { name, ctype [, reg] }. Without params it returns the operand
+-- description for the 2- or 3-operand form.
+-- Registers the type in map_type, adds a "#name" define that expands to
+-- sizeof(ctype), and emits a DtN(_V) shortcut macro via wline.
+-- Reports an invalid or already defined type name through werror.
+map_op[".type_3"] = function(params, nparams)
+  if not params then
+    return nparams == 2 and "name, ctype" or "name, ctype, reg"
+  end
+  local name, ctype, reg = params[1], params[2], params[3]
+  if not match(name, "^[%a_][%w_]*$") then
+    werror("bad type name `"..name.."'")
+  end
+  local tp = map_type[name]
+  if tp then
+    werror("duplicate type `"..name.."'")
+  end
+  -- Add #type to defines. A bit unclean to put it in map_archdef.
+  -- The key carries the type name so each type gets its own sizeof define.
+  map_archdef["#"..name] = "sizeof("..ctype..")"
+  -- Add new type and emit shortcut define.
+  local num = ctypenum + 1
+  map_type[name] = {
+    ctype = ctype,
+    ctypefmt = format("Dt%X(%%s)", num),
+    reg = reg,
+  }
+  wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num, ctype))
+  ctypenum = num
+-- The two-operand form (no register) shares the same handler.
+map_op[".type_2"] = map_op[".type_3"]
+-- Dump type definitions.
+-- Writes one line per entry of map_type (name, C type, register or empty),
+-- sorted by name, to `out`. `lvl` is not read here.
+local function dumptypes(out, lvl)
+  local names = {}
+  for name in pairs(map_type) do
+    names[#names+1] = name
+  end
+  sort(names)
+  out:write("Type definitions:\n")
+  for i = 1, #names do
+    local name = names[i]
+    local tp = map_type[name]
+    out:write(format("  %-20s %-20s %s\n", name, tp.ctype, tp.reg or ""))
+  end
+  out:write("\n")
+-- Set the current section.
+-- Queues a "SECTION" action for section number `num`, then calls
+-- wflush(true).
+function _M.section(num)
+  waction("SECTION", num)
+  wflush(true) -- SECTION is a terminal action.
+-- Dump architecture description.
+-- Writes a banner built from _info (arch, version, release) to `out`,
+-- followed by the action dump.
+function _M.dumparch(out)
+  local banner = format("DynASM %s version %s, released %s\n\n",
+    _info.arch, _info.version, _info.release)
+  out:write(banner)
+  dumpactions(out)
+-- Dump all user defined elements.
+-- Forwards `out` and `lvl` to the type, global and extern dumpers, in that
+-- order.
+function _M.dumpdef(out, lvl)
+  dumptypes(out, lvl)
+  dumpglobals(out, lvl)
+  dumpexterns(out, lvl)
+-- Pass callbacks from/to the DynASM core.
+-- Stores the four callbacks handed in (line writer, error, fatal, warning)
+-- and hands back this module's flush function.
+function _M.passcb(wl, we, wf, ww)
+  wline = wl
+  werror = we
+  wfatal = wf
+  wwarn = ww
+  return wflush
+-- Setup the arch-specific module.
+-- Records the architecture name and the options table for later use.
+function _M.setup(arch, opt)
+  g_arch = arch
+  g_opt = opt
+-- Merge the core maps and the arch-specific maps.
+-- Lookups that miss in map_op fall back to map_coreop, and lookups that miss
+-- in map_def fall back to map_archdef. Returns map_op and map_def.
+function _M.mergemaps(map_coreop, map_def)
+  local op_fallback = { __index = map_coreop }
+  local def_fallback = { __index = map_archdef }
+  setmetatable(map_op, op_fallback)
+  setmetatable(map_def, def_fallback)
+  return map_op, map_def
+return _M
diff -urpN luajit-2.1.0~beta3+git.1624618403.e9577376.orig/dynasm/Examples/ luajit-2.1.0~beta3+git.1624618403.e9577376/dynasm/Examples/
--- luajit-2.1.0~beta3+git.1624618403.e9577376.orig/dynasm/Examples/	1969-12-31 18:00:00.000000000 -0600
+++ luajit-2.1.0~beta3+git.1624618403.e9577376/dynasm/Examples/	2021-10-05 12:36:23.413160484 -0500
@@ -0,0 +1,13 @@
+# set -x
+# run test
+lua ../dynasm.lua test_z_inst.c | gcc -DDASM_CHECKS -std=gnu99 -Wall -Werror -g -x c -o test_z_inst -
+# cleanup
+rm -f ./test_z_inst
+# exit
+exit $ec
diff -urpN luajit-2.1.0~beta3+git.1624618403.e9577376.orig/dynasm/Examples/test_z_inst.c luajit-2.1.0~beta3+git.1624618403.e9577376/dynasm/Examples/test_z_inst.c
--- luajit-2.1.0~beta3+git.1624618403.e9577376.orig/dynasm/Examples/test_z_inst.c	1969-12-31 18:00:00.000000000 -0600
+++ luajit-2.1.0~beta3+git.1624618403.e9577376/dynasm/Examples/test_z_inst.c	2021-10-05 12:36:23.449160674 -0500
@@ -0,0 +1,487 @@
+#include <assert.h>
+#include <stdio.h>
+#include <sys/mman.h>
+#include "../dasm_proto.h"
+#include "../dasm_s390x.h"
+// DynASM directives.
+|.arch s390x
+|.actionlist actions // Name of the generated action list; main() passes it to dasm_setup().
+|.globals lab_ // Prefix for the generated global-label names; main() sizes its label array with lab__MAX.
+static void add(dasm_State *state)  // Emits: r2 = r2 + r3 (32-bit ar), then return to the caller.
+  dasm_State ** Dst = &state;  // The code DynASM generates for the '|' lines refers to the state as Dst.
+  | ar r2,r3
+  | br r14
+static void add_rrd(dasm_State *state)  // Emits lgfi followed by the three-operand maer; its entry in test[] is commented out.
+  dasm_State **Dst = &state;
+  | lgfi r4 , 0x02
+  | maer r2 , r3 , r4
+  | br r14
+static void sub(dasm_State *state)  // Emits: r2 = r2 - r3 (32-bit sr), then return to the caller.
+  dasm_State **Dst = &state;
+  | sr r2,r3
+  | br r14
+static void mul(dasm_State *state)  // Emits: r2 = r2 * r3 (32-bit msr), then return to the caller.
+  dasm_State **Dst = &state;
+  | msr r2 , r3
+  | br r14
+static void rx(dasm_State *state)  // Exercises RX-format la with literal and C-variable displacements, with and without an index register.
+  dasm_State **Dst = &state;
+  int x = 1;  // Displacement supplied through a C variable instead of a literal.
+  int y = 4095;  // Same value as the literal displacement used below.
+  | la r4, 4095(r2, r3)
+  | la r5, 4095(r4)
+  | la r1, x(r5)
+  | la r2, y(r1, r0)
+  | br r14
+static void rxy(dasm_State *state)  // Exercises RXY-format lay; -524288 and 524287 are the limits of a signed 20-bit displacement.
+  dasm_State **Dst = &state;
+  int x = -524287;  // Displacements supplied through C variables instead of literals.
+  int y = 524286;
+  | lay r4, -524288(r2, r3)
+  | lay r5, 524287(r4)
+  | lay r1, x(r5)
+  | lay r2, y(r1, r0)
+  | br r14
+static void lab(dasm_State *state)  // Loop built from a local label and a backward relative jump.
+  dasm_State **Dst = &state;
+  // r1 = 0; do { r2 += r2; r1 += 1; } while(r1 < r3);
+  | la r1, 0(r0)
+  |1:
+  | agr r2, r2
+  | la r1, 1(r1)
+  | cgr r1, r3
+  | jl <1 // Back to the nearest preceding label 1 while r1 < r3.
+  | br r14
+static void labg(dasm_State *state)  // Same loop as lab, using the jgl/jgnl forms plus a forward jump to a second label 1.
+  dasm_State **Dst = &state;
+  // r1 = 0; do { r2 += r2; r1 += 1; } while(r1 < r3);
+  | la r1, 0(r0)
+  |1:
+  | agr r2, r2
+  | la r1, 1(r1)
+  | cgr r1, r3
+  | jgl <1
+  | jgnl >1
+  | stg r0, 0(r0) // Not reached: one of the two conditional jumps above is always taken.
+  |1:
+  | br r14
+static void jmp_fwd(dasm_State *state)  // Exercises forward (>N) and backward (<N) jumps between three local labels.
+  dasm_State **Dst = &state;
+  // while(r2!=r3){r2 += 2};
+  | j >1
+  |1:
+  | cgr r2 , r3
+  | jne >2
+  | je >3
+  |2:
+  | afi r2, 0x2
+  | j <1
+  |3:
+  | br r14
+static void add_imm16(dasm_State *state)  // Emits: r2 += 0xf using the 16-bit immediate form ahi.
+  dasm_State **Dst = &state;
+  | ahi r2 , 0xf
+  | br r14
+static void add_imm32(dasm_State *state)  // Emits: r2 += 0xe using the 32-bit immediate form afi.
+  dasm_State **Dst = &state;
+  | afi r2 , 0xe
+  | br r14
+// Emits a function that saves the callee-saved registers, computes
+// (r2 + r3) * (16 + 32) using r7/r8 as scratch, restores the registers
+// and returns. Exercises .define, .macro and the stmg/lmg/std/ld forms.
+static void save(dasm_State *state)
+  dasm_State **Dst = &state;
+  |.define CFRAME_SPACE,	224	// Delta for sp, 8 byte aligned.
+  |
+  |// Register save area.
+  |.define SAVE_GPRS,	264(sp)	// Save area for r6-r15 (10*8 bytes).
+  |
+  |// Argument save area, each slot is 8-bytes (32-bit types are sign/zero extended).
+  |.define RESERVED,	232(sp)	// Reserved for compiler use.
+  |.define BACKCHAIN,	224(sp)
+  |
+  |// Current stack frame.
+  |.define SAVE_FPR15,	216(sp)
+  |.define SAVE_FPR14,	208(sp)
+  |.define SAVE_FPR13,	200(sp)
+  |.define SAVE_FPR12,	192(sp)
+  |.define SAVE_FPR11,	184(sp)
+  |.define SAVE_FPR10,	176(sp)
+  |.define SAVE_FPR9,	168(sp)
+  |.define SAVE_FPR8,	160(sp)
+  |
+  |// Callee save area.
+  |.define CALLEESAVE,	000(sp)
+  |
+  |.macro saveregs
+  |  // Store the GPRs before moving sp, so that the saved r15 is the
+  |  // caller's stack pointer. 40(sp) here is the same slot as SAVE_GPRS
+  |  // (264(sp)) once CFRAME_SPACE (224) has been subtracted from sp.
+  |  stmg r6, r15, 40(sp)
+  |  lay sp, -CFRAME_SPACE(sp)	// Allocate stack frame.
+  |  std f8, SAVE_FPR8		// f8-f15 are callee-saved.
+  |  std f9, SAVE_FPR9
+  |  std f10, SAVE_FPR10
+  |  std f11, SAVE_FPR11
+  |  std f12, SAVE_FPR12
+  |  std f13, SAVE_FPR13
+  |  std f14, SAVE_FPR14
+  |  std f15, SAVE_FPR15
+  |.endmacro
+  |
+  |.macro restoreregs
+  |  ld f8, SAVE_FPR8		// f8-f15 are callee-saved.
+  |  ld f9, SAVE_FPR9
+  |  ld f10, SAVE_FPR10
+  |  ld f11, SAVE_FPR11
+  |  ld f12, SAVE_FPR12
+  |  ld f13, SAVE_FPR13
+  |  ld f14, SAVE_FPR14
+  |  ld f15, SAVE_FPR15
+  |  lmg r6, r15, SAVE_GPRS	// Restores the stack pointer (the caller's r15 saved above).
+  |.endmacro
+  |
+  | saveregs
+  | lgfi r7, 0x10 // 16
+  | lgfi r8, 0x20 // 32
+  | agr r2, r3
+  | agr r7, r8
+  | msgr r2, r7
+  | restoreregs
+  | br r14
+static void labmul(dasm_State *state)  // Exercises global labels (->name) and a brasl call between them.
+  dasm_State **Dst = &state;
+  // Multiply using an add function: r2 is added to itself r3-1 times through calls to ->add_func.
+  // Only correct if input is positive.
+  |->mul_func:
+  | stmg r6, r14, 48(sp) // Save r6-r14; r6, r7 and r14 are overwritten below.
+  | lgr r6, r2
+  | lgr r7, r3
+  | cgfi r7, 0
+  | je >3
+  | cgfi r7, 1
+  | je >2
+  |1:
+  | lgr r3, r6
+  | brasl r14, ->add_func
+  | lay r7, -1(r7)
+  | cgfi r7, 1
+  | jh <1
+  |2:
+  | lmg r6, r14, 48(sp)
+  | br r14
+  |3:
+  | la r2, 0(r0) // Multiplier is zero: the result is zero.
+  | j <2
+  |->add_func:
+  | agr r2, r3
+  | br r14
+static void pc(dasm_State *state) {  // Exercises dynamic =>pc labels: the code jumps from =>MAX down to =>0, adding i to r2 at each label i > 0.
+  dasm_State **Dst = &state;
+  int MAX = 10;
+  dasm_growpc(Dst, MAX+1);  // Make room for pc labels 0..MAX.
+  | j =>MAX
+  for (int i = 0; i <= MAX; i++) {
+    |=>i:
+    if (i == 0) {
+      | br r14
+    } else {
+      | aghi r2, i
+      | j =>i-1
+    }
+  }
+static void load_test(dasm_State *state)  // Emits ltdr on r2, r3 and returns; its entry in test[] is commented out.
+  dasm_State **Dst = &state;
+  | ltdr r2 , r3
+  | br r14
+// Emits a tm (test under mask) on a byte of a value spilled to the stack:
+// when all selected bits are zero the result is r2 - r3, otherwise r2 + r3.
+static void test_mask(dasm_State *state)
+  dasm_State **Dst = &state;
+  |lay   sp , -16(sp)    // 16 bytes, so the 8-byte store at offset 4 stays inside the allocation.
+  |stg   r2,  4(sp)
+  |tm    4(sp),0x04
+  |la    sp , 16(sp)     // Release the stack on both paths; la does not change the condition code.
+  |je >2
+  |jne >1
+  |1:
+  |ar r2,r3
+  |br r14
+  |2:
+  |sr r2,r3
+  |br r14
+static void ssa(dasm_State *state) {  // Exercises the SS-format xc and mvc with literal lengths and displacements.
+  dasm_State **Dst = &state;
+  | lay sp, -16(sp)
+  | lay r0, -1(r0)
+  | stg r0, 8(sp)
+  | xc 8(8, sp), 8(sp) // XOR the 8 bytes at 8(sp) with themselves, i.e. clear them.
+  | stg r2, 0(sp)
+  | mvc 13(2, sp), 6(sp) // Copy 2 bytes of the stored argument into the cleared doubleword.
+  | lg r2, 8(sp)
+  | la sp, 16(sp)
+  | br r14
+static void ssa_act(dasm_State *state) {  // Same instruction sequence as ssa, with lengths and displacements supplied through C variables.
+  dasm_State **Dst = &state;
+  int xl = 8;  // Length operand of xc.
+  int d1 = 13;  // Destination displacement of mvc.
+  int l1 = 2;  // Length operand of mvc.
+  int d2 = 6;  // Source displacement of mvc.
+  | lay sp, -16(sp)
+  | lay r0, -1(r0)
+  | stg r0, 8(sp)
+  | xc 8(xl, sp), 8(sp)
+  | stg r2, 0(sp)
+  | mvc d1(l1, sp), d2(sp)
+  | lg r2, 8(sp)
+  | la sp, 16(sp)
+  | br r14
+typedef struct {  // Two-field struct that type() addresses through the SIMPLE .type alias.
+  int a;
+  int b;  // The field read by type().
+} SimpleStruct;
+static void type(dasm_State *state) {  // Exercises .type: stores r2 on the stack and loads field b through the SIMPLE alias.
+  dasm_State **Dst = &state;
+  | .type SIMPLE, SimpleStruct
+  | lay sp, -8(sp)
+  | stg r2, 0(sp)
+  | xgr r2, r2 // Clear r2 so the result can only come from the load below.
+  | l r2, SIMPLE:sp->b
+  | la sp, 8(sp)
+  | br r14
+static void sil(dasm_State *state) {  // Exercises the SIL-format immediate stores mvghi, mvhi and mvhhi, then sums what they stored.
+  dasm_State **Dst = &state;
+  | lay sp, -16(sp)
+  | xc 0(16, sp), 0(sp)
+  | mvghi 0(sp), 5
+  | mvhi 8(sp), 7
+  | mvhhi 12(sp), 11
+  | lghi r2, 0
+  | ag r2, 0(sp)  // r2 += 5
+  | a r2, 8(sp)   // r2 += 7
+  | ah r2, 12(sp) // r2 += 11
+  | la sp, 16(sp)
+  | br r14
+static void rrfe_rrd(dasm_State *state) {  // Converts r2, r3, r4 to floating point (cefbr), combines them with maebr, converts the result back (cfebr).
+  dasm_State ** Dst = &state;
+  | cefbr f0,r2
+  | cefbr f2,r3
+  | cefbr f4,r4
+  | maebr f0 ,f2 ,f4 // Multiply-and-add; test[] expects 45 for the inputs 15, 3, 10.
+  | cfebr r2, 0, f0
+  | br r14
+static void rre(dasm_State *state)  {  // Exercises the RRE-format fidr between an int-to-float (cefbr) and a float-to-int (cfebr) conversion.
+  dasm_State **Dst = &state;
+  | lay   sp , -8(sp)
+  | cefbr f0 ,  r2
+  | cefbr f1 ,  r3
+  | fidr  f0 ,  f1
+  | cfebr r2 ,0,f0
+  | la    sp,   8(sp)
+  | br   r14
+static void rsb(dasm_State *state) {  // Exercises clm (compare logical under mask) against a word stored on the stack; returns 1 on equality, else 0.
+  dasm_State **Dst = &state;
+  | lay sp, -4(sp)
+  | lghi r3, 0x0706
+  | lghi r4, 0
+  | iill r4, 6
+  | iilh r4, 7
+  | st r4, 0(sp)
+  | lghi r2, 0
+  | clm r3, 5, 0(sp)
+  | jne >1 // Skip the "equal" result when the comparison differs.
+  | lghi r2, 1
+  |1:
+  | la sp, 4(sp)
+  | br r14
+static void sqrt_rxe(dasm_State *state)  // Exercises the RXE-format sqeb with a memory operand; r4 (arg3, 0 in test[]) serves as index register.
+  dasm_State **Dst = &state;
+  | lay     sp , -8(sp)
+  | cefbr   f0 , r2
+  | stdy    f0 , 0(sp)
+  | sqeb    f0 ,0(r4,sp)
+  | cfebr   r2 ,0, f0
+  | la      sp, 8(sp)
+  | br      r14
+static void rxf(dasm_State *state) {  // Exercises the RXF-format maeb: multiply-and-add with one operand read from 0(sp).
+  dasm_State **Dst = &state;
+  | lay    sp , -8(sp)
+  | cegbra f1 ,0, r2,0
+  | cegbra f2 ,0,r3,0
+  | ste    f2 ,0(sp)
+  | maeb   f1, f2, 0(sp) // test[] expects 116 for the inputs 16 and 10.
+  | cfebr  r2 ,0, f1
+  | la     sp, 8(sp)
+  | br     r14
+typedef struct {  // One test case: inputs, code emitter, expected result and a display name.
+  int64_t arg1;  // arg1..arg3 are passed to the generated function by main().
+  int64_t arg2;
+  int64_t arg3;
+  void (*fn)(dasm_State *);  // Emits the instructions under test into the DynASM state.
+  int64_t want;  // Value the generated function must return.
+  const char *testname;  // Printed by main() when the test fails.
+} test_table;
+test_table test[] = {  // Columns: arg1, arg2, arg3, emitter, expected result, name. main() runs the rows in order.
+  { 1, 2, 0,       add,        3,     "add"},
+  {10, 5, 0,       sub,        5,     "sub"},
+  { 2, 3, 0,       mul,        6,     "mul"},
+  { 5, 7, 0,        rx,    12298,      "rx"},
+  { 5, 7, 0,       rxy,       10,     "rxy"},
+  { 2, 4, 0,       lab,       32,     "lab"},
+  { 2, 4, 0,      labg,       32,    "labg"},
+  { 2, 0, 0, add_imm16,       17,   "imm16"},
+  { 2, 0, 0, add_imm32,       16,   "imm32"},
+  { 7, 3, 0,      save,      480,    "save"},
+  { 7, 3, 0,    labmul,       21, "labmul0"},
+  { 7, 0, 0,    labmul,        0, "labmul1"},
+  { 0, 0, 0,        pc,       55,      "pc"},
+  { 2,12, 0,   jmp_fwd,       12, "jmp_fwd"},
+//  { 9,8, 0,    add_rrd,       25, "add_rrd"},
+//  { 2,4, 0,  load_test,        4,"load_test"},
+  {-1, 0, 0,       ssa, 65535<<8,     "ssa"},
+  {-1, 0, 0,   ssa_act, 65535<<8, "ssa_act"},
+  {27, 0, 0,      type,       27,    "type"},
+  { 0, 0, 0,       sil,       23,     "sil"},
+  {15, 3,10,   rrfe_rrd,      45, "rrfe_rrd"},
+  { 0, 0, 0,        rsb,       0,     "rsb"},
+  {12,10, 0,        rre,      10,     "rre"},
+  {16,10, 0,   sqrt_rxe,       4,"sqrt_rxe"},
+  {16,10, 0,        rxf,     116,     "rxf"},
+  { 4, 3, 0,  test_mask,       1,"test_mask"}
+// Links and encodes the code collected in *state into a fresh anonymous
+// mapping, frees the DynASM state and makes the mapping executable.
+// Returns the start of the mapping and stores its length in *size; the
+// caller releases it with munmap(). Every step is checked with assert(),
+// matching the existing check on dasm_link().
+static void *jitcode(dasm_State **state, size_t *size)
+  int dasm_status = dasm_link(state, size);
+  assert(dasm_status == DASM_S_OK);
+  void *ret = mmap(0, *size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+  assert(ret != MAP_FAILED);  // Never encode into a failed mapping.
+  dasm_status = dasm_encode(state, ret);
+  assert(dasm_status == DASM_S_OK);
+  dasm_free(state);
+  int prot_status = mprotect(ret, *size, PROT_READ | PROT_EXEC);
+  assert(prot_status == 0);  // The buffer must be executable before it is called.
+  return ret;
+int main(int argc, char *argv[])  // Runs every test[] row: emit, link/encode, call, compare; exits with status 1 on the first mismatch.
+  dasm_State *state;
+  for(int i = 0; i < sizeof(test)/sizeof(test[0]); i++) {
+    dasm_init(&state, 1);
+    void* labels[lab__MAX];
+    dasm_setupglobal(&state, labels, lab__MAX);
+    dasm_setup(&state, actions);
+    test[i].fn(state);  // Emit the instructions of this test case.
+    size_t size;
+    int64_t (*fptr)(int64_t, int64_t, int64_t) = jitcode(&state, &size);  // jitcode() also frees the DynASM state.
+    int64_t got = fptr(test[i].arg1, test[i].arg2, test[i].arg3);
+    if (got != test[i].want) {
+      fprintf(stderr, "FAIL: test %s: want %ld, got %ld\n", test[i].testname, test[i].want, got);
+      exit(1);
+    }
+    munmap(fptr, size);  // Release the mapping created by jitcode().
+  }
+  printf("all tests passed\n");
+  return 0;
diff -urpN luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/host/buildvm_asm.c luajit-2.1.0~beta3+git.1624618403.e9577376/src/host/buildvm_asm.c
--- luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/host/buildvm_asm.c	2021-10-05 12:28:22.786623190 -0500
+++ luajit-2.1.0~beta3+git.1624618403.e9577376/src/host/buildvm_asm.c	2021-10-05 12:36:23.421160526 -0500
@@ -87,6 +87,54 @@ err:
   fprintf(ctx->fp, "\t%s %s\n", opname, sym);
+#elif LJ_TARGET_S390X
+/* Emit halfwords piecewise as assembler text.
+** Writes n/2 halfwords starting at p as .hword directives to ctx->fp,
+** eight values per output line. An odd trailing byte is not emitted.
+*/
+static void emit_asm_halfwords(BuildCtx *ctx, uint8_t *p, int n)
+  uint16_t *cp = (uint16_t*)p;
+  n /= 2;  /* From here on n counts halfwords, not bytes. */
+  int i;
+  for (i = 0; i < n; i++) {
+    if ((i & 7) == 0)
+      fprintf(ctx->fp, "\t.hword 0x%hx", cp[i]);  /* First value of a line starts a new directive. */
+    else
+      fprintf(ctx->fp, ",0x%hx", cp[i]);
+    if ((i & 7) == 7) putc('\n', ctx->fp);  /* Eighth value completes the line. */
+  }
+  if ((n & 7) != 0) putc('\n', ctx->fp);  /* Terminate a partially filled last line. */
+/* Emit s390x text relocations.
+** cp points at n bytes that end with the opcode halfword of a relative
+** branch (bras, brasl, brc or brcl). The bytes before that halfword are
+** written as raw halfwords; the branch itself is written symbolically
+** against sym, with "@PLT" appended when sym does not start with
+** LABEL_PREFIX (an optional leading '_' is skipped for that check).
+** Exits with an error for an odd or too short n and for any other opcode.
+** NOTE(review): emit_asm() advances by n+4 after this call, i.e. it skips
+** a 4-byte immediate; confirm that the bras/brc cases are expected here.
+*/
+static void emit_asm_reloc_text(BuildCtx *ctx, uint8_t *cp, int n,
+				const char *sym)
+  if (n & 1 || n < 2) {
+    fprintf(stderr, "Error: instruction stream length invalid: %d.\n", n);
+    exit(1);
+  }
+  n -= 2;  /* n is now the byte offset of the opcode halfword. */
+  const char *opname = NULL;
+  const char *argt = ""; /* Inserted before argument. */
+  int opcode = *(uint16_t*)(&cp[n]);
+  int arg = (opcode>>4) & 0xf;  /* The 4-bit field between the two opcode parts. */
+  switch (opcode & 0xff0f) {  /* Mask out that field before matching the opcode. */
+  case 0xa705: opname = "bras"; argt = "%r"; break;
+  case 0xc005: opname = "brasl"; argt = "%r"; break;
+  case 0xa704: opname = "brc"; break;
+  case 0xc004: opname = "brcl"; break;
+  default:
+    fprintf(stderr, "Error: unsupported opcode for %s symbol relocation.\n",
+	    sym);
+    exit(1);
+  }
+  emit_asm_halfwords(ctx, cp, n);  /* Everything in front of the branch goes out verbatim. */
+  if (strncmp(sym+(*sym == '_'), LABEL_PREFIX, sizeof(LABEL_PREFIX)-1)) {
+    /* Various fixups for external symbols outside of our binary. */
+    fprintf(ctx->fp, "\t%s %s%d, %s@PLT\n", opname, argt, arg, sym);
+    return;
+  }
+  fprintf(ctx->fp, "\t%s %s%d, %s\n", opname, argt, arg, sym);
 /* Emit words piecewise as assembler text. */
 static void emit_asm_words(BuildCtx *ctx, uint8_t *p, int n)
@@ -303,6 +351,9 @@ void emit_asm(BuildCtx *ctx)
 	emit_asm_reloc(ctx, r->type, ctx->relocsym[r->sym]);
       ofs += n+4;
+#elif LJ_TARGET_S390X
+      emit_asm_reloc_text(ctx, ctx->code+ofs, n, ctx->relocsym[r->sym]);
+      ofs += n+4;
       emit_asm_wordreloc(ctx, ctx->code+ofs, n, ctx->relocsym[r->sym]);
       ofs += n;
@@ -311,6 +362,8 @@ void emit_asm(BuildCtx *ctx)
     emit_asm_bytes(ctx, ctx->code+ofs, next-ofs);
+#elif LJ_TARGET_S390X
+    emit_asm_halfwords(ctx, ctx->code+ofs, next-ofs);
     emit_asm_words(ctx, ctx->code+ofs, next-ofs);
diff -urpN luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/host/buildvm.c luajit-2.1.0~beta3+git.1624618403.e9577376/src/host/buildvm.c
--- luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/host/buildvm.c	2021-06-25 05:53:23.000000000 -0500
+++ luajit-2.1.0~beta3+git.1624618403.e9577376/src/host/buildvm.c	2021-10-05 12:36:23.377160294 -0500
@@ -65,6 +65,8 @@ static int collect_reloc(BuildCtx *ctx,
 #include "../dynasm/dasm_ppc.h"
 #include "../dynasm/dasm_mips.h"
+#elif LJ_TARGET_S390X
+#include "../dynasm/dasm_s390x.h"
 #error "No support for this architecture (yet)"
diff -urpN luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/jit/dis_s390x.lua luajit-2.1.0~beta3+git.1624618403.e9577376/src/jit/dis_s390x.lua
--- luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/jit/dis_s390x.lua	1969-12-31 18:00:00.000000000 -0600
+++ luajit-2.1.0~beta3+git.1624618403.e9577376/src/jit/dis_s390x.lua	2021-10-05 12:36:23.445160653 -0500
@@ -0,0 +1 @@
+-- Not yet implemented.
diff -urpN luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/lib_jit.c luajit-2.1.0~beta3+git.1624618403.e9577376/src/lib_jit.c
--- luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/lib_jit.c	2021-06-25 05:53:23.000000000 -0500
+++ luajit-2.1.0~beta3+git.1624618403.e9577376/src/lib_jit.c	2021-10-05 12:58:31.960174477 -0500
@@ -718,6 +718,8 @@ static uint32_t jit_cpudetect(void)
     if (x) flags |= JIT_F_MIPSXXR2;  /* Either 0x80000000 (R2) or 0 (R1). */
+#elif LJ_TARGET_S390X
+  /* No optional CPU features to detect (for now). */
 #error "Missing CPU detection for this architecture"
diff -urpN luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/lj_arch.h luajit-2.1.0~beta3+git.1624618403.e9577376/src/lj_arch.h
--- luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/lj_arch.h	2021-10-05 12:28:22.786623190 -0500
+++ luajit-2.1.0~beta3+git.1624618403.e9577376/src/lj_arch.h	2021-10-05 12:36:23.457160716 -0500
@@ -31,6 +31,8 @@
 #define LUAJIT_ARCH_mips32	6
 #define LUAJIT_ARCH_MIPS64	7
 #define LUAJIT_ARCH_mips64	7
+#define LUAJIT_ARCH_S390X	8
+#define LUAJIT_ARCH_s390x	8
 /* Target OS. */
 #define LUAJIT_OS_OTHER		0
@@ -59,6 +61,8 @@
 #elif defined(__aarch64__)
+#elif defined(__s390x__) || defined(__s390x)
 #elif defined(__ppc__) || defined(__ppc) || defined(__PPC__) || defined(__PPC) || defined(__powerpc__) || defined(__powerpc) || defined(__POWERPC__) || defined(__POWERPC) || defined(_M_PPC)
 #elif defined(__mips64__) || defined(__mips64) || defined(__MIPS64__) || defined(__MIPS64)
@@ -428,6 +432,21 @@
 #define LJ_ARCH_VERSION		10
+#define LJ_ARCH_NAME		"s390x"
+#define LJ_ARCH_BITS		64
+#define LJ_TARGET_S390X		1
+#define LJ_TARGET_JUMPRANGE	32	/* +-2^32 = +-4GB (32-bit, halfword aligned) */
+#define LJ_TARGET_GC64		1
+#define LJ_ARCH_NOJIT		1	/* NYI */
 #error "No target architecture defined"
diff -urpN luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/lj_asm.c luajit-2.1.0~beta3+git.1624618403.e9577376/src/lj_asm.c
--- luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/lj_asm.c	2021-06-25 05:53:23.000000000 -0500
+++ luajit-2.1.0~beta3+git.1624618403.e9577376/src/lj_asm.c	2021-10-05 12:36:23.389160358 -0500
@@ -1642,6 +1642,8 @@ static void asm_loop(ASMState *as)
 #include "lj_asm_ppc.h"
 #include "lj_asm_mips.h"
+#elif LJ_TARGET_S390X
+#include "lj_asm_s390x.h"
 #error "Missing assembler for target CPU"
diff -urpN luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/lj_ccallback.c luajit-2.1.0~beta3+git.1624618403.e9577376/src/lj_ccallback.c
--- luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/lj_ccallback.c	2021-10-25 16:52:59.684087182 -0500
+++ luajit-2.1.0~beta3+git.1624618403.e9577376/src/lj_ccallback.c	2021-10-25 15:40:46.949027757 -0500
@@ -566,6 +566,15 @@ void lj_ccallback_mcode_free(CTState *ct
   if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
     ((float *)dp)[1] = *(float *)dp;
+#elif LJ_TARGET_S390X
+  if (isfp) { \
+    if (nfpr < CCALL_NARG_FPR) { sp = &cts->cb.fpr[nfpr++]; goto done; } \
+  } else { \
+    if (ngpr < maxgpr) { sp = &cts->cb.gpr[ngpr++]; goto done; } \
+  }
 #error "Missing calling convention definitions for this architecture"
diff -urpN luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/lj_ccall.c luajit-2.1.0~beta3+git.1624618403.e9577376/src/lj_ccall.c
--- luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/lj_ccall.c	2021-10-05 12:28:22.790623212 -0500
+++ luajit-2.1.0~beta3+git.1624618403.e9577376/src/lj_ccall.c	2021-10-05 12:36:23.449160674 -0500
@@ -653,6 +653,40 @@
     goto done; \
+#elif LJ_TARGET_S390X
+/* -- POSIX/s390x calling conventions --------------------------------------- */
+  cc->retref = 1;  /* Return all structs by reference. */ \
+  cc->gpr[ngpr++] = (GPRArg)dp;
+  cc->retref = 1;  /* Return all complex values by reference. */ \
+  cc->gpr[ngpr++] = (GPRArg)dp;
+  UNUSED(dp); /* Nothing to do. */
+  /* Pass structs of size 1, 2, 4 or 8 in a GPR by value. */ \
+  if (!(sz == 1 || sz == 2 || sz == 4 || sz == 8)) { \
+    rp = cdataptr(lj_cdata_new(cts, did, sz)); \
+    sz = CTSIZE_PTR;  /* Pass all other structs by reference. */ \
+  }
+  /* Pass complex numbers by reference. */ \
+  /* TODO: not sure why this is different to structs. */ \
+  rp = cdataptr(lj_cdata_new(cts, did, sz)); \
+  sz = CTSIZE_PTR; \
+  if (isfp) { \
+    if (nfpr < CCALL_NARG_FPR) { dp = &cc->fpr[nfpr++]; goto done; } \
+  } else { \
+    if (ngpr < maxgpr) { dp = &cc->gpr[ngpr++]; goto done; } \
+  }
 #error "Missing calling convention definitions for this architecture"
@@ -1111,7 +1145,7 @@ static int ccall_set_args(lua_State *L,
     CTypeID did;
     CType *d;
     CTSize sz;
-    MSize n, isfp = 0, isva = 0;
+    MSize n, isfp = 0, isva = 0, onstack = 0;
     void *dp, *rp = NULL;
     int isf32 = 0;
@@ -1154,6 +1188,7 @@ static int ccall_set_args(lua_State *L,
     CCALL_HANDLE_REGARG  /* Handle register arguments. */
     /* Otherwise pass argument on stack. */
+    onstack = 1;
     if (CCALL_ALIGN_STACKARG && !rp && (d->info & CTF_ALIGN) > CTALIGN_PTR) {
       MSize align = (1u << ctype_align(d->info-CTALIGN_PTR)) -1;
       nsp = (nsp + align) & ~align;  /* Align argument on stack. */
@@ -1234,6 +1269,16 @@ static int ccall_set_args(lua_State *L,
       *(int64_t *)dp = (int64_t)*(int32_t *)dp;  /* Sign-extend to 64 bit. */
+#if LJ_TARGET_S390X
+    /* Arguments need to be sign-/zero-extended to 64-bits. */
+    if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info) ||
+          (isfp && onstack)) && d->size <= 4) {
+      if (d->info & CTF_UNSIGNED || isfp)
+        *(uint64_t *)dp = (uint64_t)*(uint32_t *)dp;
+      else
+        *(int64_t *)dp = (int64_t)*(int32_t *)dp;
+    }
     if (isva) {  /* Windows/x64 mirrors varargs in both register sets. */
       if (nfpr == ngpr)
diff -urpN luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/lj_ccall.h luajit-2.1.0~beta3+git.1624618403.e9577376/src/lj_ccall.h
--- luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/lj_ccall.h	2021-10-05 12:28:22.790623212 -0500
+++ luajit-2.1.0~beta3+git.1624618403.e9577376/src/lj_ccall.h	2021-10-05 12:36:23.445160653 -0500
@@ -139,6 +139,21 @@ typedef union FPRArg {
   struct { LJ_ENDIAN_LOHI(float f; , float g;) };
 } FPRArg;
+#elif LJ_TARGET_S390X
+#define CCALL_NARG_GPR		5	/* GPR 2,3,4,5,6 */
+#define CCALL_NARG_FPR		4	/* FPR 0,2,4,6 */
+#define CCALL_NRET_GPR		1	/* GPR 2 */
+#define CCALL_NRET_FPR		1	/* FPR 0 */
+#define CCALL_SPS_EXTRA		20	/* 160-byte callee save area (not sure if this is the right place) */
+#define CCALL_SPS_FREE		0
+typedef intptr_t GPRArg;
+typedef union FPRArg {
+  double d;
+  float f;
+} FPRArg;
 #error "Missing calling convention definitions for this architecture"
diff -urpN luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/lj_err.c luajit-2.1.0~beta3+git.1624618403.e9577376/src/lj_err.c
--- luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/lj_err.c	2021-06-25 05:53:23.000000000 -0500
+++ luajit-2.1.0~beta3+git.1624618403.e9577376/src/lj_err.c	2021-10-05 12:36:23.457160716 -0500
@@ -419,6 +419,9 @@ LJ_FUNCA int lj_err_unwind_dwarf(int ver
   if (version != 1)
     return _URC_FATAL_PHASE1_ERROR;
   cf = (void *)_Unwind_GetCFA(ctx);
+#ifdef LJ_TARGET_S390X
+  cf -= 160; /* CFA points 160 bytes above r15. */
   L = cframe_L(cf);
   if ((actions & _UA_SEARCH_PHASE)) {
diff -urpN luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/lj_frame.h luajit-2.1.0~beta3+git.1624618403.e9577376/src/lj_frame.h
--- luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/lj_frame.h	2021-10-05 12:28:22.794623233 -0500
+++ luajit-2.1.0~beta3+git.1624618403.e9577376/src/lj_frame.h	2021-10-05 12:36:23.453160695 -0500
@@ -273,6 +273,20 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CAL
+#elif LJ_TARGET_S390X
+#define CFRAME_OFS_ERRF		280
+#define CFRAME_OFS_NRES		272
+#define CFRAME_OFS_PREV		264
+#define CFRAME_OFS_L		256
+#define CFRAME_OFS_PC		168
+#define CFRAME_SIZE		240
+** TODO: it would be good if we always decoded param*8 like
+** the RISC architectures do. If so then SHIFT_MULTRES will
+** need to change to 3.
 #error "Missing CFRAME_* definitions for this architecture"
diff -urpN luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/lj_target.h luajit-2.1.0~beta3+git.1624618403.e9577376/src/lj_target.h
--- luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/lj_target.h	2021-06-25 05:53:23.000000000 -0500
+++ luajit-2.1.0~beta3+git.1624618403.e9577376/src/lj_target.h	2021-10-05 12:36:23.445160653 -0500
@@ -144,6 +144,8 @@ typedef uint32_t RegCost;
 #include "lj_target_ppc.h"
 #include "lj_target_mips.h"
+#elif LJ_TARGET_S390X
+#include "lj_target_s390x.h"
 #error "Missing include for target CPU"
diff -urpN luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/lj_target_s390x.h luajit-2.1.0~beta3+git.1624618403.e9577376/src/lj_target_s390x.h
--- luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/lj_target_s390x.h	1969-12-31 18:00:00.000000000 -0600
+++ luajit-2.1.0~beta3+git.1624618403.e9577376/src/lj_target_s390x.h	2021-10-05 12:36:23.445160653 -0500
@@ -0,0 +1,80 @@
+** Definitions for IBM z/Architecture (s390x) CPUs.
+** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h
+#ifndef _LJ_TARGET_S390X_H
+#define _LJ_TARGET_S390X_H
+/* -- Registers IDs ------------------------------------------------------- */
+#define GPRDEF(_) \
+  _(R0) _(R1) _(R2) _(R3) _(R4) _(R5) _(R6) _(R7) \
+  _(R8) _(R9) _(R10) _(R11) _(R12) _(R13) _(R14) _(R15)
+#define FPRDEF(_) \
+  _(F0) _(F1) _(F2) _(F3) \
+  _(F4) _(F5) _(F6) _(F7) \
+  _(F8) _(F9) _(F10) _(F11) \
+  _(F12) _(F13) _(F14) _(F15) 
+#define RIDENUM(name)	RID_##name,
+enum {
+  GPRDEF(RIDENUM)		/* General-purpose registers (GPRs). */
+  FPRDEF(RIDENUM)		/* Floating-point registers (FPRs). */
+  /* Calling conventions. */
+  RID_SP = RID_R15,
+  RID_RET = RID_R2,
+  /* These definitions must match with the *.dasc file(s): */
+  RID_BASE = RID_R7,		/* Interpreter BASE. NOTE(review): vm_s390x.dasc defines BASE as r13 and RB as r7 -- confirm which is intended. */
+  RID_LPC = RID_R9,		/* Interpreter PC. */
+  RID_DISPATCH = RID_R10,	/* Interpreter DISPATCH table. */
+  /* Register ranges [min, max) and number of registers. */
+/* -- Register sets ------------------------------------------------------- */
+/* -- Spill slots --------------------------------------------------------- */
+/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs.
+** SPS_FIXED: Available fixed spill slots in interpreter frame.
+** This definition must match with the *.dasc file(s).
+** SPS_FIRST: First spill slot for general use. Reserve min. two 32 bit slots.
+#define SPS_FIXED	2
+#define SPS_FIRST	2
+#define SPOFS_TMP	0
+#define sps_scale(slot)		(4 * (int32_t)(slot))
+#define sps_align(slot)		(((slot) - SPS_FIXED + 1) & ~1)
+/* -- Exit state ---------------------------------------------------------- */
+/* This definition must match with the *.dasc file(s). */
+typedef struct {
+  lua_Number fpr[RID_NUM_FPR];	/* Floating-point registers. */
+  int32_t gpr[RID_NUM_GPR];	/* General-purpose registers. NOTE(review): 32-bit slots although LJ_ARCH_BITS is 64 for this target -- confirm before the JIT is enabled. */
+  int32_t spill[256];		/* Spill slots. */
+} ExitState;
+#define EXITSTUB_SPACING        4
+#define EXITSTUBS_PER_GROUP     32
+/* -- Instructions -------------------------------------------------------- */
diff -urpN luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/Makefile luajit-2.1.0~beta3+git.1624618403.e9577376/src/Makefile
--- luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/Makefile	2021-10-05 12:28:22.786623190 -0500
+++ luajit-2.1.0~beta3+git.1624618403.e9577376/src/Makefile	2021-10-05 13:54:29.633902276 -0500
@@ -245,6 +245,9 @@ else
 ifneq (,$(findstring LJ_TARGET_ARM ,$(TARGET_TESTARCH)))
+ifneq (,$(findstring LJ_TARGET_S390X ,$(TARGET_TESTARCH)))
 ifneq (,$(findstring LJ_TARGET_ARM64 ,$(TARGET_TESTARCH)))
   ifneq (,$(findstring __AARCH64EB__ ,$(TARGET_TESTARCH)))
@@ -273,6 +276,7 @@ else
diff -urpN luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/vm_s390x.dasc luajit-2.1.0~beta3+git.1624618403.e9577376/src/vm_s390x.dasc
--- luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/vm_s390x.dasc	1969-12-31 18:00:00.000000000 -0600
+++ luajit-2.1.0~beta3+git.1624618403.e9577376/src/vm_s390x.dasc	2021-10-25 16:52:46.620017650 -0500
@@ -0,0 +1,4261 @@
+|// Low-level VM code for IBM z/Architecture (s390x) CPUs in LJ_GC64 mode.
+|// Bytecode interpreter, fast functions and helper functions.
+|// Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h
+|// This assembly targets the instruction set available on z10 (and newer)
+|// machines.
+|// ELF ABI registers:
+|// r0,r1       |                            | volatile |
+|// r2          | parameter and return value | volatile |
+|// r3-r5       | parameter                  | volatile |
+|// r6          | parameter                  | saved    |
+|// r7-r11      |                            | saved    |
+|// r12         | GOT pointer (needed?)      | saved    |
+|// r13         | literal pool (not needed)  | saved    |
+|// r14         | return address             | volatile |
+|// r15         | stack pointer              | saved    |
+|// f0,f2,f4,f6 | parameter and return value | volatile |
+|// f1,f3,f5,f7 |                            | volatile |
+|// f8-f15      |                            | saved    |
+|// ar0,ar1     | TLS                        | volatile |
+|// ar2-ar15    |                            | volatile |
+|.arch s390x
+|.section code_op, code_sub	// Two output sections; code_sub holds the subroutines.
+|.actionlist build_actionlist
+|.globals GLOB_
+|.globalnames globnames
+|.externnames extnames
+|// Fixed register assignments for the interpreter, callee-saved.
+|.define KBASE,			r8	// Constants of current Lua function.
+|.define PC,			r9	// Next PC.
+|.define DISPATCH,		r10	// Opcode dispatch table.
+|.define ITYPE,			r11	// Temporary used for type information.
+|.define BASE,			r13	// Base of current Lua stack frame.
+|// The following temporaries are not saved across C calls, except for RB.
+|.define RA,			r4	// Overlaps CARG3.
+|.define RB,			r7	// Must be callee-save.
+|.define RC,			r5	// Overlaps CARG4.
+|.define RD,			r6	// Overlaps CARG5. (r6 is listed as callee-saved in the ABI table above.)
+|// Calling conventions. Also used as temporaries.
+|.define CARG1,			r2
+|.define CARG2,			r3
+|.define CARG3,			r4
+|.define CARG4,			r5
+|.define CARG5,			r6
+|.define FARG1,			f0
+|.define FARG2,			f2
+|.define FARG3,			f4
+|.define FARG4,			f6
+|.define CRET1,			r2	// Same register as CARG1.
+|.define TMPR0,			r0	// Volatile scratch register.
+|.define TMPR1,			r1	// Volatile scratch register.
+|.define OP,			r2	// Same register as CARG1/CRET1.
+|// Stack layout while in interpreter. Must match with lj_frame.h.
+|.define CFRAME_SPACE,	240	// Delta for sp, 8 byte aligned.
+|// Register save area.
+|.define SAVE_GPRS,	288(sp)	// Save area for r6-r15 (10*8 bytes).
+|.define SAVE_GPRS_P,	48(sp)  // Save area for r6-r15 (10*8 bytes) in prologue (before stack frame is allocated).
+|// Argument save area.
+|.define SAVE_ERRF,	280(sp) // Argument 4, in r5.
+|.define SAVE_NRES,	272(sp)	// Argument 3, in r4. Size is 4-bytes.
+|.define SAVE_CFRAME,	264(sp)	// Argument 2, in r3.
+|.define SAVE_L,	256(sp)	// Argument 1, in r2.
+|.define RESERVED,	248(sp)	// Reserved for compiler use.
+|.define BACKCHAIN,	240(sp)	// <- sp entering interpreter.
+|// Interpreter stack frame.
+|.define SAVE_FPR15,	232(sp)	// SAVE_FPR8..SAVE_FPR15 are filled by saveregs.
+|.define SAVE_FPR14,	224(sp)
+|.define SAVE_FPR13,	216(sp)
+|.define SAVE_FPR12,	208(sp)
+|.define SAVE_FPR11,	200(sp)
+|.define SAVE_FPR10,	192(sp)
+|.define SAVE_FPR9,	184(sp)
+|.define SAVE_FPR8,	176(sp)
+|.define SAVE_PC,	168(sp)
+|.define SAVE_MULTRES,	160(sp)	// Accessed as a 32 bit value (st/llgf).
+|.define SAVE_TMP,	160(sp) // Overlaps SAVE_MULTRES
+|.define SAVE_TMP_HI,	164(sp) // High 32-bits (to avoid SAVE_MULTRES).
+|// Callee save area (allocated by interpreter).
+|.define CALLEESAVE,	000(sp) // <- sp in interpreter.
+|// Prologue: store the callee-saved registers and allocate the interpreter frame.
+|.macro saveregs
+|  stmg r6, r15, SAVE_GPRS_P	// Uses the pre-allocation offsets, sp is adjusted next.
+|  lay sp, -CFRAME_SPACE(sp)	// Allocate stack frame.
+|  std f8, SAVE_FPR8		// f8-f15 are callee-saved.
+|  std f9, SAVE_FPR9
+|  std f10, SAVE_FPR10
+|  std f11, SAVE_FPR11
+|  std f12, SAVE_FPR12
+|  std f13, SAVE_FPR13
+|  std f14, SAVE_FPR14
+|  std f15, SAVE_FPR15
+|.endmacro
+|// Epilogue: reload everything saveregs stored. Must run with sp still
+|// pointing at the interpreter frame; the final lmg also reloads sp (r15).
+|.macro restoreregs
+|  ld f8, SAVE_FPR8		// f8-f15 are callee-saved.
+|  ld f9, SAVE_FPR9
+|  ld f10, SAVE_FPR10
+|  ld f11, SAVE_FPR11
+|  ld f12, SAVE_FPR12
+|  ld f13, SAVE_FPR13
+|  ld f14, SAVE_FPR14
+|  ld f15, SAVE_FPR15
+|  lmg r6, r15, SAVE_GPRS	// Restores the stack pointer.
+|.endmacro
+|// Type definitions. Some of these are only used for documentation.
+|.type L,		lua_State	// Enables operands such as L:RB->base.
+|.type GL,		global_State
+|.type TVALUE,		TValue
+|.type GCOBJ,		GCobj
+|.type STR,		GCstr
+|.type TAB,		GCtab
+|.type LFUNC,		GCfuncL
+|.type CFUNC,		GCfuncC
+|.type PROTO,		GCproto
+|.type UPVAL,		GCupval
+|.type NODE,		Node
+|.type NARGS,		int	// Used as an annotation on RD (argument count + 1).
+|.type TRACE,		GCtrace
+|.type SBUF,		SBuf
+|// Instruction headers.
+|.macro ins_A; .endmacro	// Nothing to decode: dispatch already loaded RA.
+|.macro ins_AD; .endmacro	// Nothing to decode: dispatch already loaded RA and RD.
+|.macro ins_AJ; .endmacro
+|.macro ins_ABC; srlg RB, RD, 8; llgcr RC, RD; .endmacro	// RB = RD >> 8, RC = low byte of RD.
+|.macro ins_AB_; srlg RB, RD, 8; .endmacro	// RB = RD >> 8.
+|.macro ins_A_C; llgcr RC, RD; .endmacro	// RC = low byte of RD.
+|.macro ins_AND; lghi TMPR1, -1; xgr RD, TMPR1; .endmacro // RD = ~RD
+|// Instruction decode+dispatch.
+|// Decode the instruction at PC, advance PC and jump to the opcode handler.
+|.macro ins_NEXT
+|  llgc OP, 3(PC)		// OP = opcode byte.
+|  llgh RD, 0(PC)		// RD = 16 bit operand (split into RB/RC by ins_ABC).
+|  llgc RA, 2(PC)		// RA = A operand byte.
+|  sllg TMPR1, OP, 3		// Scale the opcode to an 8 byte table index.
+|  lg TMPR1, 0(TMPR1, DISPATCH)	// Load the handler address.
+|  la PC, 4(PC)			// PC now points past this instruction.
+|  br TMPR1
+|.endmacro
+|// Instruction footer.
+|.if 1
+|  // Replicated dispatch. Less unpredictable branches, but higher I-Cache use.
+|  .define ins_next, ins_NEXT
+|  .define ins_next_, ins_NEXT
+|.else
+|  // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch.
+|  .macro ins_next
+|    j ->ins_next
+|  .endmacro
+|  .macro ins_next_
+|  ->ins_next:
+|    ins_NEXT
+|  .endmacro
+|.endif
+|// Call decode and dispatch.
+|// Enter the function in RB: load its first bytecode and dispatch to it.
+|// RD is left untouched so the callee header sees nargs+1.
+|.macro ins_callt
+|  // BASE = new base, RB = LFUNC, RD = nargs+1, -8(BASE) = PC
+|  lg PC, LFUNC:RB->pc
+|  llgc OP, 3(PC)		// OP = opcode byte of the first instruction.
+|  llgc RA, 2(PC)		// RA = its A operand.
+|  sllg TMPR1, OP, 3
+|  la PC, 4(PC)
+|  lg TMPR1, 0(TMPR1, DISPATCH)
+|  br TMPR1
+|.endmacro
+|// Regular call: record the caller PC in the new frame, then enter the callee.
+|.macro ins_call
+|  // BASE = new base, RB = LFUNC, RD = nargs+1
+|  stg PC, -8(BASE)
+|  ins_callt
+|.endmacro
+|// Assumes DISPATCH is relative to GL.
+#define DISPATCH_GL(field)	(GG_DISP2G + (int)offsetof(global_State, field))	/* Displacement from DISPATCH to a global_State field. */
+#define DISPATCH_J(field)	(GG_DISP2J + (int)offsetof(jit_State, field))	/* Displacement from DISPATCH to a jit_State field. */
+#define PC2PROTO(field)  ((int)offsetof(GCproto, field)-(int)sizeof(GCproto))	/* Negative offset from the end of a GCproto to one of its fields. */
+|// Macros to clear or set tags.
+|.macro cleartp, reg
+|  nihf reg, 0x7fff		// Mask the upper word: only the low 47 bits of reg survive.
+|.endmacro
+|// settp reg, tp: OR the tag tp into bits 47 and up of reg (in place).
+|.macro settp, reg, tp
+|  oihf reg, tp<<15
+|.endmacro
+|// settp dst, reg, tp: dst = reg with tag tp applied; reg is not modified.
+|.macro settp, dst, reg, tp
+|  llihf dst, tp<<15
+|  ogr dst, reg
+|.endmacro
+|// Tag a value as an integer (LJ_TISNUM), in place or into dst.
+|.macro setint, reg
+|  settp reg, LJ_TISNUM
+|.endmacro
+|.macro setint, dst, reg
+|  settp dst, reg, LJ_TISNUM
+|.endmacro
+|// Macros to test operand types.
+|// Branch to target unless reg carries tag tp. reg is left unchanged,
+|// ITYPE receives the extracted tag.
+|.macro checktp_nc, reg, tp, target
+|  srag ITYPE, reg, 47
+|  clfi ITYPE, tp
+|  jne target
+|.endmacro
+|// Like checktp_nc, but also strips the tag from reg (on both paths).
+|.macro checktp, reg, tp, target
+|  srag ITYPE, reg, 47
+|  cleartp reg
+|  clfi ITYPE, tp
+|  jne target
+|.endmacro
+|// Branch to target unless src carries tag tp; ITYPE receives the tag.
+|.macro checktptp, src, tp, target
+|  srag ITYPE, src, 47
+|  clfi ITYPE, tp
+|  jne target
+|.endmacro
+|.macro checkstr, reg, target; checktp reg, LJ_TSTR, target; .endmacro	// Branch unless reg is tagged LJ_TSTR; strips the tag.
+|.macro checktab, reg, target; checktp reg, LJ_TTAB, target; .endmacro	// Branch unless reg is tagged LJ_TTAB; strips the tag.
+|.macro checkfunc, reg, target; checktp reg, LJ_TFUNC, target; .endmacro	// Branch unless reg is tagged LJ_TFUNC; strips the tag.
+|// Extract the tag of reg into ITYPE, compare it (unsigned, 32 bit) against
+|// LJ_TISNUM and apply the given conditional jump to target.
+|.macro checknumx, reg, target, jump
+|  srag ITYPE, reg, 47
+|  clfi ITYPE, LJ_TISNUM		// Without this compare the jump would test srag's condition code.
+|  jump target
+|.endmacro
+|// The wrappers only differ in the condition that means "wrong type":
+|// jne = tag differs from LJ_TISNUM, jhe = tag >= LJ_TISNUM, jh = tag > LJ_TISNUM.
+|.macro checkint, reg, target; checknumx reg, target, jne; .endmacro
+|.macro checkinttp, src, target; checknumx src, target, jne; .endmacro
+|.macro checknum, reg, target; checknumx reg, target, jhe; .endmacro
+|.macro checknumtp, src, target; checknumx src, target, jhe; .endmacro
+|.macro checknumber, src, target; checknumx src, target, jh; .endmacro
+|.macro load_false, reg; lghi reg, -1; iihl reg, 0x7fff; .endmacro	// assumes LJ_TFALSE == ~(1<<47)
+|.macro load_true, reg; lghi reg, -1; iihh reg, 0xfffe; .endmacro	// assumes LJ_TTRUE  == ~(2<<47)
+|.define PC_OP, -1(PC)	// Operand bytes of the instruction just before PC.
+|.define PC_RA, -2(PC)
+|.define PC_RB, -4(PC)
+|.define PC_RC, -3(PC)
+|.define PC_RD, -4(PC)	// Same address as PC_RB; read as a halfword.
+|// PC += (reg - BCBIAS_J) * 4, i.e. apply a biased jump operand.
+|.macro branchPC, reg
+|  // Must not clobber condition code.
+|  sllg TMPR1, reg, 2
+|  lay PC, (-BCBIAS_J*4)(TMPR1, PC)
+|.endmacro
+|// Set current VM state.
+|// Store ~LJ_VMST_<st> into g->vmstate. Clobbers TMPR1.
+|.macro set_vmstate, st
+|  lghi TMPR1, ~LJ_VMST_..st
+|  stg TMPR1, DISPATCH_GL(vmstate)(DISPATCH)
+|.endmacro
+|// Synthesize binary floating-point constants.
+|.macro bfpconst_tobit, reg, tmp	// Synthesize 2^52 + 2^51.
+|  llihh tmp, 0x4338		// tmp = 0x4338000000000000.
+|  ldgr reg, tmp			// Move the bit pattern into the FPR.
+|.endmacro
+|// Move table write barrier back. Overwrites reg.
+|// Clear the black bit of tab and push it onto the g->gc.grayagain list.
+|.macro barrierback, tab, reg
+|  ni tab->marked, ~LJ_GC_BLACK // black2gray(tab)
+|  lg reg, (DISPATCH_GL(gc.grayagain))(DISPATCH)
+|  stg tab, (DISPATCH_GL(gc.grayagain))(DISPATCH)
+|  stg reg, tab->gclist		// Old list head becomes tab's successor.
+|.endmacro
+/* The interpreter below relies on dual-number mode; refuse other configurations. */
+#if !LJ_DUALNUM
+#error "Only dual-number mode supported for s390x target"
+#endif
+/* Generate subroutines used by opcodes and other parts of the VM. */
+/* The .code_sub section should be last to help static branch prediction. */
+static void build_subroutines(BuildCtx *ctx)
+  |.code_sub
+  |
+  |//-----------------------------------------------------------------------
+  |//-- Return handling ----------------------------------------------------
+  |//-----------------------------------------------------------------------
+  |
+  |->vm_returnp:
+  |  tmll PC, FRAME_P			// Test the FRAME_P bit of the frame link.
+  |  je ->cont_dispatch			// Bit clear: handled by cont_dispatch.
+  |
+  |  // Return from pcall or xpcall fast func.
+  |  nill PC, -8			// Clear the low 3 bits of PC.
+  |  sgr BASE, PC			// Restore caller base.
+  |  lay RA, -8(RA, PC)			// Rebase RA and prepend one result.
+  |  lg PC, -8(BASE)			// Fetch PC of previous frame.
+  |  // Prepending may overwrite the pcall frame, so do it at the end.
+  |  load_true ITYPE
+  |  stg ITYPE, 0(RA, BASE)		// Prepend true to results.
+  |
+  |->vm_returnc:
+  |  aghi RD, 1				// RD = nresults+1
+  |  je ->vm_unwind_yield
+  |  st RD, SAVE_MULTRES		// vm_return reloads nresults+1 from SAVE_MULTRES.
+  |  tmll PC, FRAME_TYPE
+  |  je ->BC_RET_Z			// Handle regular return to Lua.
+  |
+  |->vm_return:
+  |  // BASE = base, RA = resultofs, RD = nresults+1 (= MULTRES), PC = return
+  |  lghi TMPR1, FRAME_C
+  |  xgr PC, TMPR1			// PC ^= FRAME_C.
+  |  tmll PC, FRAME_TYPE
+  |  jne ->vm_returnp			// Type bits remain: not a plain C frame.
+  |
+  |  // Return to C.
+  |  set_vmstate C
+  |  nill PC, -8
+  |  sgr PC, BASE
+  |  lcgr PC, PC			// Previous base = BASE - delta.
+  |
+  |  aghi RD, -1
+  |  je >2				// No results to move.
+  |1:  // Move results down.
+  |  lg RB, 0(BASE, RA)
+  |  stg RB, -16(BASE)
+  |  la BASE, 8(BASE)
+  |  aghi RD, -1
+  |  jne <1
+  |2:
+  |  lg L:RB, SAVE_L
+  |  stg PC, L:RB->base
+  |3:
+  |  llgf RD, SAVE_MULTRES		// RD was consumed by the copy loop; reload nresults+1.
+  |  lgf RA, SAVE_NRES			// RA = wanted nresults+1
+  |4:
+  |  cgr RA, RD
+  |  jne >6				// More/less results wanted?
+  |5:
+  |  lay BASE, -16(BASE)
+  |  stg BASE, L:RB->top
+  |
+  |->vm_leave_cp:
+  |  lg RA, SAVE_CFRAME			// Restore previous C frame.
+  |  stg RA, L:RB->cframe
+  |  lghi CRET1, 0			// Ok return status for vm_pcall.
+  |
+  |->vm_leave_unw:
+  |  restoreregs
+  |  br r14				// Return to the C caller.
+  |
+  |6:
+  |  jl >7				// Less results wanted?
+  |  // More results wanted. Check stack size and fill up results with nil.
+  |  cg BASE, L:RB->maxstack
+  |  jh >8
+  |  lghi TMPR1, LJ_TNIL
+  |  stg TMPR1, -16(BASE)
+  |  la BASE, 8(BASE)
+  |  aghi RD, 1
+  |  j <4
+  |
+  |7:  // Fewer results wanted.
+  |  cghi RA, 0
+  |  je <5				// But check for LUA_MULTRET+1.
+  |  sgr RA, RD				// Negative result!
+  |  sllg TMPR1, RA, 3
+  |  la BASE, 0(TMPR1, BASE)		// Correct top.
+  |  j <5
+  |
+  |8:  // Corner case: need to grow stack for filling up results.
+  |  // This can happen if:
+  |  // - A C function grows the stack (a lot).
+  |  // - The GC shrinks the stack in between.
+  |  // - A return back from a lua_call() with (high) nresults adjustment.
+  |  stg BASE, L:RB->top		// Save current top held in BASE (yes).
+  |  st RD, SAVE_MULTRES		// Need to fill only remainder with nil.
+  |  lgr CARG2, RA
+  |  lgr CARG1, L:RB
+  |  brasl r14, extern lj_state_growstack	// (lua_State *L, int n)
+  |  lg BASE, L:RB->top			// Need the (realloced) L->top in BASE.
+  |  j <3				// Label 3 reloads RD and RA from the frame.
+  |
+  |->vm_unwind_yield:
+  |  lghi CRET1, LUA_YIELD		// Return status = LUA_YIELD.
+  |  j ->vm_unwind_c_eh			// Shares the exit path of vm_unwind_c.
+  |
+  |->vm_unwind_c:			// Unwind C stack, return from vm_pcall.
+  |  // (void *cframe, int errcode)
+  |  lgr sp, CARG1			// Must happen first: CRET1 is the same register as CARG1.
+  |  lgfr CRET1, CARG2			// Error return status for vm_pcall.
+  |->vm_unwind_c_eh:			// Landing pad for external unwinder.
+  |  lg L:RB, SAVE_L
+  |  lg GL:RB, L:RB->glref
+  |  lghi TMPR1, ~LJ_VMST_C
+  |  stg TMPR1, GL:RB->vmstate
+  |  j ->vm_leave_unw			// Restore registers and return CRET1.
+  |
+  |->vm_unwind_ff:			// Unwind C stack, return from ff pcall.
+  |  // (void *cframe)
+  |  nill CARG1, CFRAME_RAWMASK		// Assumes high 48-bits set in CFRAME_RAWMASK.
+  |  lgr sp, CARG1
+  |->vm_unwind_ff_eh:			// Landing pad for external unwinder.
+  |  lg L:RB, SAVE_L
+  |  lghi RD, 1+1			// Really 1+2 results, incr. later.
+  |  lg BASE, L:RB->base
+  |  lg DISPATCH, L:RB->glref		// Setup pointer to dispatch table.
+  |  aghi DISPATCH, GG_G2DISP		// glref points at GL; set_vmstate needs DISPATCH itself.
+  |  lg PC, -8(BASE)			// Fetch PC of previous frame.
+  |  load_false RA
+  |  lg RB, 0(BASE)
+  |  stg RA, -16(BASE)			// Prepend false to error message.
+  |  stg RB, -8(BASE)
+  |  lghi RA, -16			// Results start at BASE+RA = BASE-16.
+  |  set_vmstate INTERP
+  |  j ->vm_returnc			// Increments RD/MULTRES and returns.
+  |
+  |//-----------------------------------------------------------------------
+  |//-- Grow stack for calls -----------------------------------------------
+  |//-----------------------------------------------------------------------
+  |
+  |->vm_growstack_c:			// Grow stack for C function.
+  |  lghi CARG2, LUA_MINSTACK
+  |  j >2
+  |
+  |->vm_growstack_v:			// Grow stack for vararg Lua function.
+  |  aghi RD, -16			// LJ_FR2
+  |  j >1
+  |
+  |->vm_growstack_f:			// Grow stack for fixarg Lua function.
+  |  // BASE = new base, RD = nargs+1, RB = L, PC = first PC
+  |  sllg RD, NARGS:RD, 3
+  |  lay RD, -8(RD, BASE)		// RD = BASE + (nargs+1)*8 - 8.
+  |1:
+  |  llgc RA, (PC2PROTO(framesize)-4)(PC)	// RA = framesize byte of the prototype.
+  |  la PC, 4(PC)			// Must point after first instruction.
+  |  stg BASE, L:RB->base
+  |  stg RD, L:RB->top
+  |  stg PC, SAVE_PC
+  |  lgr CARG2, RA
+  |2:
+  |  // RB = L, L->base = new base, L->top = top
+  |  lgr CARG1, L:RB
+  |  brasl r14, extern lj_state_growstack	// (lua_State *L, int n)
+  |  lg BASE, L:RB->base		// Reload base and top after the call.
+  |  lg RD, L:RB->top
+  |  lg LFUNC:RB, -16(BASE)
+  |  cleartp LFUNC:RB
+  |  sgr RD, BASE
+  |  srlg RD, RD, 3
+  |  aghi NARGS:RD, 1
+  |  // BASE = new base, RB = LFUNC, RD = nargs+1
+  |  ins_callt				// Just retry the call.
+  |
+  |//-----------------------------------------------------------------------
+  |//-- Entry points into the assembler VM ---------------------------------
+  |//-----------------------------------------------------------------------
+  |
+  |->vm_resume:				// Setup C frame and resume thread.
+  |  // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0)
+  |  saveregs
+  |  lgr L:RB, CARG1
+  |  stg CARG1, SAVE_L
+  |  lgr RA, CARG2
+  |  lghi PC, FRAME_CP
+  |  lghi RD, 0
+  |  la KBASE, CFRAME_RESUME(sp)	// KBASE = our C frame, tagged as a resume frame.
+  |  lg DISPATCH, L:RB->glref		// Setup pointer to dispatch table.
+  |  aghi DISPATCH, GG_G2DISP
+  |  stg RD, SAVE_PC			// Any value outside of bytecode is ok.
+  |  stg RD, SAVE_CFRAME
+  |  st RD, SAVE_NRES
+  |  stg RD, SAVE_ERRF
+  |  stg KBASE, L:RB->cframe
+  |  clm RD, 1, L:RB->status		// Compare L->status with the zero byte in RD.
+  |  je >2				// Initial resume (like a call).
+  |
+  |  // Resume after yield (like a return).
+  |  stg L:RB, (DISPATCH_GL(cur_L))(DISPATCH)
+  |  set_vmstate INTERP
+  |  stc RD, L:RB->status		// Clear L->status (RD is still 0).
+  |  lg BASE, L:RB->base
+  |  lg RD, L:RB->top
+  |  sgr RD, RA
+  |  srlg RD, RD, 3
+  |  aghi RD, 1				// RD = nresults+1
+  |  sgr RA, BASE			// RA = resultofs
+  |  lg PC, -8(BASE)
+  |  st RD, SAVE_MULTRES		// vm_return expects nresults+1 in SAVE_MULTRES too.
+  |  tmll PC, FRAME_TYPE
+  |  je ->BC_RET_Z
+  |  j ->vm_return
+  |
+  |->vm_pcall:				// Setup protected C frame and enter VM.
+  |  // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef)
+  |  saveregs
+  |  lghi PC, FRAME_CP
+  |  llgfr CARG4, CARG4			// Keep only the low 32 bits of ef, zero-extended.
+  |  stg CARG4, SAVE_ERRF
+  |  j >1				// Continue at label 1 in vm_call.
+  |
+  |->vm_call:				// Setup C frame and enter VM.
+  |  // (lua_State *L, TValue *base, int nres1)
+  |  saveregs
+  |  lghi PC, FRAME_C
+  |
+  |1:  // Entry point for vm_pcall above (PC = ftype).
+  |  st CARG3, SAVE_NRES
+  |  lgr L:RB, CARG1
+  |  stg CARG1, SAVE_L
+  |  lgr RA, CARG2			// Caveat: RA = CARG3.
+  |
+  |  lg DISPATCH, L:RB->glref		// Setup pointer to dispatch table.
+  |  lg KBASE, L:RB->cframe		// Add our C frame to cframe chain.
+  |  stg KBASE, SAVE_CFRAME		// vm_leave_cp restores L->cframe from here.
+  |  stg L:RB, SAVE_PC			// Any value outside of bytecode is ok.
+  |  aghi DISPATCH, GG_G2DISP
+  |  stg sp, L:RB->cframe
+  |
+  |2: // Entry point for vm_resume/vm_cpcall (RA = base, RB = L, PC = ftype).
+  |  stg L:RB, DISPATCH_GL(cur_L)(DISPATCH)
+  |  set_vmstate INTERP
+  |  lg BASE, L:RB->base		// BASE = old base (used in vmeta_call).
+  |  agr PC, RA
+  |  sgr PC, BASE			// PC = frame delta + frame type
+  |
+  |  lg RD, L:RB->top
+  |  sgr RD, RA
+  |  srlg NARGS:RD, NARGS:RD, 3
+  |  aghi NARGS:RD, 1			// RD = nargs+1
+  |
+  |->vm_call_dispatch:
+  |  lg LFUNC:RB, -16(RA)
+  |  checkfunc LFUNC:RB, ->vmeta_call	// Ensure KBASE defined and != BASE.
+  |
+  |->vm_call_dispatch_f:
+  |  lgr BASE, RA
+  |  ins_call
+  |  // BASE = new base, RB = func, RD = nargs+1, PC = caller PC
+  |
+  |->vm_cpcall:				// Setup protected C frame, call C.
+  |  // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp)
+  |  saveregs
+  |  lgr L:RB, CARG1
+  |  stg L:RB, SAVE_L
+  |  stg L:RB, SAVE_PC			// Any value outside of bytecode is ok.
+  |
+  |  lg KBASE, L:RB->stack		// Compute -savestack(L, L->top).
+  |  sg KBASE, L:RB->top
+  |   lg DISPATCH, L:RB->glref	// Setup pointer to dispatch table.
+  |  lghi TMPR0, 0
+  |  stg TMPR0, SAVE_ERRF		// No error function.
+  |  st KBASE, SAVE_NRES		// Neg. delta means cframe w/o frame.
+  |   aghi DISPATCH, GG_G2DISP
+  |  // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe).
+  |
+  |  lg KBASE, L:RB->cframe		// Add our C frame to cframe chain.
+  |  stg KBASE, SAVE_CFRAME		// vm_leave_cp restores L->cframe from here.
+  |  stg sp, L:RB->cframe
+  |  stg L:RB, DISPATCH_GL(cur_L)(DISPATCH)
+  |
+  |  basr r14, CARG4			// (lua_State *L, lua_CFunction func, void *ud)
+  |  // TValue * (new base) or NULL returned in r2 (CRET1).
+  |  cghi CRET1, 0
+  |  je ->vm_leave_cp			// No base? Just remove C frame.
+  |  lgr RA, CRET1
+  |  lghi PC, FRAME_CP
+  |  j <2				// Else continue with the call.
+  |
+  |//-----------------------------------------------------------------------
+  |//-- Metamethod handling ------------------------------------------------
+  |//-----------------------------------------------------------------------
+  |
+  |//-- Continuation dispatch ----------------------------------------------
+  |
+  |->cont_dispatch:
+  |  // BASE = meta base, RA = resultofs, RD = nresults+1 (also in MULTRES)
+  |  agr RA, BASE			// RA = address of the first result.
+  |  nill PC, -8
+  |  lgr RB, BASE
+  |  sgr BASE, PC			// Restore caller BASE.
+  |  sllg TMPR1, RD, 3
+  |  lghi TMPR0, LJ_TNIL		// TMPR0 still holds LJ_TNIL when the continuation is entered.
+  |  stg TMPR0, -8(RA, TMPR1)		// Ensure one valid arg.
+  |  lgr RC, RA				// ... in [RC]
+  |  lg PC, -24(RB)			// Restore PC from [cont|PC].
+  |  lg RA, -32(RB)			// RA = continuation (address, or 0/1 with FFI).
+  |.if FFI
+  |  clfi RA, 1				// NOTE(review): compares only the low 32 bits of RA -- confirm this is safe for real addresses.
+  |  jle >1
+  |.endif
+  |  lg LFUNC:KBASE, -16(BASE)
+  |  cleartp LFUNC:KBASE
+  |  lg KBASE, LFUNC:KBASE->pc
+  |  lg KBASE, (PC2PROTO(k))(KBASE)
+  |  // BASE = base, RC = result, RB = meta base
+  |  br RA				// Jump to continuation.
+  |
+  |.if FFI
+  |1:
+  |  je ->cont_ffi_callback		// cont = 1: return from FFI callback.
+  |  // cont = 0: Tail call from C function.
+  |  sgr RB, BASE
+  |  srl RB, 3
+  |  ahi RB, -3
+  |  llgfr RD, RB
+  |  j ->vm_call_tail
+  |.endif
+  |
+  |->cont_cat:				// BASE = base, RC = result, RB = mbase
+  |  llgc RA, PC_RB
+  |  sllg RA, RA, 3
+  |  aghi RB, -32
+  |  la RA, 0(RA, BASE)
+  |  sgr RA, RB				// RA = &BASE[operand B] - (mbase-32).
+  |  je ->cont_ra			// Difference is zero: just store the result.
+  |  lcgr RA, RA
+  |  srlg RA, RA, 3			// Byte distance -> slot count.
+  |  lg L:CARG1, SAVE_L
+  |  stg BASE, L:CARG1->base
+  |  lgfr CARG3, RA			// Caveat: RA == CARG3.
+  |  lg TMPR0, 0(RC)
+  |  stg TMPR0, 0(RB)			// Copy the result into the slot at mbase-32.
+  |  lgr CARG2, RB
+  |  j ->BC_CAT_Z
+  |
+  |//-- Table indexing metamethods -----------------------------------------
+  |
+  |->vmeta_tgets:
+  |  settp STR:RC, LJ_TSTR			// STR:RC = GCstr *
+  |  stg STR:RC, SAVE_TMP
+  |  la RC, SAVE_TMP			// RC = address of the tagged key in the frame.
+  |  llgc TMPR1, PC_OP
+  |  cghi TMPR1, BC_GGET
+  |  jne >1				// Not GGET: the table is an ordinary stack slot.
+  |  settp TAB:RA, TAB:RB, LJ_TTAB		// TAB:RB = GCtab *
+  |  lay RB, (DISPATCH_GL(tmptv))(DISPATCH)	// Store fn->l.env in g->tmptv.
+  |  stg TAB:RA, 0(RB)
+  |  j >2
+  |
+  |->vmeta_tgetb:
+  |  llgc RC, PC_RC
+  |  setint RC
+  |  stg RC, SAVE_TMP
+  |  la RC, SAVE_TMP			// RC = address of the integer key in the frame.
+  |  j >1
+  |
+  |->vmeta_tgetv:
+  |  llgc RC, PC_RC			// Reload TValue *k from RC.
+  |  sllg RC, RC, 3
+  |  la RC, 0(RC, BASE)
+  |1:
+  |  llgc RB, PC_RB			// Reload TValue *t from RB.
+  |  sllg RB, RB, 3
+  |  la RB, 0(RB, BASE)
+  |2:
+  |  lg L:CARG1, SAVE_L
+  |  stg BASE, L:CARG1->base
+  |  lgr CARG2, RB
+  |  lgr CARG3, RC
+  |  lgr L:RB, L:CARG1			// Keep L in RB across the call.
+  |  stg PC, SAVE_PC
+  |  brasl r14, extern lj_meta_tget	// (lua_State *L, TValue *o, TValue *k)
+  |  // TValue * (finished) or NULL (metamethod) returned in r2 (CRET1).
+  |  lg BASE, L:RB->base
+  |  ltgr RC, CRET1
+  |  je >3
+  |->cont_ra:				// BASE = base, RC = result
+  |  llgc RA, PC_RA
+  |  sllg RA, RA, 3
+  |  lg RB, 0(RC)
+  |  stg RB, 0(RA, BASE)		// BASE[operand A] = *RC.
+  |  ins_next
+  |
+  |3:  // Call __index metamethod.
+  |  // BASE = base, L->top = new base, stack = cont/func/t/k
+  |  lg RA, L:RB->top
+  |  stg PC, -24(RA)			// [cont|PC]
+  |  la PC, FRAME_CONT(RA)
+  |  sgr PC, BASE
+  |  lg LFUNC:RB, -16(RA)		// Guaranteed to be a function here.
+  |  lghi NARGS:RD, 2+1			// 2 args for func(t, k).
+  |  cleartp LFUNC:RB
+  |  j ->vm_call_dispatch_f
+  |
+  |->vmeta_tgetr:
+  |  lgr CARG1, TAB:RB
+  |  lgfr CARG2, RC
+  |  brasl r14, extern lj_tab_getinth		// (GCtab *t, int32_t key)
+  |  // cTValue * or NULL returned in r2 (CRET1).
+  |  llgc RA, PC_RA
+  |  ltgr RC, CRET1
+  |  jne ->BC_TGETR_Z
+  |  lghi ITYPE, LJ_TNIL		// NULL result: continue with nil in ITYPE.
+  |  j ->BC_TGETR2_Z
+  |
+  |//-----------------------------------------------------------------------
+  |
+  |->vmeta_tsets:
+  |  settp STR:RC, LJ_TSTR			// STR:RC = GCstr *
+  |  stg STR:RC, SAVE_TMP
+  |  la RC, SAVE_TMP			// RC = address of the tagged key in the frame.
+  |  llgc TMPR0, PC_OP
+  |  cghi TMPR0, BC_GSET
+  |  jne >1				// Not GSET: the table is an ordinary stack slot.
+  |  settp TAB:RA, TAB:RB, LJ_TTAB		// TAB:RB = GCtab *
+  |  lay RB, (DISPATCH_GL(tmptv))(DISPATCH)	// Store fn->l.env in g->tmptv.
+  |  stg TAB:RA, 0(RB)
+  |  j >2
+  |
+  |->vmeta_tsetb:
+  |  llgc RC, PC_RC
+  |  setint RC
+  |  stg RC, SAVE_TMP
+  |  la RC, SAVE_TMP			// RC = address of the integer key in the frame.
+  |  j >1
+  |
+  |->vmeta_tsetv:
+  |  llgc RC, PC_RC			// Reload TValue *k from RC.
+  |  sllg RC, RC, 3
+  |  la RC, 0(RC, BASE)
+  |1:
+  |  llgc RB, PC_RB			// Reload TValue *t from RB.
+  |  sllg RB, RB, 3
+  |  la RB, 0(RB, BASE)
+  |2:
+  |  lg L:CARG1, SAVE_L
+  |  stg BASE, L:CARG1->base
+  |  lgr CARG2, RB
+  |  lgr CARG3, RC
+  |  lgr L:RB, L:CARG1			// Keep L in RB across the call.
+  |  stg PC, SAVE_PC
+  |  brasl r14, extern lj_meta_tset	// (lua_State *L, TValue *o, TValue *k)
+  |  // TValue * (finished) or NULL (metamethod) returned in r2 (CRET1).
+  |  lg BASE, L:RB->base
+  |  ltgr RC, CRET1
+  |  je >3
+  |  // NOBARRIER: lj_meta_tset ensures the table is not black.
+  |  llgc RA, PC_RA
+  |  sllg RA, RA, 3
+  |  lg RB, 0(RA, BASE)
+  |  stg RB, 0(RC)			// *RC = BASE[operand A].
+  |->cont_nop:				// BASE = base, (RC = result)
+  |  ins_next
+  |
+  |3:  // Call __newindex metamethod.
+  |  // BASE = base, L->top = new base, stack = cont/func/t/k/(v)
+  |  lg RA, L:RB->top
+  |  stg PC, -24(RA)			// [cont|PC]
+  |  llgc RC, PC_RA
+  |  // Copy value to third argument.
+  |  sllg RB, RC, 3
+  |  lg RB, 0(RB, BASE)
+  |  stg RB, 16(RA)
+  |  la PC, FRAME_CONT(RA)
+  |  sgr PC, BASE
+  |  lg LFUNC:RB, -16(RA)		// Guaranteed to be a function here.
+  |  lghi NARGS:RD, 3+1			// 3 args for func(t, k, v).
+  |  cleartp LFUNC:RB
+  |  j ->vm_call_dispatch_f
+  |
+  |->vmeta_tsetr:
+  |  lg L:CARG1, SAVE_L
+  |  lgr CARG2, TAB:RB
+  |  stg BASE, L:CARG1->base
+  |  lgfr CARG3, RC
+  |  stg PC, SAVE_PC
+  |  brasl r14, extern lj_tab_setinth  // (lua_State *L, GCtab *t, int32_t key)
+  |  // TValue * returned in r2 (CRET1).
+  |  lgr RC, CRET1
+  |  llgc RA, PC_RA
+  |  j ->BC_TSETR_Z
+  |
+  |//-- Comparison metamethods ---------------------------------------------
+  |
+  |->vmeta_comp:
+  |  llgh RD, PC_RD
+  |  sllg RD, RD, 3
+  |  llgc RA, PC_RA
+  |  sllg RA, RA, 3
+  |  lg L:RB, SAVE_L
+  |  stg BASE, L:RB->base
+  |  la CARG2, 0(RA, BASE)
+  |  la CARG3, 0(RD, BASE)		// Caveat: RA == CARG3
+  |  lgr CARG1, L:RB
+  |  llgc CARG4, PC_OP
+  |  stg PC, SAVE_PC
+  |  brasl r14, extern lj_meta_comp	// (lua_State *L, TValue *o1, *o2, int op)
+  |  // 0/1 or TValue * (metamethod) returned in r2 (CRET1).
+  |3:
+  |  lgr RC, CRET1
+  |  lg BASE, L:RB->base
+  |  clgfi RC, 1
+  |  jh ->vmeta_binop			// Result > 1: it is a pointer, call the metamethod.
+  |4:
+  |  la PC, 4(PC)			// la leaves the condition code intact.
+  |  jl >6				// Condition "low": skip the jump.
+  |5:
+  |  llgh RD, PC_RD
+  |  branchPC RD
+  |6:
+  |  ins_next
+  |
+  |->cont_condt:			// BASE = base, RC = result
+  |  la PC, 4(PC)
+  |  lg ITYPE, 0(RC)
+  |  srag ITYPE, ITYPE, 47
+  |  clfi ITYPE, LJ_TISTRUECOND	// Branch if result is true.
+  |  jl <5
+  |  j <6
+  |
+  |->cont_condf:			// BASE = base, RC = result
+  |  lg ITYPE, 0(RC)
+  |  srag ITYPE, ITYPE, 47
+  |  clfi ITYPE, LJ_TISTRUECOND	// Branch if result is false.
+  |  j <4				// Label 4 in vmeta_comp evaluates the condition code.
+  |
+  |->vmeta_equal:
+  |  cleartp TAB:RD
+  |  lay PC, -4(PC)
+  |  lgr CARG2, RA			// Must precede the write to CARG3 (same register as RA).
+  |  lgfr CARG4, RB
+  |  lg L:RB, SAVE_L
+  |  stg BASE, L:RB->base
+  |  lgr CARG3, RD
+  |  lgr CARG1, L:RB
+  |  stg PC, SAVE_PC
+  |  brasl r14, extern lj_meta_equal	// (lua_State *L, GCobj *o1, *o2, int ne)
+  |  // 0/1 or TValue * (metamethod) returned in r2 (CRET1).
+  |  j <3				// Shared result handling at label 3 in vmeta_comp.
+  |
+  |->vmeta_equal_cd:
+  |.if FFI
+  |  lay PC, -4(PC)
+  |  lg L:RB, SAVE_L
+  |  stg BASE, L:RB->base
+  |  lgr CARG1, L:RB
+  |  llgf CARG2, -4(PC)			// Pass the 32 bit instruction word before PC.
+  |  stg PC, SAVE_PC
+  |  brasl r14, extern lj_meta_equal_cd	// (lua_State *L, BCIns ins)
+  |  // 0/1 or TValue * (metamethod) returned in r2 (CRET1).
+  |  j <3				// Shared result handling at label 3 in vmeta_comp.
+  |.endif
+  |
+  |->vmeta_istype:
+  |  lg L:RB, SAVE_L
+  |  stg BASE, L:RB->base
+  |  llgfr CARG2, RA
+  |  llgfr CARG3, RD			// Caveat: CARG3 == RA.
+  |  lgr L:CARG1, L:RB
+  |  stg PC, SAVE_PC
+  |  brasl r14, extern lj_meta_istype  // (lua_State *L, BCReg ra, BCReg tp)
+  |  lg BASE, L:RB->base
+  |  j <6				// Continue with ins_next at label 6 in vmeta_comp.
+  |
+  |//-- Arithmetic metamethods ---------------------------------------------
+  |
+  |->vmeta_arith_vno:
+  |  llgc RB, PC_RB			// Re-decode the operands from the instruction.
+  |  llgc RC, PC_RC
+  |->vmeta_arith_vn:
+  |  sllg RB, RB, 3
+  |  sllg RC, RC, 3
+  |  la RB, 0(RB, BASE)			// RB = &BASE[B]
+  |  la RC, 0(RC, KBASE)		// RC = &KBASE[C]
+  |  j >1
+  |
+  |->vmeta_arith_nvo:
+  |  llgc RC, PC_RC
+  |  llgc RB, PC_RB
+  |->vmeta_arith_nv:
+  |  sllg RC, RC, 3
+  |  sllg RB, RB, 3
+  |  la TMPR1, 0(RC, KBASE)
+  |  la RC, 0(RB, BASE)			// RC = &BASE[B]
+  |  lgr RB, TMPR1			// RB = &KBASE[C], i.e. the operands are swapped.
+  |  j >1
+  |
+  |->vmeta_unm:
+  |  llgh RD, PC_RD
+  |  sllg RD, RD, 3
+  |  la RC, 0(RD, BASE)
+  |  lgr RB, RC				// Same operand passed twice.
+  |  j >1
+  |
+  |->vmeta_arith_vvo:
+  |  llgc RB, PC_RB
+  |  llgc RC, PC_RC
+  |->vmeta_arith_vv:
+  |  sllg RC, RC, 3
+  |  sllg RB, RB, 3
+  |  la RB, 0(RB, BASE)
+  |  la RC, 0(RC, BASE)
+  |1:
+  |  llgc RA, PC_RA
+  |  sllg RA, RA, 3
+  |  la RA, 0(RA, BASE)
+  |  llgc CARG5, PC_OP			// Caveat: CARG5 == RD.
+  |  lgr CARG2, RA
+  |  lgr CARG3, RB			// Caveat: CARG3 == RA.
+  |  // lgr CARG4, RC			// Caveat: CARG4 == RC (nop, so commented out).
+  |  lg L:CARG1, SAVE_L
+  |  stg BASE, L:CARG1->base
+  |  lgr L:RB, L:CARG1
+  |  stg PC, SAVE_PC
+  |  brasl r14, extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
+  |  // NULL (finished) or TValue * (metamethod) returned in r2 (CRET1).
+  |  lg BASE, L:RB->base
+  |  cghi CRET1, 0
+  |  lgr RC, CRET1			// lgr does not change the condition code.
+  |  je ->cont_nop
+  |
+  |  // Call metamethod for binary op.
+  |->vmeta_binop:
+  |  // BASE = base, RC = new base, stack = cont/func/o1/o2
+  |  lgr RA, RC
+  |  sgr RC, BASE
+  |  stg PC, -24(RA)			// [cont|PC]
+  |  la PC, FRAME_CONT(RC)
+  |  lghi NARGS:RD, 2+1			// 2 args for func(o1, o2).
+  |  j ->vm_call_dispatch
+  |
+  |->vmeta_len:
+  |  llgh RD, PC_RD
+  |  sllg RD, RD, 3
+  |  lg L:RB, SAVE_L
+  |  stg BASE, L:RB->base
+  |  la CARG2, 0(RD, BASE)
+  |  lgr L:CARG1, L:RB
+  |  stg PC, SAVE_PC
+  |  brasl r14, extern lj_meta_len	// (lua_State *L, TValue *o)
+  |  // NULL (retry) or TValue * (metamethod) returned in r2 (CRET1).
+  |  lgr RC, CRET1
+  |  lg BASE, L:RB->base
+#if LJ_52
+  |  cghi RC, 0
+  |  jne ->vmeta_binop			// Binop call for compatibility.
+  |  llgh RD, PC_RD			// NULL: reload the table and retry the length op.
+  |  sllg RD, RD, 3
+  |  lg TAB:CARG1, 0(RD, BASE)
+  |  cleartp TAB:CARG1
+  |  j ->BC_LEN_Z
+#else
+  |  j ->vmeta_binop			// Binop call for compatibility.
+#endif
+  |
+  |//-- Call metamethod ----------------------------------------------------
+  |
+  |->vmeta_call_ra:
+  |  la RA, 16(RA, BASE)		// RA previously set to RA*8.
+  |->vmeta_call:			// Resolve and call __call metamethod.
+  |  // BASE = old base, RA = new base, RC = nargs+1, PC = return
+  |  stg NARGS:RD, SAVE_TMP		// Save RA, RC for us (not sure about this).
+  |  lgr RB, RA				// RB survives the C call, RA (= CARG3) does not.
+  |  lg L:CARG1, SAVE_L
+  |  stg BASE, L:CARG1->base
+  |  lay CARG2, -16(RA)
+  |  sllg RD, RD, 3
+  |  lay CARG3, -8(RA, RD)		// Caveat: CARG3 == RA.
+  |  stg PC, SAVE_PC
+  |  brasl r14, extern lj_meta_call	// (lua_State *L, TValue *func, TValue *top)
+  |  lgr RA, RB
+  |  lg L:RB, SAVE_L
+  |  lg BASE, L:RB->base
+  |  lg NARGS:RD, SAVE_TMP		// RD was scaled by 8 above; reload the saved nargs+1.
+  |  lg LFUNC:RB, -16(RA)
+  |  aghi NARGS:RD, 1			// 32-bit on x64.
+  |  // This is fragile. L->base must not move, KBASE must always be defined.
+  |  cgr KBASE, BASE			// Continue with CALLT if flag set.
+  |  je ->BC_CALLT_Z
+  |  cleartp LFUNC:RB
+  |  lgr BASE, RA
+  |  ins_call				// Otherwise call resolved metamethod.
+  |
+  |//-- Argument coercion for 'for' statement ------------------------------
+  |
+  |->vmeta_for:
+  |  lg L:RB, SAVE_L
+  |  stg BASE, L:RB->base
+  |  lgr CARG2, RA
+  |  lgr CARG1, RB
+  |  stg PC, SAVE_PC
+  |  brasl r14, extern lj_meta_for	// (lua_State *L, TValue *base)
+  |  lg BASE, L:RB->base
+  |  llgc OP, PC_OP			// Re-decode the instruction that brought us here.
+  |  llgc RA, PC_RA
+  |  llgh RD, PC_RD
+  |  sllg TMPR1, OP, 3
+  |  lg TMPR1, GG_DISP2STATIC(TMPR1, DISPATCH)	// Handler from the static dispatch table.
+  |  br TMPR1				// Retry the instruction.
+  |
+  |//-----------------------------------------------------------------------
+  |//-- Fast functions -----------------------------------------------------
+  |//-----------------------------------------------------------------------
+  |
+  |.macro .ffunc, name			// Fast function entry without argument checks.
+  |->ff_ .. name:
+  |.endmacro
+  |
+  |.macro .ffunc_1, name		// Entry requiring at least 1 argument.
+  |->ff_ .. name:
+  |  clfi NARGS:RD, 1+1; jl ->fff_fallback
+  |.endmacro
+  |
+  |.macro .ffunc_2, name		// Entry requiring at least 2 arguments.
+  |->ff_ .. name:
+  |  clfi NARGS:RD, 2+1; jl ->fff_fallback
+  |.endmacro
+  |
+  |.macro .ffunc_n, name, op		// 1 argument, type-checked, then loaded into f0 with op.
+  |  .ffunc_1 name
+  |  lg TMPR0, 0(BASE)
+  |  checknumtp TMPR0, ->fff_fallback
+  |  op f0, 0(BASE)
+  |.endmacro
+  |
+  |.macro .ffunc_n, name		// Same, using ld as the load instruction.
+  |  .ffunc_n name, ld
+  |.endmacro
+  |
+  |.macro .ffunc_nn, name		// 2 arguments, loaded into FARG1/FARG2 and type-checked.
+  |  .ffunc_2 name
+  |  lg TMPR1, 0(BASE)
+  |  lg TMPR0, 8(BASE)
+  |  ld FARG1, 0(BASE)
+  |  ld FARG2, 8(BASE)
+  |  checknumtp TMPR1, ->fff_fallback
+  |  checknumtp TMPR0, ->fff_fallback
+  |.endmacro
+  |
+  |// Inlined GC threshold check. Caveat: uses label 1.
+  |// Calls ->fff_gcstep unless gc.total is below gc.threshold. Clobbers RB, r14.
+  |.macro ffgccheck
+  |  lg RB, (DISPATCH_GL(gc.total))(DISPATCH)
+  |  clg RB, (DISPATCH_GL(gc.threshold))(DISPATCH)
+  |  jl >1
+  |  brasl r14, ->fff_gcstep
+  |1:
+  |.endmacro
+  |
+  |//-- Base library: checks -----------------------------------------------
+  |
+  |.ffunc_1 assert
+  |  lg RB, 0(BASE)
+  |  srag ITYPE, RB, 47
+  |  clfi ITYPE, LJ_TISTRUECOND; jhe ->fff_fallback	// nil/false: take the fallback.
+  |  lg PC, -8(BASE)
+  |  st RD, SAVE_MULTRES		// Keep nargs+1; RD is consumed by the copy loop below.
+  |  lg RB, 0(BASE)
+  |  stg RB, -16(BASE)
+  |  ahi RD, -2
+  |  je >2				// Only one argument: nothing more to copy.
+  |  lgr RA, BASE
+  |1:  // Move the remaining arguments down by two slots.
+  |  la RA, 8(RA)
+  |  lg RB, 0(RA)
+  |  stg RB, -16(RA)
+  |  brct RD, <1
+  |2:
+  |  llgf RD, SAVE_MULTRES
+  |  j ->fff_res_
+  |
+  |.ffunc_1 type
+  |  lg RC, 0(BASE)
+  |  srag RC, RC, 47			// RC = type tag of the argument.
+  |  lghi RB, LJ_TISNUM
+  |  clgr RC, RB
+  |  jnl >1
+  |  lgr RC, RB				// Tags below LJ_TISNUM are clamped to LJ_TISNUM.
+  |1:
+  |  lghi TMPR0, -1
+  |  xgr RC, TMPR0			// RC = ~RC, used as an index below.
+  |2:
+  |  lg CFUNC:RB, -16(BASE)
+  |  cleartp CFUNC:RB
+  |  sllg RC, RC, 3
+  |  lg STR:RC, ((char *)(&((GCfuncC *)0)->upvalue))(RC, CFUNC:RB)	// Load the upvalue at index RC.
+  |  lg PC, -8(BASE)
+  |  settp STR:RC, LJ_TSTR
+  |  stg STR:RC, -16(BASE)
+  |  j ->fff_res1
+  |
+  |//-- Base library: getters and setters ---------------------------------
+  |
+  |.ffunc_1 getmetatable
+  |  lg TAB:RB, 0(BASE)
+  |  lg PC, -8(BASE)
+  |  checktab TAB:RB, >6
+  |1:  // Field metatable must be at same offset for GCtab and GCudata!
+  |  lg TAB:RB, TAB:RB->metatable
+  |2:
+  |  lghi TMPR0, LJ_TNIL
+  |  stg TMPR0, -16(BASE)		// Default result is nil.
+  |  cghi TAB:RB, 0
+  |  je ->fff_res1			// No metatable.
+  |  settp TAB:RC, TAB:RB, LJ_TTAB
+  |  stg TAB:RC, -16(BASE)		// Store metatable as default result.
+  |  lg STR:RC, (DISPATCH_GL(gcroot)+8*(GCROOT_MMNAME+MM_metatable))(DISPATCH)
+  |  llgf RA, TAB:RB->hmask
+  |  n RA, STR:RC->hash			// NOTE(review): assumes string keys are hashed by GCstr.hash -- confirm against lj_tab.c.
+  |  settp STR:RC, LJ_TSTR
+  |  mghi RA, #NODE
+  |  ag NODE:RA, TAB:RB->node
+  |3:  // Rearranged logic, because we expect _not_ to find the key.
+  |  cg STR:RC, NODE:RA->key
+  |  je >5
+  |4:
+  |  ltg NODE:RA, NODE:RA->next
+  |  jne <3
+  |  j ->fff_res1			// Not found, keep default result.
+  |5:
+  |  lg RB, NODE:RA->val
+  |  cghi RB, LJ_TNIL; je ->fff_res1	// Ditto for nil value.
+  |  stg RB, -16(BASE)			// Return value of mt.__metatable.
+  |  j ->fff_res1
+  |
+  |6:
+  |  clfi ITYPE, LJ_TUDATA; je <1
+  |  clfi ITYPE, LJ_TISNUM; jh >7
+  |  lhi ITYPE, LJ_TISNUM
+  |7:
+  |  lhi TMPR0, -1
+  |  xr ITYPE, TMPR0 // not ITYPE
+  |  llgfr ITYPE, ITYPE
+  |  sllg ITYPE, ITYPE, 3
+  |  lg TAB:RB, (DISPATCH_GL(gcroot[GCROOT_BASEMT]))(ITYPE, DISPATCH)	// Base metatable for this type.
+  |  j <2
+  |
+  |.ffunc_2 setmetatable
+  |  lg TAB:RB, 0(BASE)
+  |  lgr TAB:TMPR1, TAB:RB		// Keep a copy of the 1st argument slot for the result.
+  |  checktab TAB:RB, ->fff_fallback
+  |  // Fast path: no mt for table yet and not clearing the mt.
+  |  lghi TMPR0, 0
+  |  cg TMPR0, TAB:RB->metatable; jne ->fff_fallback
+  |  lg TAB:RA, 8(BASE)
+  |  checktab TAB:RA, ->fff_fallback
+  |  stg TAB:RA, TAB:RB->metatable
+  |  lg PC, -8(BASE)
+  |  stg TAB:TMPR1, -16(BASE)			// Return original table.
+  |  tm TAB:RB->marked, LJ_GC_BLACK		// isblack(table)
+  |  je >1				// Tested bit clear: no barrier needed.
+  |  // Possible write barrier. Table is black, but skip iswhite(mt) check.
+  |  barrierback TAB:RB, RC
+  |1:
+  |  j ->fff_res1
+  |
+  |.ffunc_2 rawget
+  |  lg TAB:CARG2, 0(BASE)
+  |  checktab TAB:CARG2, ->fff_fallback
+  |  la CARG3, 8(BASE)			// Key = address of the 2nd argument slot.
+  |  lg CARG1, SAVE_L
+  |  brasl r14, extern lj_tab_get	// (lua_State *L, GCtab *t, cTValue *key)
+  |  // cTValue * returned in r2 (CRET1).
+  |  // Copy table slot.
+  |  lg RB, 0(CRET1)
+  |  lg PC, -8(BASE)
+  |  stg RB, -16(BASE)			// Single result at BASE-16.
+  |  j ->fff_res1
+  |
+  |//-- Base library: conversions ------------------------------------------
+  |
+  |.ffunc tonumber
+  |  // Only handles the number case inline (without a base argument).
+  |  clfi NARGS:RD, 1+1; jne ->fff_fallback	// Exactly one argument.
+  |  lg RB, 0(BASE)
+  |  checknumber RB, ->fff_fallback	// Anything the check rejects goes to the fallback.
+  |  lg PC, -8(BASE)
+  |  stg RB, -16(BASE)			// The argument is returned unchanged.
+  |  j ->fff_res1
+  |
+  |.ffunc_1 tostring
+  |  // Only handles the string or number case inline.
+  |  lg PC, -8(BASE)
+  |  lg STR:RB, 0(BASE)
+  |  checktp_nc STR:RB, LJ_TSTR, >3
+  |  // A __tostring method in the string base metatable is ignored.
+  |2:
+  |  stg STR:RB, -16(BASE)
+  |  j ->fff_res1
+  |3:  // Handle numbers inline, unless a number base metatable is present.
+  |  clfi ITYPE, LJ_TISNUM; jh ->fff_fallback_1
+  |  lghi TMPR0, 0
+  |  // lghi leaves the condition code alone, so compare explicitly before jne.
+  |  cg TMPR0, (DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM]))(DISPATCH)
+  |  jne ->fff_fallback
+  |  ffgccheck				// Caveat: uses label 1.
+  |  lg L:RB, SAVE_L
+  |  stg BASE, L:RB->base		// Add frame since C call can throw.
+  |  stg PC, SAVE_PC			// Redundant (but a defined value).
+  |  lgr CARG2, BASE			// Otherwise: CARG2 == BASE
+  |  lgr L:CARG1, L:RB
+  |  brasl r14, extern lj_strfmt_number	// (lua_State *L, cTValue *o)
+  |  // GCstr returned in r2 (CRET1).
+  |  lg BASE, L:RB->base
+  |  settp STR:RB, CRET1, LJ_TSTR
+  |  j <2
+  |
+  |//-- Base library: iterators -------------------------------------------
+  |
+  |.ffunc_1 next
+  |  je >2				// Missing 2nd arg?
+  |1:
+  |  lg CARG2, 0(BASE)
+  |  checktab CARG2, ->fff_fallback
+  |  lg L:RB, SAVE_L
+  |  stg BASE, L:RB->base		// Add frame since C call can throw.
+  |  stg BASE, L:RB->top		// Dummy frame length is ok.
+  |  lg PC, -8(BASE)
+  |  la CARG3, 8(BASE)			// Key slot = 2nd argument.
+  |  lgr CARG1, L:RB
+  |  stg PC, SAVE_PC			// Needed for ITERN fallback.
+  |  brasl r14, extern lj_tab_next	// (lua_State *L, GCtab *t, TValue *key)
+  |  // Flag returned in r2 (CRET1).
+  |  lg BASE, L:RB->base
+  |  ltr RD, CRET1;  je >3		// End of traversal?
+  |  // Copy key and value to results.
+  |  lg RB, 8(BASE)
+  |  lg RD, 16(BASE)
+  |  stg RB, -16(BASE)
+  |  stg RD, -8(BASE)
+  |->fff_res2:				// Shared exit: two results.
+  |  lghi RD, 1+2
+  |  j ->fff_res
+  |2:  // Set missing 2nd arg to nil.
+  |  lghi TMPR0, LJ_TNIL
+  |  stg TMPR0, 8(BASE)
+  |  j <1
+  |3:  // End of traversal: return nil.
+  |  lghi TMPR0, LJ_TNIL
+  |  stg TMPR0, -16(BASE)
+  |  j ->fff_res1
+  |
+  |.ffunc_1 pairs
+  |  lg TAB:RB, 0(BASE)
+  |  lgr TMPR1, TAB:RB			// Keep the tagged table value for the result.
+  |  checktab TAB:RB, ->fff_fallback
+#if LJ_52
+  |  ltg TMPR0, TAB:RB->metatable; jne ->fff_fallback	// Table has a metatable: use the fallback.
+#endif
+  |  lg CFUNC:RD, -16(BASE)
+  |  cleartp CFUNC:RD
+  |  lg CFUNC:RD, CFUNC:RD->upvalue[0]
+  |  settp CFUNC:RD, LJ_TFUNC
+  |  lg PC, -8(BASE)
+  |  stg CFUNC:RD, -16(BASE)		// Result 1: function from upvalue[0].
+  |  stg TMPR1, -8(BASE)		// Result 2: the table.
+  |  lghi TMPR0, LJ_TNIL
+  |  stg TMPR0, 0(BASE)			// Result 3: nil.
+  |  lghi RD, 1+3
+  |  j ->fff_res
+  |
+  |.ffunc_2 ipairs_aux
+  |  lg TAB:RB, 0(BASE)
+  |  checktab TAB:RB, ->fff_fallback
+  |  lg RA, 8(BASE)
+  |  checkint RA, ->fff_fallback
+  |  lg PC, -8(BASE)
+  |  aghi RA, 1				// Next index.
+  |  setint ITYPE, RA
+  |  stg ITYPE, -16(BASE)		// Result 1: the incremented index.
+  |  cl RA, TAB:RB->asize;  jhe >2	// Not in array part?
+  |  lg RD, TAB:RB->array
+  |  lgfr TMPR1, RA
+  |  sllg TMPR1, TMPR1, 3		// Index * 8 = byte offset of the array slot.
+  |  la RD, 0(TMPR1, RD)
+  |1:
+  |  lg TMPR0, 0(RD)
+  |  cghi TMPR0, LJ_TNIL;  je ->fff_res0
+  |  // Copy array slot.
+  |  stg TMPR0, -8(BASE)
+  |  j ->fff_res2
+  |2:  // Check for empty hash part first. Otherwise call C function.
+  |  lt TMPR0, TAB:RB->hmask; je ->fff_res0
+  |  lgr CARG1, TAB:RB
+  |  lgfr CARG2, RA
+  |  brasl r14, extern lj_tab_getinth	// (GCtab *t, int32_t key)
+  |  // cTValue * or NULL returned in r2 (CRET1).
+  |  ltgr RD, CRET1
+  |  jne <1				// Non-NULL slot: check it like an array slot.
+  |->fff_res0:				// Shared exit: no results.
+  |  lghi RD, 1+0
+  |  j ->fff_res
+  |
+  |.ffunc_1 ipairs
+  |  lg TAB:RB, 0(BASE)
+  |  lgr TMPR1, TAB:RB			// Keep the tagged table value for the result.
+  |  checktab TAB:RB, ->fff_fallback
+#if LJ_52
+  |  lghi TMPR0, 0
+  |  cg TMPR0, TAB:RB->metatable; jne ->fff_fallback	// Table has a metatable: use the fallback.
+#endif
+  |  lg CFUNC:RD, -16(BASE)
+  |  cleartp CFUNC:RD
+  |  lg CFUNC:RD, CFUNC:RD->upvalue[0]
+  |  settp CFUNC:RD, LJ_TFUNC
+  |  lg PC, -8(BASE)
+  |  stg CFUNC:RD, -16(BASE)		// Result 1: function from upvalue[0].
+  |  stg TMPR1, -8(BASE)		// Result 2: the table.
+  |  llihf RD, LJ_TISNUM<<15		// Upper word = LJ_TISNUM<<15, lower word = 0.
+  |  stg RD, 0(BASE)			// Result 3: that tagged value with payload 0.
+  |  lghi RD, 1+3
+  |  j ->fff_res
+  |
+  |//-- Base library: catch errors ----------------------------------------
+  |
+  |.ffunc_1 pcall
+  |  la RA, 16(BASE)			// RA = BASE + 16.
+  |  aghi NARGS:RD, -1
+  |  lghi PC, 16+FRAME_PCALL
+  |1:					// Shared with xpcall below.
+  |  llgc RB, (DISPATCH_GL(hookmask))(DISPATCH)
+  |  srlg RB, RB, HOOK_ACTIVE_SHIFT(r0)
+  |  nill RB, 1				// High bits already zero (from load).
+  |  agr PC, RB				// Remember active hook before pcall.
+  |  // Note: this does a (harmless) copy of the function to the PC slot, too.
+  |  lgr KBASE, RD
+  |2:					// Copy slots up by 8 bytes, highest index first.
+  |  sllg TMPR1, KBASE, 3
+  |  lg RB, -24(TMPR1, RA)
+  |  stg RB, -16(TMPR1, RA)
+  |  aghi KBASE, -1
+  |  jh <2
+  |  j ->vm_call_dispatch
+  |
+  |.ffunc_2 xpcall
+  |  lg LFUNC:RA, 8(BASE)
+  |  checktp_nc LFUNC:RA, LJ_TFUNC, ->fff_fallback
+  |  lg LFUNC:RB, 0(BASE)		// Swap function and traceback.
+  |  stg LFUNC:RA, 0(BASE)
+  |  stg LFUNC:RB, 8(BASE)
+  |  la RA, 24(BASE)			// RA = BASE + 24.
+  |  aghi NARGS:RD, -2
+  |  lghi PC, 24+FRAME_PCALL
+  |  j <1				// Continue in pcall's common tail.
+  |
+  |//-- Coroutine library --------------------------------------------------
+  |
+  |.macro coroutine_resume_wrap, resume	// resume=1: coroutine.resume, resume=0: wrap helper.
+  |.if resume
+  |.ffunc_1 coroutine_resume
+  |  lg L:RB, 0(BASE)
+  |  lgr L:TMPR0, L:RB			// Save type for checktptp.
+  |  cleartp L:RB
+  |.else
+  |.ffunc coroutine_wrap_aux
+  |  lg CFUNC:RB, -16(BASE)
+  |  cleartp CFUNC:RB
+  |  lg L:RB, CFUNC:RB->upvalue[0].gcr
+  |  cleartp L:RB
+  |.endif
+  |  lg PC, -8(BASE)
+  |  stg PC, SAVE_PC
+  |  stg L:RB, SAVE_TMP			// Coroutine state, reloaded after the call.
+  |.if resume
+  |  checktptp L:TMPR0, LJ_TTHREAD, ->fff_fallback
+  |.endif
+  |  ltg TMPR0, L:RB->cframe; jne ->fff_fallback
+  |  cli L:RB->status, LUA_YIELD; jh ->fff_fallback
+  |  lg RA, L:RB->top
+  |  je >1				// Status != LUA_YIELD (i.e. 0)?
+  |  cg RA, L:RB->base			// Check for presence of initial func.
+  |  je ->fff_fallback
+  |  lg PC, -8(RA)			// Move initial function up.
+  |  stg PC, 0(RA)
+  |  la RA, 8(RA)
+  |1:
+  |  sllg TMPR1, NARGS:RD, 3
+  |.if resume
+  |  lay PC, -16(TMPR1, RA)		// Check stack space (-1-thread).
+  |.else
+  |  lay PC, -8(TMPR1, RA)		// Check stack space (-1).
+  |.endif
+  |  clg PC, L:RB->maxstack; jh ->fff_fallback
+  |  stg PC, L:RB->top
+  |
+  |  lg L:RB, SAVE_L
+  |  stg BASE, L:RB->base
+  |.if resume
+  |  la BASE, 8(BASE)			// Keep resumed thread in stack for GC.
+  |.endif
+  |  stg BASE, L:RB->top
+  |.if resume
+  |  lay RB, -24(TMPR1, BASE)		// RB = end of source for stack move.
+  |.else
+  |  lay RB, -16(TMPR1, BASE)		// RB = end of source for stack move.
+  |.endif
+  |  sgr RB, PC			// Relative to PC.
+  |
+  |  cgr PC, RA
+  |  je >3
+  |2:  // Move args to coroutine.
+  |  lg RC, 0(RB, PC)
+  |  stg RC, -8(PC)
+  |  lay PC, -8(PC)
+  |  cgr PC, RA
+  |  jne <2
+  |3:
+  |  lgr CARG2, RA
+  |  lg L:CARG1, SAVE_TMP
+  |  lghi CARG3, 0
+  |  lghi CARG4, 0
+  |  brasl r14, ->vm_resume			// (lua_State *L, TValue *base, 0, 0)
+  |
+  |  lg L:RB, SAVE_L
+  |  lg L:PC, SAVE_TMP
+  |  lg BASE, L:RB->base
+  |  stg L:RB, (DISPATCH_GL(cur_L))(DISPATCH)
+  |  set_vmstate INTERP
+  |
+  |  clfi CRET1, LUA_YIELD
+  |  jh >8
+  |4:
+  |  lg RA, L:PC->base
+  |  lg KBASE, L:PC->top
+  |  stg RA, L:PC->top			// Clear coroutine stack.
+  |  lgr PC, KBASE
+  |  sgr PC, RA
+  |  je >6				// No results?
+  |  la RD, 0(PC, BASE)
+  |  llgfr PC, PC
+  |  srlg PC, PC, 3			// Byte count / 8 = number of results.
+  |  clg RD, L:RB->maxstack
+  |  jh >9				// Need to grow stack?
+  |
+  |  lgr RB, BASE
+  |  sgr RB, RA
+  |5:  // Move results from coroutine.
+  |  lg RD, 0(RA)
+  |  stg RD, 0(RA, RB)
+  |  la RA, 8(RA)
+  |  cgr RA, KBASE
+  |  jne <5
+  |6:
+  |.if resume
+  |  la RD, 2(PC)			// nresults+1 = 1 + true + results.
+  |  load_true ITYPE			// Prepend true to results.
+  |  stg ITYPE, -8(BASE)
+  |.else
+  |  la RD, 1(PC)			// nresults+1 = 1 + results.
+  |.endif
+  |7:
+  |  lg PC, SAVE_PC
+  |  st RD, SAVE_MULTRES		// Publish nresults+1 before leaving via the return paths.
+  |.if resume
+  |  lghi RA, -8
+  |.else
+  |  lghi RA, 0
+  |.endif
+  |  tmll PC, FRAME_TYPE
+  |  je ->BC_RET_Z
+  |  j ->vm_return
+  |
+  |8:  // Coroutine returned with error (at co->top-1).
+  |.if resume
+  |  load_false ITYPE			// Prepend false to results.
+  |  stg ITYPE, -8(BASE)
+  |  lg RA, L:PC->top
+  |  aghi RA, -8
+  |  stg RA, L:PC->top			// Clear error from coroutine stack.
+  |  // Copy error message.
+  |  lg RD, 0(RA)
+  |  stg RD, 0(BASE)
+  |  lghi RD, 1+2			// nresults+1 = 1 + false + error.
+  |  j <7
+  |.else
+  |  lgr CARG2, L:PC
+  |  lgr CARG1, L:RB
+  |  brasl r14, extern lj_ffh_coroutine_wrap_err  // (lua_State *L, lua_State *co)
+  |  // Error function does not return.
+  |.endif
+  |
+  |9:  // Handle stack expansion on return from yield.
+  |  lg L:RA, SAVE_TMP
+  |  stg KBASE, L:RA->top		// Undo coroutine stack clearing.
+  |  lgr CARG2, PC
+  |  lgr CARG1, L:RB
+  |  brasl r14, extern lj_state_growstack	// (lua_State *L, int n)
+  |  lg L:PC, SAVE_TMP
+  |  lg BASE, L:RB->base
+  |  j <4				// Retry the stack move.
+  |.endmacro
+  |
+  |  coroutine_resume_wrap 1		// coroutine.resume
+  |  coroutine_resume_wrap 0		// coroutine.wrap
+  |
+  |.ffunc coroutine_yield
+  |  lg L:RB, SAVE_L
+  |  lg TMPR0, L:RB->cframe
+  |  tmll TMPR0, CFRAME_RESUME		// lg sets no condition code: test the resume flag explicitly.
+  |  je ->fff_fallback			// Flag clear: leave it to the fallback handler.
+  |  stg BASE, L:RB->base
+  |  sllg RD, NARGS:RD, 3
+  |  lay RD, -8(RD, BASE)		// top = BASE + nargs*8.
+  |  stg RD, L:RB->top
+  |  lghi RD, 0
+  |  stg RD, L:RB->cframe		// Clear the C frame pointer.
+  |  lghi CRET1, LUA_YIELD
+  |  stc CRET1, L:RB->status		// status = LUA_YIELD, also the value left in CRET1.
+  |  j ->vm_leave_unw
+  |
+  |//-- Math library -------------------------------------------------------
+  |
+  |.ffunc_1 math_abs
+  |  lg RB, 0(BASE)
+  |  checkint RB, >3
+  |  lpr RB, RB; jo >2			// 32 bit absolute value; overflow has no int result.
+  |->fff_resbit:
+  |->fff_resi:				// Shared exit: int result in the low word of RB.
+  |  setint RB
+  |->fff_resRB:				// Shared exit: complete result slot value in RB.
+  |  lg PC, -8(BASE)
+  |  stg RB, -16(BASE)
+  |  j ->fff_res1
+  |2:
+  |  llihh RB, 0x41e0	// 2^31
+  |  j ->fff_resRB
+  |3:					// Not an int; condition code still from checkint.
+  |  jh ->fff_fallback
+  |  nihh RB, 0x7fff	// Clear sign bit.
+  |  lg PC, -8(BASE)
+  |  stg RB, -16(BASE)
+  |  j ->fff_res1
+  |
+  |.ffunc_n math_sqrt, sqdb		// f0 = sqrt(1st argument).
+  |->fff_resf0:				// Shared exit: one double result in f0.
+  |  lg PC, -8(BASE)
+  |  stdy f0, -16(BASE)
+  |  // fallthrough
+  |
+  |->fff_res1:				// Shared exit: one result already at BASE-16.
+  |  lghi RD, 1+1
+  |->fff_res:				// Shared exit: RD = nresults+1, PC loaded.
+  |  st RD, SAVE_MULTRES		// Only ->fff_res_ callers have this stored already.
+  |->fff_res_:
+  |  tmll PC, FRAME_TYPE
+  |  jne >7
+  |5:
+  |  llgc TMPR1, PC_RB
+  |  clgr TMPR1, RD			// More results expected?
+  |  jh >6
+  |  // Adjust BASE. KBASE is assumed to be set for the calling frame.
+  |  llgc RA, PC_RA
+  |  lcgr RA, RA
+  |  sllg RA, RA, 3
+  |  lay BASE, -16(RA, BASE)		// base = base - (RA+2)*8
+  |  ins_next
+  |
+  |6:  // Fill up results with nil.
+  |  sllg TMPR1, RD, 3
+  |  lghi TMPR0, LJ_TNIL
+  |  stg TMPR0, -24(TMPR1, BASE)
+  |  la RD, 1(RD)
+  |  j <5
+  |
+  |7:  // Non-standard return case.
+  |  lghi RA, -16			// Results start at BASE+RA = BASE-16.
+  |  j ->vm_return
+  |
+  |.macro math_round, func		// Emits math_<func>, rounding via ->vm_<func>.
+  |  .ffunc math_ .. func
+  |  lg RB, 0(BASE)
+  |  ld f0, 0(BASE)
+  |  checknumx RB, ->fff_resRB, je	// On the je case the argument is returned as is.
+  |  jh ->fff_fallback
+  |  brasl r14, ->vm_ .. func		// Rounds f0 in place.
+  |  cfdbr RB, 0, f0			// Try to convert the rounded value to a 32 bit int.
+  |  jo ->fff_resf0			// Conversion not exact/possible: return the double.
+  |  llgfr RB, RB
+  |  j ->fff_resi
+  |.endmacro
+  |
+  |  math_round floor
+  |  math_round ceil
+  |
+  |.ffunc math_log
+  |  chi NARGS:RD, 1+1; jne ->fff_fallback	// Exactly one argument.
+  |  lg TMPR0, 0(BASE)
+  |  ld FARG1, 0(BASE)			// Argument as double for the C call.
+  |  checknumtp TMPR0, ->fff_fallback
+  |  brasl r14, extern log
+  |  j ->fff_resf0			// Result of log() is returned from f0.
+  |
+  |.macro math_extern, func		// math_<func>: one number argument, calls C func.
+  |  .ffunc_n math_ .. func
+  |  brasl r14, extern func
+  |  j ->fff_resf0
+  |.endmacro
+  |
+  |.macro math_extern2, func		// math_<func>: two number arguments, calls C func.
+  |  .ffunc_nn math_ .. func
+  |  brasl r14, extern func
+  |  j ->fff_resf0
+  |.endmacro
+  |
+  |  math_extern log10
+  |  math_extern exp
+  |  math_extern sin
+  |  math_extern cos
+  |  math_extern tan
+  |  math_extern asin
+  |  math_extern acos
+  |  math_extern atan
+  |  math_extern sinh
+  |  math_extern cosh
+  |  math_extern tanh
+  |  math_extern2 pow
+  |  math_extern2 atan2
+  |  math_extern2 fmod
+  |
+  |.ffunc_2 math_ldexp
+  |  lg TMPR0, 0(BASE)
+  |  ld FARG1, 0(BASE)			// 1st argument as double.
+  |  lg CARG1, 8(BASE)			// 2nd argument, raw slot value.
+  |  checknumtp TMPR0, ->fff_fallback
+  |  checkinttp CARG1, ->fff_fallback
+  |  lgfr CARG1, CARG1			// Sign-extend the low 32 bits for the int parameter.
+  |  brasl r14, extern ldexp	// (double, int)
+  |  j ->fff_resf0
+  |
+  |.ffunc_n math_frexp
+  |  la CARG1, SAVE_TMP			// Pointer argument: frexp writes through it.
+  |  brasl r14, extern frexp
+  |  llgf RB, SAVE_TMP			// Reload the 32 bit value written by frexp.
+  |  lg PC, -8(BASE)
+  |  stdy f0, -16(BASE)			// Result 1: value returned in f0.
+  |  setint RB
+  |  stg RB, -8(BASE)			// Result 2: the int from SAVE_TMP.
+  |  lghi RD, 1+2
+  |  j ->fff_res
+  |
+  |.ffunc_n math_modf
+  |  lay CARG1, -16(BASE)		// modf writes its pointer result directly to BASE-16.
+  |  brasl r14, extern modf	// (double, double*)
+  |  lg PC, -8(BASE)
+  |  stdy f0, -8(BASE)			// Result 2: value returned in f0.
+  |  lghi RD, 1+2
+  |  j ->fff_res
+  |
+  |.macro math_minmax, name, cjmp	// cjmp: branch that keeps the current best value.
+  |  .ffunc name
+  |  lghi RA, 2*8			// RA = byte offset one slot past the next argument.
+  |  sllg TMPR1, RD, 3			// TMPR1 = (nargs+1)*8, the loop limit.
+  |  lg RB, 0(BASE)
+  |  ld f0, 0(BASE)
+  |  checkint RB, >4
+  |1:  // Handle integers.
+  |  clgr RA, TMPR1; jhe ->fff_resRB
+  |  lg TMPR0, -8(RA, BASE)
+  |  checkint TMPR0, >3
+  |  cr RB, TMPR0
+  |  cjmp >2
+  |  lgr RB, TMPR0			// The new argument becomes the current best.
+  |2:
+  |  aghi RA, 8
+  |  j <1
+  |3:
+  |  jh ->fff_fallback
+  |  // Convert intermediate result to number and continue below.
+  |  cdfbr f0, RB
+  |  ldgr f1, TMPR0
+  |  j >6
+  |4:
+  |  jh ->fff_fallback
+  |5:  // Handle numbers or integers.
+  |  clgr RA, TMPR1; jhe ->fff_resf0
+  |  lg RB, -8(RA, BASE)
+  |  ldy f1, -8(RA, BASE)
+  |  checknumx RB, >6, jl
+  |  jh ->fff_fallback
+  |  cdfbr f1, RB			// Integer argument: convert it to double.
+  |6:
+  |  cdbr f0, f1
+  |  cjmp >7
+  |  ldr f0, f1				// The new argument becomes the current best.
+  |7:
+  |  aghi RA, 8
+  |  j <5
+  |.endmacro
+  |
+  |  math_minmax math_min, jnh
+  |  math_minmax math_max, jnl
+  |
+  |//-- String library -----------------------------------------------------
+  |
+  |.ffunc string_byte			// Only handle the 1-arg case here.
+  |  chi NARGS:RD, 1+1;  jne ->fff_fallback
+  |  lg STR:RB, 0(BASE)
+  |  checkstr STR:RB, ->fff_fallback
+  |  lg PC, -8(BASE)
+  |  // len is a 32 bit field (string_sub loads it with llgf). A 64 bit
+  |  // load-and-test would also look at the four bytes that follow it.
+  |  lt TMPR0, STR:RB->len
+  |  je ->fff_res0			// Return no results for empty string.
+  |  llgc RB, STR:RB[1]			// First byte of the string data, zero-extended.
+  |  j ->fff_resi
+  |
+  |.ffunc string_char			// Only handle the 1-arg case here.
+  |  ffgccheck
+  |  chi NARGS:RD, 1+1;  jne ->fff_fallback	// *Exactly* 1 arg.
+  |  lg RB, 0(BASE)
+  |  checkint RB, ->fff_fallback
+  |  clfi RB, 255;  jh ->fff_fallback
+  |  strvh RB, SAVE_TMP		// Store [c,0].
+  |  lghi TMPR1, 1
+  |  la RD, SAVE_TMP			// Points to stack. strvh (byte-reversed store) put c in the first byte.
+  |->fff_newstr:			// Shared: RD = data pointer, TMPR1 = length.
+  |  lg L:RB, SAVE_L
+  |  stg BASE, L:RB->base
+  |  llgfr CARG3, TMPR1			// Zero-extended to size_t.
+  |  lgr CARG2, RD
+  |  lgr CARG1, L:RB
+  |  stg PC, SAVE_PC
+  |  brasl r14, extern lj_str_new	// (lua_State *L, char *str, size_t l)
+  |->fff_resstr:
+  |  // GCstr * returned in r2 (CRET1).
+  |  lgr STR:RD, CRET1
+  |  lg BASE, L:RB->base
+  |  lg PC, -8(BASE)
+  |  settp STR:RD, LJ_TSTR
+  |  stg STR:RD, -16(BASE)
+  |  j ->fff_res1
+  |
+  |.ffunc string_sub
+  |  ffgccheck
+  |  lghi TMPR1, -1			// Default end = -1 when no 3rd argument is given.
+  |  clfi NARGS:RD, 1+2;  jl ->fff_fallback
+  |  jnh >1				// Exactly two arguments: keep the default end.
+  |  lg TMPR1, 16(BASE)
+  |  checkint TMPR1, ->fff_fallback
+  |1:
+  |  lg STR:RB, 0(BASE)
+  |  checkstr STR:RB, ->fff_fallback
+  |  lg ITYPE, 8(BASE)
+  |  lgfr RA, ITYPE			// RA = start (sign-extended low word).
+  |  srag ITYPE, ITYPE, 47
+  |  cghi ITYPE, LJ_TISNUM
+  |  jne ->fff_fallback
+  |  llgf RC, STR:RB->len
+  |  clr RC, TMPR1			// len < end? (unsigned compare)
+  |  jl >5
+  |2:
+  |  cghi RA, 0				// start <= 0?
+  |  jle >7
+  |3:
+  |  sr TMPR1, RA			// start > end?
+  |  jnhe ->fff_emptystr
+  |  la RD, (#STR-1)(RA, STR:RB)	// Address of byte number start in the string data.
+  |  ahi TMPR1, 1			// Length = end - start + 1.
+  |4:
+  |  j ->fff_newstr
+  |
+  |5:  // Negative end or overflow.
+  |  chi TMPR1, 0
+  |  jnl >6
+  |  ahi TMPR1, 1
+  |  ar TMPR1, RC			// end = end+(len+1)
+  |  j <2
+  |6:  // Overflow.
+  |  lr TMPR1, RC			// end = len
+  |  j <2
+  |
+  |7:  // Negative start or underflow.
+  |  je >8
+  |  agr RA, RC			// start = start+(len+1)
+  |  aghi RA, 1
+  |  jh <3				// start > 0?
+  |8:  // Underflow.
+  |  lghi RA, 1				// start = 1
+  |  j <3
+  |
+  |->fff_emptystr:  // Range underflow.
+  |  lghi TMPR1, 0			// Zero length; RD is passed on unchanged.
+  |  j <4
+  |
+  |.macro ffstring_op, name		// string_<name> via lj_buf_putstr_<name> on the global tmpbuf.
+  |  .ffunc_1 string_ .. name
+  |  ffgccheck
+  |  lg STR:CARG2, 0(BASE)
+  |  checkstr STR:CARG2, ->fff_fallback
+  |  lg L:RB, SAVE_L
+  |   lay SBUF:CARG1, (DISPATCH_GL(tmpbuf))(DISPATCH)
+  |  stg BASE, L:RB->base
+  |   lg RC, SBUF:CARG1->b
+  |   stg L:RB, SBUF:CARG1->L
+  |   stg RC, SBUF:CARG1->w		// w = b.
+  |  stg PC, SAVE_PC
+  |  brasl r14, extern lj_buf_putstr_ .. name
+  |  // lgr CARG1, CRET1 (nop, CARG1==CRET1)
+  |  brasl r14, extern lj_buf_tostr
+  |  j ->fff_resstr
+  |.endmacro
+  |
+  |ffstring_op reverse
+  |ffstring_op lower
+  |ffstring_op upper
+  |
+  |//-- Bit library --------------------------------------------------------
+  |
+  |.macro .ffunc_bit, name, kind, fdef	// Header for bit.*: leaves the 1st argument as int in RB.
+  |  fdef name
+  |.if kind == 2
+  |  bfpconst_tobit f1, RB		// kind 2: constant is loaded up front.
+  |.endif
+  |  lg RB, 0(BASE)
+  |  ld f0, 0(BASE)
+  |  checkint RB, >1
+  |.if kind > 0
+  |  j >2				// Integer argument: continue after the macro.
+  |.else
+  |  j ->fff_resbit			// kind 0: an integer argument is already the result.
+  |.endif
+  |1:
+  |  jh ->fff_fallback
+  |.if kind < 2
+  |  bfpconst_tobit f1, RB
+  |.endif
+  |  adbr f0, f1			// Add the constant loaded by bfpconst_tobit ...
+  |  lgdr RB, f0			// ... and take the raw bits of the sum.
+  |  llgfr RB, RB			// Keep only the low 32 bits.
+  |2:
+  |.endmacro
+  |
+  |.macro .ffunc_bit, name, kind		// Short form: uses .ffunc_1 as the header.
+  |  .ffunc_bit name, kind, .ffunc_1
+  |.endmacro
+  |
+  |.ffunc_bit bit_tobit, 0
+  |  j ->fff_resbit
+  |
+  |.macro .ffunc_bit_op, name, ins	// Variadic bit op: folds all arguments into RB with ins.
+  |  .ffunc_bit name, 2
+  |  lgr TMPR1, NARGS:RD		// Save for fallback.
+  |  sllg RD, NARGS:RD, 3
+  |  lay RD, -16(RD, BASE)		// RD = address of the last argument.
+  |1:
+  |  clgr RD, BASE
+  |  jle ->fff_resbit			// Reached the 1st argument: done.
+  |  lg RA, 0(RD)
+  |  checkint RA, >2
+  |  ins RB, RA
+  |  aghi RD, -8
+  |  j <1
+  |2:
+  |  jh ->fff_fallback_bit_op
+  |  ldgr f0, RA			// Number argument: same constant-add conversion as in .ffunc_bit.
+  |  adbr f0, f1
+  |  lgdr RA, f0
+  |  ins RB, RA
+  |  aghi RD, -8
+  |  j <1
+  |.endmacro
+  |
+  |.ffunc_bit_op bit_band, nr
+  |.ffunc_bit_op bit_bor, or
+  |.ffunc_bit_op bit_bxor, xr
+  |
+  |.ffunc_bit bit_bswap, 1
+  |  lrvr RB, RB			// Reverse the four bytes of the low word.
+  |  j ->fff_resbit
+  |
+  |.ffunc_bit bit_bnot, 1
+  |  xilf RB, -1			// Invert the low 32 bits.
+  |  j ->fff_resbit
+  |
+  |->fff_fallback_bit_op:		// Used by .ffunc_bit_op, which keeps nargs+1 in TMPR1.
+  |  lgr NARGS:RD, TMPR1		// Restore for fallback
+  |  j ->fff_fallback
+  |
+  |.macro .ffunc_bit_sh, name, ins	// Shift op: RB = RB shifted by the 2nd argument.
+  |  .ffunc_bit name, 1, .ffunc_2
+  |  // Note: no inline conversion from number for 2nd argument!
+  |  lg RA, 8(BASE)
+  |  checkint RA, ->fff_fallback
+  |  nill RA, 0x1f	// Limit shift to 5-bits.
+  |  ins RB, 0(RA)			// Shift count is taken from the address 0(RA).
+  |  j ->fff_resbit
+  |.endmacro
+  |
+  |.ffunc_bit_sh bit_lshift, sll
+  |.ffunc_bit_sh bit_rshift, srl
+  |.ffunc_bit_sh bit_arshift, sra
+  |
+  |.ffunc_bit bit_rol, 1, .ffunc_2
+  |  // Note: no inline conversion from number for 2nd argument!
+  |  lg RA, 8(BASE)
+  |  checkint RA, ->fff_fallback
+  |  rll RB, RB, 0(RA)			// 32 bit rotate left; count taken from 0(RA).
+  |  j ->fff_resbit
+  |
+  |.ffunc_bit bit_ror, 1, .ffunc_2
+  |  // Note: no inline conversion from number for 2nd argument!
+  |  lg RA, 8(BASE)
+  |  checkint RA, ->fff_fallback
+  |  lcr RA, RA		// Right rotate equivalent to negative left rotate.
+  |  rll RB, RB, 0(RA)
+  |  j ->fff_resbit
+  |
+  |//-----------------------------------------------------------------------
+  |
+  |->fff_fallback_2:
+  |  lghi NARGS:RD, 1+2			// Other args are ignored, anyway.
+  |  j ->fff_fallback
+  |->fff_fallback_1:
+  |  lghi NARGS:RD, 1+1			// Other args are ignored, anyway.
+  |->fff_fallback:			// Call fast function fallback handler.
+  |  // BASE = new base, RD = nargs+1
+  |  lg L:RB, SAVE_L
+  |  lg PC, -8(BASE)			// Fallback may overwrite PC.
+  |  stg PC, SAVE_PC			// Redundant (but a defined value).
+  |  stg BASE, L:RB->base
+  |  sllg RD, NARGS:RD, 3
+  |  lay RD, -8(RD, BASE)		// RD = BASE + nargs*8.
+  |  la RA, (8*LUA_MINSTACK)(RD)	// Ensure enough space for handler.
+  |  stg RD, L:RB->top
+  |  lg CFUNC:RD, -16(BASE)
+  |  cleartp CFUNC:RD
+  |  clg RA, L:RB->maxstack
+  |  jh >5				// Need to grow stack.
+  |  lgr CARG1, L:RB
+  |  lg TMPR1, CFUNC:RD->f
+  |  basr r14, TMPR1			// (lua_State *L)
+  |  lg BASE, L:RB->base
+  |  // Either throws an error, or recovers and returns -1, 0 or nresults+1.
+  |  lgr RD, CRET1
+  |  cghi RD, 0; jh ->fff_res	// Returned nresults+1?
+  |1:
+  |  lg RA, L:RB->top
+  |  sgr RA, BASE
+  |  srlg RA, RA, 3
+  |  cghi RD, 0				// Condition code survives the la/lg below.
+  |    la NARGS:RD, 1(RA)
+  |    lg LFUNC:RB, -16(BASE)
+  |  jne ->vm_call_tail			// Returned -1?
+  |  cleartp LFUNC:RB
+  |  ins_callt				// Returned 0: retry fast path.
+  |
+  |// Reconstruct previous base for vmeta_call during tailcall.
+  |->vm_call_tail:
+  |  lgr RA, BASE
+  |  tmll PC, FRAME_TYPE
+  |  jne >3
+  |  llgc RB, PC_RA
+  |  lcgr RB, RB
+  |  sllg RB, RB, 3
+  |  lay BASE, -16(RB, BASE)		// base = base - (RB+2)*8
+  |  j ->vm_call_dispatch		// Resolve again for tailcall.
+  |3:
+  |  lgr RB, PC
+  |  nill RB, -8			// Clear the low three bits of PC.
+  |  sgr BASE, RB
+  |  j ->vm_call_dispatch		// Resolve again for tailcall.
+  |
+  |5:  // Grow stack for fallback handler.
+  |  lghi CARG2, LUA_MINSTACK
+  |  lgr CARG1, L:RB
+  |  brasl r14, extern lj_state_growstack	// (lua_State *L, int n)
+  |  lg BASE, L:RB->base
+  |  lghi RD, 0				// Simulate a return 0.
+  |  j <1				// Dumb retry (goes through ff first).
+  |
+  |->fff_gcstep:			// Call GC step function.
+  |  // BASE = new base, RD = nargs+1
+  |  stg r14, SAVE_TMP			// Save return address
+  |  lg L:RB, SAVE_L
+  |  stg PC, SAVE_PC			// Redundant (but a defined value).
+  |  stg BASE, L:RB->base
+  |  sllg RD, NARGS:RD, 3
+  |  lay RD, -8(RD, BASE)		// RD = BASE + nargs*8.
+  |  lgr CARG1, L:RB
+  |  stg RD, L:RB->top
+  |  brasl r14, extern lj_gc_step	// (lua_State *L)
+  |  lg BASE, L:RB->base
+  |  lg RD, L:RB->top
+  |  sgr RD, BASE
+  |  srlg RD, RD, 3
+  |  aghi NARGS:RD, 1			// Recompute RD = nargs+1 from top and base.
+  |  lg r14, SAVE_TMP			// Restore return address.
+  |  br r14
+  |
+  |//-----------------------------------------------------------------------
+  |//-- Special dispatch targets -------------------------------------------
+  |//-----------------------------------------------------------------------
+  |
+  |->vm_record:				// Dispatch target for recording phase.
+  |  stg r0, 0				// Placeholder body (same pattern as the NYI ->vm_mod stub):
+  |  stg r0, 0				// stores to address 0, presumably to fault if ever reached.
+  |
+  |->vm_rethook:			// Dispatch target for return hooks.
+  |  llgc RD, (DISPATCH_GL(hookmask))(DISPATCH)
+  |  tmll RD, HOOK_ACTIVE
+  |  jne >5				// Hook already active: dispatch statically.
+  |  j >1
+  |
+  |->vm_inshook:			// Dispatch target for instr/line hooks.
+  |  llgc RD, (DISPATCH_GL(hookmask))(DISPATCH)
+  |  tmll RD, HOOK_ACTIVE		// Hook already active?
+  |  jne >5
+  |
+  |  tmll RD, LUA_MASKLINE|LUA_MASKCOUNT	// Any line or count hook set?
+  |  je >5
+  |  ly TMPR0, (DISPATCH_GL(hookcount))(DISPATCH)
+  |  ahi TMPR0, -1
+  |  sty TMPR0, (DISPATCH_GL(hookcount))(DISPATCH)
+  |  je >1				// Count reached zero (condition code from ahi).
+  |  tmll RD, LUA_MASKLINE
+  |  je >5
+  |1:
+  |  lg L:RB, SAVE_L
+  |  stg BASE, L:RB->base
+  |  lgr CARG2, PC
+  |  lgr CARG1, L:RB
+  |  // SAVE_PC must hold the _previous_ PC. The callee updates it with PC.
+  |  brasl r14, extern lj_dispatch_ins	// (lua_State *L, const BCIns *pc)
+  |3:
+  |  lg BASE, L:RB->base
+  |4:
+  |  llgc RA, PC_RA
+  |5:  // Re-dispatch the current instruction through the static table.
+  |  llgc OP, PC_OP
+  |  sllg TMPR1, OP, 3			// OP * 8 = offset of the table entry.
+  |  llgh RD, PC_RD
+  |  lg TMPR1, GG_DISP2STATIC(TMPR1, DISPATCH)	// Load the handler address.
+  |  br TMPR1
+  |
+  |->cont_hook:				// Continue from hook yield.
+  |  stg r0, 0				// Placeholder body (same pattern as the NYI ->vm_mod stub).
+  |  stg r0, 0
+  |
+  |->vm_hotloop:			// Hot loop counter underflow.
+  |  stg r0, 0				// Placeholder body (same pattern as the NYI ->vm_mod stub).
+  |  stg r0, 0
+  |
+  |->vm_callhook:			// Dispatch target for call hooks.
+  |  stg PC, SAVE_PC
+  |.if JIT
+  |  j >1				// Skip the hot call marker below.
+  |.endif
+  |
+  |->vm_hotcall:			// Hot call counter underflow.
+  |.if JIT
+  |  stg PC, SAVE_PC
+  |  oill PC, 1				// Marker for hot call.
+  |1:
+  |.endif
+  |  sllg RD, NARGS:RD, 3
+  |  lay RD, -8(RD, BASE)		// RD = BASE + nargs*8.
+  |  lg L:RB, SAVE_L
+  |  stg BASE, L:RB->base
+  |  stg RD, L:RB->top
+  |  lgr CARG2, PC
+  |  lgr CARG1, L:RB
+  |  brasl r14, extern lj_dispatch_call	// (lua_State *L, const BCIns *pc)
+  |  // ASMFunction returned in r2 (CRET1).
+  |  lghi TMPR0, 0
+  |  stg TMPR0, SAVE_PC			// Invalidate for subsequent line hook.
+  |.if JIT
+  |  nill PC, -2			// Remove the hot call marker bit again.
+  |.endif
+  |  lg BASE, L:RB->base
+  |  lg RD, L:RB->top
+  |  sgr RD, BASE
+  |  lgr RB, CRET1
+  |  llgc RA, PC_RA
+  |  srl RD, 3
+  |  ahi NARGS:RD, 1			// RD = nargs+1 recomputed from top and base.
+  |  llgfr RD, RD
+  |  br RB				// Continue at the returned function.
+  |
+  |->cont_stitch:			// Trace stitching.
+  |  stg r0, 0				// Placeholder body (same pattern as the NYI ->vm_mod stub).
+  |  stg r0, 0
+  |
+  |->vm_profhook:			// Dispatch target for profiler hook.
+  |  stg r0, 0				// Placeholder body (same pattern as the NYI ->vm_mod stub).
+  |  stg r0, 0
+  |
+  |//-----------------------------------------------------------------------
+  |//-- Trace exit handler -------------------------------------------------
+  |//-----------------------------------------------------------------------
+  |
+  |// Trace exit entry points. Only placeholder bodies are present here
+  |// (same store-to-address-0 pattern as the NYI ->vm_mod stub).
+  |->vm_exit_handler:
+  |  stg r0, 0
+  |  stg r0, 0
+  |->vm_exit_interp:
+  |  stg r0, 0
+  |  stg r0, 0
+  |
+  |//-----------------------------------------------------------------------
+  |//-- Math helper functions ----------------------------------------------
+  |//-----------------------------------------------------------------------
+  |
+  |// FP value rounding. Called by math.floor/math.ceil fast functions.
+  |// Value to round is in f0. May clobber f0-f7 and r0. Return address is r14.
+  |.macro vm_round, name, mask		// mask = rounding mode operand for didbr.
+  |->name:
+  |  lghi r0, 1
+  |  cdfbr f1, r0			// f1 = 1.0
+  |  didbr f0, f2, f1, mask // f0=remainder, f2=quotient.
+  |  jnle >1
+  |  ldr f0, f2				// Result = integer quotient of f0 / 1.0.
+  |  br r14
+  |1: // partial remainder (sanity check)
+  |  stg r0, 0				// Store to address 0, presumably to fault.
+  |.endmacro
+  |
+  |  vm_round vm_floor, 7 // Round towards -inf.
+  |  vm_round vm_ceil,  6 // Round towards +inf.
+  |  vm_round vm_trunc, 5 // Round towards 0.
+  |
+  |// FP modulo x%y. Called by BC_MOD* and vm_arith.
+  |->vm_mod: // NYI.
+  |  stg r0, 0
+  |  stg r0, 0
+  |
+  |//-----------------------------------------------------------------------
+  |//-- Assertions ---------------------------------------------------------
+  |//-----------------------------------------------------------------------
+  |
+  |->assert_bad_for_arg_type:		// Only a store to address 0 (twice), presumably to fault.
+  |  stg r0, 0
+  |  stg r0, 0
+  |
+  |//-----------------------------------------------------------------------
+  |//-- FFI helper functions -----------------------------------------------
+  |//-----------------------------------------------------------------------
+  |
+  |// Handler for callback functions. Placeholder body only (see the NYI ->vm_mod stub).
+  |->vm_ffi_callback:
+  |  stg r0, 0
+  |  stg r0, 0
+  |
+  |->cont_ffi_callback:			// Return from FFI callback.
+  |  stg r0, 0				// Placeholder body (same pattern as the NYI ->vm_mod stub).
+  |  stg r0, 0
+  |
+  |->vm_ffi_call:			// Call C function via FFI.
+  |  // Caveat: needs special frame unwinding, see below.
+  |.if FFI
+  |  .type CCSTATE, CCallState, r8
+  |  stmg r6, r15, 48(sp)		// Save r6-r15 in the caller-provided area at 48(sp).
+  |  lgr r13, sp			// Use r13 as frame pointer.
+  |  lgr CCSTATE, CARG1
+  |  lg r7, CCSTATE->func		// Target function, called via basr below.
+  |
+  |  // Readjust stack.
+  |  sgf sp, CCSTATE->spadj
+  |
+  |  // Copy stack slots.
+  |  llgc r1, CCSTATE->nsp
+  |  chi r1, 0
+  |  jh >2
+  |1:
+  |  lmg CARG1, CARG5, CCSTATE->gpr[0]
+  |  // TODO: conditionally load FPRs?
+  |  ld FARG1, CCSTATE->fpr[0]
+  |  ld FARG2, CCSTATE->fpr[1]
+  |  ld FARG3, CCSTATE->fpr[2]
+  |  ld FARG4, CCSTATE->fpr[3]
+  |  basr r14, r7
+  |
+  |  stg CRET1, CCSTATE->gpr[0]		// Hand both possible return registers back to the caller.
+  |  std f0, CCSTATE->fpr[0]
+  |
+  |  lgr sp, r13
+  |  lmg r6, r15, 48(sp)
+  |  br r14
+  |
+  |2:
+  |  sll r1, 3				// Slot count * 8 = bytes to copy.
+  |  la r10, (offsetof(CCallState, stack))(CCSTATE)	// Source.
+  |  la r11, (CCALL_SPS_EXTRA*8)(sp)			// Destination.
+  |3:					// Copy in chunks of 256 bytes (one mvc each).
+  |  chi r1, 256
+  |  jl >4
+  |  mvc 0(256, r11), 0(r10)
+  |  la r10, 256(r10)
+  |  la r11, 256(r11)
+  |  ahi r1, -256
+  |  j <3
+  |
+  |4:					// Copy the remaining bytes, if any.
+  |  ahi r1, -1
+  |  jl <1				// Nothing left.
+  |  larl r9, >5
+  |  ex r1, 0(r9)			// Execute the mvc at 5 with its length taken from r1.
+  |  j <1
+  |
+  |5:
+  |  // Target of the ex above; never reached by fall-through.
+  |  mvc 0(1, r11), 0(r10)
+  |.endif
+  |// Note: vm_ffi_call must be the last function in this object file!
+  |
+  |//-----------------------------------------------------------------------
+/* Generate the code for a single instruction. */
+static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+  int vk = 0;
+  (void)vk;
+  |// Note: aligning all instructions does not pay off.
+  |=>defop:
+  switch (op) {
+  /* -- Comparison ops ---------------------------------------------------- */
+  /* Remember: all ops branch for a true comparison, fall through otherwise. */
+  |.macro jmp_comp, lt, ge, le, gt, target	// Emits the one branch (to target) selected by the current op.
+  ||switch (op) {
+  ||case BC_ISLT:
+  |   lt target
+  ||break;
+  ||case BC_ISGE:
+  |   ge target
+  ||break;
+  ||case BC_ISLE:
+  |   le target
+  ||break;
+  ||case BC_ISGT:
+  |   gt target
+  ||break;
+  ||default: break;  /* Shut up GCC. */
+  ||}
+  |.endmacro
+  case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
+    |  // RA = src1, RD = src2, JMP with RD = target
+    |  ins_AD
+    |  sllg RA, RA, 3			// Slot numbers * 8 = byte offsets from BASE.
+    |  sllg RD, RD, 3
+    |  ld f0, 0(RA, BASE)		// Both operands as doubles ...
+    |  ld f1, 0(RD, BASE)
+    |  lg RA, 0(RA, BASE)		// ... and as raw 64 bit slot values.
+    |  lg RD, 0(RD, BASE)
+    |  srag ITYPE, RA, 47		// Upper 17 bits of each slot, sign-extended.
+    |  srag RB, RD, 47
+    |
+    |  clfi ITYPE, LJ_TISNUM; jne >7
+    |  clfi RB, LJ_TISNUM; jne >8
+    |  // Both are integers.
+    |  la PC, 4(PC)			// Step over the following instruction word.
+    |  cr RA, RD
+    |  jmp_comp jhe, jl, jh, jle, >9	// Inverse condition skips the jump.
+    |6:
+    |  llgh RD, PC_RD
+    |  branchPC RD
+    |9:
+    |  ins_next
+    |
+    |7:  // RA is not an integer.
+    |  jh ->vmeta_comp
+    |  // RA is a number.
+    |  clfi RB, LJ_TISNUM; jl >1; jne ->vmeta_comp
+    |  // RA is a number, RD is an integer.
+    |  cdfbr f1, RD
+    |  j >1
+    |
+    |8:  // RA is an integer, RD is not an integer.
+    |  jh ->vmeta_comp
+    |  // RA is an integer, RD is a number.
+    |  cdfbr f0, RA
+    |1:
+    |  la PC, 4(PC)
+    |  cdbr f0, f1
+    |  // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't.
+    |  jmp_comp jnl, jl, jnle, jle, <9
+    |  j <6
+    break;
+  case BC_ISEQV: case BC_ISNEV:
+    vk = op == BC_ISEQV;
+    |  ins_AD	// RA = src1, RD = src2, JMP with RD = target
+    |  sllg RD, RD, 3
+    |  ld f1, 0(RD, BASE)
+    |  lg RD, 0(RD, BASE)
+    |  sllg RA, RA, 3
+    |  ld f0, 0(RA, BASE)
+    |  lg RA, 0(RA, BASE)
+    |  la PC, 4(PC)
+    |  srag RB, RD, 47
+    |  srag ITYPE, RA, 47
+    |  clfi RB, LJ_TISNUM; jne >7
+    |  clfi ITYPE, LJ_TISNUM; jne >8
+    |  cr RD, RA
+    if (vk) {
+      |  jne >9
+    } else {
+      |  je >9
+    }
+    |  llgh RD, PC_RD
+    |  branchPC RD
+    |9:
+    |  ins_next
+    |
+    |7:  // RD is not an integer.
+    |  jh >5
+    |  // RD is a number.
+    |  clfi ITYPE, LJ_TISNUM; jl >1; jne >5
+    |  // RD is a number, RA is an integer.
+    |  cdfbr f0, RA
+    |  j >1
+    |
+    |8:  // RD is an integer, RA is not an integer.
+    |  jh >5
+    |  // RD is an integer, RA is a number.
+    |  cdfbr f1, RD
+    |  j >1
+    |
+    |1:
+    |  cdbr f0, f1
+    |4:
+  iseqne_fp:
+    if (vk) {
+      |  jne >2				// Unordered means not equal.
+    } else {
+      |  je >1				// Unordered means not equal.
+    }
+  iseqne_end:
+    if (vk) {
+      |1:				// EQ: Branch to the target.
+      |  llgh RD, PC_RD
+      |  branchPC RD
+      |2:				// NE: Fallthrough to next instruction.
+      |.if not FFI
+      |3:
+      |.endif
+    } else {
+      |.if not FFI
+      |3:
+      |.endif
+      |2:				// NE: Branch to the target.
+      |  llgh RD, PC_RD
+      |  branchPC RD
+      |1:				// EQ: Fallthrough to next instruction.
+    }
+    if (LJ_DUALNUM && (op == BC_ISEQV || op == BC_ISNEV ||
+		       op == BC_ISEQN || op == BC_ISNEN)) {
+      |  j <9
+    } else {
+      |  ins_next
+    }
+    |
+    if (op == BC_ISEQV || op == BC_ISNEV) {
+      |5:  // Either or both types are not numbers.
+      |.if FFI
+      |  clfi RB, LJ_TCDATA; je ->vmeta_equal_cd
+      |  clfi ITYPE, LJ_TCDATA; je ->vmeta_equal_cd
+      |.endif
+      |  cgr RA, RD
+      |  je <1				// Same GCobjs or pvalues?
+      |  cr RB, ITYPE
+      |  jne <2				// Not the same type?
+      |  clfi RB, LJ_TISTABUD
+      |  jh <2				// Different objects and not table/ud?
+      |
+      |  // Different tables or userdatas. Need to check __eq metamethod.
+      |  // Field metatable must be at same offset for GCtab and GCudata!
+      |  cleartp TAB:RA
+      |  lg TAB:RB, TAB:RA->metatable
+      |  cghi TAB:RB, 0
+      |  je <2				// No metatable?
+      |  tm TAB:RB->nomm, 1<<MM_eq
+      |  jne <2				// Or 'no __eq' flag set?
+      if (vk) {
+	|  lghi RB, 0			// ne = 0
+      } else {
+	|  lghi RB, 1			// ne = 1
+      }
+      |  j ->vmeta_equal		// Handle __eq metamethod.
+    } else {
+      |.if FFI
+      |3:
+      |  clfi ITYPE, LJ_TCDATA
+      if (LJ_DUALNUM && vk) {
+	|  jne <9
+      } else {
+	|  jne <2
+      }
+      |  j ->vmeta_equal_cd
+      |.endif
+    }
+    break;
+  case BC_ISEQS: case BC_ISNES:
+    vk = op == BC_ISEQS;
+    |  ins_AND	// RA = src, RD = str const, JMP with RD = target
+    |  sllg RA, RA, 3
+    |  sllg RD, RD, 3
+    |  lg RB, 0(RA, BASE)
+    |  la PC, 4(PC)
+    |  checkstr RB, >3
+    |  cg RB, 0(RD, KBASE)
+  iseqne_test:
+    if (vk) {
+      |  jne >2
+    } else {
+      |  je >1
+    }
+    goto iseqne_end;
+  case BC_ISEQN: case BC_ISNEN:
+    vk = op == BC_ISEQN;
+    |  ins_AD	// RA = src, RD = num const, JMP with RD = target
+    |  sllg RA, RA, 3
+    |  sllg RD, RD, 3
+    |  ld f0, 0(RA, BASE)
+    |  lg RB, 0(RA, BASE)
+    |  ld f1, 0(RD, KBASE)
+    |  lg RD, 0(RD, KBASE)
+    |  la PC, 4(PC)
+    |  checkint RB, >7
+    |  checkint RD, >8
+    |  cr RB, RD
+    if (vk) {
+      |  jne >9
+    } else {
+      |  je >9
+    }
+    |  llgh RD, PC_RD
+    |  branchPC RD
+    |9:
+    |  ins_next
+    |
+    |7:  // RA is not an integer.
+    |  jh >3
+    |  // RA is a number.
+    |  checkint RD, >1
+    |  // RA is a number, RD is an integer.
+    |  cdfbr f1, RD
+    |  j >1
+    |
+    |8:  // RA is an integer, RD is a number.
+    |  cdfbr f0, RB
+    |  cdbr f0, f1
+    |  j >4
+    |1:
+    |  cdbr f0, f1
+    |4:
+    goto iseqne_fp;
+  case BC_ISEQP: case BC_ISNEP:
+    vk = op == BC_ISEQP;
+    |  ins_AND	// RA = src, RD = primitive type (~), JMP with RD = target
+    |  sllg RA, RA, 3
+    |  lg RB, 0(RA, BASE)
+    |  srag RB, RB, 47
+    |  la PC, 4(PC)
+    |  cr RB, RD
+    if (!LJ_HASFFI) goto iseqne_test;
+    if (vk) {
+      |  jne >3
+      |  llgh RD, PC_RD
+      |  branchPC RD
+      |2:
+      |  ins_next
+      |3:
+      |  cghi RB, LJ_TCDATA; jne <2
+      |  j ->vmeta_equal_cd
+    } else {
+      |  je >2
+      |  cghi RB, LJ_TCDATA; je ->vmeta_equal_cd
+      |  llgh RD, PC_RD
+      |  branchPC RD
+      |2:
+      |  ins_next
+    }
+    break;
+  /* -- Unary test and copy ops ------------------------------------------- */
+  case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
+    |  ins_AD	// RA = dst or unused, RD = src, JMP with RD = target
+    |  sllg RD, RD, 3
+    |  sllg RA, RA, 3
+    |  lg ITYPE, 0(RD, BASE)
+    |  la PC, 4(PC)
+    if (op == BC_ISTC || op == BC_ISFC) {
+      |  lgr RB, ITYPE
+    }
+    |  srag ITYPE, ITYPE, 47
+    if (op == BC_IST || op == BC_ISTC) {
+      |  jhe >1
+    } else {
+      |  jl >1
+    }
+    if (op == BC_ISTC || op == BC_ISFC) {
+      |  stg RB, 0(RA, BASE)
+    }
+    |  llgh RD, PC_RD
+    |  branchPC RD
+    |1:					// Fallthrough to the next instruction.
+    |  ins_next
+    break;
+  case BC_ISTYPE:
+    |  ins_AD	// RA = src, RD = -type
+    |  lghr RD, RD			// Sign-extend the 16-bit operand to 64 bits.
+    |  sllg RA, RA, 3
+    |  lg RB, 0(RA, BASE)
+    |  srag RB, RB, 47			// Shift out the low 47 bits of the slot value.
+    |  agr RB, RD			// Add RD (= -type): the sum is zero only on a match.
+    |  jne ->vmeta_istype		// Non-zero sum: take the ->vmeta_istype path.
+    |  ins_next
+    break;
+  case BC_ISNUM:
+    |  ins_AD	// RA = src, RD = -(TISNUM-1)
+    |  sllg TMPR1, RA, 3
+    |  lg TMPR1, 0(TMPR1, BASE)		// Load the value in slot RA.
+    |  checknumtp TMPR1, ->vmeta_istype	// RD is not read here; checknumtp decides whether the fallback is taken.
+    |  ins_next
+    break;
+  case BC_MOV:
+    |  ins_AD	// RA = dst, RD = src
+    |  sllg RD, RD, 3
+    |  lg RB, 0(RD, BASE)		// Load the 8-byte value of slot RD.
+    |  sllg RA, RA, 3
+    |  stg RB, 0(RA, BASE)		// Store it unchanged into slot RA.
+    |  ins_next_
+    break;
+  case BC_NOT:
+    |  ins_AD	// RA = dst, RD = src
+    |  sllg RD, RD, 3
+    |  sllg RA, RA, 3
+    |  lg RB, 0(RD, BASE)
+    |  srag RB, RB, 47			// Shift out the low 47 bits, leaving the type tag.
+    |  load_false RC			// Default result for any truthy operand.
+    |  clfi RB, LJ_TISTRUECOND		// Unsigned tag compare: below the bound means the operand is truthy.
+    |  jl >1				// Truthy (not only 'true' itself): keep the false result.
+    |  load_true RC			// Operand failed the truth test: result is true.
+    |1:
+    |  stg RC, 0(RA, BASE)
+    |  ins_next
+    break;
+  case BC_UNM:
+    |  ins_AD	// RA = dst, RD = src
+    |  sllg RA, RA, 3
+    |  sllg RD, RD, 3
+    |  lg RB, 0(RD, BASE)
+    |  checkint RB, >3			// Not an integer: handled at 3.
+    |  lcr RB, RB; jo >2		// Negate the low 32 bits; on overflow go to 2.
+    |1:
+    |  stg RB, 0(RA, BASE)
+    |  ins_next
+    |2:					// Overflow: store a double constant instead.
+    |  llihh RB, 0x41e0 // (double)2^31
+    |  j <1
+    |3:
+    |  jh ->vmeta_unm			// NOTE(review): appears to rely on the condition code left by checkint -- confirm.
+    |  // Toggle sign bit.
+    |  llihh TMPR0, 0x8000		// TMPR0 = 0x8000 in the top 16 bits, zero elsewhere.
+    |  xgr RB, TMPR0
+    |  j <1
+    break;
+  case BC_LEN:
+    |  ins_AD	// RA = dst, RD = src
+    |  sllg RD, RD, 3
+    |  lg RD, 0(RD, BASE)
+    |  checkstr RD, >2
+    |  llgf RD, STR:RD->len
+    |1:
+    |  sllg RA, RA, 3
+    |  setint RD
+    |  stg RD, 0(RA, BASE)
+    |  ins_next
+    |2:
+    |  cghi ITYPE, LJ_TTAB; jne ->vmeta_len
+    |  lgr TAB:CARG1, TAB:RD
+#if LJ_52
+    |  lg TAB:RB, TAB:RD->metatable
+    |  cghi TAB:RB, 0
+    |  jne >9
+    |3:
+    |->BC_LEN_Z:
+    |  brasl r14, extern lj_tab_len	// (GCtab *t)
+    |  // Length of table returned in r2 (CRET1).
+    |  lgr RD, CRET1
+    |  llgc RA, PC_RA
+    |  j <1
+#if LJ_52
+    |9:  // Check for __len.
+    |  tm TAB:RB->nomm, 1<<MM_len
+    |  jne <3
+    |  j ->vmeta_len			// 'no __len' flag NOT set: check.
+    break;
+  /* -- Binary ops -------------------------------------------------------- */
+    |.macro ins_arithpre
+    |  ins_ABC
+    |  sllg RB, RB, 3
+    |  sllg RC, RC, 3
+    |  sllg RA, RA, 3
+    |.endmacro
+    |
+    |.macro ins_arithfp, ins
+    |  ins_arithpre
+    ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
+    ||switch (vk) {
+    ||case 0:
+    |   ld f0, 0(RB, BASE)
+    |   ld f1, 0(RC, KBASE)
+    |   lg RB, 0(RB, BASE)
+    |   lg RC, 0(RC, KBASE)
+    |   checknumtp RB, ->vmeta_arith_vno
+    |   checknumtp RC, ->vmeta_arith_vno
+    |   ins f0, f1
+    ||  break;
+    ||case 1:
+    |   ld f1, 0(RB, BASE)
+    |   ld f0, 0(RC, KBASE)
+    |   lg RB, 0(RB, BASE)
+    |   lg RC, 0(RC, KBASE)
+    |   checknumtp RB, ->vmeta_arith_nvo
+    |   checknumtp RC, ->vmeta_arith_nvo
+    |   ins f0, f1
+    ||  break;
+    ||default:
+    |   ld f0, 0(RB, BASE)
+    |   ld f1, 0(RC, BASE)
+    |   lg RB, 0(RB, BASE)
+    |   lg RC, 0(RC, BASE)
+    |   checknumtp RB, ->vmeta_arith_vvo
+    |   checknumtp RC, ->vmeta_arith_vvo
+    |   ins f0, f1
+    ||  break;
+    ||}
+    |  std f0, 0(RA, BASE)
+    |  ins_next
+    |.endmacro
+    |
+    |.macro ins_arithdn, intins
+    |  ins_arithpre
+    ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
+    ||switch (vk) {
+    ||case 0:
+    |   lg RB, 0(RB, BASE)
+    |   lg RC, 0(RC, KBASE)
+    |   checkint RB, ->vmeta_arith_vno
+    |   checkint RC, ->vmeta_arith_vno
+    |   intins RB, RC; jo ->vmeta_arith_vno
+    ||  break;
+    ||case 1:
+    |   lg RB, 0(RB, BASE)
+    |   lg RC, 0(RC, KBASE)
+    |   checkint RB, ->vmeta_arith_nvo
+    |   checkint RC, ->vmeta_arith_nvo
+    |   intins RC, RB; jo ->vmeta_arith_nvo
+    ||  break;
+    ||default:
+    |   lg RB, 0(RB, BASE)
+    |   lg RC, 0(RC, BASE)
+    |   checkint RB, ->vmeta_arith_vvo
+    |   checkint RC, ->vmeta_arith_vvo
+    |   intins RB, RC; jo ->vmeta_arith_vvo
+    ||  break;
+    ||}
+    ||if (vk == 1) {
+    |   // setint RC
+    |   stg RC, 0(RA, BASE)
+    ||} else {
+    |   // setint RB
+    |   stg RB, 0(RA, BASE)
+    ||}
+    |  ins_next
+    |.endmacro
+    |  // RA = dst, RB = src1 or num const, RC = src2 or num const
+  case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
+    |  ins_arithdn ar
+    break;
+  case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
+    |  ins_arithdn sr
+    break;
+  case BC_MULVN: case BC_MULNV: case BC_MULVV:
+    |  ins_arithpre
+    |  // For multiplication we use msgfr and check if the result
+    |  // fits in an int32_t.
+    switch(op) {
+    case BC_MULVN:
+      |  lg RB, 0(RB, BASE)
+      |  lg RC, 0(RC, KBASE)
+      |  checkint RB, ->vmeta_arith_vno
+      |  checkint RC, ->vmeta_arith_vno
+      |  lgfr RB, RB
+      |  msgfr RB, RC
+      |  lgfr RC, RB
+      |  cgr RB, RC; jne ->vmeta_arith_vno
+      break;
+    case BC_MULNV:
+      |  lg RB, 0(RB, BASE)
+      |  lg RC, 0(RC, KBASE)
+      |  checkint RB, ->vmeta_arith_nvo
+      |  checkint RC, ->vmeta_arith_nvo
+      |  lgfr RB, RB
+      |  msgfr RB, RC
+      |  lgfr RC, RB
+      |  cgr RB, RC; jne ->vmeta_arith_nvo
+      break;
+    default:
+      |  lg RB, 0(RB, BASE)
+      |  lg RC, 0(RC, BASE)
+      |  checkint RB, ->vmeta_arith_vvo
+      |  checkint RC, ->vmeta_arith_vvo
+      |  lgfr RB, RB
+      |  msgfr RB, RC
+      |  lgfr RC, RB
+      |  cgr RB, RC; jne ->vmeta_arith_vvo
+      break;
+    }
+    |  llgfr RB, RB
+    |  setint RB
+    |  stg RB, 0(RA, BASE)
+    |  ins_next
+    break;
+  case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
+    |  ins_arithfp ddbr
+    break;
+  // TODO: implement fast mod operation.
+  // x86_64 does floating point mod, however it might be better to use integer mod.
+  case BC_MODVN:
+    |  j ->vmeta_arith_vno		// No inline path: always jump to the fallback.
+    break;
+  case BC_MODNV:
+    |  j ->vmeta_arith_nvo		// No inline path: always jump to the fallback.
+    break;
+  case BC_MODVV:
+    |  j ->vmeta_arith_vvo		// No inline path: always jump to the fallback.
+    break;
+  case BC_POW:
+    |  ins_ABC
+    |  sllg RB, RB, 3
+    |  sllg RC, RC, 3
+    |  ld FARG1, 0(RB, BASE)		// Load both operands as doubles for the call.
+    |  ld FARG2, 0(RC, BASE)
+    |  lg TMPR0, 0(RB, BASE)
+    |  checknumtp TMPR0, ->vmeta_arith_vvo	// Operand in slot RB fails the check: use the fallback.
+    |  lg TMPR0, 0(RC, BASE)
+    |  checknumtp TMPR0, ->vmeta_arith_vvo	// Same for the operand in slot RC.
+    |  brasl r14, extern pow	// double pow(double x, double y), result in f0.
+    |  llgc RA, PC_RA			// RA is re-read from the instruction after the call.
+    |  sllg RA, RA, 3
+    |  std f0, 0(RA, BASE)
+    |  ins_next
+    break;
+  case BC_CAT:
+    |  ins_ABC	// RA = dst, RB = src_start, RC = src_end
+    |  lg L:CARG1, SAVE_L
+    |  stg BASE, L:CARG1->base
+    |  lgr CARG3, RC
+    |  sgr CARG3, RB			// CARG3 = RC - RB, passed as 'left'.
+    |  sllg RC, RC, 3
+    |  la CARG2, 0(RC, BASE)		// CARG2 = address of slot RC, passed as 'top'.
+    |->BC_CAT_Z:
+    |  lgr L:RB, L:CARG1		// Keep L in RB for use after the call.
+    |  stg PC, SAVE_PC
+    |  brasl r14, extern lj_meta_cat		// (lua_State *L, TValue *top, int left)
+    |  // NULL (finished) or TValue * (metamethod) returned in r2 (CRET1).
+    |  lg BASE, L:RB->base		// Reload BASE from L->base after the call.
+    |  ltgr RC, CRET1			// Copy the result to RC and test it for zero.
+    |  jne ->vmeta_binop
+    |  llgc RB, PC_RB			// Copy result to Stk[RA] from Stk[RB].
+    |  sllg RB, RB, 3
+    |  llgc RA, PC_RA
+    |  sllg RA, RA, 3
+    |  lg RC, 0(RB, BASE)
+    |  stg RC, 0(RA, BASE)
+    |  ins_next
+    break;
+  /* -- Constant ops ------------------------------------------------------ */
+  case BC_KSTR:
+    |  ins_AND	// RA = dst, RD = str const (~)
+    |  sllg RD, RD, 3
+    |  lg RD, 0(RD, KBASE)		// Fetch the constant at KBASE + RD*8.
+    |  settp RD, LJ_TSTR		// settp with LJ_TSTR (presumably applies the string type tag).
+    |  sllg RA, RA, 3
+    |  stg RD, 0(RA, BASE)		// Store into slot RA.
+    |  ins_next
+    break;
+  case BC_KCDATA:
+    |.if FFI
+    |  ins_AND	// RA = dst, RD = cdata const (~)
+    |  sllg RD, RD, 3
+    |  sllg RA, RA, 3
+    |  lg RD, 0(RD, KBASE)
+    |  settp RD, LJ_TCDATA
+    |  stg RD, 0(RA, BASE)
+    |  ins_next
+    |.endif
+    break;
+  case BC_KSHORT:
+    |  ins_AD	// RA = dst, RD = signed int16 literal
+    |  // Assumes DUALNUM.
+    |  lhr RD, RD			// Sign-extend literal to 32-bits.
+    |  setint RD			// setint (presumably tags the 32-bit value as an integer).
+    |  sllg RA, RA, 3
+    |  stg RD, 0(RA, BASE)		// Store into slot RA.
+    |  ins_next
+    break;
+  case BC_KNUM:
+    |  ins_AD	// RA = dst, RD = num const
+    |  sllg RD, RD, 3
+    |  ld f0, 0(RD, KBASE)		// Load the 8-byte constant at KBASE + RD*8 into f0.
+    |  sllg RA, RA, 3
+    |  std f0, 0(RA, BASE)		// Store it unchanged into slot RA.
+    |  ins_next
+    break;
+  case BC_KPRI:
+    |  ins_AD	// RA = dst, RD = primitive type (~)
+    |  sllg RA, RA, 3
+    |  sllg RD, RD, 47			// Move the operand up by 47 bits.
+    |  lghi TMPR0, -1			// All-ones mask.
+    |  xgr RD, TMPR0 // not
+    |  stg RD, 0(RA, BASE)		// Store the inverted word into slot RA.
+    |  ins_next
+    break;
+  case BC_KNIL:
+    |  ins_AD	// RA = dst_start, RD = dst_end
+    |  sllg RA, RA, 3
+    |  sllg RD, RD, 3
+    |  la RA, 8(RA, BASE)		// RA = address of the slot after dst_start.
+    |  la RD, 0(RD, BASE)		// RD = address of slot dst_end.
+    |  lghi RB, LJ_TNIL
+    |  stg RB, -8(RA)			// Sets minimum 2 slots.
+    |1:
+    |  stg RB, 0(RA)
+    |  la RA, 8(RA)			// Advance to the next 8-byte slot.
+    |  clgr RA, RD			// Unsigned 64-bit address compare.
+    |  jle <1				// Loop while RA <= RD.
+    |  ins_next
+    break;
+/* -- Upvalue and function ops ------------------------------------------ */
+  case BC_UGET:
+    |  ins_AD	// RA = dst, RD = upvalue #
+    |  sllg RA, RA, 3
+    |  sllg RD, RD, 3
+    |  lg LFUNC:RB, -16(BASE)		// The word at BASE-16 is used as the current function.
+    |  cleartp LFUNC:RB
+    |  lg UPVAL:RB, (offsetof(GCfuncL, uvptr))(RD, LFUNC:RB)	// Entry RD of the function's uvptr array.
+    |  lg RB, UPVAL:RB->v
+    |  lg RD, 0(RB)			// Read the value that the upvalue's v field points to.
+    |  stg RD, 0(RA, BASE)
+    |  ins_next
+    break;
+  case BC_USETV:
+#define TV2MARKOFS \
+ ((int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv))
+    |  ins_AD	// RA = upvalue #, RD = src
+    |  lg LFUNC:RB, -16(BASE)
+    |  cleartp LFUNC:RB
+    |  sllg RA, RA, 3
+    |  lg UPVAL:RB, (offsetof(GCfuncL, uvptr))(RA, LFUNC:RB)
+    |  tm UPVAL:RB->closed, 0xff
+    |  lg RB, UPVAL:RB->v
+    |  sllg TMPR1, RD, 3
+    |  lg RA, 0(TMPR1, BASE)
+    |  stg RA, 0(RB)
+    |  je >1
+    |  // Check barrier for closed upvalue.
+    |  tmy TV2MARKOFS(RB), LJ_GC_BLACK		// isblack(uv)
+    |  jne >2
+    |1:
+    |  ins_next
+    |
+    |2:  // Upvalue is black. Check if new value is collectable and white.
+    |  srag RD, RA, 47
+    |  ahi RD, -LJ_TISGCV
+    |  clfi RD, LJ_TNUMX - LJ_TISGCV		// tvisgcv(v)
+    |  jle <1
+    |  cleartp GCOBJ:RA
+    |  tm GCOBJ:RA->gch.marked, LJ_GC_WHITES	// iswhite(v)
+    |  je <1
+    |  // Crossed a write barrier. Move the barrier forward.
+    |  lgr CARG2, RB
+    |  brasl r14, extern lj_gc_barrieruv	// (global_State *g, TValue *tv)
+    |  j <1
+    break;
+#undef TV2MARKOFS
+  case BC_USETS:
+    |  ins_AND	// RA = upvalue #, RD = str const (~)
+    |  lg LFUNC:RB, -16(BASE)
+    |  sllg RA, RA, 3
+    |  sllg RD, RD, 3
+    |  cleartp LFUNC:RB
+    |  lg UPVAL:RB, (offsetof(GCfuncL, uvptr))(RA, LFUNC:RB)
+    |  lg STR:RA, 0(RD, KBASE)
+    |  lg RD, UPVAL:RB->v
+    |  settp STR:ITYPE, STR:RA, LJ_TSTR
+    |  stg STR:ITYPE, 0(RD)
+    |  tm UPVAL:RB->marked, LJ_GC_BLACK		// isblack(uv)
+    |  jne >2
+    |1:
+    |  ins_next
+    |
+    |2:  // Check if string is white and ensure upvalue is closed.
+    |  tm GCOBJ:RA->gch.marked, LJ_GC_WHITES	// iswhite(str)
+    |  je <1
+    |  tm UPVAL:RB->closed, 0xff
+    |  je <1
+    |  // Crossed a write barrier. Move the barrier forward.
+    |  lgr CARG2, RD
+    |  brasl r14, extern lj_gc_barrieruv	// (global_State *g, TValue *tv)
+    |  j <1
+    break;
+  case BC_USETN:
+    |  ins_AD	// RA = upvalue #, RD = num const
+    |  lg LFUNC:RB, -16(BASE)
+    |  sllg RA, RA, 3
+    |  sllg RD, RD, 3
+    |  cleartp LFUNC:RB
+    |  ld f0, 0(RD, KBASE)		// Load the 8-byte constant at KBASE + RD*8.
+    |  lg UPVAL:RB, (offsetof(GCfuncL, uvptr))(RA, LFUNC:RB)	// Entry RA of the function's uvptr array.
+    |  lg RA, UPVAL:RB->v
+    |  std f0, 0(RA)			// Store through the upvalue's v pointer; no barrier code is emitted here.
+    |  ins_next
+    break;
+  case BC_USETP:
+    |  ins_AD	// RA = upvalue #, RD = primitive type (~)
+    |  lg LFUNC:RB, -16(BASE)
+    |  sllg RA, RA, 3
+    |  cleartp LFUNC:RB
+    |  lg UPVAL:RB, (offsetof(GCfuncL, uvptr))(RA, LFUNC:RB)	// Entry RA of the function's uvptr array.
+    |  sllg RD, RD, 47			// Move the operand up by 47 bits ...
+    |  lghi TMPR0, -1
+    |  xgr RD, TMPR0			// ... and invert every bit (XOR with all-ones).
+    |  lg RA, UPVAL:RB->v
+    |  stg RD, 0(RA)			// Store through the upvalue's v pointer.
+    |  ins_next
+    break;
+  case BC_UCLO:
+    |  ins_AD	// RA = level, RD = target
+    |  branchPC RD				// Do this first to free RD.
+    |  lg L:RB, SAVE_L
+    |  ltg TMPR0, L:RB->openupval	// Load L->openupval and test it for zero.
+    |  je >1				// Zero: skip the call.
+    |  stg BASE, L:RB->base
+    |  sllg RA, RA, 3
+    |  la CARG2, 0(RA, BASE)		// CARG2 = address of slot RA, passed as 'level'.
+    |  lgr L:CARG1, L:RB
+    |  brasl r14, extern lj_func_closeuv	// (lua_State *L, TValue *level)
+    |  lg BASE, L:RB->base		// Reload BASE from L->base after the call.
+    |1:
+    |  ins_next
+    break;
+  case BC_FNEW:
+    |  ins_AND	// RA = dst, RD = proto const (~) (holding function prototype)
+    |  lg L:RB, SAVE_L
+    |  stg BASE, L:RB->base
+    |  lg CARG3, -16(BASE)		// Word at BASE-16, passed as 'parent' once its tag is cleared.
+    |  cleartp CARG3
+    |  sllg RD, RD, 3
+    |  lg CARG2, 0(RD, KBASE)		// Fetch GCproto *.
+    |  lgr CARG1, L:RB
+    |  stg PC, SAVE_PC
+    |  // (lua_State *L, GCproto *pt, GCfuncL *parent)
+    |  brasl r14, extern lj_func_newL_gc
+    |  // GCfuncL * returned in r2 (CRET1).
+    |  lg BASE, L:RB->base		// Reload BASE from L->base after the call.
+    |  llgc RA, PC_RA			// RA is re-read from the instruction after the call.
+    |  sllg RA, RA, 3
+    |  settp LFUNC:CRET1, LJ_TFUNC
+    |  stg LFUNC:CRET1, 0(RA, BASE)
+    |  ins_next
+    break;
+  case BC_TNEW:
+    |  ins_AD	// RA = dst, RD = hbits|asize
+    |  lg L:RB, SAVE_L
+    |  stg BASE, L:RB->base
+    |  lg RA, (DISPATCH_GL(gc.total))(DISPATCH)	// Operand was truncated; gc.total is compared with gc.threshold next.
+    |  clg RA, (DISPATCH_GL(gc.threshold))(DISPATCH)
+    |  stg PC, SAVE_PC
+    |  jhe >5				// total >= threshold: run lj_gc_step_fixtop first.
+    |1:
+    |  srlg CARG3, RD, 11		// CARG3 = RD >> 11, passed as hbits.
+    |  llill TMPR0, 0x7ff
+    |  nr RD, TMPR0			// RD = low 11 bits, passed as asize.
+    |  cr RD, TMPR0
+    |  je >3
+    |2:
+    |  lgr L:CARG1, L:RB
+    |  llgfr CARG2, RD
+    |  brasl r14, extern lj_tab_new  // (lua_State *L, uint32_t asize, uint32_t hbits)
+    |  // Table * returned in r2 (CRET1).
+    |  lg BASE, L:RB->base
+    |  llgc RA, PC_RA
+    |  sllg RA, RA, 3
+    |  settp TAB:CRET1, LJ_TTAB
+    |  stg TAB:CRET1, 0(RA, BASE)
+    |  ins_next
+    |3:  // Turn 0x7ff into 0x801.
+    |  llill RD, 0x801
+    |  j <2
+    |5:
+    |  lgr L:CARG1, L:RB
+    |  brasl r14, extern lj_gc_step_fixtop	// (lua_State *L)
+    |  llgh RD, PC_RD			// Reload RD from the instruction after the call.
+    |  j <1
+    break;
+  case BC_TDUP:
+    |  ins_AND	// RA = dst, RD = table const (~) (holding template table)
+    |  lg L:RB, SAVE_L
+    |  lg RA, (DISPATCH_GL(gc.total))(DISPATCH)	// Operand was truncated; gc.total is compared with gc.threshold below.
+    |  stg PC, SAVE_PC
+    |  clg RA, (DISPATCH_GL(gc.threshold))(DISPATCH)
+    |  stg BASE, L:RB->base
+    |  jhe >3				// total >= threshold: run lj_gc_step_fixtop first.
+    |2:
+    |  sllg RD, RD, 3
+    |  lg TAB:CARG2, 0(RD, KBASE)	// Template table constant at KBASE + RD*8.
+    |  lgr L:CARG1, L:RB
+    |  brasl r14, extern lj_tab_dup		// (lua_State *L, Table *kt)
+    |  // Table * returned in r2 (CRET1).
+    |  lg BASE, L:RB->base
+    |  llgc RA, PC_RA
+    |  settp TAB:CRET1, LJ_TTAB
+    |  sllg RA, RA, 3
+    |  stg TAB:CRET1, 0(RA, BASE)
+    |  ins_next
+    |3:
+    |  lgr L:CARG1, L:RB
+    |  brasl r14, extern lj_gc_step_fixtop	// (lua_State *L)
+    |  llgh RD, PC_RD				// Need to reload RD.
+    |  lghi TMPR0, -1
+    |  xgr RD, TMPR0				// not RD
+    |  j <2
+    break;
+  case BC_GGET:
+    |  ins_AND	// RA = dst, RD = str const (~)
+    |  lg LFUNC:RB, -16(BASE)
+    |  cleartp LFUNC:RB
+    |  lg TAB:RB, LFUNC:RB->env		// The table to index is the function's env field.
+    |  sllg TMPR1, RD, 3
+    |  lg STR:RC, 0(TMPR1, KBASE)	// Key is the string constant at KBASE + RD*8.
+    |  j ->BC_TGETS_Z			// Continue at BC_TGETS_Z with RB = table, RC = string.
+    break;
+  case BC_GSET:
+    |  ins_AND	// RA = src, RD = str const (~)
+    |  lg LFUNC:RB, -16(BASE)
+    |  cleartp LFUNC:RB
+    |  lg TAB:RB, LFUNC:RB->env		// The table to update is the function's env field.
+    |  sllg TMPR1, RD, 3
+    |  lg STR:RC, 0(TMPR1, KBASE)	// Key is the string constant at KBASE + RD*8.
+    |  j ->BC_TSETS_Z			// Continue at BC_TSETS_Z with RB = table, RC = string.
+    break;
+  case BC_TGETV:
+    |  ins_ABC	// RA = dst, RB = table, RC = key
+    |  sllg RB, RB, 3
+    |  lg TAB:RB, 0(RB, BASE)
+    |  sllg RC, RC, 3
+    |  lg RC, 0(RC, BASE)
+    |  checktab TAB:RB, ->vmeta_tgetv
+    |
+    |  // Integer key?
+    |  checkint RC, >5
+    |  cl RC, TAB:RB->asize		// Takes care of unordered, too.
+    |  jhe ->vmeta_tgetv		// Not in array part? Use fallback.
+    |  llgfr RC, RC
+    |  sllg RC, RC, 3
+    |  ag RC, TAB:RB->array
+    |  // Get array slot.
+    |  lg ITYPE, 0(RC)
+    |  cghi ITYPE, LJ_TNIL		// Avoid overwriting RB in fastpath.
+    |  je >2
+    |1:
+    |  sllg RA, RA, 3
+    |  stg ITYPE, 0(RA, BASE)
+    |  ins_next
+    |
+    |2:  // Check for __index if table value is nil.
+    |  lg TAB:TMPR1, TAB:RB->metatable
+    |  cghi TAB:TMPR1, 0
+    |  je <1
+    |  tm TAB:TMPR1->nomm, 1<<MM_index
+    |  je ->vmeta_tgetv			// 'no __index' flag NOT set: check.
+    |  j <1
+    |
+    |5:  // String key?
+    |  cghi ITYPE, LJ_TSTR; jne ->vmeta_tgetv
+    |  cleartp STR:RC
+    |  j ->BC_TGETS_Z
+    break;
+  case BC_TGETS:
+    |  ins_ABC
+    |  sllg RB, RB, 3
+    |  lg TAB:RB, 0(RB, BASE)
+    |  lghi TMPR1, -1
+    |  xgr RC, TMPR1
+    |  sllg RC, RC, 3
+    |  lg STR:RC, 0(RC, KBASE)
+    |  checktab TAB:RB, ->vmeta_tgets
+    |->BC_TGETS_Z:	// RB = GCtab *, RC = GCstr *
+    |  l TMPR1, TAB:RB->hmask
+    |  n TMPR1, STR:RC->hash
+    |  lgfr TMPR1, TMPR1
+    |  mghi TMPR1, #NODE
+    |  ag NODE:TMPR1, TAB:RB->node
+    |  settp ITYPE, STR:RC, LJ_TSTR
+    |1:
+    |  cg ITYPE, NODE:TMPR1->key
+    |  jne >4
+    |  // Get node value.
+    |  lg ITYPE, NODE:TMPR1->val
+    |  cghi ITYPE, LJ_TNIL
+    |  je >5				// Key found, but nil value?
+    |2:
+    |  sllg RA, RA, 3
+    |  stg ITYPE, 0(RA, BASE)
+    |  ins_next
+    |
+    |4:  // Follow hash chain.
+    |  lg NODE:TMPR1, NODE:TMPR1->next
+    |  cghi NODE:TMPR1, 0
+    |  jne <1
+    |  // End of hash chain: key not found, nil result.
+    |  lghi ITYPE, LJ_TNIL
+    |
+    |5:  // Check for __index if table value is nil.
+    |  lg TAB:TMPR1, TAB:RB->metatable
+    |  cghi TAB:TMPR1, 0
+    |  je <2				// No metatable: done.
+    |  tm TAB:TMPR1->nomm, 1<<MM_index
+    |  jne <2				// 'no __index' flag set: done.
+    |  j ->vmeta_tgets			// Caveat: preserve STR:RC.
+    break;
+  case BC_TGETB:
+    |  ins_ABC	// RA = dst, RB = table, RC = byte literal
+    |  sllg RB, RB, 3
+    |  lg TAB:RB, 0(RB, BASE)
+    |  checktab TAB:RB, ->vmeta_tgetb
+    |  cl RC, TAB:RB->asize
+    |  jhe ->vmeta_tgetb
+    |  sllg RC, RC, 3
+    |  ag RC, TAB:RB->array
+    |  // Get array slot.
+    |  lg ITYPE, 0(RC)
+    |  cghi ITYPE, LJ_TNIL
+    |  je >2
+    |1:
+    |  sllg RA, RA, 3
+    |  stg ITYPE, 0(RA, BASE)
+    |  ins_next
+    |
+    |2:  // Check for __index if table value is nil.
+    |  lg TAB:TMPR1, TAB:RB->metatable
+    |  cghi TAB:TMPR1, 0
+    |  je <1
+    |  tm TAB:TMPR1->nomm, 1<<MM_index
+    |  je ->vmeta_tgetb			// 'no __index' flag NOT set: check.
+    |  j <1
+    break;
+  case BC_TGETR:
+    |  ins_ABC	// RA = dst, RB = table, RC = key
+    |  sllg RB, RB, 3
+    |  lg TAB:RB, 0(RB, BASE)
+    |  cleartp TAB:RB			// No type check is emitted here, only cleartp.
+    |  sllg RC, RC, 3
+    |  llgf RC, 4(RC, BASE)		// Load low word (big endian).
+    |  cl RC, TAB:RB->asize		// Unsigned 32-bit compare of the key with asize.
+    |  jhe ->vmeta_tgetr		// Not in array part? Use fallback.
+    |  sllg RC, RC, 3
+    |  ag RC, TAB:RB->array		// RC = array base + key*8.
+    |  // Get array slot.
+    |->BC_TGETR_Z:
+    |  lg ITYPE, 0(RC)
+    |->BC_TGETR2_Z:
+    |  sllg RA, RA, 3
+    |  stg ITYPE, 0(RA, BASE)		// Store the loaded value into slot RA.
+    |  ins_next
+    break;
+  case BC_TSETV:
+    |  ins_ABC	// RA = src, RB = table, RC = key
+    |  sllg RB, RB, 3
+    |  lg TAB:RB, 0(RB, BASE)
+    |  sllg RC, RC, 3
+    |  lg RC, 0(RC, BASE)
+    |  checktab TAB:RB, ->vmeta_tsetv
+    |
+    |  // Integer key?
+    |  checkint RC, >5
+    |  cl RC, TAB:RB->asize		// Takes care of unordered, too.
+    |  jhe ->vmeta_tsetv
+    |  llgfr RC, RC
+    |  sllg RC, RC, 3
+    |  ag RC, TAB:RB->array
+    |  lghi TMPR0, LJ_TNIL
+    |  cg TMPR0, 0(RC)
+    |  je >3				// Previous value is nil?
+    |1:
+    |  tm TAB:RB->marked, LJ_GC_BLACK	// isblack(table)
+    |  jne >7
+    |2:  // Set array slot.
+    |  sllg RA, RA, 3
+    |  lg RB, 0(RA, BASE)
+    |  stg RB, 0(RC)
+    |  ins_next
+    |
+    |3:  // Check for __newindex if previous value is nil.
+    |  lg TAB:TMPR1, TAB:RB->metatable
+    |  cghi TAB:TMPR1, 0
+    |  je <1
+    |  tm TAB:TMPR1->nomm, 1<<MM_newindex
+    |  je ->vmeta_tsetv			// 'no __newindex' flag NOT set: check.
+    |  j <1
+    |
+    |5:  // String key?
+    |  cghi ITYPE, LJ_TSTR; jne ->vmeta_tsetv
+    |  cleartp STR:RC
+    |  j ->BC_TSETS_Z
+    |
+    |7:  // Possible table write barrier for the value. Skip valiswhite check.
+    |  barrierback TAB:RB, TMPR1
+    |  j <2
+    break;
+  case BC_TSETS:
+    |  ins_ABC	// RA = src, RB = table, RC = str const (~)
+    |  sllg RB, RB, 3
+    |  lg TAB:RB, 0(RB, BASE)
+    |  lghi TMPR0, -1
+    |  xgr RC, TMPR0 // ~RC
+    |  sllg RC, RC, 3
+    |  lg STR:RC, 0(RC, KBASE)
+    |  checktab TAB:RB, ->vmeta_tsets
+    |->BC_TSETS_Z:	// RB = GCtab *, RC = GCstr *
+    |  l TMPR1, TAB:RB->hmask
+    |  n TMPR1, STR:RC->hash
+    |  lgfr TMPR1, TMPR1
+    |  mghi TMPR1, #NODE
+    |  mvi TAB:RB->nomm, 0		// Clear metamethod cache.
+    |  ag NODE:TMPR1, TAB:RB->node
+    |  settp ITYPE, STR:RC, LJ_TSTR
+    |1:
+    |  cg ITYPE, NODE:TMPR1->key
+    |  jne >5
+    |  // Ok, key found. Assumes: offsetof(Node, val) == 0
+    |  lghi TMPR0, LJ_TNIL
+    |  cg TMPR0, 0(TMPR1)
+    |  je >4				// Previous value is nil?
+    |2:
+    |  tm TAB:RB->marked, LJ_GC_BLACK	// isblack(table)
+    |  jne >7
+    |3:  // Set node value.
+    |  sllg RA, RA, 3
+    |  lg ITYPE, 0(RA, BASE)
+    |  stg ITYPE, 0(TMPR1)
+    |  ins_next
+    |
+    |4:  // Check for __newindex if previous value is nil.
+    |  lg TAB:ITYPE, TAB:RB->metatable
+    |  cghi TAB:ITYPE, 0
+    |  je <2
+    |  tm TAB:ITYPE->nomm, 1<<MM_newindex
+    |  je ->vmeta_tsets			// 'no __newindex' flag NOT set: check.
+    |  j <2
+    |
+    |5:  // Follow hash chain.
+    |  lg NODE:TMPR1, NODE:TMPR1->next
+    |  cghi NODE:TMPR1, 0
+    |  jne <1
+    |  // End of hash chain: key not found, add a new one.
+    |
+    |  // But check for __newindex first.
+    |  lg TAB:TMPR1, TAB:RB->metatable
+    |  cghi TAB:TMPR1, 0
+    |  je >6				// No metatable: continue.
+    |  tm TAB:TMPR1->nomm, 1<<MM_newindex
+    |  je ->vmeta_tsets			// 'no __newindex' flag NOT set: check.
+    |6:
+    |  stg ITYPE, SAVE_TMP
+    |  lg L:CARG1, SAVE_L
+    |  stg BASE, L:CARG1->base
+    |  la CARG3, SAVE_TMP
+    |  lgr CARG2, TAB:RB
+    |  stg PC, SAVE_PC
+    |  brasl r14, extern lj_tab_newkey	// (lua_State *L, GCtab *t, TValue *k)
+    |  // Handles write barrier for the new key. TValue * returned in r2 (CRET1).
+    |  lgr TMPR1, CRET1
+    |  lg L:CRET1, SAVE_L
+    |  lg BASE, L:CRET1->base
+    |  llgc RA, PC_RA
+    |  j <2				// Must check write barrier for value.
+    |
+    |7:  // Possible table write barrier for the value. Skip valiswhite check.
+    |  barrierback TAB:RB, ITYPE
+    |  j <3
+    break;
+  case BC_TSETB:
+    |  ins_ABC	// RA = src, RB = table, RC = byte literal
+    |  sllg RB, RB, 3
+    |  lg TAB:RB, 0(RB, BASE)
+    |  checktab TAB:RB, ->vmeta_tsetb
+    |  cl RC, TAB:RB->asize
+    |  jhe ->vmeta_tsetb
+    |  sllg RC, RC, 3
+    |  ag RC, TAB:RB->array
+    |  lghi TMPR0, LJ_TNIL
+    |  cg TMPR0, 0(RC)
+    |  je >3				// Previous value is nil?
+    |1:
+    |  tm TAB:RB->marked, LJ_GC_BLACK		// isblack(table)
+    |  jne >7
+    |2:	 // Set array slot.
+    |  sllg RA, RA, 3
+    |  lg ITYPE, 0(RA, BASE)
+    |  stg ITYPE, 0(RC)
+    |  ins_next
+    |
+    |3:  // Check for __newindex if previous value is nil.
+    |  lg TAB:TMPR1, TAB:RB->metatable
+    |  cghi TAB:TMPR1, 0
+    |  je <1
+    |  tm TAB:TMPR1->nomm, 1<<MM_newindex
+    |  je ->vmeta_tsetb			// 'no __newindex' flag NOT set: check.
+    |  j <1
+    |
+    |7:  // Possible table write barrier for the value. Skip valiswhite check.
+    |  barrierback TAB:RB, TMPR1
+    |  j <2
+    break;
+  case BC_TSETR:
+    |  ins_ABC	// RA = src, RB = table, RC = key
+    |  sllg RB, RB, 3
+    |  lg TAB:RB, 0(RB, BASE)
+    |  cleartp TAB:RB
+    |  sllg RC, RC, 3
+    |  lg RC, 0(RC, BASE)
+    |  tm TAB:RB->marked, LJ_GC_BLACK			// isblack(table)
+    |  jne >7
+    |2:
+    |  cl RC, TAB:RB->asize
+    |  jhe ->vmeta_tsetr
+    |  llgfr RC, RC
+    |  sllg RC, RC, 3
+    |  ag RC, TAB:RB->array
+    |  // Set array slot.
+    |->BC_TSETR_Z:
+    |  sllg RA, RA, 3
+    |  lg ITYPE, 0(RA, BASE)
+    |  stg ITYPE, 0(RC)
+    |  ins_next
+    |
+    |7:  // Possible table write barrier for the value. Skip valiswhite check.
+    |  barrierback TAB:RB, TMPR1
+    |  j <2
+    break;
+  case BC_TSETM:
+    |  ins_AD	// RA = base (table at base-1), RD = num const (start index)
+    |1:
+    |  sllg RA, RA, 3
+    |  sllg TMPR1, RD, 3
+    |  llgf TMPR1, 4(TMPR1, KBASE)	// Integer constant is in lo-word.
+    |  la RA, 0(RA, BASE)
+    |  lg TAB:RB, -8(RA)		// Guaranteed to be a table.
+    |  cleartp TAB:RB
+    |  tm TAB:RB->marked, LJ_GC_BLACK		// isblack(table)
+    |  jne >7
+    |2:
+    |  llgf RD, SAVE_MULTRES
+    |  aghi RD, -1
+    |  je >4				// Nothing to copy?
+    |  agr RD, TMPR1			// Compute needed size.
+    |  clgf RD, TAB:RB->asize
+    |  jh >5				// Doesn't fit into array part?
+    |  sgr RD, TMPR1
+    |  sllg TMPR1, TMPR1, 3
+    |  ag TMPR1, TAB:RB->array
+    |3:  // Copy result slots to table.
+    |  lg RB, 0(RA)
+    |  la RA, 8(RA)
+    |  stg RB, 0(TMPR1)
+    |  la TMPR1, 8(TMPR1)
+    |  brctg RD, <3
+    |4:
+    |  ins_next
+    |
+    |5:  // Need to resize array part.
+    |  lg L:CARG1, SAVE_L
+    |  stg BASE, L:CARG1->base
+    |  lgr CARG2, TAB:RB
+    |  lgfr CARG3, RD
+    |  lgr L:RB, L:CARG1
+    |  stg PC, SAVE_PC
+    |  brasl r14, extern lj_tab_reasize	// (lua_State *L, GCtab *t, int nasize)
+    |  lg BASE, L:RB->base
+    |  llgc RA, PC_RA			// Restore RA.
+    |  llgh RD, PC_RD			// Restore RD.
+    |  j <1				// Retry.
+    |
+    |7:  // Possible table write barrier for any value. Skip valiswhite check.
+    |  barrierback TAB:RB, RD
+    |  j <2
+    break;
+  /* -- Calls and vararg handling ----------------------------------------- */
+  case BC_CALL: case BC_CALLM:
+    |  ins_A_C	// RA = base, (RB = nresults+1,) RC = nargs+1 | extra_nargs
+    |  sllg RA, RA, 3
+    |  lgr RD, RC			// The argument count is carried in RD from here on.
+    if (op == BC_CALLM) {
+      |  agf NARGS:RD, SAVE_MULTRES	// BC_CALLM only: add the 32-bit SAVE_MULTRES value to the count.
+    }
+    |  lg LFUNC:RB, 0(RA, BASE)		// The callee is the value in slot RA.
+    |  checkfunc LFUNC:RB, ->vmeta_call_ra
+    |  la BASE, 16(RA, BASE)		// New BASE = address of slot RA plus 16 bytes.
+    |  ins_call
+    break;
+  case BC_CALLMT:
+    |  ins_AD	// RA = base, RD = extra_nargs
+    |  agf NARGS:RD, SAVE_MULTRES	// Add SAVE_MULTRES (as BC_CALLM does). Fall through: assumes BC_CALLT follows and ins_AD is a no-op.
+    break;
+  case BC_CALLT:
+    |  ins_AD	// RA = base, RD = nargs+1
+    |  sllg RA, RA, 3
+    |  la RA, 16(RA, BASE)
+    |  lgr KBASE, BASE			// Use KBASE for move + vmeta_call hint.
+    |  lg LFUNC:RB, -16(RA)
+    |  checktp_nc LFUNC:RB, LJ_TFUNC, ->vmeta_call
+    |->BC_CALLT_Z:
+    |  lg PC, -8(BASE)
+    |  tmll PC, FRAME_TYPE
+    |  jne >7
+    |1:
+    |  stg LFUNC:RB, -16(BASE)		// Copy func+tag down, reloaded below.
+    |  aghi NARGS:RD, -1
+    |  je >3
+    |2:  // Move args down.
+    |  lg RB, 0(RA)
+    |  la RA, 8(RA)
+    |  stg RB, 0(KBASE)
+    |  la KBASE, 8(KBASE)
+    |  brctg NARGS:RD, <2
+    |
+    |  lg LFUNC:RB, -16(BASE)
+    |3:
+    |  cleartp LFUNC:RB
+    |  llgc TMPR1, LFUNC:RB->ffid
+    |  cghi TMPR1, 1			// (> FF_C) Calling a fast function?
+    |  jh >5
+    |4:
+    |  ins_callt
+    |
+    |5:  // Tailcall to a fast function.
+    |  tmll PC, FRAME_TYPE		// Lua frame below?
+    |  jne <4
+    |  llgc RA, PC_RA
+    |  lcgr RA, RA
+    |  sllg RA, RA, 3
+    |  lg LFUNC:KBASE, -32(RA, BASE)	// Need to prepare KBASE.
+    |  cleartp LFUNC:KBASE
+    |  lg KBASE, LFUNC:KBASE->pc
+    |  lg KBASE, (PC2PROTO(k))(KBASE)
+    |  j <4
+    |
+    |7:  // Tailcall from a vararg function.
+    |  aghi PC, -FRAME_VARG
+    |  tmll PC, FRAME_TYPEP
+    |  jne >8				// Vararg frame below?
+    |  sgr BASE, PC			// Need to relocate BASE/KBASE down.
+    |  lgr KBASE, BASE
+    |  lg PC, -8(BASE)
+    |  j <1
+    |8:
+    |  aghi PC, FRAME_VARG
+    |  j <1
+    break;
+  case BC_ITERC:
+    |  ins_A	// RA = base, (RB = nresults+1,) RC = nargs+1 (2+1)
+    |  sllg RA, RA, 3
+    |  la RA, 16(RA, BASE)		// fb = base+2
+    |  lg RB, -32(RA)			// Copy state. fb[0] = fb[-4].
+    |  lg RC, -24(RA)			// Copy control var. fb[1] = fb[-3].
+    |  stg RB, 0(RA)
+    |  stg RC, 8(RA)
+    |  lg LFUNC:RB, -40(RA)		// Copy callable. fb[-2] = fb[-5]
+    |  stg LFUNC:RB, -16(RA)
+    |  lghi NARGS:RD, 2+1		// Handle like a regular 2-arg call.
+    |  checkfunc LFUNC:RB, ->vmeta_call
+    |  lgr BASE, RA
+    |  ins_call
+    break;
+  case BC_ITERN:
+    |  ins_A	// RA = base, (RB = nresults+1, RC = nargs+1 (2+1))
+    |.if JIT
+    |  // NYI: add hotloop, record BC_ITERN.
+    |.endif
+    |  sllg RA, RA, 3
+    |  lg TAB:RB, -16(RA, BASE)
+    |  cleartp TAB:RB
+    |  llgf RC, -4(RA, BASE)		// Get index from control var.
+    |  llgf TMPR1, TAB:RB->asize
+    |  la PC, 4(PC)
+    |  lg ITYPE, TAB:RB->array
+    |1:  // Traverse array part.
+    |  clr RC, TMPR1; jhe >5		// Index points after array part?
+    |  sllg RD, RC, 3		// Warning: won't work if RD==RC!
+    |  lg TMPR0, 0(RD, ITYPE)
+    |  cghi TMPR0, LJ_TNIL;  je >4
+    |  // Copy array slot to returned value.
+    |  lgr RB, TMPR0
+    |  stg RB, 8(RA, BASE)
+    |  // Return array index as a numeric key.
+    |  setint ITYPE, RC
+    |  stg ITYPE, 0(RA, BASE)
+    |  ahi RC, 1
+    |  sty RC, -4(RA, BASE)		// Update control var.
+    |2:
+    |  llgh RD, PC_RD			// Get target from ITERL.
+    |  branchPC RD
+    |3:
+    |  ins_next
+    |
+    |4:  // Skip holes in array part.
+    |  ahi RC, 1
+    |  j <1
+    |
+    |5:  // Traverse hash part.
+    |  sr RC, TMPR1
+    |6:
+    |  cl RC, TAB:RB->hmask; jh <3	// End of iteration? Branch to ITERL+1.
+    |  llgfr ITYPE, RC
+    |  mghi ITYPE, #NODE
+    |  ag NODE:ITYPE, TAB:RB->node
+    |  lghi TMPR0, LJ_TNIL
+    |  cg TMPR0, NODE:ITYPE->val; je >7
+    |  ar TMPR1, RC
+    |  ahi TMPR1, 1
+    |  // Copy key and value from hash slot.
+    |  lg RB, NODE:ITYPE->key
+    |  lg RC, NODE:ITYPE->val
+    |  stg RB, 0(RA, BASE)
+    |  stg RC, 8(RA, BASE)
+    |  sty TMPR1, -4(RA, BASE)
+    |  j <2
+    |
+    |7:  // Skip holes in hash part.
+    |  ahi RC, 1
+    |  j <6
+    break;
+  case BC_ISNEXT:
+    |  ins_AD	// RA = base, RD = target (points to ITERN)
+    |  sllg RA, RA, 3
+    |  lg CFUNC:RB, -24(RA, BASE)
+    |  checkfunc CFUNC:RB, >5
+    |  lg TMPR1, -16(RA, BASE)
+    |  checktptp TMPR1, LJ_TTAB, >5
+    |  lghi TMPR0, LJ_TNIL
+    |  cg TMPR0, -8(RA, BASE); jne >5
+    |  llgc TMPR1, CFUNC:RB->ffid
+    |  clfi TMPR1, (uint8_t)FF_next_N; jne >5
+    |  branchPC RD
+    |  llihl TMPR1, 0x7fff
+    |  iihh TMPR1, 0xfffe
+    |  stg TMPR1, -8(RA, BASE)		// Initialize control var.
+    |1:
+    |  ins_next
+    |5:  // Despecialize bytecode if any of the checks fail.
+    |  lghi TMPR0, BC_JMP
+    |  stcy  TMPR0, PC_OP
+    |  branchPC RD
+    |  mvi 3(PC), BC_ITERC
+    |  j <1
+    break;
+  case BC_VARG:
+    |  ins_ABC	// RA = base, RB = nresults+1, RC = numparams
+    |  sllg RA, RA, 3
+    |  sllg RB, RB, 3
+    |  sllg RC, RC, 3
+    |  la TMPR1, (16+FRAME_VARG)(RC, BASE)
+    |  la RA, 0(RA, BASE)
+    |  sg TMPR1, -8(BASE)
+    |  // Note: TMPR1 may now be even _above_ BASE if nargs was < numparams.
+    |  cghi RB, 0
+    |  je >5				// Copy all varargs?
+    |  lay RB, -8(RA, RB)
+    |  clgr TMPR1, BASE			// No vararg slots?
+    |  lghi TMPR0, LJ_TNIL
+    |  jnl >2
+    |1:  // Copy vararg slots to destination slots.
+    |  lg RC, -16(TMPR1)
+    |  la TMPR1, 8(TMPR1)
+    |  stg RC, 0(RA)
+    |  la RA, 8(RA)
+    |  clgr RA, RB			// All destination slots filled?
+    |  jnl >3
+    |  clgr TMPR1, BASE			// No more vararg slots?
+    |  jl <1
+    |2:  // Fill up remainder with nil.
+    |  stg TMPR0, 0(RA)
+    |  la RA, 8(RA)
+    |  clgr RA, RB
+    |  jl <2
+    |3:
+    |  ins_next
+    |
+    |5:  // Copy all varargs.
+    |  lghi TMPR0, 1
+    |  st TMPR0, SAVE_MULTRES		// MULTRES = 0+1
+    |  lgr RC, BASE
+    |  slgr RC, TMPR1
+    |  jno <3				// No vararg slots? (borrow or zero)
+    |  llgfr RB, RC
+    |  srlg RB, RB, 3
+    |  ahi RB, 1
+    |  st RB, SAVE_MULTRES		// MULTRES = #varargs+1
+    |  lg L:RB, SAVE_L
+    |  agr RC, RA
+    |  clg RC, L:RB->maxstack
+    |  jh >7				// Need to grow stack?
+    |6:  // Copy all vararg slots.
+    |  lg RC, -16(TMPR1)
+    |  la TMPR1, 8(TMPR1)
+    |  stg RC, 0(RA)
+    |  la RA, 8(RA)
+    |  clgr TMPR1, BASE			// No more vararg slots?
+    |  jl <6
+    |  j <3
+    |
+    |7:  // Grow stack for varargs.
+    |  stg BASE, L:RB->base
+    |  stg RA, L:RB->top
+    |  stg PC, SAVE_PC
+    |  sgr TMPR1, BASE			// Need delta, because BASE may change.
+    |  st TMPR1, SAVE_TMP_HI
+    |  llgf CARG2, SAVE_MULTRES
+    |  aghi CARG2, -1
+    |  lgr CARG1, L:RB
+    |  brasl r14, extern lj_state_growstack	// (lua_State *L, int n)
+    |  lg BASE, L:RB->base
+    |  lgf TMPR1, SAVE_TMP_HI
+    |  lg RA, L:RB->top
+    |  agr TMPR1, BASE
+    |  j <6
+    break;
+  /* -- Returns ----------------------------------------------------------- */
+  case BC_RETM:
+    |  ins_AD	// RA = results, RD = extra_nresults
+    |  agf RD, SAVE_MULTRES			// MULTRES >=1, so RD >=1.
+    |  // Fall through. Assumes BC_RET follows and ins_AD is a no-op.
+    break;
+  case BC_RET: case BC_RET0: case BC_RET1:
+    |  ins_AD	// RA = results, RD = nresults+1
+    if (op != BC_RET0) {
+      |  sllg RA, RA, 3
+    }
+    |1:
+    |  lg PC, -8(BASE)
+    |  st RD, SAVE_MULTRES		// Save nresults+1.
+    |  tmll PC, FRAME_TYPE		// Check frame type marker.
+    |  jne >7				// Not returning to a fixarg Lua func?
+    switch (op) {
+    case BC_RET:
+      |->BC_RET_Z:
+      |  lgr KBASE, BASE		// Use KBASE for result move.
+      |  aghi RD, -1
+      |  je >3
+      |2:  // Move results down.
+      |  lg RB, 0(KBASE, RA)
+      |  stg RB, -16(KBASE)
+      |  la KBASE, 8(KBASE)
+      |  brctg RD, <2
+      |3:
+      |  llgf RD, SAVE_MULTRES		// Note: MULTRES may be >256.
+      |  llgc RB, PC_RB
+      |5:
+      |  cgr RB, RD			// More results expected?
+      |  jh >6
+      break;
+    case BC_RET1:
+      |  lg RB, 0(BASE, RA)
+      |  stg RB, -16(BASE)
+      /* fallthrough */
+    case BC_RET0:
+      |5:
+      |  llgc TMPR1, PC_RB
+      |  cgr TMPR1, RD
+      |  jh >6
+    default:
+      break;
+    }
+    |  llgc RA, PC_RA
+    |  lcgr RA, RA
+    |  sllg RA, RA, 3
+    |  lay BASE, -16(RA, BASE)		// base = base - (RA+2)*8
+    |  lg LFUNC:KBASE, -16(BASE)
+    |  cleartp LFUNC:KBASE
+    |  lg KBASE, LFUNC:KBASE->pc
+    |  lg KBASE, PC2PROTO(k)(KBASE)
+    |  ins_next
+    |
+    |6:  // Fill up results with nil.
+    |  lghi TMPR1, LJ_TNIL
+    if (op == BC_RET) {
+      |  stg TMPR1, -16(KBASE)		// Note: relies on shifted base.
+      |  la KBASE, 8(KBASE)
+    } else {
+      |  sllg RC, RD, 3 		// RC used as temp.
+      |  stg TMPR1, -24(RC, BASE)
+    }
+    |  la RD, 1(RD)
+    |  j <5
+    |
+    |7:  // Non-standard return case.
+    |  lay RB, -FRAME_VARG(PC)
+    |  tmll RB, FRAME_TYPEP
+    |  jne ->vm_return
+    |  // Return from vararg function: relocate BASE down and RA up.
+    |  sgr BASE, RB
+    if (op != BC_RET0) {
+      |  agr RA, RB
+    }
+    |  j <1
+    break;
+  /* -- Loops and branches ------------------------------------------------ */
+  |.define FOR_IDX,  0(RA)
+  |.define FOR_STOP, 8(RA)
+  |.define FOR_STEP, 16(RA)
+  |.define FOR_EXT,  24(RA)
+  case BC_FORL:
+    |.if JIT
+    |  hotloop RB
+    |.endif
+    | // Fall through. Assumes BC_IFORL follows and ins_AJ is a no-op.
+    break;
+  case BC_JFORI:
+  case BC_JFORL:
+    break;
+  case BC_FORI:
+  case BC_IFORL:
+    vk = (op == BC_IFORL || op == BC_JFORL);
+    |  ins_AJ	// RA = base, RD = target (after end of loop or start of loop)
+    |  sllg RA, RA, 3
+    |  la RA, 0(RA, BASE)
+    |  lg RB, FOR_IDX
+    |  checkint RB, >9
+    |  lg TMPR1, FOR_STOP
+    if (!vk) {
+      |  checkint TMPR1, ->vmeta_for
+      |  lg ITYPE, FOR_STEP
+      |  chi ITYPE, 0; jl >5
+      |  srag ITYPE, ITYPE, 47
+      |  cghi ITYPE, LJ_TISNUM; jne ->vmeta_for
+    } else {
+      |  // lg TMPR1, FOR_STOP
+      |  checkinttp TMPR1, ->assert_bad_for_arg_type
+      |  lg TMPR0, FOR_STEP
+      |  checkinttp TMPR0, ->assert_bad_for_arg_type
+      |  lg ITYPE, FOR_STEP
+      |  chi ITYPE, 0; jl >5
+      |  ar RB, ITYPE; jo >1
+      |  setint RB
+      |  stg RB, FOR_IDX
+    }
+    |  cr RB, TMPR1
+    |  stg RB, FOR_EXT
+    if (op == BC_FORI) {
+      |  jle >7
+      |1:
+      |6:
+      |  branchPC RD
+    } else if (op == BC_JFORI) {
+      |  branchPC RD
+      |  llgh RD, PC_RD
+      |  jle =>BC_JLOOP
+      |1:
+      |6:
+    } else if (op == BC_IFORL) {
+      |  jh >7
+      |6:
+      |  branchPC RD
+      |1:
+    } else {
+      |  jle =>BC_JLOOP
+      |1:
+      |6:
+    }
+    |7:
+    |  ins_next
+    |
+    |5:  // Invert check for negative step.
+    if (!vk) {
+      |  srag ITYPE, ITYPE, 47
+      |  cghi ITYPE, LJ_TISNUM; jne ->vmeta_for
+    } else {
+      |  ar RB, ITYPE; jo <1
+      |  setint RB
+      |  stg RB, FOR_IDX
+    }
+    |  cr RB, TMPR1
+    |  stg RB, FOR_EXT
+    if (op == BC_FORI) {
+      |  jhe <7
+    } else if (op == BC_JFORI) {
+      |  branchPC RD
+      |  llgh RD, PC_RD
+      |  jhe =>BC_JLOOP
+    } else if (op == BC_IFORL) {
+      |  jl <7
+    } else {
+      |  jhe =>BC_JLOOP
+    }
+    |  j <6
+    |9:  // Fallback to FP variant.
+    if (!vk) {
+      |  jhe ->vmeta_for
+    }
+    if (!vk) {
+      |  lg TMPR0, FOR_STOP
+      |  checknumtp TMPR0, ->vmeta_for
+    } else {
+      |  lg TMPR0, FOR_STOP
+      |  checknumtp TMPR0, ->assert_bad_for_arg_type
+      |  lg TMPR0, FOR_STEP
+      |  checknumtp TMPR0, ->assert_bad_for_arg_type
+    }
+    |  lg RB, FOR_STEP
+    if (!vk) {
+      |  checknum RB, ->vmeta_for
+    }
+    |  ld f0, FOR_IDX
+    |  ld f1, FOR_STOP
+    if (vk) {
+      |  adb f0, FOR_STEP
+      |  std f0, FOR_IDX
+    }
+    |  cghi RB, 0; jl >3
+    |  cdbr f1, f0
+    |1:
+    |  std f0, FOR_EXT
+    if (op == BC_FORI) {
+      |  jnl <7
+    } else if (op == BC_JFORI) {
+      |  branchPC RD
+      |  llgh RD, PC_RD
+      |  jnl =>BC_JLOOP
+    } else if (op == BC_IFORL) {
+      |  jl <7
+    } else {
+      |  jnl =>BC_JLOOP
+    }
+    |  j <6
+    |
+    |3:  // Invert comparison if step is negative.
+    |  cdbr f0, f1
+    |  j <1
+    break;
+  case BC_ITERL:
+    |.if JIT
+    |  hotloop RB
+    |.endif
+    | // Fall through. Assumes BC_IITERL follows and ins_AJ is a no-op.
+    break;
+  case BC_JITERL:
+    break;
+  case BC_IITERL:
+    |  ins_AJ	// RA = base, RD = target
+    |  sllg RA, RA, 3
+    |  la RA, 0(RA, BASE)
+    |  lg RB, 0(RA)
+    |  cghi RB, LJ_TNIL; je >1		// Stop if iterator returned nil.
+    if (op == BC_JITERL) {
+      |  stg RB, -8(RA)
+      |  j =>BC_JLOOP
+    } else {
+      |  branchPC RD			// Otherwise save control var + branch.
+      |  stg RB, -8(RA)
+    }
+    |1:
+    |  ins_next
+    break;
+  case BC_LOOP:
+    |  ins_A	// RA = base, RD = target (loop extent)
+    |  // Note: RA/RD is only used by trace recorder to determine scope/extent
+    |  // This opcode does NOT jump, its only purpose is to detect a hot loop.
+    |.if JIT
+    |  hotloop RBd
+    |.endif
+    | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op.
+    break;
+  case BC_ILOOP:
+    |  ins_A	// RA = base, RD = target (loop extent)
+    |  ins_next
+    break;
+  case BC_JLOOP:
+    |  stg r0, 0
+    |  stg r0, 0
+    break;
+  case BC_JMP:
+    |  ins_AJ	// RA = unused, RD = target
+    |  branchPC RD
+    |  ins_next
+    break;
+  /* -- Function headers -------------------------------------------------- */
+   /*
+   ** Reminder: A function may be called with func/args above L->maxstack,
+   ** i.e. occupying EXTRA_STACK slots. And vmeta_call may add one extra slot,
+   ** too. This means all FUNC* ops (including fast functions) must check
+   ** for stack overflow _before_ adding more slots!
+   */
+  case BC_FUNCF:
+    |.if JIT
+    |  stg r0, 0
+    |.endif
+  case BC_FUNCV:  /* NYI: compiled vararg functions. */
+    | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow and ins_AD is a no-op.
+    break;
+  case BC_JFUNCF:
+    break;
+  case BC_IFUNCF:
+    |  ins_AD  // BASE = new base, RA = framesize, RD = nargs+1
+    |  lg KBASE, (PC2PROTO(k)-4)(PC)
+    |  lg L:RB, SAVE_L
+    |  sllg RA, RA, 3
+    |  la RA, 0(RA, BASE)		// Top of frame.
+    |  clg RA, L:RB->maxstack
+    |  jh ->vm_growstack_f
+    |  llgc RA, (PC2PROTO(numparams)-4)(PC)
+    |  clgr NARGS:RD, RA		// Check for missing parameters.
+    |  jle >3
+    |2:
+    if (op == BC_JFUNCF) {
+      |  llgh RD, PC_RD
+      |  j =>BC_JLOOP
+    } else {
+      |  ins_next
+    }
+    |
+    |3:  // Clear missing parameters.
+    |  sllg TMPR1, NARGS:RD, 3
+    |  lghi TMPR0, LJ_TNIL
+    |4:
+    |  stg TMPR0, -8(TMPR1, BASE)
+    |  la TMPR1, 8(TMPR1)
+    |  la RD, 1(RD)
+    |  clgr RD, RA
+    |  jle <4
+    |  j <2
+    break;
+  case BC_JFUNCV:
+    break;
+    | stg r0, 0  // NYI: compiled vararg functions
+    break;           /* NYI: compiled vararg functions. */
+  case BC_IFUNCV:
+    |  ins_AD  // BASE = new base, RA = framesize, RD = nargs+1
+    |  sllg TMPR1, NARGS:RD, 3
+    |  la RB, (FRAME_VARG+8)(TMPR1)
+    |  la RD, 8(TMPR1, BASE)
+    |  lg LFUNC:KBASE, -16(BASE)
+    |  stg RB, -8(RD)			// Store delta + FRAME_VARG.
+    |  stg LFUNC:KBASE, -16(RD)		// Store copy of LFUNC.
+    |  lg L:RB, SAVE_L
+    |  sllg RA, RA, 3
+    |  la RA, 0(RA, RD)
+    |  cg RA, L:RB->maxstack
+    |  jh ->vm_growstack_v		// Need to grow stack.
+    |  lgr RA, BASE
+    |  lgr BASE, RD
+    |  llgc RB, (PC2PROTO(numparams)-4)(PC)
+    |  cghi RB, 0
+    |  je >2
+    |  aghi RA, 8
+    |  lghi TMPR1, LJ_TNIL
+    |1:  // Copy fixarg slots up to new frame.
+    |  la RA, 8(RA)
+    |  cgr RA, BASE
+    |  jnl >3				// Less args than parameters?
+    |  lg KBASE, -16(RA)
+    |  stg KBASE, 0(RD)
+    |  la RD, 8(RD)
+    |  stg TMPR1, -16(RA)	// Clear old fixarg slot (help the GC).
+    |  brctg RB, <1
+    |2:
+    if (op == BC_JFUNCV) {
+      |  llgh RD, PC_RD
+      |  j =>BC_JLOOP
+    } else {
+      |  lg KBASE, (PC2PROTO(k)-4)(PC)
+      |  ins_next
+    }
+    |
+    |3:  // Clear missing parameters.
+    |  stg TMPR1, 0(RD)			// TMPR1=LJ_TNIL (-1) here.
+    |  la RD, 8(RD)
+    |  brctg RB, <3
+    |  j <2
+    break;
+  case BC_FUNCC:
+  case BC_FUNCCW:
+    |  ins_AD  // BASE = new base, RD = nargs+1
+    |  lg CFUNC:RB, -16(BASE)
+    |  cleartp CFUNC:RB
+    |  lg KBASE, CFUNC:RB->f
+    |  lg L:RB, SAVE_L
+    |  sllg RD, NARGS:RD, 3
+    |  lay RD, -8(RD,BASE)
+    |  stg BASE, L:RB->base
+    |  la RA, (8*LUA_MINSTACK)(RD)
+    |  clg RA, L:RB->maxstack
+    |  stg RD, L:RB->top
+    |  lgr CARG1, L:RB
+    if (op != BC_FUNCC) {
+      |  lgr CARG2, KBASE
+    }
+    |  jh ->vm_growstack_c		// Need to grow stack.
+    |  set_vmstate C
+    if (op == BC_FUNCC) {
+      |  basr r14, KBASE		// (lua_State *L)
+    } else {
+      |  // (lua_State *L, lua_CFunction f)
+      |  lg TMPR1, (DISPATCH_GL(wrapf))(DISPATCH)
+      |  basr r14, TMPR1
+    }
+    |  // nresults returned in r2 (CRET1).
+    |  lgr RD, CRET1
+    |  lg BASE, L:RB->base
+    |  stg L:RB, (DISPATCH_GL(cur_L))(DISPATCH)
+    |  set_vmstate INTERP
+    |  sllg TMPR1, RD, 3
+    |  la RA, 0(TMPR1, BASE)
+    |  lcgr RA, RA
+    |  ag RA, L:RB->top			// RA = (L->top-(L->base+nresults))*8
+    |  lg PC, -8(BASE)			// Fetch PC of caller.
+    |  j ->vm_returnc
+    break;
+  /* ---------------------------------------------------------------------- */
+  default:
+    fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]);
+    exit(2);
+    break;
+  }
+static int build_backend(BuildCtx *ctx)	/* Build all subroutines, then one handler per bytecode op. */
+  int op;	/* Opcode number currently being generated. */
+  dasm_growpc(Dst, BC__MAX);	/* Make room for BC__MAX DynASM PC labels (cf. the =>BC_JLOOP targets). */
+  build_subroutines(ctx);	/* Subroutines are emitted before any opcode handler. */
+  |.code_op
+  for (op = 0; op < BC__MAX; op++)	/* Visit every opcode in [0, BC__MAX). */
+    build_ins(ctx, (BCOp)op, op);	/* Same number passed twice: once as BCOp, once as plain int. */
+  return BC__MAX;	/* Always returns BC__MAX. */
+/* Emit pseudo frame-info for all assembler functions.
+**
+** Only BUILD_elfasm mode produces output; every other mode is a no-op.
+** The records are written as assembler text to ctx->fp:
+**   .debug_frame: one CIE (.Lframe0) followed by two FDEs.
+**   .eh_frame:    two CIEs (.Lframe1 names lj_err_unwind_dwarf as the
+**                 personality routine, .Lframe2 has none), one FDE each.
+** In both sections the first FDE covers fcofs bytes starting at .Lbegin and
+** the second covers the remaining ctx->codesz - fcofs bytes starting at
+** lj_vm_ffi_call.
+*/
+static void emit_asm_debug(BuildCtx *ctx)
+  int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code);	/* Byte offset of the vm_ffi_call global from ctx->code. */
+  switch (ctx->mode) {
+  case BUILD_elfasm:
+    fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n");
+    fprintf(ctx->fp,	/* .debug_frame CIE shared by the two FDEs below. */
+	".Lframe0:\n"
+	"\t.long .LECIE0-.LSCIE0\n"	/* CIE length */
+	".LSCIE0:\n"
+	"\t.long 0xffffffff\n"	/* CIE id (.debug_frame) */
+	"\t.byte 0x1\n"	/* CIE version */
+	"\t.string \"\"\n"	/* empty augmentation string */
+	"\t.uleb128 1\n"	/* code alignment factor */
+	"\t.sleb128 -8\n"	/* data alignment factor */
+	"\t.byte 0xe\n"	/* return address register (r14) */
+	"\t.byte 0xc\n\t.uleb128 0xf\n\t.uleb128 160\n"	/* def_cfa r15, offset 160 */
+	"\t.align 8\n"
+	".LECIE0:\n\n");
+    fprintf(ctx->fp,	/* FDE 0: code from .Lbegin up to lj_vm_ffi_call. */
+	".LSFDE0:\n"
+	"\t.long .LEFDE0-.LASFDE0\n"	/* FDE length */
+	".LASFDE0:\n"
+	"\t.long .Lframe0\n"	/* CIE pointer */
+	"\t.quad .Lbegin\n"	/* initial location */
+	"\t.quad %d\n"	/* address range (fcofs) */
+	"\t.byte 0xe\n\t.uleb128 %d\n"		/* def_cfa_offset */
+	"\t.byte 0x86\n\t.uleb128 0xe\n"	/* offset r6 */
+	"\t.byte 0x87\n\t.uleb128 0xd\n"	/* offset r7 */
+	"\t.byte 0x88\n\t.uleb128 0xc\n"	/* offset r8 */
+	"\t.byte 0x89\n\t.uleb128 0xb\n"	/* offset r9 */
+	"\t.byte 0x8a\n\t.uleb128 0xa\n"	/* offset r10 */
+	"\t.byte 0x8b\n\t.uleb128 0x9\n"	/* offset r11 */
+	"\t.byte 0x8c\n\t.uleb128 0x8\n"	/* offset r12 */
+	"\t.byte 0x8d\n\t.uleb128 0x7\n"	/* offset r13 */
+	"\t.byte 0x8e\n\t.uleb128 0x6\n"	/* offset r14 */
+	"\t.byte 0x8f\n\t.uleb128 0x5\n"	/* offset r15 */
+	"\t.align 8\n"
+	".LEFDE0:\n\n", fcofs, CFRAME_SIZE+160);
+    fprintf(ctx->fp,	/* FDE 1: lj_vm_ffi_call through the end of the code. */
+	".LSFDE1:\n"
+	"\t.long .LEFDE1-.LASFDE1\n"	/* FDE length */
+	".LASFDE1:\n"
+	"\t.long .Lframe0\n"	/* CIE pointer */
+	"\t.quad lj_vm_ffi_call\n"	/* initial location */
+	"\t.quad %d\n"	/* address range (codesz - fcofs) */
+	"\t.byte 0xe\n\t.uleb128 160\n"		/* def_cfa_offset */
+	"\t.byte 0xd\n\t.uleb128 0xd\n"		/* def_cfa_register r13 (FP) */
+	"\t.byte 0x86\n\t.uleb128 0xe\n"	/* offset r6 */
+	"\t.byte 0x87\n\t.uleb128 0xd\n"	/* offset r7 */
+	"\t.byte 0x88\n\t.uleb128 0xc\n"	/* offset r8 */
+	"\t.byte 0x89\n\t.uleb128 0xb\n"	/* offset r9 */
+	"\t.byte 0x8a\n\t.uleb128 0xa\n"	/* offset r10 */
+	"\t.byte 0x8b\n\t.uleb128 0x9\n"	/* offset r11 */
+	"\t.byte 0x8c\n\t.uleb128 0x8\n"	/* offset r12 */
+	"\t.byte 0x8d\n\t.uleb128 0x7\n"	/* offset r13 */
+	"\t.byte 0x8e\n\t.uleb128 0x6\n"	/* offset r14 */
+	"\t.byte 0x8f\n\t.uleb128 0x5\n"	/* offset r15 */
+	"\t.align 8\n"
+	".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
+    fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n");
+    fprintf(ctx->fp,	/* .eh_frame CIE with a personality routine. */
+	".Lframe1:\n"
+	"\t.long .LECIE1-.LSCIE1\n"	/* CIE length */
+	".LSCIE1:\n"
+	"\t.long 0\n"	/* CIE id (.eh_frame) */
+	"\t.byte 0x1\n"	/* CIE version */
+	"\t.string \"zPR\"\n"	/* augmentation: data present, personality, FDE encoding */
+	"\t.uleb128 0x1\n"	/* code alignment factor */
+	"\t.sleb128 -8\n"	/* data alignment factor */
+	"\t.byte 0xe\n"	/* return address register (r14) */
+	"\t.uleb128 6\n"			/* augmentation length */
+	"\t.byte 0x1b\n"			/* personality encoding: pcrel|sdata4 */
+	"\t.long lj_err_unwind_dwarf-.\n"	/* personality routine, PC-relative */
+	"\t.byte 0x1b\n"			/* FDE pointer encoding: pcrel|sdata4 */
+	"\t.byte 0xc\n\t.uleb128 0xf\n\t.uleb128 160\n"	/* def_cfa r15, offset 160 */
+	"\t.align 8\n"
+	".LECIE1:\n\n");
+    fprintf(ctx->fp,	/* FDE 2: code from .Lbegin up to lj_vm_ffi_call (uses .Lframe1). */
+	".LSFDE2:\n"
+	"\t.long .LEFDE2-.LASFDE2\n"	/* FDE length */
+	".LASFDE2:\n"
+	"\t.long .LASFDE2-.Lframe1\n"	/* CIE pointer (relative to this field) */
+	"\t.long .Lbegin-.\n"	/* initial location, PC-relative */
+	"\t.long %d\n"	/* address range (fcofs) */
+	"\t.uleb128 0\n"			/* augmentation length */
+	"\t.byte 0xe\n\t.uleb128 %d\n"		/* def_cfa_offset */
+	"\t.byte 0x86\n\t.uleb128 0xe\n"	/* offset r6 */
+	"\t.byte 0x87\n\t.uleb128 0xd\n"	/* offset r7 */
+	"\t.byte 0x88\n\t.uleb128 0xc\n"	/* offset r8 */
+	"\t.byte 0x89\n\t.uleb128 0xb\n"	/* offset r9 */
+	"\t.byte 0x8a\n\t.uleb128 0xa\n"	/* offset r10 */
+	"\t.byte 0x8b\n\t.uleb128 0x9\n"	/* offset r11 */
+	"\t.byte 0x8c\n\t.uleb128 0x8\n"	/* offset r12 */
+	"\t.byte 0x8d\n\t.uleb128 0x7\n"	/* offset r13 */
+	"\t.byte 0x8e\n\t.uleb128 0x6\n"	/* offset r14 */
+	"\t.byte 0x8f\n\t.uleb128 0x5\n"	/* offset r15 */
+	"\t.align 8\n"
+	".LEFDE2:\n\n", fcofs, CFRAME_SIZE+160);
+    fprintf(ctx->fp,	/* Second .eh_frame CIE: same as .Lframe1 but without a personality routine. */
+	".Lframe2:\n"
+	"\t.long .LECIE2-.LSCIE2\n"	/* CIE length */
+	".LSCIE2:\n"
+	"\t.long 0\n"	/* CIE id (.eh_frame) */
+	"\t.byte 0x1\n"	/* CIE version */
+	"\t.string \"zR\"\n"	/* augmentation: data present, FDE encoding */
+	"\t.uleb128 0x1\n"	/* code alignment factor */
+	"\t.sleb128 -8\n"	/* data alignment factor */
+	"\t.byte 0xe\n"	/* return address register (r14) */
+	"\t.uleb128 1\n"			/* augmentation length */
+	"\t.byte 0x1b\n"			/* FDE pointer encoding: pcrel|sdata4 */
+	"\t.byte 0xc\n\t.uleb128 0xf\n\t.uleb128 160\n"	/* def_cfa r15, offset 160 */
+	"\t.align 8\n"
+	".LECIE2:\n\n");
+    fprintf(ctx->fp,	/* FDE 3: lj_vm_ffi_call through the end of the code (uses .Lframe2). */
+	".LSFDE3:\n"
+	"\t.long .LEFDE3-.LASFDE3\n"	/* FDE length */
+	".LASFDE3:\n"
+	"\t.long .LASFDE3-.Lframe2\n"	/* CIE pointer (relative to this field) */
+	"\t.long lj_vm_ffi_call-.\n"	/* initial location, PC-relative */
+	"\t.long %d\n"	/* address range (codesz - fcofs) */
+	"\t.uleb128 0\n"			/* augmentation length */
+	"\t.byte 0xe\n\t.uleb128 160\n"		/* def_cfa_offset */
+	"\t.byte 0xd\n\t.uleb128 0xd\n"		/* def_cfa_register r13 (FP) */
+	"\t.byte 0x86\n\t.uleb128 0xe\n"	/* offset r6 */
+	"\t.byte 0x87\n\t.uleb128 0xd\n"	/* offset r7 */
+	"\t.byte 0x88\n\t.uleb128 0xc\n"	/* offset r8 */
+	"\t.byte 0x89\n\t.uleb128 0xb\n"	/* offset r9 */
+	"\t.byte 0x8a\n\t.uleb128 0xa\n"	/* offset r10 */
+	"\t.byte 0x8b\n\t.uleb128 0x9\n"	/* offset r11 */
+	"\t.byte 0x8c\n\t.uleb128 0x8\n"	/* offset r12 */
+	"\t.byte 0x8d\n\t.uleb128 0x7\n"	/* offset r13 */
+	"\t.byte 0x8e\n\t.uleb128 0x6\n"	/* offset r14 */
+	"\t.byte 0x8f\n\t.uleb128 0x5\n"	/* offset r15 */
+	"\t.align 8\n"
+	".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
+    break;
+  default:  /* No other modes. */
+    break;
+  }
openSUSE Build Service is sponsored by