File 0003-Introduce-operand_offset.patch of Package ghidra

From b897450f611baf95938ffc54ff8ad686e1155703 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Klaus=20K=C3=A4mpf?= <kkaempf@gmail.com>
Date: Sun, 30 Oct 2022 20:01:10 +0100
Subject: [PATCH 3/8] Introduce operand_offset
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Klaus Kämpf <kkaempf@gmail.com>
---
 .../Decompiler/src/decompile/cpp/pcodeparse.y |  1 +
 .../Decompiler/src/decompile/cpp/semantics.cc |  9 +++
 .../Decompiler/src/decompile/cpp/semantics.hh |  2 +-
 .../Decompiler/src/decompile/cpp/slaformat.cc |  4 ++
 .../Decompiler/src/decompile/cpp/slaformat.hh |  4 ++
 .../src/decompile/cpp/slgh_compile.cc         |  4 +-
 .../src/decompile/cpp/slghpatexpress.cc       | 16 +++++
 .../src/decompile/cpp/slghpatexpress.hh       | 14 ++++
 .../src/decompile/cpp/slghsymbol.cc           | 69 ++++++++++++++++++-
 .../src/decompile/cpp/slghsymbol.hh           | 19 ++++-
 GhidraDocs/languages/html/sleigh_symbols.html |  7 ++
 11 files changed, 145 insertions(+), 4 deletions(-)

diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/pcodeparse.y b/Ghidra/Features/Decompiler/src/decompile/cpp/pcodeparse.y
index 87cd725974..27667bf777 100644
--- a/Ghidra/Features/Decompiler/src/decompile/cpp/pcodeparse.y
+++ b/Ghidra/Features/Decompiler/src/decompile/cpp/pcodeparse.y
@@ -741,6 +741,7 @@ int4 PcodeSnippet::lex(void)
 	yylval.operandsym = (OperandSymbol *)sym;
 	return OPERANDSYM;
       case SleighSymbol::start_symbol:
+      case SleighSymbol::offset_symbol:
       case SleighSymbol::end_symbol:
       case SleighSymbol::next2_symbol:
       case SleighSymbol::flowdest_symbol:
diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/semantics.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/semantics.cc
index cd9b9835b1..d9b02eea18 100644
--- a/Ghidra/Features/Decompiler/src/decompile/cpp/semantics.cc
+++ b/Ghidra/Features/Decompiler/src/decompile/cpp/semantics.cc
@@ -121,6 +121,8 @@ uintb ConstTpl::fix(const ParserWalker &walker) const
   switch(type) {
   case j_start:
     return walker.getAddr().getOffset(); // Fill in starting address placeholder with real address
+  case j_offset:
+    return walker.getAddr().getOffset(); // NOTE(review): same value as the j_start case above (instruction address) -- confirm this is the intended operand_offset placeholder value
   case j_next:
     return walker.getNaddr().getOffset(); // Fill in next address placeholder with real address
   case j_next2:
@@ -318,6 +320,10 @@ void ConstTpl::encode(Encoder &encoder) const
     encoder.openElement(sla::ELEM_CONST_START);
     encoder.closeElement(sla::ELEM_CONST_START);
     break;
+  case j_offset:
+    encoder.openElement(sla::ELEM_CONST_OFFSET);
+    encoder.closeElement(sla::ELEM_CONST_OFFSET);
+    break;
   case j_next:
     encoder.openElement(sla::ELEM_CONST_NEXT);
     encoder.closeElement(sla::ELEM_CONST_NEXT);
@@ -417,6 +423,9 @@ void ConstTpl::decode(Decoder &decoder)
   else if (el == sla::ELEM_CONST_FLOWDEST_SIZE) {
     type = j_flowdest_size;
   }
+  else if (el == sla::ELEM_CONST_OFFSET) {
+    type = j_offset;
+  }
   else
     throw LowlevelError("Bad constant type");
   decoder.closeElement(el);
diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/semantics.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/semantics.hh
index e0b069959d..c8ca547856 100644
--- a/Ghidra/Features/Decompiler/src/decompile/cpp/semantics.hh
+++ b/Ghidra/Features/Decompiler/src/decompile/cpp/semantics.hh
@@ -35,7 +35,7 @@ class ConstTpl {
 public:
   enum const_type { real=0, handle=1, j_start=2, j_next=3, j_next2=4, j_curspace=5, 
 		    j_curspace_size=6, spaceid=7, j_relative=8,
-		    j_flowref=9, j_flowref_size=10, j_flowdest=11, j_flowdest_size=12 };
+		    j_flowref=9, j_flowref_size=10, j_flowdest=11, j_flowdest_size=12, j_offset=13 };
   enum v_field { v_space=0, v_offset=1, v_size=2, v_offset_plus=3 };
 private:
   const_type type;
diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/slaformat.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/slaformat.cc
index f8b3bcfa73..59269347ff 100644
--- a/Ghidra/Features/Decompiler/src/decompile/cpp/slaformat.cc
+++ b/Ghidra/Features/Decompiler/src/decompile/cpp/slaformat.cc
@@ -166,6 +166,10 @@ ElementId ELEM_CONST_FLOWREF = ElementId("const_flowref", 85, FORMAT_SCOPE);
 ElementId ELEM_CONST_FLOWREF_SIZE = ElementId("const_flowref_size", 86, FORMAT_SCOPE);
 ElementId ELEM_CONST_FLOWDEST = ElementId("const_flowdest", 87, FORMAT_SCOPE);
 ElementId ELEM_CONST_FLOWDEST_SIZE = ElementId("const_flowdest_size", 88, FORMAT_SCOPE);
+ElementId ELEM_OFFSET_EXP = ElementId("offset_exp", 89, FORMAT_SCOPE);
+ElementId ELEM_OFFSET_SYM = ElementId("offset_sym", 90, FORMAT_SCOPE);
+ElementId ELEM_OFFSET_SYM_HEAD = ElementId("offset_sym_head", 91, FORMAT_SCOPE);
+ElementId ELEM_CONST_OFFSET = ElementId("const_offset", 92, FORMAT_SCOPE);
 
 /// The bytes of the header are read from the stream and verified against the required form and current version.
 /// If the form matches, \b true is returned.  No additional bytes are read.
diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/slaformat.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/slaformat.hh
index a8eb11b63c..7034cf794e 100644
--- a/Ghidra/Features/Decompiler/src/decompile/cpp/slaformat.hh
+++ b/Ghidra/Features/Decompiler/src/decompile/cpp/slaformat.hh
@@ -172,6 +172,10 @@ extern ElementId ELEM_CONST_FLOWREF;	///< SLA format element "const_flowref"
 extern ElementId ELEM_CONST_FLOWREF_SIZE;	///< SLA format element "const_flowref_size"
 extern ElementId ELEM_CONST_FLOWDEST;	///< SLA format element "const_flowdest"
 extern ElementId ELEM_CONST_FLOWDEST_SIZE;	///< SLA format element "const_flowdest_size"
+extern ElementId ELEM_OFFSET_EXP;	///< SLA format element "offset_exp"
+extern ElementId ELEM_OFFSET_SYM;	///< SLA format element "offset_sym"
+extern ElementId ELEM_OFFSET_SYM_HEAD;	///< SLA format element "offset_sym_head"
+extern ElementId ELEM_CONST_OFFSET;	///< SLA format element "const_offset"
 
 extern bool isSlaFormat(istream &s);	///< Verify a .sla file header at the current point of the given stream
 extern void writeSlaHeader(ostream &s);	///< Write a .sla file header to the given stream
diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/slgh_compile.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/slgh_compile.cc
index 50d85e22ba..6f311f230c 100644
--- a/Ghidra/Features/Decompiler/src/decompile/cpp/slgh_compile.cc
+++ b/Ghidra/Features/Decompiler/src/decompile/cpp/slgh_compile.cc
@@ -1796,7 +1796,7 @@ SleighCompile::SleighCompile(void)
 }
 
 /// Create the address spaces: \b const, \b unique, and \b other.
-/// Define the special symbols: \b inst_start, \b inst_next, \b inst_next2, \b epsilon.
+/// Define the special symbols: \b inst_start, \b operand_offset, \b inst_next, \b inst_next2, \b epsilon.
 /// Define the root subtable symbol: \b instruction
 void SleighCompile::predefinedSymbols(void)
 
@@ -1818,6 +1818,8 @@ void SleighCompile::predefinedSymbols(void)
   symtab.addSymbol(spacesym);
   StartSymbol *startsym = new StartSymbol("inst_start",getConstantSpace());
   symtab.addSymbol(startsym);
+  OffsetSymbol *offsetsym = new OffsetSymbol("operand_offset",getConstantSpace());
+  symtab.addSymbol(offsetsym);
   EndSymbol *endsym = new EndSymbol("inst_next",getConstantSpace());
   symtab.addSymbol(endsym);
   Next2Symbol *next2sym = new Next2Symbol("inst_next2",getConstantSpace());
diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/slghpatexpress.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/slghpatexpress.cc
index 9410978595..e16bf18b74 100644
--- a/Ghidra/Features/Decompiler/src/decompile/cpp/slghpatexpress.cc
+++ b/Ghidra/Features/Decompiler/src/decompile/cpp/slghpatexpress.cc
@@ -502,6 +502,8 @@ PatternExpression *PatternExpression::decodeExpression(Decoder &decoder,Translat
     res = new MinusExpression();
   else if (el == sla::ELEM_NOT_EXP)
     res = new NotExpression();
+  else if (el == sla::ELEM_OFFSET_EXP)
+    res = new OffsetInstructionValue();
   else
     return (PatternExpression *)0;
 
@@ -711,6 +713,20 @@ void StartInstructionValue::decode(Decoder &decoder,Translate *trans)
   decoder.closeElement(el);
 }
 
+void OffsetInstructionValue::encode(Encoder &encoder) const
+
+{ // Emit an empty "offset_exp" element; nothing is written between open and close
+  encoder.openElement(sla::ELEM_OFFSET_EXP);
+  encoder.closeElement(sla::ELEM_OFFSET_EXP);
+}
+
+void OffsetInstructionValue::decode(Decoder &decoder,Translate *trans)
+
+{ // Consume the "offset_exp" element; no attributes are read and trans is not used
+  uint4 el = decoder.openElement(sla::ELEM_OFFSET_EXP);
+  decoder.closeElement(el);
+}
+
 void EndInstructionValue::encode(Encoder &encoder) const
 
 {
diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/slghpatexpress.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/slghpatexpress.hh
index 118fe3cc94..6e264ded3f 100644
--- a/Ghidra/Features/Decompiler/src/decompile/cpp/slghpatexpress.hh
+++ b/Ghidra/Features/Decompiler/src/decompile/cpp/slghpatexpress.hh
@@ -153,6 +153,20 @@ public:
   virtual void encode(Encoder &encoder) const;
   virtual void decode(Decoder &decoder,Translate *trans);
 };
+
+class OffsetInstructionValue : public PatternValue { // Pattern value created by OffsetSymbol (the operand_offset symbol)
+public:
+  OffsetInstructionValue(void) {}
+  virtual intb getValue(ParserWalker &walker) const {
+    return (intb)walker.getOffset(-1); // NOTE(review): -1 presumably selects the walker's current position -- confirm against ParserWalker::getOffset
+  }
+  virtual TokenPattern genMinPattern(const vector<TokenPattern> &ops) const { return TokenPattern(); } // Always a default-constructed pattern; ops is ignored
+  virtual TokenPattern genPattern(intb val) const { return TokenPattern(); } // Always a default-constructed pattern; val is ignored
+  virtual intb minValue(void) const { return (intb)0; } // Lower bound is reported as 0
+  virtual intb maxValue(void) const { return (intb)0; } // Upper bound is reported as 0
+  virtual void encode(Encoder &encoder) const; // Writes an empty offset_exp element
+  virtual void decode(Decoder &decoder,Translate *trans); // Reads back the empty offset_exp element
+};
                                                                                         
 class EndInstructionValue : public PatternValue {
 public:
diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/slghsymbol.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/slghsymbol.cc
index b35dd6ec79..7c9f2abe69 100644
--- a/Ghidra/Features/Decompiler/src/decompile/cpp/slghsymbol.cc
+++ b/Ghidra/Features/Decompiler/src/decompile/cpp/slghsymbol.cc
@@ -1,4 +1,4 @@
-/* ###
+/* ###
  * IP: GHIDRA
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
@@ -224,6 +224,8 @@ void SymbolTable::decodeSymbolHeader(Decoder &decoder)
     sym = new OperandSymbol();
   else if (el == sla::ELEM_START_SYM_HEAD)
     sym = new StartSymbol();
+  else if (el == sla::ELEM_OFFSET_SYM_HEAD)
+    sym = new OffsetSymbol();
   else if (el == sla::ELEM_END_SYM_HEAD)
     sym = new EndSymbol();
   else if (el == sla::ELEM_NEXT2_SYM_HEAD)
@@ -1137,6 +1139,71 @@ void StartSymbol::decode(Decoder &decoder,SleighBase *trans)
   decoder.closeElement(sla::ELEM_START_SYM.getId());
 }
 
+OffsetSymbol::OffsetSymbol(const string &nm,AddrSpace *cspc) : SpecificSymbol(nm)
+
+{ // Store the constant space and create the pattern expression held by this symbol
+  const_space = cspc;
+  patexp = new OffsetInstructionValue();
+  patexp->layClaim(); // The destructor calls PatternExpression::release on this expression
+}
+
+OffsetSymbol::~OffsetSymbol(void)
+
+{ // Release the pattern expression if one was created
+  if (patexp != (PatternExpression *)0) // Null when default-constructed and decode() was never called
+    PatternExpression::release(patexp);
+}
+
+VarnodeTpl *OffsetSymbol::getVarnode(void) const
+
+{ // Returns current operand offset as a constant
+  ConstTpl spc(const_space); // Space component: the stored constant space
+  ConstTpl off(ConstTpl::j_offset); // Offset component: j_offset placeholder, filled in by ConstTpl::fix
+  ConstTpl sz_zero; // Size component: default-constructed ConstTpl
+  return new VarnodeTpl(spc,off,sz_zero); // Newly allocated template handed to the caller
+}
+
+void OffsetSymbol::getFixedHandle(FixedHandle &hand,ParserWalker &walker) const
+
+{ // Fill in hand from the walker's current space and address
+  hand.space = walker.getCurSpace();
+  hand.offset_space = (AddrSpace *)0;
+  hand.offset_offset = walker.getAddr().getOffset(); // Get starting address of instruction. NOTE(review): OffsetInstructionValue::getValue uses walker.getOffset(-1) instead -- confirm which is intended
+  hand.size = hand.space->getAddrSize(); // Size is taken from the address size of the current space
+}
+
+void OffsetSymbol::print(ostream &s,ParserWalker &walker) const
+
+{ // Write the walker's address to s in hexadecimal with a "0x" prefix
+  intb val = (intb) walker.getAddr().getOffset(); // NOTE(review): instruction address, not walker.getOffset(-1) as in OffsetInstructionValue::getValue -- confirm
+    s << "0x" << std::hex << val << std::dec; // Switch the stream back to decimal afterwards
+}
+
+void OffsetSymbol::encode(Encoder &encoder) const
+
+{ // Emit an "offset_sym" element whose only attribute is this symbol's id
+  encoder.openElement(sla::ELEM_OFFSET_SYM);
+  encoder.writeUnsignedInteger(sla::ATTRIB_ID, getId());
+  encoder.closeElement(sla::ELEM_OFFSET_SYM);
+}
+
+void OffsetSymbol::encodeHeader(Encoder &encoder) const
+
+{ // Emit an "offset_sym_head" element wrapping the common SleighSymbol header fields
+  encoder.openElement(sla::ELEM_OFFSET_SYM_HEAD);
+  SleighSymbol::encodeHeader(encoder);
+  encoder.closeElement(sla::ELEM_OFFSET_SYM_HEAD);
+}
+
+void OffsetSymbol::decode(Decoder &decoder,SleighBase *trans)
+
+{ // Restore runtime state; this only closes the element, which is presumably opened by the caller
+  const_space = trans->getConstantSpace(); // Constant space comes from the translator, it is not stored in the element
+  patexp = new OffsetInstructionValue(); // Same expression type the named constructor creates
+  patexp->layClaim(); // The destructor calls PatternExpression::release on this expression
+  decoder.closeElement(sla::ELEM_OFFSET_SYM.getId());
+}
+
 EndSymbol::EndSymbol(const string &nm,AddrSpace *cspc) : SpecificSymbol(nm)
 
 {
diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/slghsymbol.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/slghsymbol.hh
index 5e8b4d3dfd..e460a43bac 100644
--- a/Ghidra/Features/Decompiler/src/decompile/cpp/slghsymbol.hh
+++ b/Ghidra/Features/Decompiler/src/decompile/cpp/slghsymbol.hh
@@ -27,7 +27,7 @@ class SleighSymbol {
 public:
   enum symbol_type { space_symbol, token_symbol, userop_symbol, value_symbol, valuemap_symbol,
 		     name_symbol, varnode_symbol, varnodelist_symbol, operand_symbol,
-		     start_symbol, end_symbol, next2_symbol, subtable_symbol, macro_symbol, section_symbol,
+		     start_symbol, offset_symbol, end_symbol, next2_symbol, subtable_symbol, macro_symbol, section_symbol,
                      bitrange_symbol, context_symbol, epsilon_symbol, label_symbol, flowdest_symbol, flowref_symbol,
 		     dummy_symbol };
 private:
@@ -373,6 +373,23 @@ public:
   virtual void decode(Decoder &decoder,SleighBase *trans);
 };
 
+class OffsetSymbol : public SpecificSymbol { // Symbol type registered as operand_offset by SleighCompile::predefinedSymbols
+  AddrSpace *const_space; // Constant space used when building the varnode template; set by the named constructor or decode()
+  PatternExpression *patexp; // Expression released in the destructor; null until the named constructor or decode() runs
+public:
+  OffsetSymbol(void) { patexp = (PatternExpression *)0; } // For use with decode
+  OffsetSymbol(const string &nm,AddrSpace *cspc);
+  virtual ~OffsetSymbol(void);
+  virtual VarnodeTpl *getVarnode(void) const; // Builds a constant varnode template using the j_offset placeholder
+  virtual PatternExpression *getPatternExpression(void) const { return patexp; } // Hands out the stored pointer as is
+  virtual void getFixedHandle(FixedHandle &hand,ParserWalker &walker) const;
+  virtual void print(ostream &s,ParserWalker &walker) const;
+  virtual symbol_type getType(void) const { return offset_symbol; }
+  virtual void encode(Encoder &encoder) const; // Writes the offset_sym element
+  virtual void encodeHeader(Encoder &encoder) const; // Writes the offset_sym_head element
+  virtual void decode(Decoder &decoder,SleighBase *trans);
+};
+
 class EndSymbol : public SpecificSymbol {
   AddrSpace *const_space;
   PatternExpression *patexp;
diff --git a/GhidraDocs/languages/html/sleigh_symbols.html b/GhidraDocs/languages/html/sleigh_symbols.html
index 70598b7310..f5ee18878e 100644
--- a/GhidraDocs/languages/html/sleigh_symbols.html
+++ b/GhidraDocs/languages/html/sleigh_symbols.html
@@ -186,6 +186,10 @@ We list all of the symbols that are predefined by SLEIGH.
   <td><code class="code">epsilon</code></td>
   <td>A special identifier indicating an empty bit pattern.</td>
 </tr>
+<tr>
+  <td><code class="code">operand_offset</code></td>
+  <td>Offset of the address of the current operand. Useful for variable-length instructions.</td>
+</tr>
 </tbody>
 </table></div>
 </div>
@@ -205,6 +209,9 @@ identifiers are address spaces. The <span class="emphasis"><em>epsilon</em></spa
 identifier is inherited from SLED and is a specific symbol equivalent
 to the constant zero. The <span class="emphasis"><em>instruction</em></span> identifier
 is the root instruction table.
+<span class="emphasis"><em>operand_offset</em></span> was introduced to support VAX
+variable-length, multi-operand instructions. PC-relative addressing in
+VAX is relative to the operand address, not the instruction address.
 </p>
 </div>
 </div>
-- 
2.49.0

openSUSE Build Service is sponsored by