File 0003-Introduce-operand_offset.patch of Package ghidra
From b897450f611baf95938ffc54ff8ad686e1155703 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Klaus=20K=C3=A4mpf?= <kkaempf@gmail.com>
Date: Sun, 30 Oct 2022 20:01:10 +0100
Subject: [PATCH 3/8] Introduce operand_offset
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Signed-off-by: Klaus Kämpf <kkaempf@gmail.com>
---
.../Decompiler/src/decompile/cpp/pcodeparse.y | 1 +
.../Decompiler/src/decompile/cpp/semantics.cc | 9 +++
.../Decompiler/src/decompile/cpp/semantics.hh | 2 +-
.../Decompiler/src/decompile/cpp/slaformat.cc | 4 ++
.../Decompiler/src/decompile/cpp/slaformat.hh | 4 ++
.../src/decompile/cpp/slgh_compile.cc | 4 +-
.../src/decompile/cpp/slghpatexpress.cc | 16 +++++
.../src/decompile/cpp/slghpatexpress.hh | 14 ++++
.../src/decompile/cpp/slghsymbol.cc | 69 ++++++++++++++++++-
.../src/decompile/cpp/slghsymbol.hh | 19 ++++-
GhidraDocs/languages/html/sleigh_symbols.html | 7 ++
11 files changed, 145 insertions(+), 4 deletions(-)
diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/pcodeparse.y b/Ghidra/Features/Decompiler/src/decompile/cpp/pcodeparse.y
index 87cd725974..27667bf777 100644
--- a/Ghidra/Features/Decompiler/src/decompile/cpp/pcodeparse.y
+++ b/Ghidra/Features/Decompiler/src/decompile/cpp/pcodeparse.y
@@ -741,6 +741,7 @@ int4 PcodeSnippet::lex(void)
yylval.operandsym = (OperandSymbol *)sym;
return OPERANDSYM;
case SleighSymbol::start_symbol:
+ case SleighSymbol::offset_symbol:
case SleighSymbol::end_symbol:
case SleighSymbol::next2_symbol:
case SleighSymbol::flowdest_symbol:
diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/semantics.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/semantics.cc
index cd9b9835b1..d9b02eea18 100644
--- a/Ghidra/Features/Decompiler/src/decompile/cpp/semantics.cc
+++ b/Ghidra/Features/Decompiler/src/decompile/cpp/semantics.cc
@@ -121,6 +121,8 @@ uintb ConstTpl::fix(const ParserWalker &walker) const
switch(type) {
case j_start:
return walker.getAddr().getOffset(); // Fill in starting address placeholder with real address
+ case j_offset:
+ return walker.getAddr().getOffset(); // Fill in operand offset placeholder -- NOTE(review): resolves to the same instruction start address as j_start; confirm this is intended
case j_next:
return walker.getNaddr().getOffset(); // Fill in next address placeholder with real address
case j_next2:
@@ -318,6 +320,10 @@ void ConstTpl::encode(Encoder &encoder) const
encoder.openElement(sla::ELEM_CONST_START);
encoder.closeElement(sla::ELEM_CONST_START);
break;
+ case j_offset:
+ encoder.openElement(sla::ELEM_CONST_OFFSET);
+ encoder.closeElement(sla::ELEM_CONST_OFFSET);
+ break;
case j_next:
encoder.openElement(sla::ELEM_CONST_NEXT);
encoder.closeElement(sla::ELEM_CONST_NEXT);
@@ -417,6 +423,9 @@ void ConstTpl::decode(Decoder &decoder)
else if (el == sla::ELEM_CONST_FLOWDEST_SIZE) {
type = j_flowdest_size;
}
+ else if (el == sla::ELEM_CONST_OFFSET) {
+ type = j_offset;
+ }
else
throw LowlevelError("Bad constant type");
decoder.closeElement(el);
diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/semantics.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/semantics.hh
index e0b069959d..c8ca547856 100644
--- a/Ghidra/Features/Decompiler/src/decompile/cpp/semantics.hh
+++ b/Ghidra/Features/Decompiler/src/decompile/cpp/semantics.hh
@@ -35,7 +35,7 @@ class ConstTpl {
public:
enum const_type { real=0, handle=1, j_start=2, j_next=3, j_next2=4, j_curspace=5,
j_curspace_size=6, spaceid=7, j_relative=8,
- j_flowref=9, j_flowref_size=10, j_flowdest=11, j_flowdest_size=12 };
+ j_flowref=9, j_flowref_size=10, j_flowdest=11, j_flowdest_size=12, j_offset=13 };
enum v_field { v_space=0, v_offset=1, v_size=2, v_offset_plus=3 };
private:
const_type type;
diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/slaformat.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/slaformat.cc
index f8b3bcfa73..59269347ff 100644
--- a/Ghidra/Features/Decompiler/src/decompile/cpp/slaformat.cc
+++ b/Ghidra/Features/Decompiler/src/decompile/cpp/slaformat.cc
@@ -166,6 +166,10 @@ ElementId ELEM_CONST_FLOWREF = ElementId("const_flowref", 85, FORMAT_SCOPE);
ElementId ELEM_CONST_FLOWREF_SIZE = ElementId("const_flowref_size", 86, FORMAT_SCOPE);
ElementId ELEM_CONST_FLOWDEST = ElementId("const_flowdest", 87, FORMAT_SCOPE);
ElementId ELEM_CONST_FLOWDEST_SIZE = ElementId("const_flowdest_size", 88, FORMAT_SCOPE);
+ElementId ELEM_OFFSET_EXP = ElementId("offset_exp", 89, FORMAT_SCOPE);
+ElementId ELEM_OFFSET_SYM = ElementId("offset_sym", 90, FORMAT_SCOPE);
+ElementId ELEM_OFFSET_SYM_HEAD = ElementId("offset_sym_head", 91, FORMAT_SCOPE);
+ElementId ELEM_CONST_OFFSET = ElementId("const_offset", 92, FORMAT_SCOPE);
/// The bytes of the header are read from the stream and verified against the required form and current version.
/// If the form matches, \b true is returned. No additional bytes are read.
diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/slaformat.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/slaformat.hh
index a8eb11b63c..7034cf794e 100644
--- a/Ghidra/Features/Decompiler/src/decompile/cpp/slaformat.hh
+++ b/Ghidra/Features/Decompiler/src/decompile/cpp/slaformat.hh
@@ -172,6 +172,10 @@ extern ElementId ELEM_CONST_FLOWREF; ///< SLA format element "const_flowref"
extern ElementId ELEM_CONST_FLOWREF_SIZE; ///< SLA format element "const_flowref_size"
extern ElementId ELEM_CONST_FLOWDEST; ///< SLA format element "const_flowdest"
extern ElementId ELEM_CONST_FLOWDEST_SIZE; ///< SLA format element "const_flowdest_size"
+extern ElementId ELEM_OFFSET_EXP; ///< SLA format element "offset_exp"
+extern ElementId ELEM_OFFSET_SYM; ///< SLA format element "offset_sym"
+extern ElementId ELEM_OFFSET_SYM_HEAD; ///< SLA format element "offset_sym_head"
+extern ElementId ELEM_CONST_OFFSET; ///< SLA format element "const_offset"
extern bool isSlaFormat(istream &s); ///< Verify a .sla file header at the current point of the given stream
extern void writeSlaHeader(ostream &s); ///< Write a .sla file header to the given stream
diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/slgh_compile.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/slgh_compile.cc
index 50d85e22ba..6f311f230c 100644
--- a/Ghidra/Features/Decompiler/src/decompile/cpp/slgh_compile.cc
+++ b/Ghidra/Features/Decompiler/src/decompile/cpp/slgh_compile.cc
@@ -1796,7 +1796,7 @@ SleighCompile::SleighCompile(void)
}
/// Create the address spaces: \b const, \b unique, and \b other.
-/// Define the special symbols: \b inst_start, \b inst_next, \b inst_next2, \b epsilon.
+/// Define the special symbols: \b inst_start, \b operand_offset, \b inst_next, \b inst_next2, \b epsilon.
/// Define the root subtable symbol: \b instruction
void SleighCompile::predefinedSymbols(void)
@@ -1818,6 +1818,8 @@ void SleighCompile::predefinedSymbols(void)
symtab.addSymbol(spacesym);
StartSymbol *startsym = new StartSymbol("inst_start",getConstantSpace());
symtab.addSymbol(startsym);
+ OffsetSymbol *offsetsym = new OffsetSymbol("operand_offset",getConstantSpace());
+ symtab.addSymbol(offsetsym);
EndSymbol *endsym = new EndSymbol("inst_next",getConstantSpace());
symtab.addSymbol(endsym);
Next2Symbol *next2sym = new Next2Symbol("inst_next2",getConstantSpace());
diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/slghpatexpress.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/slghpatexpress.cc
index 9410978595..e16bf18b74 100644
--- a/Ghidra/Features/Decompiler/src/decompile/cpp/slghpatexpress.cc
+++ b/Ghidra/Features/Decompiler/src/decompile/cpp/slghpatexpress.cc
@@ -502,6 +502,8 @@ PatternExpression *PatternExpression::decodeExpression(Decoder &decoder,Translat
res = new MinusExpression();
else if (el == sla::ELEM_NOT_EXP)
res = new NotExpression();
+ else if (el == sla::ELEM_OFFSET_EXP)
+ res = new OffsetInstructionValue();
else
return (PatternExpression *)0;
@@ -711,6 +713,20 @@ void StartInstructionValue::decode(Decoder &decoder,Translate *trans)
decoder.closeElement(el);
}
+void OffsetInstructionValue::encode(Encoder &encoder) const
+
+{ // Written as an empty offset_exp element: open immediately followed by close, no attributes
+ encoder.openElement(sla::ELEM_OFFSET_EXP);
+ encoder.closeElement(sla::ELEM_OFFSET_EXP);
+}
+
+void OffsetInstructionValue::decode(Decoder &decoder,Translate *trans)
+
+{ // Consumes the offset_exp element; nothing is read from it and trans is not used
+ uint4 el = decoder.openElement(sla::ELEM_OFFSET_EXP);
+ decoder.closeElement(el);
+}
+
void EndInstructionValue::encode(Encoder &encoder) const
{
diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/slghpatexpress.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/slghpatexpress.hh
index 118fe3cc94..6e264ded3f 100644
--- a/Ghidra/Features/Decompiler/src/decompile/cpp/slghpatexpress.hh
+++ b/Ghidra/Features/Decompiler/src/decompile/cpp/slghpatexpress.hh
@@ -153,6 +153,20 @@ public:
virtual void encode(Encoder &encoder) const;
virtual void decode(Decoder &decoder,Translate *trans);
};
+
+class OffsetInstructionValue : public PatternValue { // Pattern value created by OffsetSymbol (the operand_offset symbol)
+public:
+ OffsetInstructionValue(void) {}
+ virtual intb getValue(ParserWalker &walker) const {
+ return (intb)walker.getOffset(-1); // NOTE(review): presumably -1 selects the walker's current position -- confirm against ParserWalker::getOffset
+ }
+ virtual TokenPattern genMinPattern(const vector<TokenPattern> &ops) const { return TokenPattern(); } // ops is ignored; default-constructed pattern
+ virtual TokenPattern genPattern(intb val) const { return TokenPattern(); } // val is ignored; default-constructed pattern
+ virtual intb minValue(void) const { return (intb)0; }
+ virtual intb maxValue(void) const { return (intb)0; }
+ virtual void encode(Encoder &encoder) const; // Writes an empty offset_exp element
+ virtual void decode(Decoder &decoder,Translate *trans); // Reads back the empty offset_exp element
+};
class EndInstructionValue : public PatternValue {
public:
diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/slghsymbol.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/slghsymbol.cc
index b35dd6ec79..7c9f2abe69 100644
--- a/Ghidra/Features/Decompiler/src/decompile/cpp/slghsymbol.cc
+++ b/Ghidra/Features/Decompiler/src/decompile/cpp/slghsymbol.cc
@@ -1,4 +1,4 @@
-/* ###
+/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
@@ -224,6 +224,8 @@ void SymbolTable::decodeSymbolHeader(Decoder &decoder)
sym = new OperandSymbol();
else if (el == sla::ELEM_START_SYM_HEAD)
sym = new StartSymbol();
+ else if (el == sla::ELEM_OFFSET_SYM_HEAD)
+ sym = new OffsetSymbol();
else if (el == sla::ELEM_END_SYM_HEAD)
sym = new EndSymbol();
else if (el == sla::ELEM_NEXT2_SYM_HEAD)
@@ -1137,6 +1139,71 @@ void StartSymbol::decode(Decoder &decoder,SleighBase *trans)
decoder.closeElement(sla::ELEM_START_SYM.getId());
}
+OffsetSymbol::OffsetSymbol(const string &nm,AddrSpace *cspc) : SpecificSymbol(nm)
+
+{ // Records the constant space and creates the pattern expression associated with this symbol
+ const_space = cspc;
+ patexp = new OffsetInstructionValue();
+ patexp->layClaim(); // Paired with PatternExpression::release in the destructor
+}
+
+OffsetSymbol::~OffsetSymbol(void)
+
+{ // patexp is still null if the default constructor was used and decode never ran
+ if (patexp != (PatternExpression *)0)
+ PatternExpression::release(patexp);
+}
+
+VarnodeTpl *OffsetSymbol::getVarnode(void) const
+
+{ // Returns current operand offset as a constant: a newly allocated template in const_space whose offset is the j_offset placeholder
+ ConstTpl spc(const_space);
+ ConstTpl off(ConstTpl::j_offset); // Placeholder resolved later by ConstTpl::fix
+ ConstTpl sz_zero; // Default-constructed size field
+ return new VarnodeTpl(spc,off,sz_zero);
+}
+
+void OffsetSymbol::getFixedHandle(FixedHandle &hand,ParserWalker &walker) const
+
+{ // NOTE(review): the handle is filled with the instruction start address, not an operand-specific value -- confirm this is intended
+ hand.space = walker.getCurSpace();
+ hand.offset_space = (AddrSpace *)0; // No offset space: offset_offset is used directly
+ hand.offset_offset = walker.getAddr().getOffset(); // Get starting address of instruction
+ hand.size = hand.space->getAddrSize();
+}
+
+void OffsetSymbol::print(ostream &s,ParserWalker &walker) const
+
+{ // Prints walker.getAddr().getOffset() in hex with a 0x prefix -- NOTE(review): same address source as getFixedHandle; confirm it should not be operand-specific
+ intb val = (intb) walker.getAddr().getOffset();
+ s << "0x" << std::hex << val << std::dec; // Switch the stream back to decimal afterwards
+}
+
+void OffsetSymbol::encode(Encoder &encoder) const
+
+{ // Body element: offset_sym carrying only this symbol's id
+ encoder.openElement(sla::ELEM_OFFSET_SYM);
+ encoder.writeUnsignedInteger(sla::ATTRIB_ID, getId());
+ encoder.closeElement(sla::ELEM_OFFSET_SYM);
+}
+
+void OffsetSymbol::encodeHeader(Encoder &encoder) const
+
+{ // Header element: offset_sym_head wrapping whatever SleighSymbol::encodeHeader writes
+ encoder.openElement(sla::ELEM_OFFSET_SYM_HEAD);
+ SleighSymbol::encodeHeader(encoder);
+ encoder.closeElement(sla::ELEM_OFFSET_SYM_HEAD);
+}
+
+void OffsetSymbol::decode(Decoder &decoder,SleighBase *trans)
+
+{ // Rebuilds the symbol's state from a .sla stream; only the element close happens here, so the open is presumably done by the caller
+ const_space = trans->getConstantSpace();
+ patexp = new OffsetInstructionValue(); // Same expression type the named constructor creates (was StartInstructionValue, a copy-paste slip)
+ patexp->layClaim();
+ decoder.closeElement(sla::ELEM_OFFSET_SYM.getId());
+}
+
EndSymbol::EndSymbol(const string &nm,AddrSpace *cspc) : SpecificSymbol(nm)
{
diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/slghsymbol.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/slghsymbol.hh
index 5e8b4d3dfd..e460a43bac 100644
--- a/Ghidra/Features/Decompiler/src/decompile/cpp/slghsymbol.hh
+++ b/Ghidra/Features/Decompiler/src/decompile/cpp/slghsymbol.hh
@@ -27,7 +27,7 @@ class SleighSymbol {
public:
enum symbol_type { space_symbol, token_symbol, userop_symbol, value_symbol, valuemap_symbol,
name_symbol, varnode_symbol, varnodelist_symbol, operand_symbol,
- start_symbol, end_symbol, next2_symbol, subtable_symbol, macro_symbol, section_symbol,
+ start_symbol, offset_symbol, end_symbol, next2_symbol, subtable_symbol, macro_symbol, section_symbol,
bitrange_symbol, context_symbol, epsilon_symbol, label_symbol, flowdest_symbol, flowref_symbol,
dummy_symbol };
private:
@@ -373,6 +373,23 @@ public:
virtual void decode(Decoder &decoder,SleighBase *trans);
};
+class OffsetSymbol : public SpecificSymbol { // Symbol type behind the predefined "operand_offset" identifier
+ AddrSpace *const_space; // Space used for the varnode template built by getVarnode
+ PatternExpression *patexp; // Claimed in the named constructor and in decode, released in the destructor
+public:
+ OffsetSymbol(void) { patexp = (PatternExpression *)0; } // For use with decode, which assigns const_space and patexp
+ OffsetSymbol(const string &nm,AddrSpace *cspc);
+ virtual ~OffsetSymbol(void);
+ virtual VarnodeTpl *getVarnode(void) const;
+ virtual PatternExpression *getPatternExpression(void) const { return patexp; }
+ virtual void getFixedHandle(FixedHandle &hand,ParserWalker &walker) const;
+ virtual void print(ostream &s,ParserWalker &walker) const;
+ virtual symbol_type getType(void) const { return offset_symbol; }
+ virtual void encode(Encoder &encoder) const; // Writes the offset_sym element
+ virtual void encodeHeader(Encoder &encoder) const; // Writes the offset_sym_head element
+ virtual void decode(Decoder &decoder,SleighBase *trans);
+};
+
class EndSymbol : public SpecificSymbol {
AddrSpace *const_space;
PatternExpression *patexp;
diff --git a/GhidraDocs/languages/html/sleigh_symbols.html b/GhidraDocs/languages/html/sleigh_symbols.html
index 70598b7310..f5ee18878e 100644
--- a/GhidraDocs/languages/html/sleigh_symbols.html
+++ b/GhidraDocs/languages/html/sleigh_symbols.html
@@ -186,6 +186,10 @@ We list all of the symbols that are predefined by SLEIGH.
<td><code class="code">epsilon</code></td>
<td>A special identifier indicating an empty bit pattern.</td>
</tr>
+<tr>
+ <td><code class="code">operand_offset</code></td>
+ <td>Offset of the address of the current operand. Useful for variable-length instructions.</td>
+</tr>
</tbody>
</table></div>
</div>
@@ -205,6 +209,9 @@ identifiers are address spaces. The <span class="emphasis"><em>epsilon</em></spa
identifier is inherited from SLED and is a specific symbol equivalent
to the constant zero. The <span class="emphasis"><em>instruction</em></span> identifier
is the root instruction table.
+<span class="emphasis"><em>operand_offset</em></span> was introduced to support VAX
+variable-length, multi-operand instructions. PC-relative addressing in
+VAX is relative to the operand address, not the instruction address.
</p>
</div>
</div>
--
2.49.0