Skip to content

Commit

Permalink
(Binary Analysis) Null instruction set architecture
Browse files Browse the repository at this point in the history
* Added a new "Null" instruction set architecture that can be used
  when the actual ISA isn't known or isn't supported by ROSE. This
  allows most forms of analysis to work as-is without needing any
  extra checks for nullptr.

* The SgAsmNullInstruction is an instruction of the Null ISA. Its
  only instruction is the "unknown" instruction that's always one byte
  long.

* The Null decoder decodes each byte of a program into the "unknown"
  SgAsmNullInstruction AST.

* The Null RegisterDictionary contains two 8-bit registers named "pc"
  and "sp" that are the program counter and stack pointer. These are
  needed because various analyses assume they exist.

* The Null unparser knows how to unparse the single "unknown"
  SgAsmNullInstruction that has no operands.

RPM-88
  • Loading branch information
matzke1 committed Nov 24, 2020
1 parent 80f0c89 commit fa1c5cc
Show file tree
Hide file tree
Showing 22 changed files with 367 additions and 34 deletions.
1 change: 1 addition & 0 deletions src/ROSETTA/astNodeList
Original file line number Diff line number Diff line change
Expand Up @@ -962,3 +962,4 @@ SgAtExp
SgFinishExp
SgHereExp
SgDotDotExp
SgAsmNullInstruction
50 changes: 48 additions & 2 deletions src/ROSETTA/src/binaryInstruction.C
Original file line number Diff line number Diff line change
Expand Up @@ -665,13 +665,59 @@ void Grammar::setUpBinaryInstructions() {

////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

// ROSETTA declaration of the AST node for instructions of the "null" instruction set architecture.
DECLARE_LEAF_CLASS(AsmNullInstruction);
IS_SERIALIZABLE(AsmNullInstruction);

#ifdef DOCUMENTATION
/** An instruction of the null instruction set architecture.
 *
 *  The null ISA has exactly one kind of instruction (see @ref Kind). */
class SgAsmNullInstruction: public SgAsmInstruction {
public:
#endif

DECLARE_OTHERS(AsmNullInstruction);
#if defined(SgAsmNullInstruction_OTHERS) || defined(DOCUMENTATION)
#ifdef ROSE_HAVE_BOOST_SERIALIZATION_LIB
private:
friend class boost::serialization::access;

// Serializes only the SgAsmInstruction base part; this class adds no serialized members of its own here.
template<class S>
void serialize(S & s, const unsigned /*version*/) {
s & BOOST_SERIALIZATION_BASE_OBJECT_NVP(SgAsmInstruction);
}
#endif

public:
// there's only one kind of null instruction
enum Kind { null_unknown };

// The declarations below override virtual functions of the base class. Their definitions live in another
// source file that is not part of this declaration.
public: // overrides
virtual bool terminatesBasicBlock() $ROSE_OVERRIDE;
virtual bool isFunctionCallFast(const std::vector<SgAsmInstruction*> &insns,
rose_addr_t *target/*out*/, rose_addr_t *ret/*out*/) $ROSE_OVERRIDE;
virtual bool isFunctionCallSlow(const std::vector<SgAsmInstruction*>&,
rose_addr_t *target, rose_addr_t *ret) $ROSE_OVERRIDE;
virtual bool isFunctionReturnFast(const std::vector<SgAsmInstruction*> &insns) $ROSE_OVERRIDE;
virtual bool isFunctionReturnSlow(const std::vector<SgAsmInstruction*> &insns) $ROSE_OVERRIDE;
virtual bool getBranchTarget(rose_addr_t *target) $ROSE_OVERRIDE;
virtual Rose::BinaryAnalysis::AddressSet getSuccessors(bool &complete) $ROSE_OVERRIDE;
virtual Rose::BinaryAnalysis::AddressSet getSuccessors(const std::vector<SgAsmInstruction*>&,
bool &complete,
const Rose::BinaryAnalysis::MemoryMap::Ptr &initial_memory =
Rose::BinaryAnalysis::MemoryMap::Ptr()) $ROSE_OVERRIDE;
virtual bool isUnknown() const $ROSE_OVERRIDE;
virtual unsigned get_anyKind() const $ROSE_OVERRIDE;
#endif // SgAsmNullInstruction_OTHERS
#ifdef DOCUMENTATION
};
#endif

////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

NEW_NONTERMINAL_MACRO(AsmInstruction,
AsmX86Instruction
#ifdef ROSE_ENABLE_ASM_A64
| AsmA64Instruction
#endif
| AsmPowerpcInstruction | AsmMipsInstruction |
AsmM68kInstruction,
| AsmPowerpcInstruction | AsmMipsInstruction | AsmM68kInstruction | AsmNullInstruction,
"AsmInstruction", "AsmInstructionTag", true);
AsmInstruction.setCppCondition("!defined(DOCUMENTATION)");
IS_SERIALIZABLE(AsmInstruction);
Expand Down
19 changes: 19 additions & 0 deletions src/backend/asmUnparser/BinaryUnparserNull.C
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#include <featureTests.h>
#ifdef ROSE_BUILD_BINARY_ANALYSIS_SUPPORT
#include <sage3basic.h>
#include <BinaryUnparserNull.h>

namespace Rose {
namespace BinaryAnalysis {
namespace Unparser {

// Writes the instruction's mnemonic, exactly as stored on the instruction node, to the output stream.
// The unparser state argument is not used.
void
Null::emitInstructionMnemonic(std::ostream &out, SgAsmInstruction *insn, State&) const {
    const std::string &mnemonic = insn->get_mnemonic();
    out << mnemonic;
}

} // namespace
} // namespace
} // namespace

#endif
45 changes: 45 additions & 0 deletions src/backend/asmUnparser/BinaryUnparserNull.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#ifndef ROSE_BinaryAnalysis_UnparserNull_H
#define ROSE_BinaryAnalysis_UnparserNull_H
#include <featureTests.h>
#ifdef ROSE_BUILD_BINARY_ANALYSIS_SUPPORT

#include <BinaryUnparserBase.h>

namespace Rose {
namespace BinaryAnalysis {
namespace Unparser {

/** Settings for the null-architecture unparser. Adds nothing to the base unparser settings. */
struct NullSettings: public Settings {};

/** Unparser for the null architecture.
 *
 *  Objects of this class are created through @ref instance (the constructor is protected) and are handled through the
 *  @c Ptr type. The only architecture-specific piece is how an instruction mnemonic is emitted. */
class Null: public Base {
private:
    NullSettings settings_;                             // settings owned by this unparser

protected:
    /** Constructs an unparser holding a copy of the given settings. */
    explicit Null(const NullSettings &initialSettings)
        : settings_(initialSettings) {
    }

public:
    ~Null() {
    }

    /** Allocating constructor. Default-constructed settings are used when none are supplied. */
    static Ptr instance(const NullSettings &settings = NullSettings()) {
        Null *unparser = new Null(settings);
        return Ptr(unparser);
    }

    /** Creates a new unparser initialized with this unparser's current settings. */
    Ptr copy() const ROSE_OVERRIDE {
        const NullSettings &current = settings();
        return instance(current);
    }

    /** Settings for this unparser (read-only and read-write access). */
    const NullSettings& settings() const ROSE_OVERRIDE {
        return settings_;
    }
    NullSettings& settings() ROSE_OVERRIDE {
        return settings_;
    }

protected:
    void emitInstructionMnemonic(std::ostream&, SgAsmInstruction*, State&) const ROSE_OVERRIDE;
};

} // namespace
} // namespace
} // namespace

#endif
#endif
2 changes: 2 additions & 0 deletions src/backend/asmUnparser/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ add_library(roseAsmUnparser OBJECT
BinaryUnparserBase.C
BinaryUnparserM68k.C
BinaryUnparserMips.C
BinaryUnparserNull.C
BinaryUnparserPowerpc.C
BinaryUnparserX86.C
unparseA64Asm.C
Expand All @@ -29,6 +30,7 @@ install(FILES
BinaryUnparserBase.h
BinaryUnparserM68k.h
BinaryUnparserMips.h
BinaryUnparserNull.h
BinaryUnparserPowerpc.h
BinaryUnparserX86.h
DESTINATION ${INCLUDE_INSTALL_DIR})
2 changes: 2 additions & 0 deletions src/backend/asmUnparser/Makefile_variables
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ asmUnparser_la_sources= \
$(asmUnparserPath)/BinaryUnparserBase.C \
$(asmUnparserPath)/BinaryUnparserM68k.C \
$(asmUnparserPath)/BinaryUnparserMips.C \
$(asmUnparserPath)/BinaryUnparserNull.C \
$(asmUnparserPath)/BinaryUnparserPowerpc.C \
$(asmUnparserPath)/BinaryUnparserX86.C \
$(asmUnparserPath)/AsmUnparser.C \
Expand All @@ -26,6 +27,7 @@ asmUnparser_distIncludeHeaders= \
$(asmUnparserPath)/BinaryUnparserBase.h \
$(asmUnparserPath)/BinaryUnparserM68k.h \
$(asmUnparserPath)/BinaryUnparserMips.h \
$(asmUnparserPath)/BinaryUnparserNull.h \
$(asmUnparserPath)/BinaryUnparserPowerpc.h \
$(asmUnparserPath)/BinaryUnparserX86.h \
$(asmUnparserPath)/AsmUnparser.h \
Expand Down
7 changes: 4 additions & 3 deletions src/backend/asmUnparser/Tupfile
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
include_rules

SOURCES = BinaryEdgeArrows.C BinarySerialIo.C BinaryUnparserArm.C BinaryUnparserBase.C BinaryUnparserM68k.C \
BinaryUnparserMips.C BinaryUnparserPowerpc.C BinaryUnparserX86.C AsmUnparser.C AsmUnparser_compat.C \
AsmFunctionIndex.C unparseX86Asm.C unparsePowerpcAsm.C unparseM68kAsm.C unparseMipsAsm.C unparseA64Asm.C
BinaryUnparserMips.C BinaryUnparserNull.C BinaryUnparserPowerpc.C BinaryUnparserX86.C AsmUnparser.C \
AsmUnparser_compat.C AsmFunctionIndex.C unparseX86Asm.C unparsePowerpcAsm.C unparseM68kAsm.C unparseMipsAsm.C \
unparseA64Asm.C

run $(librose_compile) $(SOURCES)

Expand All @@ -11,4 +12,4 @@ run $(public_header) AsmUnparser.h AsmUnparser_compat.h AsmFunctionIndex.h

# Headers for the newer API
run $(public_header) BinaryEdgeArrows.h BinarySerialIo.h BinaryUnparser.h BinaryUnparserArm.h BinaryUnparserBase.h \
BinaryUnparserM68k.h BinaryUnparserMips.h BinaryUnparserPowerpc.h BinaryUnparserX86.h
BinaryUnparserM68k.h BinaryUnparserMips.h BinaryUnparserNull.h BinaryUnparserPowerpc.h BinaryUnparserX86.h
6 changes: 3 additions & 3 deletions src/frontend/Disassemblers/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@ add_library(roseDisassemblers OBJECT
AssemblerX86Init4.C AssemblerX86Init5.C AssemblerX86Init6.C
AssemblerX86Init7.C AssemblerX86Init8.C AssemblerX86Init9.C
AssemblerX86Init.C DisassemblerA64.C Disassembler.C DisassemblerMips.C
DisassemblerM68k.C DisassemblerPowerpc.C DisassemblerX86.C
DisassemblerM68k.C DisassemblerNull.C DisassemblerPowerpc.C DisassemblerX86.C
Registers.C RegisterDescriptor.C SgAsmA64Instruction.C SgAsmBlock.C SgAsmExecutableFileFormat.C SgAsmExpression.C
SgAsmFloatValueExpression.C SgAsmFunction.C SgAsmInstruction.C
SgAsmIntegerValueExpression.C SgAsmInterpretation.C SgAsmType.C
SgAsmM68kInstruction.C SgAsmMipsInstruction.C SgAsmPowerpcInstruction.C
SgAsmM68kInstruction.C SgAsmMipsInstruction.C SgAsmNullInstruction.C SgAsmPowerpcInstruction.C
SgAsmX86Instruction.C x86InstructionProperties.C RegisterParts.C
BinaryInstructionCache.C)

Expand All @@ -20,7 +20,7 @@ install(
FILES
Assembler.h AssemblerX86.h AssemblerX86Init.h Disassembler.h
DisassemblerA64.h DisassemblerM68k.h
DisassemblerMips.h DisassemblerPowerpc.h DisassemblerX86.h
DisassemblerMips.h DisassemblerNull.h DisassemblerPowerpc.h DisassemblerX86.h
InstructionEnumsM68k.h x86InstructionProperties.h
InstructionEnumsA64.h InstructionEnumsMips.h InstructionEnumsX86.h
Registers.h RegisterDescriptor.h InstructionEnumsPowerpc.h RegisterParts.h
Expand Down
4 changes: 4 additions & 0 deletions src/frontend/Disassemblers/Disassembler.C
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include <DisassemblerA64.h>
#include <DisassemblerM68k.h>
#include <DisassemblerMips.h>
#include <DisassemblerNull.h>
#include <DisassemblerX86.h>
#include <BinaryLoader.h>
#include <stringify.h>
Expand Down Expand Up @@ -187,6 +188,7 @@ Disassembler::isaNames() {
v.push_back("m68040");
v.push_back("mips-be");
v.push_back("mips-le");
// v.push_back("null"); -- intentionally undocumented
v.push_back("ppc32-be");
v.push_back("ppc32-le");
v.push_back("ppc64-be");
Expand Down Expand Up @@ -230,6 +232,8 @@ Disassembler::lookup(const std::string &name)
retval = new DisassemblerM68k(m68k_68040);
} else if (name == "coldfire") {
retval = new DisassemblerM68k(m68k_freescale_emacb);
} else if (name == "null") {
retval = new DisassemblerNull;
} else {
throw std::runtime_error("invalid ISA name \"" + StringUtility::cEscape(name) + "\"; use --isa=list");
}
Expand Down
62 changes: 62 additions & 0 deletions src/frontend/Disassemblers/DisassemblerNull.C
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
#include <featureTests.h>
#ifdef ROSE_BUILD_BINARY_ANALYSIS_SUPPORT
#include <sage3basic.h>
#include <DisassemblerNull.h>
#include <BinaryUnparserNull.h>

namespace Rose {
namespace BinaryAnalysis {

// Configures the null disassembler: name "null", one-byte words, little-endian byte order, and the null ISA's
// register dictionary.
DisassemblerNull::DisassemblerNull() {
name("null");
wordSizeBytes(1);
byteOrder(ByteOrder::ORDER_LSB);
registerDictionary(RegisterDictionary::dictionary_null());

// The register dictionary must already be installed (above) before these lookups. The names "pc" and "sp" are the
// two registers that dictionary_null() inserts.
REG_IP = registerDictionary()->findOrThrow("pc");
REG_SP = registerDictionary()->findOrThrow("sp");
}

// Nothing to release; defined out of line.
DisassemblerNull::~DisassemblerNull() {}

// Returns a newly allocated, default-constructed null disassembler. No state is copied from this object.
Disassembler*
DisassemblerNull::clone() const {
    Disassembler *duplicate = new DisassemblerNull;
    return duplicate;
}

// Always answers "no", regardless of the file header.
bool
DisassemblerNull::canDisassemble(SgAsmGenericHeader*) const {
return false;
}

// Returns a new null-architecture unparser constructed with default settings.
Unparser::BasePtr
DisassemblerNull::unparser() const {
    Unparser::BasePtr nullUnparser = Unparser::Null::instance();
    return nullUnparser;
}

// Decodes the single byte at address `va` into an "unknown" instruction.
//
// One byte is read from executable memory at `va`. If no byte can be read, an Exception with the reason
// "short read" is thrown. Otherwise the byte is handed to makeUnknownInstruction() by way of an Exception object
// whose reason is "unknown". The successors argument is ignored.
SgAsmInstruction*
DisassemblerNull::disassembleOne(const MemoryMap::Ptr &map, rose_addr_t va, AddressSet*) {
    uint8_t octet = 0;
    const size_t nBytesRead = map->at(va).limit(1).require(MemoryMap::EXECUTABLE).read(&octet).size();
    if (nBytesRead == 0)
        throw Exception("short read", va);

    // Wrap the one byte in a byte list so it becomes the instruction's raw bytes.
    const unsigned char *first = reinterpret_cast<const unsigned char*>(&octet);
    const SgUnsignedCharList rawBytes(first, first + 1);
    const Exception unknown("unknown", va, rawBytes, 0);
    return makeUnknownInstruction(unknown);
}

// Builds the AST for an "unknown" null-ISA instruction from the address and raw bytes carried by `e`.
//
// The instruction gets an empty operand list (whose parent is the instruction) and the exception's bytes as its
// raw bytes.
SgAsmInstruction*
DisassemblerNull::makeUnknownInstruction(const Exception &e) {
    SgAsmInstruction *unknown = new SgAsmNullInstruction(e.ip, "unknown");

    // Attach an empty operand list to the new instruction.
    SgAsmOperandList *noOperands = new SgAsmOperandList;
    unknown->set_operandList(noOperands);
    noOperands->set_parent(unknown);

    unknown->set_raw_bytes(e.bytes);
    return unknown;
}

} // namespace
} // namespace

#endif
44 changes: 44 additions & 0 deletions src/frontend/Disassemblers/DisassemblerNull.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#ifndef ROSE_BinaryAnalysis_Disassembler_Null_H
#define ROSE_BinaryAnalysis_Disassembler_Null_H

#include <featureTests.h>
#ifdef ROSE_BUILD_BINARY_ANALYSIS_SUPPORT

#include <Disassembler.h>
#include <boost/serialization/access.hpp>
#include <boost/serialization/base_object.hpp>

namespace Rose {
namespace BinaryAnalysis {

/** Disassembler for nothing.
 *
 *  This disassembler is a stub that can be used when no ISA is specified. It's never chosen automatically (its
 *  @ref canDisassemble always returns false). It is configured with a one-byte word size and little-endian byte
 *  order. It always decodes to an unknown instruction that's one byte long. */
class DisassemblerNull: public Disassembler {

#ifdef ROSE_HAVE_BOOST_SERIALIZATION_LIB
private:
friend class boost::serialization::access;

// Serializes only the Disassembler base part; this class declares no data members of its own.
template<class S>
void serialize(S &s, const unsigned /*version*/) {
s & BOOST_SERIALIZATION_BASE_OBJECT_NVP(Disassembler);
}
#endif

public:
DisassemblerNull();
virtual ~DisassemblerNull();
virtual Disassembler* clone() const ROSE_OVERRIDE;
virtual bool canDisassemble(SgAsmGenericHeader*) const ROSE_OVERRIDE;
virtual Unparser::BasePtr unparser() const ROSE_OVERRIDE;
virtual SgAsmInstruction* disassembleOne(const MemoryMap::Ptr&, rose_addr_t va, AddressSet *successors = NULL) ROSE_OVERRIDE;
virtual SgAsmInstruction* makeUnknownInstruction(const Exception&) ROSE_OVERRIDE;
};

} // namespace
} // namespace

#endif
#endif
4 changes: 3 additions & 1 deletion src/frontend/Disassemblers/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,19 @@ libroseDisassemblers_la_DEPENDENCIES =
libroseDisassemblers_la_SOURCES = \
SgAsmBlock.C SgAsmFunction.C \
SgAsmInstruction.C SgAsmA64Instruction.C SgAsmPowerpcInstruction.C SgAsmX86Instruction.C SgAsmMipsInstruction.C \
SgAsmM68kInstruction.C SgAsmExecutableFileFormat.C x86InstructionProperties.C \
SgAsmM68kInstruction.C SgAsmNullInstruction.C SgAsmExecutableFileFormat.C x86InstructionProperties.C \
SgAsmInterpretation.C SgAsmIntegerValueExpression.C SgAsmFloatValueExpression.C SgAsmExpression.C SgAsmType.C \
Registers.C RegisterDescriptor.C \
Disassembler.C DisassemblerA64.C DisassemblerMips.C DisassemblerM68k.C DisassemblerPowerpc.C DisassemblerX86.C \
DisassemblerNull.C \
Assembler.C AssemblerX86.C AssemblerX86Init.C RegisterParts.C \
AssemblerX86Init1.C AssemblerX86Init2.C AssemblerX86Init3.C AssemblerX86Init4.C AssemblerX86Init5.C \
AssemblerX86Init6.C AssemblerX86Init7.C AssemblerX86Init8.C AssemblerX86Init9.C BinaryInstructionCache.C

pkginclude_HEADERS = \
Registers.h RegisterDescriptor.h BitPattern.h \
Disassembler.h DisassemblerA64.h DisassemblerMips.h DisassemblerM68k.h DisassemblerPowerpc.h DisassemblerX86.h \
DisassemblerNull.h \
Assembler.h AssemblerX86.h AssemblerX86Init.h \
InstructionEnumsX86.h InstructionEnumsMips.h InstructionEnumsM68k.h x86InstructionProperties.h \
InstructionEnumsA64.h InstructionEnumsPowerpc.h RegisterParts.h BinaryInstructionCache.h
Expand Down
14 changes: 14 additions & 0 deletions src/frontend/Disassemblers/Registers.C
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,20 @@ RegisterDictionary::dictionary_for_isa(SgAsmInterpretation *interp) {
return hdrs.empty() ? NULL : dictionary_for_isa(hdrs.front()->get_isa());
}

// Returns the shared register dictionary for the null ISA, building it on the first call.
//
// The dictionary is named "null" and holds two 8-bit registers, "pc" and "sp". Creation is serialized by a
// function-local mutex, and the same pointer is returned on every call.
const RegisterDictionary*
RegisterDictionary::dictionary_null() {
    static RegisterDictionary *nullRegs = NULL;
    static SAWYER_THREAD_TRAITS::Mutex guard;
    SAWYER_THREAD_TRAITS::LockGuard hold(guard);

    if (nullRegs != NULL)
        return nullRegs;

    nullRegs = new RegisterDictionary("null");
    nullRegs->insert("pc", 0, 0, 0, 8);                 // program counter
    nullRegs->insert("sp", 0, 1, 0, 8);                 // stack pointer
    return nullRegs;
}

const RegisterDictionary *
RegisterDictionary::dictionary_i8086() {
static SAWYER_THREAD_TRAITS::Mutex mutex;
Expand Down
2 changes: 2 additions & 0 deletions src/frontend/Disassemblers/Registers.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ class RegisterDictionary {
typedef std::map<std::string/*name*/, RegisterDescriptor> Entries;
typedef std::vector<RegisterDescriptor> RegisterDescriptors;

/** Mostly empty dictionary for the null ISA.
 *
 *  It contains only two 8-bit registers: "pc" (program counter) and "sp" (stack pointer). */
static const RegisterDictionary *dictionary_null();

/** Intel 8086 registers.
*
Expand Down
Loading

0 comments on commit fa1c5cc

Please sign in to comment.