forked from rose-compiler/rose
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
(Binary Analysis) Useful binary analysis tools distributed with ROSE
* Many of the tools from the Megachiropteran repository have been moved into ROSE's "tools" directory so they're more easily distributed. ROSE-2708
- Loading branch information
Showing
24 changed files
with
4,408 additions
and
7 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
if(enable-binary-analysis) | ||
include_directories(. ${ROSE_INCLUDES}) | ||
|
||
add_library(bat SHARED batSupport.C) | ||
target_link_libraries(bat ROSE_DLL) | ||
|
||
add_executable(bat-ana bat-ana.C) | ||
target_link_libraries(bat-ana bat ROSE_DLL) | ||
|
||
add_executable(bat-cc bat-cc.C) | ||
target_link_libraries(bat-cc bat ROSE_DLL) | ||
|
||
add_executable(bat-cfg bat-cfg.C) | ||
target_link_libraries(bat-cfg bat ROSE_DLL) | ||
|
||
add_executable(bat-cg bat-cg.C) | ||
target_link_libraries(bat-cg bat ROSE_DLL) | ||
|
||
add_executable(bat-container bat-container.C) | ||
target_link_libraries(bat-container bat ROSE_DLL) | ||
|
||
add_executable(bat-dis bat-dis.C) | ||
target_link_libraries(bat-dis bat ROSE_DLL) | ||
|
||
add_executable(bat-entropy bat-entropy.C) | ||
target_link_libraries(bat-entropy bat ROSE_DLL) | ||
|
||
add_executable(bat-lsb bat-lsb.C) | ||
target_link_libraries(bat-lsb bat ROSE_DLL) | ||
|
||
add_executable(bat-lsd bat-lsd.C) | ||
target_link_libraries(bat-lsd bat ROSE_DLL) | ||
|
||
add_executable(bat-lsf bat-lsf.C) | ||
target_link_libraries(bat-lsf bat ROSE_DLL) | ||
|
||
add_executable(bat-mem bat-mem.C) | ||
target_link_libraries(bat-mem bat ROSE_DLL) | ||
|
||
add_executable(bat-prop bat-prop.C) | ||
target_link_libraries(bat-prop bat ROSE_DLL) | ||
|
||
add_executable(bat-stack-deltas bat-stack-deltas.C) | ||
target_link_libraries(bat-stack-deltas bat ROSE_DLL) | ||
|
||
add_executable(bat-trace bat-trace.C) | ||
target_link_libraries(bat-trace bat ROSE_DLL) | ||
|
||
endif() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
include $(top_srcdir)/config/Makefile.for.ROSE.includes.and.libs | ||
if ROSE_BUILD_BINARY_ANALYSIS_SUPPORT | ||
|
||
bin_PROGRAMS = | ||
tests = | ||
|
||
libbatSupport_la_SOURCES = batSupport.C | ||
pkginclude_HEADERS = batSupport.h | ||
|
||
bin_PROGRAMS += bat-ana | ||
bat_ana_SOURCES = bat-ana.C | ||
bat_ana_CPPFLAGS = $(ROSE_INCLUDES) | ||
bat_ana_LDFLAGS = $(ROSE_RPATHS) | ||
bat_ana_LDADD = $(ROSE_LIBS) | ||
tests += bat-ana.passed | ||
|
||
bin_PROGRAMS += bat-cc | ||
bat_cc_SOURCES = bat-cc.C | ||
bat_cc_CPPFLAGS = $(ROSE_INCLUDES) | ||
bat_cc_LDFLAGS = $(ROSE_RPATHS) | ||
bat_cc_LDADD = $(ROSE_LIBS) | ||
tests += bat-cc.pased | ||
|
||
bin_PROGRAMS += bat-cfg | ||
bat_cfg_SOURCES = bat-cfg.C | ||
bat_cfg_CPPFLAGS = $(ROSE_INCLUDES) | ||
bat_cfg_LDFLAGS = $(ROSE_RPATHS) | ||
bat_cfg_LDADD = $(ROSE_LIBS) | ||
tests += bat-cfg.passed | ||
|
||
bin_PROGRAMS += bat-container | ||
bat_container_SOURCES = bat-container.C | ||
bat_container_CPPFLAGS = $(ROSE_INCLUDES) | ||
bat_container_LDFLAGS = $(ROSE_RPATHS) | ||
bat_container_LDADD = $(ROSE_LIBS) | ||
tests += bat-container.passed | ||
|
||
bin_PROGRAMS += bat-dis | ||
bat_dis_SOURCES = bat-dis.C | ||
bat_dis_CPPFLAGS = $(ROSE_INCLUDES) | ||
bat_dis_LDFLAGS = $(ROSE_RPATHS) | ||
bat_dis_LDADD = $(ROSE_LIBS) | ||
tests += bat-dis.passed | ||
|
||
bin_PROGRAMS += bat-entropy | ||
bat_entropy_SOURCES = bat-entropy.C | ||
bat_entropy_CPPFLAGS = $(ROSE_INCLUDES) | ||
bat_entropy_LDFLAGS = $(ROSE_RPATHS) | ||
bat_entropy_LDADD = $(ROSE_LIBS) | ||
tests += bat-entropy.passed | ||
|
||
bin_PROGRAMS += bat-linear | ||
bat_linear_SOURCES = bat-linear.C | ||
bat_linear_CPPFLAGS = $(ROSE_INCLUDES) | ||
bat_linear_LDFLAGS = $(ROSE_RPATHS) | ||
bat_linear_LDADD = $(ROSE_LIBS) | ||
tests += bat-linear.passed | ||
|
||
bin_PROGRAMS += bat-lsb | ||
bat_lsb_SOURCES = bat-lsb.C | ||
bat_lsb_CPPFLAGS = $(ROSE_INCLUDES) | ||
bat_lsb_LDFLAGS = $(ROSE_RPATHS) | ||
bat_lsb_LDADD = $(ROSE_LIBS) | ||
tests += bat-lsb.passed | ||
|
||
bin_PROGRAMS += bat-lsf | ||
bat_lsf_SOURCES = bat-lsf.C | ||
bat_lsf_CPPFLAGS = $(ROSE_INCLUDES) | ||
bat_lsf_LDFLAGS = $(ROSE_RPATHS) | ||
bat_lsf_LDADD = $(ROSE_LIBS) | ||
tests += bat-lsf.passed | ||
|
||
bin_PROGRAMS += bat-mem | ||
bat_mem_SOURCES = bat-mem.C | ||
bat_mem_CPPFLAGS = $(ROSE_INCLUDES) | ||
bat_mem_LDFLAGS = $(ROSE_RPATHS) | ||
bat_mem_LDADD = $(ROSE_LIBS) | ||
tests += bat-mem.passed | ||
|
||
bin_PROGRAMS += bat-prop | ||
bat_prop_SOURCES = bat-prop.C | ||
bat_prop_CPPFLAGS = $(ROSE_INCLUDES) | ||
bat_prop_LDFLAGS = $(ROSE_RPATHS) | ||
bat_prop_LDADD = $(ROSE_LIBS) | ||
tests += bat-prop.passed | ||
|
||
bin_PROGRAMS += bat-stack-deltas | ||
bat_stack_deltas_SOURCES = bat-stack-deltas.C | ||
bat_stack_deltas_CPPFLAGS = $(ROSE_INCLUDES) | ||
bat_stack_deltas_LDFLAGS = $(ROSE_RPATHS) | ||
bat_stack_deltas_LDADD = $(ROSE_LIBS) | ||
tests += bat-stack.passed | ||
|
||
bin_PROGRAMS += bat-trace | ||
bat_trace_SOURCES = bat-trace.C | ||
bat_trace_CPPFLAGS = $(ROSE_INCLUDES) | ||
bat_trace_LDFLAGS = $(ROSE_RPATHS) | ||
bat_trace_LDADD = $(ROSE_LIBS) | ||
tests += bat-trace.passed | ||
|
||
$(tests): %.passed: % | ||
@$(RTH_RUN) \ | ||
TITLE="testing $* [$@]" \ | ||
CMD="$$(pwd)/$* --self-test" \ | ||
$(top_srcdir)/scripts/test_exit_status $@ | ||
|
||
endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,183 @@ | ||
![Megachiropteran](logo.png) | ||
|
||
# Tools | ||
|
||
🦇 | ||
|
||
This repository contains the "Megachiroptean" tools. Their general design is that each tool does one simple thing, and thus | ||
they can be chained together to do more complex things. Commonly used tools have short names, and less common tools have | ||
longer, more descriptive names. Every tool supports certain common features, among which is a "--help" switch that provides | ||
detailed information about the tool. | ||
|
||
## Common tools | ||
|
||
* `bat-ana` analyzes a specimen and produces an analysis state file. Most other tools read these state files. | ||
|
||
* `bat-cc` calling convention analysis. | ||
|
||
* `bat-cfg` dumps control flow graphs of various types to GraphViz files, which can be converted to various | ||
image and document formats (including SVG, JPEG, and PDF) or viewed interactively. | ||
|
||
* `bat-cg` is similar to bat-cfg, but for function call graphs. | ||
|
||
* `bat-container` dumps all details about an ELF or PE container. | ||
|
||
* `bat-dis` disassembles a specimen to produce an assembly listing. This tool has many command-line switches to | ||
control its output. The `bat-dis-simple` executable is a quick way to specify switches that cause the | ||
output to be quite minimal. | ||
|
||
* `bat-linear` is a simple linear disassembler that doesn't account for any control-flow. | ||
|
||
* `bat-lsb` lists information about each basic block. | ||
|
||
* `bat-lsd` lists information about each static data block. | ||
|
||
* `bat-lsf` lists information about each function. | ||
|
||
* `bat-mem` lists the specimen's memory map or extracts parts of it into files of various formats. | ||
|
||
* `bat-prop` is used to query various simple properties about a specimen, such as the number of functions. | ||
|
||
* `bat-trace` runs a program natively under a debugger to generate a trace or to compare a program with a previous trace. | ||
|
||
## Less common tools | ||
|
||
* `bat-stack-deltas` produces a list of instruction addresses and the difference between the stack pointer at the start | ||
and end of that instruction with respect to the stack pointer at the beginning of the function. | ||
|
||
## Tools under development | ||
|
||
# Kinds of input files | ||
|
||
The `bat-ana` tool preprocesses various specimens to create an RBA file which is then read by most other | ||
tools. The following types of specimens can be processed (for the most up-to-date information see the `bat-ana --help`. | ||
|
||
* ELF (Linux, Unix, etc) executables, shared libraries, and core dumps. | ||
|
||
* PE (Windows) executables and shared libraries. | ||
|
||
* ELF object files. These have limited utility for analysis since they're not yet mapped to virtual memory and they may | ||
have missing or misleading data. | ||
|
||
* Archives of ELF object files, also known as statically linked libraries. As with object files, an archive of object | ||
files has limited information for analysis. | ||
|
||
* Motorola S-Records, a common text format for firmware. | ||
|
||
* Intel-HEX, a common text format for firmware. | ||
|
||
* Running processes on Linux. | ||
|
||
* Linux native executables can be partially run to cause their built-in dynamic linker to link shared libraries, then | ||
the executable is stopped, ROSE grabs its memory, and the process is killed. | ||
|
||
* Files containing raw memory dumps. These are just files that contain the contents of some part of virtual memory. | ||
|
||
* A custom format designed for analyzing a snapshot of the firmware running on hardware. | ||
|
||
* Any combination of the above formats, although some combinations don't make much sense. For instance, you can combine | ||
an ELF executable file with the memory obtained from a running process and augmented with additional memory dumps such | ||
as the Linux VDSO. | ||
|
||
# Instruction set architectures | ||
|
||
ROSE supports the following instruction set architectures. Please refer to the ROSE documentation for the most | ||
up-to-date list. | ||
|
||
* Intel x86 family such as i386, and Pentium. ROSE can disassemble these instructions and knows their semantics. | ||
|
||
* AMD64 family such as x86-64, x86_64, x64 and Intel 64. ROSE can disassemble these instructions and knows their | ||
semantics. | ||
|
||
* Motorola 68000 family. ROSE can disassemble these instructions and knows their semantics. | ||
|
||
* PowerPC. ROSE can disassemble these instructions and knows their semantics. | ||
|
||
* PowerPC-64. ROSE can disassemble these instructions and knows their semantics. | ||
|
||
* MIPS. ROSE can disassemble these instructions but does not know what they do. | ||
|
||
* ARM AArch A64. | ||
|
||
|
||
# Dependencies | ||
|
||
ROSE must be configured with at least binary analysis support (`--enable-languages=binaries`). Although not required, | ||
turning on the following software makes more binary analysis features available. The parenthesized version numbers are | ||
those used and tested during development. | ||
|
||
* boost (1.57.0 through 1.73.0 except 1.65.x). A subset of the following libraries are needed depending on how ROSE is | ||
configured: chrono, date_time, filesystem, iostreams, random, regex, serialization, system, thread. | ||
|
||
* dlib (18.x, but especially 18.17) for some graph altorithms | ||
|
||
* libgcrypt (any recent version) for calculating hashes for some binary analysis. | ||
|
||
* libsqlite (any recent version, especially 3.23.1) for concolic testing databases. | ||
|
||
* z3 (4.8.4 or later) for model checking | ||
|
||
# Tutorial | ||
|
||
## Documentation | ||
|
||
All tools understand a `--help` (also `-h`) switch which causes them to print documentation similar to a man page. | ||
|
||
## RBA files | ||
|
||
Most of the tools operate on (or produce) a ROSE binary analysis (RBA) state file, usually named "*.rba". If no RBA file | ||
name is specified on the command-line, or the name "-" (single hyphen) is specified, then the RBA file is read from | ||
standard input (or written to standard output) provided the operating system and C++ library supports binary I/O on this | ||
stream. | ||
|
||
An RBA file is basically a serialization of the entire ROSE binary analysis state. RBA files come in three formats | ||
chosen by the `--state-format` switch. For technical reasons, the format of an RBA input file cannot be detected | ||
automatically, but must be specified with this switch. The formats are binary (the default), text, and XML. The binary | ||
format is the smallest and fastest and can be compressed for even more space savings. The text format is ASCII text and | ||
therefore somewhat larger and slower. The XML format is orders of magnitude larger and slower but has the benefit of | ||
being convertable to JSON and understood by non-ROSE tools. | ||
|
||
Although not officially supported and tested, RBA files are generally portable between different versions of ROSE and | ||
these tools. They're also portable between tools compiled with different compilers, compiler optimizations, and C++ | ||
language standards. They're not portable across different architectures (e.g., a tool running on a 32-bit machine vs. a | ||
tool running on a 64-bit machine), although the text formats are portable between different byte orders. | ||
|
||
## Generating RBA files | ||
|
||
These tools read RBA files for one main reason: initializing a tool's analysis state from an RBA file is usually many | ||
times faster than initializing it from scratch. Therefore, one can produce an expensive RBA file once and then use it | ||
with many other tools. Some tools can even add additional information to the state and produce a new RBA file. | ||
|
||
The "bat-ana" tool is the primary tool for generating RBA files. It takes as input a binary specimen such as an ELF | ||
executable and analyzes it by parsing the ELF container, mapping file regions into virtual memory, finding and decoding | ||
instructions and static data, organizing instructions into basic blocks and functions, determining calling conventions, | ||
analyzing stack behavior, analyzing whether functions return, finding no-op sequences, etc., and finally producing an | ||
RBA file. | ||
|
||
For example, here's a command that creates an RBA file by analyzing the /bin/bash ELF executable: | ||
|
||
bat-ana -o bash.rba /bin/bash | ||
|
||
## Obtaining a disassembly listing | ||
|
||
Once you have an RBA file, you can obtain a disassembly listing of the entire file or some function(s) within the | ||
file. The "bat-dis" tool has a multitude of command-line options for controlling the style of output and defaults to | ||
showing quite a bit of information. An easy way to get a simplified listing is with the "bat-dis-simple" tool, which is | ||
identical to "bat-dis" except its switches default to produce less verbose output. | ||
|
||
bat-dis bash.rba | ||
|
||
To list one function: | ||
|
||
bat-dis --function=main bash.rba | ||
bat-dis --function=0x08041000 bash.rba | ||
|
||
The output from "bat-dis" and "bat-dis-simple" is not intended for re-assembly, but rather human consumption. | ||
|
||
## Obtaining a control flow graph | ||
|
||
The "bat-cfg" tool generates control flow graphs. It can produce global CFGs that describe the control flow for an | ||
entire specimen, or a single function. Output is adjustable and can be text files describing all the information stored | ||
in the CFG, or GraphViz files that can be converted to various formats or viewed interactively. | ||
|
||
bat-cfg --function=main --format=gv bash.rba |dot -Tpng > main-cfg.png |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
include_rules | ||
ifeq (@(ENABLE_BINARY_ANALYSIS),yes) | ||
|
||
# BAT support library. This is also used by tools outside ROSE | ||
run $(tool_compile_linklib) --install -I. -o libbat batSupport.C ../../src/librose | ||
run $(public_header) batSupport.h | ||
|
||
# Tools | ||
run $(tool_compile_linkexe) --install -I. bat-ana.C libbat | ||
run $(tool_compile_linkexe) --install -I. bat-cc.C libbat | ||
run $(tool_compile_linkexe) --install -I. bat-cfg.C libbat | ||
run $(tool_compile_linkexe) --install -I. bat-cg.C libbat | ||
run $(tool_compile_linkexe) --install -I. bat-container.C libbat | ||
run $(tool_compile_linkexe) --install -I. bat-dis.C libbat | ||
run $(tool_compile_linkexe) --install -I. bat-entropy.C libbat | ||
run $(tool_compile_linkexe) --install -I. bat-linear.C libbat | ||
run $(tool_compile_linkexe) --install -I. bat-lsb.C libbat | ||
run $(tool_compile_linkexe) --install -I. bat-lsd.C libbat | ||
run $(tool_compile_linkexe) --install -I. bat-lsf.C libbat | ||
run $(tool_compile_linkexe) --install -I. bat-mem.C libbat | ||
run $(tool_compile_linkexe) --install -I. bat-prop.C libbat | ||
run $(tool_compile_linkexe) --install -I. bat-stack-deltas.C libbat | ||
run $(tool_compile_linkexe) --install -I. bat-trace.C libbat | ||
: bat-dis |> cp -p %f %o |> bat-dis-simple $(ROOT)/$(INSTALL_STAGING)/bin/<staging> | ||
|
||
# Tests | ||
run $(test) bat-ana ./bat-ana --self-test | ||
run $(test) bat-cc ./bat-cc --self-test | ||
run $(test) bat-cfg ./bat-cfg --self-test | ||
run $(test) bat-container ./bat-container --self-test | ||
run $(test) bat-dis ./bat-dis --self-test | ||
run $(test) bat-entropy ./bat-entropy --self-test | ||
run $(test) bat-linear ./bat-linear --self-test | ||
run $(test) bat-lsb ./bat-lsb --self-test | ||
run $(test) bat-lsf ./bat-lsf --self-test | ||
run $(test) bat-mem ./bat-mem --self-test | ||
run $(test) bat-prop ./bat-prop --self-test | ||
run $(test) bat-stack-deltas ./bat-stack-deltas --self-test | ||
run $(test) bat-trace ./bat-trace --self-test | ||
|
||
endif |
Oops, something went wrong.