Skip to content

Commit

Permalink
Copy variables when renaming
Browse files Browse the repository at this point in the history
  • Loading branch information
rihi authored and NeoQuix committed Jun 11, 2024
1 parent 20facab commit 61ffa12
Show file tree
Hide file tree
Showing 3 changed files with 195 additions and 115 deletions.
197 changes: 111 additions & 86 deletions decompiler/pipeline/controlflowanalysis/variable_name_generation.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,26 @@
import logging
import string
from abc import ABC, abstractmethod
from collections import defaultdict
from dataclasses import dataclass
from enum import Enum
from typing import List, Set
from typing import Counter, List

from decompiler.pipeline.stage import PipelineStage
from decompiler.structures.pseudo import CustomType, Float, GlobalVariable, Integer, Pointer, Type, Variable
from decompiler.structures.visitors.ast_dataflowobjectvisitor import BaseAstDataflowObjectVisitor
from decompiler.structures.visitors.substitute_visitor import SubstituteVisitor
from decompiler.task import DecompilerTask


class VariableCollector(BaseAstDataflowObjectVisitor):
"""Collect all variables in nodes/expressions"""

def __init__(self):
self._variables: List[Variable] = []
self.variables: list[Variable] = []

def get_variables(self) -> list[Variable]:
"""Get collected variables."""
return self._variables

def visit_variable(self, var: Variable):
"""Add visited variables to list"""
self._variables.append(var)
def visit_variable(self, expression: Variable):
self.variables.append(expression)


class NamingConvention(str, Enum):
Expand All @@ -31,102 +30,117 @@ class NamingConvention(str, Enum):
system_hungarian = "system_hungarian"


class RenamingScheme(ABC):
"""Base class for different Renaming schemes."""
@dataclass(frozen=True)
class VariableIdentifier:
name: str
ssa_label: int | None

def __init__(self, task: DecompilerTask) -> None:
"""Collets all needed variables for renaming + filters which should not be renamed"""
collector = VariableCollector()
collector.visit_ast(task.syntax_tree)
self._ast = task.syntax_tree
self._params: List[Variable] = task.function_parameters
self._variables: Set[Variable] = set(filter(self._filter_variables, collector.get_variables()))


def _filter_variables(self, item: Variable) -> bool:
"""Return False if variable is either a:
- parameter
- GlobalVariable
"""
return not item in self._params and not isinstance(item, GlobalVariable)
def identifier(var: Variable) -> VariableIdentifier:
return VariableIdentifier(var.name, var.ssa_label)


def renameVariables(self):
"""Rename all collected variables with a naming scheme."""
names: dict[Variable, Variable] = {}
for var in self._variables:
names[var] = var.copy(name=self.getVariableName(var))

self._ast.replace_variables_in_subtree(self._ast.root, names)

class RenamingScheme(ABC):

@abstractmethod
def getVariableName(self, var: Variable) -> str:
"Should return a new name of a variable based on the old name and the counter"
def rename_variable(self, variable: Variable) -> Variable | None:
pass


class NoRenamingScheme(RenamingScheme):
def rename_variable(self, variable: Variable) -> Variable | None:
return None


class HungarianScheme(RenamingScheme):
"""Class which renames variables into hungarian notation."""

type_prefix = {
Float: {16: "h", 32: "f", 64: "d", 80: "ld", 128: "q", 256: "o"},
Integer: {8: "ch", 16: "s", 32: "i", 64: "l", 128: "i128"},
}


def __init__(self, task: DecompilerTask) -> None:
super().__init__(task)
self._name = VariableNameGeneration.name
self._pointer_base: bool = task.options.getboolean(f"{self._name}.pointer_base", fallback=True)
self._type_separator: str = task.options.getstring(f"{self._name}.type_separator", fallback="")
self._counter_separator: str = task.options.getstring(f"{self._name}.counter_separator", fallback="")
self._variable_counter: dict[Variable, Integer] = {}
self._counter: Integer = 0


def _get_counter(self, var: Variable) -> Integer:
"""Look up if variable already has a counter, if not assign new one"""
if var not in self._variable_counter: # If the set _really_ works, then it's enough to just return a increasing number
self._variable_counter[var] = self._counter
self._counter += 1
return self._variable_counter[var]
self._task = task
self._var_name: str = task.options.getstring(f"{VariableNameGeneration.name}.variable_name", fallback="var")
self._pointer_base: bool = task.options.getboolean(f"{VariableNameGeneration.name}.pointer_base", fallback=True)
self._type_separator: str = task.options.getstring(f"{VariableNameGeneration.name}.type_separator", fallback="")
self._counter_separator: str = task.options.getstring(f"{VariableNameGeneration.name}.counter_separator", fallback="")

self._variables = self._get_variables_to_rename()

counter = Counter[tuple[str, Type]]()
self._variable_rename_map: dict[VariableIdentifier, str] = {}

variable_id: VariableIdentifier
vars: list[Variable]
for variable_id, vars in self._variables.items():
# because the way our cfg works, each use site of each variable could theoretically have a different type
# we just take the first assuming that they are all the same...
var_type = vars[0].type
name_identifier = self._get_name_identifier(variable_id.name)

counter_postfix = f"{self._counter_separator}{counter[(name_identifier, var_type)]}"
counter[(name_identifier, var_type)] += 1

prefix = self._hungarian_prefix(var_type)

new_name: str
if prefix is not None:
new_name = f"{prefix}{self._type_separator}{name_identifier.capitalize()}{counter_postfix}"
else:
new_name = f"{name_identifier}{counter_postfix}"

self._variable_rename_map[variable_id] = new_name

def rename_variable(self, variable: Variable) -> Variable | None:
new_name = self._variable_rename_map.get(identifier(variable))
if new_name is None:
return None
else:
return variable.copy(name=new_name)

def _get_name_identifier(self, name: str) -> str:
"""Return identifier by purging non alpha chars + capitalize the char afterwards. If string is too short, return generic"""
if len(name) < 2:
return "var"
return self._var_name

x = string.capwords("".join([c if c.isalnum() else " " for c in name]))
x = x[0].lower() + x[1:]
x = x[0].lower() + x[1:] # important! We want to be able to choose later if the first letter should be capitalized
return "".join(filter(str.isalpha, x))

def _hungarian_prefix(self, var_type: Type) -> str | None:
"""Return hungarian prefix to a given variable type."""
match var_type:
case Pointer():
if self._pointer_base:
return f"{self._hungarian_prefix(var_type.type)}p"
else:
return "p"
case CustomType():
if var_type.is_boolean:
return "b"
elif var_type.size == 0:
return "v"
case _ if isinstance(var_type, Integer | Float):
sign = "u" if isinstance(var_type, Integer) and not var_type.is_signed else ""
prefix = self.type_prefix[type(var_type)].get(var_type.size, "unk")
return f"{sign}{prefix}"

return None

def _get_variables_to_rename(self) -> dict[VariableIdentifier, list[Variable]]:
collector = VariableCollector()
collector.visit_ast(self._task.ast)

def getVariableName(self, var: Variable) -> str:
"""Return hungarian notation to a given variable. If no prefix exists, make the first char lower case."""
newName = self._get_name_identifier(var._name)
if (prefix := self._hungarian_prefix(var.type)):
return f"{prefix}{self._type_separator}{newName.capitalize()}{self._counter_separator}{self._get_counter(var.name)}"
return f"{newName}{self._counter_separator}{self._get_counter(var.name)}"
def include_variable(item: Variable):
return item not in self._task.function_parameters and not isinstance(item, GlobalVariable)

def _hungarian_prefix(self, var_type: Type) -> str:
"""Return hungarian prefix to a given variable type."""
if isinstance(var_type, Pointer):
if self._pointer_base:
return f"{self._hungarian_prefix(var_type.type)}p"
return "p"
if isinstance(var_type, CustomType):
if var_type.is_boolean:
return "b"
elif var_type.size == 0:
return "v"
else:
return ""
if isinstance(var_type, (Integer, Float)):
sign = "u" if isinstance(var_type, Integer) and not var_type.is_signed else ""
prefix = self.type_prefix[type(var_type)].get(var_type.size, "unk")
return f"{sign}{prefix}"
return ""
variables: dict[VariableIdentifier, List[Variable]] = defaultdict(list)
for variable in collector.variables:
if include_variable(variable):
variables[identifier(variable)].append(variable)
return variables


class VariableNameGeneration(PipelineStage):
Expand All @@ -137,18 +151,29 @@ class VariableNameGeneration(PipelineStage):

name: str = "variable-name-generation"

def __init__(self):
self._notation: str = None

def run(self, task: DecompilerTask):
"""Rename variable names to the given scheme."""
self._notation = task.options.getstring(f"{self.name}.notation", fallback="default")
renamer: RenamingScheme = None
notation = task.options.getstring(f"{self.name}.notation", fallback=NamingConvention.default)

match self._notation:
scheme: RenamingScheme
match notation:
case NamingConvention.default:
scheme = NoRenamingScheme()
case NamingConvention.system_hungarian:
renamer = HungarianScheme(task)
case _: # Implicit default convention, will not rename anything
scheme = HungarianScheme(task)
case _:
logging.warning("Unknown naming convention: %s", notation)
return

renamer.renameVariables()
self._rename_with_scheme(task, scheme)

@staticmethod
def _rename_with_scheme(task: DecompilerTask, rename_scheme: RenamingScheme):
rename_visitor = SubstituteVisitor(lambda o: rename_scheme.rename_variable(o) if isinstance(o, Variable) else None)

for node in task.ast.nodes:
for obj in node.get_dataflow_objets(task.ast.condition_map):
new_obj = rename_visitor.visit(obj)
if new_obj is not None:
# while this should not happen, in theory, there is nothing preventing this case...
logging.warning("Variable name renaming couldn't rename %s", new_obj)
10 changes: 10 additions & 0 deletions decompiler/util/default.json
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,16 @@
"is_hidden_from_cli": false,
"argument_name": "--variable-generation-notation"
},
{
"dest": "variable-name-generation.variable_name",
"default": "var",
"title": "Variable Base Name for hungarian notation",
"type": "string",
"description": "",
"is_hidden_from_gui": false,
"is_hidden_from_cli": false,
"argument_name": "--variable-generation-variable-name"
},
{
"dest": "variable-name-generation.pointer_base",
"default": true,
Expand Down
Loading

0 comments on commit 61ffa12

Please sign in to comment.