Skip to content

Commit

Permalink
Copy variables when renaming
Browse files Browse the repository at this point in the history
  • Loading branch information
rihi authored and NeoQuix committed Jun 11, 2024
1 parent ba2a67a commit 0edf3f9
Show file tree
Hide file tree
Showing 3 changed files with 200 additions and 194 deletions.
250 changes: 118 additions & 132 deletions decompiler/pipeline/controlflowanalysis/variable_name_generation.py
Original file line number Diff line number Diff line change
@@ -1,66 +1,26 @@
import re
import logging
import string
from abc import ABC, abstractmethod
from collections import defaultdict
from dataclasses import dataclass
from enum import Enum
from typing import Dict, List, Optional
from typing import Counter, List

from decompiler.pipeline.stage import PipelineStage
from decompiler.structures.ast.ast_nodes import ConditionNode, LoopNode
from decompiler.structures.logic.logic_condition import LogicCondition
from decompiler.structures.pseudo import Condition, CustomType, DataflowObject, Float, GlobalVariable, Integer, Pointer, Type, Variable
from decompiler.structures.pseudo import CustomType, Float, GlobalVariable, Integer, Pointer, Type, Variable
from decompiler.structures.visitors.ast_dataflowobjectvisitor import BaseAstDataflowObjectVisitor
from decompiler.structures.visitors.substitute_visitor import SubstituteVisitor
from decompiler.task import DecompilerTask

"""
Small explanation how variables work in the decompiler:
- sometimes they are the same object in different structures (assignments, loops etc.)
- sometimes they are real copies of another
==> Therefore if we change a parameter of a variable (name), we have no guarantee that all usages of the variable will be updated
==> Therefore we always collect EVERY variable used + check with a method (already_renamed) if we already renamed it to our new naming scheme
"""


def _get_var_counter(var_name: str) -> Optional[str]:
"""Return the counter of a given variable name, if any is present."""
if counter := re.match(r".*?([0-9]+)$", var_name):
return counter.group(1)
return None


def _get_containing_variables(dfo: DataflowObject) -> List[Variable]:
"""Returns a list of variables contained in this dataflow object."""
variables: List[Variable] = []
for sub_exp in dfo.subexpressions():
if isinstance(sub_exp, Variable):
variables.append(sub_exp)
return variables


class VariableCollector(BaseAstDataflowObjectVisitor):
"""Visit relevant nodes and collect their variables."""

def __init__(self, cond_map: Dict[LogicCondition, Condition]):
self._cond_map: Dict[LogicCondition, Condition] = cond_map
self._loop_vars: list[Variable] = []
self._variables: list[Variable] = []

def get_variables(self) -> list[Variable]:
"""Get collected variables."""
return self._variables

def get_loop_variables(self) -> list[Variable]:
"""Get collected variables used in loops."""
return self._loop_vars

def visit_condition_node(self, node: ConditionNode):
for expr in [self._cond_map[symbol] for symbol in node.condition.get_symbols()]:
self._variables.extend(_get_containing_variables(expr))

def visit_loop_node(self, node: LoopNode):
for expr in [self._cond_map[symbol] for symbol in node.condition.get_symbols()]:
self._loop_vars.extend(_get_containing_variables(expr))
def __init__(self):
self.variables: list[Variable] = []

def visit_variable(self, expression: Variable):
self._variables.append(expression)
self.variables.append(expression)


class NamingConvention(str, Enum):
Expand All @@ -70,35 +30,28 @@ class NamingConvention(str, Enum):
system_hungarian = "system_hungarian"


class RenamingScheme(ABC):
"""Base class for different Renaming schemes."""
@dataclass(frozen=True)
class VariableIdentifier:
name: str
ssa_label: int | None

def __init__(self, task: DecompilerTask) -> None:
"""Collets all needed variables for renaming + filters already renamed + function arguments out"""
collector = VariableCollector(task.ast.condition_map)
collector.visit_ast(task.ast)
self._params: List[Variable] = task.function_parameters
self._loop_vars: List[Variable] = collector.get_loop_variables()
self._variables: List[Variable] = list(filter(self._filter_variables, collector.get_variables()))

def _filter_variables(self, item: Variable) -> bool:
"""Return False if variable is either a:
- parameter
- renamed loop variable
- GlobalVariable
"""
return (
not item in self._params
and not (item in self._loop_vars and item.name.find("var_") == -1)
and not isinstance(item, GlobalVariable)
)

def identifier(var: Variable) -> VariableIdentifier:
return VariableIdentifier(var.name, var.ssa_label)


class RenamingScheme(ABC):

@abstractmethod
def renameVariableNames(self):
"""Abstract method which should rename variables with respect to the used scheme."""
def rename_variable(self, variable: Variable) -> Variable | None:
pass


class NoRenamingScheme(RenamingScheme):
def rename_variable(self, variable: Variable) -> Variable | None:
return None


class HungarianScheme(RenamingScheme):
"""Class which renames variables into hungarian notation."""

Expand All @@ -107,62 +60,87 @@ class HungarianScheme(RenamingScheme):
Integer: {8: "ch", 16: "s", 32: "i", 64: "l", 128: "i128"},
}

custom_var_names = {"tmp_": "Tmp", "loop_break": "LoopBreak"}

def __init__(self, task: DecompilerTask) -> None:
super().__init__(task)
self._name = VariableNameGeneration.name
self._var_name: str = task.options.getstring(f"{self._name}.variable_name", fallback="Var")
self._pointer_base: bool = task.options.getboolean(f"{self._name}.pointer_base", fallback=True)
self._type_separator: str = task.options.getstring(f"{self._name}.type_separator", fallback="")
self._counter_separator: str = task.options.getstring(f"{self._name}.counter_separator", fallback="")

def renameVariableNames(self):
"""Rename all collected variables to the hungarian notation."""
for var in self._variables:
if self.alread_renamed(var._name):
continue
counter = _get_var_counter(var.name)
var._name = self._hungarian_notation(var, counter if counter else "")

def _hungarian_notation(self, var: Variable, counter: int) -> str:
"""Return hungarian notation to a given variable."""
return f"{self._hungarian_prefix(var.type)}{self._type_separator}{self.custom_var_names.get(var._name.rstrip(counter), self._var_name)}{self._counter_separator}{counter}"

def _hungarian_prefix(self, var_type: Type) -> str:
"""Return hungarian prefix to a given variable type."""
if isinstance(var_type, Pointer):
if self._pointer_base:
return f"{self._hungarian_prefix(var_type.type)}p"
return "p"
if isinstance(var_type, CustomType):
if var_type.is_boolean:
return "b"
elif var_type.size == 0:
return "v"
self._task = task
self._var_name: str = task.options.getstring(f"{VariableNameGeneration.name}.variable_name", fallback="var")
self._pointer_base: bool = task.options.getboolean(f"{VariableNameGeneration.name}.pointer_base", fallback=True)
self._type_separator: str = task.options.getstring(f"{VariableNameGeneration.name}.type_separator", fallback="")
self._counter_separator: str = task.options.getstring(f"{VariableNameGeneration.name}.counter_separator", fallback="")

self._variables = self._get_variables_to_rename()

counter = Counter[tuple[str, Type]]()
self._variable_rename_map: dict[VariableIdentifier, str] = {}

variable_id: VariableIdentifier
vars: list[Variable]
for variable_id, vars in self._variables.items():
# because the way our cfg works, each use site of each variable could theoretically have a different type
# we just take the first assuming that they are all the same...
var_type = vars[0].type
name_identifier = self._get_name_identifier(variable_id.name)

counter_postfix = f"{self._counter_separator}{counter[(name_identifier, var_type)]}"
counter[(name_identifier, var_type)] += 1

prefix = self._hungarian_prefix(var_type)

new_name: str
if prefix is not None:
new_name = f"{prefix}{self._type_separator}{name_identifier.capitalize()}{counter_postfix}"
else:
return ""
if isinstance(var_type, (Integer, Float)):
sign = "u" if isinstance(var_type, Integer) and not var_type.is_signed else ""
prefix = self.type_prefix[type(var_type)].get(var_type.size, "unk")
return f"{sign}{prefix}"
return ""
new_name = f"{name_identifier}{counter_postfix}"

def alread_renamed(self, name) -> bool:
"""Return true if variable with custom name was already renamed, false otherwise"""
renamed_keys_words = [key for key in self.custom_var_names.values()] + ["unk", self._var_name]
return any(keyword in name for keyword in renamed_keys_words)
self._variable_rename_map[variable_id] = new_name

def rename_variable(self, variable: Variable) -> Variable | None:
new_name = self._variable_rename_map.get(identifier(variable))
if new_name is None:
return None
else:
return variable.copy(name=new_name)

class DefaultScheme(RenamingScheme):
"""Class which renames variables into the default scheme."""
def _get_name_identifier(self, name: str) -> str:
"""Return identifier by purging non alpha chars + capitalize the char afterwards. If string is too short, return generic"""
if len(name) < 2:
return self._var_name

def __init__(self, task: DecompilerTask) -> None:
super().__init__(task)
x = string.capwords("".join([c if c.isalnum() else " " for c in name]))
x = x[0].lower() + x[1:] # important! We want to be able to choose later if the first letter should be capitalized
return "".join(filter(str.isalpha, x))

def renameVariableNames(self):
# Maybe make the suboptions more generic, so that the default scheme can also be changed by some parameters?
pass
def _hungarian_prefix(self, var_type: Type) -> str | None:
"""Return hungarian prefix to a given variable type."""
match var_type:
case Pointer():
if self._pointer_base:
return f"{self._hungarian_prefix(var_type.type)}p"
else:
return "p"
case CustomType():
if var_type.is_boolean:
return "b"
elif var_type.size == 0:
return "v"
case _ if isinstance(var_type, Integer | Float):
sign = "u" if isinstance(var_type, Integer) and not var_type.is_signed else ""
prefix = self.type_prefix[type(var_type)].get(var_type.size, "unk")
return f"{sign}{prefix}"

return None

def _get_variables_to_rename(self) -> dict[VariableIdentifier, list[Variable]]:
collector = VariableCollector()
collector.visit_ast(self._task.ast)

def include_variable(item: Variable):
return item not in self._task.function_parameters and not isinstance(item, GlobalVariable)

variables: dict[VariableIdentifier, List[Variable]] = defaultdict(list)
for variable in collector.variables:
if include_variable(variable):
variables[identifier(variable)].append(variable)
return variables


class VariableNameGeneration(PipelineStage):
Expand All @@ -173,21 +151,29 @@ class VariableNameGeneration(PipelineStage):

name: str = "variable-name-generation"

def __init__(self):
self._notation: str = None

def run(self, task: DecompilerTask):
"""Rename variable names to the given scheme."""
self._notation = task.options.getstring(f"{self.name}.notation", fallback="default")

renamer: RenamingScheme = None
notation = task.options.getstring(f"{self.name}.notation", fallback=NamingConvention.default)

match self._notation:
scheme: RenamingScheme
match notation:
case NamingConvention.default:
renamer = DefaultScheme(task)
scheme = NoRenamingScheme()
case NamingConvention.system_hungarian:
renamer = HungarianScheme(task)
scheme = HungarianScheme(task)
case _:
logging.warning("Unknown naming convention: %s", notation)
return

renamer.renameVariableNames()
self._rename_with_scheme(task, scheme)

@staticmethod
def _rename_with_scheme(task: DecompilerTask, rename_scheme: RenamingScheme):
rename_visitor = SubstituteVisitor(lambda o: rename_scheme.rename_variable(o) if isinstance(o, Variable) else None)

for node in task.ast.nodes:
for obj in node.get_dataflow_objets(task.ast.condition_map):
new_obj = rename_visitor.visit(obj)
if new_obj is not None:
# while this should not happen, in theory, there is nothing preventing this case...
logging.warning("Variable name renaming couldn't rename %s", new_obj)
2 changes: 1 addition & 1 deletion decompiler/util/default.json
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@
},
{
"dest": "variable-name-generation.variable_name",
"default": "Var",
"default": "var",
"title": "Variable Base Name for hungarian notation",
"type": "string",
"description": "",
Expand Down
Loading

0 comments on commit 0edf3f9

Please sign in to comment.