Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Revive: Fix Hungarian Notation #416

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
196 changes: 109 additions & 87 deletions decompiler/pipeline/controlflowanalysis/variable_name_generation.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,26 @@
import logging
import string
from abc import ABC, abstractmethod
from collections import defaultdict
from dataclasses import dataclass
from enum import Enum
from typing import List, Set
from typing import Counter, List

from decompiler.pipeline.stage import PipelineStage
from decompiler.structures.pseudo import CustomType, Float, GlobalVariable, Integer, Pointer, Type, Variable
from decompiler.structures.pseudo import ArrayType, CustomType, Float, GlobalVariable, Integer, Pointer, Type, Variable
from decompiler.structures.visitors.ast_dataflowobjectvisitor import BaseAstDataflowObjectVisitor
from decompiler.structures.visitors.substitute_visitor import SubstituteVisitor
from decompiler.task import DecompilerTask


class VariableCollector(BaseAstDataflowObjectVisitor):
"""Collect all variables in nodes/expressions"""

def __init__(self):
self._variables: List[Variable] = []
self.variables: list[Variable] = []

def get_variables(self) -> list[Variable]:
"""Get collected variables."""
return self._variables

def visit_variable(self, var: Variable):
"""Add visited variables to list"""
self._variables.append(var)
def visit_variable(self, expression: Variable):
self.variables.append(expression)


class NamingConvention(str, Enum):
Expand All @@ -31,102 +30,114 @@ class NamingConvention(str, Enum):
system_hungarian = "system_hungarian"


class RenamingScheme(ABC):
"""Base class for different Renaming schemes."""

def __init__(self, task: DecompilerTask) -> None:
"""Collets all needed variables for renaming + filters which should not be renamed"""
collector = VariableCollector()
collector.visit_ast(task.syntax_tree)
self._ast = task.syntax_tree
self._params: List[Variable] = task.function_parameters
self._variables: Set[Variable] = set(filter(self._filter_variables, collector.get_variables()))


def _filter_variables(self, item: Variable) -> bool:
"""Return False if variable is either a:
- parameter
- GlobalVariable
"""
return not item in self._params and not isinstance(item, GlobalVariable)
@dataclass(frozen=True)
class VariableIdentifier:
name: str
ssa_label: int | None


def renameVariables(self):
"""Rename all collected variables with a naming scheme."""
names: dict[Variable, Variable] = {}
for var in self._variables:
names[var] = var.copy(name=self.getVariableName(var))
def identifier(var: Variable) -> VariableIdentifier:
return VariableIdentifier(var.name, var.ssa_label)

self._ast.replace_variables_in_subtree(self._ast.root, names)

class RenamingScheme(ABC):

@abstractmethod
def getVariableName(self, var: Variable) -> str:
"Should return a new name of a variable based on the old name and the counter"
def rename_variable(self, variable: Variable) -> Variable | None:
pass


class NoRenamingScheme(RenamingScheme):
def rename_variable(self, variable: Variable) -> Variable | None:
return None


class HungarianScheme(RenamingScheme):
"""Class which renames variables into hungarian notation."""

type_prefix = {
Float: {16: "h", 32: "f", 64: "d", 80: "ld", 128: "q", 256: "o"},
Integer: {8: "ch", 16: "s", 32: "i", 64: "l", 128: "i128"},
}


def __init__(self, task: DecompilerTask) -> None:
super().__init__(task)
self._name = VariableNameGeneration.name
self._pointer_base: bool = task.options.getboolean(f"{self._name}.pointer_base", fallback=True)
self._type_separator: str = task.options.getstring(f"{self._name}.type_separator", fallback="")
self._counter_separator: str = task.options.getstring(f"{self._name}.counter_separator", fallback="")
self._variable_counter: dict[Variable, Integer] = {}
self._counter: Integer = 0


def _get_counter(self, var: Variable) -> Integer:
"""Look up if variable already has a counter, if not assign new one"""
if var not in self._variable_counter: # If the set _really_ works, then it's enough to just return a increasing number
self._variable_counter[var] = self._counter
self._counter += 1
return self._variable_counter[var]
self._task = task
self._var_name: str = task.options.getstring(f"{VariableNameGeneration.name}.variable_name", fallback="var")
self._pointer_base: bool = task.options.getboolean(f"{VariableNameGeneration.name}.pointer_base", fallback=True)
self._type_separator: str = task.options.getstring(f"{VariableNameGeneration.name}.type_separator", fallback="")
self._counter_separator: str = task.options.getstring(f"{VariableNameGeneration.name}.counter_separator", fallback="")

self._variables = self._get_variables_to_rename()

counter = Counter[tuple[str, str]]()
self._variable_rename_map: dict[VariableIdentifier, str] = {}

variable_id: VariableIdentifier
vars: list[Variable]
for variable_id, vars in self._variables.items():
# because the way our cfg works, each use site of each variable could theoretically have a different type
# we just take the first assuming that they are all the same...
var_type = vars[0].type
name_identifier = self._get_name_identifier(variable_id.name)
prefix = self._hungarian_prefix(var_type)

counter_postfix = f"{self._counter_separator}{counter[(name_identifier, prefix)]}"
counter[(name_identifier, prefix)] += 1

new_name: str = f"{prefix}{self._type_separator}{name_identifier.capitalize()}{counter_postfix}"

self._variable_rename_map[variable_id] = new_name

def rename_variable(self, variable: Variable) -> Variable | None:
new_name = self._variable_rename_map.get(identifier(variable))
if new_name is None:
return None
else:
return variable.copy(name=new_name)

def _get_name_identifier(self, name: str) -> str:
"""Return identifier by purging non alpha chars + capitalize the char afterwards. If string is too short, return generic"""
if len(name) < 2:
return "var"
return self._var_name

x = string.capwords("".join([c if c.isalnum() else " " for c in name]))
x = x[0].lower() + x[1:]
x = x[0].lower() + x[1:] # important! We want to be able to choose later if the first letter should be capitalized
return "".join(filter(str.isalpha, x))

def _hungarian_prefix(self, var_type: Type) -> str | None:
"""Return hungarian prefix to a given variable type."""
match var_type:
case Pointer():
if self._pointer_base:
return f"{self._hungarian_prefix(var_type.type)}p"
else:
return "p"
case ArrayType():
return f"arr{self._hungarian_prefix(var_type.type)}"
case CustomType():
if var_type.is_boolean:
return "b"
if var_type.size == 0:
return "v"
case Integer() | Float():
sign = "u" if isinstance(var_type, Integer) and not var_type.is_signed else ""
prefix = self.type_prefix[type(var_type)].get(var_type.size, "unk")
return f"{sign}{prefix}"

return "unk"

def _get_variables_to_rename(self) -> dict[VariableIdentifier, list[Variable]]:
collector = VariableCollector()
collector.visit_ast(self._task.ast)

def getVariableName(self, var: Variable) -> str:
"""Return hungarian notation to a given variable. If no prefix exists, make the first char lower case."""
newName = self._get_name_identifier(var._name)
if (prefix := self._hungarian_prefix(var.type)):
return f"{prefix}{self._type_separator}{newName.capitalize()}{self._counter_separator}{self._get_counter(var.name)}"
return f"{newName}{self._counter_separator}{self._get_counter(var.name)}"
def include_variable(item: Variable):
return item not in self._task.function_parameters and not isinstance(item, GlobalVariable)

def _hungarian_prefix(self, var_type: Type) -> str:
"""Return hungarian prefix to a given variable type."""
if isinstance(var_type, Pointer):
if self._pointer_base:
return f"{self._hungarian_prefix(var_type.type)}p"
return "p"
if isinstance(var_type, CustomType):
if var_type.is_boolean:
return "b"
elif var_type.size == 0:
return "v"
else:
return ""
if isinstance(var_type, (Integer, Float)):
sign = "u" if isinstance(var_type, Integer) and not var_type.is_signed else ""
prefix = self.type_prefix[type(var_type)].get(var_type.size, "unk")
return f"{sign}{prefix}"
return ""
variables: dict[VariableIdentifier, List[Variable]] = defaultdict(list)
for variable in collector.variables:
if include_variable(variable):
variables[identifier(variable)].append(variable)
return variables


class VariableNameGeneration(PipelineStage):
Expand All @@ -137,18 +148,29 @@ class VariableNameGeneration(PipelineStage):

name: str = "variable-name-generation"

def __init__(self):
self._notation: str = None

def run(self, task: DecompilerTask):
"""Rename variable names to the given scheme."""
self._notation = task.options.getstring(f"{self.name}.notation", fallback="default")
renamer: RenamingScheme = None
notation = task.options.getstring(f"{self.name}.notation", fallback=NamingConvention.default)

match self._notation:
scheme: RenamingScheme
match notation:
case NamingConvention.default:
scheme = NoRenamingScheme()
case NamingConvention.system_hungarian:
renamer = HungarianScheme(task)
case _: # Implicit default convention, will not rename anything
scheme = HungarianScheme(task)
case _:
logging.warning("Unknown naming convention: %s", notation)
return

renamer.renameVariables()
self._rename_with_scheme(task, scheme)

@staticmethod
def _rename_with_scheme(task: DecompilerTask, rename_scheme: RenamingScheme):
rename_visitor = SubstituteVisitor(lambda o: rename_scheme.rename_variable(o) if isinstance(o, Variable) else None)

for node in task.ast.nodes:
for obj in node.get_dataflow_objets(task.ast.condition_map):
new_obj = rename_visitor.visit(obj)
if new_obj is not None:
# while this should not happen, in theory, there is nothing preventing this case...
logging.warning("Variable name renaming couldn't rename %s", new_obj)
20 changes: 10 additions & 10 deletions decompiler/util/default.json
Original file line number Diff line number Diff line change
Expand Up @@ -213,16 +213,6 @@
"is_hidden_from_cli": false,
"argument_name": "--for-loop-exclude-conditions"
},
{
"dest": "readability-based-refinement.rename_while_loop_variables",
"default": true,
"type": "boolean",
"title": "Rename while-loop variables",
"description": "Rename while-loop counter variables to counter, counter1, ...",
"is_hidden_from_gui": false,
"is_hidden_from_cli": false,
"argument_name": "--rename-while-loop-variables"
},
{
"dest": "variable-name-generation.notation",
"default": "system_hungarian",
Expand All @@ -238,6 +228,16 @@
"is_hidden_from_cli": false,
"argument_name": "--variable-generation-notation"
},
{
"dest": "variable-name-generation.variable_name",
"default": "var",
"title": "Variable Base Name for hungarian notation",
"type": "string",
"description": "",
"is_hidden_from_gui": false,
"is_hidden_from_cli": false,
"argument_name": "--variable-generation-variable-name"
},
{
"dest": "variable-name-generation.pointer_base",
"default": true,
Expand Down
Loading
Loading