Merge pull request #50 from AlexApps99/master

CSV formatting improvements
2021-07-27 12:12:56 +02:00 · 2021-07-27 12:12:56 +02:00 · 1d4c815fba
parent d7b2e846b9 a68c979491
commit 1d4c815fba
4 changed files with 112783 additions and 112786 deletions
--- a/Contributing.md
+++ b/Contributing.md
@@ -149,8 +149,9 @@ public:
      ```

 6. **Add the mangled function name to the list of decompiled functions.**
-    * To do so, open data/uking_functions.csv and search for the name or the address of function you have decompiled, and add the mangled function name to the last column.
-    * Example: `0x00000071010c0d60,sub_71010C0D60,136,_ZN4ksys4util13TaskQueueBaseD1Ev`
+    * To do so, open `data/uking_functions.csv`, search for the name or the address of the function you have decompiled, and add the mangled function name to the last column.
+    * Be sure to change the status column from `U` (undecompiled) to `O` (OK).
+    * Example: `0x00000071010c0d60,O,136,_ZN4ksys4util13TaskQueueBaseD1Ev`

 7. **Compare the assembly** with `./diff.py --source <mangled function name>`
    * This will bring up a two-column diff. The code on the left is the original code; the code on the right is your version of the function.
@@ -165,8 +166,8 @@ public:

 9. **Update the list of decompiled functions**.
    * If you have a function that matches perfectly, great!
-    * If there are still minor differences left, wrap the function in an `#ifdef NON_MATCHING`, add a comment to explain what is wrong, and add a `?` at the end of the mangled function name in the CSV.
-    * For major differences (lots of entirely red/green/blue lines in the diff), add a `!` at the end of the function name.
+    * If there are still minor differences left, wrap the function in an `#ifdef NON_MATCHING`, add a comment to explain what is wrong, and change the status to `m` (minor difference) in the CSV.
+    * For major differences (lots of entirely red/green/blue lines in the diff), use a capital `M` (major difference) in place of `m`.

 10. Before opening a PR, reformat the code with clang-format and run `tools/check.py`.

--- a/data/uking_functions.csv
+++ b/data/uking_functions.csv
--- a/tools/rename_functions_in_ida.py
+++ b/tools/rename_functions_in_ida.py
@@ -7,18 +7,12 @@ import os

 csv_path = os.path.join(os.path.dirname(__file__), "../data/uking_functions.csv")

-MARKERS = ("|", "?", "!")
-
 with open(csv_path, "r") as f:
    reader = csv.reader(f)
+    # Skip headers
+    next(reader)
    for fn in reader:
        addr = int(fn[0], 16)
-        decomp_name = fn[3]
-        if not decomp_name or decomp_name == "l":
-            continue
-
-        # Get rid of status markers.
-        if decomp_name[-1] in MARKERS:
-            decomp_name = decomp_name[:-1]
-
-        idc.set_name(addr, decomp_name)
+        name = fn[3]
+        if name and fn[1] != "L":
+            idc.set_name(addr, name)
--- a/tools/util/utils.py
+++ b/tools/util/utils.py
@@ -28,32 +28,31 @@ class FunctionInfo(tp.NamedTuple):
    name: str
    size: int
    decomp_name: str
+    library: bool
    status: FunctionStatus
    raw_row: tp.List[str]


 _markers = {
-    "?": FunctionStatus.Equivalent,
-    "!": FunctionStatus.NonMatching,
-    "|": FunctionStatus.Wip,
+    "O": FunctionStatus.Matching,
+    "m": FunctionStatus.Equivalent,
+    "M": FunctionStatus.NonMatching,
+    "W": FunctionStatus.Wip,
+    "U": FunctionStatus.NotDecompiled,
+    "L": FunctionStatus.NotDecompiled,
 }


 def parse_function_csv_entry(row) -> FunctionInfo:
-    ea, name, size, decomp_name = row
-    if decomp_name:
-        status = FunctionStatus.Matching
+    ea, stat, size, name = row
+    status = _markers.get(stat, FunctionStatus.NotDecompiled)
+    decomp_name = ""

-        for marker, new_status in _markers.items():
-            if decomp_name[-1] == marker:
-                status = new_status
-                decomp_name = decomp_name[:-1]
-                break
-    else:
-        status = FunctionStatus.NotDecompiled
+    if status != FunctionStatus.NotDecompiled:
+        decomp_name = name

    addr = int(ea, 16) - 0x7100000000
-    return FunctionInfo(addr, name, int(size, 0), decomp_name, status, row)
+    return FunctionInfo(addr, name, int(size), decomp_name, stat == "L", status, row)


 def get_functions_csv_path() -> Path:
@@ -65,11 +64,13 @@ def get_functions(path: tp.Optional[Path] = None) -> tp.Iterable[FunctionInfo]:
        path = get_functions_csv_path()
    with path.open() as f:
        reader = csv.reader(f)
+        # Skip headers
+        next(reader)
        for row in reader:
            try:
                entry = parse_function_csv_entry(row)
                # excluded library function
-                if entry.decomp_name == "l":
+                if entry.library:
                    continue
                yield entry
            except ValueError as e:
@@ -82,7 +83,7 @@ def add_decompiled_functions(new_matches: tp.Dict[int, str],
    writer = csv.writer(buffer, lineterminator="\n")
    for func in get_functions():
        if new_orig_names is not None and func.status == FunctionStatus.NotDecompiled and func.addr in new_orig_names:
-            func.raw_row[1] = new_orig_names[func.addr]
+            func.raw_row[3] = new_orig_names[func.addr]
        if func.status == FunctionStatus.NotDecompiled and func.addr in new_matches:
            func.raw_row[3] = new_matches[func.addr]
        writer.writerow(func.raw_row)