From 9bbfe9592511d6cb6c33ab06af370eaf69e9ab6c Mon Sep 17 00:00:00 2001 From: jiangzhengwenjz Date: Fri, 27 Mar 2020 11:48:34 +0800 Subject: [PATCH] calcrom.pl from pret/pokefirered --- calcrom.pl | 152 ++++++++++++++++++++++++++++++++++++++ tools/preproc/charmap.cpp | 7 +- 2 files changed, 158 insertions(+), 1 deletion(-) create mode 100644 calcrom.pl diff --git a/calcrom.pl b/calcrom.pl new file mode 100644 index 00000000..83051c67 --- /dev/null +++ b/calcrom.pl @@ -0,0 +1,152 @@ +#!/usr/bin/perl + +# Usage: +# calcrom.pl [--verbose] +# +# mapfile: path to .map file output by LD +# verbose: set to get more detailed output + +use IPC::Cmd qw[ run ]; +use Getopt::Long; + +my $verbose = ""; + +GetOptions("verbose" => \$verbose); +(@ARGV == 1) + or die "ERROR: no map file specified.\n"; +open(my $file, $ARGV[0]) + or die "ERROR: could not open file '$ARGV[0]'.\n"; + +my $src = 0; +my $asm = 0; +my $srcdata = 0; +my $data = 0; +my @pairs = (); +while (my $line = <$file>) +{ + if ($line =~ /^ \.(\w+)\s+0x[0-9a-f]+\s+(0x[0-9a-f]+) (\w+)\/(.+)\.o/) + { + my $section = $1; + my $size = hex($2); + my $dir = $3; + my $basename = $4; + if ($size & 3) + { + $size += 4 - ($size % 3); + } + + if ($section =~ /text/) + { + if ($dir eq 'src') + { + $src += $size; + } + elsif ($dir eq 'asm') + { + if (!($basename =~ /(crt0|libagbsyscall|libgcnmultiboot|m4a_1)/)) + { + push @pairs, [$basename, $size]; + } + $asm += $size; + } + } + elsif ($section =~ /rodata/) + { + if ($dir eq 'src') + { + $srcdata += $size; + } + elsif ($dir eq 'data') + { + $data += $size; + } + } + } +} + +my @sorted = sort { $a->[1] <=> $b->[1] } @pairs; + +# Note that the grep filters out all branch labels. It also requires a minimum +# line length of 5, to filter out a ton of generated symbols (like AcCn). No +# settings to nm seem to remove these symbols. Finally, nm prints out a separate +# entry for whenever a name appears in a file, not just where it's defined. uniq +# removes all the duplicate entries. +# +# +# You'd expect this to take a while, because of uniq. It runs in under a second, +# though. Uniq is pretty fast! +my $base_cmd = "nm zeldatmc.elf | awk '{print \$3}' | grep '^[^_].\\{4\\}' | uniq"; + +# This looks for Unknown_, Unknown_, or sub_, followed by just numbers. Note that +# it matches even if stuff precedes the unknown, like sUnknown/gUnknown. +my $undoc_cmd = "grep '[Uu]nk_[0-9a-fA-F]*\\|sub_[0-9a-fA-F]*'"; + +my $count_cmd = "wc -l"; + +# It sucks that we have to run this three times, but I can't figure out how to get +# stdin working for subcommands in perl while still having a timeout. It's decently +# fast anyway. +my $total_syms_as_string; +(run ( + command => "$base_cmd | $count_cmd", + buffer => \$total_syms_as_string, + timeout => 60 +)) + or die "ERROR: Error while getting all symbols: $?"; + +my $undocumented_as_string; +(run ( + command => "$base_cmd | $undoc_cmd | $count_cmd", + buffer => \$undocumented_as_string, + timeout => 60 +)) + or die "ERROR: Error while filtering for undocumented symbols: $?"; + +# Performing addition on a string converts it to a number. Any string that fails +# to convert to a number becomes 0. So if our converted number is 0, but our string +# is nonzero, then the conversion was an error. +my $undocumented = $undocumented_as_string + 0; +(($undocumented != 0) and ($undocumented_as_string ne "0")) + or die "ERROR: Cannot convert string to num: '$undocumented_as_string'"; + +my $total_syms = $total_syms_as_string + 0; +(($total_syms != 0) and ($total_syms_as_string ne "0")) + or die "ERROR: Cannot convert string to num: '$total_syms_as_string'"; + +($total_syms != 0) + or die "ERROR: No symbols found."; + +my $total = $src + $asm; +my $srcPct = sprintf("%.4f", 100 * $src / $total); +my $asmPct = sprintf("%.4f", 100 * $asm / $total); + +my $documented = $total_syms - ($undocumented); +my $docPct = sprintf("%.4f", 100 * $documented / $total_syms); +my $undocPct = sprintf("%.4f", 100 * $undocumented / $total_syms); + +print "$total total bytes of code\n"; +print "$src bytes of code in src ($srcPct%)\n"; +print "$asm bytes of code in asm ($asmPct%)\n"; +print "\n"; + +if ($verbose != 0) +{ + print "BREAKDOWN\n"; + foreach my $item (@sorted) + { + print " $item->[1] bytes in asm/$item->[0].s\n" + } + print "\n"; +} + +print "$total_syms total symbols\n"; +print "$documented symbols documented ($docPct%)\n"; +print "$undocumented symbols undocumented ($undocPct%)\n"; + +print "\n"; +my $dataTotal = $srcdata + $data; +my $srcDataPct = sprintf("%.4f", 100 * $srcdata / $dataTotal); +my $dataPct = sprintf("%.4f", 100 * $data / $dataTotal); +print "$dataTotal total bytes of data\n"; +print "$srcdata bytes of data in src ($srcDataPct%)\n"; +print "$data bytes of data in data ($dataPct%)\n"; diff --git a/tools/preproc/charmap.cpp b/tools/preproc/charmap.cpp index e4a8999b..5625a852 100644 --- a/tools/preproc/charmap.cpp +++ b/tools/preproc/charmap.cpp @@ -67,7 +67,12 @@ private: CharmapReader::CharmapReader(std::string filename) : m_filename(filename) { - if (filename == "") { m_buffer = new char[1] {}; return; } + if (filename == "") + { + m_size = 0; + m_buffer = new char[1] {}; + return; + } FILE *fp = std::fopen(filename.c_str(), "rb");