#!/usr/bin/env perl # # Copyright (c) 2025 Martin Storsjo # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # 1. Redistributions of source code must retain the above copyright notice, this # list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR # ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # A script for reformatting ARM/AArch64 assembly according to the following # style: # - Instructions start after 8 columns, operands start after 24 columns # - Vector register layouts and modifiers like "uxtw" are written in lowercase # - Optionally align operand columns vertically according to their # maximum width (accommodating for e.g. x0 vs x10, or v0.8b vs v16.16b). # # The script can be executed as "indent_arm_assembly.pl file [outfile]". # If no outfile is specified, the given file is overwritten in place. 
#
# Alternatively, if no file parameters are given, the script reads input
# code on stdin, and outputs the reformatted code on stdout.
#
# Options:
#   -operands            also realign the operands of each instruction into
#                        columns
#   -indent N            column at which instructions start (default 8)
#   -operand-indent N    column at which operands start (default 24)
#   -match-indent        try to match the indentation already used on each
#                        line instead of forcing the configured columns

use strict;

my $indent_operands = 0;
my $instr_indent = 8;
my $operand_indent = 24;
my $match_indent = 0;
my $file;
my $outfile;

while (@ARGV) {
    my $opt = shift;
    if ($opt eq "-operands") {
        $indent_operands = 1;
    } elsif ($opt eq "-indent" || $opt eq "-operand-indent") {
        # Both options consume the following argument; reject a missing or
        # non-numeric value instead of silently treating it as 0.
        my $value = shift;
        if (!defined($value) || $value !~ /^[0-9]+$/) {
            die "Option $opt requires a non-negative integer argument\n";
        }
        if ($opt eq "-indent") {
            $instr_indent = $value;
        } else {
            $operand_indent = $value;
        }
    } elsif ($opt eq "-match-indent") {
        $match_indent = 1;
    } else {
        # Positional parameters: input file first, then output file.
        # Test with defined() so that a file literally named "0" is not
        # mistaken for "not set yet" and overwritten by the next parameter.
        if (!defined($file)) {
            $file = $opt;
        } elsif (!defined($outfile)) {
            $outfile = $opt;
        } else {
            die "Unrecognized parameter $opt\n";
        }
    }
}

if ($operand_indent < $instr_indent) {
    die "Can't indent operands to $operand_indent while indenting " .
        "instructions to $instr_indent\n";
}

# Return a string consisting of n spaces
sub spaces {
    my ($n) = @_;
    return " " x $n;
}

# Rewrite a comma separated operand list so that every operand except the
# last one is padded to the same width.
#
# Parameters:
#   $input - the operands part of an instruction line
#   $chars - the width, in characters, that each operand should occupy
#
# Returns the realigned operand string. If any operand other than the last
# one is wider than $chars, $input is returned untouched.
sub indentcolumns {
    my ($input, $chars) = @_;
    # Use a negative limit; by default split discards trailing empty
    # fields, which would silently drop a trailing comma from the line.
    my @operands = split(/,/, $input, -1);
    my $last = $#operands;
    my $ret = "";
    for my $i (0 .. $last) {
        my $cur = $operands[$i];
        # Trim out leading/trailing whitespace
        $cur =~ s/^\s+|\s+$//g;
        $ret .= $cur;
        if ($i < $last) {
            # If we have a following operand, add a comma and whitespace to
            # align the next operand.
            my $next = $operands[$i+1];
            my $len = length($cur);
            if ($len > $chars) {
                # If this operand was too wide for the intended column width,
                # don't try to realign the line at all, just return the input
                # untouched.
                return $input;
            }
            my $pad = $chars - $len;
            if ($next =~ /\b(?:[su]xt[bhw]|[la]s[lr])\b/i) {
                # If the next item isn't a regular operand, but a modifier,
                # don't try to align that. E.g. "add x0, x0, w1, uxtw #1".
                # Match case insensitively, as the caller may not have
                # lowercased the modifiers yet, and only match whole words
                # so that symbol names containing e.g. "asl" don't count.
                $pad = 0;
            }
            if ($i + 1 == $last && $next !~ /\S/) {
                # Nothing follows the final comma; keep the comma but don't
                # emit trailing whitespace after it.
                $ret .= ",";
            } else {
                $ret .= "," . spaces(1 + $pad);
            }
        }
    }
    return $ret;
}

# Realign the operands part of an instruction line, making each operand
# take up the maximum width for that kind of operand.
sub columns {
    # Takes the operands part of an instruction line and returns it with
    # the operands padded into columns, or unchanged if the line is of a
    # kind that isn't realigned.
    my ($rest) = @_;
    if ($rest !~ /,/) {
        # No commas, no operands to split and align
        return $rest;
    }
    if ($rest =~ /\{|[^\w]\[/) {
        # Check for instructions that use register ranges, like {v0.8b,v1.8b}
        # or mem address operands, like "ldr x0, [sp]" - we skip trying to
        # realign these.
        return $rest;
    }
    if ($rest =~ /v[0-9]+\.[0-9]+[bhsd]/i) {
        # If we have references to aarch64 style vector registers, like
        # v0.8b, then align all operands to the maximum width of such
        # operands - v16.16b.
        #
        # TODO: Ideally, we'd handle mixed operand types individually.
        return indentcolumns($rest, 7);
    }
    # Indent operands according to the maximum width of regular registers,
    # like x10.
    return indentcolumns($rest, 3);
}

# Lowercase register names, vector layouts and modifiers in the operands
# part of an instruction line, and return the result.
sub lowercase_operands {
    my ($rest) = @_;

    # Lowercase register names. Only apply this on lines up to
    # comments, as this can match common spec/code references in
    # code comments. Split the string on // for comments, apply the
    # substitution on the first segment (up to a comment char), and
    # join the string again. The negative limit keeps trailing empty
    # fields, so that an empty "//" comment at the end isn't dropped.
    my @parts = split(/\/\//, $rest, -1);
    if (@parts) {
        $parts[0] =~ s/\b([XWVQDSHBZP][0-9]+)\b/lc($1)/ge;
    }
    $rest = join('//', @parts);
    # Lowercase the aarch64 vector layout description, .8B -> .8b
    $rest =~ s/(\.[84216]*[BHSD])/lc($1)/ge;
    # Lowercase modifiers like "uxtw" or "lsl"
    $rest =~ s/([SU]XT[BWH]|[LA]S[LR])/lc($1)/ge;
    # Lowercase SVE/SME modifiers like "/Z" or "/M"
    $rest =~ s,(/[ZM])\b,lc($1),ge;
    # Lowercase SVE/SME vector lengths
    $rest =~ s/\b(VL[0-9]+)\b/lc($1)/ge;
    return $rest;
}

my $in;
my $out;
my $tempfile;
# Compare against the empty string rather than testing for truth, so that
# a file literally named "0" is handled as a file.
my $have_file = defined($file) && $file ne "";
if ($have_file) {
    # Use three-argument open, so that characters like ">" or "|" in a
    # file name are never interpreted as an open mode.
    open($in, '<', $file) or die "Unable to open $file: $!";
    if (defined($outfile) && $outfile ne "") {
        open($out, '>', $outfile) or die "Unable to open $outfile: $!";
    } else {
        # Rewriting in place; write to a temporary file, which replaces
        # the input file once everything has been written successfully.
        $tempfile = "$file.tmp";
        open($out, '>', $tempfile) or die "Unable to open $tempfile: $!";
    }
} else {
    $in = \*STDIN;
    $out = \*STDOUT;
}

while (<$in>) {
    # Strip the line terminator; it is added back when printing.
    chomp;
    # Match an optional label, whitespace, an instruction (or macro) name
    # and optionally whitespace followed by the rest of the line.
    if (/^([\.\w\d]+:)?(\s+)([\w\\][\w\\\.]*)(?:(\s+)(.*)|$)/) {
        # The label and the operand groups are optional, and are
        # undefined when absent; normalize them to empty strings.
        my $label = defined($1) ? $1 : "";
        my $indent = $2;
        my $instr = $3;
        my $origspace = defined($4) ? $4 : "";
        my $rest = defined($5) ? $5 : "";

        my $orig_operand_indent = length($label) + length($indent) +
                                  length($instr) + length($origspace);

        # Lowercase before realigning, so that uppercase vector layouts
        # and modifiers are recognized when choosing column widths; this
        # way, running the script twice gives the same result as once.
        $rest = lowercase_operands($rest);

        if ($indent_operands) {
            $rest = columns($rest);
        }

        my $size = $instr_indent;
        if ($match_indent) {
            # Try to check the current attempted indent size and normalize
            # to it; match existing indent sizes of 4, 8, 10 and 12 columns.
            my $cur_indent = length($label) + length($indent);
            if ($cur_indent >= 3 && $cur_indent <= 5) {
                $size = 4;
            } elsif ($cur_indent >= 7 && $cur_indent <= 9) {
                $size = 8;
            } elsif ($cur_indent == 10 || $cur_indent == 12) {
                $size = $cur_indent;
            }
        }
        if (length($label) >= $size) {
            # Not enough space for the label; just add a space between the label
            # and the instruction.
            $indent = " ";
        } else {
            $indent = spaces($size - length($label));
        }

        my $instr_end = length($label) + length($indent) + length($instr);
        # The column where the operands should start.
        my $operand_column = $operand_indent;
        if ($match_indent) {
            # Check how the operands currently seem to be indented. If the
            # current indent matches none of the known styles, the
            # configured operand column is kept.
            my $cur_indent = $orig_operand_indent;
            if ($cur_indent >= 11 && $cur_indent <= 13) {
                $operand_column = 12;
            } elsif ($cur_indent >= 14 && $cur_indent <= 17) {
                $operand_column = 16;
            } elsif ($cur_indent >= 18 && $cur_indent <= 22) {
                $operand_column = 20;
            } elsif ($cur_indent >= 23 && $cur_indent <= 27) {
                $operand_column = 24;
            }
        }
        # Subtract the width of what precedes the operands exactly once.
        $size = $operand_column - $instr_end;
        # Always keep at least one space between instruction and operands.
        my $operand_space = " ";
        if ($size > 0) {
            $operand_space = spaces($size);
        }

        # Reassemble the line
        if ($rest eq "") {
            $_ = $label . $indent . $instr;
        } else {
            $_ = $label . $indent . $instr . $operand_space . $rest;
        }
    }
    print {$out} $_ . "\n" or die "Unable to write output: $!";
}

if ($have_file) {
    close($in);
    # Buffered write errors can surface at close; check for them so that
    # a truncated temporary file never replaces the input file.
    close($out) or die "Unable to finish writing output: $!";
}
if ($tempfile) {
    rename($tempfile, $file)
        or die "Unable to rename $tempfile to $file: $!";
}