Sat Mar 25 2023
Recently I started reading
Reverse Engineering for Beginners by Dennis Yurichev to
learn x86 and arm64 assembly. Chapter 1.18.1 on conditional jumps has an
interesting arm64 exercise, so I thought I’d go through it step by step.
The sauce is as follows:
#include <stdio.h>
void f_signed (int a, int b)
{
if (a>b)
printf ("a>b\n");
if (a==b)
printf ("a==b\n");
if (a<b)
printf ("a<b\n");
};
void f_unsigned (unsigned int a, unsigned int b)
{
if (a>b)
printf ("a>b\n");
if (a==b)
printf ("a==b\n");
if (a<b)
printf ("a<b\n");
};
int main()
{
f_signed(1, 2);
f_unsigned(1, 2);
return 0;
};Dennis’s arm64 compiler (gcc Linaro 4.9) generates this “optimzed”
code for the two f_* functions:
f_signed (int a, int b):
// W0=a, W1=b
cmp w0, w1
bgt .L19 // Branch if Greater Than (a>b)
beq .L20 // Branch if Equal (a==b)
bge .L15 // Branch if Greater than or Equal (a>=b) (impossible here)
// a<b
adrp x0, .LC11 // "a<b"
add x0, x0, :lo12:.LC11
b puts
.L19:
adrp x0, .LC9 // "a>b"
add x0, x0, :lo12:.LC9
b puts
.L15: // impossible to get here
ret
.L20:
adrp x0, .LC10 // "a==b"
add x0, x0, :lo12:.LC10
b puts
f_unsigned (unsigned int a, unsigned int b):
stp x29, x30, [sp, -48]!
// W0=a, W1=b
cmp w0, w1
add x29, sp, 0
str x19, [sp,16]
mov w19, w0
bhi .L25 // Branch if HIgher (a>b)
cmp w19, w1
beq .L26 // Branch if Equal (a==b)
.L23:
bcc .L27 // Branch if Carry Clear (if less than) (a<b)
// function epilogue, impossible to be here
ldr x19, [sp,16]
ldp x29, x30, [sp], 48
ret
.L27:
ldr x19, [sp,16]
adrp x0, .LC11 // "a<b"
ldp x29, x30, [sp], 48
add x0, x0, :lo12:.LC11
b puts
.L25:
adrp x0, .LC9 // "a>b"
str x1, [x29,40]
add x0, x0, :lo12:.LC9
bl puts
ldr x1, [x29,40]
cmp w19, w1
bne .L23 // Branch if Not Equal
.L26:
ldr x19, [sp,16]
adrp x0, .LC10 // "a==b"
ldp x29, x30, [sp], 48
add x0, x0, :lo12:.LC10
b putsThis code contains dead code[1] and redundant instructions which the author asks us to remove.
Before we start I had to add a few things to get the code to assemble and run (with aarch64-gcc 12.2.1):
.LC9:
.string "a>b"
.LC10:
.string "a==b"
.LC11:
.string "a<b"
.LC12:
.string "f_signed:"
.LC13:
.string "\nf_unsigned:".globl main
main:
stp x29, x30, [sp, -16]!
mov x29, sp
adrp x0, .LC12 // "f_signed"
str x1, [x29,40]
add x0, x0, :lo12:.LC12
bl puts
mov w1, 2
mov w0, 1
bl f_signed
mov w1, 1
mov w0, 1
bl f_signed
mov w1, 1
mov w0, 2
bl f_signed
adrp x0, .LC13 // "f_unsigned"
str x1, [x29,40]
add x0, x0, :lo12:.LC13
bl puts
mov w1, 2
mov w0, 1
bl f_unsigned
mov w1, 1
mov w0, 1
bl f_unsigned
mov w1, 1
mov w0, 2
bl f_unsigned
mov w0, 0
ldp x29, x30, [sp], 16
ret f_signed(int, int)
f_unsigned(unsigned int, unsigned int)becomes just
f_signed
f_unsigned.balign 4
f_signed:Now we can assemble and run
qemu-aarch64:~$ gcc ex.s -o ex
qemu-aarch64:~$ ./ex
f_signed:
a<b
a==b
a>b
f_unsigned:
a<b
a==b
a>bLet’s start easy by removing, and move the LDP (Load Pair) call
restoring x29, x30 and the stack pointer into label .L25.
The bl call in .L25 also needs to be converted
to a b call since bl modifies the r14 return
register making us return to the instruction immediately below instead
of back to main. These changes results in the following diff:
--- ex1.18.1_arm64.s 2023-03-25 14:20:13.382373022 +0100
+++ tmp.s 2023-03-25 14:20:20.866261321 +0100
@@ -15,8 +15,6 @@
cmp w0, w1
bgt .L19 // Branch if Greater Than (a>b)
beq .L20 // Branch if Equal (a==b)
- bge .L15 // Branch if Greater than or Equal (a>=b) (impossible
- here)
// a<b
adrp x0, .LC11 // "a<b"
add x0, x0, :lo12:.LC11
@@ -25,8 +23,6 @@
adrp x0, .LC9 // "a>b"
add x0, x0, :lo12:.LC9
b puts
-.L15: // impossible to get here
- ret
.L20:
adrp x0, .LC10 // "a==b"
add x0, x0, :lo12:.LC10
@@ -42,12 +38,6 @@
bhi .L25 // Branch if HIgher (a>b)
cmp w19, w1
beq .L26 // Branch if Equal (a==b)
-.L23:
- bcc .L27 // Branch if Carry Clear (if less than) (a<b)
- // function epilogue, impossible to be here
- ldr x19, [sp,16]
- ldp x29, x30, [sp], 48
- ret
.L27:
ldr x19, [sp,16]
adrp x0, .LC11 // "a<b"
@@ -57,11 +47,9 @@
.L25:
adrp x0, .LC9 // "a>b"
str x1, [x29,40]
+ ldp x29, x30, [sp], 48
add x0, x0, :lo12:.LC9
- bl puts
- ldr x1, [x29,40]
- cmp w19, w1
- bne .L23 // Branch if Not Equal
+ b puts
.L26:
ldr x19, [sp,16]
adrp x0, .LC10 // "a==b"
Since the variables are stored in registers w0 and w1 and we don’t use
the stack we can remove stp (Store Pair) call and the ldp calls, as well
as the add x29, sp, 0 which stores the stack frame in
register x29:
--- tmp.s 2023-03-25 14:42:53.258266321 +0100
+++ tmp2.s 2023-03-25 14:46:42.466947431 +0100
@@ -29,10 +29,8 @@
b puts
f_unsigned:
- stp x29, x30, [sp, -48]!
// W0=a, W1=b
cmp w0, w1
- add x29, sp, 0
str x19, [sp,16]
mov w19, w0
bhi .L25 // Branch if HIgher (a>b)
@@ -41,19 +39,16 @@
.L27:
ldr x19, [sp,16]
adrp x0, .LC11 // "a<b"
- ldp x29, x30, [sp], 48
add x0, x0, :lo12:.LC11
b puts
.L25:
adrp x0, .LC9 // "a>b"
str x1, [x29,40]
- ldp x29, x30, [sp], 48
add x0, x0, :lo12:.LC9
b puts
.L26:
ldr x19, [sp,16]
adrp x0, .LC10 // "a==b"
- ldp x29, x30, [sp], 48
add x0, x0, :lo12:.LC10
b puts
We don’t need to store variable a in register 19 either since we only need to do the comparison once:
--- tmp2.s 2023-03-25 14:46:42.466947431 +0100
+++ tmp3.s 2023-03-25 14:51:52.947461862 +0100
@@ -31,23 +31,17 @@
f_unsigned:
// W0=a, W1=b
cmp w0, w1
- str x19, [sp,16]
- mov w19, w0
bhi .L25 // Branch if HIgher (a>b)
- cmp w19, w1
beq .L26 // Branch if Equal (a==b)
.L27:
- ldr x19, [sp,16]
adrp x0, .LC11 // "a<b"
add x0, x0, :lo12:.LC11
b puts
.L25:
adrp x0, .LC9 // "a>b"
- str x1, [x29,40]
add x0, x0, :lo12:.LC9
b puts
.L26:
- ldr x19, [sp,16]
adrp x0, .LC10 // "a==b"
add x0, x0, :lo12:.LC10
b puts
Now we are left with the much smaller f functions:
f_signed:
cmp w0, w1 // W0=a, W1=b
bgt .L19 // Branch if Greater Than (a>b)
beq .L20 // Branch if Equal (a==b)
// a<b
adrp x0, .LC11 // "a<b"
add x0, x0, :lo12:.LC11
b puts
.L19:
adrp x0, .LC9 // "a>b"
add x0, x0, :lo12:.LC9
b puts
.L20:
adrp x0, .LC10 // "a==b"
add x0, x0, :lo12:.LC10
b puts
f_unsigned:
cmp w0, w1 // W0=a, W1=b
bhi .L25 // Branch if HIgher (a>b)
beq .L26 // Branch if Equal (a==b)
.L27:
adrp x0, .LC11 // "a<b"
add x0, x0, :lo12:.LC11
b puts
.L25:
adrp x0, .LC9 // "a>b"
add x0, x0, :lo12:.LC9
b puts
.L26:
adrp x0, .LC10 // "a==b"
add x0, x0, :lo12:.LC10
b puts[1]: dead code is code that will never be executed