Monday, July 8, 2013

Mixed mode disassembly

While testing LLDB on 64-bit Linux with the simple program below, I ran into an interesting bit of gdb behavior I had never noticed before.
  1| #include <stdio.h>  
  2| #include <stdlib.h>  
  3|  
  4| int main( int argc, char *argv[] )  
  5| {  
  6|   int blah2[8192];  
  7|   for(size_t i = 0; i < 8192; ++i)  
  8|   {  
  9|     blah2[i] = rand();  
 10|   }  
 11| }  

Setting a breakpoint on line 7 with LLDB gives two locations:
 (lldb) breakpoint set -l 7  
 Breakpoint 2: 2 locations.  

gdb, by contrast, sets a single breakpoint for the same line:
  (gdb) b 7   
  Breakpoint 1 at 0x4007c9: file ~/data/src/blah2/blah.cpp, line 7.   

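I dumped the disassembly with mixed source in both debuggers, and the LLDB and gdb output look quite different. It took me a second to figure out what's going on: gdb is moving assembly instructions so that they are grouped under their source line numbers instead of being listed in address order. It moved the four bold instructions (the loop increment and back-jump at 0x4007f8 through 0x40080c, which belong to line 7) up before the rand() call. Crazy!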
 (gdb) disassemble /m main  
 Dump of assembler code for function main(int, char**):  
 5    {  
   0x00000000004007b0 <+0>:   push  rbp  
   0x00000000004007b1 <+1>:   mov  rbp,rsp  
   0x00000000004007b4: sub  rsp,0x8020  
   0x00000000004007bb: mov  DWORD PTR [rbp-0x4],0x0  
   0x00000000004007c2: mov  DWORD PTR [rbp-0x8],edi  
   0x00000000004007c5: mov  QWORD PTR [rbp-0x10],rsi  
 6      int blah2[8192];  
 7      for(size_t i = 0; i < 8192; ++i)  
 => 0x00000000004007c9: mov  QWORD PTR [rbp-0x8018],0x0  
   0x00000000004007d4: cmp  QWORD PTR [rbp-0x8018],0x2000  
   0x00000000004007df: jae  0x400811  
   0x00000000004007f8: mov  rax,QWORD PTR [rbp-0x8018]  
   0x00000000004007ff: add  rax,0x1  
   0x0000000000400805: mov  QWORD PTR [rbp-0x8018],rax  
   0x000000000040080c: jmp  0x4007d4  
 8      {  
 9        blah2[i] = rand();  
   0x00000000004007e5: call  0x400690 <rand@plt>  
   0x00000000004007ea: mov  rcx,QWORD PTR [rbp-0x8018]  
   0x00000000004007f1: mov  DWORD PTR [rbp+rcx*4-0x8010],eax  
 10     }  
 11   }  
   0x0000000000400811: mov  eax,DWORD PTR [rbp-0x4]  
   0x0000000000400814: add  rsp,0x8020  
   0x000000000040081b: pop  rbp  
   0x000000000040081c: ret  

For comparison, this is the straightforward disassemble call without source, which lists the instructions in address order:
 (gdb) disassemble main  
 Dump of assembler code for function main(int, char**):  
   0x00000000004007b0 <+0>:   push  rbp  
   0x00000000004007b1 <+1>:   mov  rbp,rsp  
   0x00000000004007b4: sub  rsp,0x8020  
   0x00000000004007bb: mov  DWORD PTR [rbp-0x4],0x0  
   0x00000000004007c2: mov  DWORD PTR [rbp-0x8],edi  
   0x00000000004007c5: mov  QWORD PTR [rbp-0x10],rsi  
 => 0x00000000004007c9: mov  QWORD PTR [rbp-0x8018],0x0  
   0x00000000004007d4: cmp  QWORD PTR [rbp-0x8018],0x2000  
   0x00000000004007df: jae  0x400811  
   0x00000000004007e5: call  0x400690 <rand@plt>  
   0x00000000004007ea: mov  rcx,QWORD PTR [rbp-0x8018]  
   0x00000000004007f1: mov  DWORD PTR [rbp+rcx*4-0x8010],eax  
   0x00000000004007f8: mov  rax,QWORD PTR [rbp-0x8018]  
   0x00000000004007ff: add  rax,0x1  
   0x0000000000400805: mov  QWORD PTR [rbp-0x8018],rax  
   0x000000000040080c: jmp  0x4007d4  
   0x0000000000400811: mov  eax,DWORD PTR [rbp-0x4]  
   0x0000000000400814: add  rsp,0x8020  
   0x000000000040081b: pop  rbp  
   0x000000000040081c: ret  
 End of assembler dump.  

LLDB's output looks like the below. The two bold instructions (0x4007c9 and 0x4007f8, the first instruction of each of the two line-7 ranges) are where the breakpoint locations are. I think I'm going to keep the current LLDB behavior on this one, although I am going to add the ability to lowercase the registers and print constants in hex...
 (lldb) disassemble -m -n main  
 blah`main at blah.cpp:5  
   4  int main( int argc, char *argv[] )  
   5  {  
   6    int blah2[8192];  
   0x4007b0: push  RBP  
   0x4007b1: mov  RBP, RSP  
   0x4007b4: sub  RSP, 32800  
   0x4007bb: mov  DWORD PTR [RBP - 4], 0  
   0x4007c2: mov  DWORD PTR [RBP - 8], EDI  
   0x4007c5: mov  QWORD PTR [RBP - 16], RSI  
 blah`main + 25 at blah.cpp:7  
   6    int blah2[8192];  
   7    for(size_t i = 0; i < 8192; ++i)  
   8    {  
   0x4007c9: mov  QWORD PTR [RBP - 32792], 0  
   0x4007d4: cmp  QWORD PTR [RBP - 32792], 8192  
   0x4007df: jae  0x400811         ; main + 97 at blah.cpp:11  
 blah`main + 53 at blah.cpp:9  
   8    {  
   9      blah2[i] = rand();  
   10    }  
   0x4007e5: call  0x400690         ; symbol stub for: rand  
   0x4007ea: mov  RCX, QWORD PTR [RBP - 32792]  
   0x4007f1: mov  DWORD PTR [RBP + 4*RCX - 32784], EAX  
 blah`main + 72 at blah.cpp:7  
   6    int blah2[8192];  
   7    for(size_t i = 0; i < 8192; ++i)  
   8    {  
   0x4007f8: mov  RAX, QWORD PTR [RBP - 32792]  
   0x4007ff: add  RAX, 1  
   0x400805: mov  QWORD PTR [RBP - 32792], RAX  
   0x40080c: jmpq  0x4007d4         ; main + 36 at blah.cpp:7  
 blah`main + 97 at blah.cpp:11  
   10    }  
   11  }  
   0x400811: mov  EAX, DWORD PTR [RBP - 4]  
   0x400814: add  RSP, 32800  
   0x40081b: pop  RBP  
   0x40081c: ret    

No comments:

Post a Comment