Upgrade to Pro — share decks privately, control downloads, hide ads and more …

What is an ABI, and Why Should You Care?

What is an ABI, and Why Should You Care?

Presented at Open Source Summit Europe (OSS EU) 2024

Avatar for shunghsiyu

shunghsiyu

May 26, 2025
Tweet

More Decks by shunghsiyu

Other Decks in Technology

Transcript

  1. About me Shung-Hsi Yu @shunghsiyu(@fosstodon.org) Based in Taitung, Taiwan Works

    at SUSE Kernel Engineer Maintains (e)BPF stack in SLES and openSUSE 5
  2. 26 Guarantee Python’s range(2) starts from 0 GitLab’s /tree endpoint

    returns files in the repo glibc’s printf() writes to stdout
  3. 33 Binary Programming long x_square(struct point *p) { return p->x

    * p->x; } 00000000100001010011010100000011 00000010101001010000010100110011 00000000000000001000000001100111
  4. 35 Binary Programming long x_square(struct point *p) { return p->x

    * p->x; } x_square: ld a0, 0(a0) mul a0, a0, a0 ret
  5. 36 Binary Programming x_square: ld a0, 0(a0) mul a0, a0,

    a0 ret long x_square(struct point *p) { return p->x * p->x; }
  6. long x_square(struct point *p) { return p->x * p->x; }

    x_square: ld a0, 8(a0) mul a0, a0, a0 ret 37 Binary Programming compiler
  7. 48 Development Header helper routine (declaration) Application business logic //

    geo.c #include <geo-v1.h> long x_square(struct point *p) { return p->x * p->x; }
  8. 49 Development Header helper routine (declaration) Application business logic //

    geo.c #include <geo-v1.h> long x_square(struct point *p) { return p->x * p->x; }
  9. 50 Development Header helper routine (declaration) // geo.c #include <geo-v1.h>

    long x_square(struct point *p) { return p->x * p->x; } // main.c #include <geo-v1.h> int main(void) { struct point p = { ... }; long x2 = x_square(&p); return arr[x2]; }
  10. 51 Development Header helper routine (declaration) // geo.c #include <geo-v1.h>

    long x_square(struct point *p) { return p->x * p->x; } // main.c #include <geo-v1.h> int main(void) { struct point p = { ... }; long x2 = x_square(&p); return arr[x2]; }
  11. 52 Development Header helper routine (declaration) // geo.c #include <geo-v1.h>

    long x_square(struct point *p) { return p->x * p->x; } // main.c #include <geo-v1.h> int main(void) { struct point p = { ... }; long x2 = x_square(&p); return arr[x2]; }
  12. 53 // geo.h struct point { long x; long y;

    }; // geo.c #include <geo-v1.h> long x_square(struct point *p) { return p->x * p->x; } // main.c #include <geo-v1.h> int main(void) { struct point p = { ... }; long x2 = x_square(&p); return arr[x2]; }
  13. 54 // geo.c #include <geo.h> long x_square(struct point *p) {

    return p->x * p->x; } // main.c #include <geo.h> int main(void) { struct point p = { ... }; long x2 = x_square(&p); return arr[x2]; } // geo.h struct point { long x; long y; };
  14. 55 // geo.c #include <geo.h> long x_square(struct point *p) {

    return p->x * p->x; } // main.c #include <geo.h> int main(void) { struct point p = { .x = 2, .y = 1, }; long x2 = x_square(&p); return arr[x2]; } // geo.h struct point { long x; long y; };
  15. 56 // geo.c #include <geo.h> long x_square(struct point *p) {

    return p->x * p->x; } // main.c #include <geo.h> int main(void) { struct point p = { .x = 2, .y = 1, }; long x2 = x_square(&p); return arr[x2]; } // geo.h struct point { long x; long y; };
  16. 57 // geo.c #include <geo.h> long x_square(struct point *p) {

    return p->x * p->x; } // main.c #include <geo.h> int main(void) { struct point p = { .x = 2, .y = 1, }; /* you get x2 == 4 */ long x2 = x_square(&p); return arr[x2]; } // geo.h struct point { long x; long y; };
  17. 58 // geo.c #include <geo.h> long x_square(struct point *p) {

    return p->x * p->x; } // main.c #include <geo.h> int main(void) { struct point p = { .x = 2, .y = 1, }; /* you get x2 == 4 */ long x2 = x_square(&p); /* use x2 to index arr */ return arr[x2]; } // geo.h struct point { long x; long y; };
  18. 59 // geo.h struct point { long x; long y;

    }; // geo.c #include <geo.h> long x_square(struct point *p) { return p->x * p->x; } // main.c #include <geo.h> long arr[] = { 0, 1, 2, 3, 4 }; // n-th element = n int main(void) { struct point p = { .x = 2, .y = 1, }; printf("Calculating\n"); /* you get x2 == 4 */ long x2 = x_square(&p); /* arr[4] == 4 */ return arr[x2]; }
  19. 60 // geo.h struct point { long x; long y;

    }; long x_square(struct point *); // geo.c #include <geo.h> long x_square(struct point *p) { return p->x * p->x; } // main.c #include <geo.h> long arr[] = { 0, 1, 2, 3, 4 }; // n-th element = n int main(void) { struct point p = { .x = 2, .y = 1, }; printf("Calculating\n"); /* you get x2 == 4 */ long x2 = x_square(&p); /* arr[4] == 4 */ return arr[x2]; }
  20. 61 // geo.h struct point { long x; long y;

    }; long x_square(struct point *); // geo.c #include <geo.h> long x_square(struct point *p) { return p->x * p->x; } // main.c #include <geo.h> long arr[] = { 0, 1, 2, 3, 4 }; // n-th element = n int main(void) { struct point p = { .x = 2, .y = 1, }; printf("Calculating\n"); /* you get x2 == 4 */ long x2 = x_square(&p); /* arr[4] == 4 */ return arr[x2]; }
  21. 63 # Compile the library -> libgeo.so $ gcc -I$(pwd)

    -shared -o ./libgeo.so geo.c # Compile application which uses the library -> main $ gcc -I$(pwd) -o ./main libgeo.so main.c
  22. 64 # Compile the library -> libgeo.so $ gcc -I$(pwd)

    -shared -o ./libgeo.so geo.c # Compile application which uses the library -> main $ gcc -I$(pwd) -o ./main libgeo.so main.c # Run the application $ ./main; echo $? Calculating 4 # 2 * 2 = 4 and arr[4] = 4, no surprise here
  23. 67 # Re-compile the library that has x_2() $ gcc

    -I$(pwd) -shared -o ./libgeo.so geo.c
  24. 68 # Re-compile the library that has x_2() $ gcc

    -I$(pwd) -shared -o ./libgeo.so geo.c # Run the application, WITHOUT re-compiling $ ./main; echo $?
  25. 69 # Re-compile the library that has x_2() $ gcc

    -I$(pwd) -shared -o ./libgeo.so geo.c # Run the application, WITHOUT re-compiling # It’s not run at all (no “Calculating”) $ ./main; echo $? ./main: symbol lookup error: ./main: undefined symbol: x_square
  26. 76 long x_square(struct point *p) { return p->x * p->x;

    } long x_square(long x) { return x * x; }
  27. 77 # Re-compile the library that has x_square(long x) $

    gcc -I$(pwd) -shared -o ./libgeo.so geo.c
  28. 78 # Re-compile the library that has x_square(long x) $

    gcc -I$(pwd) -shared -o ./libgeo.so geo.c # Run the application, WITHOUT re-compiling $ ./main; echo $?
  29. 79 # Re-compile the library that has x_square(long x) $

    gcc -I$(pwd) -shared -o ./libgeo.so geo.c # Run the application, WITHOUT re-compiling # Started running, but crashes $ ./main; echo $? Calculating Segmentation fault (core dumped) 139
  30. 82 struct point { long x; long y; }; struct

    point { long z; long x; long y; };
  31. 83 struct point { long x; long y; }; struct

    point { long z; long x; long y; }; 😏
  32. 84 # Re-compile the library, z is first field in

    point $ gcc -I$(pwd) -shared -o ./libgeo.so geo.c
  33. 85 # Re-compile the library, z is first field in

    point $ gcc -I$(pwd) -shared -o ./libgeo.so geo.c # Run the application, WITHOUT re-compiling $ ./main; echo $?
  34. 86 # Re-compile the library, z is first field in

    point $ gcc -I$(pwd) -shared -o ./libgeo.so geo.c # Run the application, WITHOUT re-compiling # No crash, just wrong $ ./main; echo $? Calculating 1 # returns 1 instead of 4
  35. 88 struct point { long x; long y; }; struct

    point { long x; long y; long z; };
  36. 89 struct point { long x; long y; }; struct

    point { long x; long y; long z; };
  37. 90 # Re-compile the library, z is last field in

    point $ gcc -I$(pwd) -shared -o ./libgeo.so geo.c
  38. 91 # Re-compile the library, z is last field in

    point $ gcc -I$(pwd) -shared -o ./libgeo.so geo.c # Run the application, without re-compiling $ ./main; echo $?
  39. 92 # Re-compile the library, z is last field in

    point $ gcc -I$(pwd) -shared -o ./libgeo.so geo.c # Run the application, without re-compiling # No crash, everything works $ ./main; echo $? Calculating 4 # returns 4 as expected
  40. 96 struct point { long x; long y; }; struct

    point { long z; long x; long y; };
  41. 97 struct point { long x; long y; }; struct

    point { long x; long y; long z; };
  42. 99 Binary Programming long x_square(struct point *p) { return p->x

    * p->x; } x_square: ld a0, 0(a0) mul a0, a0, a0 ret
  43. 104 Binary Programming long x_square(struct point *p) { return p->x

    * p->x; } x_square: ld a0, 0(a0) # load x mul a0, a0, a0 # x*x ret
  44. 105 Binary Programming int main(void) { struct point p =

    { .x = 2, .y = 1, }; ... } main: # a0 points to p li a1, 2 # get 2 sd a1, 0(a0) # store x li a1, 1 # get 1 sd a1, 8(a0) # store y ...
  45. 106 Binary Programming int main(void) { struct point p =

    { .x = 2, .y = 1, }; ... } main: # a0 points to p li a1, 2 # get 2 sd a1, 0(a0) # store x li a1, 1 # get 1 sd a1, 8(a0) # store y ...
  46. 107 Binary Programming int main(void) { struct point p =

    { .x = 2, .y = 1, }; ... } main: # a0 points to p li a1, 2 # get 2 sd a1, 0(a0) # store x li a1, 1 # get 1 sd a1, 8(a0) # store y ...
  47. 108 Binary Programming long x_square(struct point *p) { return p->x

    * p->x; } x_square: ld a0, 0(a0) # load x mul a0, a0, a0 # x*x ret
  48. 109 Binary Programming long x_square(struct point *p) { return p->x

    * p->x; } x_square: ld a0, 0(a0) # load x mul a0, a0, a0 # x*x ret
  49. 110 struct point { long x; long y; }; struct

    point { long z; long x; long y; };
  50. 111 struct point { long x; long y; }; struct

    point { long z; long x; long y; };
  51. 112 Binary Programming long x_square(struct point *p) { return p->x

    * p->x; } x_square: ld a0, 8(a0) mul a0, a0, a0 ret
  52. 113 Binary Programming long x_square(struct point *p) { return p->x

    * p->x; } x_square: ld a0, 8(a0) mul a0, a0, a0 ret
  53. 114 Binary Programming int main(void) { struct point p =

    { .x = 2, .y = 1, }; ... } main: # a0 points to p li a1, 2 # get 2 sd a1, 0(a0) # store x li a1, 1 # get 1 sd a1, 8(a0) # store y ...
  54. 115 Binary Programming int main(void) { struct point p =

    { .x = 2, .y = 1, }; ... } main: # a0 points to p li a1, 2 # get 2 sd a1, 0(a0) # store x li a1, 1 # get 1 sd a1, 8(a0) # store y ...
  55. 116 struct point { long x; // 0 long y;

    // 8 }; struct point { long z; // 0 long x; // 8 long y; // 16 };
  56. 117 struct point { long x; // 0 long y;

    // 8 }; struct point { long z; // 0 long x; // 8 long y; // 16 };
  57. 119 struct point { long x; // 0 long y;

    // 8 }; struct point { long x; // 0 long y; // 8 long z; // 16 };
  58. 136 $ rpm -q --provides glibc libc.so.6(GLIBC_2.10)(64bit) libc.so.6(GLIBC_2.11)(64bit) ... libc.so.6(GLIBC_2.39)(64bit)

    libc.so.6(GLIBC_2.4)(64bit) libc.so.6(GLIBC_2.5)(64bit) libc.so.6(GLIBC_2.6)(64bit) libc.so.6(GLIBC_2.7)(64bit) libc.so.6(GLIBC_2.8)(64bit) ...
  59. 137 $ rpm -q --requires bash ... libc.so.6()(64bit) libc.so.6(GLIBC_2.11)(64bit) libc.so.6(GLIBC_2.14)(64bit)

    libc.so.6(GLIBC_2.15)(64bit) libc.so.6(GLIBC_2.2.5)(64bit) libc.so.6(GLIBC_2.25)(64bit) libc.so.6(GLIBC_2.3)(64bit) libc.so.6(GLIBC_2.3.4)(64bit) ...
  60. 139 struct point { long x; // 0 long y;

    // 8 }; struct point { long x; // 0 long y; // 8 long z; // 16 };
  61. int openat(int dirfd, const char *pathname, int flags, ... /*

    mode_t mode */ ); int openat2(int dirfd, const char *pathname, const struct open_how *how, size_t size); 144
  62. 165 Arch/ABI arg1 arg2 arg3 arg4 arg5 arg6 i386 ebx

    ecx edx esi edi ebp x86-64 rdi rsi rdx r10 r8 r9 aarch64 x0 x1 x2 x3 x4 x5 riscv a0 a1 a2 a3 a4 a5
  63. 166 Arch/ABI arg1 arg2 arg3 arg4 arg5 arg6 i386 ebx

    ecx edx esi edi ebp x86-64 rdi rsi rdx r10 r8 r9 aarch64 x0 x1 x2 x3 x4 x5 riscv a0 a1 a2 a3 a4 a5
  64. 167 Arch/ABI arg1 arg2 arg3 arg4 i386 ebx ecx edx

    esi x86-64 rdi rsi rdx r10 aarch64 x0 x1 x2 x3 riscv a0 a1 a2 a3
  65. 197

  66. 201 #include <stdio.h> int main(void) { /* Can you use

    printf in Python? */ printf("Hello %s!\n", "world"); }
  67. 202 #!/usr/bin/python3 import ctypes # We can load _any_ shared

    library # Let's use the ubiquitous libc here libc = ctypes.CDLL('libc.so.6') # This is the _exact_ same printf you # get in C libc.printf(b'Hello %s!\n', b'world')
  68. 205 Ruby → C Java → C Go → C

    Python → Swift Julia → Rust
  69. 213 A 10-minute guide to the Linux ABI by Alison

    Chaiken Binary Banshees and Digital Demons by JeanHeyd Meneide C Isn't A Programming Language Anymore by Aria Desires To Save C, We Must Save ABI by JeanHeyd Meneide Pair Your Compilers At The ABI Café by Aria Desires The Lost Art of Structure Packing by Eric S. Raymond Application binary interface compatibility testing with libabigail by Frank Eigler ABI stable symbols - Linux Kernel Documentation A look at dynamic linking - LWN.net by Daroc Alden