Upgrade to Pro — share decks privately, control downloads, hide ads and more …

Faster FFI for Ruby

Faster FFI for Ruby

Building a faster FFI for Ruby. Leveraging an FFI type DSL to generate C extensions that contain hints for the JIT compiler. CRuby gets faster because C extensions are faster than FFI. When the JIT is enabled, calls get even faster.

Avatar for Aaron Patterson

Aaron Patterson

May 19, 2026

More Decks by Aaron Patterson

Other Decks in Technology

Transcript

  1. FFI

  2. Call Foreign Functions Anything that implements C calling

    convention Foreign Function Interface C Calling Convention
  3. Call Foreign Functions with Ruby Usually

    done with libffi libffi: ffi gem Fiddle gem C Calling Convention
  4. Ruby calling C via FFI We can call

    a function written in C without libffi require 'ffi' module Adder extend FFI::Library ffi_lib './libadder.so' attach_function :add, [:int, :int], :int end result = Adder.add(5, 10) puts "Output: #{result}" # Output: 15 test.rb int add(int a, int b) { return a + b; } add.c libffi
  5. FFI vs C extension: strlen FFI Performance overhead

    #include <ruby.h> #include <string.h> static VALUE rb_strlen(VALUE self, VALUE str) { return LONG2NUM( strlen( StringValueCStr(str))); } void Init_strlen(void) { VALUE rb_mStrlen = rb_define_module("CStrlen"); rb_define_module_function( rb_mStrlen, "strlen", rb_strlen, 1); } C extension require "ffi" module FFIStrlen extend FFI::Library ffi_lib 'c' attach_function :strlen, [:string], :int end FFI extension
  6. Benchmark Code Compare C extension with FFI require "ips" def

    use_ffi(str) FFIStrlen.strlen str end def use_c_ext(str) CStrlen.strlen str end IPS.run do |x| x.report("c-ext") { use_c_ext("hello") } x.report("ffi") { use_ffi("hello") } end
  7. Benchmark Results C extension is 2.4x faster ruby -I lib

    test.rb c-ext: 27.223M i/s (± 0.9%) ffi: 11.255M i/s (± 0.6%) Summary c-ext ran 2.42 ± 0.03 times faster than ffi C ext vs FFI (higher is better) Iterations Per Second (million) 0 10 20 30 Iterations Per Second FFI C extension
  8. How Fast is a Ruby method? Compare with calling

    “bytesize” def use_ruby(str) str.bytesize end def use_ffi(str) FFIStrlen.strlen str end def use_c_ext(str) CStrlen.strlen str end IPS.run do |x| x.report("c-ext") { use_c_ext("hello") } x.report("ffi") { use_ffi("hello") } x.report("ruby") { use_ruby("hello") } end C ext vs FFI (higher is better) Iterations Per Second (million) 0 10 20 30 Iterations Per Second FFI C extension Ruby
  9. Ruby uses a stack, C uses registers Stack must be

    copied to registers some_c_function(1, 2, 3) Ruby Code self 1 2 3 Ruby Stack X0 X1 X2 X3 C Registers (ARM64)
  10. rb_define_module_function( rb_mStrlen, "strlen", rb_strlen, 1); rb_define_method has stack size We

    know the number of values to copy at compile time rb_define_module_function( rb_mStrlen, "strlen", rb_strlen, 1); We copy N+1 stack values (in this case 2)
  11. static VALUE call_cfunc_1(VALUE recv, int argc, const VALUE *argv, VALUE

    (*func)(ANYARGS)) { ractor_unsafe_check(); VALUE(*f)(VALUE, VALUE) = (VALUE(*)(VALUE, VALUE))func; return (*f)(recv, argv[0]); } vm_insnhelper.c Copies 2 values to registers by calling a function pointer static VALUE call_cfunc_1(VALUE recv, int argc, const VALUE *argv, VALUE (*func)(ANYARGS)) { ractor_unsafe_check(); VALUE(*f)(VALUE, VALUE) = (VALUE(*)(VALUE, VALUE))func; return (*f)(recv, argv[0]); } Copy Receiver Copy 1 parameter
  12. vm_insnhelper.c Copies 3 values to registers by calling

    a function pointer static VALUE call_cfunc_2(VALUE recv, int argc, const VALUE *argv, VALUE (*func)(ANYARGS)) { ractor_unsafe_check(); VALUE(*f)(VALUE, VALUE, VALUE) = (VALUE(*)(VALUE, VALUE, VALUE))func; return (*f)(recv, argv[0], argv[1]); } Copy Receiver Copy 2 parameters
  13. vm_insnhelper.c Copies 4 values to registers by calling

    a function pointer static VALUE call_cfunc_3(VALUE recv, int argc, const VALUE *argv, VALUE (*func)(ANYARGS)) { ractor_unsafe_check(); VALUE(*f)(VALUE, VALUE, VALUE, VALUE) = (VALUE(*)(VALUE, VALUE, VALUE, VALUE))func; return (*f)(recv, argv[0], argv[1], argv[2]); } Copy Receiver Copy 3 parameters
  14. vm_insnhelper.c Copies 5 values to registers by calling

    a function pointer static VALUE call_cfunc_4(VALUE recv, int argc, const VALUE *argv, VALUE (*func)(ANYARGS)) { ractor_unsafe_check(); VALUE(*f)(VALUE, VALUE, VALUE, VALUE, VALUE) = (VALUE(*)(VALUE, VALUE, VALUE, VALUE, VALUE))func; return (*f)(recv, argv[0], argv[1], argv[2], argv[3]); } Copy Receiver Copy 4 parameters
  15. static VALUE call_cfunc_0(VALUE recv, int argc, const VALUE *argv, VALUE

    (*func)(ANYARGS)) { ractor_unsafe_check(); VALUE(*f)(VALUE) = (VALUE(*)(VALUE))func; return (*f)(recv); } static VALUE call_cfunc_1(VALUE recv, int argc, const VALUE *argv, VALUE (*func)(ANYARGS)) { ractor_unsafe_check(); VALUE(*f)(VALUE, VALUE) = (VALUE(*)(VALUE, VALUE))func; return (*f)(recv, argv[0]); } static VALUE call_cfunc_2(VALUE recv, int argc, const VALUE *argv, VALUE (*func)(ANYARGS)) { ractor_unsafe_check(); VALUE(*f)(VALUE, VALUE, VALUE) = (VALUE(*)(VALUE, VALUE, VALUE))func; return (*f)(recv, argv[0], argv[1]); } static VALUE call_cfunc_3(VALUE recv, int argc, const VALUE *argv, VALUE (*func)(ANYARGS)) { ractor_unsafe_check(); VALUE(*f)(VALUE, VALUE, VALUE, VALUE) = (VALUE(*)(VALUE, VALUE, VALUE, VALUE))func; return (*f)(recv, argv[0], argv[1], argv[2]); } static VALUE call_cfunc_4(VALUE recv, int argc, const VALUE *argv, VALUE (*func)(ANYARGS))
  16. Unbox Parameters / Box Return Value Convert `str` to

    a `char *` static VALUE rb_strlen(VALUE self, VALUE str) { return LONG2NUM( strlen( StringValueCStr(str))); } static VALUE rb_strlen(VALUE self, VALUE str) { return LONG2NUM( strlen( StringValueCStr(str))); } Unbox C string static VALUE rb_strlen(VALUE self, VALUE str) { return LONG2NUM( strlen( StringValueCStr(str))); } Call strlen
  17. High Level Overview Steps to calling a C

    extension function Push a Ruby Frame for Ruby strlen method Call call_cfunc_1 Call rb_strlen (the C extension) Unbox Parameters Call Strlen
  18. Compile / Runtime Information C extension knows values at compile

    time #include <ruby.h> #include <string.h> static VALUE rb_strlen(VALUE self, VALUE str) { return LONG2NUM( strlen( StringValueCStr(str))); } void Init_strlen(void) { VALUE rb_mStrlen = rb_define_module("CStrlen"); rb_define_module_function( rb_mStrlen, "strlen", rb_strlen, 1); } C extension require "ffi" module FFIStrlen extend FFI::Library ffi_lib 'c' attach_function :strlen, [:string], :int end FFI extension #include <ruby.h> #include <string.h> static VALUE rb_strlen(VALUE self, VALUE str) { return LONG2NUM( strlen( StringValueCStr(str))); } void Init_strlen(void) { VALUE rb_mStrlen = rb_define_module("CStrlen"); rb_define_module_function( rb_mStrlen, "strlen", rb_strlen, 1); } C extension require "ffi" module FFIStrlen extend FFI::Library ffi_lib 'c' attach_function :strlen, [:string], :int end FFI extension Known at Compile Time Known at Run Time
  19. Setup platform call frame Call strlen

    Enter FFI C extension Unbox Parameters
  20. FJIT: DSL for JIT compiled FFI FFI vs FJIT module

    FFIStrlen extend FFI::Library ffi_lib 'c' attach_function :strlen, [:string], :int end Code using FFI module FJITStrlen extend FJIT attach_function :strlen, [:string], :int end Code using FJIT
  21. FJIT vs FFI vs C ext Performance FJIT:

    2x faster than FFI, slightly faster than C extension def use_c_ext(str) CStrlen.strlen str end def use_ffi(str) FFIStrlen.strlen str end def use_fjit(str) FJITStrlen.strlen str end IPS.run do |x| x.report("ffi") { use_ffi("hello") } x.report("c-ext") { use_c_ext("hello") } x.report("fjit") { use_fjit("hello") } end Iterations / s (million) 0 8.5 17 25.5 34 Strlen Iterations / s FFI FJIT C-ext
  22. FJIT module definition require "fiddle" require "jit_buffer" require "hacks" require

    "aarch64" module FJIT C = RubyVM::RJIT.const_get(:C) include AArch64::Registers def attach_function name, params, ret # ... end end require "fiddle" require "jit_buffer" require "hacks" require "aarch64" module FJIT C = RubyVM::RJIT.const_get(:C) include AArch64::Registers def attach_function name, params, ret # ... end end require "fiddle" require "jit_buffer" require "hacks" require "aarch64" module FJIT C = RubyVM::RJIT.const_get(:C) include AArch64::Registers def attach_function name, params, ret # ... end end require "fiddle" require "jit_buffer" require "hacks" require "aarch64" module FJIT C = RubyVM::RJIT.const_get(:C) include AArch64::Registers def attach_function name, params, ret # ... end end require "fiddle" require "jit_buffer" require "hacks" require "aarch64" module FJIT C = RubyVM::RJIT.const_get(:C) include AArch64::Registers def attach_function name, params, ret # ... end end require "fiddle" require "jit_buffer" require "hacks" require "aarch64" module FJIT C = RubyVM::RJIT.const_get(:C) include AArch64::Registers def attach_function name, params, ret # ... end end require "fiddle" require "jit_buffer" require "hacks" require "aarch64" module FJIT C = RubyVM::RJIT.const_get(:C) include AArch64::Registers def attach_function name, params, ret # ... end end
  23. attach_function Step 1: define method, get the underlying

    iseq pointer def attach_function name, params, ret params = params.map { "_" }.join(", ") class_eval "def self.#{name}(#{params}); end" m = method(name) rb_iseq = RubyVM::InstructionSequence.of(m) # Get the pointer to the iseq obj addr = Fiddle.dlwrap(rb_iseq) offset = Hacks::STRUCTS["RTypedData"]["data"][0] addr = read_ptr(read_ptr(addr, offset), 0) iseq_t = C.rb_iseq_t.new addr
  24. attach_function Step 2: generate function entry prelude asm = AArch64::Assembler.new

    # X0 has the ec, x1 has the CFP # save x0 and X1 on the stack asm.stp X0, X1, [SP, -16], :! # save X30 (the branch link reg) asm.stp X29, X30, [SP, -16], :! # SP is in X0 asm.ldr X0, [X1, C.rb_control_frame_t.offsetof(:sp)] # Put top of stack in X0 asm.sub(X0, X0, (4 * 8))
  25. attach_function Step 3: Unbox parameters # Get the underlying string

    pointer loadi(asm, X2, Fiddle::Handle::DEFAULT["rb_string_value_cstr"]) asm.blr X2
  26. attach_function Step 4: Call C function (strlen) #

    Call the function loadi(asm, X2, Fiddle::Handle::DEFAULT[name.to_s]) asm.blr X2 asm.ldp X29, X30, [SP], 16 strlen
  27. attach_function Step 5: Box the return value case ret when

    :int # convert to int asm.lsl(X0, X0, 1) asm.add(X0, X0, 1) else raise ArgumentError, "unknown type #{ret}" end
  28. attach_function Step 6: Return from the function # restore X0

    and X1, but in to X1 and X2 to avoid mov asm.ldp X1, X2, [SP], 16 # pop frame asm.add(X2, X2, C.rb_control_frame_t.size) asm.stur(X2, [X1, C.rb_execution_context_t.offsetof(:cfp)]) asm.ret
  29. attach_function Step 7: Write machine code jit = JITBuffer.new 1024

    jit.writeable! asm.write_to jit jit.executable! iseq_t.body.jit_entry = jit.to_i jit = JITBuffer.new 1024 jit.writeable! asm.write_to jit jit.executable! iseq_t.body.jit_entry = jit.to_i jit = JITBuffer.new 1024 jit.writeable! asm.write_to jit jit.executable! iseq_t.body.jit_entry = jit.to_i
  30. It depends on Ruby internals rb_control_frame_t, etc offset = Hacks::STRUCTS["RTypedData"]["data"][0]

    asm.ldr X0, [X1, C.rb_control_frame_t.offsetof(:sp)] asm.add(X2, X2, C.rb_control_frame_t.size) asm.stur(X2, [X1, C.rb_execution_context_t.offsetof(:cfp)]) offset = Hacks::STRUCTS["RTypedData"]["data"][0] asm.ldr X0, [X1, C.rb_control_frame_t.offsetof(:sp)] asm.add(X2, X2, C.rb_control_frame_t.size) asm.stur(X2, [X1, C.rb_execution_context_t.offsetof(:cfp)])
  31. Dr Wenowdis: Specializing Dynamic Language C Extensions using Type

    Information https://bernsteinbear.com/assets/img/dr-wenowdis.pdf By Max Bernstein and CF Bolz-Tereick
  32. Write FFI Ruby code Developer writes normal FFI Ruby code

    require "ffi" module Strlen extend FFI::Library ffi_lib 'c' attach_function :strlen, [:string], :size_t end Source FFI code 🧑💻 Just write FFI code
  33. FFX: Generate a C extension with hints Hints teach the

    JIT compiler how to use the extension require "ffi" module Strlen extend FFI::Library ffi_lib 'c' attach_function :strlen, [:string], :size_t end Source FFI code /* Generated by FFX - do not edit */ #include <ruby.h> #include <string.h> #include <stdlib.h> #include <math.h> static VALUE rb_strlen_strlen_impl(VALUE self, VALUE arg0) __asm__("_rb_strlen_strlen_impl"); __attribute__((used)) static VALUE rb_strlen_strlen_impl(VALUE self, VALUE arg0) { return SIZET2NUM(strlen(StringValueCStr(arg0))); } __attribute__((naked, aligned(16))) static VALUE rb_strlen_strlen(VALUE self, VALUE arg0) { __asm__( "b _rb_strlen_strlen_impl\n" ".long 0x46464930\n" ".byte 1\n" ".byte 3\n" ".byte 5\n" ".asciz \"strlen\"\n" ); } void Init_strlen(void) { VALUE rb_mStrlen = rb_define_module("Strlen"); rb_define_module_function(rb_mStrlen, "strlen", rb_strlen_strlen, 1); } Translated C extension Translation done by FFX
  34. C extension is compiled normally We get the

    usual dylib /* Generated by FFX - do not edit */ #include <ruby.h> #include <string.h> #include <stdlib.h> #include <math.h> static VALUE rb_strlen_strlen_impl(VALUE self, VALUE arg0) __asm__("_rb_strlen_strlen_impl"); __attribute__((used)) static VALUE rb_strlen_strlen_impl(VALUE self, VALUE arg0) { return SIZET2NUM(strlen(StringValueCStr(arg0))); } __attribute__((naked, aligned(16))) static VALUE rb_strlen_strlen(VALUE self, VALUE arg0) { __asm__( "b _rb_strlen_strlen_impl\n" ".long 0x46464930\n" ".byte 1\n" ".byte 3\n" ".byte 5\n" ".asciz \"strlen\"\n" ); } void Init_strlen(void) { VALUE rb_mStrlen = rb_define_module("Strlen"); rb_define_module_function(rb_mStrlen, "strlen", rb_strlen_strlen, 1); } Translated C extension Normal compilation process strlen.dylib
  35. rb_define_module_function( rb_mStrlen, "strlen", rb_strlen_strlen, 1); rb_define_* problem No type information

    is passed to rb_define_method rb_define_module_function( rb_mStrlen, "strlen", rb_strlen_strlen, 1); Function pointer Arity
  36. We know the types! FFI declaration has type information require

    "ffi" module Strlen extend FFI::Library ffi_lib 'c' attach_function :strlen, [:string], :size_t end Parameter type Return type require "ffi" module Strlen extend FFI::Library ffi_lib 'c' attach_function :strlen, [:string], :size_t end
  37. ZJIT specializes based on function type The JIT knows what

    type of function it’s calling in to def call_function recv.strlen end Ruby source # Insn: v25 LoadField v24, :len@0x10 # Load field id=len offset=16 0x123af0428: ldur x1, [x0, #0x10] # Insn: v26 BoxFixnum v25 0x123af042c: lsl x1, x1, #1 0x123af0430: b.vs #0x123af0468 0x123af0434: nop 0x123af0438: nop 0x123af043c: orr x1, x1, #1 Strlen is a Ruby function # Insn: v27 CCallWithFrame v26, :Strlen.strlen@0x16ee772b0, v16 # stack overflow check 0x1217ec190: add x1, x21, #0x80 0x1217ec194: cmp x19, x1 0x1217ec198: b.ls #0x1217ec28c 0x1217ec19c: nop 0x1217ec1a0: nop # save PC to CFP 0x1217ec1a4: mov x1, #-1 0x1217ec1a8: stur x1, [x19] 0x1217ec1ac: mov x1, #0x41e0 0x1217ec1b0: movk x1, #0x5e9a, lsl #16 0x1217ec1b4: movk x1, #0xb, lsl #32 0x1217ec1b8: stur x1, [x19, #0x30] # save SP to CFP: 0 0x1217ec1bc: add x1, x21, #0 0x1217ec1c0: stur x1, [x19, #8] # spill stack 0x1217ec1c4: stur x0, [x21] 0x1217ec1c8: stur x2, [x21, #8] # spill locals 0x1217ec1cc: stur x0, [x21, #-0x20] # push cme, specval, frame type 0x1217ec1d0: ldr x1, #0x1217ec1d8 0x1217ec1d4: b #0x1217ec1e0 Strlen is a C function Ruby C
  38. Trampoline Trick Treat C functions as data Unconditional Jump Stashed

    metadata Real function Interpreter ZJIT Reads this
  39. Trampoline Layout ARM64 offset 0: b _impl // call to

    real impl offset 4: .long 0x46464930 // magic: "FFI0" offset 8: .byte 1 // param_count offset 9: .byte 3 // param_types[0] = string offset 10: .byte 5 // return_type = size_t offset 11: .asciz "strlen" // native function name
  40. Generated C Code FFX generates this at gem install

    time __attribute__((naked, aligned(16))) static VALUE rb_strlen_strlen(VALUE self, VALUE arg0) { __asm__( "b _rb_strlen_strlen_impl\n" ".long 0x46464930\n" ".byte 1\n" ".byte 3\n" ".byte 5\n" ".asciz \"strlen\"\n" ); } Trampoline __attribute__((used)) static VALUE rb_strlen_strlen_impl(VALUE self, VALUE arg0) { return SIZET2NUM( strlen( StringValueCStr(arg0))); } Implementation
  41. Simplified ZJIT code Check for FFI trampoline unsafe fn check_ffi_trampoline(cfunc_ptr:

    *const u8) -> Option<FfiTrampoline> { // Check magic at offset 4 let magic = *(cfunc_ptr.add(4) as *const u32); if magic != 0x46464930 { return None; } // Read param_count, param_types, return_type let param_count = *cfunc_ptr.add(8) as usize; // ...read param types at offset 9.. // ...read return type... // Read function name, resolve with dlsym let name = CStr::from_ptr(cfunc_ptr.add(11)); let native_func = dlsym(RTLD_DEFAULT, name); Some(FfiTrampoline { param_types, native_func, .. }) } unsafe fn check_ffi_trampoline(cfunc_ptr: *const u8) -> Option<FfiTrampoline> { // Check magic at offset 4 let magic = *(cfunc_ptr.add(4) as *const u32); if magic != 0x46464930 { return None; } // Read param_count, param_types, return_type let param_count = *cfunc_ptr.add(8) as usize; // ...read param types at offset 9.. // ...read return type... // Read function name, resolve with dlsym let name = CStr::from_ptr(cfunc_ptr.add(11)); let native_func = dlsym(RTLD_DEFAULT, name); Some(FfiTrampoline { param_types, native_func, .. }) } unsafe fn check_ffi_trampoline(cfunc_ptr: *const u8) -> Option<FfiTrampoline> { // Check magic at offset 4 let magic = *(cfunc_ptr.add(4) as *const u32); if magic != 0x46464930 { return None; } // Read param_count, param_types, return_type let param_count = *cfunc_ptr.add(8) as usize; // ...read param types at offset 9.. // ...read return type... // Read function name, resolve with dlsym let name = CStr::from_ptr(cfunc_ptr.add(11)); let native_func = dlsym(RTLD_DEFAULT, name); Some(FfiTrampoline { param_types, native_func, .. }) } unsafe fn check_ffi_trampoline(cfunc_ptr: *const u8) -> Option<FfiTrampoline> { // Check magic at offset 4 let magic = *(cfunc_ptr.add(4) as *const u32); if magic != 0x46464930 { return None; } // Read param_count, param_types, return_type let param_count = *cfunc_ptr.add(8) as usize; // ...read param types at offset 9.. // ...read return type... 
// Read function name, resolve with dlsym let name = CStr::from_ptr(cfunc_ptr.add(11)); let native_func = dlsym(RTLD_DEFAULT, name); Some(FfiTrampoline { param_types, native_func, .. }) }
  42. New FfiCall Instruction New HIR instruction generated for

    calling FFI functions bb3(v9:BasicObject, v10:BasicObject): PatchPoint SingleRactorMode PatchPoint StableConstantNames(0x7bdaef2e8, Strlen) v27:ModuleExact[VALUE(0x120fd0f80)] = Const Value(VALUE(0x120fd0f80)) PatchPoint NoEPEscape(use_ffx) PatchPoint MethodRedefined(Module@0x120fd0ec0, strlen@0xf901, cme:0x12101bcb0) v30:StringExact = GuardType v10, StringExact v31:Fixnum = FfiCall v27, :Strlen.strlen@0x16fa63958, v30 CheckInterrupts Return v31 bb3(v9:BasicObject, v10:BasicObject): PatchPoint SingleRactorMode PatchPoint StableConstantNames(0x7bdaef2e8, Strlen) v27:ModuleExact[VALUE(0x120fd0f80)] = Const Value(VALUE(0x120fd0f80)) PatchPoint NoEPEscape(use_ffx) PatchPoint MethodRedefined(Module@0x120fd0ec0, strlen@0xf901, cme:0x12101bcb0) v30:StringExact = GuardType v10, StringExact v31:Fixnum = FfiCall v27, :Strlen.strlen@0x16fa63958, v30 CheckInterrupts Return v31 bb3(v9:BasicObject, v10:BasicObject): PatchPoint SingleRactorMode PatchPoint StableConstantNames(0x7bdaef2e8, Strlen) v27:ModuleExact[VALUE(0x120fd0f80)] = Const Value(VALUE(0x120fd0f80)) PatchPoint NoEPEscape(use_ffx) PatchPoint MethodRedefined(Module@0x120fd0ec0, strlen@0xf901, cme:0x12101bcb0) v30:StringExact = GuardType v10, StringExact v31:Fixnum = FfiCall v27, :Strlen.strlen@0x16fa63958, v30 CheckInterrupts Return v31 bb3(v9:BasicObject, v10:BasicObject): PatchPoint SingleRactorMode PatchPoint StableConstantNames(0x7bdaef2e8, Strlen) v27:ModuleExact[VALUE(0x120fd0f80)] = Const Value(VALUE(0x120fd0f80)) PatchPoint NoEPEscape(use_ffx) PatchPoint MethodRedefined(Module@0x120fd0ec0, strlen@0xf901, cme:0x12101bcb0) v30:StringExact = GuardType v10, StringExact v31:Fixnum = FfiCall v27, :Strlen.strlen@0x16fa63958, v30 CheckInterrupts Return v31
  43. What ZJIT generates (before) Pseudo machine code from before //

    ZJIT output for a normal cfunc: push frame move args to stack call rb_funcall / cfunc dispatch check return pop frame
  44. What ZJIT generates (after) Pseudo machine code (ARM64) ; guard

    arg is String ; (side exit if not) ; extract RSTRING_PTR → char* ldur x0, [x0, #0x10] ; direct call to strlen bl _strlen ; LONG2FIX: (result << 1) | 1 lsl x0, x0, #1 orr x0, x0, #1
  45. Benchmarks FFI, C extension, C extension + ZJIT hints def

    use_ffi(str); FFIStrlen.strlen(str); end def use_ffx(str); Strlen.strlen(str); end def use_cext(str); Strlen.strlen_c(str); end IPS.run do |x| x.report("ffi") { use_ffi("hello") } x.report("cext") { use_cext("hello") } x.report("ffx") { use_ffx("hello") } end
  46. SQLite3 wrapper Works with FFI and FFX require "ffi" module

    Sqliteffx extend FFI::Library ffi_lib "sqlite3" # Prepared statements attach_function :sqlite3_prepare_v2, [:pointer, :string, :int, :pointer, :pointer], :int attach_function :sqlite3_step, [:pointer], :int attach_function :sqlite3_reset, [:pointer], :int attach_function :sqlite3_clear_bindings, [:pointer], :int attach_function :sqlite3_finalize, [:pointer], :int attach_function :sqlite3_column_count, [:pointer], :int attach_function :sqlite3_column_type, [:pointer, :int], :int attach_function :sqlite3_column_int64, [:pointer, :int], :long # .... end
  47. SQLite3 High Level API Open a database / prepare a

    statement module Sqliteffx class Database def self.open(path) db = new(path) return db unless block_given? begin yield db ensure db.close end end def prepare(sql) Statement.new(self, sql) end # ... end end FFX Implementation # FFX Interface db = Sqliteffx::Database.new(":memory:") stmt = db.prepare("SELECT x, y, z FROM t") # FFI Interface db = Sqliteffi::Database.new(":memory:") stmt = db.prepare("SELECT x, y, z FROM t") # C extension db = SQLite3::Database.new(":memory:") stmt = db.prepare("SELECT x, y, z FROM t") Usage
  48. SQLite3 High Level API Iterate over results module Sqliteffx class

    Statement def each stmt = @handle ncols = Sqliteffx.sqlite3_column_count(stmt) while (rc = Sqliteffx.sqlite3_step(stmt)) == SQLITE_ROW row = [] i = 0 while i < ncols row << case Sqliteffx.sqlite3_column_type(stmt, i) when SQLITE_INTEGER Sqliteffx.sqlite3_column_int64(stmt, i) # ... end i += 1 end yield row end self end end end FFX Implementation # FFX interface db = Sqliteffx::Database.new(":memory:") stmt = db.prepare("SELECT x, y, z FROM t") stmt.each { |row| p row } # FFI interface db = Sqliteffi::Database.new(":memory:") stmt = db.prepare("SELECT x, y, z FROM t") stmt.each { |row| p row } # C extension db = SQLite3::Database.new(":memory:") stmt = db.prepare("SELECT x, y, z FROM t") stmt.each { |row| p row } Usage
  49. Benchmarks FFX vs FFI vs C extension IPS.run do |x|

    # Full open-sql flow: prepare + step + finalize every time. x.report("sqlite3-ruby db.execute (one-shot)") do rows = 0 c_db.execute("SELECT x, y, z FROM t") { |_| rows += 1 } rows end x.report("ffi db.execute (one-shot)") do rows = 0 ffi_db.execute("SELECT x, y, z FROM t") { |_| rows += 1 } rows end x.report("sqliteffx db.execute (one-shot)") do rows = 0 ffx_db.execute("SELECT x, y, z FROM t") { |_| rows += 1 } rows end end execute IPS.run do |x| # Reusing a prepared statement: step + column work only. x.report("sqlite3-ruby prepared each") do rows = 0 c_select.reset! c_select.each { |_| rows += 1 } rows end x.report("ffi prepared each") do rows = 0 ffi_select.reset! ffi_select.each { |_| rows += 1 } rows end x.report("sqliteffx prepared each") do rows = 0 ffx_select.reset! ffx_select.each { |_| rows += 1 } rows end end prepare / loop