Upgrade to Pro — share decks privately, control downloads, hide ads and more …

RubyVM読んでみた。

ocha-
April 19, 2014

 RubyVM読んでみた。

ocha-

April 19, 2014
Tweet

Other Decks in Programming

Transcript

  1. rb_iseq_t でかい /* method.h */ typedef struct rb_iseq_struct rb_iseq_t; /*

    vm_core.h */ struct rb_iseq_struct { /***************/ /* static data */ /***************/ enum iseq_type { ISEQ_TYPE_TOP, ISEQ_TYPE_METHOD, ISEQ_TYPE_BLOCK, ISEQ_TYPE_CLASS, ISEQ_TYPE_RESCUE, ISEQ_TYPE_ENSURE, ISEQ_TYPE_EVAL, ISEQ_TYPE_MAIN, ISEQ_TYPE_DEFINED_GUARD } type; /* instruction sequence type */ rb_iseq_location_t location; VALUE *iseq; /* iseq (insn number and operands) */ VALUE *iseq_encoded; /* encoded iseq */ unsigned long iseq_size; const VALUE mark_ary; /* Array: includes operands which should be GC marked */ const VALUE coverage; /* coverage array */ /* insn info, must be freed */ struct iseq_line_info_entry *line_info_table; size_t line_info_size; ID *local_table; /* must free */ int local_table_size; /* sizeof(vars) + 1 */ int local_size; union iseq_inline_storage_entry *is_entries; int is_size; rb_call_info_t *callinfo_entries; int callinfo_size; ...
  2. rb_iseq_t 特にそれっぽいとこ /* method.h */ typedef struct rb_iseq_struct rb_iseq_t; /*

    vm_core.h */ struct rb_iseq_struct { ... VALUE *iseq; /* iseq (insn number and operands) */ VALUE *iseq_encoded; /* encoded iseq */
  3. LINK_ELEMENT (iseq_link_element *) な next, prev で、なんかリンクリストっぽい typedef struct

    iseq_link_element { enum { ISEQ_ELEMENT_NONE, ISEQ_ELEMENT_LABEL, ISEQ_ELEMENT_INSN, ISEQ_ELEMENT_ADJUST } type; struct iseq_link_element *next; struct iseq_link_element *prev; } LINK_ELEMENT;
  4. iseq_insn_data とかいろいろ 最初にLINK_ELEMENTがあるから全部 LINK_ELEMENT typedef struct iseq_link_anchor { LINK_ELEMENT anchor;

    LINK_ELEMENT *last; } LINK_ANCHOR; typedef struct iseq_label_data { LINK_ELEMENT link; ... } LABEL; typedef struct iseq_insn_data { LINK_ELEMENT link; ... } INSN; typedef struct iseq_adjust_data { LINK_ELEMENT link; ... } ADJUST;
  5. AST( ruby --dump parsetree ) # @ NODE_SCOPE (line: 2)

    # +- nd_tbl: :x # +- nd_args: # | (null node) # +- nd_body: # @ NODE_BLOCK (line: 1) # +- nd_head: # | @ NODE_DASGN_CURR (line: 1) # | +- nd_vid: :x # | +- nd_value: # | @ NODE_LIT (line: 1) # | +- nd_lit: 1 # +- nd_next: # @ NODE_BLOCK (line: 2) # +- nd_head: # | @ NODE_CALL (line: 2) # | +- nd_mid: :+ # | +- nd_recv: # | | @ NODE_DVAR (line: 2) # | | +- nd_vid: :x # | +- nd_args: # | @ NODE_ARRAY (line: 2) # | +- nd_alen: 1 # | +- nd_head: # | | @ NODE_LIT (line: 2) # | | +- nd_lit: 2 # | +- nd_next: # | (null node) # +- nd_next: # (null node)
  6. insns (ruby --dump insns) バイトコードの様子(最適化後) == disasm: <RubyVM::InstructionSequence:<main>@simple.rb>=============== local table

    (size: 2, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, keyword: 0@3] s1) [ 2] x 0000 trace 1 ( 1) 0002 putobject_OP_INT2FIX_O_1_C_ 0003 setlocal_OP__WC__0 2 0005 trace 1 ( 2) 0007 getlocal_OP__WC__0 2 0009 putobject 2 0011 opt_plus <callinfo!mid:+, argc:1, ARGS_SKIP> 0013 leave
  7. 結構シンプルになります == disasm: <RubyVM::InstructionSequence:<main>@simple.rb>=============== local table (size: 2, argc: 0

    [opts: 0, rest: -1, post: 0, block: -1, keyword: 0@3] s1) [ 2] x 0000 putobject 1 ( 1) 0002 setlocal x, 0 0005 getlocal x, 0 ( 2) 0008 putobject 2 0010 send <callinfo!mid:+, argc:1, ARGS_SKIP> 0012 leave
  8. rb_iseq_new_with_bopt_and_opt rb_iseq_tのallocate地点になります /* iseq.c */ static VALUE rb_iseq_new_with_bopt_and_opt(NODE *node, VALUE

    name, VALUE path, VALUE absolute_path, VALUE first_lineno, VALUE parent, enum iseq_type type, VALUE bopt, const rb_compile_option_t *option) { rb_iseq_t *iseq; VALUE self = iseq_alloc(rb_cISeq); GetISeqPtr(self, iseq); iseq->self = self; prepare_iseq_build(iseq, name, path, absolute_path, first_lineno, parent, type, bopt, option); rb_iseq_compile_node(self, node); cleanup_iseq_build(iseq); return self; }
  9. rb_iseq_new_with_bopt_and_opt /* iseq.c */ static VALUE rb_iseq_new_with_bopt_and_opt(NODE *node, VALUE name,

    VALUE path, VALUE absolute_path, VALUE first_lineno, VALUE parent, enum iseq_type type, VALUE bopt, const rb_compile_option_t *option) { rb_iseq_t *iseq; VALUE self = iseq_alloc(rb_cISeq); GetISeqPtr(self, iseq); iseq->self = self; prepare_iseq_build(iseq, name, path, absolute_path, first_lineno, parent, type, bopt, option); rb_iseq_compile_node(self, node); /* <-- Here! */ cleanup_iseq_build(iseq); return self; }
  10. ast 最初はNODE_SCOPE # @ NODE_SCOPE (line: 2) # +- nd_tbl:

    :x # +- nd_args: # | (null node) # +- nd_body: # @ NODE_BLOCK (line: 1) # +- nd_head: # | @ NODE_DASGN_CURR (line: 1) # | +- nd_vid: :x # | +- nd_value: # | @ NODE_LIT (line: 1) # | +- nd_lit: 1 # +- nd_next: # @ NODE_BLOCK (line: 2) # +- nd_head: # | @ NODE_CALL (line: 2)
  11. rb_iseq_compile_node (extremely simplified) nodeがNODE_SCOPEだった場合 /* compile.c */ VALUE rb_iseq_compile_node(VALUE self,

    NODE *node) { DECL_ANCHOR(ret); rb_iseq_t *iseq; INIT_ANCHOR(ret); GetISeqPtr(self, iseq); ... iseq_set_local_table(iseq, node->nd_tbl); iseq_set_arguments(iseq, ret, node->nd_args); ... COMPILE(ret, "scoped node", node->nd_body); ... ADD_INSN(ret, iseq->compile_data->last_line, leave); ... return iseq_setup(iseq, ret); }
  12. COMPILE, COMPILE_ /* compile node */ #define COMPILE(anchor, desc, node)

    \ (debug_compile("== " desc "\n", \ iseq_compile_each(iseq, (anchor), (node), 0))) /* compile node, which is popped when 'poped' is true */ #define COMPILE_(anchor, desc, node, poped) \ (debug_compile("== " desc "\n", \ iseq_compile_each(iseq, (anchor), (node), (poped))))
  13. iseq_compile_each: NODE_BLOCK nd_headをCOMPILE_してnd_nextをたどる case NODE_BLOCK:{ while (node && nd_type(node) ==

    NODE_BLOCK) { COMPILE_(ret, "BLOCK body", node->nd_head, (node->nd_next == 0 && poped == 0) ? 0 : 1); node = node->nd_next; } if (node) { COMPILE_(ret, "BLOCK next", node->nd_next, poped); } break; }
  14. iseq_compile_each: NODE_DASGN_CURR x = 1, nd_valueのCOMPILEが先 case NODE_DASGN_CURR:{ int idx,

    lv, ls; COMPILE(ret, "dvalue", node->nd_value); debugp_param("dassn id", rb_str_new2(rb_id2name(node->nd_vid) ? rb_id2name(node->nd_vid) : "*")); if (!poped) { ADD_INSN(ret, line, dup); } idx = get_dyna_var_idx(iseq, node->nd_vid, &lv, &ls); if (idx < 0) { rb_bug("NODE_DASGN(_CURR): unknown id (%s)", rb_id2name(node->nd_vid)); } ADD_INSN2(ret, line, setlocal, INT2FIX(ls - idx), INT2FIX(lv)); break; }
  15. ADD_INSN insn (+ operands) なLINK_ELEMENT 作って ADD_ELEM #define ADD_INSN(seq, line,

    insn) \ ADD_ELEM((seq), (LINK_ELEMENT *) new_insn_body(iseq, (line), BIN(insn), 0)) #define ADD_INSN1(seq, line, insn, op1) \ ADD_ELEM((seq), (LINK_ELEMENT *) \ new_insn_body(iseq, (line), BIN(insn), 1, (VALUE)(op1))) #define ADD_INSN2(seq, line, insn, op1, op2) \ ADD_ELEM((seq), (LINK_ELEMENT *) \ new_insn_body(iseq, (line), BIN(insn), 2, (VALUE)(op1), (VALUE)(op2)))
  16. ADD_ELEM こちらは関数、elemつなげてます /* * elem1, elem2 => elem1, elem2, elem

    */ static void ADD_ELEM(ISEQ_ARG_DECLARE LINK_ANCHOR *anchor, LINK_ELEMENT *elem) { elem->prev = anchor->last; anchor->last->next = elem; anchor->last = elem; verify_list("add", anchor); }
  17. iseq_setup いろいろ最適化でretの内容かわっていきます static int iseq_setup(rb_iseq_t *iseq, LINK_ANCHOR *anchor) { /*

    debugs("[compile step 2] (iseq_array_to_linkedlist)\n"); */ if (compile_debug > 5) dump_disasm_list(FIRST_ELEMENT(anchor)); debugs("[compile step 3.1 (iseq_optimize)]\n"); iseq_optimize(iseq, anchor); if (compile_debug > 5) dump_disasm_list(FIRST_ELEMENT(anchor)); if (iseq->compile_data->option->instructions_unification) { debugs("[compile step 3.2 (iseq_insns_unification)]\n"); iseq_insns_unification(iseq, anchor); if (compile_debug > 5) dump_disasm_list(FIRST_ELEMENT(anchor)); } if (iseq->compile_data->option->stack_caching) { debugs("[compile step 3.3 (iseq_set_sequence_stackcaching)]\n"); iseq_set_sequence_stackcaching(iseq, anchor); if (compile_debug > 5) dump_disasm_list(FIRST_ELEMENT(anchor)); } ...
  18. iseq_set_sequence (1) リストをたどって必要なサイズを計算します static int iseq_set_sequence(rb_iseq_t *iseq, LINK_ANCHOR *anchor)

    { ... list = FIRST_ELEMENT(anchor); k = pos = 0; while (list) { switch (list->type) { case ISEQ_ELEMENT_INSN: { iobj = (INSN *)list; line = iobj->line_no; pos += insn_data_length(iobj); k++; break; }
  19. iseq_set_sequence (3) リストをたどって値をいれていきます while (list) { switch (list->type) { case

    ISEQ_ELEMENT_INSN: { ... iobj = (INSN *)list; ... operands = iobj->operands; insn = iobj->insn_id; generated_iseq[pos] = insn; ... for (j = 0; types[j]; j++) { ... case TS_VALUE: /* VALUE */ { VALUE v = operands[j]; generated_iseq[pos + 1 + j] = v;
  20. mruby の ast --> bytecode 入力と結果 (mruby -v simple.rb) mruby

    1.0.0 (2014-01-10) NODE_SCOPE: local variables: x NODE_BEGIN: NODE_ASGN: lhs: NODE_LVAR x rhs: NODE_INT 1 base 10 NODE_CALL: NODE_LVAR x method='+' (112) args: NODE_INT 2 base 10 irep 0x225c6f0 nregs=4 nlocals=2 pools=0 syms=1 reps=0 000 OP_LOADI R1 1 001 OP_MOVE R2 R1 002 OP_ADDI R2 :+ 2 003 OP_STOP
  21. RubyVM バイトコード比較してみる == disasm: <RubyVM::InstructionSequence:<main>@simple.rb>=============== local table (size: 2, argc:

    0 [opts: 0, rest: -1, post: 0, block: -1, keyword: 0@3] s1) [ 2] x 0000 putobject 1 ( 1) 0002 setlocal x, 0 0005 getlocal x, 0 ( 2) 0008 putobject 2 0010 send <callinfo!mid:+, argc:1, ARGS_SKIP> 0012 leave
  22. mruby の ast node: mrb_ast_node /* AST node structure */

    typedef struct mrb_ast_node { struct mrb_ast_node *car, *cdr; uint16_t lineno, filename_index; } mrb_ast_node;
  23. MRI の ast node: RNode /* node.h */ typedef struct

    RNode { VALUE flags; VALUE nd_reserved; /* ex nd_file */ union { struct RNode *node; ID id; VALUE value; VALUE (*cfunc)(ANYARGS); ID *tbl; } u1; union { struct RNode *node; ID id; long argc; VALUE value; } u2; union { struct RNode *node; ID id; long state; struct rb_global_entry *entry; struct rb_args_info *args; long cnt; VALUE value; } u3; } NODE;
  24. mruby の NODE_WHILE /* (:while cond body) */ static node*

    new_while(parser_state *p, node *a, node *b) { return cons((node*)NODE_WHILE, cons(a, b)); }
  25. mruby: scope_new 大きめ確保 static codegen_scope* scope_new(mrb_state *mrb, codegen_scope *prev, node

    *lv) { ... static const codegen_scope codegen_scope_zero = { 0 }; ... *p = codegen_scope_zero; ... p->irep = mrb_add_irep(mrb); scope_add_irep(prev, p->irep); p->rcapa = 8; p->irep->reps = (mrb_irep**)mrb_malloc(mrb, sizeof(mrb_irep*)*p->rcapa); p->icapa = 1024; p->iseq = (mrb_code*)mrb_malloc(mrb, sizeof(mrb_code)*p->icapa); p->irep->iseq = p->iseq; ...
  26. mruby: genop 前からいれてきます static inline int genop(codegen_scope *s, mrb_code i)

    { if (s->pc == s->icapa) { s->icapa *= 2; s->iseq = (mrb_code *)codegen_realloc(s, s->iseq, sizeof(mrb_code)*s->icapa); ... } s->iseq[s->pc] = i; ... return s->pc++; }
  27. mruby: scope_finish ぴったりサイズになります static void scope_finish(codegen_scope *s) { mrb_state *mrb

    = s->mrb; mrb_irep *irep = s->irep; ... if (s->iseq) { irep->iseq = (mrb_code *)codegen_realloc(s, s->iseq, sizeof(mrb_code)*s->pc); irep->ilen = s->pc; ...