Upgrade to Pro — share decks privately, control downloads, hide ads and more …

大テーブルと小テーブルのJOINのコスト計算の話

kubo ayumu
March 05, 2024

 大テーブルと小テーブルのJOINのコスト計算の話

kubo ayumu

March 05, 2024
Tweet

More Decks by kubo ayumu

Other Decks in Technology

Transcript

  1. 激しめの要約 • 大きめのテーブル(100万行)と小さめのテーブル(3行)をLEFT JOINする クエリが妙に遅い • 後者の結合タイプが全検索となりhash joinが選択されていることが分 かった •

    後者のテーブルの行数を10行に増やすと、後者の結合タイプがeq_refに 変わる → 手元で再現して原因をいろいろ調べてみることにした ※当発表の内容はブログ記事にも載せてます https://amamanamam.hatenablog.com/entry/2024/02/11/005331
  2. 環境/設定 mysql> desc mytable; +---------+--------------+------+-----+---------+----------------+ | Field | Type |

    Null | Key | Default | Extra | +---------+--------------+------+-----+---------+----------------+ | id | int | NO | PRI | NULL | auto_increment | | name | varchar(255) | YES | | NULL | | | type_id | int | NO | MUL | NULL | | | value | varchar(255) | YES | | NULL | | +---------+--------------+------+-----+---------+----------------+ 4 rows in set (0.10 sec) mysql> desc types; +-------+--------------+------+-----+---------+----------------+ | Field | Type | Null | Key | Default | Extra | +-------+--------------+------+-----+---------+----------------+ | id | int | NO | PRI | NULL | auto_increment | | name | varchar(255) | YES | | NULL | | +-------+--------------+------+-----+---------+----------------+ 2 rows in set (0.01 sec)
  3. 環境/設定 mysql> select count(*) from mytable; +----------+ | count(*) |

    +----------+ | 524283 | +----------+ 1 row in set (0.92 sec) mysql> select count(*) from types; +----------+ | count(*) | +----------+ | 3 | +----------+ 1 row in set (0.10 sec)
  4. 再現してみた mysql> explain select * from mytable left join types

    on mytable.type_id = types.id; +----+-------------+---------+------------+------+---------------+------+---------+------+--------+----------+------ --------------------------------------+ | id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra | +----+-------------+---------+------------+------+---------------+------+---------+------+--------+----------+------ --------------------------------------+ | 1 | SIMPLE | mytable | NULL | ALL | NULL | NULL | NULL | NULL | 488245 | 100.00 | NULL | | 1 | SIMPLE | types | NULL | ALL | PRIMARY | NULL | NULL | NULL | 3 | 100.00 | Using where; Using join buffer (hash join) | +----+-------------+---------+------------+------+---------------+------+---------+------+--------+----------+------ --------------------------------------+ 2 rows in set, 1 warning (0.01 sec)
  5. 再現してみた mysql> insert into types values (4,'typeD'),(5,'typeE'),(6,'typeF'),(7,'typeG'),(8,'typeH'),(9,'typeI'),(10,'typeJ'); Query OK, 7

    rows affected (0.03 sec) Records: 7 Duplicates: 0 Warnings: 0 mysql> explain select * from mytable left join types on mytable.type_id = types.id; +----+-------------+---------+------------+--------+---------------+---------+---------+----------------------+----- ---+----------+-------+ | id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra | +----+-------------+---------+------------+--------+---------------+---------+---------+----------------------+----- ---+----------+-------+ | 1 | SIMPLE | mytable | NULL | ALL | NULL | NULL | NULL | NULL | 488245 | 100.00 | NULL | | 1 | SIMPLE | types | NULL | eq_ref | PRIMARY | PRIMARY | 4 | kubo.mytable.type_id | 1 | 100.00 | NULL | +----+-------------+---------+------------+--------+---------------+---------+---------+----------------------+----- ---+----------+-------+ 2 rows in set, 1 warning (0.00 sec)
  6. best_access_path(sql_planner.cc) void Optimize_table_order::best_access_path(JOIN_TAB *tab, const table_map remaining_tables, const uint idx,

    bool disable_jbuf, const double prefix_rowcount, POSITION *pos) { ... // The 'ref' access method with lowest cost as found by find_best_ref() Key_use *best_ref = nullptr; // Look for the best ref access if the storage engine supports index access. if (tab->keyuse() != nullptr && (table->file->ha_table_flags() & HA_NO_INDEX_ACCESS) == 0) best_ref = find_best_ref(tab, remaining_tables, idx, prefix_rowcount, &found_condition, &ref_depend_map, &used_key_parts); double rows_fetched = best_ref ? best_ref->fanout : DBL_MAX; /* Cost of executing the best access method prefix_rowcount number of times */ double best_read_cost = best_ref ? best_ref->read_cost : DBL_MAX;
  7. best_access_path(sql_planner.cc) double scan_read_cost = calculate_scan_cost( tab, idx, best_ref, prefix_rowcount, found_condition,

    disable_jbuf, &rows_after_filtering, &trace_access_scan); /* We estimate the cost of evaluating WHERE clause for found records as row_evaluate_cost(prefix_rowcount * rows_after_filtering). This cost plus scan_cost gives us total cost of using TABLE/INDEX/RANGE SCAN. */ const double scan_total_cost = scan_read_cost + cost_model->row_evaluate_cost(prefix_rowcount * rows_after_filtering); trace_access_scan.add("resulting_rows", rows_after_filtering); trace_access_scan.add("cost", scan_total_cost); if (best_ref == nullptr || (scan_total_cost < best_read_cost + cost_model->row_evaluate_cost(prefix_rowcount * rows_fetched))) {
  8. best_access_path(sql_planner.cc) /* If the table has a range (tab->quick is

    set) make_join_query_block() will ensure that this will be used */ best_read_cost = scan_read_cost; rows_fetched = rows_after_filtering; if (tab->found_records) { /* Although join buffering may be used for this table, this filter calculation is not done to calculate the cost of join buffering itself (that is done inside calculate_scan_cost()). The is_join_buffering parameter is therefore 'false'. */ const float full_filter = calculate_condition_filter( tab, nullptr, ~remaining_tables & ~excluded_tables, static_cast<double>(tab->found_records), false, false, trace_access_scan); filter_effect = static_cast<float>(std::min( 1.0, tab->found_records * full_filter / rows_after_filtering)); } best_ref = nullptr; best_uses_jbuf = !disable_jbuf; ref_depend_map = 0; }
  9. コスト比較の部分のソース if (best_ref == nullptr || (scan_total_cost < best_read_cost +

    cost_model->row_evaluate_cost(prefix_rowcount * rows_fetched))) {
  10. コスト比較の部分のソース if (best_ref == nullptr || (scan_total_cost < best_read_cost +

    cost_model->row_evaluate_cost(prefix_rowcount * rows_fetched))) {
  11. find_best_ref(sql_planner.cc) Key_use *Optimize_table_order::find_best_ref( … // Check if we found full

    key if (all_key_parts_covered && !ref_or_null_part) /* use eq key */ { cur_used_keyparts = (uint)~0; if (keyinfo->flags & HA_NOSAME && ((keyinfo->flags & HA_NULL_PART_KEY) == 0 || all_key_parts_non_null)) { cur_read_cost = prev_record_reads(join, idx, table_deps) * table->cost_model()->page_read_cost(1.0);
  12. まずは前者
 Key_use *Optimize_table_order::find_best_ref( … // Check if we found full

    key if (all_key_parts_covered && !ref_or_null_part) /* use eq key */ { cur_used_keyparts = (uint)~0; if (keyinfo->flags & HA_NOSAME && ((keyinfo->flags & HA_NULL_PART_KEY) == 0 || all_key_parts_non_null)) { cur_read_cost = prev_record_reads(join, idx, table_deps) * table->cost_model()->page_read_cost(1.0);
  13. prev_read_cost(sql_planner.cc) static double prev_record_reads(JOIN *join, uint idx, table_map found_ref) {

    double found = 1.0; POSITION *pos_end = join->positions - 1; for (POSITION *pos = join->positions + idx - 1; pos != pos_end; pos--) { const double fanout = pos->rows_fetched * pos->filter_effect; if (pos->table->table_ref->map() & found_ref) { found_ref |= pos->ref_depend_map; if (pos->rows_fetched > DBL_EPSILON) found *= fanout; } else if (fanout < 1.0) { found *= fanout; } } return found;
  14. prev_read_cost(sql_planner.cc) static double prev_record_reads(JOIN *join, uint idx, table_map found_ref) {

    double found = 1.0; POSITION *pos_end = join->positions - 1; for (POSITION *pos = join->positions + idx - 1; pos != pos_end; pos--) { const double fanout = pos->rows_fetched * pos->filter_effect; if (pos->table->table_ref->map() & found_ref) { found_ref |= pos->ref_depend_map; if (pos->rows_fetched > DBL_EPSILON) found *= fanout; } else if (fanout < 1.0) { found *= fanout; } } return found; JOIN元のテーブルのrowsとfilteredの乗算 つまり、前のテーブルから渡される行数
  15. 今度は後者 Key_use *Optimize_table_order::find_best_ref( … // Check if we found full

    key if (all_key_parts_covered && !ref_or_null_part) /* use eq key */ { cur_used_keyparts = (uint)~0; if (keyinfo->flags & HA_NOSAME && ((keyinfo->flags & HA_NULL_PART_KEY) == 0 || all_key_parts_non_null)) { cur_read_cost = prev_record_reads(join, idx, table_deps) * table->cost_model()->page_read_cost(1.0);
  16. page_read_cost(opt_costmodel.cc) double Cost_model_table::page_read_cost(double pages) const { assert(m_initialized); assert(pages >= 0.0);

    const double in_mem = m_table->file->table_in_memory_estimate(); const double pages_in_mem = pages * in_mem; const double pages_on_disk = pages - pages_in_mem; assert(pages_on_disk >= 0.0); const double cost = buffer_block_read_cost(pages_in_mem) + io_block_read_cost(pages_on_disk); return cost; }
  17. page_read_cost(opt_costmodel.cc) double Cost_model_table::page_read_cost(double pages) const { assert(m_initialized); assert(pages >= 0.0);

    const double in_mem = m_table->file->table_in_memory_estimate(); const double pages_in_mem = pages * in_mem; const double pages_on_disk = pages - pages_in_mem; assert(pages_on_disk >= 0.0); const double cost = buffer_block_read_cost(pages_in_mem) + io_block_read_cost(pages_on_disk); return cost; }
  18. page_read_cost(opt_costmodel.cc) double Cost_model_table::page_read_cost(double pages) const { assert(m_initialized); assert(pages >= 0.0);

    const double in_mem = m_table->file->table_in_memory_estimate(); const double pages_in_mem = pages * in_mem; const double pages_on_disk = pages - pages_in_mem; assert(pages_on_disk >= 0.0); const double cost = buffer_block_read_cost(pages_in_mem) + io_block_read_cost(pages_on_disk); return cost; } in_memはメモリ内に該当テーブルが何%のってるか を表す割合
  19. page_read_cost(opt_costmodel.cc) double Cost_model_table::page_read_cost(double pages) const { assert(m_initialized); assert(pages >= 0.0);

    const double in_mem = m_table->file->table_in_memory_estimate(); const double pages_in_mem = pages * in_mem; const double pages_on_disk = pages - pages_in_mem; assert(pages_on_disk >= 0.0); const double cost = buffer_block_read_cost(pages_in_mem) + io_block_read_cost(pages_on_disk); return cost; }
  20. buffer_block_read_costとio_block_read_cost mysql> SELECT * FROM mysql.engine_cost; +-------------+-------------+------------------------+------------+---------------------+---------+---- -----------+ | engine_name

    | device_type | cost_name | cost_value | last_update | comment | default_value | +-------------+-------------+------------------------+------------+---------------------+---------+---- -----------+ | default | 0 | io_block_read_cost | NULL | 2022-08-08 07:12:45 | NULL | 1 | | default | 0 | memory_block_read_cost | NULL | 2022-08-08 07:12:45 | NULL | 0.25 | +-------------+-------------+------------------------+------------+---------------------+---------+---- -----------+ 2 rows in set (0.00 sec) 公式によると、メモリやディスクからインデックスまたはデータブロックを読み 取るコストとのこと
  21. page_read_cost(opt_costmodel.cc) double Cost_model_table::page_read_cost(double pages) const { assert(m_initialized); assert(pages >= 0.0);

    const double in_mem = m_table->file->table_in_memory_estimate(); const double pages_in_mem = pages * in_mem; const double pages_on_disk = pages - pages_in_mem; assert(pages_on_disk >= 0.0); const double cost = buffer_block_read_cost(pages_in_mem) + io_block_read_cost(pages_on_disk); return cost; } メモリから該当データページを取り出すコスト+ ディスクから該当データページを取り出すコスト
  22. 何の話だったっけ Key_use *Optimize_table_order::find_best_ref( … // Check if we found full

    key if (all_key_parts_covered && !ref_or_null_part) /* use eq key */ { cur_used_keyparts = (uint)~0; if (keyinfo->flags & HA_NOSAME && ((keyinfo->flags & HA_NULL_PART_KEY) == 0 || all_key_parts_non_null)) { cur_read_cost = prev_record_reads(join, idx, table_deps) * table->cost_model()->page_read_cost(1.0);
  23. 何の話だったっけ Key_use *Optimize_table_order::find_best_ref( … // Check if we found full

    key if (all_key_parts_covered && !ref_or_null_part) /* use eq key */ { cur_used_keyparts = (uint)~0; if (keyinfo->flags & HA_NOSAME && ((keyinfo->flags & HA_NULL_PART_KEY) == 0 || all_key_parts_non_null)) { cur_read_cost = prev_record_reads(join, idx, table_deps) * table->cost_model()->page_read_cost(1.0); JOIN元テーブルから渡される行数×(メモリから該当データページを取 り出すコスト+ディスクから該当データページを取り出すコスト)
  24. コスト比較の部分のソース if (best_ref == nullptr || (scan_total_cost < best_read_cost +

    cost_model->row_evaluate_cost(prefix_rowcount * rows_fetched))) {
  25. row_evaluate_cost mysql> SELECT * FROM mysql.server_cost where cost_name="row_evaluate_cost"; +-------------------+------------+---------------------+---------+---------------+ |

    cost_name | cost_value | last_update | comment | default_value | +-------------------+------------+---------------------+---------+---------------+ | row_evaluate_cost | NULL | 2022-08-08 07:12:45 | NULL | 0.1 | +-------------------+------------+---------------------+---------+---------------+ 1 row in set (0.02 sec) 公式によると、レコード条件を評価するコストとのこと
  26. コスト比較の部分のソース
 if (best_ref == nullptr || (scan_total_cost < best_read_cost +

    cost_model->row_evaluate_cost(prefix_rowcount * rows_fetched))) { 行組み合わせ数分の評価コスト
  27. コスト比較の部分のソース if (best_ref == nullptr || (scan_total_cost < best_read_cost +

    cost_model->row_evaluate_cost(prefix_rowcount * rows_fetched))) {
  28. best_access_path(sql_planner.cc) double scan_read_cost = calculate_scan_cost( tab, idx, best_ref, prefix_rowcount, found_condition,

    disable_jbuf, &rows_after_filtering, &trace_access_scan); /* We estimate the cost of evaluating WHERE clause for found records as row_evaluate_cost(prefix_rowcount * rows_after_filtering). This cost plus scan_cost gives us total cost of using TABLE/INDEX/RANGE SCAN. */ const double scan_total_cost = scan_read_cost + cost_model->row_evaluate_cost(prefix_rowcount * rows_after_filtering); trace_access_scan.add("resulting_rows", rows_after_filtering); trace_access_scan.add("cost", scan_total_cost); if (best_ref == nullptr || (scan_total_cost < best_read_cost + cost_model->row_evaluate_cost(prefix_rowcount * rows_fetched))) {
  29. page_read_cost(opt_costmodel.cc) double Optimize_table_order::calculate_scan_cost( const JOIN_TAB *tab, const uint idx, const

    Key_use *best_ref, const double prefix_rowcount, const bool found_condition, const bool disable_jbuf, double *rows_after_filtering, Opt_trace_object *trace_access_scan) { ... else scan_cost = table->file->table_scan_cost(); // table scan const double single_scan_read_cost = scan_cost.total_cost(); ... const double buffer_count = 1.0 + ((double)cache_record_length(join, idx) * prefix_rowcount / (double)thd->variables.join_buff_size); scan_and_filter_cost = buffer_count * (single_scan_read_cost + cost_model->row_evaluate_cost( tab->records() - *rows_after_filtering)); ややこしわからん
  30. 分かったこと • refアクセスのエンジン側のコストは以下で見積もられる ◦ JOIN元テーブルから渡される行数×(メモリから該当データページ を取り出すコスト+ディスクから該当データページを取り出すコス ト) • refアクセスのサーバー側のコストは以下で見積もられる ◦

    行組み合わせの数×条件評価のコスト • その後にそれらの和とALLのコストと比較する • scanアクセスのコストのサーバー側のコストは大体同じ。エンジン側の コストはややこしそう