Upgrade to PRO for Only $50/Year—Limited-Time Offer! 🔥

大テーブルと小テーブルのJOINのコスト計算の話

Avatar for kubo ayumu kubo ayumu
March 05, 2024

 大テーブルと小テーブルのJOINのコスト計算の話

Avatar for kubo ayumu

kubo ayumu

March 05, 2024
Tweet

More Decks by kubo ayumu

Other Decks in Technology

Transcript

  1. 激しめの要約 • 大きめのテーブル(100万行)と小さめのテーブル(3行)をLEFT JOINする クエリが妙に遅い • 後者の結合タイプが全検索となりhash joinが選択されていることが分 かった •

    後者のテーブルの行数を10行に増やすと、後者の結合タイプがeq_refに 変わる → 手元で再現して原因をいろいろ調べてみることにした ※当発表の内容はブログ記事にも載せてます https://amamanamam.hatenablog.com/entry/2024/02/11/005331
  2. 環境/設定 mysql> desc mytable; +---------+--------------+------+-----+---------+----------------+ | Field | Type |

    Null | Key | Default | Extra | +---------+--------------+------+-----+---------+----------------+ | id | int | NO | PRI | NULL | auto_increment | | name | varchar(255) | YES | | NULL | | | type_id | int | NO | MUL | NULL | | | value | varchar(255) | YES | | NULL | | +---------+--------------+------+-----+---------+----------------+ 4 rows in set (0.10 sec) mysql> desc types; +-------+--------------+------+-----+---------+----------------+ | Field | Type | Null | Key | Default | Extra | +-------+--------------+------+-----+---------+----------------+ | id | int | NO | PRI | NULL | auto_increment | | name | varchar(255) | YES | | NULL | | +-------+--------------+------+-----+---------+----------------+ 2 rows in set (0.01 sec)
  3. 環境/設定 mysql> select count(*) from mytable; +----------+ | count(*) |

    +----------+ | 524283 | +----------+ 1 row in set (0.92 sec) mysql> select count(*) from types; +----------+ | count(*) | +----------+ | 3 | +----------+ 1 row in set (0.10 sec)
  4. 再現してみた mysql> explain select * from mytable left join types

    on mytable.type_id = types.id; +----+-------------+---------+------------+------+---------------+------+---------+------+--------+----------+------ --------------------------------------+ | id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra | +----+-------------+---------+------------+------+---------------+------+---------+------+--------+----------+------ --------------------------------------+ | 1 | SIMPLE | mytable | NULL | ALL | NULL | NULL | NULL | NULL | 488245 | 100.00 | NULL | | 1 | SIMPLE | types | NULL | ALL | PRIMARY | NULL | NULL | NULL | 3 | 100.00 | Using where; Using join buffer (hash join) | +----+-------------+---------+------------+------+---------------+------+---------+------+--------+----------+------ --------------------------------------+ 2 rows in set, 1 warning (0.01 sec)
  5. 再現してみた mysql> insert into types values (4,'typeD'),(5,'typeE'),(6,'typeF'),(7,'typeG'),(8,'typeH'),(9,'typeI'),(10,'typeJ'); Query OK, 7

    rows affected (0.03 sec) Records: 7 Duplicates: 0 Warnings: 0 mysql> explain select * from mytable left join types on mytable.type_id = types.id; +----+-------------+---------+------------+--------+---------------+---------+---------+----------------------+----- ---+----------+-------+ | id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra | +----+-------------+---------+------------+--------+---------------+---------+---------+----------------------+----- ---+----------+-------+ | 1 | SIMPLE | mytable | NULL | ALL | NULL | NULL | NULL | NULL | 488245 | 100.00 | NULL | | 1 | SIMPLE | types | NULL | eq_ref | PRIMARY | PRIMARY | 4 | kubo.mytable.type_id | 1 | 100.00 | NULL | +----+-------------+---------+------------+--------+---------------+---------+---------+----------------------+----- ---+----------+-------+ 2 rows in set, 1 warning (0.00 sec)
  6. best_access_path(sql_planner.cc) void Optimize_table_order::best_access_path(JOIN_TAB *tab, const table_map remaining_tables, const uint idx,

    bool disable_jbuf, const double prefix_rowcount, POSITION *pos) { ... // The 'ref' access method with lowest cost as found by find_best_ref() Key_use *best_ref = nullptr; // Look for the best ref access if the storage engine supports index access. if (tab->keyuse() != nullptr && (table->file->ha_table_flags() & HA_NO_INDEX_ACCESS) == 0) best_ref = find_best_ref(tab, remaining_tables, idx, prefix_rowcount, &found_condition, &ref_depend_map, &used_key_parts); double rows_fetched = best_ref ? best_ref->fanout : DBL_MAX; /* Cost of executing the best access method prefix_rowcount number of times */ double best_read_cost = best_ref ? best_ref->read_cost : DBL_MAX;
  7. best_access_path(sql_planner.cc) double scan_read_cost = calculate_scan_cost( tab, idx, best_ref, prefix_rowcount, found_condition,

    disable_jbuf, &rows_after_filtering, &trace_access_scan); /* We estimate the cost of evaluating WHERE clause for found records as row_evaluate_cost(prefix_rowcount * rows_after_filtering). This cost plus scan_cost gives us total cost of using TABLE/INDEX/RANGE SCAN. */ const double scan_total_cost = scan_read_cost + cost_model->row_evaluate_cost(prefix_rowcount * rows_after_filtering); trace_access_scan.add("resulting_rows", rows_after_filtering); trace_access_scan.add("cost", scan_total_cost); if (best_ref == nullptr || (scan_total_cost < best_read_cost + cost_model->row_evaluate_cost(prefix_rowcount * rows_fetched))) {
  8. best_access_path(sql_planner.cc) /* If the table has a range (tab->quick is

    set) make_join_query_block() will ensure that this will be used */ best_read_cost = scan_read_cost; rows_fetched = rows_after_filtering; if (tab->found_records) { /* Although join buffering may be used for this table, this filter calculation is not done to calculate the cost of join buffering itself (that is done inside calculate_scan_cost()). The is_join_buffering parameter is therefore 'false'. */ const float full_filter = calculate_condition_filter( tab, nullptr, ~remaining_tables & ~excluded_tables, static_cast<double>(tab->found_records), false, false, trace_access_scan); filter_effect = static_cast<float>(std::min( 1.0, tab->found_records * full_filter / rows_after_filtering)); } best_ref = nullptr; best_uses_jbuf = !disable_jbuf; ref_depend_map = 0; }
  9. コスト比較の部分のソース if (best_ref == nullptr || (scan_total_cost < best_read_cost +

    cost_model->row_evaluate_cost(prefix_rowcount * rows_fetched))) {
  10. コスト比較の部分のソース if (best_ref == nullptr || (scan_total_cost < best_read_cost +

    cost_model->row_evaluate_cost(prefix_rowcount * rows_fetched))) {
  11. find_best_ref(sql_planner.cc) Key_use *Optimize_table_order::find_best_ref( … // Check if we found full

    key if (all_key_parts_covered && !ref_or_null_part) /* use eq key */ { cur_used_keyparts = (uint)~0; if (keyinfo->flags & HA_NOSAME && ((keyinfo->flags & HA_NULL_PART_KEY) == 0 || all_key_parts_non_null)) { cur_read_cost = prev_record_reads(join, idx, table_deps) * table->cost_model()->page_read_cost(1.0);
  12. まずは前者
 Key_use *Optimize_table_order::find_best_ref( … // Check if we found full

    key if (all_key_parts_covered && !ref_or_null_part) /* use eq key */ { cur_used_keyparts = (uint)~0; if (keyinfo->flags & HA_NOSAME && ((keyinfo->flags & HA_NULL_PART_KEY) == 0 || all_key_parts_non_null)) { cur_read_cost = prev_record_reads(join, idx, table_deps) * table->cost_model()->page_read_cost(1.0);
  13. prev_read_cost(sql_planner.cc) static double prev_record_reads(JOIN *join, uint idx, table_map found_ref) {

    double found = 1.0; POSITION *pos_end = join->positions - 1; for (POSITION *pos = join->positions + idx - 1; pos != pos_end; pos--) { const double fanout = pos->rows_fetched * pos->filter_effect; if (pos->table->table_ref->map() & found_ref) { found_ref |= pos->ref_depend_map; if (pos->rows_fetched > DBL_EPSILON) found *= fanout; } else if (fanout < 1.0) { found *= fanout; } } return found;
  14. prev_read_cost(sql_planner.cc) static double prev_record_reads(JOIN *join, uint idx, table_map found_ref) {

    double found = 1.0; POSITION *pos_end = join->positions - 1; for (POSITION *pos = join->positions + idx - 1; pos != pos_end; pos--) { const double fanout = pos->rows_fetched * pos->filter_effect; if (pos->table->table_ref->map() & found_ref) { found_ref |= pos->ref_depend_map; if (pos->rows_fetched > DBL_EPSILON) found *= fanout; } else if (fanout < 1.0) { found *= fanout; } } return found; JOIN元のテーブルのrowsとfilteredの乗算 つまり、前のテーブルから渡される行数
  15. 今度は後者 Key_use *Optimize_table_order::find_best_ref( … // Check if we found full

    key if (all_key_parts_covered && !ref_or_null_part) /* use eq key */ { cur_used_keyparts = (uint)~0; if (keyinfo->flags & HA_NOSAME && ((keyinfo->flags & HA_NULL_PART_KEY) == 0 || all_key_parts_non_null)) { cur_read_cost = prev_record_reads(join, idx, table_deps) * table->cost_model()->page_read_cost(1.0);
  16. page_read_cost(opt_costmodel.cc) double Cost_model_table::page_read_cost(double pages) const { assert(m_initialized); assert(pages >= 0.0);

    const double in_mem = m_table->file->table_in_memory_estimate(); const double pages_in_mem = pages * in_mem; const double pages_on_disk = pages - pages_in_mem; assert(pages_on_disk >= 0.0); const double cost = buffer_block_read_cost(pages_in_mem) + io_block_read_cost(pages_on_disk); return cost; }
  17. page_read_cost(opt_costmodel.cc) double Cost_model_table::page_read_cost(double pages) const { assert(m_initialized); assert(pages >= 0.0);

    const double in_mem = m_table->file->table_in_memory_estimate(); const double pages_in_mem = pages * in_mem; const double pages_on_disk = pages - pages_in_mem; assert(pages_on_disk >= 0.0); const double cost = buffer_block_read_cost(pages_in_mem) + io_block_read_cost(pages_on_disk); return cost; }
  18. page_read_cost(opt_costmodel.cc) double Cost_model_table::page_read_cost(double pages) const { assert(m_initialized); assert(pages >= 0.0);

    const double in_mem = m_table->file->table_in_memory_estimate(); const double pages_in_mem = pages * in_mem; const double pages_on_disk = pages - pages_in_mem; assert(pages_on_disk >= 0.0); const double cost = buffer_block_read_cost(pages_in_mem) + io_block_read_cost(pages_on_disk); return cost; } in_memはメモリ内に該当テーブルが何%のってるか を表す割合
  19. page_read_cost(opt_costmodel.cc) double Cost_model_table::page_read_cost(double pages) const { assert(m_initialized); assert(pages >= 0.0);

    const double in_mem = m_table->file->table_in_memory_estimate(); const double pages_in_mem = pages * in_mem; const double pages_on_disk = pages - pages_in_mem; assert(pages_on_disk >= 0.0); const double cost = buffer_block_read_cost(pages_in_mem) + io_block_read_cost(pages_on_disk); return cost; }
  20. buffer_block_read_costとio_block_read_cost mysql> SELECT * FROM mysql.engine_cost; +-------------+-------------+------------------------+------------+---------------------+---------+---- -----------+ | engine_name

    | device_type | cost_name | cost_value | last_update | comment | default_value | +-------------+-------------+------------------------+------------+---------------------+---------+---- -----------+ | default | 0 | io_block_read_cost | NULL | 2022-08-08 07:12:45 | NULL | 1 | | default | 0 | memory_block_read_cost | NULL | 2022-08-08 07:12:45 | NULL | 0.25 | +-------------+-------------+------------------------+------------+---------------------+---------+---- -----------+ 2 rows in set (0.00 sec) 公式によると、メモリやディスクからインデックスまたはデータブロックを読み 取るコストとのこと
  21. page_read_cost(opt_costmodel.cc) double Cost_model_table::page_read_cost(double pages) const { assert(m_initialized); assert(pages >= 0.0);

    const double in_mem = m_table->file->table_in_memory_estimate(); const double pages_in_mem = pages * in_mem; const double pages_on_disk = pages - pages_in_mem; assert(pages_on_disk >= 0.0); const double cost = buffer_block_read_cost(pages_in_mem) + io_block_read_cost(pages_on_disk); return cost; } メモリから該当データページを取り出すコスト+ ディスクから該当データページを取り出すコスト
  22. 何の話だったっけ Key_use *Optimize_table_order::find_best_ref( … // Check if we found full

    key if (all_key_parts_covered && !ref_or_null_part) /* use eq key */ { cur_used_keyparts = (uint)~0; if (keyinfo->flags & HA_NOSAME && ((keyinfo->flags & HA_NULL_PART_KEY) == 0 || all_key_parts_non_null)) { cur_read_cost = prev_record_reads(join, idx, table_deps) * table->cost_model()->page_read_cost(1.0);
  23. 何の話だったっけ Key_use *Optimize_table_order::find_best_ref( … // Check if we found full

    key if (all_key_parts_covered && !ref_or_null_part) /* use eq key */ { cur_used_keyparts = (uint)~0; if (keyinfo->flags & HA_NOSAME && ((keyinfo->flags & HA_NULL_PART_KEY) == 0 || all_key_parts_non_null)) { cur_read_cost = prev_record_reads(join, idx, table_deps) * table->cost_model()->page_read_cost(1.0); JOIN元テーブルから渡される行数×(メモリから該当データページを取 り出すコスト+ディスクから該当データページを取り出すコスト)
  24. コスト比較の部分のソース if (best_ref == nullptr || (scan_total_cost < best_read_cost +

    cost_model->row_evaluate_cost(prefix_rowcount * rows_fetched))) {
  25. row_evaluate_cost mysql> SELECT * FROM mysql.server_cost where cost_name="row_evaluate_cost"; +-------------------+------------+---------------------+---------+---------------+ |

    cost_name | cost_value | last_update | comment | default_value | +-------------------+------------+---------------------+---------+---------------+ | row_evaluate_cost | NULL | 2022-08-08 07:12:45 | NULL | 0.1 | +-------------------+------------+---------------------+---------+---------------+ 1 row in set (0.02 sec) 公式によると、レコード条件を評価するコストとのこと
  26. コスト比較の部分のソース
 if (best_ref == nullptr || (scan_total_cost < best_read_cost +

    cost_model->row_evaluate_cost(prefix_rowcount * rows_fetched))) { 行組み合わせ数分の評価コスト
  27. コスト比較の部分のソース if (best_ref == nullptr || (scan_total_cost < best_read_cost +

    cost_model->row_evaluate_cost(prefix_rowcount * rows_fetched))) {
  28. best_access_path(sql_planner.cc) double scan_read_cost = calculate_scan_cost( tab, idx, best_ref, prefix_rowcount, found_condition,

    disable_jbuf, &rows_after_filtering, &trace_access_scan); /* We estimate the cost of evaluating WHERE clause for found records as row_evaluate_cost(prefix_rowcount * rows_after_filtering). This cost plus scan_cost gives us total cost of using TABLE/INDEX/RANGE SCAN. */ const double scan_total_cost = scan_read_cost + cost_model->row_evaluate_cost(prefix_rowcount * rows_after_filtering); trace_access_scan.add("resulting_rows", rows_after_filtering); trace_access_scan.add("cost", scan_total_cost); if (best_ref == nullptr || (scan_total_cost < best_read_cost + cost_model->row_evaluate_cost(prefix_rowcount * rows_fetched))) {
  29. page_read_cost(opt_costmodel.cc) double Optimize_table_order::calculate_scan_cost( const JOIN_TAB *tab, const uint idx, const

    Key_use *best_ref, const double prefix_rowcount, const bool found_condition, const bool disable_jbuf, double *rows_after_filtering, Opt_trace_object *trace_access_scan) { ... else scan_cost = table->file->table_scan_cost(); // table scan const double single_scan_read_cost = scan_cost.total_cost(); ... const double buffer_count = 1.0 + ((double)cache_record_length(join, idx) * prefix_rowcount / (double)thd->variables.join_buff_size); scan_and_filter_cost = buffer_count * (single_scan_read_cost + cost_model->row_evaluate_cost( tab->records() - *rows_after_filtering)); ややこしわからん
  30. 分かったこと • refアクセスのエンジン側のコストは以下で見積もられる ◦ JOIN元テーブルから渡される行数×(メモリから該当データページ を取り出すコスト+ディスクから該当データページを取り出すコス ト) • refアクセスのサーバー側のコストは以下で見積もられる ◦

    行組み合わせの数×条件評価のコスト • その後にそれらの和とALLのコストと比較する • scanアクセスのコストのサーバー側のコストは大体同じ。エンジン側の コストはややこしそう