Browse Source
MDEV-34740 mariadb-import: optimize index and constraint creation
MDEV-34740 mariadb-import: optimize index and constraint creation
For InnoDB tables, parse the CREATE TABLE statement to defer index and constraint creation until after data loading. For other storage engines, the DISABLE/ENABLE KEYS commands achieve similar optimization. This behavior is controlled by a new option, innodb-optimize-keys (default: ON), compatible with mydumper. Additionally, this commit separates the table creation phase from data loading. Running DDL statements (such as DROP IF EXISTS) in a single thread avoids the "table not locked" issue from MDEV-34741. As a bonus, view creation no longer requires a separate step.bb-11.8-MDEV-34551
10 changed files with 927 additions and 187 deletions
-
15client/CMakeLists.txt
-
217client/import_util.cc
-
89client/import_util.h
-
330client/mysqlimport.cc
-
214mysql-test/main/mariadb-import.result
-
93mysql-test/main/mariadb-import.test
-
3mysql-test/main/mysqldump.result
-
3unittest/client/CMakeLists.txt
-
149unittest/client/import_util-t.cc
-
1unittest/mytap/CMakeLists.txt
@ -0,0 +1,217 @@ |
|||
/*
|
|||
Copyright (c) 2024, MariaDB |
|||
|
|||
This program is free software; you can redistribute it and/or modify |
|||
it under the terms of the GNU General Public License as published by |
|||
the Free Software Foundation; version 2 of the License. |
|||
|
|||
This program is distributed in the hope that it will be useful, |
|||
but WITHOUT ANY WARRANTY; without even the implied warranty of |
|||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|||
GNU General Public License for more details. |
|||
|
|||
You should have received a copy of the GNU General Public License |
|||
along with this program; if not, write to the Free Software |
|||
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA |
|||
*/ |
|||
|
|||
/*
|
|||
This file contains some routines to do client-side parsing of CREATE TABLE |
|||
statements. The goal is to extract the primary key, constraints, and |
|||
secondary key. his is useful for optimizing the import process, to delay |
|||
secondary index creation until after the data has been loaded. |
|||
*/ |
|||
|
|||
#include <string>
|
|||
#include <vector>
|
|||
#include <pcre2posix.h>
|
|||
|
|||
#include "import_util.h"
|
|||
#include <assert.h>
|
|||
|
|||
/**
|
|||
* Extract the first CREATE TABLE statement from a script. |
|||
* |
|||
* @param script The input script containing SQL statements. |
|||
* @return std::string The first CREATE TABLE statement found, or an empty |
|||
* string if not found. |
|||
*/ |
|||
std::string extract_first_create_table(const std::string &script) |
|||
{ |
|||
regex_t create_table_regex; |
|||
regmatch_t match[2]; |
|||
const char *pattern= "(CREATE\\s+TABLE\\s+[^;]+;)\\s*\\n"; |
|||
regcomp(&create_table_regex, pattern, REG_EXTENDED); |
|||
|
|||
if (regexec(&create_table_regex, script.c_str(), 2, match, 0) == 0) |
|||
{ |
|||
std::string result= |
|||
script.substr(match[1].rm_so, match[1].rm_eo - match[1].rm_so); |
|||
regfree(&create_table_regex); |
|||
return result; |
|||
} |
|||
|
|||
regfree(&create_table_regex); |
|||
return ""; |
|||
} |
|||
|
|||
TableDDLInfo::TableDDLInfo(const std::string &create_table_stmt) |
|||
{ |
|||
regex_t primary_key_regex, constraint_regex, index_regex, engine_regex, |
|||
table_name_regex; |
|||
constexpr size_t MAX_MATCHES= 10; |
|||
regmatch_t match[10]; |
|||
|
|||
regcomp(&primary_key_regex, "\\n\\s*(PRIMARY\\s+KEY\\s+(.*?)),?\\n", |
|||
REG_EXTENDED); |
|||
regcomp(&constraint_regex, |
|||
"\\n\\s*(CONSTRAINT\\s+(`?(?:[^`]|``)+`?)\\s+.*?),?\\n", |
|||
REG_EXTENDED); |
|||
regcomp(&index_regex, |
|||
"\\n\\s*(((?:UNIQUE|FULLTEXT|VECTOR|SPATIAL)\\s+)?(INDEX|KEY)\\s+(`(?:[^`]|``)+`)\\s+.*?),?\\n", |
|||
REG_EXTENDED); |
|||
regcomp(&engine_regex, "\\bENGINE\\s*=\\s*(\\w+)", REG_EXTENDED); |
|||
regcomp(&table_name_regex, "CREATE\\s+TABLE\\s+(`?(?:[^`]|``)+`?)\\s*\\(", |
|||
REG_EXTENDED); |
|||
|
|||
const char *stmt= create_table_stmt.c_str(); |
|||
const char *search_start= stmt; |
|||
|
|||
// Extract primary key
|
|||
if (regexec(&primary_key_regex, search_start, MAX_MATCHES, match, 0) == 0) |
|||
{ |
|||
primary_key= {std::string(stmt + match[1].rm_so, match[1].rm_eo - match[1].rm_so), |
|||
"PRIMARY"}; |
|||
} |
|||
|
|||
// Extract constraints and foreign keys
|
|||
search_start= stmt; |
|||
while (regexec(&constraint_regex, search_start, MAX_MATCHES, match, 0) == 0) |
|||
{ |
|||
assert(match[2].rm_so != -1); |
|||
assert(match[1].rm_so != -1); |
|||
std::string name(search_start + match[2].rm_so, match[2].rm_eo - match[2].rm_so); |
|||
std::string definition(search_start + match[1].rm_so, match[1].rm_eo - match[1].rm_so); |
|||
constraints.push_back({definition, name}); |
|||
search_start+= match[0].rm_eo - 1; |
|||
} |
|||
|
|||
// Extract secondary indexes
|
|||
search_start= stmt; |
|||
while (regexec(&index_regex, search_start, MAX_MATCHES, match, 0) == 0) |
|||
{ |
|||
assert(match[4].rm_so != -1); |
|||
std::string name(search_start + match[4].rm_so, match[4].rm_eo - match[4].rm_so); |
|||
std::string definition(search_start + match[1].rm_so, match[1].rm_eo - match[1].rm_so); |
|||
secondary_indexes.push_back({definition, name}); |
|||
search_start+= match[0].rm_eo -1; |
|||
} |
|||
|
|||
// Extract storage engine
|
|||
if (regexec(&engine_regex, stmt, MAX_MATCHES, match, 0) == 0) |
|||
{ |
|||
storage_engine= std::string(stmt + match[1].rm_so, match[1].rm_eo - match[1].rm_so); |
|||
} |
|||
|
|||
// Extract table name
|
|||
if (regexec(&table_name_regex, stmt, MAX_MATCHES, match, 0) == 0) |
|||
{ |
|||
table_name= std::string(stmt + match[1].rm_so, match[1].rm_eo - match[1].rm_so); |
|||
} |
|||
if (primary_key.definition.empty() && storage_engine == "InnoDB") |
|||
{ |
|||
for (const auto &index : secondary_indexes) |
|||
{ |
|||
if (index.definition.find("UNIQUE") != std::string::npos) |
|||
{ |
|||
non_pk_clustering_key_name= index.name; |
|||
break; |
|||
} |
|||
} |
|||
} |
|||
regfree(&primary_key_regex); |
|||
regfree(&constraint_regex); |
|||
regfree(&index_regex); |
|||
regfree(&engine_regex); |
|||
regfree(&table_name_regex); |
|||
} |
|||
|
|||
/**
|
|||
Convert a KeyOrConstraintDefinitionType enum value to its |
|||
corresponding string representation. |
|||
|
|||
@param type The KeyOrConstraintDefinitionType enum value. |
|||
@return std::string The string representation of the |
|||
KeyOrConstraintDefinitionType. |
|||
*/ |
|||
static std::string to_string(KeyOrConstraintType type) |
|||
{ |
|||
switch (type) |
|||
{ |
|||
case KeyOrConstraintType::CONSTRAINT: |
|||
return "CONSTRAINT"; |
|||
case KeyOrConstraintType::INDEX: |
|||
return "INDEX"; |
|||
default: |
|||
return "UNKNOWN"; |
|||
} |
|||
} |
|||
|
|||
std::string TableDDLInfo::generate_alter_add( |
|||
const std::vector<KeyDefinition> &definitions, |
|||
KeyOrConstraintType type) const |
|||
{ |
|||
if (definitions.empty() || |
|||
(type == KeyOrConstraintType::INDEX && definitions.size() == 1 |
|||
&& !non_pk_clustering_key_name.empty())) |
|||
{ |
|||
return ""; |
|||
} |
|||
|
|||
std::string sql= "ALTER TABLE " + table_name + " "; |
|||
bool need_comma= false; |
|||
for (const auto &definition : definitions) |
|||
{ |
|||
/*
|
|||
Do not add or drop clustering secondary index |
|||
*/ |
|||
if (type == KeyOrConstraintType::INDEX && |
|||
definition.name == non_pk_clustering_key_name) |
|||
continue; |
|||
|
|||
if (need_comma) |
|||
sql+= ", "; |
|||
else |
|||
need_comma= true; |
|||
sql+= "ADD " + definition.definition; |
|||
} |
|||
return sql; |
|||
} |
|||
|
|||
std::string TableDDLInfo::generate_alter_drop( |
|||
const std::vector<KeyDefinition> &definitions, KeyOrConstraintType type) const |
|||
{ |
|||
if (definitions.empty() || |
|||
(type == KeyOrConstraintType::INDEX && definitions.size() == 1 && |
|||
!non_pk_clustering_key_name.empty())) |
|||
{ |
|||
return ""; |
|||
} |
|||
|
|||
std::string sql= "ALTER TABLE " + table_name + " "; |
|||
bool need_comma= false; |
|||
for (const auto &definition : definitions) |
|||
{ |
|||
if (type == KeyOrConstraintType::INDEX && |
|||
definition.name == non_pk_clustering_key_name) |
|||
continue; |
|||
|
|||
if (need_comma) |
|||
sql+= ", "; |
|||
else |
|||
need_comma= true; |
|||
sql+= "DROP " + to_string(type) + " " + |
|||
definition.name; |
|||
} |
|||
return sql; |
|||
} |
@ -0,0 +1,89 @@ |
|||
/* |
|||
Copyright (c) 2024, MariaDB |
|||
|
|||
This program is free software; you can redistribute it and/or modify |
|||
it under the terms of the GNU General Public License as published by |
|||
the Free Software Foundation; version 2 of the License. |
|||
|
|||
This program is distributed in the hope that it will be useful, |
|||
but WITHOUT ANY WARRANTY; without even the implied warranty of |
|||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|||
GNU General Public License for more details. |
|||
|
|||
You should have received a copy of the GNU General Public License |
|||
along with this program; if not, write to the Free Software |
|||
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA |
|||
*/ |
|||
|
|||
#pragma once |
|||
#include <string> |
|||
#include <vector> |
|||
|
|||
/* TABLE DDL INFO - representation of parsed CREATE TABLE Statement */ |
|||
|
|||
enum class KeyOrConstraintType |
|||
{ |
|||
CONSTRAINT, |
|||
INDEX, |
|||
UNKNOWN |
|||
}; |
|||
|
|||
/** |
|||
* Struct representing a table keyor constraint definition |
|||
*/ |
|||
struct KeyDefinition |
|||
{ |
|||
/** Full key or constraint definition string, |
|||
e.g UNIQUE KEY `uniq_idx` (`col`) */ |
|||
std::string definition; |
|||
/** The name of key or constraint, including escape chars */ |
|||
std::string name; |
|||
}; |
|||
|
|||
/** |
|||
Information about keys and constraints, extracted from |
|||
CREATE TABLE statement |
|||
*/ |
|||
struct TableDDLInfo |
|||
{ |
|||
TableDDLInfo(const std::string &create_table_stmt); |
|||
KeyDefinition primary_key; |
|||
std::vector<KeyDefinition> constraints; |
|||
std::vector<KeyDefinition> secondary_indexes; |
|||
std::string storage_engine; |
|||
std::string table_name; |
|||
/* Innodb is using first UNIQUE key for clustering, if no PK is set*/ |
|||
std::string non_pk_clustering_key_name; |
|||
|
|||
/** |
|||
Generate ALTER TABLE ADD/DROP statements for keys or constraints. |
|||
The goal is to remove indexes/constraints before the data is imported |
|||
and recreate them after import. |
|||
PRIMARY key is not affected by these operations |
|||
*/ |
|||
std::string generate_alter_add(const std::vector<KeyDefinition> &defs, |
|||
KeyOrConstraintType type) const; |
|||
std::string generate_alter_drop(const std::vector<KeyDefinition> &defs, |
|||
KeyOrConstraintType type) const; |
|||
|
|||
|
|||
std::string drop_constraints_sql() const |
|||
{ |
|||
return generate_alter_drop(constraints, KeyOrConstraintType::CONSTRAINT); |
|||
} |
|||
std::string add_constraints_sql() const |
|||
{ |
|||
return generate_alter_add(constraints, KeyOrConstraintType::CONSTRAINT); |
|||
} |
|||
std::string drop_secondary_indexes_sql() const |
|||
{ |
|||
return generate_alter_drop(secondary_indexes, |
|||
KeyOrConstraintType::INDEX); |
|||
} |
|||
std::string add_secondary_indexes_sql() const |
|||
{ |
|||
return generate_alter_add(secondary_indexes, |
|||
KeyOrConstraintType::INDEX); |
|||
} |
|||
}; |
|||
std::string extract_first_create_table(const std::string &script); |
@ -0,0 +1,3 @@ |
|||
ADD_EXECUTABLE(import_util-t import_util-t.cc) |
|||
TARGET_LINK_LIBRARIES(import_util-t PRIVATE import_util mytap) |
|||
ADD_TEST(import_util import_util-t) |
@ -0,0 +1,149 @@ |
|||
/* Copyright (c) 2024, MariaDB
|
|||
|
|||
This program is free software; you can redistribute it and/or modify |
|||
it under the terms of the GNU General Public License as published by |
|||
the Free Software Foundation; version 2 of the License. |
|||
|
|||
This program is distributed in the hope that it will be useful, |
|||
but WITHOUT ANY WARRANTY; without even the implied warranty of |
|||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|||
GNU General Public License for more details. |
|||
|
|||
You should have received a copy of the GNU General Public License |
|||
along with this program; if not, write to the Free Software |
|||
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA |
|||
*/ |
|||
|
|||
#include "my_config.h"
|
|||
#include "import_util.h"
|
|||
|
|||
#include <tap.h>
|
|||
#include <string>
|
|||
|
|||
inline bool operator==(const KeyDefinition &lhs, const KeyDefinition &rhs) |
|||
{ |
|||
return lhs.definition == rhs.definition && lhs.name == rhs.name; |
|||
} |
|||
|
|||
/*
|
|||
Test parsing of CREATE TABLE in mariadb-import utility |
|||
*/ |
|||
static void test_ddl_parser() |
|||
{ |
|||
std::string script= R"( |
|||
-- Some SQL script |
|||
CREATE TABLE `book` ( |
|||
`id` mediumint(8) unsigned NOT NULL AUTO_INCREMENT, |
|||
`title` varchar(200) NOT NULL, |
|||
`author_id` smallint(5) unsigned NOT NULL, |
|||
`publisher_id` smallint(5) unsigned NOT NULL, |
|||
`excerpt` text, |
|||
PRIMARY KEY (`id`), |
|||
KEY `fk_book_author` (`author_id`), |
|||
KEY `fk_book_publisher` (`publisher_id`), |
|||
UNIQUE KEY `title_author` (`title`,`author`), |
|||
FULLTEXT KEY `excerpt` (`excerpt`), |
|||
CONSTRAINT `fk_book_author` FOREIGN KEY (`author_id`) REFERENCES `author` (`id`) ON DELETE CASCADE |
|||
CONSTRAINT `fk_book_publisher` FOREIGN KEY (`publisher_id`) REFERENCES `publisher` (`id`) ON DELETE CASCADE |
|||
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_uca1400_ai_ci; |
|||
)"; |
|||
|
|||
auto create_table_stmt= extract_first_create_table(script); |
|||
ok(!create_table_stmt.empty(), "CREATE TABLE statement found"); |
|||
|
|||
TableDDLInfo ddl_info(create_table_stmt); |
|||
|
|||
const std::string& table_name= ddl_info.table_name; |
|||
const std::string& storage_engine= ddl_info.storage_engine; |
|||
|
|||
ok(table_name == "`book`", "Table name is OK"); |
|||
ok(storage_engine == "InnoDB", "Storage engine is OK"); |
|||
ok(ddl_info.primary_key == KeyDefinition{"PRIMARY KEY (`id`)", "PRIMARY"}, |
|||
"Primary key def is OK"); |
|||
|
|||
ok(ddl_info.secondary_indexes.size() == 4, "Secondary index size is OK"); |
|||
const auto &sec_indexes= ddl_info.secondary_indexes; |
|||
ok(sec_indexes[0] == KeyDefinition{"KEY `fk_book_author` (`author_id`)","`fk_book_author`"}, |
|||
"First key is OK"); |
|||
ok(sec_indexes[1] == |
|||
KeyDefinition{"KEY `fk_book_publisher` (`publisher_id`)", |
|||
"`fk_book_publisher`"}, |
|||
"Second key is OK"); |
|||
|
|||
ok(ddl_info.constraints.size() == 2, "Constraints size correct"); |
|||
ok(ddl_info.constraints[0] == |
|||
KeyDefinition{"CONSTRAINT `fk_book_author` FOREIGN KEY (`author_id`) REFERENCES " |
|||
"`author` (`id`) ON DELETE CASCADE","`fk_book_author`"}, |
|||
"First constraint OK"); |
|||
|
|||
std::string drop_constraints= ddl_info.drop_constraints_sql(); |
|||
ok(drop_constraints == |
|||
"ALTER TABLE `book` DROP CONSTRAINT `fk_book_author`, DROP CONSTRAINT `fk_book_publisher`", |
|||
"Drop constraints SQL is \"%s\"", drop_constraints.c_str()); |
|||
std::string add_constraints= ddl_info.add_constraints_sql(); |
|||
ok(add_constraints == |
|||
"ALTER TABLE `book` ADD CONSTRAINT `fk_book_author` FOREIGN KEY (`author_id`) " |
|||
"REFERENCES `author` (`id`) ON DELETE CASCADE, " |
|||
"ADD CONSTRAINT `fk_book_publisher` FOREIGN KEY (`publisher_id`) " |
|||
"REFERENCES `publisher` (`id`) ON DELETE CASCADE", |
|||
"Add constraints SQL is \"%s\"",add_constraints.c_str()); |
|||
|
|||
std::string drop_secondary_indexes= |
|||
ddl_info.drop_secondary_indexes_sql(); |
|||
ok(drop_secondary_indexes == |
|||
"ALTER TABLE `book` " |
|||
"DROP INDEX `fk_book_author`, " |
|||
"DROP INDEX `fk_book_publisher`, " |
|||
"DROP INDEX `title_author`, " |
|||
"DROP INDEX `excerpt`", |
|||
"Drop secondary indexes SQL is \"%s\"", drop_secondary_indexes.c_str()); |
|||
std::string add_secondary_indexes= |
|||
ddl_info.add_secondary_indexes_sql(); |
|||
ok(add_secondary_indexes == |
|||
"ALTER TABLE `book` ADD KEY `fk_book_author` (`author_id`), " |
|||
"ADD KEY `fk_book_publisher` (`publisher_id`), " |
|||
"ADD UNIQUE KEY `title_author` (`title`,`author`), " |
|||
"ADD FULLTEXT KEY `excerpt` (`excerpt`)", |
|||
"Add secondary indexes SQL is \"%s\"", add_secondary_indexes.c_str()); |
|||
} |
|||
|
|||
/*
|
|||
For Innodb table without PK, and but with Unique key |
|||
(which is used for clustering, instead of PK) |
|||
this key will not be added and dropped by |
|||
the import utility |
|||
*/ |
|||
static void innodb_non_pk_clustering_key() |
|||
{ |
|||
auto create_table_stmt= R"( |
|||
CREATE TABLE `book` ( |
|||
`id` mediumint(8), |
|||
`uniq` varchar(200), |
|||
UNIQUE KEY `id` (`id`), |
|||
UNIQUE KEY `uniq` (`uniq`), |
|||
KEY `id_uniq` (`id`,`uniq`) |
|||
) ENGINE=InnoDB; |
|||
)"; |
|||
TableDDLInfo ddl_info(create_table_stmt); |
|||
ok(ddl_info.non_pk_clustering_key_name == "`id`", |
|||
"Non-PK clustering key is %s", |
|||
ddl_info.non_pk_clustering_key_name.c_str()); |
|||
ok(ddl_info.primary_key.definition.empty(), |
|||
"Primary key is %s", ddl_info.primary_key.definition.c_str()); |
|||
ok(ddl_info.secondary_indexes.size() == 3, |
|||
"Secondary indexes size is %zu", |
|||
ddl_info.secondary_indexes.size()); |
|||
ok(!ddl_info.add_secondary_indexes_sql().empty(), |
|||
"Some secondary indexes to add"); |
|||
ok(!ddl_info.drop_secondary_indexes_sql().empty(), |
|||
"Some secondary indexes to drop"); |
|||
} |
|||
int main() |
|||
{ |
|||
plan(18); |
|||
diag("Testing DDL parser"); |
|||
|
|||
test_ddl_parser(); |
|||
innodb_non_pk_clustering_key(); |
|||
return exit_status(); |
|||
} |
Write
Preview
Loading…
Cancel
Save
Reference in new issue