Browse Source
MDEV-36234: Add innodb_linux_aio
MDEV-36234: Add innodb_linux_aio
This controls which linux implementation to use for
innodb_use_native_aio=ON.
innodb_linux_aio=auto is equivalent to innodb_linux_aio=io_uring when
it is available, and falling back to innodb_linux_aio=aio when not.
Debian packaging is no longer an exclusive choice between aio and uring, so
for those older Debian or Ubuntu releases, it's a remove_uring directive.
For more recent releases, add mandatory liburing for consistent packaging.
WITH_LIBAIO is now an independent option from WITH_URING.
LINUX_NATIVE_AIO preprocessor constant is renamed to HAVE_LIBAIO,
analogous to existing HAVE_URING.
tpool::is_aio_supported(): A common feature check.
is_linux_native_aio_supported(): Remove. This had originally been added in
mysql/mysql-server@0da310b69db3a39ba2e6d63ff62ef3c027cd63ff in 2012
to fix an issue where io_submit() on CentOS 5.5 would return EINVAL
for a /tmp/#sql*.ibd file associated with CREATE TEMPORARY TABLE.
But, starting with commit 2e814d4702, InnoDB
temporary tables will be written to innodb_temp_data_file_path.
The 2012 commit said that the error could occur on "old kernels".
Any GNU/Linux distribution that we currently support should be based
on a newer Linux kernel; for example, Red Hat Enterprise Linux 7
was released in 2014.
tpool::create_linux_aio(): Wraps the Linux implementations:
create_libaio() and create_liburing(), each defined in separate
compilation units (aio_linux.cc, aio_libaio.cc, aio_liburing.cc).
The CMake definitions are simplified using target_sources() and
target_compile_definitions(), all available since CMake 2.8.12.
With this change, there is no need to include ${CMAKE_SOURCE_DIR}/tpool
or add TPOOL_DEFINES flags anymore, target_link_libraries(lib tpool)
does all that.
This is joint work with Daniel Black and Vladislav Vaintroub.
pull/3976/head
29 changed files with 512 additions and 453 deletions
-
3cmake/plugin.cmake
-
13debian/autobake-deb.sh
-
5debian/rules
-
2extra/mariabackup/CMakeLists.txt
-
40extra/mariabackup/xtrabackup.cc
-
1libmysqld/CMakeLists.txt
-
2mysql-test/mariadb-test-run.pl
-
21mysql-test/suite/sys_vars/r/innodb_linux_aio_basic.result
-
1mysql-test/suite/sys_vars/r/sysvars_innodb.result
-
23mysql-test/suite/sys_vars/t/innodb_linux_aio_basic.test
-
2mysql-test/suite/sys_vars/t/innodb_read_io_threads_basic.test
-
1mysql-test/suite/sys_vars/t/sysvars_innodb.test
-
1sql/CMakeLists.txt
-
9storage/innobase/CMakeLists.txt
-
39storage/innobase/handler/ha_innodb.cc
-
13storage/innobase/include/fil0fil.h
-
6storage/innobase/include/srv0srv.h
-
184storage/innobase/os/os0file.cc
-
4storage/innobase/srv/srv0srv.cc
-
13storage/innobase/srv/srv0start.cc
-
61tpool/CMakeLists.txt
-
193tpool/aio_libaio.cc
-
18tpool/aio_liburing.cc
-
211tpool/aio_linux.cc
-
1tpool/aio_simulated.cc
-
1tpool/aio_win.cc
-
65tpool/tpool.h
-
25tpool/tpool_generic.cc
-
7tpool/tpool_win.cc
@ -0,0 +1,21 @@ |
|||
select @@global.innodb_linux_aio; |
|||
@@global.innodb_linux_aio |
|||
auto |
|||
select @@session.innodb_linux_aio; |
|||
ERROR HY000: Variable 'innodb_linux_aio' is a GLOBAL variable |
|||
show global variables like 'innodb_linux_aio'; |
|||
Variable_name Value |
|||
innodb_linux_aio auto |
|||
show session variables like 'innodb_linux_aio'; |
|||
Variable_name Value |
|||
innodb_linux_aio auto |
|||
select * from information_schema.global_variables where variable_name='innodb_linux_aio'; |
|||
VARIABLE_NAME VARIABLE_VALUE |
|||
INNODB_LINUX_AIO auto |
|||
select * from information_schema.session_variables where variable_name='innodb_linux_aio'; |
|||
VARIABLE_NAME VARIABLE_VALUE |
|||
INNODB_LINUX_AIO auto |
|||
set global innodb_linux_aio='auto'; |
|||
ERROR HY000: Variable 'innodb_linux_aio' is a read only variable |
|||
set session innodb_linux_aio='aio'; |
|||
ERROR HY000: Variable 'innodb_linux_aio' is a read only variable |
@ -0,0 +1,23 @@ |
|||
--source include/have_innodb.inc |
|||
--source include/linux.inc |
|||
# enum readonly |
|||
|
|||
# |
|||
# show values; |
|||
# |
|||
select @@global.innodb_linux_aio; |
|||
--error ER_INCORRECT_GLOBAL_LOCAL_VAR |
|||
select @@session.innodb_linux_aio; |
|||
show global variables like 'innodb_linux_aio'; |
|||
show session variables like 'innodb_linux_aio'; |
|||
select * from information_schema.global_variables where variable_name='innodb_linux_aio'; |
|||
select * from information_schema.session_variables where variable_name='innodb_linux_aio'; |
|||
|
|||
# |
|||
# show that it's read-only |
|||
# |
|||
--error ER_INCORRECT_GLOBAL_LOCAL_VAR |
|||
set global innodb_linux_aio='auto'; |
|||
--error ER_INCORRECT_GLOBAL_LOCAL_VAR |
|||
set session innodb_linux_aio='aio'; |
|||
|
@ -0,0 +1,193 @@ |
|||
/* Copyright (C) 2019, 2020, MariaDB Corporation.
|
|||
|
|||
This program is free software; you can redistribute it and/or modify |
|||
it under the terms of the GNU General Public License as published by |
|||
the Free Software Foundation; version 2 of the License. |
|||
|
|||
This program is distributed in the hope that it will be useful, |
|||
but WITHOUT ANY WARRANTY; without even the implied warranty of |
|||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |
|||
GNU General Public License for more details. |
|||
|
|||
You should have received a copy of the GNU General Public License |
|||
along with this program; if not, write to the Free Software |
|||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */ |
|||
|
|||
#include "tpool.h"
|
|||
#include <thread>
|
|||
#include <sys/syscall.h>
|
|||
#include <libaio.h>
|
|||
|
|||
/**
|
|||
Invoke the io_getevents() system call, without timeout parameter. |
|||
|
|||
@param ctx context from io_setup() |
|||
@param min_nr minimum number of completion events to wait for |
|||
@param nr maximum number of completion events to collect |
|||
@param ev the collected events |
|||
|
|||
In https://pagure.io/libaio/c/7cede5af5adf01ad26155061cc476aad0804d3fc
|
|||
the io_getevents() implementation in libaio was "optimized" so that it |
|||
would elide the system call when there are no outstanding requests |
|||
and a timeout was specified. |
|||
|
|||
The libaio code for dereferencing ctx would occasionally trigger |
|||
SIGSEGV if io_destroy() was concurrently invoked from another thread. |
|||
Hence, we have to use the raw system call. |
|||
|
|||
WHY are we doing this at all? |
|||
Because we want io_destroy() from another thread to interrupt io_getevents(). |
|||
|
|||
And, WHY do we want io_destroy() from another thread to interrupt |
|||
io_getevents()? |
|||
|
|||
Because there is no documented, libaio-friendly and |
|||
race-condition-free way to interrupt io_getevents(). io_destroy() |
|||
coupled with raw syscall seemed to work for us so far. |
|||
|
|||
Historical note: in the past, we used io_getevents with |
|||
timeouts. We'd wake up periodically, check for shutdown flag, return |
|||
from the main routine. This was admittedly safer, yet it did cost |
|||
periodic wakeups, which we are not willing to do anymore. |
|||
|
|||
@note we also rely on the undocumented property, that io_destroy(ctx) |
|||
will make this version of io_getevents return EINVAL. |
|||
*/ |
|||
static int my_getevents(io_context_t ctx, long min_nr, long nr, io_event *ev)
  noexcept
{
  /* Remember the caller's errno; a failed system call must not leak into it. */
  const int errno_before= errno;
  /* The last argument is the timeout pointer: 0 means no timeout. */
  const int n_events= syscall(__NR_io_getevents, reinterpret_cast<long>(ctx),
                              min_nr, nr, ev, 0);
  if (n_events >= 0)
    return n_events;

  /* Report the failure as a negative error number, and restore errno. */
  const int negated_error= -errno;
  errno= errno_before;
  return negated_error;
}
|||
|
|||
|
|||
/*
|
|||
Linux AIO implementation, based on native AIO. |
|||
Needs libaio.h and -laio at the compile time. |
|||
|
|||
io_submit() is used to submit async IO. |
|||
|
|||
A single thread will collect the completion notification |
|||
with io_getevents() and forward io completion callback to |
|||
the worker threadpool. |
|||
*/ |
|||
namespace |
|||
{ |
|||
using namespace tpool; |
|||
|
|||
class aio_libaio final : public aio |
|||
{ |
|||
thread_pool *m_pool; |
|||
io_context_t m_io_ctx; |
|||
std::thread m_getevent_thread; |
|||
static std::atomic<bool> shutdown_in_progress; |
|||
|
|||
static void getevent_thread_routine(aio_libaio *aio) |
|||
{ |
|||
/*
|
|||
We collect events in small batches to hopefully reduce the |
|||
number of system calls. |
|||
*/ |
|||
constexpr unsigned MAX_EVENTS= 256; |
|||
|
|||
aio->m_pool->m_worker_init_callback(); |
|||
io_event events[MAX_EVENTS]; |
|||
for (;;) |
|||
{ |
|||
switch (int ret= my_getevents(aio->m_io_ctx, 1, MAX_EVENTS, events)) { |
|||
case -EINTR: |
|||
continue; |
|||
case -EINVAL: |
|||
if (shutdown_in_progress) |
|||
goto end; |
|||
/* fall through */ |
|||
default: |
|||
if (ret < 0) |
|||
{ |
|||
fprintf(stderr, "io_getevents returned %d\n", ret); |
|||
abort(); |
|||
goto end; |
|||
} |
|||
for (int i= 0; i < ret; i++) |
|||
{ |
|||
const io_event &event= events[i]; |
|||
aiocb *iocb= reinterpret_cast<aiocb*>(event.obj); |
|||
if (static_cast<int>(event.res) < 0) |
|||
{ |
|||
iocb->m_err= -event.res; |
|||
iocb->m_ret_len= 0; |
|||
} |
|||
else |
|||
{ |
|||
iocb->m_ret_len= event.res; |
|||
iocb->m_err= 0; |
|||
finish_synchronous(iocb); |
|||
} |
|||
iocb->m_internal_task.m_func= iocb->m_callback; |
|||
iocb->m_internal_task.m_arg= iocb; |
|||
iocb->m_internal_task.m_group= iocb->m_group; |
|||
aio->m_pool->submit_task(&iocb->m_internal_task); |
|||
} |
|||
} |
|||
} |
|||
end: |
|||
aio->m_pool->m_worker_destroy_callback(); |
|||
} |
|||
|
|||
public: |
|||
aio_libaio(io_context_t ctx, thread_pool *pool) |
|||
: m_pool(pool), m_io_ctx(ctx), |
|||
m_getevent_thread(getevent_thread_routine, this) |
|||
{ |
|||
} |
|||
|
|||
~aio_libaio() |
|||
{ |
|||
shutdown_in_progress= true; |
|||
io_destroy(m_io_ctx); |
|||
m_getevent_thread.join(); |
|||
shutdown_in_progress= false; |
|||
} |
|||
|
|||
int submit_io(aiocb *cb) override |
|||
{ |
|||
io_prep_pread(&cb->m_iocb, cb->m_fh, cb->m_buffer, cb->m_len, cb->m_offset); |
|||
if (cb->m_opcode != aio_opcode::AIO_PREAD) |
|||
cb->m_iocb.aio_lio_opcode= IO_CMD_PWRITE; |
|||
iocb *icb= &cb->m_iocb; |
|||
int ret= io_submit(m_io_ctx, 1, &icb); |
|||
if (ret == 1) |
|||
return 0; |
|||
errno= -ret; |
|||
return -1; |
|||
} |
|||
|
|||
int bind(native_file_handle&) override { return 0; } |
|||
int unbind(const native_file_handle&) override { return 0; } |
|||
const char *get_implementation() const override { return "Linux native AIO"; }; |
|||
}; |
|||
|
|||
std::atomic<bool> aio_libaio::shutdown_in_progress; |
|||
} |
|||
|
|||
namespace tpool |
|||
{ |
|||
/**
  Create a libaio based asynchronous I/O backend.
  @param pool     thread pool for executing the completion callbacks
  @param max_io   the number of requests passed to io_setup()
  @return the new backend
  @retval nullptr if io_setup() failed (the error is reported to stderr)
*/
aio *create_libaio(thread_pool *pool, int max_io)
{
  /* io_setup() expects the context to be zero-initialized */
  io_context_t ctx{};
  const int setup_result= io_setup(max_io, &ctx);
  if (!setup_result)
    return new aio_libaio(ctx, pool);

  fprintf(stderr, "io_setup(%d) returned %d\n", max_io, setup_result);
  return nullptr;
}
|||
} |
Write
Preview
Loading…
Cancel
Save
Reference in new issue