Browse Source

WIP: Try to fix things (does not work)

Let us see if it would help to defer applying a requested change of
innodb_log_write_ahead_size until log checkpoint completion.

TODO: Many things would be easier if we make innodb_log_write_ahead_size
a read-only parameter, with a maximum of 4096 bytes.
10.11-MDEV-33894
Marko Mäkelä 1 year ago
parent
commit
4444436eea
  1. 22
      storage/innobase/buf/buf0flu.cc
  2. 19
      storage/innobase/handler/ha_innodb.cc
  3. 17
      storage/innobase/include/log0log.h
  4. 1
      storage/innobase/include/srv0srv.h
  5. 54
      storage/innobase/log/log0log.cc
  6. 39
      storage/innobase/log/log0recv.cc
  7. 35
      storage/innobase/os/os0file.cc
  8. 1
      storage/innobase/srv/srv0srv.cc

22
storage/innobase/buf/buf0flu.cc

@ -1897,6 +1897,28 @@ inline void log_t::write_checkpoint(lsn_t end_lsn) noexcept
resize_lsn.store(0, std::memory_order_relaxed);
}
if (UNIV_UNLIKELY(write_size < write_size_requested))
{
/* The write unit size is being reduced. Discard a part of the buffer
that may already have been written out using this smaller size. */
#if 0 // TODO
ssize_t old_buf_free= length - size_t(lsn - write_lsn);
if (old_buf_free > ssize_t(write_size_1))
{
const size_t written{size_t(old_buf_free) & ~write_size_1};
length-= written;
memmove_aligned<512>(buf, buf + written, length);
if (resize_buf)
memmove_aligned<512>(resize_buf, resize_buf + written, length);
if (length > write_size_1)
goto buffer_swap;
buf_free.store(length, std::memory_order_relaxed);
goto no_buffer_swap;
}
#endif
}
log_resize_release();
if (UNIV_LIKELY(resizing <= 1));

19
storage/innobase/handler/ha_innodb.cc

@ -3688,7 +3688,7 @@ static MYSQL_SYSVAR_ULONGLONG(buffer_pool_size, innobase_buffer_pool_size,
static void innodb_log_write_ahead_size_update(THD *thd, st_mysql_sys_var*,
void *var, const void *save);
static MYSQL_SYSVAR_UINT(log_write_ahead_size, innodb_log_write_ahead_size,
static MYSQL_SYSVAR_UINT(log_write_ahead_size, log_sys.write_size,
PLUGIN_VAR_RQCMDARG,
"Redo log write size to avoid read-on-write; must be a power of two,"
" an integer fraction of innodb_log_file_size,"
@ -3817,14 +3817,14 @@ static int innodb_init_params()
}
MYSQL_SYSVAR_NAME(log_write_ahead_size).max_val = log_sys.buf_size;
if (!ut_is_2pow(innodb_log_write_ahead_size)
|| innodb_log_write_ahead_size > log_sys.buf_size
|| size_t(srv_log_file_size) & (innodb_log_write_ahead_size - 1)) {
if (!ut_is_2pow(log_sys.write_size)
|| log_sys.write_size > log_sys.buf_size
|| size_t(srv_log_file_size) & (log_sys.write_size - 1)) {
sql_print_error("InnoDB: innodb_log_write_ahead_size=%u"
" is not a power of two, an integer fraction"
" of innodb_log_file_size=%llu,"
" or up to innodb_log_buffer_size=%u",
innodb_log_write_ahead_size,
log_sys.write_size,
srv_log_file_size,
log_sys.buf_size);
DBUG_RETURN(HA_ERR_INITIALIZATION);
@ -18547,7 +18547,7 @@ static void innodb_log_write_ahead_size_update(THD *thd, st_mysql_sys_var*,
{
if (high_level_read_only)
ib_senderrf(thd, IB_LOG_LEVEL_ERROR, ER_READ_ONLY_MODE);
else if (!log_sys.set_write_ahead_size(*static_cast<const uint*>(save)))
else if (!log_sys.set_write_size(*static_cast<const uint*>(save)))
ib_senderrf(thd, IB_LOG_LEVEL_ERROR, ER_WRONG_ARGUMENTS,
"innodb_log_write_ahead_size");
}
@ -20034,13 +20034,12 @@ static void innodb_params_adjust()
#if defined __linux__ || defined _WIN32
uint& min_val= MYSQL_SYSVAR_NAME(log_write_ahead_size).min_val;
if (min_val < innodb_log_write_ahead_size)
min_val= innodb_log_write_ahead_size;
if (min_val < log_sys.write_size)
min_val= log_sys.write_size;
#endif
ut_ad(MYSQL_SYSVAR_NAME(log_write_ahead_size).min_val <=
innodb_log_write_ahead_size);
log_sys.write_size);
ut_ad(MYSQL_SYSVAR_NAME(log_write_ahead_size).max_val == log_sys.buf_size);
log_sys.set_write_ahead_size(innodb_log_write_ahead_size);
}
/****************************************************************************

17
storage/innobase/include/log0log.h

@ -275,12 +275,16 @@ private:
/** the log sequence number at the start of the log file */
lsn_t first_lsn;
/** write block size - 1 during the previous write_buf() */
uint32_t old_block_size_1;
uint32_t old_write_size_1;
#if defined __linux__ || defined _WIN32
/** The physical block size of the storage */
uint32_t block_size;
#endif
/** requested innodb_log_write_ahead_size */
uint write_size_requested;
public:
/** current innodb_log_write_ahead_size */
uint write_size;
/** format of the redo log: e.g., FORMAT_10_8 */
uint32_t format;
#if defined __linux__ || defined _WIN32
@ -330,6 +334,8 @@ public:
max_buf_free;
}
inline void set_recovered() noexcept;
void set_buf_free(size_t f) noexcept
{ ut_ad(f < buf_free_LOCK); buf_free.store(f, std::memory_order_relaxed); }
@ -413,7 +419,7 @@ public:
/** Update innodb_log_write_ahead_size
@param size the requested size
@return whether the size was assigned as is */
bool set_write_ahead_size(size_t size);
bool set_write_size(size_t size);
#if defined __linux__ || defined _WIN32
/** Try to enable or disable file system caching (update log_buffered) */
@ -483,7 +489,12 @@ public:
size_t get_block_size() const noexcept
{ ut_ad(block_size); return block_size; }
/** Set the log block size for file I/O. */
void set_block_size(uint32_t size) noexcept { block_size= size; }
void set_block_size(uint32_t size) noexcept
{
/* Raise write_size to the new block size if it is currently smaller;
a larger write_size is left unchanged. */
if (write_size < size)
write_size= uint(size);
block_size= size;
}
#else
/** @return the physical block size of the storage */
static constexpr size_t get_block_size() { return 512; }

1
storage/innobase/include/srv0srv.h

@ -219,7 +219,6 @@ extern char* srv_log_group_home_dir;
at startup (while disallowing writes to the redo log). */
extern ulonglong srv_log_file_size;
extern ulong srv_flush_log_at_trx_commit;
extern uint innodb_log_write_ahead_size;
extern uint srv_flush_log_at_timeout;
extern my_bool srv_adaptive_flushing;
extern my_bool srv_flush_sync;

54
storage/innobase/log/log0log.cc

@ -321,7 +321,8 @@ void log_t::create(lsn_t lsn) noexcept
this->flushed_to_disk_lsn.store(lsn, std::memory_order_relaxed);
first_lsn= lsn;
write_lsn= lsn;
old_block_size_1= 0;
old_write_size_1= uint32_t(get_block_size() - 1);
write_size_requested= 0;
last_checkpoint_lsn= 0;
@ -409,7 +410,7 @@ void log_resize_release()
}
}
bool log_t::set_write_ahead_size(size_t size)
bool log_t::set_write_size(size_t size)
{
ut_ad(size >= 512);
ut_ad(size <= log_sys.buf_size);
@ -418,7 +419,7 @@ bool log_t::set_write_ahead_size(size_t size)
const bool is_valid= ut_is_2pow(size) &&
!((size_t(file_size) | size_t(resize_target)) & (size - 1));
if (is_valid)
innodb_log_write_ahead_size= uint(size);
write_size_requested= uint(size);
latch.rd_unlock();
return is_valid;
}
@ -483,7 +484,7 @@ log_t::resize_start_status log_t::resize_start(os_offset_t size) noexcept
if (success)
{
ut_ad(!((size_t(file_size) | size_t(resize_target)) &
(innodb_log_write_ahead_size - 1)));
(write_size - 1)));
log_resize_release();
void *ptr= nullptr, *ptr2= nullptr;
@ -527,10 +528,12 @@ log_t::resize_start_status log_t::resize_start(os_offset_t size) noexcept
}
else
{
uint32_t bs= innodb_log_write_ahead_size;
while (size & (bs - 1))
bs>>= 1;
innodb_log_write_ahead_size= bs;
{
uint32_t bs= write_size_requested;
while (size & (bs - 1))
bs>>= 1;
write_size_requested= bs;
}
resize_target= size;
resize_buf= static_cast<byte*>(ptr);
resize_flush_buf= static_cast<byte*>(ptr2);
@ -543,7 +546,7 @@ log_t::resize_start_status log_t::resize_start(os_offset_t size) noexcept
{
memcpy_aligned<16>(resize_buf, buf, (buf_free + 15) & ~15);
start_lsn= first_lsn +
(~lsn_t{old_block_size_1} & (write_lsn - first_lsn));
(~lsn_t{old_write_size_1} & (write_lsn - first_lsn));
}
}
resize_lsn.store(start_lsn, std::memory_order_relaxed);
@ -802,20 +805,20 @@ ATTRIBUTE_COLD ATTRIBUTE_NOINLINE
@param length the used length of resize_buf */
void log_t::resize_write_buf(const byte *buf, size_t length) noexcept
{
ut_ad(!(resize_target & old_block_size_1));
ut_ad(!(length & old_block_size_1));
ut_ad(length > old_block_size_1);
ut_ad(!(resize_target & old_write_size_1));
ut_ad(!(length & old_write_size_1));
ut_ad(length > old_write_size_1);
ut_ad(length <= resize_target);
const lsn_t resizing{resize_in_progress()};
ut_ad(resizing <= write_lsn);
lsn_t offset= START_OFFSET +
((write_lsn - resizing) & ~lsn_t{old_block_size_1});
((write_lsn - resizing) & ~lsn_t{old_write_size_1});
if (UNIV_UNLIKELY(offset + length > resize_target))
{
offset= START_OFFSET;
resize_lsn.store(first_lsn +
(~lsn_t{old_block_size_1} & (write_lsn - first_lsn)),
(~lsn_t{old_write_size_1} & (write_lsn - first_lsn)),
std::memory_order_relaxed);
}
@ -846,19 +849,16 @@ template<bool release_latch> inline lsn_t log_t::write_buf() noexcept
ut_ad(!recv_no_log_write);
write_lock.set_pending(lsn);
ut_ad(write_lsn >= get_flushed_lsn());
const size_t block_size_1{get_block_size() - 1};
size_t write_size_1{innodb_log_write_ahead_size - 1};
ut_ad(ut_is_2pow(write_size_1 + 1));
ut_ad(write_size_1 >= block_size_1);
ut_d(const size_t block_size_1{get_block_size() - 1});
ut_ad(ut_is_2pow(write_size));
size_t write_size_1{write_size - 1};
size_t length{buf_free.load(std::memory_order_relaxed)};
lsn_t offset{calc_lsn_offset(write_lsn)};
ut_ad(length >= (offset & block_size_1));
{
ut_ad(old_write_size_1);
const size_t mask{(length ^ (size_t(lsn) - size_t(first_lsn))) |
(size_t(offset) &
~size_t{old_block_size_1
? old_block_size_1
: log_sys.get_block_size() - 1})};
(size_t(offset) & ~size_t{old_write_size_1})};
while (write_size_1 & mask)
write_size_1>>= 1;
}
@ -868,7 +868,7 @@ template<bool release_latch> inline lsn_t log_t::write_buf() noexcept
const byte *const rbuf{resize_buf};
offset&= ~lsn_t{write_size_1};
if (UNIV_UNLIKELY(write_size_1 < old_block_size_1))
if (UNIV_UNLIKELY(write_size_1 < old_write_size_1))
{
/* The write unit size is being reduced. Discard a part of the buffer
that may already have been written out using this smaller size. */
@ -890,6 +890,7 @@ template<bool release_latch> inline lsn_t log_t::write_buf() noexcept
if (length <= write_size_1)
{
no_buffer_swap:
ut_ad(!((length ^ (size_t(lsn) - size_t(first_lsn))) & write_size_1));
/* Keep filling the same buffer until we have more than one block. */
#if 0 /* TODO: Pad the last log block with dummy records. */
buf_free= log_pad(lsn, (write_size_1 + 1) - length,
@ -931,13 +932,18 @@ template<bool release_latch> inline lsn_t log_t::write_buf() noexcept
memcpy_aligned<16>(resize_flush_buf, resize_buf + length,
(new_buf_free + 15) & ~15);
length+= write_size_1 + 1;
/* Try to use write_size on the subsequent write. */
const size_t target_ws_1{write_size - 1};
const size_t padded_length{(length + target_ws_1) & ~target_ws_1};
if (padded_length + offset <= file_size)
write_size_1= target_ws_1;
}
std::swap(buf, flush_buf);
std::swap(resize_buf, resize_flush_buf);
}
old_block_size_1= uint32_t(write_size_1);
old_write_size_1= uint32_t(write_size_1);
write_to_log++;
if (release_latch)
latch.wr_unlock();

39
storage/innobase/log/log0recv.cc

@ -4201,7 +4201,7 @@ static bool recv_scan_log(bool last_phase)
if (recv_sys.is_corrupt_log())
break;
if (recv_sys.offset < log_sys.get_block_size() &&
if (recv_sys.offset < log_sys.write_size &&
recv_sys.lsn == recv_sys.scanned_lsn)
goto got_eof;
@ -4537,6 +4537,26 @@ dberr_t recv_recovery_read_checkpoint()
return err;
}
/** Set up the log buffer position from the recovery state.
Expects get_flushed_lsn(), get_lsn() and recv_sys.lsn to be equal and
old_write_size_1 to be still 0 (debug assertions below).
When is_pmem() does not hold, one block of get_block_size() bytes that
contains recv_sys.offset is moved to the start of buf, and buf_free is
set to the offset within that block. Otherwise buf_free is set to
recv_sys.offset as is. */
inline void log_t::set_recovered() noexcept
{
ut_ad(get_flushed_lsn() == get_lsn());
ut_ad(recv_sys.lsn == get_lsn());
ut_ad(!old_write_size_1);
size_t ro{recv_sys.offset};
if (!is_pmem())
{
const size_t bs{log_sys.get_block_size()}, bs_1{bs - 1};
/* Move the block containing offset ro to the start of the buffer.
NOTE(review): the source address is only aligned to bs; the code that
this replaces used memmove_aligned<64>, and block sizes down to 64
appear to be accepted elsewhere - confirm that <512> is valid here. */
memmove_aligned<512>(buf, buf + (ro & ~bs_1), bs);
/* Keep only the offset within that block. */
ro&= bs_1;
old_write_size_1= uint32_t(bs_1);
}
#ifdef HAVE_PMEM
else
/* Make the whole file_size bytes at buf readable and writable. */
mprotect(buf, size_t(file_size), PROT_READ | PROT_WRITE);
#endif
set_buf_free(ro);
}
/** Start recovering from a redo log checkpoint.
of first system tablespace page
@return error code or DB_SUCCESS */
@ -4710,22 +4730,7 @@ err_exit:
}
if (!srv_read_only_mode && log_sys.is_latest()) {
ut_ad(log_sys.get_flushed_lsn() == log_sys.get_lsn());
ut_ad(recv_sys.lsn == log_sys.get_lsn());
if (!log_sys.is_pmem()) {
const size_t bs_1{log_sys.get_block_size() - 1};
const size_t ro{recv_sys.offset};
recv_sys.offset &= bs_1;
memmove_aligned<64>(log_sys.buf,
log_sys.buf + (ro & ~bs_1),
log_sys.get_block_size());
#ifdef HAVE_PMEM
} else {
mprotect(log_sys.buf, size_t(log_sys.file_size),
PROT_READ | PROT_WRITE);
#endif
}
log_sys.set_buf_free(recv_sys.offset);
log_sys.set_recovered();
if (recv_needed_recovery
&& srv_operation <= SRV_OPERATION_EXPORT_RESTORED) {
/* Write a FILE_CHECKPOINT marker as the first thing,

35
storage/innobase/os/os0file.cc

@ -1094,7 +1094,6 @@ static ATTRIBUTE_COLD void os_file_log_buffered()
{
log_sys.log_maybe_unbuffered= false;
log_sys.log_buffered= true;
log_sys.set_block_size(512);
}
# endif
@ -1107,7 +1106,11 @@ static ATTRIBUTE_COLD bool os_file_log_maybe_unbuffered(const struct stat &st)
if (snprintf(b, sizeof b, "/sys/dev/block/%u:%u/queue/physical_block_size",
major(st.st_dev), minor(st.st_dev)) >=
static_cast<int>(sizeof b))
{
fallback:
log_sys.set_block_size(512);
return false;
}
int f= open(b, O_RDONLY);
if (f == -1)
{
@ -1115,7 +1118,7 @@ static ATTRIBUTE_COLD bool os_file_log_maybe_unbuffered(const struct stat &st)
"physical_block_size",
major(st.st_dev), minor(st.st_dev)) >=
static_cast<int>(sizeof b))
return false;
goto fallback;
f= open(b, O_RDONLY);
}
unsigned long s= 0;
@ -1132,9 +1135,7 @@ static ATTRIBUTE_COLD bool os_file_log_maybe_unbuffered(const struct stat &st)
close(f);
}
if (s > 4096 || s < 64 || !ut_is_2pow(s))
return false;
if (innodb_log_write_ahead_size < s)
innodb_log_write_ahead_size= uint(s);
goto fallback;
log_sys.set_block_size(uint32_t(s));
# else
constexpr unsigned long s= 4096;
@ -1211,11 +1212,7 @@ os_file_create_func(
break;
}
# ifdef __linux__
} else if (type != OS_LOG_FILE) {
} else if (log_sys.log_buffered) {
skip_o_direct:
os_file_log_buffered();
} else if (create_mode != OS_FILE_CREATE
} else if (type == OS_LOG_FILE && create_mode != OS_FILE_CREATE
&& create_mode != OS_FILE_CREATE_SILENT
&& !log_sys.is_opened()) {
if (stat(name, &st)) {
@ -1227,15 +1224,16 @@ os_file_create_func(
"InnoDB: File %s was not found", name);
goto not_found;
}
log_sys.set_block_size(512);
goto skip_o_direct;
} else if (!os_file_log_maybe_unbuffered(st)
|| log_sys.log_buffered) {
skip_o_direct:
os_file_log_buffered();
} else {
direct_flag = O_DIRECT;
log_sys.log_maybe_unbuffered = true;
}
if (!os_file_log_maybe_unbuffered(st)) {
goto skip_o_direct;
}
direct_flag = O_DIRECT;
log_sys.log_maybe_unbuffered= true;
# endif
}
#else
@ -2091,9 +2089,6 @@ os_file_create_func(
if (*success && type == OS_LOG_FILE) {
uint32_t s = uint32_t(get_sector_size(file));
if (innodb_log_write_ahead_size < s) {
innodb_log_write_ahead_size = s;
}
log_sys.set_block_size(s);
if (attributes & FILE_FLAG_NO_BUFFERING) {
if (os_file_get_size(file) % s) {

1
storage/innobase/srv/srv0srv.cc

@ -151,7 +151,6 @@ at startup (while disallowing writes to the redo log). */
ulonglong srv_log_file_size;
/** innodb_flush_log_at_trx_commit */
ulong srv_flush_log_at_trx_commit;
uint innodb_log_write_ahead_size;
/** innodb_flush_log_at_timeout */
uint srv_flush_log_at_timeout;
/** innodb_page_size */

Loading…
Cancel
Save