From 129616c70a69f5e0fe2f10bdd0e2785594305e44 Mon Sep 17 00:00:00 2001 From: Daniel Black Date: Sun, 31 Jul 2022 13:41:59 +1000 Subject: [PATCH 1/3] MDEV-28592 disks plugin - getmntinfo (BSD) & getmntent (AIX) Thanks to references from Brad Smith, BSDs use getmntinfo as a system call for mounted filesystems. Most BSDs return statfs structures (and we use OSX's statfs64), but NetBSD uses a statvfs structure. Simplify Linux getmntent_r to just use getmntent. AIX uses getmntent. An attempt at writing Solaris compatibility with a small bit of HPUX compatibility was made based on man page entries only. Fixes welcome. statvfs structures now use f_bsize for consistency with statfs. Test case adjusted as PATH_MAX is OS defined (e.g. 1023 on AIX). Fixes: 0ee5cf837e3a0464acc20db2a2aee0adaff3f2ac also fixes: MDEV-27818: Disk plugin does not show zpool mounted devices This is because zpool mount points don't begin with /. Due to the proliferation of multiple filesystem types since this was written, we restrict the entries listed in the disks plugin to exclude: * read-only mount points (no point monitoring, and includes squash, snaps, sysfs, procfs, cgroups...) * mount points that aren't directories (excludes /etc/hostname and similar mounts in containers). (getmntent (Linux/AIX) only) * filesystems where there is no capacity listed (excludes various virtual filesystem types). 
Reviewer: Sergei Golubchik --- cmake/os/WindowsCache.cmake | 5 + config.h.cmake | 6 + plugin/disks/CMakeLists.txt | 21 +- plugin/disks/information_schema_disks.cc | 229 ++++++++++++++++----- plugin/disks/mysql-test/disks/disks.result | 4 +- plugin/disks/mysql-test/disks/disks.test | 1 + 6 files changed, 211 insertions(+), 55 deletions(-) diff --git a/cmake/os/WindowsCache.cmake b/cmake/os/WindowsCache.cmake index 31b46365539..5628db7f989 100644 --- a/cmake/os/WindowsCache.cmake +++ b/cmake/os/WindowsCache.cmake @@ -64,6 +64,10 @@ SET(HAVE_GETHOSTBYADDR_R CACHE INTERNAL "") SET(HAVE_GETHRTIME CACHE INTERNAL "") SET(HAVE_GETPAGESIZE CACHE INTERNAL "") SET(HAVE_GETPASS CACHE INTERNAL "") +SET(HAVE_GETMNTENT CACHE INTERNAL "") +SET(HAVE_GETMNTENT_IN_SYS_MNTAB CACHE INTERNAL "") +SET(HAVE_GETMNTINFO CACHE INTERNAL "") +SET(HAVE_GETMNTINFO64 CACHE INTERNAL "") SET(HAVE_GETPASSPHRASE CACHE INTERNAL "") SET(HAVE_GETPWNAM CACHE INTERNAL "") SET(HAVE_GETPWUID CACHE INTERNAL "") @@ -146,6 +150,7 @@ SET(HAVE_SELECT 1 CACHE INTERNAL "") SET(HAVE_SELECT_H CACHE INTERNAL "") SET(HAVE_SETENV CACHE INTERNAL "") SET(HAVE_SETLOCALE 1 CACHE INTERNAL "") +SET(HAVE_SETMNTENT CACHE INTERNAL "") SET(HAVE_SIGACTION CACHE INTERNAL "") SET(HAVE_SIGINT 1 CACHE INTERNAL "") SET(HAVE_SIGPIPE CACHE INTERNAL "") diff --git a/config.h.cmake b/config.h.cmake index 6e089ae3794..962efeb13ba 100644 --- a/config.h.cmake +++ b/config.h.cmake @@ -35,6 +35,11 @@ #cmakedefine HAVE_FLOAT_H 1 #cmakedefine HAVE_FNMATCH_H 1 #cmakedefine HAVE_FPU_CONTROL_H 1 +#cmakedefine HAVE_GETMNTENT 1 +#cmakedefine HAVE_GETMNTENT_IN_SYS_MNTAB 1 +#cmakedefine HAVE_GETMNTINFO 1 +#cmakedefine HAVE_GETMNTINFO64 1 +#cmakedefine HAVE_GETMNTINFO_TAKES_statvfs 1 #cmakedefine HAVE_GRP_H 1 #cmakedefine HAVE_IA64INTRIN_H 1 #cmakedefine HAVE_IEEEFP_H 1 @@ -214,6 +219,7 @@ #cmakedefine HAVE_SELECT 1 #cmakedefine HAVE_SETENV 1 #cmakedefine HAVE_SETLOCALE 1 +#cmakedefine HAVE_SETMNTENT 1 #cmakedefine HAVE_SETUPTERM 1 #cmakedefine 
HAVE_SIGSET 1 #cmakedefine HAVE_SIGACTION 1 diff --git a/plugin/disks/CMakeLists.txt b/plugin/disks/CMakeLists.txt index d0f34b04027..4e40842cad0 100644 --- a/plugin/disks/CMakeLists.txt +++ b/plugin/disks/CMakeLists.txt @@ -1,7 +1,24 @@ INCLUDE (CheckIncludeFiles) -CHECK_INCLUDE_FILES ("sys/statvfs.h;mntent.h" INFO_HEADERS LANGUAGE CXX) -IF (INFO_HEADERS) +CHECK_SYMBOL_EXISTS (getmntent "mntent.h" HAVE_GETMNTENT) +CHECK_SYMBOL_EXISTS (getmntent "sys/mnttab.h" HAVE_GETMNTENT_IN_SYS_MNTAB) +CHECK_SYMBOL_EXISTS (setmntent "mntent.h" HAVE_SETMNTENT) +CHECK_SYMBOL_EXISTS (getmntinfo "sys/types.h;sys/mount.h" HAVE_GETMNTINFO) +CHECK_SYMBOL_EXISTS (getmntinfo64 "sys/types.h;sys/mount.h" HAVE_GETMNTINFO64) + +IF (HAVE_GETMNTINFO) +CHECK_CXX_SOURCE_COMPILES(" +#include +#include +int main() +{ + struct statvfs *s; + return getmntinfo(&s, ST_WAIT); +} + " HAVE_GETMNTINFO_TAKES_statvfs) +ENDIF() +IF (HAVE_GETMNTENT OR HAVE_GETMNTENT_IN_SYS_MNTAB OR + HAVE_GETMNTINFO OR HAVE_GETMNTINFO64) INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/sql) MYSQL_ADD_PLUGIN(DISKS information_schema_disks.cc MODULE_ONLY RECOMPILE_FOR_EMBEDDED) ENDIF() diff --git a/plugin/disks/information_schema_disks.cc b/plugin/disks/information_schema_disks.cc index 8806e0d60b2..bba5c850415 100644 --- a/plugin/disks/information_schema_disks.cc +++ b/plugin/disks/information_schema_disks.cc @@ -17,11 +17,45 @@ #include #include #include +#if defined(HAVE_GETMNTENT) #include +#elif !defined(HAVE_GETMNTINFO_TAKES_statvfs) +/* getmntinfo (the not NetBSD variants) */ +#include +#include +#include +#endif +#if defined(HAVE_GETMNTENT_IN_SYS_MNTAB) +#include +#define HAVE_GETMNTENT +#endif #include #include #include /* check_global_access() */ +/* + This intends to support *BSD's, macOS, Solaris, AIX, HP-UX, and Linux. + + specificly: + FreeBSD/OpenBSD/DragonFly (statfs) NetBSD (statvfs) uses getmntinfo(). + macOS uses getmntinfo64(). + Linux can use getmntent_r(), but we've just used getmntent for simplification. 
+ Linux/Solaris/AIX/HP-UX uses setmntent()/getmntent(). + Solaris uses getmntent() with a diffent prototype, return structure, and + no setmntent(fopen instead) +*/ +#if defined(HAVE_GETMNTINFO_TAKES_statvfs) || defined(HAVE_GETMNTENT) +typedef struct statvfs st_info; +#elif defined(HAVE_GETMNTINFO64) +typedef struct statfs64 st_info; +#else // GETMNTINFO +typedef struct statfs st_info; +#endif +#ifndef MOUNTED +/* HPUX - https://docstore.mik.ua/manuals/hp-ux/en/B2355-60130/getmntent.3X.html */ +#define MOUNTED MNT_MNTTAB +#endif + bool schema_table_store_record(THD *thd, TABLE *table); namespace @@ -39,23 +73,40 @@ ST_FIELD_INFO disks_table_fields[]= { 0, 0, MYSQL_TYPE_NULL, 0, 0, 0, 0 } }; -int disks_table_add_row(THD* pThd, - TABLE* pTable, - const char* zDisk, - const char* zPath, - const struct statvfs& info) +static int disks_table_add_row_stat( + THD* pThd, + TABLE* pTable, + const char* zDisk, + const char* zPath, + const st_info &info) { // From: http://pubs.opengroup.org/onlinepubs/009695399/basedefs/sys/statvfs.h.html + // and same for statfs: + // From: https://developer.apple.com/library/archive/documentation/System/Conceptual/ManPages_iPhoneOS/man2/statfs.2.html#//apple_ref/doc/man/2/statfs + // and: https://www.freebsd.org/cgi/man.cgi?query=statfs&sektion=2&apropos=0&manpath=FreeBSD+13.1-RELEASE+and+Ports // - // f_frsize Fundamental file system block size. + // f_bsize Fundamental file system block size. // f_blocks Total number of blocks on file system in units of f_frsize. // f_bfree Total number of free blocks. // f_bavail Number of free blocks available to non-privileged process. 
+ ulong block_size= (ulong) info.f_bsize; - ulonglong total = ((ulonglong)info.f_frsize * info.f_blocks) / 1024; - ulonglong used = ((ulonglong)info.f_frsize * + ulonglong total = ((ulonglong) block_size * info.f_blocks) / 1024; + ulonglong used = ((ulonglong) block_size * (info.f_blocks - info.f_bfree)) / 1024; - ulonglong avail = ((ulonglong)info.f_frsize * info.f_bavail) / 1024; + ulonglong avail = ((ulonglong) block_size * info.f_bavail) / 1024; + + /* skip filesystems that don't have any space */ + if (!info.f_blocks) + return 0; + + /* skip RO mounted filesystems */ +#if defined(HAVE_GETMNTINFO_TAKES_statvfs) || defined(HAVE_GETMNTENT) + if (info.f_flag & ST_RDONLY) +#else + if (info.f_flags & MNT_RDONLY) +#endif + return 0; pTable->field[0]->store(zDisk, strlen(zDisk), system_charset_info); pTable->field[1]->store(zPath, strlen(zPath), system_charset_info); @@ -67,71 +118,147 @@ int disks_table_add_row(THD* pThd, return (schema_table_store_record(pThd, pTable) != 0) ? 1 : 0; } -int disks_table_add_row(THD* pThd, TABLE* pTable, const char* zDisk, const char* zPath) + +#ifdef HAVE_GETMNTENT +static int disks_table_add_row(THD* pThd, TABLE* pTable, const char* zDisk, const char* zPath) { int rv = 0; - struct statvfs info; + st_info info; if (statvfs(zPath, &info) == 0) // We ignore failures. 
{ - rv = disks_table_add_row(pThd, pTable, zDisk, zPath, info); + rv = disks_table_add_row_stat(pThd, pTable, zDisk, zPath, info); } return rv; } +#endif -int disks_fill_table(THD* pThd, TABLE_LIST* pTables, Item* pCond) + +#ifdef HAVE_GETMNTINFO +static int disks_fill_table(THD* pThd, TABLE_LIST* pTables, Item* pCond) { - int rv = 1; - TABLE* pTable = pTables->table; + st_info *s; + int count, rv= 0; + TABLE* pTable= pTables->table; if (check_global_access(pThd, FILE_ACL, true)) - return 0; + return 0; + +#if defined(HAVE_GETMNTINFO_TAKES_statvfs) + count= getmntinfo(&s, ST_WAIT); +#elif defined(HAVE_GETMNTINFO64) + count= getmntinfo64(&s, MNT_WAIT); +#else + count= getmntinfo(&s, MNT_WAIT); +#endif + if (count == 0) + return 1; + + while (count && rv == 0) + { + rv= disks_table_add_row_stat(pThd, pTable, s->f_mntfromname, s->f_mntonname, *s); + count--; + s++; + } + return rv; +} +#else /* HAVE_GETMNTINFO */ + +static mysql_mutex_t m_getmntent; - FILE* pFile = setmntent("/etc/mtab", "r"); +/* HAVE_GETMNTENT */ +static int disks_fill_table(THD* pThd, TABLE_LIST* pTables, Item* pCond) +{ + int rv= 1; +#ifdef HAVE_SETMNTENT + struct mntent* pEnt; +#else + struct mnttab mnttabent, *pEnt= &mnttabent; +#endif + FILE* pFile; + TABLE* pTable= pTables->table; - if (pFile) + if (check_global_access(pThd, FILE_ACL, true)) + return 0; + +#ifdef HAVE_SETMNTENT + pFile= setmntent(MOUNTED, "r"); +#else + /* Solaris */ + pFile= fopen("/etc/mnttab", "r"); +#endif + + if (!pFile) + return 1; + + rv= 0; + + /* + We lock the outer loop rather than between getmntent so the multiple + infomation_schema.disks reads don't all start blocking each other and + no-one gets any answers. + */ + mysql_mutex_lock(&m_getmntent); + + while ((rv == 0) && +#if defined(HAVE_SETMNTENT) + (pEnt = getmntent(pFile)) + +#else + getmntent(pFile, pEnt) != 0 +#endif + ) { - const size_t BUFFER_SIZE = 4096; // 4K should be sufficient. 
- - char* pBuffer = new (std::nothrow) char [BUFFER_SIZE]; - - if (pBuffer) - { - rv = 0; - - struct mntent ent; - struct mntent* pEnt; - - while ((rv == 0) && (pEnt = getmntent_r(pFile, &ent, pBuffer, BUFFER_SIZE))) - { - // We only report the ones that refer to physical disks. - if (pEnt->mnt_fsname[0] == '/') - { - rv = disks_table_add_row(pThd, pTable, pEnt->mnt_fsname, pEnt->mnt_dir); - } - } - - delete [] pBuffer; - } - else - { - rv = 1; - } - - endmntent(pFile); + struct stat f; + const char *path, *point; +#ifdef HAVE_SETMNTENT + path= pEnt->mnt_dir; + point= pEnt->mnt_fsname; +#else + path= pEnt->mnt_mountp; + point= pEnt->mnt_special; +#endif + // Try to keep to real storage by excluding + // read only mounts, and mount points that aren't directories + if (hasmntopt(pEnt, MNTOPT_RO) != NULL) + continue; + if (stat(path, &f)) + continue; + if (!S_ISDIR(f.st_mode)) + continue; + rv= disks_table_add_row(pThd, pTable, point, path); } + mysql_mutex_unlock(&m_getmntent); + +#ifdef HAVE_SETMNTENT + endmntent(pFile); +#else + fclose(pFile); +#endif return rv; } +#endif /* HAVE_GETMNTINFO */ -int disks_table_init(void *ptr) +static int disks_table_init(void *ptr) { ST_SCHEMA_TABLE* pSchema_table = (ST_SCHEMA_TABLE*)ptr; pSchema_table->fields_info = disks_table_fields; pSchema_table->fill_table = disks_fill_table; +#ifndef HAVE_GETMNTINFO + mysql_mutex_init(0, &m_getmntent, MY_MUTEX_INIT_SLOW); +#endif + return 0; +} + +static int disks_table_deinit(void *ptr __attribute__((unused))) +{ +#ifndef HAVE_GETMNTINFO + mysql_mutex_destroy(&m_getmntent); +#endif return 0; } @@ -145,15 +272,15 @@ maria_declare_plugin(disks) MYSQL_INFORMATION_SCHEMA_PLUGIN, &disks_table_info, /* type-specific descriptor */ "DISKS", /* table name */ - "Johan Wikman", /* author */ + "Johan Wikman, Daniel Black", /* author */ "Disk space information", /* description */ PLUGIN_LICENSE_GPL, /* license type */ disks_table_init, /* init function */ - NULL, /* deinit function */ - 0x0101, /* 
version = 1.1 */ + disks_table_deinit, /* deinit function */ + 0x0102, /* version = 1.2 */ NULL, /* no status variables */ NULL, /* no system variables */ - "1.1", /* String version representation */ + "1.2", /* String version representation */ MariaDB_PLUGIN_MATURITY_STABLE /* Maturity (see include/mysql/plugin.h)*/ } mysql_declare_plugin_end; diff --git a/plugin/disks/mysql-test/disks/disks.result b/plugin/disks/mysql-test/disks/disks.result index 319e1eac10f..fccf9b4a9f1 100644 --- a/plugin/disks/mysql-test/disks/disks.result +++ b/plugin/disks/mysql-test/disks/disks.result @@ -1,8 +1,8 @@ show create table information_schema.disks; Table Create Table DISKS CREATE TEMPORARY TABLE `DISKS` ( - `Disk` varchar(4096) NOT NULL, - `Path` varchar(4096) NOT NULL, + `Disk` varchar(pathlen) NOT NULL, + `Path` varchar(pathlen) NOT NULL, `Total` bigint(32) NOT NULL, `Used` bigint(32) NOT NULL, `Available` bigint(32) NOT NULL diff --git a/plugin/disks/mysql-test/disks/disks.test b/plugin/disks/mysql-test/disks/disks.test index 13a0762ae01..7189c548342 100644 --- a/plugin/disks/mysql-test/disks/disks.test +++ b/plugin/disks/mysql-test/disks/disks.test @@ -1,2 +1,3 @@ +--replace_regex /varchar\([0-9]+\)/varchar(pathlen)/ show create table information_schema.disks; select sum(Total) > sum(Available), sum(Total)>sum(Used) from information_schema.disks; From 5cbc5dbbbe6c82ec6a4704073514ae274cdf40c1 Mon Sep 17 00:00:00 2001 From: Daniel Black Date: Wed, 31 Aug 2022 13:00:16 +1000 Subject: [PATCH 2/3] MDEV-29418 linux uuid implementation returning non-hwaddr based suffix Because of a define error the wrong value was being returned. 
Regression in MDEV-28243 Fixes: 607f9874679c3e4ef7edcd2c9d80120051af73cc --- mysys/my_gethwaddr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mysys/my_gethwaddr.c b/mysys/my_gethwaddr.c index 0fa4fb2f995..6bba553a549 100644 --- a/mysys/my_gethwaddr.c +++ b/mysys/my_gethwaddr.c @@ -111,7 +111,7 @@ my_bool my_gethwaddr(uchar *to) for (i= 0; res && i < ifc.ifc_len / sizeof(ifr[0]); i++) { #if !defined(_AIX) || !defined(__linux__) -#if defined(__linux___) +#if defined(__linux__) #define HWADDR_DATA ifr[i].ifr_hwaddr.sa_data #else #define HWADDR_DATA ifr[i].ifr_hwaddr From 244fdc435da364aea3b6e55835115cc04fe3297f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Mon, 5 Sep 2022 09:54:47 +0300 Subject: [PATCH 3/3] MDEV-29438 Recovery or backup of instant ALTER TABLE is incorrect This bug was found in MariaDB Server 10.6 thanks to the OPT_PAGE_CHECKSUM record that was implemented in commit 4179f93d28035ea2798cb1c16feeaaef87ab4775 for catching this type of recovery failures. page_cur_insert_rec_low(): If the previous record is the page infimum, correctly limit the end of the record. We do not want to copy data from the header of the page supremum. This omission caused the incorrect recovery of DB_TRX_ID in an instant ALTER TABLE metadata record, because part of the DB_TRX_ID was incorrectly copied from the n_owned of the page supremum, which in recovery would be updated after the copying, but in normal operation would already have been updated at the time the common prefix was being determined. log_phys_t::apply(): If a data page is found to be corrupted, do not flag the log corrupted but instead return a new status APPLIED_CORRUPTED so that the caller may discard all log for this page. We do not want the recovery of unrelated pages to fail in recv_recover_page(). 
No test case is included, because the known test case would only work in 10.6, and even after this fix, it would trigger another bug in instant ALTER TABLE crash recovery. --- storage/innobase/log/log0recv.cc | 11 +++++++---- storage/innobase/page/page0cur.cc | 2 +- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index d9761fe9d85..f789e061d9b 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -187,7 +187,9 @@ public: /** The page was modified, affecting the encryption parameters */ APPLIED_TO_ENCRYPTION, /** The page was modified, affecting the tablespace header */ - APPLIED_TO_FSP_HEADER + APPLIED_TO_FSP_HEADER, + /** The page was found to be corrupted */ + APPLIED_CORRUPTED, }; /** Apply log to a page frame. @@ -308,8 +310,7 @@ public: { page_corrupted: ib::error() << "Set innodb_force_recovery=1 to ignore corruption."; - recv_sys.found_corrupt_log= true; - return applied; + return APPLIED_CORRUPTED; } break; case INSERT_HEAP_REDUNDANT: @@ -2338,6 +2339,7 @@ static void recv_recover_page(buf_block_t* block, mtr_t& mtr, start_lsn = 0; continue; case log_phys_t::APPLIED_YES: + case log_phys_t::APPLIED_CORRUPTED: goto set_start_lsn; case log_phys_t::APPLIED_TO_FSP_HEADER: case log_phys_t::APPLIED_TO_ENCRYPTION: @@ -2391,7 +2393,8 @@ static void recv_recover_page(buf_block_t* block, mtr_t& mtr, } set_start_lsn: - if (recv_sys.found_corrupt_log && !srv_force_recovery) { + if ((a == log_phys_t::APPLIED_CORRUPTED + || recv_sys.found_corrupt_log) && !srv_force_recovery) { break; } diff --git a/storage/innobase/page/page0cur.cc b/storage/innobase/page/page0cur.cc index 31a780d8644..a1856586a10 100644 --- a/storage/innobase/page/page0cur.cc +++ b/storage/innobase/page/page0cur.cc @@ -1573,7 +1573,7 @@ inc_dir: { const byte *r= rec; const byte *c= cur->rec; - const byte *c_end= cur->rec + data_size; + const byte *c_end= c + (page_rec_is_infimum(c) ? 
8 : data_size); static_assert(REC_N_OLD_EXTRA_BYTES == REC_N_NEW_EXTRA_BYTES + 1, ""); if (c <= insert_buf && c_end > insert_buf) c_end= insert_buf;