@ -313,6 +313,25 @@ new_range:
/** Tablespace or log data space */
# ifndef UNIV_INNOCHECKSUM
struct fil_io_t
{
/** error code */
dberr_t err ;
/** file; node->space->release_for_io() must follow IORequestRead call */
fil_node_t * node ;
} ;
/** Tablespace encryption mode */
enum fil_encryption_t
{
/** Encrypted if innodb_encrypt_tables=ON (srv_encrypt_tables) */
FIL_ENCRYPTION_DEFAULT ,
/** Encrypted */
FIL_ENCRYPTION_ON ,
/** Not encrypted */
FIL_ENCRYPTION_OFF
} ;
struct fil_space_t : ilist_node < unflushed_spaces_tag_t > ,
ilist_node < rotation_list_tag_t >
# else
@ -348,8 +367,6 @@ struct fil_space_t
/*!< recovered tablespace size in pages;
0 if no size change was read from the redo log ,
or if the size change was implemented */
/** the committed size of the tablespace in pages */
Atomic_relaxed < uint32_t > committed_size ;
ulint n_reserved_extents ;
/*!< number of reserved free extents for
ongoing operations like B - tree page split */
@ -357,28 +374,33 @@ struct fil_space_t
the tablespace to disk ; dropping of the
tablespace is forbidden if this is positive */
private :
/** the committed size of the tablespace in pages */
Atomic_relaxed < uint32_t > committed_size ;
/** Number of pending buffer pool operations accessing the
tablespace without holding a table lock or dict_operation_lock
S - latch that would prevent the table ( and tablespace ) from being
dropped . An example is encryption key rotation .
The tablespace cannot be dropped while this is nonzero , or while
fil_node_t : : n_pending is nonzero .
The tablespace cannot be dropped while this is nonzero .
The most significant bit contains the STOP_NEW_OPS flag . */
Atomic_relaxed < size_t > n_pending_ops ;
Atomic_relaxed < uint32_t > n_pending_ops ;
/** Number of pending block read or write operations
The tablespace object cannot be freed while this is nonzero ,
but it can be detached from fil_system .
The most significant bit contains the CLOSING flag . */
std : : atomic < uint32_t > n_pending_ios ;
/** Flag in n_pending_ops that indicates that the tablespace is being
deleted , and no further operations should be performed */
static constexpr uint32_t STOP_NEW_OPS = ~ ( ~ uint32_t ( 0 ) > > 1 ) ;
/** Flag in n_pending_ios that indicates that the tablespace is a candidate
for being closed , and fil_node_t : : is_open ( ) can only be trusted after
acquiring fil_system . mutex and resetting the flag */
static constexpr uint32_t CLOSING = STOP_NEW_OPS ;
static constexpr uint32_t NOT_CLOSING = ~ CLOSING ;
public :
/** Number of pending block read or write operations
( when a write is imminent or a read has recently completed ) .
The tablespace object cannot be freed while this is nonzero ,
but it can be detached from fil_system .
Note that fil_node_t : : n_pending tracks actual pending I / O requests .
Protected by fil_system . mutex and std : : atomic . */
std : : atomic < ulint > n_pending_ios ;
rw_lock_t latch ; /*!< latch protecting the file space storage
allocation */
UT_LIST_NODE_T ( fil_space_t ) named_spaces ;
@ -476,19 +498,20 @@ public:
dberr_t rename ( const char * name , const char * path , bool log ,
bool replace = false ) ;
/** Note that the tablespace has been imported.
Initially , purpose = FIL_TYPE_IMPORT so that no redo log is
written while the space ID is being updated in each page . */
inline void set_imported ( ) ;
/** Note that the tablespace has been imported.
Initially , purpose = FIL_TYPE_IMPORT so that no redo log is
written while the space ID is being updated in each page . */
inline void set_imported ( ) ;
/** @return whether the storage device is rotational (HDD, not SSD) */
inline bool is_rotational ( ) const ;
/** @return whether the storage device is rotational (HDD, not SSD) */
inline bool is_rotational ( ) const ;
/** Open each file. Only invoked on fil_system.temp_space.
@ return whether all files were opened */
bool open ( ) ;
/** Close each file. Only invoked on fil_system.temp_space. */
void close ( ) ;
/** Open each file. Never invoked on .ibd files.
@ param create_new_db whether to skip the call to fil_node_t : : read_page0 ( )
@ return whether all files were opened */
bool open ( bool create_new_db ) ;
/** Close each file. Only invoked on fil_system.temp_space. */
void close ( ) ;
/** @return whether the tablespace is about to be dropped */
bool is_stopping ( ) const { return n_pending_ops & STOP_NEW_OPS ; }
@ -497,17 +520,13 @@ public:
size_t referenced ( ) const { return n_pending_ops & ~ STOP_NEW_OPS ; }
/** Note that operations on the tablespace must stop or can resume */
void set_stopping ( bool stopping )
{
ut_d ( auto n = ) n_pending_ops . fetch_xor ( STOP_NEW_OPS ) ;
ut_ad ( ! ( n & STOP_NEW_OPS ) = = stopping ) ;
}
inline void set_stopping ( bool stopping ) ;
MY_ATTRIBUTE ( ( warn_unused_result ) )
/** @return whether a tablespace reference was successfully acquired */
bool acquire ( )
{
size _t n = 0 ;
uint32 _t n = 0 ;
while ( ! n_pending_ops . compare_exchange_strong ( n , n + 1 ,
std : : memory_order_acquire ,
std : : memory_order_relaxed ) )
@ -523,30 +542,41 @@ public:
ut_ad ( n & ~ STOP_NEW_OPS ) ;
return ( n & ~ STOP_NEW_OPS ) = = 1 ;
}
/** Acquire a tablespace reference for I/O. */
void acquire_for_io ( ) { n_pending_ios + + ; }
MY_ATTRIBUTE ( ( warn_unused_result ) )
/** Acquire a tablespace reference for I/O.
@ return whether the file is usable */
bool acquire_for_io ( )
{
return UNIV_LIKELY ( ! ( n_pending_ios . fetch_add ( 1 , std : : memory_order_acquire ) &
CLOSING ) ) | |
prepare_for_io ( ) ;
}
/** Acquire another tablespace reference for I/O. */
inline void reacquire_for_io ( ) ;
/** Release a tablespace reference for I/O. */
void release_for_io ( ) { ut_d ( auto n = ) n_pending_ios - - ; ut_ad ( n ) ; }
/** @return whether I/O is pending */
bool pending_io ( ) const { return n_pending_ios ; }
void release_for_io ( )
{
ut_d ( uint32_t n = ) n_pending_ios . fetch_sub ( 1 , std : : memory_order_release ) ;
ut_ad ( n & NOT_CLOSING ) ;
}
/** @return number of pending reads or writes */
uint32_t pending_io ( ) const
{ return n_pending_ios . load ( std : : memory_order_acquire ) & NOT_CLOSING ; }
/** @return whether the tablespace file can be closed and reopened */
bool belongs_in_lru ( ) const
MY_ATTRIBUTE ( ( warn_unused_result ) )
/** Prepare to close the file handle.
@ return number of pending operations */
uint32_t set_closing ( )
{
switch ( purpose ) {
case FIL_TYPE_TEMPORARY :
ut_ad ( id = = SRV_TMP_SPACE_ID ) ;
return false ;
case FIL_TYPE_IMPORT :
ut_ad ( id ! = SRV_TMP_SPACE_ID ) ;
return true ;
case FIL_TYPE_TABLESPACE :
ut_ad ( id ! = SRV_TMP_SPACE_ID ) ;
return id & & ! srv_is_undo_tablespace ( id ) ;
}
ut_ad ( 0 ) ;
return false ;
return n_pending_ios . fetch_or ( CLOSING , std : : memory_order_acquire ) &
NOT_CLOSING ;
}
/** @return whether close() of the file handle has been requested */
bool is_closing ( ) const
{ return n_pending_ios . load ( std : : memory_order_acquire ) & CLOSING ; }
/** @return last_freed_lsn */
lsn_t get_last_freed_lsn ( ) { return last_freed_lsn ; }
@ -835,6 +865,25 @@ public:
}
# ifndef UNIV_INNOCHECKSUM
MY_ATTRIBUTE ( ( warn_unused_result ) )
/** Create a tablespace in fil_system.
@ param name tablespace name
@ param id tablespace identifier
@ param flags tablespace flags
@ param purpose tablespace purpose
@ param crypt_data encryption information
@ param mode encryption mode
@ return pointer to created tablespace , to be filled in with add ( )
@ retval nullptr on failure ( such as when the same tablespace exists ) */
static fil_space_t * create ( const char * name , ulint id , ulint flags ,
fil_type_t purpose , fil_space_crypt_t * crypt_data ,
fil_encryption_t mode = FIL_ENCRYPTION_DEFAULT ) ;
/** Acquire a tablespace for reading or writing a block.
@ param id tablespace ID
@ return the tablespace , or nullptr if missing or inaccessible */
static fil_space_t * get_for_io ( ulint id ) ;
/** Add/remove the free page in the freed ranges list.
@ param [ in ] offset page number to be added
@ param [ in ] free true if page to be freed */
@ -863,8 +912,47 @@ public:
std : : lock_guard < std : : mutex > freed_lock ( freed_range_mutex ) ;
freed_ranges . add_range ( range ) ;
}
# endif /*!UNIV_INNOCHECKSUM */
/** Set the tablespace size in pages */
void set_sizes ( uint32_t s )
{
ut_ad ( id ? ! size : ( size > = s ) ) ;
size = s ; committed_size = s ;
}
/** Update committed_size in mtr_t::commit() */
void set_committed_size ( )
{
ut_ad ( rw_lock_own ( & latch , RW_LOCK_X ) ) ;
committed_size = size ;
}
/** @return the last persisted page number */
uint32_t last_page_number ( ) const { return committed_size - 1 ; }
/** @return the size in pages (0 if unreadable) */
inline uint32_t get_size ( ) ;
/** Read or write data.
@ param type I / O context
@ param offset offset in bytes
@ param len number of bytes
@ param buf the data to be read or written
@ param bpage buffer block ( for type . is_async ( ) completion callback )
@ return status and file descriptor */
fil_io_t io ( const IORequest & type , os_offset_t offset , size_t len ,
void * buf , buf_page_t * bpage = nullptr ) ;
/** Flush pending writes from the file system cache to the file */
void flush ( ) ;
/** Read the first page of a data file.
@ return whether the page was found valid */
bool read_page0 ( ) ;
private :
/** @return whether the file is usable for io() */
ATTRIBUTE_COLD bool prepare_for_io ( ) ;
# endif /*!UNIV_INNOCHECKSUM */
} ;
# ifndef UNIV_INNOCHECKSUM
@ -892,8 +980,6 @@ struct fil_node_t {
uint32_t init_size ;
/** maximum size of the file in database pages (0 if unlimited) */
uint32_t max_size ;
/** count of pending i/o's; is_open must be true if nonzero */
ulint n_pending ;
/** count of pending flushes; is_open must be true if nonzero */
ulint n_pending_flushes ;
/** whether the file is currently being extended */
@ -902,8 +988,6 @@ struct fil_node_t {
bool needs_flush ;
/** link to other files in this tablespace */
UT_LIST_NODE_T ( fil_node_t ) chain ;
/** link to the fil_system.LRU list (keeping track of open files) */
UT_LIST_NODE_T ( fil_node_t ) LRU ;
/** whether this file could use atomic write (data file) */
bool atomic_write ;
@ -921,9 +1005,8 @@ struct fil_node_t {
}
/** Read the first page of a data file.
@ param [ in ] first whether this is the very first read
@ return whether the page was found valid */
bool read_page0 ( bool first ) ;
bool read_page0 ( ) ;
/** Determine some file metadata when creating or reading the file.
@ param file the file that is being created , or OS_FILE_CLOSED */
@ -942,8 +1025,8 @@ struct fil_node_t {
@ return detached handle or OS_FILE_CLOSED */
pfs_os_file_t close_to_free ( bool detach_handle = false ) ;
/** Update the data structures on I/O completion */
inline void complete_io ( bool write = false ) ;
/** Update the data structures on write completion */
inline void complete_write ( ) ;
private :
/** Does stuff common for close() and detach() */
@ -953,22 +1036,27 @@ private:
/** Value of fil_node_t::magic_n */
# define FIL_NODE_MAGIC_N 89389
inline void fil_space_t : : reacquire_for_io ( )
{
ut_d ( uint32_t n = ) n_pending_ios . fetch_add ( 1 , std : : memory_order_relaxed ) ;
ut_ad ( n & NOT_CLOSING ) ;
ut_ad ( UT_LIST_GET_FIRST ( chain ) - > is_open ( ) ) ;
}
inline void fil_space_t : : set_imported ( )
{
ut_ad ( purpose = = FIL_TYPE_IMPORT ) ;
purpose = FIL_TYPE_TABLESPACE ;
UT_LIST_GET_FIRST ( chain ) - > find_metadata ( ) ;
ut_ad ( purpose = = FIL_TYPE_IMPORT ) ;
purpose = FIL_TYPE_TABLESPACE ;
UT_LIST_GET_FIRST ( chain ) - > find_metadata ( ) ;
}
inline bool fil_space_t : : is_rotational ( ) const
{
for ( const fil_node_t * node = UT_LIST_GET_FIRST ( chain ) ; node ;
node = UT_LIST_GET_NEXT ( chain , node ) ) {
if ( ! node - > on_ssd ) {
return true ;
}
}
return false ;
for ( const fil_node_t * node = UT_LIST_GET_FIRST ( chain ) ; node ;
node = UT_LIST_GET_NEXT ( chain , node ) )
if ( ! node - > on_ssd )
return true ;
return false ;
}
/** Common InnoDB file extensions */
@ -1179,16 +1267,6 @@ index */
# define fil_page_index_page_check(page) \
fil_page_type_is_index ( fil_page_get_type ( page ) )
/** Enum values for encryption table option */
enum fil_encryption_t {
/** Encrypted if innodb_encrypt_tables=ON (srv_encrypt_tables) */
FIL_ENCRYPTION_DEFAULT ,
/** Encrypted */
FIL_ENCRYPTION_ON ,
/** Not encrypted */
FIL_ENCRYPTION_OFF
} ;
/** Get the file page type.
@ param [ in ] page file page
@ return page type */
@ -1227,7 +1305,6 @@ struct fil_system_t {
*/
fil_system_t ( ) : m_initialised ( false )
{
UT_LIST_INIT ( LRU , & fil_node_t : : LRU ) ;
UT_LIST_INIT ( space_list , & fil_space_t : : space_list ) ;
UT_LIST_INIT ( named_spaces , & fil_space_t : : named_spaces ) ;
}
@ -1275,30 +1352,23 @@ public:
fil_space_t * temp_space ; /*!< The innodb_temporary tablespace */
/** Map of fil_space_t::id to fil_space_t* */
hash_table_t spaces ;
UT_LIST_BASE_NODE_T ( fil_node_t ) LRU ;
/*!< base node for the LRU list of the
most recently used open files with no
pending i / o ' s ; if we start an i / o on
the file , we first remove it from this
list , and return it to the start of
the list when the i / o ends ;
log files and the system tablespace are
not put to this list : they are opened
after the startup , and kept open until
shutdown */
sized_ilist < fil_space_t , unflushed_spaces_tag_t > unflushed_spaces ;
/*!< list of those
tablespaces whose files contain
unflushed writes ; those spaces have
at least one file node where
needs_flush = = true */
ulint n_open ; /*!< number of files currently open */
/** number of currently open files; protected by mutex */
ulint n_open ;
ulint max_assigned_id ; /*!< maximum space id in the existing
tables , or assigned during the time
mysqld has been up ; at an InnoDB
startup we scan the data dictionary
and set here the maximum of the
space id ' s of the tables there */
/** nonzero if fil_node_open_file_low() should avoid moving the tablespace
to the end of space_list , for FIFO policy of try_to_close ( ) */
ulint freeze_space_list ;
UT_LIST_BASE_NODE_T ( fil_space_t ) space_list ;
/*!< list of all file spaces */
UT_LIST_BASE_NODE_T ( fil_space_t ) named_spaces ;
@ -1312,16 +1382,10 @@ public:
key rotation . */
bool space_id_reuse_warned ;
/*!< whether fil_space_create()
/*!< whether fil_space_t:: create()
has issued a warning about
potential space_id reuse */
/** Trigger a call to fil_node_t::read_page0()
@ param [ in ] id tablespace identifier
@ return tablespace
@ retval NULL if the tablespace does not exist or cannot be read */
fil_space_t * read_page0 ( ulint id ) ;
/** Return the next tablespace from rotation_list.
@ param space previous tablespace ( NULL to start from the start )
@ param recheck whether the removal condition needs to be rechecked after
@ -1336,63 +1400,28 @@ public:
/** The tablespace memory cache. */
extern fil_system_t fil_system ;
/** Update the data structures on I/O completion */
inline void fil_node_t : : complete_io ( bool write )
/** Note that operations on the tablespace must stop or can resume */
inline void fil_space_t : : set_stopping ( bool stopping )
{
ut_ad ( mutex_own ( & fil_system . mutex ) ) ;
ut_d ( auto n = ) n_pending_ops . fetch_xor ( STOP_NEW_OPS ) ;
ut_ad ( ! ( n & STOP_NEW_OPS ) = = stopping ) ;
}
if ( write )
/** @return the size in pages (0 if unreadable) */
inline uint32_t fil_space_t : : get_size ( )
{
if ( ! size )
{
if ( srv_file_flush_method = = SRV_O_DIRECT_NO_FSYNC )
{
/* We don't need to keep track of unflushed changes as user has
explicitly disabled buffering . */
ut_ad ( ! space - > is_in_unflushed_spaces ) ;
ut_ad ( ! needs_flush ) ;
}
else if ( ! space - > is_stopping ( ) )
{
needs_flush = true ;
if ( ! space - > is_in_unflushed_spaces )
{
space - > is_in_unflushed_spaces = true ;
fil_system . unflushed_spaces . push_front ( * space ) ;
}
}
}
switch ( n_pending - - ) {
case 0 :
ut_error ;
case 1 :
if ( space - > belongs_in_lru ( ) )
/* The node must be put back to the LRU list */
UT_LIST_ADD_FIRST ( fil_system . LRU , this ) ;
mutex_enter ( & fil_system . mutex ) ;
read_page0 ( ) ;
mutex_exit ( & fil_system . mutex ) ;
}
return size ;
}
# include "fil0crypt.h"
/** Create a space memory object and put it to the fil_system hash table.
Error messages are issued to the server log .
@ param [ in ] name tablespace name
@ param [ in ] id tablespace identifier
@ param [ in ] flags tablespace flags
@ param [ in ] purpose tablespace purpose
@ param [ in , out ] crypt_data encryption information
@ param [ in ] mode encryption mode
@ return pointer to created tablespace , to be filled in with fil_space_t : : add ( )
@ retval NULL on failure ( such as when the same tablespace exists ) */
fil_space_t *
fil_space_create (
const char * name ,
ulint id ,
ulint flags ,
fil_type_t purpose ,
fil_space_crypt_t * crypt_data ,
fil_encryption_t mode = FIL_ENCRYPTION_DEFAULT )
MY_ATTRIBUTE ( ( warn_unused_result ) ) ;
/*******************************************************************/ /**
Assigns a new space id for a new single - table tablespace . This works simply by
incrementing the global counter . If 4 billion id ' s is not enough , we may need
@ -1421,21 +1450,6 @@ fil_space_free(
void fil_space_set_recv_size_and_flags ( ulint id , uint32_t size ,
uint32_t flags ) ;
/*******************************************************************/ /**
Returns the size of the space in pages . The tablespace must be cached in the
memory cache .
@ return space size , 0 if space not found */
ulint
fil_space_get_size (
/*===============*/
ulint id ) ; /*!< in: space id */
/** Opens all system tablespace data files. They stay open until the
database server shutdown . This should be called at a server startup after the
space objects for the system tablespace have been created . The
purpose of this operation is to make sure we never run out of file descriptors
if we need to read from the insert buffer . */
void fil_open_system_tablespace_files ( ) ;
/** Close all tablespace files at shutdown */
void fil_close_all_files ( ) ;
/*******************************************************************/ /**
@ -1491,14 +1505,6 @@ fil_space_acquire_silent(ulint id)
return ( fil_space_acquire_low ( id , true ) ) ;
}
/** Acquire a tablespace for reading or writing a block,
when it could be dropped concurrently .
@ param [ in ] id tablespace ID
@ return the tablespace
@ retval NULL if missing */
fil_space_t *
fil_space_acquire_for_io ( ulint id ) ;
/** Replay a file rename operation if possible.
@ param [ in ] space_id tablespace identifier
@ param [ in ] name old file name
@ -1674,7 +1680,7 @@ fil_file_readdir_next_file(
memory cache . Note that if we have not done a crash recovery at the database
startup , there may be many tablespaces which are not yet in the memory cache .
@ param [ in ] id Tablespace ID
@ param [ in ] name Tablespace name used in fil_space_create ( ) .
@ param [ in ] name Tablespace name used in fil_space_t : : create ( ) .
@ param [ in ] table_flags table flags
@ return the tablespace
@ retval NULL if no matching tablespace exists in the memory cache */
@ -1690,70 +1696,6 @@ fil_space_for_table_exists_in_mem(
@ return whether the tablespace is at least as big as requested */
bool fil_space_extend ( fil_space_t * space , uint32_t size ) ;
struct fil_io_t
{
/** error code */
dberr_t err ;
/** file; node->space->release_for_io() must follow fil_io(sync=true) call */
fil_node_t * node ;
} ;
/** Reads or writes data. This operation could be asynchronous (aio).
@ param [ in ] type IO context
@ param [ in ] sync true if synchronous aio is desired
@ param [ in ] page_id page id
@ param [ in ] zip_size ROW_FORMAT = COMPRESSED page size , or 0
@ param [ in ] byte_offset remainder of offset in bytes ; in aio this
must be divisible by the OS block size
@ param [ in ] len how many bytes to read or write ; this must
not cross a file boundary ; in aio this must
be a block size multiple
@ param [ in , out ] buf buffer where to store read data or from where
to write ; in aio this must be appropriately
aligned
@ param [ in ] message message for aio handler if non - sync aio
used , else ignored
@ param [ in ] ignore whether to ignore errors
@ param [ in ] punch_hole punch the hole to the file for page_compressed
tablespace
@ return status and file descriptor */
fil_io_t
fil_io (
const IORequest & type ,
bool sync ,
const page_id_t page_id ,
ulint zip_size ,
ulint byte_offset ,
ulint len ,
void * buf ,
void * message ,
bool ignore = false ,
bool punch_hole = false ) ;
/**********************************************************************/ /**
Waits for an aio operation to complete . This function is used to write the
handler for completed requests . The aio array of pending requests is divided
into segments ( see os0file . cc for more info ) . The thread specifies which
segment it wants to wait for . */
void
fil_aio_wait (
/*=========*/
ulint segment ) ; /*!< in: the number of the segment in the aio
array to wait for */
/**********************************************************************/ /**
Flushes to disk possible writes cached by the OS . If the space does not exist
or is being dropped , does not do anything . */
void
fil_flush (
/*======*/
ulint space_id ) ; /*!< in: file space id (this can be a group of
log files or a tablespace of the database ) */
/** Flush a tablespace.
@ param [ in , out ] space tablespace to flush */
void
fil_flush ( fil_space_t * space ) ;
/** Flush to disk the writes in file spaces of the given type
possibly cached by the OS . */
void fil_flush_file_spaces ( ) ;
@ -1846,23 +1788,6 @@ inline bool fil_names_write_if_was_clean(fil_space_t* space)
return ( was_clean ) ;
}
/** During crash recovery, open a tablespace if it had not been opened
yet , to get valid size and flags .
@ param [ in , out ] space tablespace */
inline void fil_space_open_if_needed ( fil_space_t * space )
{
ut_ad ( recv_recovery_is_on ( ) ) ;
if ( space - > size = = 0 ) {
/* Initially, size and flags will be set to 0,
until the files are opened for the first time .
fil_space_get_size ( ) will open the file
and adjust the size and flags . */
ut_d ( ulint size = ) fil_space_get_size ( space - > id ) ;
ut_ad ( size = = space - > size ) ;
}
}
/** On a log checkpoint, reset fil_names_dirty_and_write() flags
and write out FILE_MODIFY and FILE_CHECKPOINT if needed .
@ param [ in ] lsn checkpoint LSN