@ -465,36 +465,25 @@ fil_space_is_flushed(
/** Append a file to the chain of files of a space.
@ param [ in ] name file name of a file that is not open
@ param [ in ] size file size in entire database blocks
@ param [ in , out ] space tablespace from fil_space_create ( )
@ param [ in ] is_raw whether this is a raw device or partition
@ param [ in ] atomic_write true if the file could use atomic write
@ param [ in ] handle file handle , or OS_FILE_CLOSED
@ param [ in ] size file size in entire database pages
@ param [ in ] is_raw whether this is a raw device
@ param [ in ] atomic_write true if atomic write could be enabled
@ param [ in ] max_pages maximum number of pages in file ,
ULINT_MAX means the file size is unlimited .
@ return pointer to the file name
@ retval NULL if error */
static
fil_node_t *
fil_node_create_low (
const char * name ,
ulint size ,
fil_space_t * space ,
bool is_raw ,
bool atomic_write ,
ulint max_pages = ULINT_MAX )
or ULINT_MAX for unlimited
@ return file object */
fil_node_t * fil_space_t : : add ( const char * name , pfs_os_file_t handle ,
ulint size , bool is_raw , bool atomic_write ,
ulint max_pages )
{
fil_node_t * node ;
ut_ad ( name ! = NULL ) ;
ut_ad ( fil_system ! = NULL ) ;
if ( space = = NULL ) {
return ( NULL ) ;
}
node = reinterpret_cast < fil_node_t * > ( ut_zalloc_nokey ( sizeof ( * node ) ) ) ;
node - > handle = OS_FILE_CLOSED ;
node - > handle = handle ;
node - > name = mem_strdup ( name ) ;
@ -511,56 +500,116 @@ fil_node_create_low(
node - > init_size = size ;
node - > max_size = max_pages ;
mutex_enter ( & fil_system - > mutex ) ;
space - > size + = size ;
node - > space = space ;
node - > space = this ;
node - > atomic_write = atomic_write ;
UT_LIST_ADD_LAST ( space - > chain , node ) ;
mutex_enter ( & fil_system - > mutex ) ;
this - > size + = size ;
UT_LIST_ADD_LAST ( chain , node ) ;
if ( node - > is_open ( ) ) {
fil_system - > n_open + + ;
}
mutex_exit ( & fil_system - > mutex ) ;
return ( node ) ;
return node ;
}
/** Appends a new file to the chain of files of a space. File must be closed.
@ param [ in ] name file name ( file must be closed )
@ param [ in ] size file size in database blocks , rounded downwards to
an integer
@ param [ in , out ] space space where to append
@ param [ in ] is_raw true if a raw device or a raw disk partition
@ param [ in ] atomic_write true if the file could use atomic write
@ param [ in ] max_pages maximum number of pages in file ,
ULINT_MAX means the file size is unlimited .
@ return pointer to the file name
@ retval NULL if error */
char *
fil_node_create (
const char * name ,
ulint size ,
fil_space_t * space ,
bool is_raw ,
bool atomic_write ,
ulint max_pages )
/** Read the first page of a data file.
@ param [ in ] first whether this is the very first read
@ return whether the page was found valid */
bool fil_node_t : : read_page0 ( bool first )
{
fil_node_t * node ;
ut_ad ( mutex_own ( & fil_system - > mutex ) ) ;
ut_a ( space - > purpose ! = FIL_TYPE_LOG ) ;
const page_size_t page_size ( space - > flags ) ;
const ulint psize = page_size . physical ( ) ;
os_offset_t size_bytes = os_file_get_size ( handle ) ;
ut_a ( size_bytes ! = ( os_offset_t ) - 1 ) ;
const ulint min_size = FIL_IBD_FILE_INITIAL_SIZE * psize ;
if ( size_bytes < min_size ) {
ib : : error ( ) < < " The size of the file " < < name
< < " is only " < < size_bytes
< < " bytes, should be at least " < < min_size ;
return false ;
}
node = fil_node_create_low (
name , size , space , is_raw , atomic_write , max_pages ) ;
byte * buf2 = static_cast < byte * > ( ut_malloc_nokey ( 2 * psize ) ) ;
return ( node = = NULL ? NULL : node - > name ) ;
/* Align the memory for file i/o if we might have O_DIRECT set */
byte * page = static_cast < byte * > ( ut_align ( buf2 , psize ) ) ;
IORequest request ( IORequest : : READ ) ;
if ( ! os_file_read ( request , handle , page , 0 , psize ) ) {
ib : : error ( ) < < " Unable to read first page of file " < < name ;
ut_free ( buf2 ) ;
return false ;
}
srv_stats . page0_read . add ( 1 ) ;
const ulint space_id = fsp_header_get_space_id ( page ) ;
ulint flags = fsp_header_get_flags ( page ) ;
const ulint size = fsp_header_get_field ( page , FSP_SIZE ) ;
const ulint free_limit = fsp_header_get_field ( page , FSP_FREE_LIMIT ) ;
const ulint free_len = flst_get_len ( FSP_HEADER_OFFSET + FSP_FREE
+ page ) ;
/* Try to read crypt_data from page 0 if it is not yet read. */
if ( ! space - > crypt_data ) {
space - > crypt_data = fil_space_read_crypt_data ( page_size , page ) ;
}
ut_free ( buf2 ) ;
if ( ! fsp_flags_is_valid ( flags , space - > id ) ) {
ulint cflags = fsp_flags_convert_from_101 ( flags ) ;
if ( cflags = = ULINT_UNDEFINED
| | ( cflags ^ space - > flags ) & ~ FSP_FLAGS_MEM_MASK ) {
ib : : error ( )
< < " Expected tablespace flags "
< < ib : : hex ( space - > flags )
< < " but found " < < ib : : hex ( flags )
< < " in the file " < < name ;
return false ;
}
flags = cflags ;
}
if ( UNIV_UNLIKELY ( space_id ! = space - > id ) ) {
ib : : error ( ) < < " Expected tablespace id " < < space - > id
< < " but found " < < space_id
< < " in the file " < < name ;
return false ;
}
ut_ad ( space - > free_limit = = 0 | | space - > free_limit = = free_limit ) ;
ut_ad ( space - > free_len = = 0 | | space - > free_len = = free_len ) ;
space - > size_in_header = size ;
space - > free_limit = free_limit ;
space - > free_len = free_len ;
if ( first ) {
/* Truncate the size to a multiple of extent size. */
ulint mask = psize * FSP_EXTENT_SIZE - 1 ;
if ( size_bytes < = mask ) {
/* .ibd files start smaller than an
extent size . Do not truncate valid data . */
} else {
size_bytes & = ~ os_offset_t ( mask ) ;
}
this - > size = ulint ( size_bytes / psize ) ;
space - > size + = this - > size ;
}
return true ;
}
/** Open a file node of a tablespace.
The caller must own the fil_system mutex .
@ param [ in , out ] node File node
@ return false if the file can ' t be opened , otherwise true */
static
bool
fil_node_open_file (
fil_node_t * node )
static bool fil_node_open_file ( fil_node_t * node )
{
bool success ;
bool read_only_mode ;
@ -588,9 +637,12 @@ fil_node_open_file(
from a file opened for async I / O ! */
retry :
node - > handle = os_file_create_simple_no_error_handling (
innodb_data_file_key , node - > name , OS_FILE_OPEN ,
OS_FILE_READ_ONLY , read_only_mode , & success ) ;
node - > handle = os_file_create (
innodb_data_file_key , node - > name ,
node - > is_raw_disk
? OS_FILE_OPEN_RAW | OS_FILE_ON_ERROR_NO_EXIT
: OS_FILE_OPEN | OS_FILE_ON_ERROR_NO_EXIT ,
OS_FILE_AIO , OS_DATA_FILE , read_only_mode , & success ) ;
if ( ! success ) {
/* The following call prints an error message */
@ -606,150 +658,47 @@ retry:
return ( false ) ;
}
os_offset_t size_bytes = os_file_get_size ( node - > handle ) ;
ut_a ( size_bytes ! = ( os_offset_t ) - 1 ) ;
ut_a ( space - > purpose ! = FIL_TYPE_LOG ) ;
const page_size_t page_size ( space - > flags ) ;
const ulint psize = page_size . physical ( ) ;
const ulint min_size = FIL_IBD_FILE_INITIAL_SIZE
* psize ;
if ( size_bytes < min_size ) {
ib : : error ( ) < < " The size of the file " < < node - > name
< < " is only " < < size_bytes
< < " bytes, should be at least " < < min_size ;
if ( ! node - > read_page0 ( first_time_open ) ) {
os_file_close ( node - > handle ) ;
node - > handle = OS_FILE_CLOSED ;
return ( false ) ;
}
/* Read the first page of the tablespace */
byte * buf2 = static_cast < byte * > ( ut_malloc_nokey ( 2 * psize ) ) ;
/* Align the memory for file i/o if we might have O_DIRECT
set */
byte * page = static_cast < byte * > ( ut_align ( buf2 , psize ) ) ;
IORequest request ( IORequest : : READ ) ;
success = os_file_read (
request ,
node - > handle , page , 0 , psize ) ;
srv_stats . page0_read . add ( 1 ) ;
const ulint space_id
= fsp_header_get_space_id ( page ) ;
ulint flags = fsp_header_get_flags ( page ) ;
const ulint size = fsp_header_get_field (
page , FSP_SIZE ) ;
const ulint free_limit = fsp_header_get_field (
page , FSP_FREE_LIMIT ) ;
const ulint free_len = flst_get_len (
FSP_HEADER_OFFSET + FSP_FREE + page ) ;
/* Try to read crypt_data from page 0 if it is not yet
read . */
if ( ! space - > crypt_data ) {
space - > crypt_data = fil_space_read_crypt_data (
page_size_t ( space - > flags ) , page ) ;
}
ut_free ( buf2 ) ;
os_file_close ( node - > handle ) ;
node - > handle = OS_FILE_CLOSED ;
if ( ! fsp_flags_is_valid ( flags , space - > id ) ) {
ulint cflags = fsp_flags_convert_from_101 ( flags ) ;
if ( cflags = = ULINT_UNDEFINED
| | ( cflags ^ space - > flags ) & ~ FSP_FLAGS_MEM_MASK ) {
ib : : error ( )
< < " Expected tablespace flags "
< < ib : : hex ( space - > flags )
< < " but found " < < ib : : hex ( flags )
< < " in the file " < < node - > name ;
return ( false ) ;
}
flags = cflags ;
}
if ( UNIV_UNLIKELY ( space_id ! = space - > id ) ) {
ib : : error ( )
< < " Expected tablespace id " < < space - > id
< < " but found " < < space_id
< < " in the file " < < node - > name ;
return ( false ) ;
return false ;
}
ut_ad ( space - > free_limit = = 0
| | space - > free_limit = = free_limit ) ;
ut_ad ( space - > free_len = = 0
| | space - > free_len = = free_len ) ;
space - > size_in_header = size ;
space - > free_limit = free_limit ;
space - > free_len = free_len ;
if ( first_time_open ) {
/* Truncate the size to a multiple of extent size. */
ulint mask = psize * FSP_EXTENT_SIZE - 1 ;
if ( size_bytes < = mask ) {
/* .ibd files start smaller than an
extent size . Do not truncate valid data . */
} else {
size_bytes & = ~ os_offset_t ( mask ) ;
}
node - > size = ulint ( size_bytes / psize ) ;
space - > size + = node - > size ;
}
}
/* printf("Opening file %s\n", node->name); */
/* Open the file for reading and writing, in Windows normally in the
unbuffered async I / O mode , though global variables may make
os_file_create ( ) to fall back to the normal file I / O mode . */
if ( space - > purpose = = FIL_TYPE_LOG ) {
} else if ( space - > purpose = = FIL_TYPE_LOG ) {
node - > handle = os_file_create (
innodb_log_file_key , node - > name , OS_FILE_OPEN ,
OS_FILE_AIO , OS_LOG_FILE , read_only_mode , & success ) ;
} else if ( node - > is_raw_disk ) {
node - > handle = os_file_create (
innodb_data_file_key , node - > name , OS_FILE_OPEN_RAW ,
OS_FILE_AIO , OS_DATA_FILE , read_only_mode , & success ) ;
} else {
node - > handle = os_file_create (
innodb_data_file_key , node - > name , OS_FILE_OPEN ,
innodb_data_file_key , node - > name ,
node - > is_raw_disk
? OS_FILE_OPEN_RAW | OS_FILE_ON_ERROR_NO_EXIT
: OS_FILE_OPEN | OS_FILE_ON_ERROR_NO_EXIT ,
OS_FILE_AIO , OS_DATA_FILE , read_only_mode , & success ) ;
}
if ( first_time_open ) {
/*
For the temporary tablespace and during the
non-redo-logged adjustments in
IMPORT TABLESPACE, we do not care about
the atomicity of writes.
Atomic writes are supported if the file can be used
with atomic_writes (not a log file), O_DIRECT is
used (tested in ha_innodb.cc) and the file is on a
device and file system that support atomic writes
for the given block size.
*/
space - > atomic_write_supported
= space - > purpose = = FIL_TYPE_TEMPORARY
| | space - > purpose = = FIL_TYPE_IMPORT
| | ( node - > atomic_write
& & srv_use_atomic_writes
& & my_test_if_atomic_write (
node - > handle ,
int ( page_size_t ( space - > flags )
. physical ( ) ) ) ) ;
}
}
if ( space - > purpose ! = FIL_TYPE_LOG ) {
/*
For the temporary tablespace and during the
non-redo-logged adjustments in
IMPORT TABLESPACE, we do not care about
the atomicity of writes.
Atomic writes are supported if the file can be used
with atomic_writes (not a log file), O_DIRECT is
used (tested in ha_innodb.cc) and the file is on a
device and file system that support atomic writes
for the given block size.
*/
space - > atomic_write_supported
= space - > purpose = = FIL_TYPE_TEMPORARY
| | space - > purpose = = FIL_TYPE_IMPORT
| | ( node - > atomic_write
& & srv_use_atomic_writes
& & my_test_if_atomic_write (
node - > handle ,
int ( page_size_t ( space - > flags )
. physical ( ) ) ) ) ;
}
ut_a ( success ) ;
ut_a ( node - > is_open ( ) ) ;
@ -1430,7 +1379,7 @@ Error messages are issued to the server log.
@ param [ in ] purpose tablespace purpose
@ param [ in , out ] crypt_data encryption information
@ param [ in ] mode encryption mode
@ return pointer to created tablespace , to be filled in with fil_node_create ( )
@ return pointer to created tablespace , to be filled in with fil_space_t : : add ( )
@ retval NULL on failure ( such as when the same tablespace exists ) */
fil_space_t *
fil_space_create (
@ -1519,7 +1468,7 @@ fil_space_create(
if ( space - > purpose = = FIL_TYPE_TEMPORARY ) {
ut_d ( space - > latch . set_temp_fsp ( ) ) ;
/* SysTablespace::open_or_create() would pass
size ! = 0 to fil_node_create ( ) , so first_time_open
size ! = 0 to fil_space_t : : add ( ) , so first_time_open
would not hold in fil_node_open_file ( ) , and we
must assign this manually . We do not care about
the durability or atomicity of writes to the
@ -3819,22 +3768,16 @@ fil_ibd_create(
space = fil_space_create ( name , space_id , flags , FIL_TYPE_TABLESPACE ,
crypt_data , mode ) ;
fil_node_t * node = NULL ;
if ( space ) {
node = fil_node_create_low ( path , size , space , false , true ) ;
}
if ( ! space | | ! node ) {
if ( ! space ) {
if ( crypt_data ) {
free ( crypt_data ) ;
}
err = DB_ERROR ;
} else {
mtr_t mtr ;
const fil_node_t * file = UT_LIST_GET_FIRST ( space - > chain ) ;
mtr_t mtr ;
fil_node_t * file = space - > add ( path , OS_FILE_CLOSED , size ,
false , true ) ;
mtr . start ( ) ;
fil_op_write_log (
@ -3843,7 +3786,7 @@ fil_ibd_create(
fil_name_write ( space , 0 , file , & mtr ) ;
mtr . commit ( ) ;
nod e- > block_size = block_size ;
fil e- > block_size = block_size ;
space - > punch_hole = punch_hole ;
err = DB_SUCCESS ;
@ -4072,6 +4015,7 @@ fil_ibd_open(
| | df_remote . is_open ( ) ! = df_remote . is_valid ( ) ) {
return ( DB_CORRUPTION ) ;
}
error :
return ( DB_ERROR ) ;
}
@ -4180,17 +4124,17 @@ skip_validate:
fil_space_t * space = fil_space_create (
space_name , id , flags , purpose , crypt_data ) ;
if ( ! space ) {
goto error ;
}
/* We do not measure the size of the file, that is why
we pass the 0 below */
if ( fil_node_create_low (
df_remote . is_open ( ) ? df_remote . filepath ( ) :
df_dict . is_open ( ) ? df_dict . filepath ( ) :
df_default . filepath ( ) , 0 , space , false ,
true ) = = NULL ) {
err = DB_ERROR ;
}
space - > add (
df_remote . is_open ( ) ? df_remote . filepath ( ) :
df_dict . is_open ( ) ? df_dict . filepath ( ) :
df_default . filepath ( ) , OS_FILE_CLOSED , 0 , false , true ) ;
if ( err = = DB_SUCCESS & & validate
& & purpose ! = FIL_TYPE_IMPORT & & ! srv_read_only_mode ) {
@ -4539,9 +4483,7 @@ fil_ibd_load(
the rounding formula for extents and pages is somewhat complex ; we
let fil_node_open ( ) do that task . */
if ( ! fil_node_create_low ( file . filepath ( ) , 0 , space , false , false ) ) {
ut_error ;
}
space - > add ( file . filepath ( ) , OS_FILE_CLOSED , 0 , false , false ) ;
return ( FIL_LOAD_OK ) ;
}