@ -115,11 +115,6 @@
/* TODO: put it to my_static.c */
/* TODO: put it to my_static.c */
my_bool my_disable_flush_pagecache_blocks = 0 ;
my_bool my_disable_flush_pagecache_blocks = 0 ;
/**
when flushing pages of a file , it can happen that we take some dirty blocks
out of changed_blocks [ ] ; Checkpoint must not run at this moment .
*/
uint changed_blocks_is_incomplete = 0 ;
# define STRUCT_PTR(TYPE, MEMBER, a) \
# define STRUCT_PTR(TYPE, MEMBER, a) \
( TYPE * ) ( ( char * ) ( a ) - offsetof ( TYPE , MEMBER ) )
( TYPE * ) ( ( char * ) ( a ) - offsetof ( TYPE , MEMBER ) )
@ -320,6 +315,22 @@ struct st_pagecache_block_link
LSN rec_lsn ;
LSN rec_lsn ;
} ;
} ;
/**
  @brief Information describing one run of flush_pagecache_blocks_int().

  The flushing thread inserts one such entry into pagecache->files_in_flush
  before it starts working on a file and deletes it when it is done. Other
  threads look the entry up by file to find out whether that file is
  currently being flushed.
*/
struct st_file_in_flush
{
/**
  @brief File being flushed. This member is the key of the files_in_flush
  hash (its offset and size are what is passed to hash_init()).
*/
PAGECACHE_FILE file ;
/**
  @brief Threads waiting for the thread currently flushing this file to be
  done. The flusher releases the whole queue after removing its entry from
  the hash.
*/
WQUEUE flush_queue ;
/**
  @brief TRUE while the thread currently flushing the file has a non-empty
  first_in_switch list, i.e. it has moved still-dirty blocks out of
  changed_blocks[]. pagecache_collect_changed_blocks_with_lsn() waits on
  flush_queue for as long as any entry in the hash has this flag set.
*/
my_bool first_in_switch ;
} ;
# ifndef DBUG_OFF
# ifndef DBUG_OFF
/* debug checks */
/* debug checks */
@ -678,9 +689,14 @@ ulong init_pagecache(PAGECACHE *pagecache, size_t use_mem,
pagecache - > disk_blocks = - 1 ;
pagecache - > disk_blocks = - 1 ;
if ( ! pagecache - > inited )
if ( ! pagecache - > inited )
{
{
if ( pthread_mutex_init ( & pagecache - > cache_lock , MY_MUTEX_INIT_FAST ) | |
hash_init ( & pagecache - > files_in_flush , & my_charset_bin , 32 ,
offsetof ( struct st_file_in_flush , file ) ,
sizeof ( ( ( struct st_file_in_flush * ) NULL ) - > file ) ,
NULL , NULL , 0 ) )
goto err ;
pagecache - > inited = 1 ;
pagecache - > inited = 1 ;
pagecache - > in_init = 0 ;
pagecache - > in_init = 0 ;
pthread_mutex_init ( & pagecache - > cache_lock , MY_MUTEX_INIT_FAST ) ;
pagecache - > resize_queue . last_thread = NULL ;
pagecache - > resize_queue . last_thread = NULL ;
}
}
@ -1074,6 +1090,7 @@ void end_pagecache(PAGECACHE *pagecache, my_bool cleanup)
if ( cleanup )
if ( cleanup )
{
{
hash_free ( & pagecache - > files_in_flush ) ;
pthread_mutex_destroy ( & pagecache - > cache_lock ) ;
pthread_mutex_destroy ( & pagecache - > cache_lock ) ;
pagecache - > inited = pagecache - > can_be_used = 0 ;
pagecache - > inited = pagecache - > can_be_used = 0 ;
PAGECACHE_DEBUG_CLOSE ;
PAGECACHE_DEBUG_CLOSE ;
@ -3557,7 +3574,8 @@ static int flush_cached_blocks(PAGECACHE *pagecache,
wqueue_release_queue ( & block - > wqueue [ COND_FOR_SAVED ] ) ;
wqueue_release_queue ( & block - > wqueue [ COND_FOR_SAVED ] ) ;
# endif
# endif
/* type will never be FLUSH_IGNORE_CHANGED here */
/* type will never be FLUSH_IGNORE_CHANGED here */
if ( ! ( type = = FLUSH_KEEP | | type = = FLUSH_FORCE_WRITE ) )
if ( ! ( type = = FLUSH_KEEP | | type = = FLUSH_KEEP_LAZY | |
type = = FLUSH_FORCE_WRITE ) )
{
{
pagecache - > blocks_changed - - ;
pagecache - > blocks_changed - - ;
pagecache - > global_blocks_changed - - ;
pagecache - > global_blocks_changed - - ;
@ -3581,7 +3599,8 @@ static int flush_cached_blocks(PAGECACHE *pagecache,
@ param file handler for the file to flush to
@ param file handler for the file to flush to
@ param flush_type type of the flush
@ param flush_type type of the flush
@ param filter optional function which tells what blocks to flush ;
@ param filter optional function which tells what blocks to flush ;
can be non - NULL only if FLUSH_KEEP or FLUSH_FORCE_WRITE .
can be non - NULL only if FLUSH_KEEP , FLUSH_KEEP_LAZY
or FLUSH_FORCE_WRITE .
@ param filter_arg an argument to pass to ' filter ' . Information about
@ param filter_arg an argument to pass to ' filter ' . Information about
the block will be passed too .
the block will be passed too .
@ -3590,6 +3609,12 @@ static int flush_cached_blocks(PAGECACHE *pagecache,
both from flush_pagecache_blocks and flush_all_key_blocks ( the latter one
both from flush_pagecache_blocks and flush_all_key_blocks ( the latter one
does the mutex lock in the resize_pagecache ( ) function ) .
does the mutex lock in the resize_pagecache ( ) function ) .
@ note
This function can cause problems if two threads call it
concurrently on the same file ( look for " PageCacheFlushConcurrencyBugs "
in ma_checkpoint . c ) ; to avoid them , it has internal logic to serialize in
this situation .
@ return Operation status
@ return Operation status
@ retval 0 OK
@ retval 0 OK
@ retval 1 Error
@ retval 1 Error
@ -3615,9 +3640,15 @@ static int flush_pagecache_blocks_int(PAGECACHE *pagecache,
cache = cache_buff ;
cache = cache_buff ;
if ( pagecache - > disk_blocks > 0 & &
if ( pagecache - > disk_blocks > 0 & &
( ! my_disable_flush_pagecache_blocks | | type ! = FLUSH_KEEP ) )
( ! my_disable_flush_pagecache_blocks | |
( type ! = FLUSH_KEEP & & type ! = FLUSH_KEEP_LAZY ) ) )
{
{
/* Key cache exists and flush is not disabled */
/*
Key cache exists . If my_disable_flush_pagecache_blocks is true , it
disables the operation , but only for FLUSH_KEEP [ _LAZY ] ; other flushes
still need to be allowed : FLUSH_RELEASE has to free blocks , and
FLUSH_FORCE_WRITE is meant to overrule my_disable_flush_pagecache_blocks .
*/
int error = 0 ;
int error = 0 ;
uint count = 0 ;
uint count = 0 ;
PAGECACHE_BLOCK_LINK * * pos , * * end ;
PAGECACHE_BLOCK_LINK * * pos , * * end ;
@ -3626,33 +3657,66 @@ static int flush_pagecache_blocks_int(PAGECACHE *pagecache,
# if defined(PAGECACHE_DEBUG)
# if defined(PAGECACHE_DEBUG)
uint cnt = 0 ;
uint cnt = 0 ;
# endif
# endif
uint8 changed_blocks_is_incomplete_incremented = 0 ;
if ( type ! = FLUSH_IGNORE_CHANGED )
# ifdef THREAD
struct st_file_in_flush us_flusher , * other_flusher ;
us_flusher . file = * file ;
us_flusher . flush_queue . last_thread = NULL ;
us_flusher . first_in_switch = FALSE ;
while ( ( other_flusher = ( struct st_file_in_flush * )
hash_search ( & pagecache - > files_in_flush , ( uchar * ) file ,
sizeof ( * file ) ) ) )
{
{
/**
Count how many key blocks we have to cache to be able
to flush all dirty pages with minimum seek moves .
/*
File is in flush already : wait , unless FLUSH_KEEP_LAZY . " Flusher "
means " who can mark PCBLOCK_IN_FLUSH " , i . e . caller of
flush_pagecache_blocks_int ( ) .
*/
struct st_my_thread_var * thread ;
if ( type = = FLUSH_KEEP_LAZY )
{
DBUG_PRINT ( " info " , ( " FLUSH_KEEP_LAZY skips " ) ) ;
DBUG_RETURN ( 0 ) ;
}
thread = my_thread_var ;
wqueue_add_to_queue ( & other_flusher - > flush_queue , thread ) ;
do
{
KEYCACHE_DBUG_PRINT ( " flush_pagecache_blocks_int: wait1 " ,
( " suspend thread %ld " , thread - > id ) ) ;
pagecache_pthread_cond_wait ( & thread - > suspend ,
& pagecache - > cache_lock ) ;
}
while ( thread - > next ) ;
}
/* we are the only flusher of this file now */
while ( my_hash_insert ( & pagecache - > files_in_flush , ( uchar * ) & us_flusher ) )
{
/*
Out of memory , wait for flushers to empty the hash and retry ; should
rarely happen . Other threads are flushing the file ; when done , they
are going to remove themselves from the hash , and thus memory will
appear again . However , this memory may be stolen by yet another thread
( for a purpose unrelated to page cache ) , before we retry
my_hash_insert ( ) . So the loop may run for long . Only if the thread was
killed do we abort the loop , returning 1 ( error ) which can cause the
table to be marked as corrupted ( cf maria_chk_size ( ) , maria_close ( ) )
and thus require a table check .
*/
DBUG_ASSERT ( 0 ) ;
pagecache_pthread_mutex_unlock ( & pagecache - > cache_lock ) ;
if ( my_thread_var - > abort )
DBUG_RETURN ( 1 ) ; /* End if aborted by user */
sleep ( 10 ) ;
pagecache_pthread_mutex_lock ( & pagecache - > cache_lock ) ;
}
# endif
@ todo RECOVERY BUG
We will soon here put code to wait if another thread is flushing the
same file , to avoid concurrency bugs . Examples of concurrency bugs
which happened without serialization :
- assume maria_chk_size ( ) ( via CHECK TABLE ) happens
concurrently with Checkpoint : Checkpoint may be flushing a page , and
maria_chk_size ( ) wants to flush this page too so gets an error
because Checkpoint pinned this page . Such error leads to marking the
table corrupted .
- assume maria_close ( ) happens concurrently with Checkpoint :
Checkpoint may be flushing a page , and maria_close ( ) flushes this
page too with FLUSH_RELEASE : the FLUSH_RELEASE will cause a
free_block ( ) which assumes the page is in the LRU , but it is not ( as
Checkpoint is flushing it ) . Crash .
- assume two flushes of the same file happen concurrently ( like
above ) , and a third thread is pushing a page of this file out of the
LRU and runs first . Then one flusher will remove the page from
changed_blocks [ ] and put it in its first_in_switch , so the other
flusher will not see the page at all and return too early .
if ( type ! = FLUSH_IGNORE_CHANGED )
{
/*
Count how many key blocks we have to cache to be able
to flush all dirty pages with minimum seek moves .
*/
*/
for ( block = pagecache - > changed_blocks [ FILE_HASH ( * file ) ] ;
for ( block = pagecache - > changed_blocks [ FILE_HASH ( * file ) ] ;
block ;
block ;
@ -3745,34 +3809,15 @@ restart:
free_block ( pagecache , block ) ;
free_block ( pagecache , block ) ;
}
}
}
}
else
else if ( type ! = FLUSH_KEEP_LAZY )
{
{
/* Link the block into a list of blocks 'in switch' */
unlink_changed ( block ) ;
link_changed ( block , & first_in_switch ) ;
/*
/*
We have just removed a page from the list of dirty pages
( " changed_blocks " ) though it ' s still dirty ( the flush by another
thread has not yet happened ) . Checkpoint will miss the page and so
must be blocked until that flush has happened .
Note that if there are two concurrent
flush_pagecache_blocks_int ( ) on this file , then the first one may
move the block into its first_in_switch , and the second one would
just not see the block and wrongly consider its job done .
@ todo RECOVERY Maria does protect such flushes with intern_lock ,
but Checkpoint does not ( Checkpoint makes sure that
changed_blocks_is_incomplete is 0 when it starts , but as
flush_cached_blocks ( ) releases mutex , this may change . . .
*/
/**
@ todo RECOVERY : check all places where we remove a page from the
list of dirty pages
Link the block into a list of blocks ' in switch ' , and then we will
wait for this list to be empty , which means they have been flushed
*/
*/
if ( unlikely ( ! changed_blocks_is_incomplete_incremented ) )
{
changed_blocks_is_incomplete_incremented = 1 ;
changed_blocks_is_incomplete + + ;
}
unlink_changed ( block ) ;
link_changed ( block , & first_in_switch ) ;
us_flusher . first_in_switch = TRUE ;
}
}
}
}
}
}
@ -3794,7 +3839,7 @@ restart:
wqueue_add_to_queue ( & block - > wqueue [ COND_FOR_SAVED ] , thread ) ;
wqueue_add_to_queue ( & block - > wqueue [ COND_FOR_SAVED ] , thread ) ;
do
do
{
{
KEYCACHE_DBUG_PRINT ( " flush_pagecache_blocks_int: wait " ,
KEYCACHE_DBUG_PRINT ( " flush_pagecache_blocks_int: wait2 " ,
( " suspend thread %ld " , thread - > id ) ) ;
( " suspend thread %ld " , thread - > id ) ) ;
pagecache_pthread_cond_wait ( & thread - > suspend ,
pagecache_pthread_cond_wait ( & thread - > suspend ,
& pagecache - > cache_lock ) ;
& pagecache - > cache_lock ) ;
@ -3810,10 +3855,10 @@ restart:
KEYCACHE_DBUG_ASSERT ( cnt < = pagecache - > blocks_used ) ;
KEYCACHE_DBUG_ASSERT ( cnt < = pagecache - > blocks_used ) ;
# endif
# endif
}
}
changed_blocks_is_incomplete - =
changed_blocks_is_incomplete_incremented ;
us_flusher . first_in_switch = FALSE ;
/* The following happens very seldom */
/* The following happens very seldom */
if ( ! ( type = = FLUSH_KEEP | | type = = FLUSH_FORCE_WRITE ) )
if ( ! ( type = = FLUSH_KEEP | | type = = FLUSH_KEEP_LAZY | |
type = = FLUSH_FORCE_WRITE ) )
{
{
/*
/*
this code would free all blocks while filter maybe handled only a
this code would free all blocks while filter maybe handled only a
@ -3841,6 +3886,12 @@ restart:
}
}
}
}
}
}
# ifdef THREAD
/* wake up others waiting to flush this file */
hash_delete ( & pagecache - > files_in_flush , ( uchar * ) & us_flusher ) ;
if ( us_flusher . flush_queue . last_thread )
wqueue_release_queue ( & us_flusher . flush_queue ) ;
# endif
}
}
# ifndef DBUG_OFF
# ifndef DBUG_OFF
@ -3862,7 +3913,8 @@ restart:
@ param file handler for the file to flush to
@ param file handler for the file to flush to
@ param flush_type type of the flush
@ param flush_type type of the flush
@ param filter optional function which tells what blocks to flush ;
@ param filter optional function which tells what blocks to flush ;
can be non - NULL only if FLUSH_KEEP or FLUSH_FORCE_WRITE .
can be non - NULL only if FLUSH_KEEP , FLUSH_KEEP_LAZY
or FLUSH_FORCE_WRITE .
@ param filter_arg an argument to pass to ' filter ' . Information about
@ param filter_arg an argument to pass to ' filter ' . Information about
the block will be passed too .
the block will be passed too .
@ -3965,16 +4017,42 @@ my_bool pagecache_collect_changed_blocks_with_lsn(PAGECACHE *pagecache,
of memory at most .
of memory at most .
*/
*/
pagecache_pthread_mutex_lock ( & pagecache - > cache_lock ) ;
pagecache_pthread_mutex_lock ( & pagecache - > cache_lock ) ;
while ( changed_blocks_is_incomplete > 0 )
# ifdef THREAD
for ( ; ; )
{
{
struct st_file_in_flush * other_flusher ;
for ( file_hash = 0 ;
( other_flusher = ( struct st_file_in_flush * )
hash_element ( & pagecache - > files_in_flush , file_hash ) ) ! = NULL & &
! other_flusher - > first_in_switch ;
file_hash + + )
{ }
if ( other_flusher = = NULL )
break ;
/*
/*
Some pages are more recent in memory than on disk ( = dirty ) and are not
in " changed_blocks " so we cannot know them . Wait .
other_flusher - > first_in_switch is true : some thread is flushing a file
and has removed dirty blocks from changed_blocks [ ] while they were still
dirty . They were being evicted ( = > flushed ) by yet another thread , which
may not have flushed the block yet , so it may still be dirty .
If Checkpoint proceeds now , it will not see the page . If there is a
crash right after writing the checkpoint record , before the page is
flushed , at recovery the page will be wrongly ignored because it won ' t
be in the dirty pages list in the checkpoint record . So wait .
*/
*/
pagecache_pthread_mutex_unlock ( & pagecache - > cache_lock ) ;
sleep ( 1 ) ;
pagecache_pthread_mutex_lock ( & pagecache - > cache_lock ) ;
{
struct st_my_thread_var * thread = my_thread_var ;
wqueue_add_to_queue ( & other_flusher - > flush_queue , thread ) ;
do
{
KEYCACHE_DBUG_PRINT ( " pagecache_collect_çhanged_blocks_with_lsn: wait " ,
( " suspend thread %ld " , thread - > id ) ) ;
pagecache_pthread_cond_wait ( & thread - > suspend ,
& pagecache - > cache_lock ) ;
}
while ( thread - > next ) ;
}
}
}
# endif
/* Count how many dirty pages are interesting */
/* Count how many dirty pages are interesting */
for ( file_hash = 0 ; file_hash < PAGECACHE_CHANGED_BLOCKS_HASH ; file_hash + + )
for ( file_hash = 0 ; file_hash < PAGECACHE_CHANGED_BLOCKS_HASH ; file_hash + + )