Merge XtraDB from Percona-server-5.1.51-12 into MariaDB.

15 years ago · c6ccd3f346
50 changed files with 1159 additions and 384 deletions
--- a/mysql-test/include/percona_query_response_time_show.inc
+++ b/mysql-test/include/percona_query_response_time_show.inc
@ -1,7 +1,8 @@
-SELECT c.count,
+SELECT d.count,
 (SELECT SUM(a.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as a WHERE a.count != 0) as query_count,
-(SELECT COUNT(*)     FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as b WHERE b.count != 0) as not_zero_region_count,
+(SELECT SUM((b.total * 1000000) DIV 1000000) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as b WHERE b.count != 0) as query_total,
+(SELECT COUNT(*)     FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as c WHERE c.count != 0) as not_zero_region_count,
 (SELECT COUNT(*)     FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME) as region_count
-FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as c WHERE c.count > 0;
+FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as d WHERE d.count > 0;
 SELECT COUNT(*) as region_count FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
 SELECT time FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
--- a/mysql-test/suite/percona/percona_query_response_time-stored.result
+++ b/mysql-test/suite/percona/percona_query_response_time-stored.result
@ -22,12 +22,13 @@ SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
 Variable_name	Value
 query_response_time_range_base	2
 FLUSH QUERY_RESPONSE_TIME;
-SELECT c.count,
+SELECT d.count,
 (SELECT SUM(a.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as a WHERE a.count != 0) as query_count,
-(SELECT COUNT(*)     FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as b WHERE b.count != 0) as not_zero_region_count,
+(SELECT SUM((b.total * 1000000) DIV 1000000) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as b WHERE b.count != 0) as query_total,
+(SELECT COUNT(*)     FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as c WHERE c.count != 0) as not_zero_region_count,
 (SELECT COUNT(*)     FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME) as region_count
-FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as c WHERE c.count > 0;
-count	query_count	not_zero_region_count	region_count
+FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as d WHERE d.count > 0;
+count	query_count	query_total	not_zero_region_count	region_count
 SELECT COUNT(*) as region_count FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
 region_count
 44
@ -76,7 +77,7 @@ time
 2097152.00000
 4194304.00000
 8388608.00000
-TOO LONG QUERY
+TOO LONG
 SET GLOBAL ENABLE_QUERY_RESPONSE_TIME_STATS=1;
 SELECT test_f();
 test_f()
@ -91,14 +92,15 @@ SELECT test_f();
 test_f()
 Hello, world!
 SET GLOBAL ENABLE_QUERY_RESPONSE_TIME_STATS=0;
-SELECT c.count,
+SELECT d.count,
 (SELECT SUM(a.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as a WHERE a.count != 0) as query_count,
-(SELECT COUNT(*)     FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as b WHERE b.count != 0) as not_zero_region_count,
+(SELECT SUM((b.total * 1000000) DIV 1000000) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as b WHERE b.count != 0) as query_total,
+(SELECT COUNT(*)     FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as c WHERE c.count != 0) as not_zero_region_count,
 (SELECT COUNT(*)     FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME) as region_count
-FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as c WHERE c.count > 0;
-count	query_count	not_zero_region_count	region_count
-1	5	2	44
-4	5	2	44
+FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as d WHERE d.count > 0;
+count	query_count	query_total	not_zero_region_count	region_count
+1	5	4	2	44
+4	5	4	2	44
 SELECT COUNT(*) as region_count FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
 region_count
 44
@ -147,7 +149,7 @@ time
 2097152.00000
 4194304.00000
 8388608.00000
-TOO LONG QUERY
+TOO LONG
 SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
 Variable_name	Value
 query_response_time_range_base	2
@ -161,14 +163,15 @@ SELECT test_f();
 test_f()
 Hello, world!
 SET GLOBAL ENABLE_QUERY_RESPONSE_TIME_STATS=0;
-SELECT c.count,
+SELECT d.count,
 (SELECT SUM(a.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as a WHERE a.count != 0) as query_count,
-(SELECT COUNT(*)     FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as b WHERE b.count != 0) as not_zero_region_count,
+(SELECT SUM((b.total * 1000000) DIV 1000000) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as b WHERE b.count != 0) as query_total,
+(SELECT COUNT(*)     FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as c WHERE c.count != 0) as not_zero_region_count,
 (SELECT COUNT(*)     FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME) as region_count
-FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as c WHERE c.count > 0;
-count	query_count	not_zero_region_count	region_count
-1	2	2	14
-1	2	2	14
+FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as d WHERE d.count > 0;
+count	query_count	query_total	not_zero_region_count	region_count
+1	2	1	2	14
+1	2	1	2	14
 SELECT COUNT(*) as region_count FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
 region_count
 14
@ -187,7 +190,7 @@ time
  10000.000000
 100000.000000
 1000000.00000
-TOO LONG QUERY
+TOO LONG
 SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
 Variable_name	Value
 query_response_time_range_base	10
@ -201,14 +204,15 @@ SELECT test_f();
 test_f()
 Hello, world!
 SET GLOBAL ENABLE_QUERY_RESPONSE_TIME_STATS=0;
-SELECT c.count,
+SELECT d.count,
 (SELECT SUM(a.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as a WHERE a.count != 0) as query_count,
-(SELECT COUNT(*)     FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as b WHERE b.count != 0) as not_zero_region_count,
+(SELECT SUM((b.total * 1000000) DIV 1000000) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as b WHERE b.count != 0) as query_total,
+(SELECT COUNT(*)     FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as c WHERE c.count != 0) as not_zero_region_count,
 (SELECT COUNT(*)     FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME) as region_count
-FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as c WHERE c.count > 0;
-count	query_count	not_zero_region_count	region_count
-1	2	2	17
-1	2	2	17
+FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as d WHERE d.count > 0;
+count	query_count	query_total	not_zero_region_count	region_count
+1	2	1	2	17
+1	2	1	2	17
 SELECT COUNT(*) as region_count FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
 region_count
 17
@ -230,7 +234,7 @@ time
 117649.000000
 823543.000000
 5764801.00000
-TOO LONG QUERY
+TOO LONG
 SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
 Variable_name	Value
 query_response_time_range_base	7
@ -244,14 +248,15 @@ SELECT test_f();
 test_f()
 Hello, world!
 SET GLOBAL ENABLE_QUERY_RESPONSE_TIME_STATS=0;
-SELECT c.count,
+SELECT d.count,
 (SELECT SUM(a.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as a WHERE a.count != 0) as query_count,
-(SELECT COUNT(*)     FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as b WHERE b.count != 0) as not_zero_region_count,
+(SELECT SUM((b.total * 1000000) DIV 1000000) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as b WHERE b.count != 0) as query_total,
+(SELECT COUNT(*)     FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as c WHERE c.count != 0) as not_zero_region_count,
 (SELECT COUNT(*)     FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME) as region_count
-FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as c WHERE c.count > 0;
-count	query_count	not_zero_region_count	region_count
-1	2	2	7
-1	2	2	7
+FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as d WHERE d.count > 0;
+count	query_count	query_total	not_zero_region_count	region_count
+1	2	1	2	7
+1	2	1	2	7
 SELECT COUNT(*) as region_count FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
 region_count
 7
@ -263,7 +268,7 @@ time
    156.000000
  24336.000000
 3796416.00000
-TOO LONG QUERY
+TOO LONG
 SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
 Variable_name	Value
 query_response_time_range_base	156
@ -277,14 +282,15 @@ SELECT test_f();
 test_f()
 Hello, world!
 SET GLOBAL ENABLE_QUERY_RESPONSE_TIME_STATS=0;
-SELECT c.count,
+SELECT d.count,
 (SELECT SUM(a.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as a WHERE a.count != 0) as query_count,
-(SELECT COUNT(*)     FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as b WHERE b.count != 0) as not_zero_region_count,
+(SELECT SUM((b.total * 1000000) DIV 1000000) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as b WHERE b.count != 0) as query_total,
+(SELECT COUNT(*)     FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as c WHERE c.count != 0) as not_zero_region_count,
 (SELECT COUNT(*)     FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME) as region_count
-FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as c WHERE c.count > 0;
-count	query_count	not_zero_region_count	region_count
-1	2	2	6
-1	2	2	6
+FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as d WHERE d.count > 0;
+count	query_count	query_total	not_zero_region_count	region_count
+1	2	1	2	6
+1	2	1	2	6
 SELECT COUNT(*) as region_count FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
 region_count
 6
@ -295,7 +301,7 @@ time
      1.000000
   1000.000000
 1000000.00000
-TOO LONG QUERY
+TOO LONG
 SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
 Variable_name	Value
 query_response_time_range_base	1000
--- a/mysql-test/suite/percona/percona_query_response_time.result
+++ b/mysql-test/suite/percona/percona_query_response_time.result
@ -9,12 +9,13 @@ SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
 Variable_name	Value
 query_response_time_range_base	2
 FLUSH QUERY_RESPONSE_TIME;
-SELECT c.count,
+SELECT d.count,
 (SELECT SUM(a.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as a WHERE a.count != 0) as query_count,
-(SELECT COUNT(*)     FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as b WHERE b.count != 0) as not_zero_region_count,
+(SELECT SUM((b.total * 1000000) DIV 1000000) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as b WHERE b.count != 0) as query_total,
+(SELECT COUNT(*)     FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as c WHERE c.count != 0) as not_zero_region_count,
 (SELECT COUNT(*)     FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME) as region_count
-FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as c WHERE c.count > 0;
-count	query_count	not_zero_region_count	region_count
+FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as d WHERE d.count > 0;
+count	query_count	query_total	not_zero_region_count	region_count
 SELECT COUNT(*) as region_count FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
 region_count
 44
@ -63,7 +64,7 @@ time
 2097152.00000
 4194304.00000
 8388608.00000
-TOO LONG QUERY
+TOO LONG
 SET GLOBAL ENABLE_QUERY_RESPONSE_TIME_STATS=1;
 SELECT SLEEP(0.31);
 SLEEP(0.31)
@ -123,17 +124,18 @@ SELECT SLEEP(2.5);
 SLEEP(2.5)
 0
 SET GLOBAL ENABLE_QUERY_RESPONSE_TIME_STATS=0;
-SELECT c.count,
+SELECT d.count,
 (SELECT SUM(a.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as a WHERE a.count != 0) as query_count,
-(SELECT COUNT(*)     FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as b WHERE b.count != 0) as not_zero_region_count,
+(SELECT SUM((b.total * 1000000) DIV 1000000) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as b WHERE b.count != 0) as query_total,
+(SELECT COUNT(*)     FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as c WHERE c.count != 0) as not_zero_region_count,
 (SELECT COUNT(*)     FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME) as region_count
-FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as c WHERE c.count > 0;
-count	query_count	not_zero_region_count	region_count
-1	20	5	44
-10	20	5	44
-1	20	5	44
-5	20	5	44
-3	20	5	44
+FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as d WHERE d.count > 0;
+count	query_count	query_total	not_zero_region_count	region_count
+1	20	15	5	44
+10	20	15	5	44
+1	20	15	5	44
+5	20	15	5	44
+3	20	15	5	44
 SELECT COUNT(*) as region_count FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
 region_count
 44
@ -182,7 +184,7 @@ time
 2097152.00000
 4194304.00000
 8388608.00000
-TOO LONG QUERY
+TOO LONG
 SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
 Variable_name	Value
 query_response_time_range_base	2
@ -250,15 +252,16 @@ SELECT SLEEP(2.5);
 SLEEP(2.5)
 0
 SET GLOBAL ENABLE_QUERY_RESPONSE_TIME_STATS=0;
-SELECT c.count,
+SELECT d.count,
 (SELECT SUM(a.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as a WHERE a.count != 0) as query_count,
-(SELECT COUNT(*)     FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as b WHERE b.count != 0) as not_zero_region_count,
+(SELECT SUM((b.total * 1000000) DIV 1000000) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as b WHERE b.count != 0) as query_total,
+(SELECT COUNT(*)     FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as c WHERE c.count != 0) as not_zero_region_count,
 (SELECT COUNT(*)     FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME) as region_count
-FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as c WHERE c.count > 0;
-count	query_count	not_zero_region_count	region_count
-1	20	3	14
-11	20	3	14
-8	20	3	14
+FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as d WHERE d.count > 0;
+count	query_count	query_total	not_zero_region_count	region_count
+1	20	17	3	14
+11	20	17	3	14
+8	20	17	3	14
 SELECT COUNT(*) as region_count FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
 region_count
 14
@ -277,7 +280,7 @@ time
  10000.000000
 100000.000000
 1000000.00000
-TOO LONG QUERY
+TOO LONG
 SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
 Variable_name	Value
 query_response_time_range_base	10
@ -345,15 +348,16 @@ SELECT SLEEP(2.5);
 SLEEP(2.5)
 0
 SET GLOBAL ENABLE_QUERY_RESPONSE_TIME_STATS=0;
-SELECT c.count,
+SELECT d.count,
 (SELECT SUM(a.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as a WHERE a.count != 0) as query_count,
-(SELECT COUNT(*)     FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as b WHERE b.count != 0) as not_zero_region_count,
+(SELECT SUM((b.total * 1000000) DIV 1000000) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as b WHERE b.count != 0) as query_total,
+(SELECT COUNT(*)     FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as c WHERE c.count != 0) as not_zero_region_count,
 (SELECT COUNT(*)     FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME) as region_count
-FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as c WHERE c.count > 0;
-count	query_count	not_zero_region_count	region_count
-1	20	3	17
-11	20	3	17
-8	20	3	17
+FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as d WHERE d.count > 0;
+count	query_count	query_total	not_zero_region_count	region_count
+1	20	17	3	17
+11	20	17	3	17
+8	20	17	3	17
 SELECT COUNT(*) as region_count FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
 region_count
 17
@ -375,7 +379,7 @@ time
 117649.000000
 823543.000000
 5764801.00000
-TOO LONG QUERY
+TOO LONG
 SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
 Variable_name	Value
 query_response_time_range_base	7
@ -443,15 +447,16 @@ SELECT SLEEP(2.5);
 SLEEP(2.5)
 0
 SET GLOBAL ENABLE_QUERY_RESPONSE_TIME_STATS=0;
-SELECT c.count,
+SELECT d.count,
 (SELECT SUM(a.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as a WHERE a.count != 0) as query_count,
-(SELECT COUNT(*)     FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as b WHERE b.count != 0) as not_zero_region_count,
+(SELECT SUM((b.total * 1000000) DIV 1000000) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as b WHERE b.count != 0) as query_total,
+(SELECT COUNT(*)     FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as c WHERE c.count != 0) as not_zero_region_count,
 (SELECT COUNT(*)     FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME) as region_count
-FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as c WHERE c.count > 0;
-count	query_count	not_zero_region_count	region_count
-1	20	3	7
-11	20	3	7
-8	20	3	7
+FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as d WHERE d.count > 0;
+count	query_count	query_total	not_zero_region_count	region_count
+1	20	17	3	7
+11	20	17	3	7
+8	20	17	3	7
 SELECT COUNT(*) as region_count FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
 region_count
 7
@ -463,7 +468,7 @@ time
    156.000000
  24336.000000
 3796416.00000
-TOO LONG QUERY
+TOO LONG
 SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
 Variable_name	Value
 query_response_time_range_base	156
@ -531,15 +536,16 @@ SELECT SLEEP(2.5);
 SLEEP(2.5)
 0
 SET GLOBAL ENABLE_QUERY_RESPONSE_TIME_STATS=0;
-SELECT c.count,
+SELECT d.count,
 (SELECT SUM(a.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as a WHERE a.count != 0) as query_count,
-(SELECT COUNT(*)     FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as b WHERE b.count != 0) as not_zero_region_count,
+(SELECT SUM((b.total * 1000000) DIV 1000000) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as b WHERE b.count != 0) as query_total,
+(SELECT COUNT(*)     FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as c WHERE c.count != 0) as not_zero_region_count,
 (SELECT COUNT(*)     FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME) as region_count
-FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as c WHERE c.count > 0;
-count	query_count	not_zero_region_count	region_count
-1	20	3	6
-11	20	3	6
-8	20	3	6
+FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as d WHERE d.count > 0;
+count	query_count	query_total	not_zero_region_count	region_count
+1	20	17	3	6
+11	20	17	3	6
+8	20	17	3	6
 SELECT COUNT(*) as region_count FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
 region_count
 6
@ -550,7 +556,7 @@ time
      1.000000
   1000.000000
 1000000.00000
-TOO LONG QUERY
+TOO LONG
 SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
 Variable_name	Value
 query_response_time_range_base	1000
--- a/mysql-test/suite/percona/percona_server_variables.result
+++ b/mysql-test/suite/percona/percona_server_variables.result
@ -77,8 +77,10 @@ innodb_adaptive_checkpoint	Value
 innodb_adaptive_flushing	Value
 innodb_adaptive_hash_index	Value
 innodb_additional_mem_pool_size	Value
+innodb_auto_lru_dump	Value
 innodb_autoextend_increment	Value
 innodb_autoinc_lock_mode	Value
+innodb_buffer_pool_shm_checksum	Value
 innodb_buffer_pool_shm_key	Value
 innodb_buffer_pool_size	Value
 innodb_change_buffering	Value
--- a/storage/xtradb/ChangeLog
+++ b/storage/xtradb/ChangeLog
@ -1,3 +1,58 @@
+2010-08-24	The InnoDB Team
+
+	* handler/ha_innodb.c, dict/dict0dict.c:
+	Fix Bug #55832 selects crash too easily when innodb_force_recovery>3
+
+2010-08-03	The InnoDB Team
+
+	* include/dict0dict.h, include/dict0dict.ic, row/row0mysql.c:
+	Fix bug #54678, InnoDB, TRUNCATE, ALTER, I_S SELECT, crash or deadlock
+
+2010-08-03	The InnoDB Team
+
+	* dict/dict0load.c, handler/ha_innodb.cc, include/db0err.h,
+	include/dict0load.h, include/dict0mem.h, include/que0que.h,
+	row/row0merge.c, row/row0mysql.c:
+	Fix Bug#54582 stack overflow when opening many tables linked
+	with foreign keys at once
+
+2010-08-03	The InnoDB Team
+
+	* include/ut0mem.h, ut/ut0mem.c:
+	Fix Bug #55627 segv in ut_free pars_lexer_close innobase_shutdown
+	innodb-use-sys-malloc=0
+
+2010-08-01	The InnoDB Team
+
+	* handler/ha_innodb.cc:
+	Fix Bug #55382 Assignment with SELECT expressions takes unexpected
+	S locks in READ COMMITTED
+
+
+2010-07-27	The InnoDB Team
+
+	* include/mem0pool.h, mem/mem0mem.c, mem/mem0pool.c, srv/srv0start.c:
+	Fix Bug#55581 shutdown with innodb-use-sys-malloc=0: assert
+	mutex->magic_n == MUTEX_MAGIC_N.
+
+2010-06-30	The InnoDB Team
+
+	* btr/btr0sea.c, ha/ha0ha.c, handler/ha_innodb.cc, include/btr0sea.h:
+	Fix Bug#54311 Crash on CHECK PARTITION after concurrent LOAD DATA
+	and adaptive_hash_index=OFF
+
+2010-06-29	The InnoDB Team
+	* row/row0row.c, row/row0undo.c, row/row0upd.c:
+	Fix Bug#54408 txn rollback after recovery: row0umod.c:673
+	dict_table_get_format(index->table)
+
+2010-06-29	The InnoDB Team
+
+	* btr/btr0cur.c, include/btr0cur.h,
+	include/row0mysql.h, row/row0merge.c, row/row0sel.c:
+	Fix Bug#54358 READ UNCOMMITTED access failure of off-page DYNAMIC
+	or COMPRESSED columns
+
 2010-06-24	The InnoDB Team

 	* handler/ha_innodb.cc:
--- a/storage/xtradb/Makefile.am
+++ b/storage/xtradb/Makefile.am
@ -326,7 +326,7 @@ libxtradb_a_SOURCES=	\
 			ut/ut0vec.c			\
 			ut/ut0wqueue.c

-libxtradb_a_CXXFLAGS=	$(AM_CFLAGS)
+libxtradb_a_CXXFLAGS=	$(AM_CXXFLAGS)
 libxtradb_a_CFLAGS=	$(AM_CFLAGS)

 EXTRA_LTLIBRARIES=	ha_xtradb.la
--- a/storage/xtradb/btr/btr0cur.c
+++ b/storage/xtradb/btr/btr0cur.c
@ -3866,9 +3866,10 @@ btr_cur_set_ownership_of_extern_field(
 Marks not updated extern fields as not-owned by this record. The ownership
 is transferred to the updated record which is inserted elsewhere in the
 index tree. In purge only the owner of externally stored field is allowed
-to free the field. */
+to free the field.
+@return TRUE if BLOB ownership was transferred */
 UNIV_INTERN
-void
+ibool
 btr_cur_mark_extern_inherited_fields(
 /*=================================*/
 	page_zip_des_t*	page_zip,/*!< in/out: compressed page whose uncompressed
@ -3882,13 +3883,14 @@ btr_cur_mark_extern_inherited_fields(
 	ulint	n;
 	ulint	j;
 	ulint	i;
+	ibool	change_ownership = FALSE;

 	ut_ad(rec_offs_validate(rec, NULL, offsets));
 	ut_ad(!rec_offs_comp(offsets) || !rec_get_node_ptr_flag(rec));

 	if (!rec_offs_any_extern(offsets)) {

-		return;
+		return(FALSE);
 	}

 	n = rec_offs_n_fields(offsets);
@ -3911,10 +3913,14 @@ btr_cur_mark_extern_inherited_fields(

 			btr_cur_set_ownership_of_extern_field(
 				page_zip, rec, index, offsets, i, FALSE, mtr);
+
+			change_ownership = TRUE;
 updated:
 			;
 		}
 	}
+
+	return(change_ownership);
 }

 /*******************************************************************//**
@ -5202,7 +5208,7 @@ btr_copy_externally_stored_field(

 /*******************************************************************//**
 Copies an externally stored field of a record to mem heap.
-@return	the field copied to heap */
+@return	the field copied to heap, or NULL if the field is incomplete */
 UNIV_INTERN
 byte*
 btr_rec_copy_externally_stored_field(
@ -5232,6 +5238,18 @@ btr_rec_copy_externally_stored_field(

 	data = rec_get_nth_field(rec, offsets, no, &local_len);

+	ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
+
+	if (UNIV_UNLIKELY
+	    (!memcmp(data + local_len - BTR_EXTERN_FIELD_REF_SIZE,
+		     field_ref_zero, BTR_EXTERN_FIELD_REF_SIZE))) {
+		/* The externally stored field was not written yet.
+		This record should only be seen by
+		recv_recovery_rollback_active() or any
+		TRX_ISO_READ_UNCOMMITTED transactions. */
+		return(NULL);
+	}
+
 	return(btr_copy_externally_stored_field(len, data,
 						zip_size, local_len, heap));
 }
--- a/storage/xtradb/btr/btr0sea.c
+++ b/storage/xtradb/btr/btr0sea.c
@ -46,6 +46,7 @@ Created 2/17/1996 Heikki Tuuri
 /** Flag: has the search system been enabled?
 Protected by btr_search_latch and btr_search_enabled_mutex. */
 UNIV_INTERN char		btr_search_enabled	= TRUE;
+UNIV_INTERN ibool		btr_search_fully_disabled = FALSE;

 /** Mutex protecting btr_search_enabled */
 static mutex_t			btr_search_enabled_mutex;
@ -201,12 +202,19 @@ btr_search_disable(void)
 	mutex_enter(&btr_search_enabled_mutex);
 	rw_lock_x_lock(&btr_search_latch);

+	/* Disable access to the hash index and tell ha_insert_for_fold()
+	to stop adding new nodes to it, while still allowing existing
+	nodes to be updated */
 	btr_search_enabled = FALSE;

 	/* Clear all block->is_hashed flags and remove all entries
 	from btr_search_sys->hash_index. */
 	buf_pool_drop_hash_index();

+	/* The hash index has been cleaned up; disallow any further
+	operation on it */
+	btr_search_fully_disabled = TRUE;
+
 	/* btr_search_enabled_mutex should guarantee this. */
 	ut_ad(!btr_search_enabled);

@ -225,6 +233,7 @@ btr_search_enable(void)
 	rw_lock_x_lock(&btr_search_latch);

 	btr_search_enabled = TRUE;
+	btr_search_fully_disabled = FALSE;

 	rw_lock_x_unlock(&btr_search_latch);
 	mutex_exit(&btr_search_enabled_mutex);
@ -1488,7 +1497,7 @@ btr_search_build_page_hash_index(

 	rw_lock_x_lock(&btr_search_latch);

-	if (UNIV_UNLIKELY(!btr_search_enabled)) {
+	if (UNIV_UNLIKELY(btr_search_fully_disabled)) {
 		goto exit_func;
 	}

@ -1850,6 +1859,7 @@ function_exit:
 	}
 }

+#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
 /********************************************************************//**
 Validates the search system.
@return	TRUE if ok */
@ -2019,3 +2029,4 @@ btr_search_validate(void)

 	return(ok);
 }
+#endif /* defined UNIV_AHI_DEBUG || defined UNIV_DEBUG */
--- a/storage/xtradb/buf/buf0buf.c
+++ b/storage/xtradb/buf/buf0buf.c
@ -792,7 +792,7 @@ buf_block_reuse(
 	ptrdiff_t	frame_offset)
 {
 	/* block_init */
-	block->frame = ((byte*)(block->frame) + frame_offset);
+	block->frame += frame_offset;

 	UNIV_MEM_DESC(block->frame, UNIV_PAGE_SIZE, block);

@ -809,7 +809,7 @@ buf_block_reuse(
 #endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */

 	if (block->page.zip.data)
-		block->page.zip.data = ((byte*)(block->page.zip.data) + frame_offset);
+		block->page.zip.data += frame_offset;

 	block->is_hashed = FALSE;

@ -845,6 +845,8 @@ buf_chunk_init(
 	although it already should be. */
 	mem_size = ut_2pow_round(mem_size, UNIV_PAGE_SIZE);

+	srv_buffer_pool_shm_is_reused = FALSE;
+
 	if (srv_buffer_pool_shm_key) {
 		/* zip_hash size */
 		zip_hash_n = (mem_size / UNIV_PAGE_SIZE) * 2;
@ -870,39 +872,46 @@ buf_chunk_init(
 		ut_a(buf_pool->n_chunks == 1);

 		fprintf(stderr,
-		"InnoDB: Notice: innodb_buffer_pool_shm_key option is specified.\n"
-		"InnoDB: This option may not be safe to keep consistency of datafiles.\n"
-		"InnoDB: Because InnoDB cannot lock datafiles when shutdown until reusing shared memory segment.\n"
-		"InnoDB: You should ensure no change of InnoDB files while using innodb_buffer_pool_shm_key.\n");
+		"InnoDB: Warning: The innodb_buffer_pool_shm_key option has been specified.\n"
+		"InnoDB: Do not change the following between restarts of the server while this option is being used:\n"
+		"InnoDB:   * the mysqld executable between restarts of the server.\n"
+		"InnoDB:   * the value of innodb_buffer_pool_size.\n"
+		"InnoDB:   * the value of innodb_page_size.\n"
+		"InnoDB:   * datafiles created by InnoDB during this session.\n"
+		"InnoDB: Otherwise, data corruption in datafiles may result.\n");

 		/* FIXME: This is vague id still */
-		binary_id = (ulint) ((char*)mtr_commit - (char *)btr_root_get)
-			  + (ulint) ((char *)os_get_os_version - (char *)buf_calc_page_new_checksum)
-			  + (ulint) ((char *)page_dir_find_owner_slot - (char *)dfield_data_is_binary_equal)
-			  + (ulint) ((char *)que_graph_publish - (char *)dict_casedn_str)
-			  + (ulint) ((char *)read_view_oldest_copy_or_open_new - (char *)fil_space_get_version)
-			  + (ulint) ((char *)rec_get_n_extern_new - (char *)fsp_get_size_low)
-			  + (ulint) ((char *)row_get_trx_id_offset - (char *)ha_create_func)
-			  + (ulint) ((char *)srv_set_io_thread_op_info - (char *)thd_is_replication_slave_thread)
-			  + (ulint) ((char *)mutex_create_func - (char *)ibuf_inside)
-			  + (ulint) ((char *)trx_set_detailed_error - (char *)lock_check_trx_id_sanity)
-			  + (ulint) ((char *)ut_time - (char *)mem_heap_strdup);
+		binary_id = (ulint) ((byte*)mtr_commit - (byte*)btr_root_get)
+			  + (ulint) ((byte*)os_get_os_version - (byte*)buf_calc_page_new_checksum)
+			  + (ulint) ((byte*)page_dir_find_owner_slot - (byte*)dfield_data_is_binary_equal)
+			  + (ulint) ((byte*)que_graph_publish - (byte*)dict_casedn_str)
+			  + (ulint) ((byte*)read_view_oldest_copy_or_open_new - (byte*)fil_space_get_version)
+			  + (ulint) ((byte*)rec_get_n_extern_new - (byte*)fsp_get_size_low)
+			  + (ulint) ((byte*)row_get_trx_id_offset - (byte*)ha_create_func)
+			  + (ulint) ((byte*)srv_set_io_thread_op_info - (byte*)thd_is_replication_slave_thread)
+			  + (ulint) ((byte*)mutex_create_func - (byte*)ibuf_inside)
+			  + (ulint) ((byte*)trx_set_detailed_error - (byte*)lock_check_trx_id_sanity)
+			  + (ulint) ((byte*)ut_time - (byte*)mem_heap_strdup);

 		chunk->mem = os_shm_alloc(&chunk->mem_size, srv_buffer_pool_shm_key, &is_new);

 		if (UNIV_UNLIKELY(chunk->mem == NULL)) {
 			return(NULL);
 		}
-
+init_again:
 #ifdef UNIV_SET_MEM_TO_ZERO
 		if (is_new) {
 			memset(chunk->mem, '\0', chunk->mem_size);
 		}
 #endif
+		/* for ut_fold_binary_32(), these values should be 32-bit aligned */
+		ut_a(sizeof(buf_shm_info_t) % 4 == 0);
+		ut_a((ulint)chunk->mem % 4 == 0);
+		ut_a(chunk->mem_size % 4 == 0);

 		shm_info = chunk->mem;

-		zip_hash_tmp = (hash_table_t*)((char *)chunk->mem + chunk->mem_size - zip_hash_mem_size);
+		zip_hash_tmp = (hash_table_t*)((byte*)chunk->mem + chunk->mem_size - zip_hash_mem_size);

 		if (is_new) {
 			strncpy(shm_info->head_str, BUF_SHM_INFO_HEAD, 8);
@ -932,16 +941,6 @@ buf_chunk_init(
 				"InnoDB: Error: The shared memory was not initialized yet.\n");
 				return(NULL);
 			}
-			if (!shm_info->clean) {
-				fprintf(stderr,
-				"InnoDB: Error: The shared memory was not shut down cleanly.\n");
-				return(NULL);
-			}
-			if (!shm_info->reusable) {
-				fprintf(stderr,
-				"InnoDB: Error: The shared memory has unrecoverable contents.\n");
-				return(NULL);
-			}
 			if (shm_info->buf_pool_size != srv_buf_pool_size) {
 				fprintf(stderr,
 				"InnoDB: Error: srv_buf_pool_size is different (shm=%lu current=%lu).\n",
@ -954,14 +953,34 @@ buf_chunk_init(
 				shm_info->page_size, srv_page_size);
 				return(NULL);
 			}
+			if (!shm_info->reusable) {
+				fprintf(stderr,
+				"InnoDB: Warning: The shared memory has unrecoverable contents.\n"
+				"InnoDB: The shared memory segment is initialized.\n");
+				is_new = TRUE;
+				goto init_again;
+			}
+			if (!shm_info->clean) {
+				fprintf(stderr,
+				"InnoDB: Warning: The shared memory was not shut down cleanly.\n"
+				"InnoDB: The shared memory segment is initialized.\n");
+				is_new = TRUE;
+				goto init_again;
+			}

 			ut_a(shm_info->zip_hash_offset == chunk->mem_size - zip_hash_mem_size);
 			ut_a(shm_info->zip_hash_n == zip_hash_n);

 			/* check checksum */
-			checksum = ut_fold_binary((byte*)chunk->mem + sizeof(buf_shm_info_t),
-						  chunk->mem_size - sizeof(buf_shm_info_t));
-			if (shm_info->checksum != checksum) {
+			if (srv_buffer_pool_shm_checksum) {
+				checksum = ut_fold_binary_32((byte*)chunk->mem + sizeof(buf_shm_info_t),
+							     chunk->mem_size - sizeof(buf_shm_info_t));
+			} else {
+				checksum = BUF_NO_CHECKSUM_MAGIC;
+			}
+
+			if (shm_info->checksum != BUF_NO_CHECKSUM_MAGIC
+			    && shm_info->checksum != checksum) {
 				fprintf(stderr,
 				"InnoDB: Error: checksum of the shared memory is not match. "
 				"(stored=%lu calculated=%lu)\n",
@ -979,6 +998,8 @@ buf_chunk_init(
 		} else {
 			/* adjust offset is done later */
 			hash_create_reuse(zip_hash_tmp);
+
+			srv_buffer_pool_shm_is_reused = TRUE;
 		}
 	} else {
 	chunk->mem = os_mem_alloc_large(&chunk->mem_size);
@ -992,7 +1013,7 @@ buf_chunk_init(
 	/* Allocate the block descriptors from
 	the start of the memory block. */
 	if (srv_buffer_pool_shm_key) {
-		chunk->blocks = (buf_block_t*)((char*)chunk->mem + sizeof(buf_shm_info_t));
+		chunk->blocks = (buf_block_t*)((byte*)chunk->mem + sizeof(buf_shm_info_t));
 	} else {
 	chunk->blocks = chunk->mem;
 	}
@ -1039,10 +1060,10 @@ buf_chunk_init(
 		}

 		chunk->size = shm_info->chunk_backup.size;
-		phys_offset = (char*)frame - ((char*)chunk->mem + shm_info->frame_offset);
-		logi_offset = (char *)frame - (char *)chunk->blocks[0].frame;
+		phys_offset = frame - ((byte*)chunk->mem + shm_info->frame_offset);
+		logi_offset = frame - chunk->blocks[0].frame;
 		previous_frame_address = chunk->blocks[0].frame;
-		blocks_offset = (char *)chunk->blocks - (char *)shm_info->chunk_backup.blocks;
+		blocks_offset = (byte*)chunk->blocks - (byte*)shm_info->chunk_backup.blocks;

 		if (phys_offset || logi_offset || blocks_offset) {
 			fprintf(stderr,
@ -1053,10 +1074,10 @@ buf_chunk_init(
 			"InnoDB: Pysical offset                  : %ld (%#lx)\n"
 			"InnoDB: Logical offset (frames)         : %ld (%#lx)\n"
 			"InnoDB: Logical offset (blocks)         : %ld (%#lx)\n",
-				(char *)chunk->mem + shm_info->frame_offset,
+				(byte*)chunk->mem + shm_info->frame_offset,
 				chunk->blocks[0].frame, frame,
-				(ulong) phys_offset, (ulong) phys_offset, (ulong) logi_offset, (ulong) logi_offset,
-				(ulong) blocks_offset, (ulong) blocks_offset);
+				(long) phys_offset, (ulong) phys_offset, (long) logi_offset, (ulong) logi_offset,
+				(long) blocks_offset, (ulong) blocks_offset);
 		} else {
 			fprintf(stderr,
 			"InnoDB: Buffer pool in the shared memory segment can be used as it is.\n");
@ -1066,24 +1087,24 @@ buf_chunk_init(
 			fprintf(stderr,
 			"InnoDB: Aligning physical offset...");

-			memmove(frame, ((char*)chunk->mem + shm_info->frame_offset),
+			memmove(frame, (byte*)chunk->mem + shm_info->frame_offset,
 				chunk->size * UNIV_PAGE_SIZE);

 			fprintf(stderr,
 			" Done.\n");
 		}

+		/* buf_block_t */
+		block = chunk->blocks;
+		for (i = chunk->size; i--; ) {
+			buf_block_reuse(block, logi_offset);
+			block++;
+		}
+
 		if (logi_offset || blocks_offset) {
 			fprintf(stderr,
 			"InnoDB: Aligning logical offset...");

-			/* buf_block_t */
-			block = chunk->blocks;
-
-			for (i = chunk->size; i--; ) {
-				buf_block_reuse(block, logi_offset);
-				block++;
-			}

 			/* buf_pool_t buf_pool_backup */
 			UT_LIST_OFFSET(flush_list, buf_page_t, shm_info->buf_pool_backup.flush_list,
@ -1094,8 +1115,8 @@ buf_chunk_init(
 					previous_frame_address, logi_offset, blocks_offset);
 			if (shm_info->buf_pool_backup.LRU_old)
 				shm_info->buf_pool_backup.LRU_old =
-					(buf_page_t*)((char*)(shm_info->buf_pool_backup.LRU_old)
-						+ (((byte*)shm_info->buf_pool_backup.LRU_old > previous_frame_address)
+					(buf_page_t*)((byte*)(shm_info->buf_pool_backup.LRU_old)
+						+ (((void*)shm_info->buf_pool_backup.LRU_old > previous_frame_address)
 						  ? logi_offset : blocks_offset));

 			UT_LIST_OFFSET(unzip_LRU, buf_block_t, shm_info->buf_pool_backup.unzip_LRU,
@ -1141,7 +1162,7 @@ buf_chunk_init(
 	}

 	if (shm_info) {
-		shm_info->frame_offset = (char*)chunk->blocks[0].frame - (char*)chunk->mem;
+		shm_info->frame_offset = chunk->blocks[0].frame - (byte*)chunk->mem;
 	}

 	return(chunk);
@ -1396,10 +1417,10 @@ buf_pool_init(void)
 	if (srv_buffer_pool_shm_key) {
 		buf_shm_info_t*	shm_info;

-		ut_a((char*)chunk->blocks == (char*)chunk->mem + sizeof(buf_shm_info_t));
+		ut_a((byte*)chunk->blocks == (byte*)chunk->mem + sizeof(buf_shm_info_t));
 		shm_info = chunk->mem;

-		buf_pool->zip_hash = (hash_table_t*)((char*)chunk->mem + shm_info->zip_hash_offset);
+		buf_pool->zip_hash = (hash_table_t*)((byte*)chunk->mem + shm_info->zip_hash_offset);

 		if(shm_info->is_new) {
 			shm_info->is_new = FALSE; /* initialization was finished */
@ -1504,7 +1525,7 @@ buf_pool_free(void)

 		chunk = buf_pool->chunks;
 		shm_info = chunk->mem;
-		ut_a((char*)chunk->blocks == (char*)chunk->mem + sizeof(buf_shm_info_t));
+		ut_a((byte*)chunk->blocks == (byte*)chunk->mem + sizeof(buf_shm_info_t));

 		/* validation the shared memory segment doesn't have unrecoverable contents. */
 		/* Currently, validation became not needed */
@ -1514,8 +1535,12 @@ buf_pool_free(void)
 		memcpy(&(shm_info->chunk_backup), chunk, sizeof(buf_chunk_t));

 		if (srv_fast_shutdown < 2) {
-			shm_info->checksum = ut_fold_binary((byte*)chunk->mem + sizeof(buf_shm_info_t),
-							    chunk->mem_size - sizeof(buf_shm_info_t));
+			if (srv_buffer_pool_shm_checksum) {
+				shm_info->checksum = ut_fold_binary_32((byte*)chunk->mem + sizeof(buf_shm_info_t),
+								       chunk->mem_size - sizeof(buf_shm_info_t));
+			} else {
+				shm_info->checksum = BUF_NO_CHECKSUM_MAGIC;
+			}
 			shm_info->clean = TRUE;
 		}

--- a/storage/xtradb/buf/buf0lru.c
+++ b/storage/xtradb/buf/buf0lru.c
@ -2228,6 +2228,26 @@ end:

 	return(ret);
 }
+
+/** One page reference read from the LRU dump file. */
+typedef struct {
+	ib_uint32_t space_id;	/*!< tablespace id */
+	ib_uint32_t page_no;	/*!< page number */
+} dump_record_t;
+
+/** qsort() comparator: order by space_id, then by page_no. */
+static int dump_record_cmp(const void *a, const void *b)
+{
+	const dump_record_t *rec1 = (const dump_record_t *) a;
+	const dump_record_t *rec2 = (const dump_record_t *) b;
+
+	if (rec1->space_id != rec2->space_id)
+		return rec1->space_id < rec2->space_id ? -1 : 1;
+	if (rec1->page_no != rec2->page_no)
+		return rec1->page_no < rec2->page_no ? -1 : 1;
+	return 0;
+}
+
 /********************************************************************//**
 Read the pages based on the specific file.*/
 UNIV_INTERN
@ -2245,25 +2265,34 @@ buf_LRU_file_restore(void)
 	ulint		req = 0;
 	ibool		terminated = FALSE;
 	ibool		ret = FALSE;
-
-	buffer_base = ut_malloc(2 * UNIV_PAGE_SIZE);
-	buffer = ut_align(buffer_base, UNIV_PAGE_SIZE);
-	if (!buffer) {
-		fprintf(stderr,
-			" InnoDB: cannot allocate buffer.\n");
-		goto end;
-	}
+	dump_record_t*	records = NULL;	/* "end:" frees it only if non-NULL */
+	ulint		size;		/* dump file size, low word */
+	ulint		size_high;	/* dump file size, high word */
+	ulint		length;		/* number of records collected */

 	dump_file = os_file_create_simple_no_error_handling(
 		LRU_DUMP_FILE, OS_FILE_OPEN, OS_FILE_READ_ONLY, &success);
-	if (!success) {
+	if (!success || !os_file_get_size(dump_file, &size, &size_high)) {
 		os_file_get_last_error(TRUE);
 		fprintf(stderr,
 			" InnoDB: cannot open %s\n", LRU_DUMP_FILE);
 		goto end;
 	}
+	if (size == 0 || size_high > 0 || size % 8) {
+		fprintf(stderr, " InnoDB: broken LRU dump file\n");
+		goto end;
+	}
+	buffer_base = ut_malloc(2 * UNIV_PAGE_SIZE);
+	buffer = ut_align(buffer_base, UNIV_PAGE_SIZE);
+	records = ut_malloc(size);
+	if (!buffer || !records) {
+		fprintf(stderr,
+			" InnoDB: cannot allocate buffer.\n");
+		goto end;
+	}

 	buffers = 0;
+	length = 0;
 	while (!terminated) {
 		success = os_file_read(dump_file, buffer,
 				(buffers << UNIV_PAGE_SIZE_SHIFT) & 0xFFFFFFFFUL,
@ -2272,15 +2301,14 @@ buf_LRU_file_restore(void)
 		if (!success) {
 			fprintf(stderr,
 				" InnoDB: cannot read page %lu of %s,"
-				" or meet unexpected terminal.",
+				" or meet unexpected terminal.\n",
 				buffers, LRU_DUMP_FILE);
 			goto end;
 		}

 		for (offset = 0; offset < UNIV_PAGE_SIZE/4; offset += 2) {
-			ulint	space_id, zip_size, page_no;
-			ulint	err;
-			ib_int64_t	tablespace_version;
+			ulint	space_id;
+			ulint	page_no;

 			space_id = mach_read_from_4(buffer + offset * 4);
 			page_no = mach_read_from_4(buffer + (offset + 1) * 4);
@ -2290,31 +2318,61 @@ buf_LRU_file_restore(void)
 				break;
 			}

-			if (offset % 16 == 15) {
-				os_aio_simulated_wake_handler_threads();
-				buf_flush_free_margin(FALSE);
+			records[length].space_id = space_id;
+			records[length].page_no = page_no;
+			length++;
+			if (length * 8 >= size) {
+				fprintf(stderr,
+					" InnoDB: could not find the "
+					"end-of-file marker after reading "
+					"the expected %lu bytes from the "
+					"LRU dump file.\n"
+					" InnoDB: this could be caused by a "
+					"broken or incomplete file.\n"
+					" InnoDB: trying to process what has "
+					"been read so far.\n",
+					size);
+				terminated= TRUE;
+				break;
 			}
+		}
+		buffers++;
+	}

-			zip_size = fil_space_get_zip_size(space_id);
-			if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) {
-				continue;
-			}
+	qsort(records, length, sizeof(dump_record_t), dump_record_cmp);

-			if (fil_area_is_exist(space_id, zip_size, page_no, 0,
-					zip_size ? zip_size : UNIV_PAGE_SIZE)) {
+	for (offset = 0; offset < length; offset++) {
+		ulint		space_id;
+		ulint		page_no;
+		ulint		zip_size;
+		ulint		err;
+		ib_int64_t	tablespace_version;

-				tablespace_version = fil_space_get_version(space_id);
+		space_id = records[offset].space_id;
+		page_no = records[offset].page_no;

-				req++;
-				reads += buf_read_page_low(&err, FALSE, BUF_READ_ANY_PAGE
-						  | OS_AIO_SIMULATED_WAKE_LATER,
-						  space_id, zip_size, TRUE,
-						  tablespace_version, page_no, NULL);
-				buf_LRU_stat_inc_io();
-			}
+		if (offset % 16 == 15) {
+			os_aio_simulated_wake_handler_threads();
+			buf_flush_free_margin(FALSE);
 		}

-		buffers++;
+		zip_size = fil_space_get_zip_size(space_id);
+		if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) {
+			continue;
+		}
+
+		if (fil_area_is_exist(space_id, zip_size, page_no, 0,
+				      zip_size ? zip_size : UNIV_PAGE_SIZE)) {
+
+			tablespace_version = fil_space_get_version(space_id);
+
+			req++;
+			reads += buf_read_page_low(&err, FALSE, BUF_READ_ANY_PAGE
+						   | OS_AIO_SIMULATED_WAKE_LATER,
+						   space_id, zip_size, TRUE,
+						   tablespace_version, page_no, NULL);
+			buf_LRU_stat_inc_io();
+		}
 	}

 	os_aio_simulated_wake_handler_threads();
@ -2330,6 +2388,8 @@ end:
 		os_file_close(dump_file);
 	if (buffer_base)
 		ut_free(buffer_base);
+	if (records)
+		ut_free(records);

 	return(ret);
 }
--- a/storage/xtradb/dict/dict0crea.c
+++ b/storage/xtradb/dict/dict0crea.c
@ -1245,13 +1245,13 @@ dict_create_index_step(
 			goto function_exit;
 		}

-		if (srv_use_sys_stats_table) {
+		if (srv_use_sys_stats_table
+		    && !((node->table->flags >> DICT_TF2_SHIFT) & DICT_TF2_TEMPORARY)) {
 			node->state = INDEX_BUILD_STATS_COLS;
 		} else {
 			node->state = INDEX_CREATE_INDEX_TREE;
 		}
 	}
-
 	if (node->state == INDEX_BUILD_STATS_COLS) {
 		if (node->stats_no <= dict_index_get_n_unique(node->index)) {

--- a/storage/xtradb/dict/dict0dict.c
+++ b/storage/xtradb/dict/dict0dict.c
@ -569,8 +569,7 @@ dict_table_get_on_id(
 {
 	dict_table_t*	table;

-	if (ut_dulint_cmp(table_id, DICT_FIELDS_ID) <= 0
-	    || trx->dict_operation_lock_mode == RW_X_LATCH) {
+	if (trx->dict_operation_lock_mode == RW_X_LATCH) {

 		/* Note: An X latch implies that the transaction
 		already owns the dictionary mutex. */
@ -4514,7 +4513,6 @@ dict_update_statistics_low(
 	ibool		sync)		/*!< in: TRUE if must update SYS_STATS */
 {
 	dict_index_t*	index;
-	ulint		size;
 	ulint		sum_of_index_sizes	= 0;

 	if (table->ibd_file_missing) {
@ -4529,15 +4527,7 @@ dict_update_statistics_low(
 		return;
 	}

-	/* If we have set a high innodb_force_recovery level, do not calculate
-	statistics, as a badly corrupted index can cause a crash in it. */
-
-	if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) {
-
-		return;
-	}
-
-	if (srv_use_sys_stats_table && !sync) {
+	if (srv_use_sys_stats_table && !((table->flags >> DICT_TF2_SHIFT) & DICT_TF2_TEMPORARY) && !sync) {
 		/* reload statistics from SYS_STATS table */
 		if (dict_reload_statistics(table, &sum_of_index_sizes)) {
 			/* success */
@ -4565,33 +4555,55 @@ dict_update_statistics_low(
 		return;
 	}

-	while (index) {
+
+	do {
 		if (table->is_corrupt) {
 			ut_a(srv_pass_corrupt_table);
 			return;
 		}

-		size = btr_get_size(index, BTR_TOTAL_SIZE);
+		if (UNIV_LIKELY
+		    (srv_force_recovery < SRV_FORCE_NO_IBUF_MERGE
+		     || (srv_force_recovery < SRV_FORCE_NO_LOG_REDO
+			 && dict_index_is_clust(index)))) {
+			ulint	size;
+			size = btr_get_size(index, BTR_TOTAL_SIZE);

-		index->stat_index_size = size;
+			index->stat_index_size = size;

-		sum_of_index_sizes += size;
+			sum_of_index_sizes += size;

-		size = btr_get_size(index, BTR_N_LEAF_PAGES);
+			size = btr_get_size(index, BTR_N_LEAF_PAGES);

-		if (size == 0) {
-			/* The root node of the tree is a leaf */
-			size = 1;
-		}
+			if (size == 0) {
+				/* The root node of the tree is a leaf */
+				size = 1;
+			}

-		index->stat_n_leaf_pages = size;
+			index->stat_n_leaf_pages = size;
+
+			btr_estimate_number_of_different_key_vals(index);
+		} else {
+			/* If we have set a high innodb_force_recovery
+			level, do not calculate statistics, as a badly
+			corrupted index can cause a crash in it.
+			Initialize some bogus index cardinality
+			statistics, so that the data can be queried in
+			various means, also via secondary indexes. */
+			ulint	i;
+
+			sum_of_index_sizes++;
+			index->stat_index_size = index->stat_n_leaf_pages = 1;

-		btr_estimate_number_of_different_key_vals(index);
+			for (i = dict_index_get_n_unique(index); i; ) {
+				index->stat_n_diff_key_vals[i--] = 1;
+			}
+		}

 		index = dict_table_get_next_index(index);
-	}
+	} while (index);

-	if (srv_use_sys_stats_table) {
+	if (srv_use_sys_stats_table && !((table->flags >> DICT_TF2_SHIFT) & DICT_TF2_TEMPORARY)) {
 		/* store statistics to SYS_STATS table */
 		dict_store_statistics(table);
 	}
--- a/storage/xtradb/dict/dict0load.c
+++ b/storage/xtradb/dict/dict0load.c
@ -1010,16 +1010,27 @@ err_exit:

 	err = dict_load_indexes(table, heap);

+	/* Initialize table foreign_child value. Its value could be
+	changed when dict_load_foreigns() is called below */
+	table->fk_max_recusive_level = 0;
+
 	/* If the force recovery flag is set, we open the table irrespective
 	of the error condition, since the user may want to dump data from the
 	clustered index. However we load the foreign key information only if
 	all indexes were loaded. */
 	if (err == DB_SUCCESS) {
-		err = dict_load_foreigns(table->name, TRUE);
+		err = dict_load_foreigns(table->name, TRUE, TRUE);
+		if (err != DB_SUCCESS) {
+			dict_table_remove_from_cache(table);
+			table = NULL;	/* must not be dereferenced below */
+		} else {
+			/* Loading is done: reset the recursion depth */
+			table->fk_max_recusive_level = 0;
+		}
 	} else if (!srv_force_recovery) {
 		dict_table_remove_from_cache(table);
 		table = NULL;
 	}
 #if 0
 	if (err != DB_SUCCESS && table != NULL) {

@ -1073,6 +1084,8 @@ dict_load_table_on_id(

 	ut_ad(mutex_own(&(dict_sys->mutex)));

+	table = NULL;
+
 	/* NOTE that the operation of this function is protected by
 	the dictionary mutex, and therefore no deadlocks can occur
 	with other dictionary operations. */
@ -1099,15 +1112,17 @@ dict_load_table_on_id(
 				  BTR_SEARCH_LEAF, &pcur, &mtr);
 	rec = btr_pcur_get_rec(&pcur);

-	if (!btr_pcur_is_on_user_rec(&pcur)
-	    || rec_get_deleted_flag(rec, 0)) {
+	if (!btr_pcur_is_on_user_rec(&pcur)) {
 		/* Not found */
+		goto func_exit;
+	}

-		btr_pcur_close(&pcur);
-		mtr_commit(&mtr);
-		mem_heap_free(heap);
-
-		return(NULL);
+	/* Find the first record that is not delete marked */
+	while (rec_get_deleted_flag(rec, 0)) {
+		if (!btr_pcur_move_to_next_user_rec(&pcur, &mtr)) {
+			goto func_exit;
+		}
+		rec = btr_pcur_get_rec(&pcur);
 	}

 	/*---------------------------------------------------*/
@ -1120,19 +1135,14 @@ dict_load_table_on_id(

 	/* Check if the table id in record is the one searched for */
 	if (ut_dulint_cmp(table_id, mach_read_from_8(field)) != 0) {
-
-		btr_pcur_close(&pcur);
-		mtr_commit(&mtr);
-		mem_heap_free(heap);
-
-		return(NULL);
+		goto func_exit;
 	}

 	/* Now we get the table name from the record */
 	field = rec_get_nth_field_old(rec, 1, &len);
 	/* Load the table definition to memory */
 	table = dict_load_table(mem_heap_strdupl(heap, (char*) field, len));
-
+func_exit:
 	btr_pcur_close(&pcur);
 	mtr_commit(&mtr);
 	mem_heap_free(heap);
@ -1242,8 +1252,12 @@ dict_load_foreign(
 /*==============*/
 	const char*	id,	/*!< in: foreign constraint id as a
 				null-terminated string */
-	ibool		check_charsets)
+	ibool		check_charsets,
 				/*!< in: TRUE=check charset compatibility */
+	ibool		check_recursive)
+				/*!< in: Whether to record the foreign table
+				parent count to avoid unlimited recursive
+				load of chained foreign tables */
 {
 	dict_foreign_t*	foreign;
 	dict_table_t*	sys_foreign;
@ -1257,6 +1271,8 @@ dict_load_foreign(
 	ulint		len;
 	ulint		n_fields_and_type;
 	mtr_t		mtr;
+	dict_table_t*	for_table;
+	dict_table_t*	ref_table;

 	ut_ad(mutex_own(&(dict_sys->mutex)));

@ -1341,11 +1357,54 @@ dict_load_foreign(

 	dict_load_foreign_cols(id, foreign);

-	/* If the foreign table is not yet in the dictionary cache, we
-	have to load it so that we are able to make type comparisons
-	in the next function call. */
-
-	dict_table_get_low(foreign->foreign_table_name);
+	ref_table = dict_table_check_if_in_cache_low(
+			foreign->referenced_table_name);
+
+	/* We could possibly wind up in deep recursive calls if
+	we call dict_table_get_low() again here if there
+	is a chain of tables concatenated together with
+	foreign constraints. In such case, each table is
+	both a parent and child of the other tables, and
+	acts as a "link" in such table chains.
+	To avoid such scenario, we would need to check the
+	number of ancestors the current table has. If that
+	reaches DICT_FK_MAX_RECURSIVE_LOAD, we will stop loading
+	the child table.
+	Foreign constraints are loaded in a breadth-first fashion,
+	that is, the index on FOR_NAME is scanned first, and then
+	index on REF_NAME. So foreign constraints in which
+	current table is a child (foreign table) are loaded first,
+	and then those constraints where current table is a
+	parent (referenced) table.
+	Thus we could check the parent (ref_table) table's
+	reference count (fk_max_recusive_level) to know how deep the
+	recursive call is. If the parent table (ref_table) is already
+	loaded, and its fk_max_recusive_level has reached
+	DICT_FK_MAX_RECURSIVE_LOAD, we will stop the recursive loading
+	by skipping loading the child table. It will not affect foreign
+	constraint check for DMLs since child table will be loaded
+	at that time for the constraint check. */
+	if (!ref_table
+	    || ref_table->fk_max_recusive_level < DICT_FK_MAX_RECURSIVE_LOAD) {
+
+		/* If the foreign table is not yet in the dictionary cache, we
+		have to load it so that we are able to make type comparisons
+		in the next function call. */
+
+		for_table = dict_table_get_low(foreign->foreign_table_name);
+
+		if (for_table && ref_table && check_recursive) {
+			/* This is to record the longest chain of ancestors
+			this table has, if the parent has more ancestors
+			than this table has, record it after adding 1 (for
+			this parent) */
+			if (ref_table->fk_max_recusive_level
+			    >= for_table->fk_max_recusive_level) {
+				for_table->fk_max_recusive_level =
+					 ref_table->fk_max_recusive_level + 1;
+			}
+		}
+	}

 	/* Note that there may already be a foreign constraint object in
 	the dictionary cache for this constraint: then the following
@ -1370,6 +1429,8 @@ ulint
 dict_load_foreigns(
 /*===============*/
 	const char*	table_name,	/*!< in: table name */
+	ibool		check_recursive,/*!< in: Whether to check recursive
+					load of tables chained by FK */
 	ibool		check_charsets)	/*!< in: TRUE=check charset
 					compatibility */
 {
@ -1471,7 +1532,7 @@ loop:

 	/* Load the foreign constraint definition to the dictionary cache */

-	err = dict_load_foreign(id, check_charsets);
+	err = dict_load_foreign(id, check_charsets, check_recursive);

 	if (err != DB_SUCCESS) {
 		btr_pcur_close(&pcur);
@ -1499,6 +1560,11 @@ load_next_index:

 		mtr_start(&mtr);

+		/* Switch to scan index on REF_NAME, fk_max_recusive_level
+		already been updated when scanning FOR_NAME index, no need to
+		update again */
+		check_recursive = FALSE;
+
 		goto start_load;
 	}

--- a/storage/xtradb/fil/fil0fil.c
+++ b/storage/xtradb/fil/fil0fil.c
@ -3043,6 +3043,10 @@ fil_open_single_table_tablespace(

 	if (srv_expand_import
 	    && (space_id != id || space_flags != (flags & ~(~0 << DICT_TF_BITS)))) {
+		ibool		file_is_corrupt = FALSE;
+		byte*		buf3;
+		byte*		descr_page;
+		ibool		descr_is_corrupt = FALSE;
 		dulint		old_id[31];
 		dulint		new_id[31];
 		ulint		root_page[31];
@ -3052,16 +3056,37 @@ fil_open_single_table_tablespace(
 		ulint	i;
 		int		len;
 		ib_uint64_t	current_lsn;
-		ulint		size_low, size_high, size;
-		ib_int64_t	size_bytes;
+		ulint		size_low, size_high, size, free_limit;
+		ib_int64_t	size_bytes, free_limit_bytes;
 		dict_table_t*	table;
 		dict_index_t*	index;
 		fil_system_t*	system;
 		fil_node_t*	node = NULL;
 		fil_space_t*	space;

+		buf3 = ut_malloc(2 * UNIV_PAGE_SIZE);
+		descr_page = ut_align(buf3, UNIV_PAGE_SIZE);
+
 		current_lsn = log_get_lsn();

+		/* check the header page's consistency */
+		if (buf_page_is_corrupted(page,
+					  dict_table_flags_to_zip_size(space_flags))) {
+			fprintf(stderr, "InnoDB: page 0 of %s seems corrupt.\n", filepath);
+			file_is_corrupt = TRUE;
+			descr_is_corrupt = TRUE;
+		}
+
+		/* store as first descr page */
+		memcpy(descr_page, page, UNIV_PAGE_SIZE);
+
+		/* get free limit (page number) of the table space */
+/* these should be same to the definition in fsp0fsp.c */
+#define FSP_HEADER_OFFSET	FIL_PAGE_DATA
+#define	FSP_FREE_LIMIT		12
+		free_limit = mach_read_from_4(FSP_HEADER_OFFSET + FSP_FREE_LIMIT + page);
+		free_limit_bytes = (ib_int64_t)free_limit * (ib_int64_t)UNIV_PAGE_SIZE;
+
 		/* overwrite fsp header */
 		fsp_header_init_fields(page, id, flags);
 		mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, id);
@ -3086,6 +3111,12 @@ fil_open_single_table_tablespace(
 		size_bytes = (((ib_int64_t)size_high) << 32)
 				+ (ib_int64_t)size_low;

+		if (size_bytes < free_limit_bytes) {
+			free_limit_bytes = size_bytes;
+			fprintf(stderr, "InnoDB: free limit of %s is larger than its real size.\n", filepath);
+			file_is_corrupt = TRUE;
+		}
+
 		/* get cruster index information */
 		table = dict_table_get_low(name);
 		index = dict_table_get_first_index(table);
@ -3107,16 +3138,19 @@ fil_open_single_table_tablespace(
 				info_file_path, OS_FILE_OPEN, OS_FILE_READ_ONLY, &success);
 		if (!success) {
 			fprintf(stderr, "InnoDB: cannot open %s\n", info_file_path);
+			file_is_corrupt = TRUE;
 			goto skip_info;
 		}
 		success = os_file_read(info_file, page, 0, 0, UNIV_PAGE_SIZE);
 		if (!success) {
 			fprintf(stderr, "InnoDB: cannot read %s\n", info_file_path);
+			file_is_corrupt = TRUE;
 			goto skip_info;
 		}
 		if (mach_read_from_4(page) != 0x78706f72UL
 		    || mach_read_from_4(page + 4) != 0x74696e66UL) {
 			fprintf(stderr, "InnoDB: %s seems not to be a correct .exp file\n", info_file_path);
+			file_is_corrupt = TRUE;
 			goto skip_info;
 		}

@ -3153,20 +3187,29 @@ skip_info:

 			fprintf(stderr, "InnoDB: Progress in %%:");

-			for (offset = 0; offset < size_bytes; offset += UNIV_PAGE_SIZE) {
+			for (offset = 0; offset < free_limit_bytes; offset += UNIV_PAGE_SIZE) {
 				ulint		checksum_field;
 				ulint		old_checksum_field;
+				ibool		page_is_corrupt;

 				success = os_file_read(file, page,
 							(ulint)(offset & 0xFFFFFFFFUL),
 							(ulint)(offset >> 32), UNIV_PAGE_SIZE);

-				/* skip inconsistent pages, it may be free page. */
+				page_is_corrupt = FALSE;
+
+				/* check consistency */
 				if (memcmp(page + FIL_PAGE_LSN + 4,
 					   page + UNIV_PAGE_SIZE
 					   - FIL_PAGE_END_LSN_OLD_CHKSUM + 4, 4)) {

-					goto skip_write;
+					page_is_corrupt = TRUE;
+				}
+
+				if (mach_read_from_4(page + FIL_PAGE_OFFSET)
+				    != offset / UNIV_PAGE_SIZE) {
+
+					page_is_corrupt = TRUE;
 				}

 				checksum_field = mach_read_from_4(page
@ -3182,7 +3225,7 @@ skip_info:
 				    && old_checksum_field
 				    != buf_calc_page_old_checksum(page)) {

-					goto skip_write;
+					page_is_corrupt = TRUE;
 				}

 				if (!srv_fast_checksum
@ -3191,7 +3234,7 @@ skip_info:
 				    && checksum_field
 				    != buf_calc_page_new_checksum(page)) {

-					goto skip_write;
+					page_is_corrupt = TRUE;
 				}

 				if (srv_fast_checksum
@ -3202,6 +3245,77 @@ skip_info:
 				    && checksum_field
 				    != buf_calc_page_new_checksum(page)) {

+					page_is_corrupt = TRUE;
+				}
+
+				/* if it is free page, inconsistency is acceptable */
+				if (!offset) {
+					/* header page*/
+					/* it should be overwritten already */
+					ut_a(!page_is_corrupt);
+
+				} else if (!((offset / UNIV_PAGE_SIZE) % UNIV_PAGE_SIZE)) {
+					/* descr page (not header) */
+					if (page_is_corrupt) {
+						file_is_corrupt = TRUE;
+						descr_is_corrupt = TRUE;
+					} else {
+						ut_a(fil_page_get_type(page) == FIL_PAGE_TYPE_XDES);
+						descr_is_corrupt = FALSE;
+					}
+
+					/* store as descr page */
+					memcpy(descr_page, page, UNIV_PAGE_SIZE);
+
+				} else if (descr_is_corrupt) {
+					/* unknown state of the page */
+					if (page_is_corrupt) {
+						file_is_corrupt = TRUE;
+					}
+
+				} else {
+					/* check free page or not */
+					/* These definitions should be same to fsp0fsp.c */
+#define	FSP_HEADER_SIZE		(32 + 5 * FLST_BASE_NODE_SIZE)
+
+#define	XDES_BITMAP		(FLST_NODE_SIZE + 12)
+#define	XDES_BITS_PER_PAGE	2
+#define	XDES_FREE_BIT		0
+#define	XDES_SIZE							\
+	(XDES_BITMAP + UT_BITS_IN_BYTES(FSP_EXTENT_SIZE * XDES_BITS_PER_PAGE))
+#define	XDES_ARR_OFFSET		(FSP_HEADER_OFFSET + FSP_HEADER_SIZE)
+
+					/*descr = descr_page + XDES_ARR_OFFSET + XDES_SIZE * xdes_calc_descriptor_index(zip_size, offset)*/
+					/*xdes_get_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, mtr)*/
+					byte*	descr;
+					ulint	index;
+					ulint	byte_index;
+					ulint	bit_index;
+
+					descr = descr_page + XDES_ARR_OFFSET
+						+ XDES_SIZE * (ut_2pow_remainder((offset / UNIV_PAGE_SIZE), UNIV_PAGE_SIZE) / FSP_EXTENT_SIZE);
+
+					index = XDES_FREE_BIT + XDES_BITS_PER_PAGE * ((offset / UNIV_PAGE_SIZE) % FSP_EXTENT_SIZE);
+					byte_index = index / 8;
+					bit_index = index % 8;
+
+					if (ut_bit_get_nth(mach_read_from_1(descr + XDES_BITMAP + byte_index), bit_index)) {
+						/* free page */
+						if (page_is_corrupt) {
+							goto skip_write;
+						}
+					} else {
+						/* not free */
+						if (page_is_corrupt) {
+							file_is_corrupt = TRUE;
+						}
+					}
+				}
+
+				if (page_is_corrupt) {
+					fprintf(stderr, " [errp:%lld]", offset / UNIV_PAGE_SIZE);
+
+					/* cannot treat corrupt page */
 					goto skip_write;
 				}

@ -3294,11 +3408,11 @@ skip_info:
 				}

 skip_write:
-				if (size_bytes
-				    && ((ib_int64_t)((offset + UNIV_PAGE_SIZE) * 100) / size_bytes)
-					!= ((offset * 100) / size_bytes)) {
+				if (free_limit_bytes
+				    && ((ib_int64_t)((offset + UNIV_PAGE_SIZE) * 100) / free_limit_bytes)
+					!= ((offset * 100) / free_limit_bytes)) {
 					fprintf(stderr, " %lu",
-						(ulong)((ib_int64_t)((offset + UNIV_PAGE_SIZE) * 100) / size_bytes));
+						(ulong)((ib_int64_t)((offset + UNIV_PAGE_SIZE) * 100) / free_limit_bytes));
 				}
 			}

@ -3379,6 +3493,26 @@ skip_write:
 			node->size = size;
 		}
 		mutex_exit(&(system->mutex));
+
+		ut_free(buf3);
+
+		if (file_is_corrupt) {
+			ut_print_timestamp(stderr);
+			fputs("  InnoDB: Error: file ",
+			      stderr);
+			ut_print_filename(stderr, filepath);
+			fprintf(stderr, " seems to be corrupt.\n"
+				"InnoDB: anyway, all not corrupt pages were tried to be converted to salvage.\n"
+				"InnoDB: ##### CAUTION #####\n"
+				"InnoDB: ## The .ibd must cause to crash InnoDB, though re-import would seem to be succeeded.\n"
+				"InnoDB: ## If you don't have knowledge about salvaging data from .ibd, you should not use the file.\n"
+				"InnoDB: ###################\n");
+			success = FALSE;
+
+			ut_free(buf2);
+
+			goto func_exit;
+		}
 	}

 	ut_free(buf2);
--- a/storage/xtradb/ha/ha0ha.c
+++ b/storage/xtradb/ha/ha0ha.c
@ -31,9 +31,7 @@ Created 8/22/1994 Heikki Tuuri
 #ifdef UNIV_DEBUG
 # include "buf0buf.h"
 #endif /* UNIV_DEBUG */
-#ifdef UNIV_SYNC_DEBUG
-# include "btr0sea.h"
-#endif /* UNIV_SYNC_DEBUG */
+#include "btr0sea.h"
 #include "page0page.h"

 /*************************************************************//**
@ -127,7 +125,8 @@ ha_clear(
 /*************************************************************//**
 Inserts an entry into a hash table. If an entry with the same fold number
 is found, its node is updated to point to the new data, and no new node
-is inserted.
+is inserted. If btr_search_enabled is set to FALSE, we will only allow
+updating existing nodes, but no new node is allowed to be added.
@return	TRUE if succeed, FALSE if no more memory could be allocated */
 UNIV_INTERN
 ibool
@ -174,6 +173,7 @@ ha_insert_for_fold_func(
 				prev_block->n_pointers--;
 				block->n_pointers++;
 			}
+			ut_ad(!btr_search_fully_disabled);
 # endif /* !UNIV_HOTBACKUP */

 			prev_node->block = block;
@ -186,6 +186,13 @@ ha_insert_for_fold_func(
 		prev_node = prev_node->next;
 	}

+	/* We are in the process of disabling hash index, do not add
+	new chain node */
+	if (!btr_search_enabled) {
+		ut_ad(!btr_search_fully_disabled);
+		return(TRUE);
+	}
+
 	/* We have to allocate a new chain node */

 	node = mem_heap_alloc(hash_get_heap(table, fold), sizeof(ha_node_t));
@ -347,6 +354,7 @@ ha_remove_all_nodes_to_page(
 #endif
 }

+#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
 /*************************************************************//**
 Validates a given range of the cells in hash table.
@return	TRUE if ok */
@ -393,6 +401,7 @@ ha_validate(

 	return(ok);
 }
+#endif /* defined UNIV_AHI_DEBUG || defined UNIV_DEBUG */

 /*************************************************************//**
 Prints info of a hash table. */
--- a/storage/xtradb/ha/hash0hash.c
+++ b/storage/xtradb/ha/hash0hash.c
@ -161,7 +161,7 @@ hash_create_init(
 	offset = (sizeof(hash_table_t) + 7) / 8;
 	offset *= 8;

-	table->array = (hash_cell_t*)(((char*)table) + offset);
+	table->array = (hash_cell_t*)(((byte*)table) + offset);
 	table->n_cells = prime;
 # if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
 	table->adaptive = FALSE;
@ -187,7 +187,7 @@ hash_create_reuse(
 	offset = (sizeof(hash_table_t) + 7) / 8;
 	offset *= 8;

-	table->array = (hash_cell_t*)(((char*)table) + offset);
+	table->array = (hash_cell_t*)(((byte*)table) + offset);
 	ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
 }

--- a/storage/xtradb/handler/ha_innodb.cc
+++ b/storage/xtradb/handler/ha_innodb.cc
@ -194,6 +194,7 @@ static my_bool	innobase_rollback_on_timeout		= FALSE;
 static my_bool	innobase_create_status_file		= FALSE;
 static my_bool	innobase_stats_on_metadata		= TRUE;
 static my_bool	innobase_use_sys_stats_table		= FALSE;
+static my_bool	innobase_buffer_pool_shm_checksum	= TRUE;

 static char*	internal_innobase_data_file_path	= NULL;

@ -812,6 +813,19 @@ convert_error_code_to_mysql(
 	case DB_INTERRUPTED:
 		my_error(ER_QUERY_INTERRUPTED, MYF(0));
 		/* fall through */
+
+	case DB_FOREIGN_EXCEED_MAX_CASCADE:
+		push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
+				    HA_ERR_ROW_IS_REFERENCED,
+				    "InnoDB: Cannot delete/update "
+				    "rows with cascading foreign key "
+				    "constraints that exceed max "
+				    "depth of %d. Please "
+				    "drop extra constraints and try "
+				    "again", DICT_FK_MAX_RECURSIVE_LOAD);
+
+		/* fall through */
+
 	case DB_ERROR:
 	default:
 		return(-1); /* unspecified error */
@ -2413,6 +2427,7 @@ innobase_change_buffering_inited_ok:
 	srv_use_doublewrite_buf = (ibool) innobase_use_doublewrite;
 	srv_use_checksums = (ibool) innobase_use_checksums;
 	srv_fast_checksum = (ibool) innobase_fast_checksum;
+	srv_buffer_pool_shm_checksum = (ibool) innobase_buffer_pool_shm_checksum;

 #ifdef HAVE_LARGE_PAGES
        if ((os_use_large_pages = (ibool) my_use_large_pages))
@ -2549,6 +2564,7 @@ skip_overwrite:
 	/* Get the current high water mark format. */
 	innobase_file_format_check = (char*) trx_sys_file_format_max_get();

+	btr_search_fully_disabled = (!btr_search_enabled);
 	DBUG_RETURN(FALSE);
 error:
 	DBUG_RETURN(TRUE);
@ -3648,12 +3664,19 @@ ha_innobase::innobase_initialize_autoinc()
 		err = row_search_max_autoinc(index, col_name, &read_auto_inc);

 		switch (err) {
-		case DB_SUCCESS:
+		case DB_SUCCESS: {
+			ulonglong	col_max_value;
+
+			col_max_value = innobase_get_int_col_max_value(field);
+
 			/* At the this stage we do not know the increment
-			or the offset, so use a default increment of 1. */
-			auto_inc = read_auto_inc + 1;
-			break;
+			nor the offset, so use a default increment of 1. */

+			auto_inc = innobase_next_autoinc(
+				read_auto_inc, 1, 1, col_max_value);
+
+			break;
+		}
 		case DB_RECORD_NOT_FOUND:
 			ut_print_timestamp(stderr);
 			fprintf(stderr, "  InnoDB: MySQL and InnoDB data "
@ -3966,8 +3989,6 @@ retry:
 			dict_table_get_format(prebuilt->table));
 	}

-	info(HA_STATUS_NO_LOCK | HA_STATUS_VARIABLE | HA_STATUS_CONST);
-
 	/* Only if the table has an AUTOINC column. */
 	if (prebuilt->table != NULL && table->found_next_number_field != NULL) {
 		dict_table_autoinc_lock(prebuilt->table);
@ -3984,6 +4005,8 @@ retry:
 		dict_table_autoinc_unlock(prebuilt->table);
 	}

+	info(HA_STATUS_NO_LOCK | HA_STATUS_VARIABLE | HA_STATUS_CONST);
+
 	DBUG_RETURN(0);
 }

@ -7911,28 +7934,15 @@ ha_innobase::info(
 	dict_index_t*	index;
 	ha_rows		rec_per_key;
 	ib_int64_t	n_rows;
-	ulong		j;
-	ulong		i;
 	char		path[FN_REFLEN];
 	os_file_stat_t	stat_info;

-
 	DBUG_ENTER("info");

 	/* If we are forcing recovery at a high level, we will suppress
 	statistics calculation on tables, because that may crash the
 	server if an index is badly corrupted. */

-	if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) {
-
-		/* We return success (0) instead of HA_ERR_CRASHED,
-		because we want MySQL to process this query and not
-		stop, like it would do if it received the error code
-		HA_ERR_CRASHED. */
-
-		DBUG_RETURN(0);
-	}
-
 	/* We do not know if MySQL can call this function before calling
 	external_lock(). To be safe, update the thd of the current table
 	handle. */
@ -7955,7 +7965,7 @@ ha_innobase::info(
 			/* In sql_show we call with this flag: update
 			then statistics so that they are up-to-date */

-			if (srv_use_sys_stats_table
+			if (srv_use_sys_stats_table && !((ib_table->flags >> DICT_TF2_SHIFT) & DICT_TF2_TEMPORARY)
 			    && thd_sql_command(user_thd) == SQLCOM_ANALYZE) {
 				/* If the indexes on the table don't have enough rows in SYS_STATS system table, */
 				/* they need to be created. */
@ -8050,12 +8060,18 @@ ha_innobase::info(
 		acquiring latches inside InnoDB, we do not call it if we
 		are asked by MySQL to avoid locking. Another reason to
 		avoid the call is that it uses quite a lot of CPU.
-		See Bug#38185.
-		We do not update delete_length if no locking is requested
-		so the "old" value can remain. delete_length is initialized
-		to 0 in the ha_statistics' constructor. */
-		if (!(flag & HA_STATUS_NO_LOCK) && srv_stats_update_need_lock) {
-
+		See Bug#38185. */
+		if (flag & HA_STATUS_NO_LOCK || !srv_stats_update_need_lock) {
+			/* We do not update delete_length if no
+			locking is requested so the "old" value can
+			remain. delete_length is initialized to 0 in
+			the ha_statistics' constructor. */
+		} else if (UNIV_UNLIKELY
+			   (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE)) {
+			/* Avoid accessing the tablespace if
+			innodb_force_recovery is set to a high value. */
+			stats.delete_length = 0;
+		} else {
 			/* lock the data dictionary to avoid races with
 			ibd_file_missing and tablespace_discarded */
 			row_mysql_lock_data_dictionary(prebuilt->trx);
@ -8100,6 +8116,7 @@ ha_innobase::info(
 	}

 	if (flag & HA_STATUS_CONST) {
+		ulong	i;
 		/* Verify the number of index in InnoDB and MySQL
 		matches up. If prebuilt->clust_index_was_generated
 		holds, InnoDB defines GEN_CLUST_INDEX internally */
@ -8116,6 +8133,7 @@ ha_innobase::info(
 		}

 		for (i = 0; i < table->s->keys; i++) {
+			ulong	j;
 			/* We could get index quickly through internal
 			index mapping with the index translation table.
 			The identity of index (match up index name with
@ -8181,6 +8199,11 @@ ha_innobase::info(
 		}
 	}

+	if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) {
+
+		goto func_exit;
+	}
+
 	if (flag & HA_STATUS_ERRKEY) {
 		const dict_index_t*	err_index;

@ -8201,6 +8224,7 @@ ha_innobase::info(
 		stats.auto_increment_value = innobase_peek_autoinc();
 	}

+func_exit:
 	prebuilt->trx->op_info = (char*)"";

 	DBUG_RETURN(0);
@ -9691,7 +9715,8 @@ ha_innobase::store_lock(
 		    && (sql_command == SQLCOM_INSERT_SELECT
 			|| sql_command == SQLCOM_REPLACE_SELECT
 			|| sql_command == SQLCOM_UPDATE
-			|| sql_command == SQLCOM_CREATE_TABLE)) {
+			|| sql_command == SQLCOM_CREATE_TABLE
+			|| sql_command == SQLCOM_SET_OPTION)) {

 			/* If we either have innobase_locks_unsafe_for_binlog
 			option set or this session is using READ COMMITTED
@ -9699,9 +9724,9 @@ ha_innobase::store_lock(
 			is not set to serializable and MySQL is doing
 			INSERT INTO...SELECT or REPLACE INTO...SELECT
 			or UPDATE ... = (SELECT ...) or CREATE  ...
-			SELECT... without FOR UPDATE or IN SHARE
-			MODE in select, then we use consistent read
-			for select. */
+			SELECT... or SET ... = (SELECT ...) without
+			FOR UPDATE or IN SHARE MODE in select,
+			then we use consistent read for select. */

 			prebuilt->select_lock_type = LOCK_NONE;
 			prebuilt->stored_select_lock_type = LOCK_NONE;
@ -11359,9 +11384,14 @@ static MYSQL_SYSVAR_LONGLONG(buffer_pool_size, innobase_buffer_pool_size,

 static MYSQL_SYSVAR_UINT(buffer_pool_shm_key, srv_buffer_pool_shm_key,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
-  "[experimental] The key value of shared memory segment for the buffer pool. 0 means disable the feature (default).",
+  "[experimental] The key value of shared memory segment for the buffer pool. 0 (default) disables the feature.",
  NULL, NULL, 0, 0, INT_MAX32, 0);

+static MYSQL_SYSVAR_BOOL(buffer_pool_shm_checksum, innobase_buffer_pool_shm_checksum,
+  PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
+  "Enable buffer_pool_shm checksum validation (enabled by default).",
+  NULL, NULL, TRUE);
+
 static MYSQL_SYSVAR_ULONG(commit_concurrency, innobase_commit_concurrency,
  PLUGIN_VAR_RQCMDARG,
  "Helps in performance tuning in heavily concurrent environments.",
@ -11608,6 +11638,12 @@ static MYSQL_SYSVAR_ULONG(dict_size_limit, srv_dict_size_limit,
  "Limit the allocated memory for dictionary cache. (0: unlimited)",
  NULL, NULL, 0, 0, LONG_MAX, 0);

+static MYSQL_SYSVAR_UINT(auto_lru_dump, srv_auto_lru_dump,
+  PLUGIN_VAR_RQCMDARG,
+  "Time in seconds between automatic buffer pool dumps. "
+  "0 (the default) disables automatic dumps.",
+  NULL, NULL, 0, 0, UINT_MAX32, 0);
+
 static	MYSQL_SYSVAR_ULINT(pass_corrupt_table, srv_pass_corrupt_table,
  PLUGIN_VAR_RQCMDARG,
  "Pass corruptions of user tables as 'corrupt table' instead of not crashing itself, "
@ -11622,6 +11658,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
  MYSQL_SYSVAR(autoextend_increment),
  MYSQL_SYSVAR(buffer_pool_size),
  MYSQL_SYSVAR(buffer_pool_shm_key),
+  MYSQL_SYSVAR(buffer_pool_shm_checksum),
  MYSQL_SYSVAR(checksums),
  MYSQL_SYSVAR(fast_checksum),
  MYSQL_SYSVAR(commit_concurrency),
@ -11699,6 +11736,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
  MYSQL_SYSVAR(change_buffering),
  MYSQL_SYSVAR(read_ahead_threshold),
  MYSQL_SYSVAR(io_capacity),
+  MYSQL_SYSVAR(auto_lru_dump),
  MYSQL_SYSVAR(use_purge_thread),
  MYSQL_SYSVAR(pass_corrupt_table),
  NULL
--- a/storage/xtradb/include/btr0cur.h
+++ b/storage/xtradb/include/btr0cur.h
@ -468,9 +468,10 @@ btr_estimate_number_of_different_key_vals(
 Marks not updated extern fields as not-owned by this record. The ownership
 is transferred to the updated record which is inserted elsewhere in the
 index tree. In purge only the owner of externally stored field is allowed
-to free the field. */
+to free the field.
+@return TRUE if BLOB ownership was transferred */
 UNIV_INTERN
-void
+ibool
 btr_cur_mark_extern_inherited_fields(
 /*=================================*/
 	page_zip_des_t*	page_zip,/*!< in/out: compressed page whose uncompressed
@ -570,7 +571,7 @@ btr_copy_externally_stored_field_prefix(
 	ulint		local_len);/*!< in: length of data, in bytes */
 /*******************************************************************//**
 Copies an externally stored field of a record to mem heap.
-@return	the field copied to heap */
+@return	the field copied to heap, or NULL if the field is incomplete */
 UNIV_INTERN
 byte*
 btr_rec_copy_externally_stored_field(
--- a/storage/xtradb/include/btr0sea.h
+++ b/storage/xtradb/include/btr0sea.h
@ -187,6 +187,7 @@ btr_search_update_hash_on_delete(
 	btr_cur_t*	cursor);/*!< in: cursor which was positioned on the
 				record to delete using btr_cur_search_...,
 				the record is not yet deleted */
+#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
 /********************************************************************//**
 Validates the search system.
@return	TRUE if ok */
@ -194,10 +195,19 @@ UNIV_INTERN
 ibool
 btr_search_validate(void);
 /*======================*/
+#else
+# define btr_search_validate()	TRUE
+#endif /* defined UNIV_AHI_DEBUG || defined UNIV_DEBUG */

 /** Flag: has the search system been enabled?
 Protected by btr_search_latch and btr_search_enabled_mutex. */
-extern char btr_search_enabled;
+extern char	btr_search_enabled;
+
+/** Flag: whether the search system has completed its disabling process.
+It is set to TRUE right after buf_pool_drop_hash_index() in
+btr_search_disable(), indicating hash index entries are cleaned up.
+Protected by btr_search_latch and btr_search_enabled_mutex. */
+extern ibool	btr_search_fully_disabled;

 /** The search info struct in an index */
 struct btr_search_struct{
--- a/storage/xtradb/include/buf0buf.h
+++ b/storage/xtradb/include/buf0buf.h
@ -1305,7 +1305,7 @@ Compute the hash fold value for blocks in buf_pool->zip_hash. */
 /* the fold should be relative when srv_buffer_pool_shm_key is enabled */
 #define BUF_POOL_ZIP_FOLD_PTR(ptr) (!srv_buffer_pool_shm_key\
 					?((ulint) (ptr) / UNIV_PAGE_SIZE)\
-					:((ulint) ((char*)ptr - (char*)(buf_pool->chunks->blocks->frame)) / UNIV_PAGE_SIZE))
+					:((ulint) ((byte*)ptr - (byte*)(buf_pool->chunks->blocks->frame)) / UNIV_PAGE_SIZE))
 #define BUF_POOL_ZIP_FOLD(b) BUF_POOL_ZIP_FOLD_PTR((b)->frame)
 #define BUF_POOL_ZIP_FOLD_BPAGE(b) BUF_POOL_ZIP_FOLD((buf_block_t*) (b))
 /* @} */
--- a/storage/xtradb/include/db0err.h
+++ b/storage/xtradb/include/db0err.h
@ -94,6 +94,9 @@ enum db_err {

 	DB_PRIMARY_KEY_IS_NULL,		/* a column in the PRIMARY KEY
 					was found to be NULL */
+	DB_FOREIGN_EXCEED_MAX_CASCADE,	/* Foreign key constraint related
+					cascading delete/update exceeds
+					maximum allowed depth */

 	/* The following are partial failure codes */
 	DB_FAIL = 1000,
--- a/storage/xtradb/include/dict0dict.h
+++ b/storage/xtradb/include/dict0dict.h
@ -680,6 +680,22 @@ ulint
 dict_table_zip_size(
 /*================*/
 	const dict_table_t*	table);	/*!< in: table */
+/*********************************************************************//**
+Obtain exclusive locks on all index trees of the table. This is to prevent
+accessing index trees while InnoDB is updating internal metadata for
+operations such as truncate tables. */
+UNIV_INLINE
+void
+dict_table_x_lock_indexes(
+/*======================*/
+	dict_table_t*	table);	/*!< in: table */
+/*********************************************************************//**
+Release the exclusive locks on all index trees of the table. */
+UNIV_INLINE
+void
+dict_table_x_unlock_indexes(
+/*========================*/
+	dict_table_t*	table);	/*!< in: table */
 /********************************************************************//**
 Checks if a column is in the ordering columns of the clustered index of a
 table. Column prefixes are treated like whole columns.
--- a/storage/xtradb/include/dict0dict.ic
+++ b/storage/xtradb/include/dict0dict.ic
@ -452,6 +452,48 @@ dict_table_zip_size(
 	return(dict_table_flags_to_zip_size(table->flags));
 }

+/*********************************************************************//**
+Obtain exclusive locks on all index trees of the table, to prevent access
+to the trees while InnoDB updates internal metadata, e.g. for TRUNCATE
+TABLE. The caller is expected to own dict_sys->mutex (see the ut_ad). */
+UNIV_INLINE
+void
+dict_table_x_lock_indexes(
+/*======================*/
+	dict_table_t*	table)	/*!< in: table, asserted non-NULL */
+{
+	dict_index_t*   index;
+
+	ut_a(table);
+	ut_ad(mutex_own(&(dict_sys->mutex)));
+
+	/* Walk the table's index list and x-lock each index's rw-lock */
+	for (index = dict_table_get_first_index(table);
+	     index != NULL;
+	     index = dict_table_get_next_index(index)) {
+		rw_lock_x_lock(dict_index_get_lock(index));
+	}
+}
+
+/*********************************************************************//**
+Release the exclusive locks on all index trees of the table. */
+UNIV_INLINE
+void
+dict_table_x_unlock_indexes(
+/*========================*/
+	dict_table_t*	table)	/*!< in: table, asserted non-NULL */
+{
+	dict_index_t*   index;
+
+	ut_a(table);
+	ut_ad(mutex_own(&(dict_sys->mutex)));
+
+	for (index = dict_table_get_first_index(table);
+	     index != NULL;
+	     index = dict_table_get_next_index(index)) {
+		rw_lock_x_unlock(dict_index_get_lock(index));
+	}
+}
 /********************************************************************//**
 Gets the number of fields in the internal representation of an index,
 including fields added by the dictionary system.
--- a/storage/xtradb/include/dict0load.h
+++ b/storage/xtradb/include/dict0load.h
@ -97,6 +97,8 @@ ulint
 dict_load_foreigns(
 /*===============*/
 	const char*	table_name,	/*!< in: table name */
+	ibool		check_recursive,/*!< in: Whether to check recursive
+					load of tables chained by FK */
 	ibool		check_charsets);/*!< in: TRUE=check charsets
 					compatibility */
 /********************************************************************//**
--- a/storage/xtradb/include/dict0mem.h
+++ b/storage/xtradb/include/dict0mem.h
@ -112,6 +112,21 @@ ROW_FORMAT=REDUNDANT. */
 						in table->flags. */
 /* @} */

+/** Tables can be chained together with foreign key constraints. When
+we first load the parent table, we would load all of its descendants.
+This could result in recursive calls and eventually an out of stack error.
+DICT_FK_MAX_RECURSIVE_LOAD defines the maximum number of recursive loads;
+when exceeded, the child table will not be loaded. It will be loaded when
+the foreign constraint check needs to be run. */
+#define DICT_FK_MAX_RECURSIVE_LOAD	250
+
+/** Similarly, when tables are chained together with foreign key constraints
+with ON DELETE/UPDATE CASCADE clauses, a delete from the parent table could
+result in recursive cascading calls. This defines the maximum number of
+such cascading deletes/updates allowed. When exceeded, the delete from the
+parent table will fail, and the user has to drop the excessive foreign key
+constraints before proceeding. */
+#define FK_MAX_CASCADE_DEL		300

 /**********************************************************************//**
 Creates a table memory object.
@ -434,6 +449,12 @@ struct dict_table_struct{
 				NOT allowed until this count gets to zero;
 				MySQL does NOT itself check the number of
 				open handles at drop */
+	unsigned	fk_max_recusive_level:8;
+				/*!< maximum recursive level we support when
+				loading tables chained together with FK
+				constraints. If exceeds this level, we will
+				stop loading child table into memory along with
+				its parent table */
 	ulint		n_foreign_key_checks_running;
 				/*!< count of how many foreign key check
 				operations are currently being performed
--- a/storage/xtradb/include/ha0ha.h
+++ b/storage/xtradb/include/ha0ha.h
@ -186,6 +186,7 @@ ha_remove_all_nodes_to_page(
 	hash_table_t*	table,	/*!< in: hash table */
 	ulint		fold,	/*!< in: fold value */
 	const page_t*	page);	/*!< in: buffer page */
+#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
 /*************************************************************//**
 Validates a given range of the cells in hash table.
@return	TRUE if ok */
@ -196,6 +197,7 @@ ha_validate(
 	hash_table_t*	table,		/*!< in: hash table */
 	ulint		start_index,	/*!< in: start index */
 	ulint		end_index);	/*!< in: end index */
+#endif /* defined UNIV_AHI_DEBUG || defined UNIV_DEBUG */
 /*************************************************************//**
 Prints info of a hash table. */
 UNIV_INTERN
--- a/storage/xtradb/include/hash0hash.h
+++ b/storage/xtradb/include/hash0hash.h
@ -363,13 +363,13 @@ do {\
 		NODE_TYPE*	node2222;\
 \
 		if ((TABLE)->array[i2222].node) \
-			(TABLE)->array[i2222].node = (void*)((char*)(TABLE)->array[i2222].node	\
+			(TABLE)->array[i2222].node = (void*)((byte*)(TABLE)->array[i2222].node \
 			+ (((TABLE)->array[i2222].node > (void*)FADDR)?FOFFSET:BOFFSET));\
 		node2222 = HASH_GET_FIRST((TABLE), i2222);\
 \
 		while (node2222) {\
 			if (node2222->PTR_NAME) \
-				node2222->PTR_NAME = (void*)((char*)node2222->PTR_NAME \
+				node2222->PTR_NAME = (void*)((byte*)(node2222->PTR_NAME) \
 				+ ((((void*)node2222->PTR_NAME) > (void*)FADDR)?FOFFSET:BOFFSET));\
 \
 			node2222 = node2222->PTR_NAME;\
--- a/storage/xtradb/include/mem0pool.h
+++ b/storage/xtradb/include/mem0pool.h
@ -100,18 +100,6 @@ mem_pool_get_reserved(
 /*==================*/
 	mem_pool_t*	pool);	/*!< in: memory pool */
 /********************************************************************//**
-Reserves the mem pool mutex. */
-UNIV_INTERN
-void
-mem_pool_mutex_enter(void);
-/*======================*/
-/********************************************************************//**
-Releases the mem pool mutex. */
-UNIV_INTERN
-void
-mem_pool_mutex_exit(void);
-/*=====================*/
-/********************************************************************//**
 Validates a memory pool.
@return	TRUE if ok */
 UNIV_INTERN
--- a/storage/xtradb/include/que0que.h
+++ b/storage/xtradb/include/que0que.h
@ -381,6 +381,9 @@ struct que_thr_struct{
 					thus far */
 	ulint		lock_state;	/*!< lock state of thread (table or
 					row) */
+	ulint		fk_cascade_depth; /*!< current depth of cascading
+					foreign key delete/update calls;
+					capped at FK_MAX_CASCADE_DEL */
 };

 #define QUE_THR_MAGIC_N		8476583
--- a/storage/xtradb/include/row0mysql.h
+++ b/storage/xtradb/include/row0mysql.h
@ -630,7 +630,11 @@ struct row_prebuilt_struct {
 					the secondary index, then this is
 					set to TRUE */
 	unsigned	templ_contains_blob:1;/*!< TRUE if the template contains
-					BLOB column(s) */
+					a column with DATA_BLOB ==
+					get_innobase_type_from_mysql_type();
+					not to be confused with InnoDB
+					externally stored columns
+					(VARCHAR can be off-page too) */
 	mysql_row_templ_t* mysql_template;/*!< template used to transform
 					rows fast between MySQL and Innobase
 					formats; memory for this template
--- a/storage/xtradb/include/srv0srv.h
+++ b/storage/xtradb/include/srv0srv.h
@ -157,6 +157,8 @@ extern ulint	srv_mem_pool_size;
 extern ulint	srv_lock_table_size;

 extern uint	srv_buffer_pool_shm_key;
+extern ibool	srv_buffer_pool_shm_is_reused;
+extern ibool	srv_buffer_pool_shm_checksum;

 extern ibool	srv_thread_concurrency_timer_based;

@ -340,6 +342,9 @@ extern ulint srv_buf_pool_flushed;
 reading of a disk page */
 extern ulint srv_buf_pool_reads;

+/** Time in seconds between automatic buffer pool dumps */
+extern uint srv_auto_lru_dump;
+
 /** Status variables to be passed to MySQL */
 typedef struct export_var_struct export_struc;

@ -608,6 +613,16 @@ srv_error_monitor_thread(
 /*=====================*/
 	void*	arg);	/*!< in: a dummy parameter required by
 			os_thread_create */
+/*********************************************************************//**
+A thread which restores the buffer pool from a dump file on startup and does
+periodic buffer pool dumps.
+@return	a dummy parameter */
+UNIV_INTERN
+os_thread_ret_t
+srv_LRU_dump_restore_thread(
+/*====================*/
+	void*	arg);	/*!< in: a dummy parameter required by
+			os_thread_create */
 /******************************************************************//**
 Outputs to a file the output of the InnoDB Monitor.
@return FALSE if not all information printed
--- a/storage/xtradb/include/univ.i
+++ b/storage/xtradb/include/univ.i
@ -46,8 +46,8 @@ Created 1/20/1994 Heikki Tuuri

 #define INNODB_VERSION_MAJOR	1
 #define INNODB_VERSION_MINOR	0
-#define INNODB_VERSION_BUGFIX	10
-#define PERCONA_INNODB_VERSION 12.0
+#define INNODB_VERSION_BUGFIX	12
+#define PERCONA_INNODB_VERSION 12.1

 /* The following is the InnoDB version as shown in
 SELECT plugin_version FROM information_schema.plugins;
--- a/storage/xtradb/include/ut0lst.h
+++ b/storage/xtradb/include/ut0lst.h
@ -269,10 +269,10 @@ do {									\
 	TYPE*	ut_list_node_313;					\
 									\
 	if ((BASE).start)						\
-		(BASE).start = (void*)((char*)((BASE).start)		\
+		(BASE).start = (void*)((byte*)((BASE).start)			\
 			+ (((void*)((BASE).start) > (void*)FADDR)?FOFFSET:BOFFSET));\
 	if ((BASE).end)							\
-		(BASE).end   = (void*)((char*)((BASE).end)		\
+		(BASE).end   = (void*)((byte*)((BASE).end)			\
 			+ (((void*)((BASE).end) > (void*)FADDR)?FOFFSET:BOFFSET));\
 									\
 	ut_list_node_313 = (BASE).start;				\
@ -280,10 +280,10 @@ do {									\
 	for (ut_list_i_313 = (BASE).count; ut_list_i_313--; ) {		\
 		ut_a(ut_list_node_313);					\
 		if ((ut_list_node_313->NAME).prev)			\
-			(ut_list_node_313->NAME).prev = (void*)((char*)((ut_list_node_313->NAME).prev) \
+			(ut_list_node_313->NAME).prev = (void*)((byte*)((ut_list_node_313->NAME).prev)\
 				+ (((void*)((ut_list_node_313->NAME).prev) > (void*)FADDR)?FOFFSET:BOFFSET));\
 		if ((ut_list_node_313->NAME).next)			\
-			(ut_list_node_313->NAME).next =	(void *)((char*)((ut_list_node_313->NAME).next) \
+			(ut_list_node_313->NAME).next =	(void*)((byte*)((ut_list_node_313->NAME).next)\
 				+ (((void*)((ut_list_node_313->NAME).next)> (void*)FADDR)?FOFFSET:BOFFSET));\
 		ut_list_node_313 = (ut_list_node_313->NAME).next;	\
 	}								\
--- a/storage/xtradb/include/ut0mem.h
+++ b/storage/xtradb/include/ut0mem.h
@ -113,7 +113,8 @@ ut_test_malloc(
 	ulint	n);	/*!< in: try to allocate this many bytes */
 #endif /* !UNIV_HOTBACKUP */
 /**********************************************************************//**
-Frees a memory block allocated with ut_malloc. */
+Frees a memory block allocated with ut_malloc. Freeing a NULL pointer is
+a nop. */
 UNIV_INTERN
 void
 ut_free(
--- a/storage/xtradb/lock/lock0lock.c
+++ b/storage/xtradb/lock/lock0lock.c
@ -4606,7 +4606,7 @@ print_rec:
 	nth_lock++;

 	if (nth_lock >= srv_show_locks_held) {
-		fputs("TOO LOCKS PRINTED FOR THIS TRX:"
+		fputs("TOO MANY LOCKS PRINTED FOR THIS TRX:"
 		      " SUPPRESSING FURTHER PRINTS\n",
 		      file);

--- a/storage/xtradb/log/log0recv.c
+++ b/storage/xtradb/log/log0recv.c
@ -2901,6 +2901,7 @@ recv_init_crash_recovery(void)
 /*==========================*/
 {
 	ut_a(!recv_needed_recovery);
+	ut_a(!srv_buffer_pool_shm_is_reused);

 	recv_needed_recovery = TRUE;

--- a/storage/xtradb/mem/mem0mem.c
+++ b/storage/xtradb/mem/mem0mem.c
@ -367,7 +367,7 @@ mem_heap_create_block(
 	block->line = line;

 #ifdef MEM_PERIODIC_CHECK
-	mem_pool_mutex_enter();
+	mutex_enter(&(mem_comm_pool->mutex));

 	if (!mem_block_list_inited) {
 		mem_block_list_inited = TRUE;
@ -376,7 +376,7 @@ mem_heap_create_block(

 	UT_LIST_ADD_LAST(mem_block_list, mem_block_list, block);

-	mem_pool_mutex_exit();
+	mutex_exit(&(mem_comm_pool->mutex));
 #endif
 	mem_block_set_len(block, len);
 	mem_block_set_type(block, type);
@ -479,11 +479,11 @@ mem_heap_block_free(
 	UT_LIST_REMOVE(list, heap->base, block);

 #ifdef MEM_PERIODIC_CHECK
-	mem_pool_mutex_enter();
+	mutex_enter(&(mem_comm_pool->mutex));

 	UT_LIST_REMOVE(mem_block_list, mem_block_list, block);

-	mem_pool_mutex_exit();
+	mutex_exit(&(mem_comm_pool->mutex));
 #endif

 	ut_ad(heap->total_size >= block->len);
@ -556,7 +556,7 @@ mem_validate_all_blocks(void)
 {
 	mem_block_t*	block;

-	mem_pool_mutex_enter();
+	mutex_enter(&(mem_comm_pool->mutex));

 	block = UT_LIST_GET_FIRST(mem_block_list);

@ -568,6 +568,6 @@ mem_validate_all_blocks(void)
 		block = UT_LIST_GET_NEXT(mem_block_list, block);
 	}

-	mem_pool_mutex_exit();
+	mutex_exit(&(mem_comm_pool->mutex));
 }
 #endif
--- a/storage/xtradb/mem/mem0pool.c
+++ b/storage/xtradb/mem/mem0pool.c
@ -34,6 +34,7 @@ Created 5/12/1997 Heikki Tuuri
 #include "ut0lst.h"
 #include "ut0byte.h"
 #include "mem0mem.h"
+#include "srv0start.h"

 /* We would like to use also the buffer frames to allocate memory. This
 would be desirable, because then the memory consumption of the database
@ -121,23 +122,33 @@ mysql@lists.mysql.com */
 UNIV_INTERN ulint	mem_n_threads_inside		= 0;

 /********************************************************************//**
-Reserves the mem pool mutex. */
-UNIV_INTERN
+Reserves the mem pool mutex if we are not in server shutdown. Use
+this function only in memory free functions, since only memory
+free functions are used during server shutdown. */
+UNIV_INLINE
 void
-mem_pool_mutex_enter(void)
-/*======================*/
+mem_pool_mutex_enter(
+/*=================*/
+	mem_pool_t*	pool)		/*!< in: memory pool */
 {
-	mutex_enter(&(mem_comm_pool->mutex));
+	if (srv_shutdown_state < SRV_SHUTDOWN_EXIT_THREADS) {
+		mutex_enter(&(pool->mutex));
+	}
 }

 /********************************************************************//**
-Releases the mem pool mutex. */
-UNIV_INTERN
+Releases the mem pool mutex if we are not in server shutdown. Like
+its counterpart mem_pool_mutex_enter(), use this function only
+in memory free functions. */
+UNIV_INLINE
 void
-mem_pool_mutex_exit(void)
-/*=====================*/
+mem_pool_mutex_exit(
+/*================*/
+	mem_pool_t*	pool)		/*!< in: memory pool */
 {
-	mutex_exit(&(mem_comm_pool->mutex));
+	if (srv_shutdown_state < SRV_SHUTDOWN_EXIT_THREADS) {
+		mutex_exit(&(pool->mutex));
+	}
 }

 /********************************************************************//**
@ -567,7 +578,7 @@ mem_area_free(

 	n = ut_2_log(size);

-	mutex_enter(&(pool->mutex));
+	mem_pool_mutex_enter(pool);
 	mem_n_threads_inside++;

 	ut_a(mem_n_threads_inside == 1);
@ -595,7 +606,7 @@ mem_area_free(
 		pool->reserved += ut_2_exp(n);

 		mem_n_threads_inside--;
-		mutex_exit(&(pool->mutex));
+		mem_pool_mutex_exit(pool);

 		mem_area_free(new_ptr, pool);

@ -611,7 +622,7 @@ mem_area_free(
 	}

 	mem_n_threads_inside--;
-	mutex_exit(&(pool->mutex));
+	mem_pool_mutex_exit(pool);

 	ut_ad(mem_pool_validate(pool));
 }
@ -630,7 +641,7 @@ mem_pool_validate(
 	ulint		free;
 	ulint		i;

-	mutex_enter(&(pool->mutex));
+	mem_pool_mutex_enter(pool);

 	free = 0;

@ -658,7 +669,7 @@ mem_pool_validate(

 	ut_a(free + pool->reserved == pool->size);

-	mutex_exit(&(pool->mutex));
+	mem_pool_mutex_exit(pool);

 	return(TRUE);
 }
--- a/storage/xtradb/os/os0proc.c
+++ b/storage/xtradb/os/os0proc.c
@ -246,12 +246,10 @@ os_shm_alloc(
 #if defined HAVE_SYS_IPC_H && HAVE_SYS_SHM_H
 	ulint	size;
 	int	shmid;
-#endif

 	*is_new = FALSE;
-#if defined HAVE_SYS_IPC_H && HAVE_SYS_SHM_H
 	fprintf(stderr,
-		"InnoDB: The shared memory key %#x (%d) is specified.\n",
+		"InnoDB: The shared memory segment containing the buffer pool is: key  %#x (%d).\n",
 		key, key);
 # if defined HAVE_LARGE_PAGES && defined UNIV_LINUX
 	if (!os_use_large_pages || !os_large_page_size) {
@ -268,12 +266,12 @@ os_shm_alloc(
 	if (shmid < 0) {
 		if (errno == EEXIST) {
 			fprintf(stderr,
-				"InnoDB: HugeTLB: The shared memory segment seems to exist already.\n");
+				"InnoDB: HugeTLB: The shared memory segment exists.\n");
 			shmid = shmget((key_t)key, (size_t)size,
 					SHM_HUGETLB | SHM_R | SHM_W);
 			if (shmid < 0) {
 				fprintf(stderr,
-					"InnoDB: HugeTLB: Warning: Failed to allocate %lu bytes.(reuse) errno %d\n",
+					"InnoDB: HugeTLB: Warning: Failed to allocate %lu bytes. (reuse) errno %d\n",
 					size, errno);
 				goto skip;
 			} else {
@ -282,14 +280,14 @@ os_shm_alloc(
 			}
 		} else {
 			fprintf(stderr,
-				"InnoDB: HugeTLB: Warning: Failed to allocate %lu bytes.(new) errno %d\n",
+				"InnoDB: HugeTLB: Warning: Failed to allocate %lu bytes. (new) errno %d\n",
 				size, errno);
 			goto skip;
 		}
 	} else {
 		*is_new = TRUE;
 		fprintf(stderr,
-			"InnoDB: HugeTLB: The new shared memory segment is created.\n");
+			"InnoDB: HugeTLB: A new shared memory segment has been created .\n");
 	}

 	ptr = shmat(shmid, NULL, 0);
@ -325,12 +323,12 @@ skip:
 	if (shmid < 0) {
 		if (errno == EEXIST) {
 			fprintf(stderr,
-				"InnoDB: The shared memory segment seems to exist already.\n");
+				"InnoDB: A shared memory segment containing the buffer pool seems to already exist.\n");
 			shmid = shmget((key_t)key, (size_t)size,
 					SHM_R | SHM_W);
 			if (shmid < 0) {
 				fprintf(stderr,
-					"InnoDB: Warning: Failed to allocate %lu bytes.(reuse) errno %d\n",
+					"InnoDB: Warning: Failed to allocate %lu bytes. (reuse) errno %d\n",
 					size, errno);
 				ptr = NULL;
 				goto end;
@ -340,7 +338,7 @@ skip:
 			}
 		} else {
 			fprintf(stderr,
-				"InnoDB: Warning: Failed to allocate %lu bytes.(new) errno %d\n",
+				"InnoDB: Warning: Failed to allocate %lu bytes. (new) errno %d\n",
 				size, errno);
 			ptr = NULL;
 			goto end;
@ -348,7 +346,7 @@ skip:
 	} else {
 		*is_new = TRUE;
 		fprintf(stderr,
-			"InnoDB: The new shared memory segment is created.\n");
+			"InnoDB: A new shared memory segment has been created.\n");
 	}

 	ptr = shmat(shmid, NULL, 0);
--- a/storage/xtradb/row/row0merge.c
+++ b/storage/xtradb/row/row0merge.c
@ -1787,6 +1787,11 @@ row_merge_copy_blobs(
 		(below). */
 		data = btr_rec_copy_externally_stored_field(
 			mrec, offsets, zip_size, i, &len, heap);
+		/* Because we have locked the table, any records
+		written by incomplete transactions must have been
+		rolled back already. There must not be any incomplete
+		BLOB columns. */
+		ut_a(data);

 		dfield_set_data(field, data, len);
 	}
@ -2399,7 +2404,7 @@ row_merge_rename_tables(
 		goto err_exit;
 	}

-	err = dict_load_foreigns(old_name, TRUE);
+	err = dict_load_foreigns(old_name, FALSE, TRUE);

 	if (err != DB_SUCCESS) {
 err_exit:
--- a/storage/xtradb/row/row0mysql.c
+++ b/storage/xtradb/row/row0mysql.c
@ -577,6 +577,13 @@ handle_new_error:
 		      "InnoDB: " REFMAN "forcing-recovery.html"
 		      " for help.\n", stderr);
 		break;
+	case DB_FOREIGN_EXCEED_MAX_CASCADE: /* depth > FK_MAX_CASCADE_DEL */
+		fprintf(stderr, "InnoDB: Cannot delete/update rows with"
+			" cascading foreign key constraints that exceed max"
+			" depth of %lu\n"
+			"Please drop excessive foreign constraints"
+			" and try again\n", (ulong) FK_MAX_CASCADE_DEL);
+		break;
 	default:
 		fprintf(stderr, "InnoDB: unknown error code %lu\n",
 			(ulong) err);
@ -1392,11 +1399,15 @@ row_update_for_mysql(
 run_again:
 	thr->run_node = node;
 	thr->prev_node = node;
+	thr->fk_cascade_depth = 0;

 	row_upd_step(thr);

 	err = trx->error_state;

+	/* Reset fk_cascade_depth back to 0 */
+	thr->fk_cascade_depth = 0;
+
 	if (err != DB_SUCCESS) {
 		que_thr_stop_for_mysql(thr);

@ -1587,6 +1598,12 @@ row_update_cascade_for_mysql(
 	trx_t*	trx;

 	trx = thr_get_trx(thr);
+
+	thr->fk_cascade_depth++;
+
+	if (thr->fk_cascade_depth > FK_MAX_CASCADE_DEL) {
+		return (DB_FOREIGN_EXCEED_MAX_CASCADE);
+	}
 run_again:
 	thr->run_node = node;
 	thr->prev_node = node;
@ -2106,7 +2123,7 @@ row_table_add_foreign_constraints(
 					      name, reject_fks);
 	if (err == DB_SUCCESS) {
 		/* Check that also referencing constraints are ok */
-		err = dict_load_foreigns(name, TRUE);
+		err = dict_load_foreigns(name, FALSE, TRUE);
 	}

 	if (err != DB_SUCCESS) {
@ -2799,6 +2816,15 @@ row_truncate_table_for_mysql(

 	trx->table_id = table->id;

+	/* Lock all index trees for this table, as we will
+	truncate the table/index and possibly change their metadata.
+	All DML/DDL are blocked by the table level lock, with
+	a few exceptions such as queries into information schema
+	about the table. MySQL could try to access index stats
+	for this kind of query, so we need to use index locks to
+	sync up. */
+	dict_table_x_lock_indexes(table);
+
 	if (table->space && !table->dir_path_of_temp_table) {
 		/* Discard and create the single-table tablespace. */
 		ulint	space	= table->space;
@ -2815,6 +2841,7 @@ row_truncate_table_for_mysql(
 			    || fil_create_new_single_table_tablespace(
 				    space, table->name, FALSE, flags,
 				    FIL_IBD_FILE_INITIAL_SIZE) != DB_SUCCESS) {
+				dict_table_x_unlock_indexes(table);
 				ut_print_timestamp(stderr);
 				fprintf(stderr,
 					"  InnoDB: TRUNCATE TABLE %s failed to"
@ -2918,6 +2945,10 @@ next_rec:

 	mem_heap_free(heap);

+	/* Done with index truncation, release index tree locks,
+	subsequent work relates to table level metadata change */
+	dict_table_x_unlock_indexes(table);
+
 	dict_hdr_get_new_id(&new_id, NULL, NULL);

 	info = pars_info_create();
@ -3967,7 +3998,7 @@ end:
 		an ALTER, not in a RENAME. */

 		err = dict_load_foreigns(
-			new_name, !old_is_tmp || trx->check_foreigns);
+			new_name, FALSE, !old_is_tmp || trx->check_foreigns);

 		if (err != DB_SUCCESS) {
 			ut_print_timestamp(stderr);
--- a/storage/xtradb/row/row0row.c
+++ b/storage/xtradb/row/row0row.c
@ -294,7 +294,13 @@ row_build(

 	ut_ad(dtuple_check_typed(row));

-	if (j) {
+	if (!ext) {
+		/* REDUNDANT and COMPACT formats store a local
+		768-byte prefix of each externally stored
+		column. No cache is needed. */
+		ut_ad(dict_table_get_format(index->table)
+		      < DICT_TF_FORMAT_ZIP);
+	} else if (j) {
 		*ext = row_ext_create(j, ext_cols, row,
 				      dict_table_zip_size(index->table),
 				      heap);
--- a/storage/xtradb/row/row0sel.c
+++ b/storage/xtradb/row/row0sel.c
@ -416,7 +416,7 @@ row_sel_fetch_columns(
 							      field_no))) {

 				/* Copy an externally stored field to the
-				temporary heap */
+				temporary heap, if possible. */

 				heap = mem_heap_create(1);

@ -425,6 +425,17 @@ row_sel_fetch_columns(
 					dict_table_zip_size(index->table),
 					field_no, &len, heap);

+				/* data == NULL means that the
+				externally stored field was not
+				written yet. This record
+				should only be seen by
+				recv_recovery_rollback_active() or any
+				TRX_ISO_READ_UNCOMMITTED
+				transactions. The InnoDB SQL parser
+				(the sole caller of this function)
+				does not implement READ UNCOMMITTED,
+				and it is not involved during rollback. */
+				ut_a(data);
 				ut_a(len != UNIV_SQL_NULL);

 				needs_copy = TRUE;
@ -926,6 +937,7 @@ row_sel_get_clust_rec(
 	when plan->clust_pcur was positioned.  The latch will not be
 	released until mtr_commit(mtr). */

+	ut_ad(!rec_get_deleted_flag(clust_rec, rec_offs_comp(offsets)));
 	row_sel_fetch_columns(index, clust_rec, offsets,
 			      UT_LIST_GET_FIRST(plan->columns));
 	*out_rec = clust_rec;
@ -1628,6 +1640,13 @@ skip_lock:
 				}

 				if (old_vers == NULL) {
+					/* The record does not exist
+					in our read view. Skip it, but
+					first attempt to determine
+					whether the index segment we
+					are searching through has been
+					exhausted. */
+
 					offsets = rec_get_offsets(
 						rec, index, offsets,
 						ULINT_UNDEFINED, &heap);
@ -2647,9 +2666,8 @@ Convert a row in the Innobase format to a row in the MySQL format.
 Note that the template in prebuilt may advise us to copy only a few
 columns to mysql_rec, other columns are left blank. All columns may not
 be needed in the query.
-@return TRUE if success, FALSE if could not allocate memory for a BLOB
-(though we may also assert in that case) */
-static
+@return TRUE on success, FALSE if not all columns could be retrieved */
+static __attribute__((warn_unused_result))
 ibool
 row_sel_store_mysql_rec(
 /*====================*/
@ -2719,6 +2737,21 @@ row_sel_store_mysql_rec(
 				dict_table_zip_size(prebuilt->table),
 				templ->rec_field_no, &len, heap);

+			if (UNIV_UNLIKELY(!data)) {
+				/* The externally stored field
+				was not written yet. This
+				record should only be seen by
+				recv_recovery_rollback_active()
+				or any TRX_ISO_READ_UNCOMMITTED
+				transactions. */
+
+				if (extern_field_heap) {
+					mem_heap_free(extern_field_heap);
+				}
+
+				return(FALSE);
+			}
+
 			ut_a(len != UNIV_SQL_NULL);
 		} else {
 			/* Field is stored in the row. */
@ -3136,9 +3169,10 @@ row_sel_pop_cached_row_for_mysql(
 }

 /********************************************************************//**
-Pushes a row for MySQL to the fetch cache. */
-UNIV_INLINE
-void
+Pushes a row for MySQL to the fetch cache.
+@return TRUE on success, FALSE if the record contains incomplete BLOBs */
+UNIV_INLINE __attribute__((warn_unused_result))
+ibool
 row_sel_push_cache_row_for_mysql(
 /*=============================*/
 	row_prebuilt_t*	prebuilt,	/*!< in: prebuilt struct */
@ -3180,10 +3214,11 @@ row_sel_push_cache_row_for_mysql(
 				  prebuilt->fetch_cache[
 					  prebuilt->n_fetch_cached],
 				  prebuilt, rec, offsets))) {
-		ut_error;
+		return(FALSE);
 	}

 	prebuilt->n_fetch_cached++;
+	return(TRUE);
 }

 /*********************************************************************//**
@ -3576,14 +3611,25 @@ row_search_for_mysql(
 				row_sel_try_search_shortcut_for_mysql().
 				The latch will not be released until
 				mtr_commit(&mtr). */
+				ut_ad(!rec_get_deleted_flag(rec, comp));

 				if (!row_sel_store_mysql_rec(buf, prebuilt,
 							     rec, offsets)) {
-					err = DB_TOO_BIG_RECORD;
-
-					/* We let the main loop to do the
-					error handling */
-					goto shortcut_fails_too_big_rec;
+					/* Only fresh inserts may contain
+					incomplete externally stored
+					columns. Pretend that such
+					records do not exist. Such
+					records may only be accessed
+					at the READ UNCOMMITTED
+					isolation level or when
+					rolling back a recovered
+					transaction. Rollback happens
+					at a lower level, not here. */
+					ut_a(trx->isolation_level
+					     == TRX_ISO_READ_UNCOMMITTED);
+
+					/* Proceed as in case SEL_RETRY. */
+					break;
 				}

 				mtr_commit(&mtr);
@ -3623,7 +3669,7 @@ release_search_latch_if_needed:
 			default:
 				ut_ad(0);
 			}
-shortcut_fails_too_big_rec:
+
 			mtr_commit(&mtr);
 			mtr_start(&mtr);
 		}
@ -4217,7 +4263,7 @@ no_gap_lock:

 				rec = old_vers;
 			}
-		} else if (!lock_sec_rec_cons_read_sees(rec, trx->read_view)) {
+		} else {
 			/* We are looking into a non-clustered index,
 			and to get the right version of the record we
 			have to look also into the clustered index: this
@ -4225,8 +4271,12 @@ no_gap_lock:
 			information via the clustered index record. */

 			ut_ad(index != clust_index);
+			ut_ad(!dict_index_is_clust(index));

-			goto requires_clust_rec;
+			if (!lock_sec_rec_cons_read_sees(
+				    rec, trx->read_view)) {
+				goto requires_clust_rec;
+			}
 		}
 	}

@ -4349,8 +4399,13 @@ requires_clust_rec:
 						  ULINT_UNDEFINED, &heap);
 			result_rec = rec;
 		}
+
+		/* result_rec can legitimately be delete-marked
+		now that it has been established that it points to a
+		clustered index record that exists in the read view. */
 	} else {
 		result_rec = rec;
+		ut_ad(!rec_get_deleted_flag(rec, comp));
 	}

 	/* We found a qualifying record 'result_rec'. At this point,
@ -4382,9 +4437,18 @@ requires_clust_rec:
 		not cache rows because there the cursor is a scrollable
 		cursor. */

-		row_sel_push_cache_row_for_mysql(prebuilt, result_rec,
-						 offsets);
-		if (prebuilt->n_fetch_cached == MYSQL_FETCH_CACHE_SIZE) {
+		if (!row_sel_push_cache_row_for_mysql(prebuilt, result_rec,
+						      offsets)) {
+			/* Only fresh inserts may contain incomplete
+			externally stored columns. Pretend that such
+			records do not exist. Such records may only be
+			accessed at the READ UNCOMMITTED isolation
+			level or when rolling back a recovered
+			transaction. Rollback happens at a lower
+			level, not here. */
+			ut_a(trx->isolation_level == TRX_ISO_READ_UNCOMMITTED);
+		} else if (prebuilt->n_fetch_cached
+			   == MYSQL_FETCH_CACHE_SIZE) {

 			goto got_row;
 		}
@ -4400,9 +4464,17 @@ requires_clust_rec:
 		} else {
 			if (!row_sel_store_mysql_rec(buf, prebuilt,
 						     result_rec, offsets)) {
-				err = DB_TOO_BIG_RECORD;
-
-				goto lock_wait_or_error;
+				/* Only fresh inserts may contain
+				incomplete externally stored
+				columns. Pretend that such records do
+				not exist. Such records may only be
+				accessed at the READ UNCOMMITTED
+				isolation level or when rolling back a
+				recovered transaction. Rollback
+				happens at a lower level, not here. */
+				ut_a(trx->isolation_level
+				     == TRX_ISO_READ_UNCOMMITTED);
+				goto next_rec;
 			}
 		}

--- a/storage/xtradb/row/row0undo.c
+++ b/storage/xtradb/row/row0undo.c
@ -199,8 +199,24 @@ row_undo_search_clust_to_pcur(

 		ret = FALSE;
 	} else {
+		row_ext_t**	ext;
+
+		if (dict_table_get_format(node->table) >= DICT_TF_FORMAT_ZIP) {
+			/* In DYNAMIC or COMPRESSED format, there is
+			no prefix of externally stored columns in the
+			clustered index record. Build a cache of
+			column prefixes. */
+			ext = &node->ext;
+		} else {
+			/* REDUNDANT and COMPACT formats store a local
+			768-byte prefix of each externally stored
+			column. No cache is needed. */
+			ext = NULL;
+			node->ext = NULL;
+		}
+
 		node->row = row_build(ROW_COPY_DATA, clust_index, rec,
-				      offsets, NULL, &node->ext, node->heap);
+				      offsets, NULL, ext, node->heap);
 		if (node->update) {
 			node->undo_row = dtuple_copy(node->row, node->heap);
 			row_upd_replace(node->undo_row, &node->undo_ext,
--- a/storage/xtradb/row/row0upd.c
+++ b/storage/xtradb/row/row0upd.c
@ -1398,6 +1398,7 @@ row_upd_store_row(
 	dict_index_t*	clust_index;
 	rec_t*		rec;
 	mem_heap_t*	heap		= NULL;
+	row_ext_t**	ext;
 	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
 	const ulint*	offsets;
 	rec_offs_init(offsets_);
@ -1414,8 +1415,22 @@ row_upd_store_row(

 	offsets = rec_get_offsets(rec, clust_index, offsets_,
 				  ULINT_UNDEFINED, &heap);
+
+	if (dict_table_get_format(node->table) >= DICT_TF_FORMAT_ZIP) {
+		/* In DYNAMIC or COMPRESSED format, there is no prefix
+		of externally stored columns in the clustered index
+		record. Build a cache of column prefixes. */
+		ext = &node->ext;
+	} else {
+		/* REDUNDANT and COMPACT formats store a local
+		768-byte prefix of each externally stored column.
+		No cache is needed. */
+		ext = NULL;
+		node->ext = NULL;
+	}
+
 	node->row = row_build(ROW_COPY_DATA, clust_index, rec, offsets,
-			      NULL, &node->ext, node->heap);
+			      NULL, ext, node->heap);
 	if (node->is_delete) {
 		node->upd_row = NULL;
 		node->upd_ext = NULL;
@ -1583,6 +1598,7 @@ row_upd_clust_rec_by_insert(
 	dict_table_t*	table;
 	dtuple_t*	entry;
 	ulint		err;
+	ibool		change_ownership = FALSE;

 	ut_ad(node);
 	ut_ad(dict_index_is_clust(index));
@ -1615,9 +1631,9 @@ row_upd_clust_rec_by_insert(
 		index = dict_table_get_first_index(table);
 		offsets = rec_get_offsets(rec, index, offsets_,
 					  ULINT_UNDEFINED, &heap);
-		btr_cur_mark_extern_inherited_fields(
-			btr_cur_get_page_zip(btr_cur),
-			rec, index, offsets, node->update, mtr);
+		change_ownership = btr_cur_mark_extern_inherited_fields(
+			btr_cur_get_page_zip(btr_cur), rec, index, offsets,
+			node->update, mtr);
 		if (check_ref) {
 			/* NOTE that the following call loses
 			the position of pcur ! */
@ -1646,10 +1662,11 @@ row_upd_clust_rec_by_insert(

 	row_upd_index_entry_sys_field(entry, index, DATA_TRX_ID, trx->id);

-	if (node->upd_ext) {
+	if (change_ownership) {
 		/* If we return from a lock wait, for example, we may have
 		extern fields marked as not-owned in entry (marked in the
-		if-branch above). We must unmark them. */
+		if-branch above). We must unmark them, take the ownership
+		back. */

 		btr_cur_unmark_dtuple_extern_fields(entry);

--- a/storage/xtradb/srv/srv0srv.c
+++ b/storage/xtradb/srv/srv0srv.c
@ -213,6 +213,8 @@ UNIV_INTERN ulint	srv_lock_table_size	= ULINT_MAX;

 /* key value for shm */
 UNIV_INTERN uint	srv_buffer_pool_shm_key	= 0;
+UNIV_INTERN ibool	srv_buffer_pool_shm_is_reused = FALSE;
+UNIV_INTERN ibool	srv_buffer_pool_shm_checksum = TRUE;

 /* This parameter is deprecated. Use srv_n_io_[read|write]_threads
 instead. */
@ -307,6 +309,9 @@ UNIV_INTERN ulint srv_buf_pool_flushed = 0;
 reading of a disk page */
 UNIV_INTERN ulint srv_buf_pool_reads = 0;

+/** Time in seconds between automatic buffer pool dumps */
+UNIV_INTERN uint srv_auto_lru_dump = 0;
+
 /* structure to pass status variables to MySQL */
 UNIV_INTERN export_struc export_vars;

@ -2555,6 +2560,56 @@ loop:
 	OS_THREAD_DUMMY_RETURN;
 }

+/*********************************************************************//**
+A thread which restores the buffer pool from a dump file on startup and does
+periodic buffer pool dumps.
+
+On entry, buf_LRU_file_restore() is called once, but only if
+srv_auto_lru_dump is nonzero at that moment. The thread then loops: it
+sleeps via os_thread_sleep(5000000), and after each wake-up it leaves the
+loop and terminates through os_thread_exit() if srv_shutdown_state is at
+or beyond SRV_SHUTDOWN_CLEANUP. Otherwise it calls buf_LRU_file_dump()
+when strictly more than srv_auto_lru_dump seconds have passed since the
+previous dump (or, before the first dump, since the startup step above
+completed). srv_auto_lru_dump is re-read after every wake-up, so a changed
+value is honoured at the next wake-up; the value 0 disables the periodic
+dumps.
+@return	a dummy parameter */
+UNIV_INTERN
+os_thread_ret_t
+srv_LRU_dump_restore_thread(
+/*====================*/
+	void*	arg __attribute__((unused)))
+			/*!< in: a dummy parameter required by
+			os_thread_create */
+{
+	uint	auto_lru_dump;	/* per-iteration copy of srv_auto_lru_dump */
+	time_t	last_dump_time;	/* time() of the last dump, or of startup */
+	time_t	time_elapsed;	/* seconds since last_dump_time */
+
+#ifdef UNIV_DEBUG_THREAD_CREATION
+	fprintf(stderr, "LRU dump/restore thread starts, id %lu\n",
+		os_thread_pf(os_thread_get_curr_id()));
+#endif
+
+	if (srv_auto_lru_dump)	/* restore only when auto dump is enabled */
+		buf_LRU_file_restore();
+
+	last_dump_time = time(NULL);
+
+loop:
+	os_thread_sleep(5000000);	/* presumably microseconds, i.e.
+					5 seconds -- TODO confirm */
+
+	if (srv_shutdown_state >= SRV_SHUTDOWN_CLEANUP) {
+		goto exit_func;
+	}
+
+	time_elapsed = time(NULL) - last_dump_time;
+	auto_lru_dump = srv_auto_lru_dump;	/* read the global once so
+						that both tests below use
+						the same value */
+	if (auto_lru_dump > 0 && (time_t) auto_lru_dump < time_elapsed) {
+		last_dump_time = time(NULL);
+		buf_LRU_file_dump();
+	}
+
+	goto loop;
+exit_func:
+	/* We count the number of threads in os_thread_exit(). A created
+	thread should always use that to exit and not use return() to exit. */
+
+	os_thread_exit(NULL);
+
+	OS_THREAD_DUMMY_RETURN;
+}
+
 /*******************************************************************//**
 Tells the InnoDB server that there has been activity in the database
 and wakes up the master thread if it is suspended (not sleeping). Used
--- a/storage/xtradb/srv/srv0start.c
+++ b/storage/xtradb/srv/srv0start.c
@ -126,9 +126,9 @@ static mutex_t		ios_mutex;
 static ulint		ios;

 /** io_handler_thread parameters for thread identification */
-static ulint		n[SRV_MAX_N_IO_THREADS + 6 + 64];
+static ulint		n[SRV_MAX_N_IO_THREADS + 7 + 64];
 /** io_handler_thread identifiers */
-static os_thread_id_t	thread_ids[SRV_MAX_N_IO_THREADS + 6 + 64];
+static os_thread_id_t	thread_ids[SRV_MAX_N_IO_THREADS + 7 + 64];

 /** We use this mutex to test the return value of pthread_mutex_trylock
   on successful locking. HP-UX does NOT return 0, though Linux et al do. */
@ -1719,8 +1719,8 @@ innobase_start_or_create_for_mysql(void)
 		Note that this is not as heavy weight as it seems. At
 		this point there will be only ONE page in the buf_LRU
 		and there must be no page in the buf_flush list. */
-		/* TODO: treat more correctly */
-		if (!srv_buffer_pool_shm_key)
+		/* buffer_pool_shm should not be reused when recovery was needed. */
+		if (!srv_buffer_pool_shm_is_reused)
 		buf_pool_invalidate();

 		/* We always try to do a recovery, even if the database had
@ -1835,6 +1835,10 @@ innobase_start_or_create_for_mysql(void)
 	os_thread_create(&srv_monitor_thread, NULL,
 			 thread_ids + 4 + SRV_MAX_N_IO_THREADS);

+	/* Create the thread which automatically dumps/restores the buffer pool */
+	os_thread_create(&srv_LRU_dump_restore_thread, NULL,
+			 thread_ids + 5 + SRV_MAX_N_IO_THREADS);
+
 	srv_is_being_started = FALSE;

 	if (trx_doublewrite == NULL) {
@ -1859,13 +1863,13 @@ innobase_start_or_create_for_mysql(void)
 		ulint i;

 		os_thread_create(&srv_purge_thread, NULL, thread_ids
-				 + (5 + SRV_MAX_N_IO_THREADS));
+				 + (6 + SRV_MAX_N_IO_THREADS));

 		for (i = 0; i < srv_use_purge_thread - 1; i++) {
-			n[6 + i + SRV_MAX_N_IO_THREADS] = i; /* using as index for arrays in purge_sys */
+			n[7 + i + SRV_MAX_N_IO_THREADS] = i; /* using as index for arrays in purge_sys */
 			os_thread_create(&srv_purge_worker_thread,
-					 n + (6 + i + SRV_MAX_N_IO_THREADS),
-					 thread_ids + (6 + i + SRV_MAX_N_IO_THREADS));
+					 n + (7 + i + SRV_MAX_N_IO_THREADS),
+					 thread_ids + (7 + i + SRV_MAX_N_IO_THREADS));
 		}
 	}
 #ifdef UNIV_DEBUG
@ -2214,6 +2218,10 @@ innobase_shutdown_for_mysql(void)
 	log_mem_free();
 	buf_pool_free();
 	mem_close();
+
+	/* ut_free_all_mem() frees all allocated memory not freed yet
+	in shutdown, and it will also free the ut_list_mutex, so it
+	should be the very last operation performed here */
 	ut_free_all_mem();

 	if (os_thread_count != 0
--- a/storage/xtradb/trx/trx0sys.c
+++ b/storage/xtradb/trx/trx0sys.c
@ -541,8 +541,8 @@ start_again:
 		log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE);

 		fprintf(stderr, "InnoDB: Doublewrite buffer created in the doublewrite file\n");
+		trx_sys_multiple_tablespace_format = TRUE;
 	}
-
 	trx_doublewrite_buf_is_being_created = FALSE;
    }
 }
--- a/storage/xtradb/trx/trx0undo.c
+++ b/storage/xtradb/trx/trx0undo.c
@ -1976,7 +1976,8 @@ trx_undo_update_cleanup(

 		UT_LIST_ADD_FIRST(undo_list, rseg->update_undo_cached, undo);
 	} else {
-		ut_ad(undo->state == TRX_UNDO_TO_PURGE);
+		ut_ad(undo->state == TRX_UNDO_TO_PURGE
+		      || undo->state == TRX_UNDO_TO_FREE);

 		trx_undo_mem_free(undo);
 	}
--- a/storage/xtradb/ut/ut0mem.c
+++ b/storage/xtradb/ut/ut0mem.c
@ -290,7 +290,8 @@ ut_test_malloc(
 #endif /* !UNIV_HOTBACKUP */

 /**********************************************************************//**
-Frees a memory block allocated with ut_malloc. */
+Frees a memory block allocated with ut_malloc. Freeing a NULL pointer is
+a nop. */
 UNIV_INTERN
 void
 ut_free(
@ -300,7 +301,9 @@ ut_free(
 #ifndef UNIV_HOTBACKUP
 	ut_mem_block_t* block;

-	if (UNIV_LIKELY(srv_use_sys_malloc)) {
+	if (ptr == NULL) {
+		return;
+	} else if (UNIV_LIKELY(srv_use_sys_malloc)) {
 		free(ptr);
 		return;
 	}