mirror of https://github.com/MariaDB/server
				
				
			
			You can not select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							1621 lines
						
					
					
						
							61 KiB
						
					
					
				
			
		
		
		
			
			
			
		
		
	
	
							1621 lines
						
					
					
						
							61 KiB
						
					
					
				| #ifndef HA_PARTITION_INCLUDED | |
| #define HA_PARTITION_INCLUDED | |
|  | |
| /* | |
|    Copyright (c) 2005, 2012, Oracle and/or its affiliates. | |
|    Copyright (c) 2009, 2022, MariaDB Corporation. | |
|  | |
|    This program is free software; you can redistribute it and/or modify | |
|    it under the terms of the GNU General Public License as published by | |
|    the Free Software Foundation; version 2 of the License. | |
|  | |
|    This program is distributed in the hope that it will be useful, | |
|    but WITHOUT ANY WARRANTY; without even the implied warranty of | |
|    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | |
|    GNU General Public License for more details. | |
|  | |
|    You should have received a copy of the GNU General Public License | |
|    along with this program; if not, write to the Free Software | |
|    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335  USA */ | |
| 
 | |
| #include "sql_partition.h"      /* part_id_range, partition_element */ | |
| #include "queues.h"             /* QUEUE */ | |
|  | |
| struct Ordered_blob_storage | |
| { | |
|   String blob; | |
|   bool set_read_value; | |
|   Ordered_blob_storage() : set_read_value(false) | |
|   {} | |
| }; | |
| 
 | |
/*
  Layout constants.

  PARTITION_BYTES_IN_POS   constant 2 (presumably the number of bytes used
                           to store a partition id; confirm in users).
  ORDERED_PART_NUM_OFFSET  size of a pointer to an Ordered_blob_storage
                           pointer.
  ORDERED_REC_OFFSET       the two values above added together.
*/
#define PARTITION_BYTES_IN_POS  2
#define ORDERED_PART_NUM_OFFSET (sizeof(Ordered_blob_storage **))
#define ORDERED_REC_OFFSET \
  (ORDERED_PART_NUM_OFFSET + PARTITION_BYTES_IN_POS)
|  | |
| 
 | |
| /** Struct used for partition_name_hash */ | |
| typedef struct st_part_name_def | |
| { | |
|   uchar *partition_name; | |
|   uint length; | |
|   uint32 part_id; | |
|   my_bool is_subpart; | |
| } PART_NAME_DEF; | |
| 
 | |
| /** class where to save partitions Handler_share's */ | |
| class Parts_share_refs | |
| { | |
| public: | |
|   uint num_parts;                              /**< Size of ha_share array */ | |
|   Handler_share **ha_shares;                   /**< Storage for each part */ | |
|   Parts_share_refs() | |
|   { | |
|     num_parts= 0; | |
|     ha_shares= NULL; | |
|   } | |
|   ~Parts_share_refs() | |
|   { | |
|     uint i; | |
|     for (i= 0; i < num_parts; i++) | |
|       delete ha_shares[i]; | |
|     delete[] ha_shares; | |
|   } | |
|   bool init(uint arg_num_parts) | |
|   { | |
|     DBUG_ASSERT(!num_parts && !ha_shares); | |
|     num_parts= arg_num_parts; | |
|     /* Allocate an array of Handler_share pointers */ | |
|     ha_shares= new Handler_share *[num_parts]; | |
|     if (!ha_shares) | |
|     { | |
|       num_parts= 0; | |
|       return true; | |
|     } | |
|     memset(ha_shares, 0, sizeof(Handler_share*) * num_parts); | |
|     return false; | |
|   } | |
| }; | |
| 
 | |
| class ha_partition; | |
| 
 | |
| /* Partition Full Text Search info */ | |
| struct st_partition_ft_info | |
| { | |
|   struct _ft_vft        *please; | |
|   st_partition_ft_info  *next; | |
|   ha_partition          *file; | |
|   FT_INFO               **part_ft_info; | |
| }; | |
| 
 | |
| 
 | |
| #ifdef HAVE_PSI_MUTEX_INTERFACE | |
| extern PSI_mutex_key key_partition_auto_inc_mutex; | |
| #endif | |
|  | |
| /** | |
|   Partition specific Handler_share. | |
| */ | |
| class Partition_share : public Handler_share | |
| { | |
| public: | |
|   bool auto_inc_initialized; | |
|   mysql_mutex_t auto_inc_mutex;                /**< protecting auto_inc val */ | |
|   ulonglong next_auto_inc_val;                 /**< first non reserved value */ | |
|   /** | |
|     Hash of partition names. Initialized in the first ha_partition::open() | |
|     for the table_share. After that it is read-only, i.e. no locking required. | |
|   */ | |
|   bool partition_name_hash_initialized; | |
|   HASH partition_name_hash; | |
|   /** Storage for each partitions Handler_share */ | |
|   Parts_share_refs partitions_share_refs; | |
|   Partition_share() | |
|     : auto_inc_initialized(false), | |
|     next_auto_inc_val(0), | |
|     partition_name_hash_initialized(false), | |
|     partition_names(NULL) | |
|   { | |
|     mysql_mutex_init(key_partition_auto_inc_mutex, | |
|                     &auto_inc_mutex, | |
|                     MY_MUTEX_INIT_FAST); | |
|   } | |
| 
 | |
|   ~Partition_share() | |
|   { | |
|     mysql_mutex_destroy(&auto_inc_mutex); | |
|     if (partition_names) | |
|     { | |
|       my_free(partition_names); | |
|     } | |
|     if (partition_name_hash_initialized) | |
|     { | |
|       my_hash_free(&partition_name_hash); | |
|     } | |
|   } | |
|    | |
|   bool init(uint num_parts); | |
| 
 | |
|   /** | |
|     Release reserved auto increment values not used. | |
|     @param thd             Thread. | |
|     @param table_share     Table Share | |
|     @param next_insert_id  Next insert id (first non used auto inc value). | |
|     @param max_reserved    End of reserved auto inc range. | |
|   */ | |
|   void release_auto_inc_if_possible(THD *thd, TABLE_SHARE *table_share, | |
|                                     const ulonglong next_insert_id, | |
|                                     const ulonglong max_reserved); | |
| 
 | |
|   /** lock mutex protecting auto increment value next_auto_inc_val. */ | |
|   inline void lock_auto_inc() | |
|   { | |
|     mysql_mutex_lock(&auto_inc_mutex); | |
|   } | |
|   /** unlock mutex protecting auto increment value next_auto_inc_val. */ | |
|   inline void unlock_auto_inc() | |
|   { | |
|     mysql_mutex_unlock(&auto_inc_mutex); | |
|   } | |
|   /** | |
|     Populate partition_name_hash with partition and subpartition names | |
|     from part_info. | |
|     @param part_info  Partition info containing all partitions metadata. | |
|  | |
|     @return Operation status. | |
|       @retval false Success. | |
|       @retval true  Failure. | |
|   */ | |
|   bool populate_partition_name_hash(partition_info *part_info); | |
|   /** Get partition name. | |
|  | |
|   @param part_id  Partition id (for subpartitioned table only subpartition | |
|                   names will be returned.) | |
|  | |
|   @return partition name or NULL if error. | |
|   */ | |
|   const char *get_partition_name(size_t part_id) const; | |
| private: | |
|   const uchar **partition_names; | |
|   /** | |
|     Insert [sub]partition name into  partition_name_hash | |
|     @param name        Partition name. | |
|     @param part_id     Partition id. | |
|     @param is_subpart  True if subpartition else partition. | |
|  | |
|     @return Operation status. | |
|       @retval false Success. | |
|       @retval true  Failure. | |
|   */ | |
|   bool insert_partition_name_in_hash(const char *name, | |
|                                      uint part_id, | |
|                                      bool is_subpart); | |
| }; | |
| 
 | |
| 
 | |
| /* | |
|   List of ranges to be scanned by ha_partition's MRR implementation | |
|  | |
|   This object is | |
|    - A KEY_MULTI_RANGE structure (the MRR range) | |
|    - Storage for the range endpoints that the KEY_MULTI_RANGE has pointers to | |
|    - list of such ranges (connected through the "next" pointer). | |
| */ | |
| 
 | |
| typedef struct st_partition_key_multi_range | |
| { | |
|   /* | |
|     Number of the range. The ranges are numbered in the order RANGE_SEQ_IF has | |
|     emitted them, starting from 1. The numbering in used by ordered MRR scans. | |
|   */ | |
|   uint id; | |
|   uchar *key[2]; | |
|   /* | |
|     Sizes of allocated memory in key[]. These may be larger then the actual | |
|     values as this structure is reused across MRR scans | |
|   */ | |
|   uint length[2]; | |
| 
 | |
|   /* | |
|     The range. | |
|     key_multi_range.ptr is a pointer to the this PARTITION_KEY_MULTI_RANGE | |
|     object | |
|   */ | |
|   KEY_MULTI_RANGE key_multi_range; | |
| 
 | |
|   // Range id from the SQL layer | |
|   range_id_t ptr; | |
| 
 | |
|   // The next element in the list of MRR ranges. | |
|   st_partition_key_multi_range *next; | |
| } PARTITION_KEY_MULTI_RANGE; | |
| 
 | |
| 
 | |
| /* | |
|   List of ranges to be scanned in a certain [sub]partition | |
|  | |
|   The idea is that there's a list of ranges to be scanned in the table | |
|   (formed by PARTITION_KEY_MULTI_RANGE structures), | |
|   and for each [sub]partition, we only need to scan a subset of that list. | |
|  | |
|      PKMR1 --> PKMR2 --> PKMR3 -->... // list of PARTITION_KEY_MULTI_RANGE | |
|        ^                   ^ | |
|        |                   | | |
|      PPKMR1 ----------> PPKMR2 -->... // list of PARTITION_PART_KEY_MULTI_RANGE | |
|  | |
|   This way, per-partition lists of PARTITION_PART_KEY_MULTI_RANGE have pointers | |
|   to the elements of the global list of PARTITION_KEY_MULTI_RANGE. | |
| */ | |
| 
 | |
| typedef struct st_partition_part_key_multi_range | |
| { | |
|   PARTITION_KEY_MULTI_RANGE *partition_key_multi_range; | |
|   st_partition_part_key_multi_range *next; | |
| } PARTITION_PART_KEY_MULTI_RANGE; | |
| 
 | |
| 
 | |
| class ha_partition; | |
| 
 | |
| /* | |
|   The structure holding information about range sequence to be used with one | |
|   partition. | |
|   (pointer to this is used as seq_init_param for RANGE_SEQ_IF structure when | |
|    invoking MRR for an individual partition) | |
| */ | |
| 
 | |
| typedef struct st_partition_part_key_multi_range_hld | |
| { | |
|   /* Owner object */ | |
|   ha_partition *partition; | |
| 
 | |
|   /* id of the the partition this structure is for */ | |
|   uint32 part_id; | |
| 
 | |
|   /* Current range we're iterating through */ | |
|   PARTITION_PART_KEY_MULTI_RANGE *partition_part_key_multi_range; | |
| } PARTITION_PART_KEY_MULTI_RANGE_HLD; | |
| 
 | |
| 
 | |
| extern "C" int cmp_key_part_id(void *key_p, uchar *ref1, uchar *ref2); | |
| extern "C" int cmp_key_rowid_part_id(void *ptr, uchar *ref1, uchar *ref2); | |
| 
 | |
| class ha_partition :public handler | |
| { | |
| private: | |
|   enum partition_index_scan_type | |
|   { | |
|     partition_index_read= 0, | |
|     partition_index_first= 1, | |
|     partition_index_last= 3, | |
|     partition_index_read_last= 4, | |
|     partition_read_range = 5, | |
|     partition_no_index_scan= 6, | |
|     partition_read_multi_range = 7, | |
|     partition_ft_read= 8 | |
|   }; | |
|   /* Data for the partition handler */ | |
|   int  m_mode;                          // Open mode | |
|   uint m_open_test_lock;                // Open test_if_locked | |
|   uchar *m_file_buffer;                 // Content of the .par file | |
|   char *m_name_buffer_ptr;		// Pointer to first partition name | |
|   MEM_ROOT m_mem_root; | |
|   plugin_ref *m_engine_array;           // Array of types of the handlers | |
|   handler **m_file;                     // Array of references to handler inst. | |
|   uint m_file_tot_parts;                // Debug | |
|   handler **m_new_file;                 // Array of references to new handlers | |
|   handler **m_reorged_file;             // Reorganised partitions | |
|   handler **m_added_file;               // Added parts kept for errors | |
|   LEX_CSTRING *m_connect_string; | |
|   partition_info *m_part_info;          // local reference to partition | |
|   Field **m_part_field_array;           // Part field array locally to save acc | |
|   uchar *m_ordered_rec_buffer;          // Row and key buffer for ord. idx scan | |
|   st_partition_ft_info *ft_first; | |
|   st_partition_ft_info *ft_current; | |
|   /* | |
|     Current index. | |
|     When used in key_rec_cmp: If clustered pk, index compare | |
|     must compare pk if given index is same for two rows. | |
|     So normally m_curr_key_info[0]= current index and m_curr_key[1]= NULL, | |
|     and if clustered pk, [0]= current index, [1]= pk, [2]= NULL | |
|   */ | |
|   KEY *m_curr_key_info[3];              // Current index | |
|   uchar *m_rec0;                        // table->record[0] | |
|   const uchar *m_err_rec;               // record which gave error | |
|   QUEUE m_queue;                        // Prio queue used by sorted read | |
|  | |
|   /* | |
|     Length of an element in m_ordered_rec_buffer. The elements are composed of | |
|  | |
|       [part_no] [table->record copy] [underlying_table_rowid] | |
|  | |
|     underlying_table_rowid is only stored when the table has no extended keys. | |
|   */ | |
|   size_t m_priority_queue_rec_len; | |
| 
 | |
|   /* | |
|     If true, then sorting records by key value also sorts them by their | |
|     underlying_table_rowid. | |
|   */ | |
|   bool m_using_extended_keys; | |
| 
 | |
|   /* | |
|     Since the partition handler is a handler on top of other handlers, it | |
|     is necessary to keep information about what the underlying handler | |
|     characteristics are. It is not possible to keep any handler instances | |
|     for this since the MySQL Server sometimes allocates handler objects | |
|     without freeing them. | |
|   */ | |
|   enum enum_handler_status | |
|   { | |
|     handler_not_initialized= 0, | |
|     handler_initialized, | |
|     handler_opened, | |
|     handler_closed | |
|   }; | |
|   enum_handler_status m_handler_status; | |
| 
 | |
|   uint m_reorged_parts;                  // Number of reorganised parts | |
|   uint m_tot_parts;                      // Total number of partitions; | |
|   uint m_num_locks;                       // For engines like ha_blackhole, which needs no locks | |
|   uint m_last_part;                      // Last file that we update,write,read | |
|   part_id_range m_part_spec;             // Which parts to scan | |
|   uint m_scan_value;                     // Value passed in rnd_init | |
|                                          // call | |
|   uint m_ref_length;                     // Length of position in this | |
|                                          // handler object | |
|   key_range m_start_key;                 // index read key range | |
|   enum partition_index_scan_type m_index_scan_type;// What type of index | |
|                                                    // scan | |
|   uint m_top_entry;                      // Which partition is to | |
|                                          // deliver next result | |
|   uint m_rec_length;                     // Local copy of record length | |
|  | |
|   bool m_ordered;                        // Ordered/Unordered index scan | |
|   bool m_pkey_is_clustered;              // Is primary key clustered | |
|   bool m_create_handler;                 // Handler used to create table | |
|   bool m_is_sub_partitioned;             // Is subpartitioned | |
|   bool m_ordered_scan_ongoing; | |
|   bool m_rnd_init_and_first; | |
|   bool m_ft_init_and_first; | |
| 
 | |
|   /* | |
|     If set, this object was created with ha_partition::clone and doesn't | |
|     "own" the m_part_info structure. | |
|   */ | |
|   ha_partition *m_is_clone_of; | |
|   MEM_ROOT *m_clone_mem_root; | |
| 
 | |
|   /* | |
|     We keep track if all underlying handlers are MyISAM since MyISAM has a | |
|     great number of extra flags not needed by other handlers. | |
|   */ | |
|   bool m_myisam;                         // Are all underlying handlers | |
|                                          // MyISAM | |
|   /* | |
|     We keep track of InnoDB handlers below since it requires proper setting | |
|     of query_id in fields at index_init and index_read calls. | |
|   */ | |
|   bool m_innodb;                        // Are all underlying handlers | |
|                                         // InnoDB | |
|   /* | |
|     When calling extra(HA_EXTRA_CACHE) we do not pass this to the underlying | |
|     handlers immediately. Instead we cache it and call the underlying | |
|     handler immediately before starting the scan on the partition. This is to | |
|     prevent allocating a READ CACHE for each partition in parallel when | |
|     performing a full table scan on MyISAM partitioned table. | |
|     This state is cleared by extra(HA_EXTRA_NO_CACHE). | |
|   */ | |
|   bool m_extra_cache; | |
|   uint m_extra_cache_size; | |
|   /* The same goes for HA_EXTRA_PREPARE_FOR_UPDATE */ | |
|   bool m_extra_prepare_for_update; | |
|   /* Which partition has active cache */ | |
|   uint m_extra_cache_part_id; | |
| 
 | |
|   void init_handler_variables(); | |
|   /* | |
|     Variables for lock structures. | |
|   */ | |
| 
 | |
|   bool auto_increment_lock;             /**< lock reading/updating auto_inc */ | |
|   /** | |
|     Flag to keep the auto_increment lock through out the statement. | |
|     This to ensure it will work with statement based replication. | |
|   */ | |
|   bool auto_increment_safe_stmt_log_lock; | |
|   /** For optimizing ha_start_bulk_insert calls */ | |
|   MY_BITMAP m_bulk_insert_started; | |
|   ha_rows   m_bulk_inserted_rows; | |
|   /** used for prediction of start_bulk_insert rows */ | |
|   enum_monotonicity_info m_part_func_monotonicity_info; | |
|   part_id_range m_direct_update_part_spec; | |
|   bool                m_pre_calling; | |
|   bool                m_pre_call_use_parallel; | |
|   /* Keep track of bulk access requests */ | |
|   bool                bulk_access_executing; | |
| 
 | |
|   /** keep track of locked partitions */ | |
|   MY_BITMAP m_locked_partitions; | |
|   /** Stores shared auto_increment etc. */ | |
|   Partition_share *part_share; | |
|   /** Temporary storage for new partitions Handler_shares during ALTER */ | |
|   List<Parts_share_refs> m_new_partitions_share_refs; | |
|   /** Sorted array of partition ids in descending order of number of rows. */ | |
|   uint32 *m_part_ids_sorted_by_num_of_records; | |
|   /* Compare function for my_qsort2, for reversed order. */ | |
|   static int compare_number_of_records(ha_partition *me, | |
|                                        const uint32 *a, | |
|                                        const uint32 *b); | |
|   /** keep track of partitions to call ha_reset */ | |
|   MY_BITMAP m_partitions_to_reset; | |
|   /** partitions that returned HA_ERR_KEY_NOT_FOUND. */ | |
|   MY_BITMAP m_key_not_found_partitions; | |
|   bool m_key_not_found; | |
|   List<String> *m_partitions_to_open; | |
|   MY_BITMAP m_opened_partitions; | |
|   /** This is one of the m_file-s that is guaranteed to be opened. */ | |
|   /**  It is set in open_read_partitions() */ | |
|   handler *m_file_sample; | |
| public: | |
|   handler **get_child_handlers() | |
|   { | |
|     return m_file; | |
|   } | |
|   virtual part_id_range *get_part_spec() | |
|   { | |
|     return &m_part_spec; | |
|   } | |
|   virtual uint get_no_current_part_id() | |
|   { | |
|     return NO_CURRENT_PART_ID; | |
|   } | |
|   Partition_share *get_part_share() { return part_share; } | |
|   handler *clone(const char *name, MEM_ROOT *mem_root); | |
|   virtual void set_part_info(partition_info *part_info) | |
|   { | |
|      m_part_info= part_info; | |
|      m_is_sub_partitioned= part_info->is_sub_partitioned(); | |
|   } | |
| 
 | |
|   virtual void return_record_by_parent(); | |
| 
 | |
|   virtual bool vers_can_native(THD *thd) | |
|   { | |
|     if (thd->lex->part_info) | |
|     { | |
|       // PARTITION BY SYSTEM_TIME is not supported for now | |
|       return thd->lex->part_info->part_type != VERSIONING_PARTITION; | |
|     } | |
|     else | |
|     { | |
|       bool can= true; | |
|       for (uint i= 0; i < m_tot_parts && can; i++) | |
|         can= can && m_file[i]->vers_can_native(thd); | |
|       return can; | |
|     } | |
|   } | |
| 
 | |
|   /* | |
|     ------------------------------------------------------------------------- | |
|     MODULE create/delete handler object | |
|     ------------------------------------------------------------------------- | |
|     Object create/delete method. Normally called when a table object | |
|     exists. There is also a method to create the handler object with only | |
|     partition information. This is used from mysql_create_table when the | |
|     table is to be created and the engine type is deduced to be the | |
|     partition handler. | |
|     ------------------------------------------------------------------------- | |
|   */ | |
|     ha_partition(handlerton *hton, TABLE_SHARE * table); | |
|     ha_partition(handlerton *hton, partition_info * part_info); | |
|     ha_partition(handlerton *hton, TABLE_SHARE *share, | |
|                  partition_info *part_info_arg, | |
|                  ha_partition *clone_arg, | |
|                  MEM_ROOT *clone_mem_root_arg); | |
|    ~ha_partition(); | |
|    void ha_partition_init(); | |
|   /* | |
|     A partition handler has no characteristics in itself. It only inherits | |
|     those from the underlying handlers. Here we set-up those constants to | |
|     enable later calls of the methods to retrieve constants from the under- | |
|     lying handlers. Returns false if not successful. | |
|   */ | |
|    bool initialize_partition(MEM_ROOT *mem_root); | |
| 
 | |
|   /* | |
|     ------------------------------------------------------------------------- | |
|     MODULE meta data changes | |
|     ------------------------------------------------------------------------- | |
|     Meta data routines to CREATE, DROP, RENAME table and often used at | |
|     ALTER TABLE (update_create_info used from ALTER TABLE and SHOW ..). | |
|  | |
|     create_partitioning_metadata is called before opening a new handler object | |
|     with openfrm to call create. It is used to create any local handler | |
|     object needed in opening the object in openfrm | |
|     ------------------------------------------------------------------------- | |
|   */ | |
|   virtual int delete_table(const char *from); | |
|   virtual int rename_table(const char *from, const char *to); | |
|   virtual int create(const char *name, TABLE *form, | |
| 		     HA_CREATE_INFO *create_info); | |
|   virtual int create_partitioning_metadata(const char *name, | |
|                                    const char *old_name, int action_flag); | |
|   virtual void update_create_info(HA_CREATE_INFO *create_info); | |
|   virtual int change_partitions(HA_CREATE_INFO *create_info, | |
|                                 const char *path, | |
|                                 ulonglong * const copied, | |
|                                 ulonglong * const deleted, | |
|                                 const uchar *pack_frm_data, | |
|                                 size_t pack_frm_len); | |
|   virtual int drop_partitions(const char *path); | |
|   virtual int rename_partitions(const char *path); | |
|   bool get_no_parts(const char *name, uint *num_parts) | |
|   { | |
|     DBUG_ENTER("ha_partition::get_no_parts"); | |
|     *num_parts= m_tot_parts; | |
|     DBUG_RETURN(0); | |
|   } | |
|   virtual void change_table_ptr(TABLE *table_arg, TABLE_SHARE *share); | |
|   virtual bool check_if_incompatible_data(HA_CREATE_INFO *create_info, | |
|                                           uint table_changes); | |
|   void update_part_create_info(HA_CREATE_INFO *create_info, uint part_id) | |
|   { | |
|     m_file[part_id]->update_create_info(create_info); | |
|   } | |
| private: | |
|   int copy_partitions(ulonglong * const copied, ulonglong * const deleted); | |
|   void cleanup_new_partition(uint part_count); | |
|   int prepare_new_partition(TABLE *table, HA_CREATE_INFO *create_info, | |
|                             handler *file, const char *part_name, | |
|                             partition_element *p_elem, | |
|                             uint disable_non_uniq_indexes); | |
|   /* | |
|     delete_table and rename_table uses very similar logic which | |
|     is packed into this routine. | |
|   */ | |
|   uint del_ren_table(const char *from, const char *to); | |
|   /* | |
|     One method to create the table_name.par file containing the names of the | |
|     underlying partitions, their engine and the number of partitions. | |
|     And one method to read it in. | |
|   */ | |
|   bool create_handler_file(const char *name); | |
|   bool setup_engine_array(MEM_ROOT *mem_root, handlerton *first_engine); | |
|   bool read_par_file(const char *name); | |
|   handlerton *get_def_part_engine(const char *name); | |
|   bool get_from_handler_file(const char *name, MEM_ROOT *mem_root, | |
|                              bool is_clone); | |
|   bool new_handlers_from_part_info(MEM_ROOT *mem_root); | |
|   bool create_handlers(MEM_ROOT *mem_root); | |
|   void clear_handler_file(); | |
|   int set_up_table_before_create(TABLE *table_arg, | |
|                                  const char *partition_name_with_path, | |
|                                  HA_CREATE_INFO *info, | |
|                                  partition_element *p_elem); | |
|   partition_element *find_partition_element(uint part_id); | |
|   bool insert_partition_name_in_hash(const char *name, uint part_id, | |
|                                      bool is_subpart); | |
|   bool populate_partition_name_hash(); | |
|   Partition_share *get_share(); | |
|   bool set_ha_share_ref(Handler_share **ha_share); | |
|   void fix_data_dir(char* path); | |
|   bool init_partition_bitmaps(); | |
|   void free_partition_bitmaps(); | |
| 
 | |
| public: | |
| 
 | |
|   /* | |
|     ------------------------------------------------------------------------- | |
|     MODULE open/close object | |
|     ------------------------------------------------------------------------- | |
|     Open and close handler object to ensure all underlying files and | |
|     objects allocated and deallocated for query handling is handled | |
|     properly. | |
|     ------------------------------------------------------------------------- | |
|  | |
|     A handler object is opened as part of its initialisation and before | |
|     being used for normal queries (though not always before meta-data changes). | |
|     If the object was opened it will also be closed before being deleted. | |
|   */ | |
|   virtual int open(const char *name, int mode, uint test_if_locked); | |
|   virtual int close(void); | |
| 
 | |
|   /* | |
|     ------------------------------------------------------------------------- | |
|     MODULE start/end statement | |
|     ------------------------------------------------------------------------- | |
|     This module contains methods that are used to understand start/end of | |
|     statements, transaction boundaries, and aid for proper concurrency | |
|     control. | |
|     The partition handler need not implement abort and commit since this | |
|     will be handled by any underlying handlers implementing transactions. | |
|     There is only one call to each handler type involved per transaction | |
|     and these go directly to the handlers supporting transactions | |
|     ------------------------------------------------------------------------- | |
|   */ | |
|   virtual THR_LOCK_DATA **store_lock(THD * thd, THR_LOCK_DATA ** to, | |
| 				     enum thr_lock_type lock_type); | |
|   virtual int external_lock(THD * thd, int lock_type); | |
|   LEX_CSTRING *engine_name() { return hton_name(partition_ht()); } | |
|   /* | |
|     When table is locked a statement is started by calling start_stmt | |
|     instead of external_lock | |
|   */ | |
|   virtual int start_stmt(THD * thd, thr_lock_type lock_type); | |
|   /* | |
|     Lock count is number of locked underlying handlers (I assume) | |
|   */ | |
|   virtual uint lock_count(void) const; | |
|   /* | |
|     Call to unlock rows not to be updated in transaction | |
|   */ | |
|   virtual void unlock_row(); | |
|   /* | |
|     Check if semi consistent read | |
|   */ | |
|   virtual bool was_semi_consistent_read(); | |
|   /* | |
|     Call to hint about semi consistent read | |
|   */ | |
|   virtual void try_semi_consistent_read(bool); | |
| 
 | |
|   /* | |
|     NOTE: due to performance and resource issues with many partitions, | |
|     we only use the m_psi on the ha_partition handler, excluding all | |
|     partitions m_psi. | |
|   */ | |
| #ifdef HAVE_M_PSI_PER_PARTITION | |
|   /* | |
|     Bind the table/handler thread to track table i/o. | |
|   */ | |
|   virtual void unbind_psi(); | |
|   virtual void rebind_psi(); | |
| #endif | |
|   /* | |
|     ------------------------------------------------------------------------- | |
|     MODULE change record | |
|     ------------------------------------------------------------------------- | |
|     This part of the handler interface is used to change the records | |
|     after INSERT, DELETE, UPDATE, REPLACE method calls but also other | |
|     special meta-data operations as ALTER TABLE, LOAD DATA, TRUNCATE. | |
|     ------------------------------------------------------------------------- | |
|  | |
|     These methods are used for insert (write_row), update (update_row) | |
|     and delete (delete_row). All methods to change data always work on | |
|     one row at a time. update_row and delete_row also contains the old | |
|     row. | |
|     delete_all_rows will delete all rows in the table in one call as a | |
|     special optimisation for DELETE from table; | |
|  | |
|     Bulk inserts are supported if all underlying handlers support it. | |
|     start_bulk_insert and end_bulk_insert is called before and after a | |
|     number of calls to write_row. | |
|   */ | |
|   virtual int write_row(uchar * buf); | |
|   virtual bool start_bulk_update(); | |
|   virtual int exec_bulk_update(ha_rows *dup_key_found); | |
|   virtual int end_bulk_update(); | |
|   virtual int bulk_update_row(const uchar *old_data, const uchar *new_data, | |
|                               ha_rows *dup_key_found); | |
|   virtual int update_row(const uchar * old_data, const uchar * new_data); | |
|   virtual int direct_update_rows_init(List<Item> *update_fields); | |
|   virtual int pre_direct_update_rows_init(List<Item> *update_fields); | |
|   virtual int direct_update_rows(ha_rows *update_rows); | |
|   virtual int pre_direct_update_rows(); | |
|   virtual bool start_bulk_delete(); | |
|   virtual int end_bulk_delete(); | |
|   virtual int delete_row(const uchar * buf); | |
|   virtual int direct_delete_rows_init(); | |
|   virtual int pre_direct_delete_rows_init(); | |
|   virtual int direct_delete_rows(ha_rows *delete_rows); | |
|   virtual int pre_direct_delete_rows(); | |
|   virtual int delete_all_rows(void); | |
|   virtual int truncate(); | |
|   virtual void start_bulk_insert(ha_rows rows, uint flags); | |
|   virtual int end_bulk_insert(); | |
| private: | |
|   ha_rows guess_bulk_insert_rows(); | |
|   void start_part_bulk_insert(THD *thd, uint part_id); | |
|   long estimate_read_buffer_size(long original_size); | |
| public: | |
| 
 | |
|   /* | |
|     Method for truncating a specific partition. | |
|     (i.e. ALTER TABLE t1 TRUNCATE PARTITION p). | |
|  | |
|     @remark This method is a partitioning-specific hook | |
|             and thus not a member of the general SE API. | |
|   */ | |
|   int truncate_partition(Alter_info *, bool *binlog_stmt); | |
| 
 | |
|   virtual bool is_fatal_error(int error, uint flags) | |
|   { | |
|     if (!handler::is_fatal_error(error, flags) || | |
|         error == HA_ERR_NO_PARTITION_FOUND || | |
|         error == HA_ERR_NOT_IN_LOCK_PARTITIONS) | |
|       return FALSE; | |
|     return TRUE; | |
|   } | |
| 
 | |
| 
 | |
|   /* | |
|     ------------------------------------------------------------------------- | |
|     MODULE full table scan | |
|     ------------------------------------------------------------------------- | |
|     This module is used for the most basic access method for any table | |
|     handler. This is to fetch all data through a full table scan. No | |
|     indexes are needed to implement this part. | |
|     It contains one method to start the scan (rnd_init) that can also be | |
|     called multiple times (typical in a nested loop join). Then proceeding | |
|     to the next record (rnd_next) and closing the scan (rnd_end). | |
|     To remember a record for later access there is a method (position) | |
|     and there is a method used to retrieve the record based on the stored | |
|     position. | |
|     The position can be a file position, a primary key, a ROWID dependent | |
|     on the handler below. | |
|     ------------------------------------------------------------------------- | |
|   */ | |
|   /* | |
|     unlike index_init(), rnd_init() can be called two times | |
|     without rnd_end() in between (it only makes sense if scan=1). | |
|     then the second call should prepare for the new table scan | |
|     (e.g if rnd_init allocates the cursor, second call should | |
|     position it to the start of the table, no need to deallocate | |
|     and allocate it again). | |
|   */ | |
|   virtual int rnd_init(bool scan); | |
|   virtual int rnd_end(); | |
|   virtual int rnd_next(uchar * buf); | |
|   virtual int rnd_pos(uchar * buf, uchar * pos); | |
|   virtual int rnd_pos_by_record(uchar *record); | |
|   virtual void position(const uchar * record); | |
| 
 | |
|   /* | |
|     ------------------------------------------------------------------------- | |
|     MODULE index scan | |
|     ------------------------------------------------------------------------- | |
|     This part of the handler interface is used to perform access through | |
|     indexes. The interface is defined as a scan interface but the handler | |
|     can also use key lookup if the index is a unique index or a primary | |
|     key index. | |
|     Index scans are mostly useful for SELECT queries but are an important | |
|     part also of UPDATE, DELETE, REPLACE and CREATE TABLE table AS SELECT | |
|     and so forth. | |
|     Naturally an index is needed for an index scan and indexes can either | |
|     be ordered or hash based. Some ordered indexes can return data in order | |
|     but not necessarily all of them. | |
|     There are many flags that define the behavior of indexes in the | |
|     various handlers. These methods are found in the optimizer module. | |
|     ------------------------------------------------------------------------- | |
|  | |
|     index_read is called to start a scan of an index. The find_flag defines | |
|     the semantics of the scan. These flags are defined in | |
|     include/my_base.h | |
|     index_read_idx is the same but also initializes the index before doing | |
|     the same thing as index_read. Thus it is similar to index_init followed | |
|     by index_read. This is also how we implement it. | |
|  | |
|     index_read/index_read_idx does also return the first row. Thus for | |
|     key lookups, the index_read will be the only call to the handler in | |
|     the index scan. | |
|  | |
|     index_init initializes an index before using it and index_end does | |
|     any end processing needed. | |
|   */ | |
|   virtual int index_read_map(uchar * buf, const uchar * key, | |
|                              key_part_map keypart_map, | |
|                              enum ha_rkey_function find_flag); | |
|   virtual int index_init(uint idx, bool sorted); | |
|   virtual int index_end(); | |
| 
 | |
|   /** | |
|     @brief | |
|     Positions an index cursor to the index specified in the handle. Fetches the | |
|     row if available. If the key value is null, begin at first key of the | |
|     index. | |
|   */ | |
|   virtual int index_read_idx_map(uchar *buf, uint index, const uchar *key, | |
|                                  key_part_map keypart_map, | |
|                                  enum ha_rkey_function find_flag); | |
|   /* | |
|     These methods are used to jump to next or previous entry in the index | |
|     scan. There are also methods to jump to first and last entry. | |
|   */ | |
|   virtual int index_next(uchar * buf); | |
|   virtual int index_prev(uchar * buf); | |
|   virtual int index_first(uchar * buf); | |
|   virtual int index_last(uchar * buf); | |
|   virtual int index_next_same(uchar * buf, const uchar * key, uint keylen); | |
| 
 | |
|   int index_read_last_map(uchar *buf, | |
|                           const uchar *key, | |
|                           key_part_map keypart_map); | |
| 
 | |
|   /* | |
|     read_first_row is virtual method but is only implemented by | |
|     handler.cc, no storage engine has implemented it so neither | |
|     will the partition handler. | |
|  | |
|     virtual int read_first_row(uchar *buf, uint primary_key); | |
|   */ | |
| 
 | |
| 
 | |
|   virtual int read_range_first(const key_range * start_key, | |
| 			       const key_range * end_key, | |
| 			       bool eq_range, bool sorted); | |
|   virtual int read_range_next(); | |
| 
 | |
| 
 | |
|   HANDLER_BUFFER *m_mrr_buffer; | |
|   uint *m_mrr_buffer_size; | |
|   uchar *m_mrr_full_buffer; | |
|   uint m_mrr_full_buffer_size; | |
|   uint m_mrr_new_full_buffer_size; | |
|   MY_BITMAP m_mrr_used_partitions; | |
|   uint *m_stock_range_seq; | |
|   /* not used: uint m_current_range_seq; */ | |
| 
 | |
|   /* Value of mrr_mode passed to ha_partition::multi_range_read_init */ | |
|   uint m_mrr_mode; | |
| 
 | |
|   /* Value of n_ranges passed to ha_partition::multi_range_read_init */ | |
|   uint m_mrr_n_ranges; | |
| 
 | |
|   /* | |
|     Ordered MRR mode:  m_range_info[N] has the range_id of the last record that | |
|     we've got from partition N | |
|   */ | |
|   range_id_t *m_range_info; | |
| 
 | |
|   /* | |
|     TRUE <=> This ha_partition::multi_range_read_next() call is the first one | |
|   */ | |
|   bool m_multi_range_read_first; | |
| 
 | |
|   /* not used: uint m_mrr_range_init_flags; */ | |
| 
 | |
|   /* Number of elements in the list pointed by m_mrr_range_first. Not used */ | |
|   uint m_mrr_range_length; | |
| 
 | |
|   /* Linked list of ranges to scan */ | |
|   PARTITION_KEY_MULTI_RANGE *m_mrr_range_first; | |
|   PARTITION_KEY_MULTI_RANGE *m_mrr_range_current; | |
| 
 | |
|   /* | |
|     For each partition: number of ranges MRR scan will scan in the partition | |
|   */ | |
|   uint *m_part_mrr_range_length; | |
| 
 | |
|   /* For each partition: List of ranges to scan in this partition */ | |
|   PARTITION_PART_KEY_MULTI_RANGE **m_part_mrr_range_first; | |
|   PARTITION_PART_KEY_MULTI_RANGE **m_part_mrr_range_current; | |
|   PARTITION_PART_KEY_MULTI_RANGE_HLD *m_partition_part_key_multi_range_hld; | |
| 
 | |
|   /* | |
|     Sequence of ranges to be scanned (TODO: why not store this in | |
|     handler::mrr_{iter,funcs}?) | |
|   */ | |
|   range_seq_t m_seq; | |
|   RANGE_SEQ_IF *m_seq_if; | |
| 
 | |
|   /* Range iterator structure to be supplied to partitions */ | |
|   RANGE_SEQ_IF m_part_seq_if; | |
| 
 | |
|   virtual int multi_range_key_create_key( | |
|     RANGE_SEQ_IF *seq, | |
|     range_seq_t seq_it | |
|   ); | |
|   virtual ha_rows multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq, | |
|                                               void *seq_init_param, | |
|                                               uint n_ranges, uint *bufsz, | |
|                                               uint *mrr_mode, | |
|                                               Cost_estimate *cost); | |
|   virtual ha_rows multi_range_read_info(uint keyno, uint n_ranges, uint keys, | |
|                                         uint key_parts, uint *bufsz, | |
|                                         uint *mrr_mode, Cost_estimate *cost); | |
|   virtual int multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param, | |
|                                     uint n_ranges, uint mrr_mode, | |
|                                     HANDLER_BUFFER *buf); | |
|   virtual int multi_range_read_next(range_id_t *range_info); | |
|   virtual int multi_range_read_explain_info(uint mrr_mode, char *str, | |
|                                             size_t size); | |
|   /* | |
|     Returns the current value of the m_last_part member unchanged. | |
|     NOTE(review): presumably the id of the partition used most recently; | |
|     confirm where m_last_part is maintained. | |
|   */ | |
|   uint last_part() { return m_last_part; } | |
| 
 | |
| private: | |
|   bool init_record_priority_queue(); | |
|   void destroy_record_priority_queue(); | |
|   int common_index_read(uchar * buf, bool have_start_key); | |
|   int common_first_last(uchar * buf); | |
|   int partition_scan_set_up(uchar * buf, bool idx_read_flag); | |
|   bool check_parallel_search(); | |
|   int handle_pre_scan(bool reverse_order, bool use_parallel); | |
|   int handle_unordered_next(uchar * buf, bool next_same); | |
|   int handle_unordered_scan_next_partition(uchar * buf); | |
|   int handle_ordered_index_scan(uchar * buf, bool reverse_order); | |
|   int handle_ordered_index_scan_key_not_found(); | |
|   int handle_ordered_next(uchar * buf, bool next_same); | |
|   int handle_ordered_prev(uchar * buf); | |
|   void return_top_record(uchar * buf); | |
|   void swap_blobs(uchar* rec_buf, Ordered_blob_storage ** storage, bool restore); | |
| public: | |
|   /* | |
|     ------------------------------------------------------------------------- | |
|     MODULE information calls | |
|     ------------------------------------------------------------------------- | |
|     These calls are used to inform the handler of specifics of the ongoing | |
|     scans and other actions. Most of these are used for optimisation | |
|     purposes. | |
|     ------------------------------------------------------------------------- | |
|   */ | |
|   virtual int info(uint); | |
|   void get_dynamic_partition_info(PARTITION_STATS *stat_info, | |
|                                   uint part_id); | |
|   void set_partitions_to_open(List<String> *partition_names); | |
|   int change_partitions_to_open(List<String> *partition_names); | |
|   int open_read_partitions(char *name_buff, size_t name_buff_size); | |
|   virtual int extra(enum ha_extra_function operation); | |
|   virtual int extra_opt(enum ha_extra_function operation, ulong arg); | |
|   virtual int reset(void); | |
|   virtual uint count_query_cache_dependant_tables(uint8 *tables_type); | |
|   virtual my_bool | |
|     register_query_cache_dependant_tables(THD *thd, | |
|                                           Query_cache *cache, | |
|                                           Query_cache_block_table **block, | |
|                                           uint *n); | |
| 
 | |
| private: | |
|   typedef int handler_callback(handler *, void *); | |
| 
 | |
|   my_bool reg_query_cache_dependant_table(THD *thd, | |
|                                           char *engine_key, | |
|                                           uint engine_key_len, | |
|                                           char *query_key, uint query_key_len, | |
|                                           uint8 type, | |
|                                           Query_cache *cache, | |
|                                           Query_cache_block_table | |
|                                           **block_table, | |
|                                           handler *file, uint *n); | |
|   static const uint NO_CURRENT_PART_ID= NOT_A_PARTITION_ID; | |
|   int loop_partitions(handler_callback callback, void *param); | |
|   int loop_extra_alter(enum ha_extra_function operations); | |
|   void late_extra_cache(uint partition_id); | |
|   void late_extra_no_cache(uint partition_id); | |
|   void prepare_extra_cache(uint cachesize); | |
|   /* Returns the m_file_sample handler pointer as is; the object is not modified. */ | |
|   handler *get_open_file_sample() const { return m_file_sample; } | |
| public: | |
| 
 | |
|   /* | |
|     ------------------------------------------------------------------------- | |
|     MODULE optimiser support | |
|     ------------------------------------------------------------------------- | |
|     ------------------------------------------------------------------------- | |
|   */ | |
| 
 | |
|   /* | |
|     NOTE !!!!!! | |
|      ------------------------------------------------------------------------- | |
|      ------------------------------------------------------------------------- | |
|      One important part of the public handler interface that is not depicted in | |
|      the methods is the attribute records | |
|  | |
|      which is defined in the base class. This is looked upon directly and is | |
|      set by calling info(HA_STATUS_INFO) ? | |
|      ------------------------------------------------------------------------- | |
|   */ | |
| 
 | |
| private: | |
|   /* Helper functions for optimizer hints. */ | |
|   ha_rows min_rows_for_estimate(); | |
|   uint get_biggest_used_partition(uint *part_index); | |
| public: | |
| 
 | |
|   /* | |
|     keys_to_use_for_scanning can probably be implemented as the | |
|     intersection of all underlying handlers if mixed handlers are used. | |
|     This method is used to derive whether an index can be used for | |
|     index-only scanning when performing an ORDER BY query. | |
|     Only called from one place in sql_select.cc | |
|   */ | |
|   virtual const key_map *keys_to_use_for_scanning(); | |
| 
 | |
|   /* | |
|     Called in test_quick_select to determine if indexes should be used. | |
|   */ | |
|   virtual double scan_time(); | |
| 
 | |
|   /* | |
|     The next method will never be called if you do not implement indexes. | |
|   */ | |
|   virtual double read_time(uint index, uint ranges, ha_rows rows); | |
|   /* | |
|     For the given range how many records are estimated to be in this range. | |
|     Used by optimiser to calculate cost of using a particular index. | |
|   */ | |
|   virtual ha_rows records_in_range(uint inx, key_range * min_key, | |
| 				   key_range * max_key); | |
| 
 | |
|   /* | |
|     Upper bound of the number of records returned in a scan is the sum of all | |
|     underlying handlers. | |
|   */ | |
|   virtual ha_rows estimate_rows_upper_bound(); | |
| 
 | |
|   /* | |
|     table_cache_type is implemented by the underlying handler but all | |
|     underlying handlers must have the same implementation for it to work. | |
|   */ | |
|   virtual uint8 table_cache_type(); | |
|   virtual ha_rows records(); | |
| 
 | |
|   /* Calculate hash value for PARTITION BY KEY tables. */ | |
|   static uint32 calculate_key_hash_value(Field **field_array); | |
| 
 | |
|   /* | |
|     ------------------------------------------------------------------------- | |
|     MODULE print messages | |
|     ------------------------------------------------------------------------- | |
|     This module contains various methods that return text messages for | |
|     table types, index type and error messages. | |
|     ------------------------------------------------------------------------- | |
|   */ | |
|   /* | |
|     The name of the index type that will be used for display | |
|     Here we must ensure that all handlers use the same index type | |
|     for each index created. | |
|   */ | |
|   virtual const char *index_type(uint inx); | |
| 
 | |
|   /* The name of the table type that will be used for display purposes */ | |
|   virtual const char *table_type() const; | |
| 
 | |
|   /* The name of the row type used for the underlying tables. */ | |
|   virtual enum row_type get_row_type() const; | |
| 
 | |
|   /* | |
|      Handler specific error messages | |
|   */ | |
|   virtual void print_error(int error, myf errflag); | |
|   virtual bool get_error_message(int error, String * buf); | |
|   /* | |
|    ------------------------------------------------------------------------- | |
|     MODULE handler characteristics | |
|     ------------------------------------------------------------------------- | |
|     This module contains a number of methods defining limitations and | |
|     characteristics of the handler. The partition handler will calculate | |
|     these characteristics based on underlying handler characteristics. | |
|     ------------------------------------------------------------------------- | |
|  | |
|     This is a list of flags that says what the storage engine | |
|     implements. The current table flags are documented in handler.h | |
|     The partition handler will support whatever the underlying handlers | |
|     support except when specifically mentioned below about exceptions | |
|     to this rule. | |
|     NOTE: This cannot be cached since it can depend on TRANSACTION ISOLATION | |
|     LEVEL which is dynamic, see bug#39084. | |
|  | |
|     HA_TABLE_SCAN_ON_INDEX: | |
|     Used to avoid scanning full tables on an index. If this flag is set then | |
|     the handler always has a primary key (hidden if not defined) and this | |
|     index is used for scanning rather than a full table scan in all | |
|     situations. | |
|     (InnoDB, Federated) | |
|  | |
|     HA_REC_NOT_IN_SEQ: | |
|     This flag is set for handlers that cannot guarantee that the rows are | |
|     returned according to incremental positions (0, 1, 2, 3...). | |
|     This also means that rnd_next() should return HA_ERR_RECORD_DELETED | |
|     if it finds a deleted row. | |
|     (MyISAM (not fixed length row), HEAP, InnoDB) | |
|  | |
|     HA_CAN_GEOMETRY: | |
|     Can the storage engine handle spatial data. | |
|     Used to check that no spatial attributes are declared unless | |
|     the storage engine is capable of handling it. | |
|     (MyISAM) | |
|  | |
|     HA_FAST_KEY_READ: | |
|     Setting this flag indicates that the handler is equally fast in | |
|     finding a row by key as by position. | |
|     This flag is used in a very special situation in conjunction with | |
|     filesort's. For further explanation see intro to init_read_record. | |
|     (HEAP, InnoDB) | |
|  | |
|     HA_NULL_IN_KEY: | |
|     Are NULL values allowed in indexes? | |
|     If this is not allowed then it is not possible to use an index on a | |
|     NULLable field. | |
|     (HEAP, MyISAM, InnoDB) | |
|  | |
|     HA_DUPLICATE_POS: | |
|     Tells that the position of the conflicting duplicate key record is | |
|     stored in table->file->dupp_ref. (insert uses rnd_pos() on this to | |
|     find the duplicated row) | |
|     (MyISAM) | |
|  | |
|     HA_CAN_INDEX_BLOBS: | |
|     Is the storage engine capable of defining an index of a prefix on | |
|     a BLOB attribute. | |
|     (Federated, MyISAM, InnoDB) | |
|  | |
|     HA_AUTO_PART_KEY: | |
|     Auto increment fields can be part of a multi-part key. For second part | |
|     auto-increment keys, the auto_incrementing is done in handler.cc | |
|     (Federated, MyISAM) | |
|  | |
|     HA_REQUIRE_PRIMARY_KEY: | |
|     Can't define a table without primary key (and cannot handle a table | |
|     with hidden primary key) | |
|     (No handler has this limitation currently) | |
|  | |
|     HA_WANTS_PRIMARY_KEY: | |
|     Can't define a table without primary key except sequences | |
|     (Only InnoDB has this when using innodb_force_primary_key == ON) | |
|  | |
|     HA_STATS_RECORDS_IS_EXACT: | |
|     Does the counter of records after the info call specify an exact | |
|     value or not. If it does this flag is set. | |
|     Only MyISAM and HEAP use exact count. | |
|  | |
|     HA_CAN_INSERT_DELAYED: | |
|     Can the storage engine support delayed inserts. | |
|     To start with the partition handler will not support delayed inserts. | |
|     Further investigation needed. | |
|     (HEAP, MyISAM) | |
|  | |
|     HA_PRIMARY_KEY_IN_READ_INDEX: | |
|     This parameter is set when the handler will also return the primary key | |
|     when doing read-only-key on another index. | |
|  | |
|     HA_NOT_DELETE_WITH_CACHE: | |
|     Seems to be an old MyISAM feature that is no longer used. No handler | |
|     has it defined but it is checked in init_read_record. | |
|     Further investigation needed. | |
|     (No handler defines it) | |
|  | |
|     HA_NO_PREFIX_CHAR_KEYS: | |
|     Indexes on prefixes of character fields are not allowed. | |
|     (Federated) | |
|  | |
|     HA_CAN_FULLTEXT: | |
|     Does the storage engine support fulltext indexes | |
|     The partition handler will start by not supporting fulltext indexes. | |
|     (MyISAM) | |
|  | |
|     HA_CAN_SQL_HANDLER: | |
|     Can the HANDLER interface in the MySQL API be used towards this | |
|     storage engine. | |
|     (MyISAM, InnoDB) | |
|  | |
|     HA_NO_AUTO_INCREMENT: | |
|     Set if the storage engine does not support auto increment fields. | |
|     (Currently not set by any handler) | |
|  | |
|     HA_HAS_CHECKSUM: | |
|     Special MyISAM feature. Has special SQL support in CREATE TABLE. | |
|     No special handling needed by partition handler. | |
|     (MyISAM) | |
|  | |
|     HA_FILE_BASED: | |
|     Should file names always be in lower case (used by engines | |
|     that map table names to file names). | |
|     Since partition handler has a local file this flag is set. | |
|     (Federated, MyISAM) | |
|  | |
|     HA_CAN_BIT_FIELD: | |
|     Is the storage engine capable of handling bit fields? | |
|     (MyISAM) | |
|  | |
|     HA_NEED_READ_RANGE_BUFFER: | |
|     Is Read Multi-Range supported => need multi read range buffer | |
|     This parameter specifies whether a buffer for read multi range | |
|     is needed by the handler. Whether the handler supports this | |
|     feature or not is dependent on whether the handler implements | |
|     read_multi_range* calls or not. The only handler currently | |
|     supporting this feature is NDB so the partition handler need | |
|     not handle this call. There are methods in handler.cc that will | |
|     transfer those calls into index_read and other calls in the | |
|     index scan module. | |
|     (No handler defines it) | |
|  | |
|     HA_PRIMARY_KEY_REQUIRED_FOR_POSITION: | |
|     Does the storage engine need a PK for position? | |
|     (InnoDB) | |
|  | |
|     HA_FILE_BASED is always set for partition handler since we use a | |
|     special file for handling names of partitions, engine types. | |
|     HA_REC_NOT_IN_SEQ is always set for partition handler since we cannot | |
|     guarantee that the records will be returned in sequence. | |
|     HA_DUPLICATE_POS, | |
|     HA_CAN_INSERT_DELAYED, HA_PRIMARY_KEY_REQUIRED_FOR_POSITION is disabled | |
|     until further investigated. | |
|   */ | |
|   virtual Table_flags table_flags() const; | |
| 
 | |
|   /* | |
|     This is a bitmap of flags that says how the storage engine | |
|     implements indexes. The current index flags are documented in | |
|     handler.h. If you do not implement indexes, just return zero | |
|     here. | |
|  | |
|     part is the key part to check. First key part is 0 | |
|     If all_parts is set, MySQL wants to know the flags for the combined | |
|     index up to and including 'part'. | |
|  | |
|     HA_READ_NEXT: | |
|     Does the index support read next, this is assumed in the server | |
|     code and never checked so all indexes must support this. | |
|     Note that the handler can be used even if it doesn't have any index. | |
|     (HEAP, MyISAM, Federated, InnoDB) | |
|  | |
|     HA_READ_PREV: | |
|     Can the index be used to scan backwards. | |
|     (HEAP, MyISAM, InnoDB) | |
|  | |
|     HA_READ_ORDER: | |
|     Can the index deliver its record in index order. Typically true for | |
|     all ordered indexes and not true for hash indexes. | |
|     In first step this is not true for partition handler until a merge | |
|     sort has been implemented in partition handler. | |
|     Used to set keymap part_of_sortkey | |
|     This keymap is only used to find indexes usable for resolving an ORDER BY | |
|     in the query. Thus in most cases index_read will work just fine without | |
|     order in result production. When this flag is set it is however safe to | |
|     order all output started by index_read since most engines do this. With | |
|     read_multi_range calls there is a specific flag setting order or not | |
|     order so in those cases ordering of index output can be avoided. | |
|     (InnoDB, HEAP, MyISAM) | |
|  | |
|     HA_READ_RANGE: | |
|     Specify whether index can handle ranges, typically true for all | |
|     ordered indexes and not true for hash indexes. | |
|     Used by optimiser to check if ranges (as key >= 5) can be optimised | |
|     by index. | |
|     (InnoDB, MyISAM, HEAP) | |
|  | |
|     HA_ONLY_WHOLE_INDEX: | |
|     Can't use part key searches. This is typically true for hash indexes | |
|     and typically not true for ordered indexes. | |
|     (Federated, HEAP) | |
|  | |
|     HA_KEYREAD_ONLY: | |
|     Does the storage engine support index-only scans on this index. | |
|     Enables use of HA_EXTRA_KEYREAD and HA_EXTRA_NO_KEYREAD | |
|     Used to set key_map keys_for_keyread and to check in optimiser for | |
|     index-only scans.  When doing a read under HA_EXTRA_KEYREAD the handler | |
|     only has to fill in the columns the key covers. If | |
|     HA_PRIMARY_KEY_IN_READ_INDEX is set then also the PRIMARY KEY columns | |
|     must be updated in the row. | |
|     (InnoDB, MyISAM) | |
|   */ | |
|   virtual ulong index_flags(uint inx, uint part, bool all_parts) const | |
|   { | |
|     /* | |
|       Forward the question to the handler stored in m_file[0] and return | |
|       its answer unchanged; no other partition is consulted. | |
|  | |
|       The following code is not safe if you are using different | |
|       storage engines or different index types per partition. | |
|     */ | |
|     return m_file[0]->index_flags(inx, part, all_parts); | |
|   } | |
| 
 | |
|   /** | |
|     wrapper function for handlerton alter_table_flags, since | |
|     the ha_partition_hton cannot know all its capabilities | |
|   */ | |
|   virtual alter_table_operations alter_table_flags(alter_table_operations flags); | |
|   /* | |
|     unireg.cc will call the following to make sure that the storage engine | |
|     can handle the data it is about to send. | |
|  | |
|     The maximum supported value is the minimum over all handlers in the table | |
|   */ | |
|   uint min_of_the_max_uint(uint (handler::*operator_func)(void) const) const; | |
|   virtual uint max_supported_record_length() const; | |
|   virtual uint max_supported_keys() const; | |
|   virtual uint max_supported_key_parts() const; | |
|   virtual uint max_supported_key_length() const; | |
|   virtual uint max_supported_key_part_length() const; | |
|   virtual uint min_record_length(uint options) const; | |
| 
 | |
|   /* | |
|     Primary key is clustered can only be true if all underlying handlers have | |
|     this feature. | |
|  | |
|     This accessor only returns the m_pkey_is_clustered member; it does not | |
|     query the underlying handlers itself. | |
|   */ | |
|   virtual bool primary_key_is_clustered() | |
|   { return m_pkey_is_clustered; } | |
| 
 | |
|   /* | |
|     ------------------------------------------------------------------------- | |
|     MODULE compare records | |
|     ------------------------------------------------------------------------- | |
|     cmp_ref checks if two references are the same. For most handlers this is | |
|     a simple memcmp of the reference. However some handlers use primary key | |
|     as reference and this can be the same even if memcmp says they are | |
|     different. This is due to character sets and end spaces and so forth. | |
|     For the partition handler the reference is first two bytes providing the | |
|     partition identity of the referred record and then the reference of the | |
|     underlying handler. | |
|     Thus cmp_ref for the partition handler always returns FALSE for records | |
|     not in the same partition and uses cmp_ref on the underlying handler | |
|     to check whether the rest of the reference part is also the same. | |
|     ------------------------------------------------------------------------- | |
|   */ | |
|   virtual int cmp_ref(const uchar * ref1, const uchar * ref2); | |
|   /* | |
|     ------------------------------------------------------------------------- | |
|     MODULE auto increment | |
|     ------------------------------------------------------------------------- | |
|     This module is used to handle the support of auto increments. | |
|  | |
|     This variable in the handler is used as part of the handler interface | |
|     It is maintained by the parent handler object and should not be | |
|     touched by child handler objects (see handler.cc for its use). | |
|  | |
|     auto_increment_column_changed | |
|      ------------------------------------------------------------------------- | |
|   */ | |
|   virtual bool need_info_for_auto_inc(); | |
|   virtual bool can_use_for_auto_inc_init(); | |
|   virtual void get_auto_increment(ulonglong offset, ulonglong increment, | |
|                                   ulonglong nb_desired_values, | |
|                                   ulonglong *first_value, | |
|                                   ulonglong *nb_reserved_values); | |
|   virtual void release_auto_increment(); | |
| private: | |
|   virtual int reset_auto_increment(ulonglong value); | |
|   void update_next_auto_inc_val(); | |
|   virtual void lock_auto_increment() | |
|   { | |
|     /* | |
|       Nothing to do when the lock is already taken for the statement | |
|       (auto_increment_safe_stmt_log_lock), and the shared lock is only | |
|       taken for tables that are not temporary. | |
|     */ | |
|     if (!auto_increment_safe_stmt_log_lock && | |
|         table_share->tmp_table == NO_TMP_TABLE) | |
|     { | |
|       part_share->lock_auto_inc(); | |
|       DBUG_ASSERT(!auto_increment_lock); | |
|       auto_increment_lock= TRUE; | |
|     } | |
|   } | |
|   virtual void unlock_auto_increment() | |
|   { | |
|     /* This handler does not hold the lock: nothing to release. */ | |
|     if (!auto_increment_lock) | |
|       return; | |
|     /* | |
|       While auto_increment_safe_stmt_log_lock is set the lock has to be | |
|       kept. The flag is set to false, and the lock thus released, at the | |
|       end of the statement by ha_partition::release_auto_increment. | |
|     */ | |
|     if (auto_increment_safe_stmt_log_lock) | |
|       return; | |
|     auto_increment_lock= FALSE; | |
|     part_share->unlock_auto_inc(); | |
|   } | |
|   virtual void set_auto_increment_if_higher(Field *field) | |
|   { | |
|     /* | |
|       Value to compare against the shared counter: the field value when | |
|       the field is unsigned or holds a positive number, otherwise 0. | |
|     */ | |
|     ulonglong candidate= 0; | |
|     if (((Field_num*) field)->unsigned_flag || field->val_int() > 0) | |
|       candidate= field->val_int(); | |
|     lock_auto_increment(); | |
|     DBUG_ASSERT(part_share->auto_inc_initialized || | |
|                 !can_use_for_auto_inc_init()); | |
|     /* must check when the mutex is taken */ | |
|     if (part_share->next_auto_inc_val <= candidate) | |
|       part_share->next_auto_inc_val= candidate + 1; | |
|     unlock_auto_increment(); | |
|   } | |
| 
 | |
|   void check_insert_autoincrement() | |
|   { | |
|     /* | |
|       For an INSERT into a table that has an AUTO_INCREMENT column, all | |
|       partitions must be read to find the next autoincrement value, | |
|       unless that has already been done. | |
|     */ | |
|     if (part_share->auto_inc_initialized) | |
|       return; | |
|     if (ha_thd()->lex->sql_command != SQLCOM_INSERT) | |
|       return; | |
|     if (table->found_next_number_field) | |
|       bitmap_set_all(&m_part_info->read_partitions); | |
|   } | |
| 
 | |
| public: | |
| 
 | |
|   /* | |
|      ------------------------------------------------------------------------- | |
|      MODULE initialize handler for HANDLER call | |
|      ------------------------------------------------------------------------- | |
|      This method is a special InnoDB method called before a HANDLER query. | |
|      ------------------------------------------------------------------------- | |
|   */ | |
|   virtual void init_table_handle_for_HANDLER(); | |
| 
 | |
|   /* | |
|     The remainder of this file defines the handler methods not implemented | |
|     by the partition handler | |
|   */ | |
| 
 | |
|   /* | |
|     ------------------------------------------------------------------------- | |
|     MODULE foreign key support | |
|     ------------------------------------------------------------------------- | |
|     The following methods are used to implement foreign keys as supported by | |
|     InnoDB. Implement this ?? | |
|     get_foreign_key_create_info is used by SHOW CREATE TABLE to get a textual | |
|     description of how the CREATE TABLE part to define FOREIGN KEY's is done. | |
|     free_foreign_key_create_info is used to free the memory area that provided | |
|     this description. | |
|     can_switch_engines checks if it is ok to switch to a new engine based on | |
|     the foreign key info in the table. | |
|     Only can_switch_engines() is declared below; the other signatures are | |
|     listed in this comment for reference and are not declared here. | |
|     ------------------------------------------------------------------------- | |
|  | |
|     virtual char* get_foreign_key_create_info() | |
|     virtual void free_foreign_key_create_info(char* str) | |
|  | |
|     virtual int get_foreign_key_list(THD *thd, | |
|     List<FOREIGN_KEY_INFO> *f_key_list) | |
|     virtual uint referenced_by_foreign_key() | |
|   */ | |
|     virtual bool can_switch_engines(); | |
|   /* | |
|     ------------------------------------------------------------------------- | |
|     MODULE fulltext index | |
|     ------------------------------------------------------------------------- | |
|     Declarations only. ft_init(), ft_end() and ft_read() each have a | |
|     pre_ft_*() counterpart declared next to them; ft_init_ext() does not. | |
|     ft_close_search() is the only non-virtual member of this group. | |
|     ------------------------------------------------------------------------- | |
|   */ | |
|     void ft_close_search(FT_INFO *handler); | |
|     virtual int ft_init(); | |
|     virtual int pre_ft_init(); | |
|     virtual void ft_end(); | |
|     virtual int pre_ft_end(); | |
|     virtual FT_INFO *ft_init_ext(uint flags, uint inx, String *key); | |
|     virtual int ft_read(uchar *buf); | |
|     virtual int pre_ft_read(bool use_parallel); | |
| 
 | |
|   /* | |
|      ------------------------------------------------------------------------- | |
|      MODULE restart full table scan at position (MyISAM) | |
|      ------------------------------------------------------------------------- | |
|      The following method is only used by MyISAM when used as | |
|      temporary tables in a join. | |
|      virtual int restart_rnd_next(uchar *buf, uchar *pos); | |
|   */ | |
| 
 | |
|   /* | |
|     ------------------------------------------------------------------------- | |
|     MODULE in-place ALTER TABLE | |
|     ------------------------------------------------------------------------- | |
|     These methods are in the handler interface. (used by innodb-plugin) | |
|     They are used for in-place alter table: | |
|     check_if_supported_inplace_alter() returns an enum_alter_inplace_result. | |
|     prepare_inplace_alter_table(), inplace_alter_table() and | |
|     commit_inplace_alter_table() return bool and take the altered TABLE plus | |
|     an Alter_inplace_info; the commit step also takes a 'commit' flag. | |
|     notify_table_changed() takes no arguments and returns nothing. | |
|     ------------------------------------------------------------------------- | |
|   */ | |
|     virtual enum_alter_inplace_result | |
|       check_if_supported_inplace_alter(TABLE *altered_table, | |
|                                        Alter_inplace_info *ha_alter_info); | |
|     virtual bool prepare_inplace_alter_table(TABLE *altered_table, | |
|                                              Alter_inplace_info *ha_alter_info); | |
|     virtual bool inplace_alter_table(TABLE *altered_table, | |
|                                      Alter_inplace_info *ha_alter_info); | |
|     virtual bool commit_inplace_alter_table(TABLE *altered_table, | |
|                                             Alter_inplace_info *ha_alter_info, | |
|                                             bool commit); | |
|     virtual void notify_table_changed(); | |
| 
 | |
|   /* | |
|     ------------------------------------------------------------------------- | |
|     MODULE tablespace support | |
|     ------------------------------------------------------------------------- | |
|     Admin of table spaces is not applicable to the partition handler (InnoDB) | |
|     This means that the following method is not implemented: | |
|     ------------------------------------------------------------------------- | |
|     virtual int discard_or_import_tablespace(my_bool discard) | |
|   */ | |
| 
 | |
|   /* | |
|     ------------------------------------------------------------------------- | |
|     MODULE admin MyISAM | |
|     ------------------------------------------------------------------------- | |
|  | |
|     ------------------------------------------------------------------------- | |
|       OPTIMIZE TABLE, CHECK TABLE, ANALYZE TABLE and REPAIR TABLE are | |
|       mapped to a routine that handles looping over a given set of | |
|       partitions and those routines send a flag indicating to execute on | |
|       all partitions. | |
|  | |
|       Also declared in this group: check_and_repair(), auto_repair(), | |
|       is_crashed() and check_for_upgrade(). auto_repair() and is_crashed() | |
|       are const members. | |
|     ------------------------------------------------------------------------- | |
|   */ | |
|     virtual int optimize(THD* thd, HA_CHECK_OPT *check_opt); | |
|     virtual int analyze(THD* thd, HA_CHECK_OPT *check_opt); | |
|     virtual int check(THD* thd, HA_CHECK_OPT *check_opt); | |
|     virtual int repair(THD* thd, HA_CHECK_OPT *check_opt); | |
|     virtual bool check_and_repair(THD *thd); | |
|     virtual bool auto_repair(int error) const; | |
|     virtual bool is_crashed() const; | |
|     virtual int check_for_upgrade(HA_CHECK_OPT *check_opt); | |
| 
 | |
|   /* | |
|     ------------------------------------------------------------------------- | |
|     MODULE condition pushdown | |
|     ------------------------------------------------------------------------- | |
|     cond_push() takes and returns a pointer to a const COND. | |
|     cond_pop() and clear_top_table_fields() take no arguments and return | |
|     nothing. info_push() takes an info type plus an untyped pointer and | |
|     returns int. Only the declarations are given here. | |
|     ------------------------------------------------------------------------- | |
|   */ | |
|     virtual const COND *cond_push(const COND *cond); | |
|     virtual void cond_pop(); | |
|     virtual void clear_top_table_fields(); | |
|     virtual int info_push(uint info_type, void *info); | |
| 
 | |
|     private: | |
|     int handle_opt_partitions(THD *thd, HA_CHECK_OPT *check_opt, uint flags); | |
|     int handle_opt_part(THD *thd, HA_CHECK_OPT *check_opt, uint part_id, | |
|                         uint flag); | |
|     /** | |
|       Check if the rows are placed in the correct partition. | |
|  | |
|       @param read_part_id  Presumably the id of the partition whose rows are | |
|                            checked (inferred from the name; confirm against | |
|                            the definition). | |
|       @param repair        If true, then move the rows to the correct | |
|                            partition. | |
|     */ | |
|     int check_misplaced_rows(uint read_part_id, bool repair); | |
|     void append_row_to_str(String &str); | |
|     public: | |
| 
 | |
|   /* | |
|     Enabled keycache for performance reasons, WL#4571. | |
|     assign_to_keycache() and preload_keys() take a THD and a HA_CHECK_OPT | |
|     and return int; get_next_global_for_child() takes no arguments and | |
|     returns a TABLE_LIST pointer. Only the declarations are given here. | |
|   */ | |
|     virtual int assign_to_keycache(THD* thd, HA_CHECK_OPT *check_opt); | |
|     virtual int preload_keys(THD* thd, HA_CHECK_OPT* check_opt); | |
|     virtual TABLE_LIST *get_next_global_for_child(); | |
| 
 | |
|   /* | |
|     ------------------------------------------------------------------------- | |
|     MODULE enable/disable indexes | |
|     ------------------------------------------------------------------------- | |
|     Enable/Disable Indexes are only supported by HEAP and MyISAM. | |
|     disable_indexes() and enable_indexes() take a uint mode; | |
|     indexes_are_disabled() takes no arguments. All three return int. | |
|     ------------------------------------------------------------------------- | |
|   */ | |
|     virtual int disable_indexes(uint mode); | |
|     virtual int enable_indexes(uint mode); | |
|     virtual int indexes_are_disabled(void); | |
| 
 | |
|   /* | |
|     ------------------------------------------------------------------------- | |
|     MODULE append_create_info | |
|     ------------------------------------------------------------------------- | |
|     append_create_info is only used by MyISAM MERGE tables, and the partition | |
|     handler does not support MERGE as an underlying handler, so it is not | |
|     implemented here. | |
|     Implement this?? | |
|     ------------------------------------------------------------------------- | |
|     virtual void append_create_info(String *packet) | |
|   */ | |
| 
 | |
|   /* | |
|     the following heavily relies on the fact that all partitions | |
|     are in the same storage engine. | |
|  | |
|     When this limitation is lifted, the following hack should go away, | |
|     and a proper interface for engines needs to be introduced: | |
|  | |
|       a PARTITION_SHARE structure that has a pointer to the TABLE_SHARE. | |
|       is given to engines everywhere where TABLE_SHARE is used now | |
|       has members like option_struct, ha_data | |
|       perhaps TABLE needs to be split the same way too... | |
|  | |
|     this can also be done before partition will support a mix of engines, | |
|     but preferably together with other incompatible API changes. | |
|   */ | |
|   /** | |
|     Return the handlerton of the partitions. | |
|  | |
|     The value is taken from the first entry of m_file. DBUG_ASSERT verifies | |
|     that each of the remaining m_tot_parts entries refers to the same | |
|     handlerton. | |
|  | |
|     @return handlerton pointer of m_file[0]. | |
|   */ | |
|   virtual handlerton *partition_ht() const | |
|   { | |
|     handlerton *first_ht= m_file[0]->ht; | |
|     uint idx= 1; | |
|     while (idx < m_tot_parts) | |
|     { | |
|       DBUG_ASSERT(m_file[idx]->ht == first_ht); | |
|       idx++; | |
|     } | |
|     return first_ht; | |
|   } | |
| 
 | |
|   /** | |
|     Sum the record counts of the handlers belonging to one partition. | |
|  | |
|     The handler range starts at part_elem->id multiplied by the number of | |
|     subpartitions (or by 1 when num_subparts is 0) and spans that many | |
|     entries of m_file. Each handler's statistics are refreshed with info() | |
|     before its stats.records is added to the total. | |
|  | |
|     @param part_elem  Partition element whose id selects the handler range. | |
|     @return Sum of stats.records over the selected handlers. | |
|   */ | |
|   ha_rows part_records(partition_element *part_elem) | |
|   { | |
|     DBUG_ASSERT(m_part_info); | |
|     uint32 handlers_per_part= 1; | |
|     if (m_part_info->num_subparts) | |
|       handlers_per_part= m_part_info->num_subparts; | |
|     const uint32 first_id= part_elem->id * handlers_per_part; | |
|     const uint32 end_id= first_id + handlers_per_part; | |
|     DBUG_ASSERT(end_id <= m_tot_parts); | |
|     ha_rows total_records= 0; | |
|     uint32 cur_id= first_id; | |
|     while (cur_id < end_id) | |
|     { | |
|       handler *part_file= m_file[cur_id]; | |
|       DBUG_ASSERT(bitmap_is_set(&(m_part_info->read_partitions), cur_id)); | |
|       /* Refresh the statistics before reading the record count. */ | |
|       part_file->info(HA_STATUS_VARIABLE | HA_STATUS_NO_LOCK | HA_STATUS_OPEN); | |
|       total_records+= part_file->stats.records; | |
|       ++cur_id; | |
|     } | |
|     return total_records; | |
|   } | |
| 
 | |
|   friend int cmp_key_rowid_part_id(void *ptr, uchar *ref1, uchar *ref2); /* non-member comparator, given friend access */ | |
|   friend int cmp_key_part_id(void *key_p, uchar *ref1, uchar *ref2); /* non-member comparator, given friend access */ | |
| }; | |
| #endif /* HA_PARTITION_INCLUDED */
 |