|
|
|
@ -50,6 +50,26 @@ |
|
|
|
|
|
|
|
class DsMrr_impl; |
|
|
|
|
|
|
|
class Key_parameters |
|
|
|
{ |
|
|
|
public: |
|
|
|
/* TRUE <=> We can get at most one index tuple for a lookup key */ |
|
|
|
bool index_ranges_unique; |
|
|
|
|
|
|
|
uint key_tuple_length; /* Length of index lookup tuple, in bytes */ |
|
|
|
key_part_map key_tuple_map; /* keyparts used in index lookup tuples */ |
|
|
|
|
|
|
|
/* |
|
|
|
This is |
|
|
|
= key_tuple_length if we copy keys to buffer |
|
|
|
= sizeof(void*) if we're using pointers to materialized keys. |
|
|
|
*/ |
|
|
|
uint key_size_in_keybuf; |
|
|
|
|
|
|
|
/* TRUE <=> don't copy key values, use pointers to them instead. */ |
|
|
|
bool use_key_pointers; |
|
|
|
}; |
|
|
|
|
|
|
|
/** |
|
|
|
Iterator over (record, range_id) pairs that match given key value. |
|
|
|
|
|
|
|
@ -57,16 +77,23 @@ class DsMrr_impl; |
|
|
|
key value. A key value may have multiple matching records, so we'll need to |
|
|
|
produce a cross-product of sets of matching records and range_id-s. |
|
|
|
*/ |
|
|
|
|
|
|
|
class Mrr_ordered_index_reader; |
|
|
|
class Key_value_records_iterator |
|
|
|
{ |
|
|
|
/* Scan parameters */ |
|
|
|
DsMrr_impl *dsmrr; |
|
|
|
Key_parameters *param; |
|
|
|
Lifo_buffer_iterator identical_key_it; |
|
|
|
uchar *last_identical_key_ptr; |
|
|
|
bool get_next_row; |
|
|
|
//handler *h; |
|
|
|
/* TRUE <=> We can get at most one index tuple for a lookup key */ |
|
|
|
//bool index_ranges_unique; |
|
|
|
|
|
|
|
Mrr_ordered_index_reader *owner; |
|
|
|
/* key_buffer.read() reads to here */ |
|
|
|
uchar *cur_index_tuple; |
|
|
|
public: |
|
|
|
bool init(DsMrr_impl *dsmrr); |
|
|
|
bool init(Mrr_ordered_index_reader *owner_arg); |
|
|
|
|
|
|
|
/* |
|
|
|
Get next (key_val, range_id) pair. |
|
|
|
@ -74,9 +101,184 @@ public: |
|
|
|
int get_next(); |
|
|
|
|
|
|
|
void close(); |
|
|
|
friend class Mrr_ordered_index_reader; |
|
|
|
}; |
|
|
|
|
|
|
|
|
|
|
|
/* |
|
|
|
Something that will manage buffers for those that call it |
|
|
|
*/ |
|
|
|
class Buffer_manager |
|
|
|
{ |
|
|
|
public: |
|
|
|
virtual void reset_buffer_sizes()= 0; |
|
|
|
virtual void setup_buffer_sizes(uint key_size_in_keybuf, |
|
|
|
key_part_map key_tuple_map)=0; |
|
|
|
virtual Lifo_buffer* get_key_buffer()= 0; |
|
|
|
virtual ~Buffer_manager(){} |
|
|
|
}; |
|
|
|
|
|
|
|
|
|
|
|
/*
  Abstract MRR execution strategy

  An object of this class produces (R, range_info) pairs where R can be an
  index tuple or a table record.

  Getting HA_ERR_END_OF_FILE from get_next() means that the source should be
  re-filled. If eof() returns true after a refill attempt, then the end of
  the stream has been reached and get_next() must not be called anymore.
*/

class Mrr_strategy
{
public:
  /* Get the next (R, range_info) pair */
  virtual int get_next(char **range_info) = 0;

  /* Re-fill the source of (R, range_info) pairs */
  virtual int refill_buffer()= 0;

  /* Virtual destructor: strategies are destroyed through base pointers */
  virtual ~Mrr_strategy() {}
};
|
|
|
|
|
|
|
|
|
|
|
/* A common base for strategies that do index scans and produce index tuples */ |
|
|
|
class Mrr_index_reader : public Mrr_strategy |
|
|
|
{ |
|
|
|
public: |
|
|
|
handler *h; |
|
|
|
|
|
|
|
virtual int init(handler *h_arg, RANGE_SEQ_IF *seq_funcs, |
|
|
|
void *seq_init_param, uint n_ranges, |
|
|
|
uint mode, Buffer_manager *buf_manager_arg) = 0; |
|
|
|
virtual bool eof() = 0; |
|
|
|
virtual uchar *get_rowid_ptr()= 0; |
|
|
|
virtual bool skip_record(char *range_id, uchar *rowid)=0; |
|
|
|
}; |
|
|
|
|
|
|
|
|
|
|
|
/* |
|
|
|
A "bypass" strategy that uses default MRR implementation (i.e. |
|
|
|
handler::multi_range_read_XXX() calls) to produce rows. |
|
|
|
*/ |
|
|
|
|
|
|
|
class Mrr_simple_index_reader : public Mrr_index_reader |
|
|
|
{ |
|
|
|
int res; |
|
|
|
public: |
|
|
|
int init(handler *h_arg, RANGE_SEQ_IF *seq_funcs, |
|
|
|
void *seq_init_param, uint n_ranges, |
|
|
|
uint mode, Buffer_manager *buf_manager_arg); |
|
|
|
int get_next(char **range_info); |
|
|
|
int refill_buffer() { return 0; } |
|
|
|
bool eof() { return test(res); } |
|
|
|
uchar *get_rowid_ptr() { return h->ref; } |
|
|
|
bool skip_record(char *range_id, uchar *rowid) |
|
|
|
{ |
|
|
|
return (h->mrr_funcs.skip_record && |
|
|
|
h->mrr_funcs.skip_record(h->mrr_iter, range_id, rowid)); |
|
|
|
} |
|
|
|
}; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* |
|
|
|
A strategy that sorts index lookup keys before scanning the index |
|
|
|
*/ |
|
|
|
|
|
|
|
class Mrr_ordered_index_reader : public Mrr_index_reader |
|
|
|
{ |
|
|
|
public: |
|
|
|
int init(handler *h_arg, RANGE_SEQ_IF *seq_funcs, |
|
|
|
void *seq_init_param, uint n_ranges, |
|
|
|
uint mode, Buffer_manager *buf_manager_arg); |
|
|
|
int get_next(char **range_info); |
|
|
|
int refill_buffer(); |
|
|
|
bool eof() { return index_scan_eof; } |
|
|
|
uchar *get_rowid_ptr() { return h->ref; } |
|
|
|
|
|
|
|
bool skip_record(char *range_info, uchar *rowid) |
|
|
|
{ |
|
|
|
return (mrr_funcs.skip_record && |
|
|
|
mrr_funcs.skip_record(mrr_iter, range_info, rowid)); |
|
|
|
} |
|
|
|
private: |
|
|
|
Key_value_records_iterator kv_it; |
|
|
|
|
|
|
|
bool scanning_key_val_iter; |
|
|
|
|
|
|
|
char *cur_range_info; |
|
|
|
|
|
|
|
/* Buffer to store (key, range_id) pairs */ |
|
|
|
Lifo_buffer *key_buffer; |
|
|
|
|
|
|
|
Buffer_manager *buf_manager; |
|
|
|
|
|
|
|
/* Initially FALSE, becomes TRUE when we've set key_tuple_xxx members */ |
|
|
|
bool know_key_tuple_params; |
|
|
|
|
|
|
|
// bool use_key_pointers; |
|
|
|
|
|
|
|
Key_parameters keypar; |
|
|
|
/* TRUE <=> need range association, buffers hold {rowid, range_id} pairs */ |
|
|
|
bool is_mrr_assoc; |
|
|
|
|
|
|
|
bool no_more_keys; |
|
|
|
RANGE_SEQ_IF mrr_funcs; |
|
|
|
range_seq_t mrr_iter; |
|
|
|
|
|
|
|
bool auto_refill; |
|
|
|
|
|
|
|
bool index_scan_eof; |
|
|
|
|
|
|
|
static int key_tuple_cmp(void* arg, uchar* key1, uchar* key2); |
|
|
|
static int key_tuple_cmp_reverse(void* arg, uchar* key1, uchar* key2); |
|
|
|
//void cleanup(); |
|
|
|
|
|
|
|
friend class Key_value_records_iterator; |
|
|
|
friend class DsMrr_impl; |
|
|
|
friend class Mrr_ordered_rndpos_reader; |
|
|
|
}; |
|
|
|
|
|
|
|
|
|
|
|
/* MRR strategy that fetches rowids */ |
|
|
|
|
|
|
|
class Mrr_ordered_rndpos_reader : public Mrr_strategy |
|
|
|
{ |
|
|
|
public: |
|
|
|
int init(handler *h, Mrr_index_reader *index_reader, uint mode, |
|
|
|
Lifo_buffer *buf); |
|
|
|
int get_next(char **range_info); |
|
|
|
int refill_buffer(); |
|
|
|
void cleanup(); |
|
|
|
private: |
|
|
|
handler *h; |
|
|
|
|
|
|
|
DsMrr_impl *dsmrr; |
|
|
|
/* This what we get (rowid, range_info) pairs from */ |
|
|
|
Mrr_index_reader *index_reader; |
|
|
|
uchar *index_rowid; |
|
|
|
|
|
|
|
/* TRUE <=> need range association, buffers hold {rowid, range_id} pairs */ |
|
|
|
bool is_mrr_assoc; |
|
|
|
|
|
|
|
uchar *last_identical_rowid; |
|
|
|
Lifo_buffer *rowid_buffer; |
|
|
|
|
|
|
|
/* = h->ref_length [ + sizeof(range_assoc_info) ] */ |
|
|
|
//uint rowid_buff_elem_size; |
|
|
|
|
|
|
|
/* rowid_buffer.read() will set the following: */ |
|
|
|
uchar *rowid; |
|
|
|
uchar *rowids_range_id; |
|
|
|
}; |
|
|
|
|
|
|
|
class Mrr_strategy_factory |
|
|
|
{ |
|
|
|
public: |
|
|
|
Mrr_ordered_rndpos_reader ordered_rndpos_reader; |
|
|
|
Mrr_ordered_index_reader ordered_index_reader; |
|
|
|
Mrr_simple_index_reader simple_index_reader; |
|
|
|
}; |
|
|
|
|
|
|
|
/* |
|
|
|
DS-MRR implementation for one table. Create/use one object of this class for |
|
|
|
each ha_{myisam/innobase/etc} object. That object will be further referred to |
|
|
|
@ -154,9 +356,58 @@ public: |
|
|
|
get record by rowid and return the {record, range_id} pair |
|
|
|
4. Repeat the above steps until we've exhausted the list of ranges we're |
|
|
|
scanning. |
|
|
|
|
|
|
|
Buffer space management considerations |
|
|
|
-------------------------------------- |
|
|
|
With regards to buffer/memory management, MRR interface specifies that |
|
|
|
- SQL layer provides multi_range_read_init() with buffer of certain size. |
|
|
|
- MRR implementation may use (i.e. have at its disposal till the end of |
|
|
|
the MRR scan) all of the buffer, or return the unused end of the buffer |
|
|
|
to SQL layer. |
|
|
|
|
|
|
|
DS-MRR needs buffer in order to accumulate and sort rowids and/or keys. When |
|
|
|
we need to accumulate/sort only keys (or only rowids), it is fairly trivial. |
|
|
|
|
|
|
|
When we need to accumulate/sort both keys and rowids, efficient buffer use |
|
|
|
gets complicated. We need to: |
|
|
|
- First, accumulate keys and sort them |
|
|
|
- Then use the keys (smaller values go first) to obtain rowids. A key is not |
|
|
|
needed after we've got matching rowids for it. |
|
|
|
- Make sure that rowids are accumulated at the front of the buffer, so that we |
|
|
|
can return the end part of the buffer to SQL layer, should there be too |
|
|
|
few rowid values to occupy the buffer. |
|
|
|
|
|
|
|
All of these goals are achieved by using the following scheme: |
|
|
|
|
|
|
|
| | We get an empty buffer from SQL layer. |
|
|
|
|
|
|
|
| *-| |
|
|
|
| *----| First, we fill the buffer with keys. Key_buffer |
|
|
|
| *-------| part grows from end of the buffer space to start |
|
|
|
| *----------| (In this picture, the buffer is big enough to |
|
|
|
| *-------------| accommodate all keys and even have some space left) |
|
|
|
|
|
|
|
| *=============| We want to do key-ordered index scan, so we sort |
|
|
|
the keys |
|
|
|
|
|
|
|
|-x *===========| Then we use the keys to get rowids. Rowids are |
|
|
|
|----x *========| stored from start of buffer space towards the end. |
|
|
|
|--------x *=====| The part of the buffer occupied with keys |
|
|
|
|------------x *===| gradually frees up space for rowids. In this |
|
|
|
|--------------x *=| picture we run out of keys before we've run out |
|
|
|
|----------------x | of buffer space (it can be other way as well). |
|
|
|
|
|
|
|
|================x | Then we sort the rowids. |
|
|
|
|
|
|
|
| |~~~| The unused part of the buffer is at the end, so |
|
|
|
we can return it to the SQL layer. |
|
|
|
|
|
|
|
|================* Sorted rowids are then used to read table records |
|
|
|
in disk order |
|
|
|
|
|
|
|
*/ |
|
|
|
|
|
|
|
class DsMrr_impl |
|
|
|
class DsMrr_impl : public Buffer_manager |
|
|
|
{ |
|
|
|
public: |
|
|
|
typedef void (handler::*range_check_toggle_func_t)(bool on); |
|
|
|
@ -181,6 +432,9 @@ public: |
|
|
|
void *seq_init_param, uint n_ranges, uint *bufsz, |
|
|
|
uint *flags, COST_VECT *cost); |
|
|
|
private: |
|
|
|
/* Buffer to store (key, range_id) pairs */ |
|
|
|
Lifo_buffer *key_buffer; |
|
|
|
|
|
|
|
/* |
|
|
|
The "owner" handler object (the one that is expected to "own" this object |
|
|
|
and call its functions). |
|
|
|
@ -197,20 +451,16 @@ private: |
|
|
|
/** Properties of current MRR scan **/ |
|
|
|
|
|
|
|
uint keyno; /* index we're running the scan on */ |
|
|
|
bool use_default_impl; /* TRUE <=> shortcut all calls to default MRR impl */ |
|
|
|
/* TRUE <=> need range association, buffers hold {rowid, range_id} pairs */ |
|
|
|
bool is_mrr_assoc; |
|
|
|
/* TRUE <=> sort the keys before making index lookups */ |
|
|
|
bool do_sort_keys; |
|
|
|
//bool do_sort_keys; |
|
|
|
/* TRUE <=> sort rowids and use rnd_pos() to get and return full records */ |
|
|
|
bool do_rndpos_scan; |
|
|
|
|
|
|
|
/* |
|
|
|
(if do_sort_keys==TRUE) don't copy key values, use pointers to them |
|
|
|
instead. |
|
|
|
*/ |
|
|
|
bool use_key_pointers; |
|
|
|
//bool do_rndpos_scan; |
|
|
|
|
|
|
|
Mrr_strategy_factory strategy_factory; |
|
|
|
Mrr_strategy *strategy; |
|
|
|
Mrr_index_reader *index_strategy; |
|
|
|
|
|
|
|
/* The whole buffer space that we're using */ |
|
|
|
uchar *full_buf; |
|
|
|
@ -226,12 +476,6 @@ private: |
|
|
|
|
|
|
|
/** Index scaning and key buffer-related members **/ |
|
|
|
|
|
|
|
/* TRUE <=> We can get at most one index tuple for a lookup key */ |
|
|
|
bool index_ranges_unique; |
|
|
|
|
|
|
|
/* TRUE<=> we're in a middle of enumerating records for a key range */ |
|
|
|
//bool in_index_range; |
|
|
|
|
|
|
|
/* |
|
|
|
One of the following two is used for key buffer: forward is used when |
|
|
|
we only need key buffer, backward is used when we need both key and rowid |
|
|
|
@ -240,39 +484,10 @@ private: |
|
|
|
Forward_lifo_buffer forward_key_buf; |
|
|
|
Backward_lifo_buffer backward_key_buf; |
|
|
|
|
|
|
|
/* Buffer to store (key, range_id) pairs */ |
|
|
|
Lifo_buffer *key_buffer; |
|
|
|
|
|
|
|
/* Index scan state */ |
|
|
|
bool scanning_key_val_iter; |
|
|
|
/* |
|
|
|
TRUE <=> we've got index tuples/rowids for all keys (need this flag because |
|
|
|
we may have a situation where we've read everything from the key buffer but |
|
|
|
haven't finished with getting index tuples for the last key) |
|
|
|
*/ |
|
|
|
bool index_scan_eof; |
|
|
|
Key_value_records_iterator kv_it; |
|
|
|
|
|
|
|
/* key_buffer.read() reads to here */ |
|
|
|
uchar *cur_index_tuple; |
|
|
|
|
|
|
|
/* if in_index_range==TRUE: range_id of the range we're enumerating */ |
|
|
|
char *cur_range_info; |
|
|
|
|
|
|
|
/* Initially FALSE, becomes TRUE when we've set key_tuple_xxx members */ |
|
|
|
bool know_key_tuple_params; |
|
|
|
uint key_tuple_length; /* Length of index lookup tuple, in bytes */ |
|
|
|
key_part_map key_tuple_map; /* keyparts used in index lookup tuples */ |
|
|
|
|
|
|
|
/* |
|
|
|
This is |
|
|
|
= key_tuple_length if we copy keys to buffer |
|
|
|
= sizeof(void*) if we're using pointers to materialized keys. |
|
|
|
*/ |
|
|
|
uint key_size_in_keybuf; |
|
|
|
Forward_lifo_buffer rowid_buffer; |
|
|
|
|
|
|
|
/* = key_size_in_keybuf [ + sizeof(range_assoc_info) ] */ |
|
|
|
uint key_buff_elem_size; |
|
|
|
//uint key_buff_elem_size_; |
|
|
|
|
|
|
|
/** rnd_pos() scan and rowid buffer-related members **/ |
|
|
|
|
|
|
|
@ -280,36 +495,27 @@ private: |
|
|
|
Buffer to store (rowid, range_id) pairs, or just rowids if |
|
|
|
is_mrr_assoc==FALSE |
|
|
|
*/ |
|
|
|
Forward_lifo_buffer rowid_buffer; |
|
|
|
|
|
|
|
/* rowid_buffer.read() will set the following: */ |
|
|
|
uchar *rowid; |
|
|
|
uchar *rowids_range_id; |
|
|
|
|
|
|
|
uchar *last_identical_rowid; |
|
|
|
|
|
|
|
bool dsmrr_eof; /* TRUE <=> We have reached EOF when reading index tuples */ |
|
|
|
|
|
|
|
/* = h->ref_length [ + sizeof(range_assoc_info) ] */ |
|
|
|
uint rowid_buff_elem_size; |
|
|
|
//Forward_lifo_buffer rowid_buffer; |
|
|
|
|
|
|
|
bool choose_mrr_impl(uint keyno, ha_rows rows, uint *flags, uint *bufsz, |
|
|
|
COST_VECT *cost); |
|
|
|
bool get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags, |
|
|
|
uint *buffer_size, COST_VECT *cost); |
|
|
|
bool check_cpk_scan(THD *thd, uint keyno, uint mrr_flags); |
|
|
|
static int key_tuple_cmp(void* arg, uchar* key1, uchar* key2); |
|
|
|
static int key_tuple_cmp_reverse(void* arg, uchar* key1, uchar* key2); |
|
|
|
int dsmrr_fill_rowid_buffer(); |
|
|
|
void dsmrr_fill_key_buffer(); |
|
|
|
int dsmrr_next_from_index(char **range_info); |
|
|
|
|
|
|
|
void setup_buffer_sizes(key_range *sample_key); |
|
|
|
void reallocate_buffer_space(); |
|
|
|
|
|
|
|
static range_seq_t key_buf_seq_init(void *init_param, uint n_ranges, uint flags); |
|
|
|
static uint key_buf_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range); |
|
|
|
/* Buffer_manager implementation */ |
|
|
|
void setup_buffer_sizes(uint key_size_in_keybuf, key_part_map key_tuple_map); |
|
|
|
void reset_buffer_sizes(); |
|
|
|
Lifo_buffer* get_key_buffer() { return key_buffer; } |
|
|
|
|
|
|
|
friend class Key_value_records_iterator; |
|
|
|
friend class Mrr_ordered_index_reader; |
|
|
|
friend class Mrr_ordered_rndpos_reader; |
|
|
|
|
|
|
|
int setup_two_handlers(); |
|
|
|
void close_second_handler(); |
|
|
|
}; |
|
|
|
|
|
|
|
/** |
|
|
|
|