Addresses #1125: merged nested transactions from the temporary merge branch into main.
The following tests currently fail (not regressions; they already fail as of r13461):
* {{{x1.tdbrun}}}
* {{{test_log(2,3,4,5,6,7,8,9,10).recover}}}
* {{{test-recover(1,2,3).tdbrun}}}
* {{{test1324.tdbrun}}}
ULE_DEBUG is disabled (defined to 0); it can be re-enabled for test purposes (set to 1).
refs [t:1125]
Merging into the temp branch (tokudb.main_13461+1125):
{{{svn merge --accept=postpone -r 12527:13461 ../tokudb.1125 ./}}}
Merging into main:
{{{svn merge --accept=postpone -r13462:13463 ../tokudb.main_13461+1125/ ./}}}
git-svn-id: file:///svn/toku/tokudb@13464 c7de825b-a66e-492c-adef-691d508d4ae1
48 changed files with 3300 additions and 1749 deletions
- db-benchmark-test/Makefile (20)
- db-benchmark-test/db-benchmark-test.c (51)
- include/tdb-internal.h (4)
- newbrt/Makefile (4)
- newbrt/brt-internal.h (7)
- newbrt/brt-serialize.c (45)
- newbrt/brt-test-helpers.c (24)
- newbrt/brt-verify.c (10)
- newbrt/brt.c (577)
- newbrt/brt_msg.c (74)
- newbrt/brt_msg.h (33)
- newbrt/brtdump.c (4)
- newbrt/brttypes.h (22)
- newbrt/fifo.c (38)
- newbrt/fifo.h (19)
- newbrt/fifo_msg.c (142)
- newbrt/fifo_msg.h (37)
- newbrt/fingerprint.c (7)
- newbrt/leafentry.c (351)
- newbrt/leafentry.h (177)
- newbrt/log-internal.h (1)
- newbrt/log.h (1)
- newbrt/logformat.c (10)
- newbrt/omt-with-o1-cursors/omt.c (769)
- newbrt/recover.c (24)
- newbrt/roll.c (56)
- newbrt/roll.h (24)
- newbrt/rollback.c (1)
- newbrt/tests/Makefile (3)
- newbrt/tests/brt-serialize-test.c (25)
- newbrt/tests/fifo-test.c (14)
- newbrt/tests/test-leafentry-nested.c (431)
- newbrt/tests/test-leafentry10.c (10)
- newbrt/txn.c (14)
- newbrt/txn.h (1)
- newbrt/ule.c (1528)
- newbrt/ule.h (70)
- newbrt/xids-internal.h (18)
- newbrt/xids.c (211)
- newbrt/xids.h (63)
- src/tests/Makefile (2)
- src/tests/test_thread_stack.c (7)
- src/ydb-internal.h (15)
- src/ydb.c (80)
- test.wiki (8)
- test2.wiki (4)
- toku_include/Makefile.include (3)
- toku_include/toku_htod.h (10)
newbrt/brt_msg.c: @ -0,0 +1,74 @@ |
|||
/* -*- mode: C; c-basic-offset: 4 -*- */ |
|||
#ident "Copyright (c) 2007, 2008 Tokutek Inc. All rights reserved." |
|||
|
|||
|
|||
#include <toku_portability.h> |
|||
#include "brttypes.h" |
|||
#include "xids.h" |
|||
#include "fifo_msg.h" |
|||
#include "brt_msg.h" |
|||
|
|||
//BRT_MSG internals are in host order |
|||
//XIDS are not 'internal' to BRT_MSG |
|||
|
|||
void |
|||
brt_msg_from_dbts(BRT_MSG brt_msg, |
|||
DBT *key, DBT *val, |
|||
XIDS xids, brt_msg_type type) { |
|||
brt_msg->u.id.key = key; |
|||
brt_msg->u.id.val = val; |
|||
brt_msg->xids = xids; |
|||
brt_msg->type = type; |
|||
} |
|||
|
|||
//No conversion (from disk to host) is necessary |
|||
//Accessor functions for fifo return host order bytes. |
|||
#if 0 |
|||
void |
|||
brt_msg_from_fifo_msg(BRT_MSG brt_msg, FIFO_MSG fifo_msg) { |
|||
brt_msg->keylen = fifo_msg_get_keylen(fifo_msg); |
|||
brt_msg->vallen = fifo_msg_get_vallen(fifo_msg); |
|||
brt_msg->key = fifo_msg_get_key(fifo_msg); |
|||
brt_msg->val = fifo_msg_get_val(fifo_msg); |
|||
brt_msg->xids = fifo_msg_get_xids(fifo_msg); |
|||
brt_msg->type = fifo_msg_get_type(fifo_msg); |
|||
} |
|||
#endif |
|||
|
|||
u_int32_t |
|||
brt_msg_get_keylen(BRT_MSG brt_msg) { |
|||
u_int32_t rval = brt_msg->u.id.key->size; |
|||
return rval; |
|||
} |
|||
|
|||
u_int32_t |
|||
brt_msg_get_vallen(BRT_MSG brt_msg) { |
|||
u_int32_t rval = brt_msg->u.id.val->size; |
|||
return rval; |
|||
} |
|||
|
|||
XIDS |
|||
brt_msg_get_xids(BRT_MSG brt_msg) { |
|||
XIDS rval = brt_msg->xids; |
|||
return rval; |
|||
} |
|||
|
|||
void * |
|||
brt_msg_get_key(BRT_MSG brt_msg) { |
|||
void * rval = brt_msg->u.id.key->data; |
|||
return rval; |
|||
} |
|||
|
|||
void * |
|||
brt_msg_get_val(BRT_MSG brt_msg) { |
|||
void * rval = brt_msg->u.id.val->data; |
|||
return rval; |
|||
} |
|||
|
|||
brt_msg_type |
|||
brt_msg_get_type(BRT_MSG brt_msg) { |
|||
brt_msg_type rval = brt_msg->type; |
|||
return rval; |
|||
} |
|||
|
|||
newbrt/brt_msg.h: @ -0,0 +1,33 @@ |
|||
/* -*- mode: C; c-basic-offset: 4 -*- */ |
|||
|
|||
#ident "Copyright (c) 2007, 2008 Tokutek Inc. All rights reserved." |
|||
|
|||
|
|||
/* The purpose of this file is to provide access to the brt_msg, |
|||
* which is the ephemeral version of the fifo_msg. |
|||
*/ |
|||
|
|||
|
|||
|
|||
|
|||
#ifndef BRT_MSG_H |
|||
#define BRT_MSG_H |
|||
|
|||
u_int32_t brt_msg_get_keylen(BRT_MSG brt_msg); |
|||
|
|||
u_int32_t brt_msg_get_vallen(BRT_MSG brt_msg); |
|||
|
|||
XIDS brt_msg_get_xids(BRT_MSG brt_msg); |
|||
|
|||
void * brt_msg_get_key(BRT_MSG brt_msg); |
|||
|
|||
void * brt_msg_get_val(BRT_MSG brt_msg); |
|||
|
|||
brt_msg_type brt_msg_get_type(BRT_MSG brt_msg); |
|||
|
|||
void brt_msg_from_fifo_msg(BRT_MSG brt_msg, FIFO_MSG fifo_msg); |
|||
|
|||
void brt_msg_from_dbts(BRT_MSG brt_msg, DBT *key, DBT *val, XIDS xids, brt_msg_type type); |
|||
|
|||
#endif |
|||
|
|||
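For illustration only (not in the original changeset): a minimal sketch of how the brt_msg accessors above might be used. It assumes brttypes.h provides the DBT and brt_msg_type definitions, that the struct behind the BRT_MSG typedef is named BRT_MSG_S, and that BRT_INSERT is one of the message types (the roll.h flags later in this diff refer to BRT_INSERT and BRT_COMMIT messages).
{{{
#include <toku_portability.h>
#include "brttypes.h"
#include "xids.h"
#include "brt_msg.h"
#include "toku_assert.h"

// Wrap two DBTs into an ephemeral BRT_MSG and read the fields back
// through the accessors defined in brt_msg.c.
static void
example_brt_msg(DBT *key, DBT *val) {
    BRT_MSG_S msg;                     // assumed name of the struct behind BRT_MSG
    XIDS xids = xids_get_root_xids();  // constant xids list for the root transaction
    brt_msg_from_dbts(&msg, key, val, xids, BRT_INSERT);

    assert(brt_msg_get_type(&msg)   == BRT_INSERT);
    assert(brt_msg_get_keylen(&msg) == key->size);
    assert(brt_msg_get_key(&msg)    == key->data);
    assert(brt_msg_get_vallen(&msg) == val->size);
    assert(brt_msg_get_val(&msg)    == val->data);
}
}}}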
newbrt/fifo_msg.c: @ -0,0 +1,142 @@ |
|||
/* -*- mode: C; c-basic-offset: 4 -*- */ |
|||
#ident "Copyright (c) 2007, 2008 Tokutek Inc. All rights reserved." |
|||
|
|||
|
|||
/* Purpose of this file is to define and handle the fifo_msg, which |
|||
* is the stored format of a brt_msg. |
|||
* |
|||
* Note, when translating from fifo_msg to brt_msg, the brt_msg |
|||
* will be created with a pointer into the xids in the fifo_msg. |
|||
* (The xids will not be embedded in the brt_msg.) This means |
|||
* that a valid xids struct must be embedded in the fifo_msg. |
|||
* |
|||
* NOTE: fifo_msg is stored in memory and on disk in same format. |
|||
* fifo_msg is stored in same byte order both in-memory |
|||
*        and on-disk.  Accessors are responsible for transposition |
|||
* to host order. |
|||
*/ |
|||
|
|||
|
|||
#include <string.h> |
|||
|
|||
#include <toku_portability.h> |
|||
#include "brttypes.h" |
|||
#include "xids.h" |
|||
#include "xids-internal.h" |
|||
#include "brt_msg.h" |
|||
#include "fifo_msg.h" |
|||
#include <toku_htod.h> |
|||
|
|||
|
|||
// xids_and_key_and_val field is XIDS_S followed by key |
|||
// followed by value. |
|||
|
|||
struct fifo_msg_t { |
|||
u_int32_t keylen; |
|||
u_int32_t vallen; |
|||
u_int8_t type; |
|||
// u_int8_t pad[7]; // force 64-bit alignment if needed ??? |
|||
u_int8_t xids_and_key_and_val[]; // undifferentiated bytes |
|||
}; |
|||
|
|||
|
|||
u_int32_t |
|||
fifo_msg_get_keylen(FIFO_MSG fifo_msg) { |
|||
u_int32_t rval = fifo_msg->keylen; |
|||
rval = toku_dtoh32(rval); |
|||
return rval; |
|||
} |
|||
|
|||
u_int32_t |
|||
fifo_msg_get_vallen(FIFO_MSG fifo_msg) { |
|||
u_int32_t rval = fifo_msg->vallen; |
|||
rval = toku_dtoh32(rval); |
|||
return rval; |
|||
} |
|||
|
|||
XIDS |
|||
fifo_msg_get_xids(FIFO_MSG fifo_msg) { |
|||
XIDS rval = (XIDS) &fifo_msg->xids_and_key_and_val; |
|||
return rval; |
|||
} |
|||
|
|||
|
|||
static u_int32_t |
|||
fifo_msg_get_xids_size(FIFO_MSG fifo_msg) { |
|||
u_int32_t rval; |
|||
XIDS xids = fifo_msg_get_xids(fifo_msg); |
|||
rval = xids_get_size(xids); |
|||
return rval; |
|||
} |
|||
|
|||
|
|||
void * |
|||
fifo_msg_get_key(FIFO_MSG fifo_msg) { |
|||
void * rval; |
|||
u_int32_t xidslen = fifo_msg_get_xids_size(fifo_msg); |
|||
rval = (void*)fifo_msg->xids_and_key_and_val + xidslen; |
|||
return rval; |
|||
} |
|||
|
|||
void * |
|||
fifo_msg_get_val(FIFO_MSG fifo_msg) { |
|||
void * rval; |
|||
void * key = fifo_msg_get_key(fifo_msg); |
|||
u_int32_t keylen = fifo_msg_get_keylen(fifo_msg); |
|||
rval = key + keylen; |
|||
return rval; |
|||
} |
|||
|
|||
brt_msg_type |
|||
fifo_msg_get_type(FIFO_MSG fifo_msg) { |
|||
brt_msg_type rval = fifo_msg->type; |
|||
return rval; |
|||
} |
|||
|
|||
|
|||
// Finds size of a fifo msg. |
|||
u_int32_t |
|||
fifo_msg_get_size(FIFO_MSG fifo_msg) { |
|||
u_int32_t rval; |
|||
u_int32_t keylen = fifo_msg_get_keylen(fifo_msg); |
|||
u_int32_t vallen = fifo_msg_get_vallen(fifo_msg); |
|||
u_int32_t xidslen = fifo_msg_get_xids_size(fifo_msg); |
|||
rval = keylen + vallen + xidslen + sizeof(*fifo_msg); |
|||
return rval; |
|||
} |
|||
|
|||
// Return number of bytes required for a fifo_msg created from |
|||
// the given brt_msg |
|||
u_int32_t |
|||
fifo_msg_get_size_required(BRT_MSG brt_msg) { |
|||
u_int32_t rval; |
|||
u_int32_t keylen = brt_msg_get_keylen(brt_msg); |
|||
u_int32_t vallen = brt_msg_get_vallen(brt_msg); |
|||
XIDS xids = brt_msg_get_xids(brt_msg); |
|||
u_int32_t xidslen = xids_get_size(xids); |
|||
rval = keylen + vallen + xidslen + sizeof(struct fifo_msg_t); |
|||
return rval; |
|||
} |
|||
|
|||
void |
|||
fifo_msg_from_brt_msg(FIFO_MSG fifo_msg, BRT_MSG brt_msg) { |
|||
u_int32_t keylen_host = brt_msg_get_keylen(brt_msg); |
|||
u_int32_t vallen_host = brt_msg_get_vallen(brt_msg); |
|||
fifo_msg->type = brt_msg_get_type(brt_msg); |
|||
fifo_msg->keylen = toku_htod32(keylen_host); |
|||
fifo_msg->vallen = toku_htod32(vallen_host); |
|||
//Copy XIDS |
|||
XIDS xids = brt_msg_get_xids(brt_msg); |
|||
XIDS xids_target = fifo_msg_get_xids(fifo_msg); |
|||
u_int32_t xidslen = xids_get_size(xids); |
|||
memcpy(xids_target, xids, xidslen); |
|||
//Copy Key |
|||
void *key = brt_msg_get_key(brt_msg); |
|||
void *key_target = fifo_msg_get_key(fifo_msg); |
|||
memcpy(key_target, key, keylen_host); |
|||
//Copy Val |
|||
void *val = brt_msg_get_val(brt_msg); |
|||
void *val_target = fifo_msg_get_val(fifo_msg); |
|||
memcpy(val_target, val, vallen_host); |
|||
} |
|||
|
|||
newbrt/fifo_msg.h: @ -0,0 +1,37 @@ |
|||
/* -*- mode: C; c-basic-offset: 4 -*- */ |
|||
|
|||
#ident "Copyright (c) 2007, 2008 Tokutek Inc. All rights reserved." |
|||
|
|||
|
|||
/* The purpose of this file is to provide access to the fifo_msg, |
|||
* which is the stored representation of the brt_msg. |
|||
* |
|||
* NOTE: Accessor functions return all values in host byte order. |
|||
*/ |
|||
|
|||
|
|||
|
|||
|
|||
#ifndef FIFO_MSG_H |
|||
#define FIFO_MSG_H |
|||
|
|||
u_int32_t fifo_msg_get_keylen(FIFO_MSG fifo_msg); |
|||
|
|||
u_int32_t fifo_msg_get_vallen(FIFO_MSG fifo_msg); |
|||
|
|||
XIDS fifo_msg_get_xids(FIFO_MSG fifo_msg); |
|||
|
|||
void * fifo_msg_get_key(FIFO_MSG fifo_msg); |
|||
|
|||
void * fifo_msg_get_val(FIFO_MSG fifo_msg); |
|||
|
|||
brt_msg_type fifo_msg_get_type(FIFO_MSG fifo_msg); |
|||
|
|||
u_int32_t fifo_msg_get_size(FIFO_MSG fifo_msg); |
|||
|
|||
// Return number of bytes required for a fifo_msg created from |
|||
// the given brt_msg |
|||
u_int32_t fifo_msg_get_size_required(BRT_MSG brt_msg); |
|||
|
|||
#endif |
|||
|
|||
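For illustration only (not in the original changeset): a hedged sketch of the brt_msg to fifo_msg round trip implemented by the two files above, using only functions that appear in this diff. toku_malloc/toku_free from memory.h are assumed to be the appropriate allocator pair, matching their use elsewhere in newbrt.
{{{
#include <errno.h>
#include <toku_portability.h>
#include "brttypes.h"
#include "xids.h"
#include "brt_msg.h"
#include "fifo_msg.h"
#include "memory.h"        // toku_malloc / toku_free
#include "toku_assert.h"

// Pack an in-memory BRT_MSG into the stored fifo_msg format, then verify
// that the host-order accessors return the original values.
static int
example_fifo_round_trip(BRT_MSG brt_msg) {
    u_int32_t size = fifo_msg_get_size_required(brt_msg);  // header + xids + key + val
    FIFO_MSG fifo_msg = toku_malloc(size);
    if (fifo_msg == NULL) return ENOMEM;

    fifo_msg_from_brt_msg(fifo_msg, brt_msg);

    assert(fifo_msg_get_type(fifo_msg)   == brt_msg_get_type(brt_msg));
    assert(fifo_msg_get_keylen(fifo_msg) == brt_msg_get_keylen(brt_msg));
    assert(fifo_msg_get_vallen(fifo_msg) == brt_msg_get_vallen(brt_msg));
    assert(fifo_msg_get_size(fifo_msg)   == size);

    toku_free(fifo_msg);
    return 0;
}
}}}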
newbrt/omt-with-o1-cursors/omt.c (deleted): @ -1,769 +0,0 @@ |
|||
#ident "Copyright (c) 2007 Tokutek Inc. All rights reserved." |
|||
|
|||
#include <errno.h> |
|||
#include <sys/types.h> |
|||
#include <stdint.h> |
|||
|
|||
typedef void *OMTVALUE; |
|||
#include "omt.h" |
|||
#include "../newbrt/memory.h" |
|||
#include "../newbrt/toku_assert.h" |
|||
#include "../include/db.h" |
|||
#include "../newbrt/brttypes.h" |
|||
|
|||
typedef u_int32_t node_idx; |
|||
static const node_idx NODE_NULL = UINT32_MAX; |
|||
|
|||
typedef struct omt_node *OMT_NODE; |
|||
struct omt_node { |
|||
u_int32_t weight; /* Size of subtree rooted at this node (including this one). */ |
|||
node_idx left; /* Index of left subtree. */ |
|||
node_idx right; /* Index of right subtree. */ |
|||
OMTVALUE value; /* The value stored in the node. */ |
|||
}; |
|||
|
|||
struct omt { |
|||
node_idx root; |
|||
|
|||
u_int32_t node_capacity; |
|||
OMT_NODE nodes; |
|||
node_idx free_idx; |
|||
|
|||
u_int32_t tmparray_size; |
|||
node_idx* tmparray; |
|||
|
|||
OMTCURSOR associated; // the OMTs associated with this. |
|||
}; |
|||
|
|||
struct omt_cursor { |
|||
OMT omt; // The omt this cursor is associated with. NULL if not present. |
|||
u_int32_t max_pathlen; //Max (root to leaf) path length; |
|||
u_int32_t pathlen; //Length of current path |
|||
node_idx *path; |
|||
OMTCURSOR next,prev; // circular linked list of all OMTCURSORs associated with omt. |
|||
}; |
|||
|
|||
//Initial max size of root-to-leaf path |
|||
static const u_int32_t TOKU_OMTCURSOR_INITIAL_SIZE = 4; |
|||
|
|||
static int omt_create_internal(OMT *omtp, u_int32_t num_starting_nodes) { |
|||
if (num_starting_nodes < 2) num_starting_nodes = 2; |
|||
OMT MALLOC(result); |
|||
if (result==NULL) return errno; |
|||
result->root=NODE_NULL; |
|||
result->node_capacity = num_starting_nodes*2; |
|||
MALLOC_N(result->node_capacity, result->nodes); |
|||
if (result->nodes==NULL) { |
|||
toku_free(result); |
|||
return errno; |
|||
} |
|||
result->tmparray_size = num_starting_nodes*2; |
|||
MALLOC_N(result->tmparray_size, result->tmparray); |
|||
if (result->tmparray==NULL) { |
|||
toku_free(result->nodes); |
|||
toku_free(result); |
|||
return errno; |
|||
} |
|||
result->free_idx = 0; |
|||
result->associated = NULL; |
|||
*omtp = result; |
|||
return 0; |
|||
} |
|||
|
|||
int toku_omt_create (OMT *omtp) { |
|||
return omt_create_internal(omtp, 2); |
|||
} |
|||
|
|||
int toku_omt_cursor_create (OMTCURSOR *omtcp) { |
|||
OMTCURSOR MALLOC(c); |
|||
if (c==NULL) return errno; |
|||
c->omt = NULL; |
|||
c->next = c->prev = NULL; |
|||
c->max_pathlen = TOKU_OMTCURSOR_INITIAL_SIZE; |
|||
c->pathlen = 0; |
|||
MALLOC_N(c->max_pathlen, c->path); |
|||
if (c->path==NULL) { |
|||
toku_free(c); |
|||
return errno; |
|||
} |
|||
*omtcp = c; |
|||
return 0; |
|||
} |
|||
|
|||
void toku_omt_cursor_invalidate (OMTCURSOR c) { |
|||
if (c==NULL || c->omt==NULL) return; |
|||
if (c->next == c) { |
|||
// It's the last one. |
|||
c->omt->associated = NULL; |
|||
} else { |
|||
OMTCURSOR next = c->next; |
|||
OMTCURSOR prev = c->prev; |
|||
if (c->omt->associated == c) { |
|||
c->omt->associated = next; |
|||
} |
|||
next->prev = prev; |
|||
prev->next = next; |
|||
} |
|||
c->next = c->prev = NULL; |
|||
c->omt = NULL; |
|||
} |
|||
|
|||
void toku_omt_cursor_destroy (OMTCURSOR *p) { |
|||
toku_omt_cursor_invalidate(*p); |
|||
toku_free((*p)->path); |
|||
toku_free(*p); |
|||
*p = NULL; |
|||
} |
|||
|
|||
static void invalidate_cursors (OMT omt) { |
|||
OMTCURSOR assoced; |
|||
while ((assoced = omt->associated)) { |
|||
toku_omt_cursor_invalidate(assoced); |
|||
} |
|||
} |
|||
|
|||
static void associate (OMT omt, OMTCURSOR c) |
|||
{ |
|||
if (c->omt==omt) return; |
|||
toku_omt_cursor_invalidate(c); |
|||
if (omt->associated==NULL) { |
|||
c->prev = c; |
|||
c->next = c; |
|||
omt->associated = c; |
|||
} else { |
|||
c->prev = omt->associated->prev; |
|||
c->next = omt->associated; |
|||
omt->associated->prev->next = c; |
|||
omt->associated->prev = c; |
|||
} |
|||
c->omt = omt; |
|||
} |
|||
|
|||
void toku_omt_destroy(OMT *omtp) { |
|||
OMT omt=*omtp; |
|||
invalidate_cursors(omt); |
|||
toku_free(omt->nodes); |
|||
toku_free(omt->tmparray); |
|||
toku_free(omt); |
|||
*omtp=NULL; |
|||
} |
|||
|
|||
static inline u_int32_t nweight(OMT omt, node_idx idx) { |
|||
if (idx==NODE_NULL) return 0; |
|||
else return (omt->nodes+idx)->weight; |
|||
} |
|||
|
|||
u_int32_t toku_omt_size(OMT V) { |
|||
return nweight(V, V->root); |
|||
} |
|||
|
|||
static inline node_idx omt_node_malloc(OMT omt) { |
|||
assert(omt->free_idx < omt->node_capacity); |
|||
return omt->free_idx++; |
|||
} |
|||
|
|||
static inline void omt_node_free(OMT omt, node_idx idx) { |
|||
assert(idx < omt->node_capacity); |
|||
} |
|||
|
|||
static inline void fill_array_with_subtree_values(OMT omt, OMTVALUE *array, node_idx tree_idx) { |
|||
if (tree_idx==NODE_NULL) return; |
|||
OMT_NODE tree = omt->nodes+tree_idx; |
|||
fill_array_with_subtree_values(omt, array, tree->left); |
|||
array[nweight(omt, tree->left)] = tree->value; |
|||
fill_array_with_subtree_values(omt, array+nweight(omt, tree->left)+1, tree->right); |
|||
} |
|||
|
|||
// Example: numvalues=4, halfway=2, left side is values of size 2 |
|||
// right side is values+3 of size 1 |
|||
// numvalues=3, halfway=1, left side is values of size 1 |
|||
// right side is values+2 of size 1 |
|||
// numvalues=2, halfway=1, left side is values of size 1 |
|||
// right side is values+2 of size 0 |
|||
// numvalues=1, halfway=0, left side is values of size 0 |
|||
// right side is values of size 0. |
|||
static inline void create_from_sorted_array_internal(OMT omt, node_idx *n_idxp, |
|||
OMTVALUE *values, u_int32_t numvalues) { |
|||
if (numvalues==0) { |
|||
*n_idxp = NODE_NULL; |
|||
} else { |
|||
u_int32_t halfway = numvalues/2; |
|||
node_idx newidx = omt_node_malloc(omt); |
|||
OMT_NODE newnode = omt->nodes+newidx; |
|||
newnode->weight = numvalues; |
|||
newnode->value = values[halfway]; |
|||
create_from_sorted_array_internal(omt, &newnode->left, values, halfway); |
|||
create_from_sorted_array_internal(omt, &newnode->right, values+halfway+1, numvalues-(halfway+1)); |
|||
*n_idxp = newidx; |
|||
} |
|||
} |
|||
|
|||
int toku_omt_create_from_sorted_array(OMT *omtp, OMTVALUE *values, u_int32_t numvalues) { |
|||
OMT omt = NULL; |
|||
int r; |
|||
if ((r = omt_create_internal(&omt, numvalues))) return r; |
|||
create_from_sorted_array_internal(omt, &omt->root, values, numvalues); |
|||
*omtp=omt; |
|||
return 0; |
|||
} |
|||
|
|||
enum build_choice { MAYBE_REBUILD, JUST_RESIZE }; |
|||
|
|||
static inline int maybe_resize_and_rebuild(OMT omt, u_int32_t n, enum build_choice choice) { |
|||
node_idx *new_tmparray = NULL; |
|||
OMT_NODE new_nodes = NULL; |
|||
OMTVALUE *tmp_values = NULL; |
|||
int r = ENOSYS; |
|||
u_int32_t new_size = n<=2 ? 4 : 2*n; |
|||
|
|||
if (omt->tmparray_size<n || |
|||
(omt->tmparray_size/2 >= new_size)) { |
|||
/* Malloc and free instead of realloc (saves the memcpy). */ |
|||
MALLOC_N(new_size, new_tmparray); |
|||
if (new_tmparray==NULL) { r = errno; goto cleanup; } |
|||
} |
|||
/* Rebuild/realloc the nodes array iff any of the following: |
|||
* The array is smaller than the number of elements we want. |
|||
* We are increasing the number of elements and there is no free space. |
|||
* The array is too large. */ |
|||
u_int32_t num_nodes = nweight(omt, omt->root); |
|||
if ((omt->node_capacity/2 >= new_size) || |
|||
(omt->free_idx>=omt->node_capacity && num_nodes<n) || |
|||
(omt->node_capacity<n)) { |
|||
if (choice==MAYBE_REBUILD) { |
|||
MALLOC_N(num_nodes, tmp_values); |
|||
if (tmp_values==NULL) { r = errno; goto cleanup;} |
|||
} |
|||
MALLOC_N(new_size, new_nodes); |
|||
if (new_nodes==NULL) { r = errno; goto cleanup; } |
|||
} |
|||
|
|||
/* Nothing can fail now. Atomically update both sizes. */ |
|||
if (new_tmparray) { |
|||
toku_free(omt->tmparray); |
|||
omt->tmparray = new_tmparray; |
|||
omt->tmparray_size = new_size; |
|||
} |
|||
if (new_nodes) { |
|||
/* Rebuild the tree in the new array, leftshifted, in preorder */ |
|||
if (choice==MAYBE_REBUILD) { |
|||
fill_array_with_subtree_values(omt, tmp_values, omt->root); |
|||
} |
|||
toku_free(omt->nodes); |
|||
omt->nodes = new_nodes; |
|||
omt->node_capacity = new_size; |
|||
omt->free_idx = 0; /* Allocating from mempool starts over. */ |
|||
omt->root = NODE_NULL; |
|||
if (choice==MAYBE_REBUILD) { |
|||
create_from_sorted_array_internal(omt, &omt->root, tmp_values, num_nodes); |
|||
} |
|||
} |
|||
r = 0; |
|||
cleanup: |
|||
if (r!=0) { |
|||
if (new_tmparray) toku_free(new_tmparray); |
|||
if (new_nodes) toku_free(new_nodes); |
|||
} |
|||
if (tmp_values) toku_free(tmp_values); |
|||
return r; |
|||
} |
|||
|
|||
static inline void fill_array_with_subtree_idxs(OMT omt, node_idx *array, node_idx tree_idx) { |
|||
if (tree_idx==NODE_NULL) return; |
|||
OMT_NODE tree = omt->nodes+tree_idx; |
|||
fill_array_with_subtree_idxs(omt, array, tree->left); |
|||
array[nweight(omt, tree->left)] = tree_idx; |
|||
fill_array_with_subtree_idxs(omt, array+nweight(omt, tree->left)+1, tree->right); |
|||
} |
|||
|
|||
/* Reuses existing OMT_NODE structures (used for rebalancing). */ |
|||
static inline void rebuild_subtree_from_idxs(OMT omt, node_idx *n_idxp, node_idx *idxs, |
|||
u_int32_t numvalues) { |
|||
if (numvalues==0) { |
|||
*n_idxp=NODE_NULL; |
|||
} else { |
|||
u_int32_t halfway = numvalues/2; |
|||
node_idx newidx = idxs[halfway]; |
|||
OMT_NODE newnode = omt->nodes+newidx; |
|||
newnode->weight = numvalues; |
|||
// value is already in there. |
|||
rebuild_subtree_from_idxs(omt, &newnode->left, idxs, halfway); |
|||
rebuild_subtree_from_idxs(omt, &newnode->right, idxs+halfway+1, numvalues-(halfway+1)); |
|||
*n_idxp = newidx; |
|||
} |
|||
} |
|||
|
|||
static inline void rebalance(OMT omt, node_idx *n_idxp) { |
|||
node_idx idx = *n_idxp; |
|||
OMT_NODE n = omt->nodes+idx; |
|||
fill_array_with_subtree_idxs(omt, omt->tmparray, idx); |
|||
rebuild_subtree_from_idxs(omt, n_idxp, omt->tmparray, n->weight); |
|||
} |
|||
|
|||
static inline BOOL will_need_rebalance(OMT omt, node_idx n_idx, int leftmod, int rightmod) { |
|||
if (n_idx==NODE_NULL) return FALSE; |
|||
OMT_NODE n = omt->nodes+n_idx; |
|||
// one of the 1's is for the root. |
|||
// the other is to take ceil(n/2) |
|||
u_int32_t weight_left = nweight(omt, n->left) + leftmod; |
|||
u_int32_t weight_right = nweight(omt, n->right) + rightmod; |
|||
return ((1+weight_left < (1+1+weight_right)/2) |
|||
|| |
|||
(1+weight_right < (1+1+weight_left)/2)); |
|||
} |
|||
|
|||
static inline void insert_internal(OMT omt, node_idx *n_idxp, OMTVALUE value, u_int32_t index, node_idx **rebalance_idx) { |
|||
if (*n_idxp==NODE_NULL) { |
|||
assert(index==0); |
|||
node_idx newidx = omt_node_malloc(omt); |
|||
OMT_NODE newnode = omt->nodes+newidx; |
|||
newnode->weight = 1; |
|||
newnode->left = NODE_NULL; |
|||
newnode->right = NODE_NULL; |
|||
newnode->value = value; |
|||
*n_idxp = newidx; |
|||
} else { |
|||
node_idx idx = *n_idxp; |
|||
OMT_NODE n = omt->nodes+idx; |
|||
n->weight++; |
|||
if (index <= nweight(omt, n->left)) { |
|||
if (*rebalance_idx==NULL && will_need_rebalance(omt, idx, 1, 0)) { |
|||
*rebalance_idx = n_idxp; |
|||
} |
|||
insert_internal(omt, &n->left, value, index, rebalance_idx); |
|||
} else { |
|||
if (*rebalance_idx==NULL && will_need_rebalance(omt, idx, 0, 1)) { |
|||
*rebalance_idx = n_idxp; |
|||
} |
|||
u_int32_t sub_index = index-nweight(omt, n->left)-1; |
|||
insert_internal(omt, &n->right, value, sub_index, rebalance_idx); |
|||
} |
|||
} |
|||
} |
|||
|
|||
int toku_omt_insert_at(OMT omt, OMTVALUE value, u_int32_t index) { |
|||
int r; |
|||
invalidate_cursors(omt); |
|||
if (index>nweight(omt, omt->root)) return EINVAL; |
|||
if ((r=maybe_resize_and_rebuild(omt, 1+nweight(omt, omt->root), MAYBE_REBUILD))) return r; |
|||
node_idx* rebalance_idx = NULL; |
|||
insert_internal(omt, &omt->root, value, index, &rebalance_idx); |
|||
if (rebalance_idx) rebalance(omt, rebalance_idx); |
|||
return 0; |
|||
} |
|||
|
|||
static inline void set_at_internal(OMT omt, node_idx n_idx, OMTVALUE v, u_int32_t index) { |
|||
assert(n_idx!=NODE_NULL); |
|||
OMT_NODE n = omt->nodes+n_idx; |
|||
if (index<nweight(omt, n->left)) |
|||
set_at_internal(omt, n->left, v, index); |
|||
else if (index==nweight(omt, n->left)) { |
|||
n->value = v; |
|||
} else { |
|||
set_at_internal(omt, n->right, v, index-nweight(omt, n->left)-1); |
|||
} |
|||
} |
|||
|
|||
int toku_omt_set_at (OMT omt, OMTVALUE value, u_int32_t index) { |
|||
if (index>=nweight(omt, omt->root)) return EINVAL; |
|||
set_at_internal(omt, omt->root, value, index); |
|||
return 0; |
|||
} |
|||
|
|||
static inline void delete_internal(OMT omt, node_idx *n_idxp, u_int32_t index, OMTVALUE *vp, node_idx **rebalance_idx) { |
|||
assert(*n_idxp!=NODE_NULL); |
|||
OMT_NODE n = omt->nodes+*n_idxp; |
|||
if (index < nweight(omt, n->left)) { |
|||
n->weight--; |
|||
if (*rebalance_idx==NULL && will_need_rebalance(omt, *n_idxp, -1, 0)) { |
|||
*rebalance_idx = n_idxp; |
|||
} |
|||
delete_internal(omt, &n->left, index, vp, rebalance_idx); |
|||
} else if (index == nweight(omt, n->left)) { |
|||
if (n->left==NODE_NULL) { |
|||
u_int32_t idx = *n_idxp; |
|||
*n_idxp = n->right; |
|||
*vp = n->value; |
|||
omt_node_free(omt, idx); |
|||
} else if (n->right==NODE_NULL) { |
|||
u_int32_t idx = *n_idxp; |
|||
*n_idxp = n->left; |
|||
*vp = n->value; |
|||
omt_node_free(omt, idx); |
|||
} else { |
|||
OMTVALUE zv; |
|||
// delete the successor of index, get the value, and store it here. |
|||
if (*rebalance_idx==NULL && will_need_rebalance(omt, *n_idxp, 0, -1)) { |
|||
*rebalance_idx = n_idxp; |
|||
} |
|||
delete_internal(omt, &n->right, 0, &zv, rebalance_idx); |
|||
n->value = zv; |
|||
n->weight--; |
|||
} |
|||
} else { |
|||
n->weight--; |
|||
if (*rebalance_idx==NULL && will_need_rebalance(omt, *n_idxp, 0, -1)) { |
|||
*rebalance_idx = n_idxp; |
|||
} |
|||
delete_internal(omt, &n->right, index-nweight(omt, n->left)-1, vp, rebalance_idx); |
|||
} |
|||
} |
|||
|
|||
int toku_omt_delete_at(OMT omt, u_int32_t index) { |
|||
OMTVALUE v; |
|||
int r; |
|||
invalidate_cursors(omt); |
|||
if (index>=nweight(omt, omt->root)) return EINVAL; |
|||
if ((r=maybe_resize_and_rebuild(omt, -1+nweight(omt, omt->root), MAYBE_REBUILD))) return r; |
|||
node_idx* rebalance_idx = NULL; |
|||
delete_internal(omt, &omt->root, index, &v, &rebalance_idx); |
|||
if (rebalance_idx) rebalance(omt, rebalance_idx); |
|||
return 0; |
|||
} |
|||
|
|||
static inline void omtcursor_stack_pop(OMTCURSOR c) { |
|||
assert(c->pathlen); |
|||
c->pathlen--; |
|||
} |
|||
|
|||
static inline int omtcursor_stack_push(OMTCURSOR c, node_idx idx) { |
|||
if (c->max_pathlen-1<=c->pathlen) { |
|||
//Increase max_pathlen |
|||
u_int32_t new_max = c->max_pathlen*2; |
|||
node_idx *tmp_path = toku_realloc(c->path, new_max*sizeof(*c->path)); |
|||
if (tmp_path==NULL) return errno; |
|||
c->path = tmp_path; |
|||
c->max_pathlen = new_max; |
|||
} |
|||
c->path[c->pathlen++] = idx; |
|||
return 0; |
|||
} |
|||
|
|||
static inline node_idx omtcursor_stack_peek(OMTCURSOR c) { |
|||
return c->path[c->pathlen-1]; |
|||
} |
|||
|
|||
static inline int fetch_internal(OMT V, node_idx idx, u_int32_t i, OMTVALUE *v, OMTCURSOR c) { |
|||
OMT_NODE n = V->nodes+idx; |
|||
int r; |
|||
if (c!=NULL && (r=omtcursor_stack_push(c, idx))) return r; |
|||
if (i < nweight(V, n->left)) { |
|||
return fetch_internal(V, n->left, i, v, c); |
|||
} else if (i == nweight(V, n->left)) { |
|||
*v = n->value; |
|||
return 0; |
|||
} else { |
|||
return fetch_internal(V, n->right, i-nweight(V, n->left)-1, v, c); |
|||
} |
|||
} |
|||
|
|||
int toku_omt_fetch(OMT V, u_int32_t i, OMTVALUE *v, OMTCURSOR c) { |
|||
if (i>=nweight(V, V->root)) return EINVAL; |
|||
if (c) associate(V,c); |
|||
int r = fetch_internal(V, V->root, i, v, c); |
|||
if (c && r!=0) { |
|||
toku_omt_cursor_invalidate(c); |
|||
} |
|||
return r; |
|||
} |
|||
|
|||
static inline int iterate_internal(OMT omt, u_int32_t left, u_int32_t right, |
|||
node_idx n_idx, u_int32_t idx, |
|||
int (*f)(OMTVALUE, u_int32_t, void*), void*v) { |
|||
int r; |
|||
if (n_idx==NODE_NULL) return 0; |
|||
OMT_NODE n = omt->nodes+n_idx; |
|||
u_int32_t idx_root = idx+nweight(omt,n->left); |
|||
if (left< idx_root && (r=iterate_internal(omt, left, right, n->left, idx, f, v))) return r; |
|||
if (left<=idx_root && idx_root<right && (r=f(n->value, idx_root, v))) return r; |
|||
if (idx_root+1<right) return iterate_internal(omt, left, right, n->right, idx_root+1, f, v); |
|||
return 0; |
|||
} |
|||
|
|||
int toku_omt_iterate(OMT omt, int (*f)(OMTVALUE, u_int32_t, void*), void*v) { |
|||
return iterate_internal(omt, 0, nweight(omt, omt->root), omt->root, 0, f, v); |
|||
} |
|||
|
|||
int toku_omt_iterate_on_range(OMT omt, u_int32_t left, u_int32_t right, int (*f)(OMTVALUE, u_int32_t, void*), void*v) { |
|||
return iterate_internal(omt, left, right, omt->root, 0, f, v); |
|||
} |
|||
|
|||
int toku_omt_insert(OMT omt, OMTVALUE value, int(*h)(OMTVALUE, void*v), void *v, u_int32_t *index) { |
|||
int r; |
|||
u_int32_t idx; |
|||
|
|||
invalidate_cursors(omt); |
|||
|
|||
r = toku_omt_find_zero(omt, h, v, NULL, &idx, NULL); |
|||
if (r==0) { |
|||
if (index) *index = idx; |
|||
return DB_KEYEXIST; |
|||
} |
|||
if (r!=DB_NOTFOUND) return r; |
|||
|
|||
if ((r = toku_omt_insert_at(omt, value, idx))) return r; |
|||
if (index) *index = idx; |
|||
|
|||
return 0; |
|||
} |
|||
|
|||
static inline int find_internal_zero(OMT omt, node_idx n_idx, int (*h)(OMTVALUE, void*extra), void*extra, OMTVALUE *value, u_int32_t *index, OMTCURSOR c) |
|||
// requires: index!=NULL |
|||
{ |
|||
int r; |
|||
if (n_idx==NODE_NULL) { |
|||
*index = 0; |
|||
return DB_NOTFOUND; |
|||
} |
|||
if (c!=NULL && (r=omtcursor_stack_push(c, n_idx))) return r; |
|||
OMT_NODE n = omt->nodes+n_idx; |
|||
int hv = h(n->value, extra); |
|||
if (hv<0) { |
|||
r = find_internal_zero(omt, n->right, h, extra, value, index, c); |
|||
*index += nweight(omt, n->left)+1; |
|||
return r; |
|||
} else if (hv>0) { |
|||
return find_internal_zero(omt, n->left, h, extra, value, index, c); |
|||
} else { |
|||
r = find_internal_zero(omt, n->left, h, extra, value, index, c); |
|||
if (r==DB_NOTFOUND) { |
|||
*index = nweight(omt, n->left); |
|||
if (value) *value = n->value; |
|||
if (c!=NULL) { |
|||
//Truncate the saved cursor path at n_idx. |
|||
while (omtcursor_stack_peek(c)!=n_idx) omtcursor_stack_pop(c); |
|||
} |
|||
r = 0; |
|||
} |
|||
return r; |
|||
} |
|||
} |
|||
|
|||
int toku_omt_find_zero(OMT V, int (*h)(OMTVALUE, void*extra), void*extra, OMTVALUE *value, u_int32_t *index, OMTCURSOR c) { |
|||
//Index can be modified before a cursor error, so we must use a temp. |
|||
u_int32_t tmp_index; |
|||
if (c) associate(V,c); |
|||
int r = find_internal_zero(V, V->root, h, extra, value, &tmp_index, c); |
|||
if (c && r!=0) { |
|||
toku_omt_cursor_invalidate(c); |
|||
} |
|||
if ((r==0 || r==DB_NOTFOUND) && index!=NULL) *index = tmp_index; |
|||
return r; |
|||
} |
|||
|
|||
// If direction <0 then find the largest i such that h(V_i,extra)<0. |
|||
static inline int find_internal_minus(OMT omt, node_idx n_idx, int (*h)(OMTVALUE, void*extra), void*extra, OMTVALUE *value, u_int32_t *index, OMTCURSOR c) |
|||
// requires: index!=NULL |
|||
{ |
|||
int r; |
|||
if (n_idx==NODE_NULL) return DB_NOTFOUND; |
|||
if (c!=NULL && (r=omtcursor_stack_push(c, n_idx))) return r; |
|||
OMT_NODE n = omt->nodes+n_idx; |
|||
int hv = h(n->value, extra); |
|||
if (hv<0) { |
|||
r = find_internal_minus(omt, n->right, h, extra, value, index, c); |
|||
if (r==0) *index += nweight(omt, n->left)+1; |
|||
else if (r==DB_NOTFOUND) { |
|||
*index = nweight(omt, n->left); |
|||
if (value!=NULL) *value = n->value; |
|||
if (c!=NULL) { |
|||
//Truncate the saved cursor path at n_idx. |
|||
while (omtcursor_stack_peek(c)!=n_idx) omtcursor_stack_pop(c); |
|||
} |
|||
r = 0; |
|||
} |
|||
return r; |
|||
} else { |
|||
return find_internal_minus(omt, n->left, h, extra, value, index, c); |
|||
} |
|||
} |
|||
|
|||
// If direction >0 then find the smallest i such that h(V_i,extra)>0. |
|||
static inline int find_internal_plus(OMT omt, node_idx n_idx, int (*h)(OMTVALUE, void*extra), void*extra, OMTVALUE *value, u_int32_t *index, OMTCURSOR c) |
|||
// requires: index!=NULL |
|||
{ |
|||
int r; |
|||
if (n_idx==NODE_NULL) return DB_NOTFOUND; |
|||
if (c!=NULL && (r=omtcursor_stack_push(c, n_idx))) return r; |
|||
OMT_NODE n = omt->nodes+n_idx; |
|||
int hv = h(n->value, extra); |
|||
if (hv>0) { |
|||
r = find_internal_plus(omt, n->left, h, extra, value, index, c); |
|||
if (r==DB_NOTFOUND) { |
|||
*index = nweight(omt, n->left); |
|||
if (value!=NULL) *value = n->value; |
|||
if (c!=NULL) { |
|||
//Truncate the saved cursor path at n_idx. |
|||
while (omtcursor_stack_peek(c)!=n_idx) omtcursor_stack_pop(c); |
|||
} |
|||
r = 0; |
|||
} |
|||
return r; |
|||
} else { |
|||
r = find_internal_plus(omt, n->right, h, extra, value, index, c); |
|||
if (r==0) *index += nweight(omt, n->left)+1; |
|||
return r; |
|||
} |
|||
} |
|||
|
|||
int toku_omt_find(OMT V, int (*h)(OMTVALUE, void*extra), void*extra, int direction, OMTVALUE *value, u_int32_t *index, OMTCURSOR c) { |
|||
u_int32_t tmp_index; |
|||
int r; |
|||
if (index==NULL) index=&tmp_index; |
|||
if (c) associate(V,c); |
|||
if (direction==0) { |
|||
abort(); |
|||
} else if (direction<0) { |
|||
r = find_internal_minus(V, V->root, h, extra, value, index, c); |
|||
} else { |
|||
r = find_internal_plus( V, V->root, h, extra, value, index, c); |
|||
} |
|||
if (c && r!=0) { |
|||
toku_omt_cursor_invalidate(c); |
|||
} |
|||
return r; |
|||
} |
|||
|
|||
int toku_omt_split_at(OMT omt, OMT *newomtp, u_int32_t index) { |
|||
int r = ENOSYS; |
|||
OMT newomt = NULL; |
|||
OMTVALUE *tmp_values = NULL; |
|||
invalidate_cursors(omt); |
|||
if (index>nweight(omt, omt->root)) { r = EINVAL; goto cleanup; } |
|||
u_int32_t newsize = nweight(omt, omt->root)-index; |
|||
if ((r = omt_create_internal(&newomt, newsize))) goto cleanup; |
|||
MALLOC_N(nweight(omt, omt->root), tmp_values); |
|||
if (tmp_values==NULL) { r = errno; goto cleanup; } |
|||
fill_array_with_subtree_values(omt, tmp_values, omt->root); |
|||
// Modify omt's array at the last possible moment, since after this nothing can fail. |
|||
if ((r = maybe_resize_and_rebuild(omt, index, TRUE))) goto cleanup; |
|||
create_from_sorted_array_internal(omt, &omt->root, tmp_values, index); |
|||
create_from_sorted_array_internal(newomt, &newomt->root, tmp_values+index, newsize); |
|||
*newomtp = newomt; |
|||
r = 0; |
|||
cleanup: |
|||
if (r!=0) { |
|||
if (newomt) toku_omt_destroy(&newomt); |
|||
} |
|||
if (tmp_values) toku_free(tmp_values); |
|||
return r; |
|||
} |
|||
|
|||
int toku_omt_merge(OMT leftomt, OMT rightomt, OMT *newomtp) { |
|||
int r = ENOSYS; |
|||
OMT newomt = NULL; |
|||
OMTVALUE *tmp_values = NULL; |
|||
invalidate_cursors(leftomt); |
|||
invalidate_cursors(rightomt); |
|||
u_int32_t newsize = toku_omt_size(leftomt)+toku_omt_size(rightomt); |
|||
if ((r = omt_create_internal(&newomt, newsize))) goto cleanup; |
|||
MALLOC_N(newsize, tmp_values); |
|||
if (tmp_values==NULL) { r = errno; goto cleanup; } |
|||
|
|||
fill_array_with_subtree_values(leftomt, tmp_values, leftomt->root); |
|||
fill_array_with_subtree_values(rightomt, tmp_values+toku_omt_size(leftomt), rightomt->root); |
|||
create_from_sorted_array_internal(newomt, &newomt->root, tmp_values, newsize); |
|||
toku_omt_destroy(&leftomt); |
|||
toku_omt_destroy(&rightomt); |
|||
*newomtp = newomt; |
|||
r = 0; |
|||
cleanup: |
|||
if (r!=0) { |
|||
if (newomt) toku_omt_destroy(&newomt); |
|||
} |
|||
if (tmp_values) toku_free(tmp_values); |
|||
return r; |
|||
} |
|||
|
|||
void toku_omt_clear(OMT omt) { |
|||
invalidate_cursors(omt); |
|||
omt->free_idx = 0; |
|||
omt->root = NODE_NULL; |
|||
} |
|||
|
|||
unsigned long toku_omt_memory_size (OMT omt) { |
|||
return sizeof(*omt)+omt->node_capacity*sizeof(omt->nodes[0]) + omt->tmparray_size*sizeof(omt->tmparray[0]); |
|||
} |
|||
|
|||
int toku_omt_cursor_is_valid (OMTCURSOR c) { |
|||
return c->omt!=NULL; |
|||
} |
|||
|
|||
static inline void omtcursor_current_internal(OMTCURSOR c, OMTVALUE *v) { |
|||
*v = c->omt->nodes[omtcursor_stack_peek(c)].value; |
|||
} |
|||
|
|||
static inline int omtcursor_next_internal(OMTCURSOR c) { |
|||
OMT_NODE current = c->omt->nodes+omtcursor_stack_peek(c); |
|||
if (current->right!=NODE_NULL) { |
|||
//Enter into subtree |
|||
if (omtcursor_stack_push(c, current->right)) return EINVAL; |
|||
current = c->omt->nodes+current->right; |
|||
while (current->left!=NODE_NULL) { |
|||
if (omtcursor_stack_push(c, current->left)) return EINVAL; |
|||
current = c->omt->nodes+current->left; |
|||
} |
|||
return 0; |
|||
} |
|||
else { |
|||
//Pop the stack till we remove a left child. |
|||
node_idx parent_idx = omtcursor_stack_peek(c); |
|||
node_idx child_idx; |
|||
while (c->pathlen>=2) { |
|||
child_idx = parent_idx; |
|||
omtcursor_stack_pop(c); |
|||
parent_idx = omtcursor_stack_peek(c); |
|||
if (c->omt->nodes[parent_idx].left==child_idx) return 0; |
|||
} |
|||
return EINVAL; |
|||
} |
|||
} |
|||
|
|||
int toku_omt_cursor_next (OMTCURSOR c, OMTVALUE *v) { |
|||
if (c->omt == NULL) return EINVAL; |
|||
int r = omtcursor_next_internal(c); |
|||
if (r!=0) toku_omt_cursor_invalidate(c); |
|||
else omtcursor_current_internal(c, v); |
|||
return r; |
|||
} |
|||
|
|||
static inline int omtcursor_prev_internal(OMTCURSOR c) { |
|||
OMT_NODE current = c->omt->nodes+omtcursor_stack_peek(c); |
|||
if (current->left!=NODE_NULL) { |
|||
//Enter into subtree |
|||
if (omtcursor_stack_push(c, current->left)) return EINVAL; |
|||
current = c->omt->nodes+current->left; |
|||
while (current->right!=NODE_NULL) { |
|||
if (omtcursor_stack_push(c, current->right)) return EINVAL; |
|||
current = c->omt->nodes+current->right; |
|||
} |
|||
return 0; |
|||
} |
|||
else { |
|||
//Pop the stack till we remove a right child. |
|||
node_idx parent_idx = omtcursor_stack_peek(c); |
|||
node_idx child_idx; |
|||
while (c->pathlen>=2) { |
|||
child_idx = parent_idx; |
|||
omtcursor_stack_pop(c); |
|||
parent_idx = omtcursor_stack_peek(c); |
|||
if (c->omt->nodes[parent_idx].right==child_idx) return 0; |
|||
} |
|||
return EINVAL; |
|||
} |
|||
} |
|||
|
|||
int toku_omt_cursor_prev (OMTCURSOR c, OMTVALUE *v) { |
|||
if (c->omt == NULL) return EINVAL; |
|||
int r = omtcursor_prev_internal(c); |
|||
if (r!=0) toku_omt_cursor_invalidate(c); |
|||
else omtcursor_current_internal(c, v); |
|||
return r; |
|||
} |
|||
|
|||
int toku_omt_cursor_current (OMTCURSOR c, OMTVALUE *v) { |
|||
if (c->omt == NULL) return EINVAL; |
|||
omtcursor_current_internal(c, v); |
|||
return 0; |
|||
} |
|||
|
|||
newbrt/roll.h: @ -0,0 +1,24 @@ |
|||
/* -*- mode: C; c-basic-offset: 4 -*- */ |
|||
#ident "$Id: roll.c 12588 2009-06-09 00:05:02Z yfogel $" |
|||
#ident "Copyright (c) 2007, 2008, 2009 Tokutek Inc. All rights reserved." |
|||
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." |
|||
|
|||
#ifndef TOKUDB_ROLL_H |
|||
#define TOKUDB_ROLL_H |
|||
// these flags control whether or not we send commit messages for |
|||
// various operations |
|||
|
|||
// When a transaction is committed, should we send a BRT_COMMIT message |
|||
// for each BRT_INSERT message sent earlier by the transaction? |
|||
#define TOKU_DO_COMMIT_CMD_INSERT 0 |
|||
|
|||
// When a transaction is committed, should we send a BRT_COMMIT message |
|||
// for each BRT_DELETE_ANY message sent earlier by the transaction? |
|||
#define TOKU_DO_COMMIT_CMD_DELETE 1 |
|||
|
|||
// When a transaction is committed, should we send a BRT_COMMIT message |
|||
// for each BRT_DELETE_BOTH message sent earlier by the transaction? |
|||
#define TOKU_DO_COMMIT_CMD_DELETE_BOTH 1 |
|||
|
|||
#endif |
|||
|
|||
newbrt/tests/test-leafentry-nested.c: @ -0,0 +1,431 @@ |
|||
#include <toku_portability.h> |
|||
#include <string.h> |
|||
|
|||
#include "test.h" |
|||
#include "brttypes.h" |
|||
#include "includes.h" |
|||
#include "ule.h" |
|||
|
|||
enum {MAX_SIZE = 256}; |
|||
|
|||
static void |
|||
verify_ule_equal(ULE a, ULE b) { |
|||
assert(a->num_uxrs > 0); |
|||
assert(a->num_uxrs <= MAX_TRANSACTION_RECORDS); |
|||
assert(a->num_uxrs == b->num_uxrs); |
|||
assert(a->keylen == b->keylen); |
|||
assert(memcmp(a->keyp, b->keyp, a->keylen) == 0); |
|||
u_int32_t i; |
|||
for (i = 0; i < a->num_uxrs; i++) { |
|||
assert(a->uxrs[i].type == b->uxrs[i].type); |
|||
assert(a->uxrs[i].xid == b->uxrs[i].xid); |
|||
if (a->uxrs[i].type == XR_INSERT) { |
|||
assert(a->uxrs[i].vallen == b->uxrs[i].vallen); |
|||
assert(memcmp(a->uxrs[i].valp, b->uxrs[i].valp, a->uxrs[i].vallen) == 0); |
|||
} |
|||
} |
|||
} |
|||
|
|||
static void |
|||
fillrandom(u_int8_t buf[MAX_SIZE], u_int32_t length) { |
|||
assert(length < MAX_SIZE); |
|||
u_int32_t i; |
|||
for (i = 0; i < length; i++) { |
|||
buf[i] = random() & 0xFF; |
|||
} |
|||
} |
|||
|
|||
static void |
|||
test_le_offset_is(LEAFENTRY le, void *field, size_t expected_offset) { |
|||
size_t le_address = (size_t) le; |
|||
size_t field_address = (size_t) field; |
|||
assert(field_address >= le_address); |
|||
size_t actual_offset = field_address - le_address; |
|||
assert(actual_offset == expected_offset); |
|||
} |
|||
|
|||
//Fixed offsets in a packed leafentry. |
|||
enum { |
|||
LE_OFFSET_NUM = 0, |
|||
LE_OFFSET_KEYLEN = 1+LE_OFFSET_NUM, |
|||
LE_OFFSET_VALLEN = 4+LE_OFFSET_KEYLEN, //Vallen of innermost insert record |
|||
LE_OFFSET_VARIABLE = 4+LE_OFFSET_VALLEN |
|||
}; |
|||
|
|||
static void |
|||
test_le_fixed_offsets (void) { |
|||
LEAFENTRY XMALLOC(le); |
|||
test_le_offset_is(le, &le->num_xrs, LE_OFFSET_NUM); |
|||
test_le_offset_is(le, &le->keylen, LE_OFFSET_KEYLEN); |
|||
test_le_offset_is(le, &le->innermost_inserted_vallen, LE_OFFSET_VALLEN); |
|||
toku_free(le); |
|||
} |
|||
|
|||
//Fixed offsets in a leafentry with no uncommitted transaction records. |
|||
//(Note, there is no type required.) |
|||
enum { |
|||
LE_COMMITTED_OFFSET_KEY = LE_OFFSET_VARIABLE |
|||
}; |
|||
|
|||
static void |
|||
test_le_committed_offsets (void) { |
|||
LEAFENTRY XMALLOC(le); |
|||
test_le_offset_is(le, &le->u.comm.key_val, LE_COMMITTED_OFFSET_KEY); |
|||
toku_free(le); |
|||
} |
|||
|
|||
//Fixed offsets in a leafentry with uncommitted transaction records. |
|||
enum { |
|||
LE_PROVISIONAL_OFFSET_TYPE = LE_OFFSET_VARIABLE, //Type of innermost record |
|||
LE_PROVISIONAL_OFFSET_XID = 1+LE_PROVISIONAL_OFFSET_TYPE, //XID of outermost noncommitted record |
|||
LE_PROVISIONAL_OFFSET_KEY = 8+LE_PROVISIONAL_OFFSET_XID |
|||
}; |
|||
|
|||
static void |
|||
test_le_provisional_offsets (void) { |
|||
LEAFENTRY XMALLOC(le); |
|||
test_le_offset_is(le, &le->u.prov.innermost_type, LE_PROVISIONAL_OFFSET_TYPE); |
|||
test_le_offset_is(le, &le->u.prov.xid_outermost_uncommitted, LE_PROVISIONAL_OFFSET_XID); |
|||
test_le_offset_is(le, &le->u.prov.key_val_xrs, LE_PROVISIONAL_OFFSET_KEY); |
|||
toku_free(le); |
|||
} |
|||
|
|||
//We use a packed struct to represent a leafentry. |
|||
//We want to make sure the compiler correctly represents the offsets. |
|||
//This test verifies all offsets in a packed leafentry correspond to the required memory format. |
|||
static void |
|||
test_le_offsets (void) { |
|||
test_le_fixed_offsets(); |
|||
test_le_committed_offsets(); |
|||
test_le_provisional_offsets(); |
|||
} |
|||
|
|||
static void |
|||
test_ule_packs_to_nothing (ULE ule) { |
|||
size_t memsize; |
|||
size_t disksize; |
|||
LEAFENTRY le; |
|||
int r = le_pack(ule, |
|||
&memsize, &disksize, |
|||
&le, NULL, NULL, NULL); |
|||
assert(r==0); |
|||
assert(le==NULL); |
|||
} |
|||
|
|||
//A leafentry must contain at least one 'insert' (all deletes means the leafentry |
|||
//should not exist). |
|||
//Verify that 'le_pack' of any set of all deletes ends up not creating a leafentry. |
|||
static void |
|||
test_le_empty_packs_to_nothing (void) { |
|||
ULE_S ule; |
|||
|
|||
int key = random(); //Arbitrary number |
|||
//Set up defaults. |
|||
ule.keylen = sizeof(key); |
|||
ule.keyp = &key; |
|||
ule.uxrs[0].type = XR_DELETE; |
|||
ule.uxrs[0].xid = 0; |
|||
u_int8_t num_xrs; |
|||
for (num_xrs = 1; num_xrs < MAX_TRANSACTION_RECORDS; num_xrs++) { |
|||
if (num_xrs > 1) { |
|||
ule.uxrs[num_xrs-1].type = XR_DELETE; |
|||
ule.uxrs[num_xrs-1].xid  = ule.uxrs[num_xrs-2].xid + (random() % 32 + 1); //Arbitrary number; xids must be strictly increasing |
|||
} |
|||
ule.num_uxrs = num_xrs; |
|||
test_ule_packs_to_nothing(&ule); |
|||
if (num_xrs > 2 && num_xrs % 4) { |
|||
//Set some of them to placeholders instead of deletes |
|||
ule.uxrs[num_xrs-2].type = XR_PLACEHOLDER; |
|||
} |
|||
test_ule_packs_to_nothing(&ule); |
|||
} |
|||
} |
|||
|
|||
static void |
|||
le_verify_accessors(LEAFENTRY le, ULE ule, |
|||
size_t pre_calculated_memsize, |
|||
size_t pre_calculated_disksize) { |
|||
assert(le); |
|||
assert(ule->num_uxrs > 0); |
|||
assert(ule->num_uxrs <= MAX_TRANSACTION_RECORDS); |
|||
assert(ule->uxrs[ule->num_uxrs-1].type != XR_PLACEHOLDER); |
|||
//Extract expected values from ULE |
|||
size_t memsize = le_memsize_from_ule(ule); |
|||
size_t disksize = le_memsize_from_ule(ule); |
|||
|
|||
void *latest_key = ule->uxrs[ule->num_uxrs-1].type == XR_DELETE ? NULL : ule->keyp; |
|||
u_int32_t latest_keylen = ule->uxrs[ule->num_uxrs-1].type == XR_DELETE ? 0 : ule->keylen; |
|||
void *key = ule->keyp; |
|||
u_int32_t keylen = ule->keylen; |
|||
void *latest_val = ule->uxrs[ule->num_uxrs-1].type == XR_DELETE ? NULL : ule->uxrs[ule->num_uxrs-1].valp; |
|||
u_int32_t latest_vallen = ule->uxrs[ule->num_uxrs-1].type == XR_DELETE ? 0 : ule->uxrs[ule->num_uxrs-1].vallen; |
|||
void *innermost_inserted_val; |
|||
u_int32_t innermost_inserted_vallen; |
|||
{ |
|||
int i; |
|||
for (i = ule->num_uxrs - 1; i >= 0; i--) { |
|||
if (ule->uxrs[i].type == XR_INSERT) { |
|||
innermost_inserted_val = ule->uxrs[i].valp; |
|||
innermost_inserted_vallen = ule->uxrs[i].vallen; |
|||
goto found_insert; |
|||
} |
|||
} |
|||
assert(FALSE); |
|||
} |
|||
found_insert:; |
|||
TXNID outermost_uncommitted_xid = ule->num_uxrs == 1 ? 0 : ule->uxrs[1].xid; |
|||
int is_provdel = ule->uxrs[ule->num_uxrs-1].type == XR_DELETE; |
|||
|
|||
assert(le!=NULL); |
|||
//Verify all accessors |
|||
assert(memsize == pre_calculated_memsize); |
|||
assert(disksize == pre_calculated_disksize); |
|||
assert(memsize == disksize); |
|||
assert(memsize == leafentry_memsize(le)); |
|||
assert(disksize == leafentry_disksize(le)); |
|||
{ |
|||
u_int32_t test_keylen; |
|||
void* test_keyp = le_latest_key_and_len(le, &test_keylen); |
|||
if (latest_key != NULL) assert(test_keyp != latest_key); |
|||
assert(test_keylen == latest_keylen); |
|||
assert(memcmp(test_keyp, latest_key, test_keylen) == 0); |
|||
assert(le_latest_key(le) == test_keyp); |
|||
assert(le_latest_keylen(le) == test_keylen); |
|||
} |
|||
{ |
|||
u_int32_t test_keylen; |
|||
void* test_keyp = le_key_and_len(le, &test_keylen); |
|||
if (key != NULL) assert(test_keyp != key); |
|||
assert(test_keylen == keylen); |
|||
assert(memcmp(test_keyp, key, test_keylen) == 0); |
|||
assert(le_key(le) == test_keyp); |
|||
assert(le_keylen(le) == test_keylen); |
|||
} |
|||
{ |
|||
u_int32_t test_vallen; |
|||
void* test_valp = le_latest_val_and_len(le, &test_vallen); |
|||
if (latest_val != NULL) assert(test_valp != latest_val); |
|||
assert(test_vallen == latest_vallen); |
|||
assert(memcmp(test_valp, latest_val, test_vallen) == 0); |
|||
assert(le_latest_val(le) == test_valp); |
|||
assert(le_latest_vallen(le) == test_vallen); |
|||
} |
|||
{ |
|||
u_int32_t test_vallen; |
|||
void* test_valp = le_innermost_inserted_val_and_len(le, &test_vallen); |
|||
if (innermost_inserted_val != NULL) assert(test_valp != innermost_inserted_val); |
|||
assert(test_vallen == innermost_inserted_vallen); |
|||
assert(memcmp(test_valp, innermost_inserted_val, test_vallen) == 0); |
|||
assert(le_innermost_inserted_val(le) == test_valp); |
|||
assert(le_innermost_inserted_vallen(le) == test_vallen); |
|||
} |
|||
{ |
|||
assert(le_outermost_uncommitted_xid(le) == outermost_uncommitted_xid); |
|||
} |
|||
{ |
|||
assert((le_is_provdel(le)==0) == (is_provdel==0)); |
|||
} |
|||
} |
|||
|
|||
|
|||
|
|||
static void |
|||
test_le_pack_committed (void) { |
|||
ULE_S ule; |
|||
|
|||
u_int8_t key[MAX_SIZE]; |
|||
u_int8_t val[MAX_SIZE]; |
|||
u_int32_t keysize; |
|||
u_int32_t valsize; |
|||
for (keysize = 0; keysize < MAX_SIZE; keysize += (random() % MAX_SIZE) + 1) { |
|||
for (valsize = 0; valsize < MAX_SIZE; valsize += (random() % MAX_SIZE) + 1) { |
|||
fillrandom(key, keysize); |
|||
fillrandom(val, valsize); |
|||
|
|||
ule.num_uxrs = 1; |
|||
ule.keylen = keysize; |
|||
ule.keyp = key; |
|||
ule.uxrs[0].type = XR_INSERT; |
|||
ule.uxrs[0].xid = 0; |
|||
ule.uxrs[0].valp = val; |
|||
ule.uxrs[0].vallen = valsize; |
|||
|
|||
size_t memsize; |
|||
size_t disksize; |
|||
LEAFENTRY le; |
|||
int r = le_pack(&ule, |
|||
&memsize, &disksize, |
|||
&le, NULL, NULL, NULL); |
|||
assert(r==0); |
|||
assert(le!=NULL); |
|||
le_verify_accessors(le, &ule, memsize, disksize); |
|||
ULE_S tmp_ule; |
|||
le_unpack(&tmp_ule, le); |
|||
verify_ule_equal(&ule, &tmp_ule); |
|||
LEAFENTRY tmp_le; |
|||
size_t tmp_memsize; |
|||
size_t tmp_disksize; |
|||
r = le_pack(&tmp_ule, |
|||
&tmp_memsize, &tmp_disksize, |
|||
&tmp_le, NULL, NULL, NULL); |
|||
assert(r==0); |
|||
assert(tmp_memsize == memsize); |
|||
assert(tmp_disksize == disksize); |
|||
assert(memcmp(le, tmp_le, memsize) == 0); |
|||
|
|||
toku_free(tmp_le); |
|||
toku_free(le); |
|||
} |
|||
} |
|||
} |
|||
|
|||
static void |
|||
test_le_pack_uncommitted (u_int8_t committed_type, u_int8_t prov_type, int num_placeholders) { |
|||
ULE_S ule; |
|||
|
|||
u_int8_t key[MAX_SIZE]; |
|||
u_int8_t cval[MAX_SIZE]; |
|||
u_int8_t pval[MAX_SIZE]; |
|||
u_int32_t keysize; |
|||
u_int32_t cvalsize; |
|||
u_int32_t pvalsize; |
|||
for (keysize = 0; keysize < MAX_SIZE; keysize += (random() % MAX_SIZE) + 1) { |
|||
for (cvalsize = 0; cvalsize < MAX_SIZE; cvalsize += (random() % MAX_SIZE) + 1) { |
|||
pvalsize = (cvalsize + random()) % MAX_SIZE; |
|||
fillrandom(key, keysize); |
|||
if (committed_type == XR_INSERT) |
|||
fillrandom(cval, cvalsize); |
|||
if (prov_type == XR_INSERT) |
|||
fillrandom(pval, pvalsize); |
|||
ule.uxrs[0].type = committed_type; |
|||
ule.uxrs[0].xid = 0; |
|||
ule.uxrs[0].vallen = cvalsize; |
|||
ule.uxrs[0].valp = cval; |
|||
ule.keylen = keysize; |
|||
ule.keyp = key; |
|||
ule.num_uxrs = 2 + num_placeholders; |
|||
|
|||
u_int8_t idx; |
|||
for (idx = 1; idx <= num_placeholders; idx++) { |
|||
ule.uxrs[idx].type = XR_PLACEHOLDER; |
|||
ule.uxrs[idx].xid = ule.uxrs[idx-1].xid + (random() % 32 + 1); //Arbitrary number; xids must be strictly increasing |
|||
} |
|||
ule.uxrs[idx].xid = ule.uxrs[idx-1].xid + (random() % 32 + 1); //Arbitrary number; xids must be strictly increasing |
|||
ule.uxrs[idx].type = prov_type; |
|||
ule.uxrs[idx].vallen = pvalsize; |
|||
ule.uxrs[idx].valp = pval; |
|||
|
|||
size_t memsize; |
|||
size_t disksize; |
|||
LEAFENTRY le; |
|||
int r = le_pack(&ule, |
|||
&memsize, &disksize, |
|||
&le, NULL, NULL, NULL); |
|||
assert(r==0); |
|||
assert(le!=NULL); |
|||
le_verify_accessors(le, &ule, memsize, disksize); |
|||
ULE_S tmp_ule; |
|||
le_unpack(&tmp_ule, le); |
|||
verify_ule_equal(&ule, &tmp_ule); |
|||
LEAFENTRY tmp_le; |
|||
size_t tmp_memsize; |
|||
size_t tmp_disksize; |
|||
r = le_pack(&tmp_ule, |
|||
&tmp_memsize, &tmp_disksize, |
|||
&tmp_le, NULL, NULL, NULL); |
|||
assert(r==0); |
|||
assert(tmp_memsize == memsize); |
|||
assert(tmp_disksize == disksize); |
|||
assert(memcmp(le, tmp_le, memsize) == 0); |
|||
|
|||
toku_free(tmp_le); |
|||
toku_free(le); |
|||
} |
|||
} |
|||
} |
|||
|
|||
static void |
|||
test_le_pack_provpair (int num_placeholders) { |
|||
test_le_pack_uncommitted(XR_DELETE, XR_INSERT, num_placeholders); |
|||
} |
|||
|
|||
static void |
|||
test_le_pack_provdel (int num_placeholders) { |
|||
test_le_pack_uncommitted(XR_INSERT, XR_DELETE, num_placeholders); |
|||
} |
|||
|
|||
static void |
|||
test_le_pack_both (int num_placeholders) { |
|||
test_le_pack_uncommitted(XR_INSERT, XR_INSERT, num_placeholders); |
|||
} |
|||
|
|||
//Test of PACK |
|||
// Committed leafentry |
|||
// delete -> nothing (le_empty_packs_to_nothing) |
|||
// insert |
|||
// make key/val have diff lengths/content |
|||
// Uncommitted |
|||
// committed delete |
|||
// followed by placeholder*, delete (le_empty_packs_to_nothing) |
|||
// followed by placeholder*, insert |
|||
// committed insert |
|||
// followed by placeholder*, delete |
|||
// followed by placeholder*, insert |
|||
// |
|||
// placeholder* is 0,1, or 2 placeholders |
|||
static void |
|||
test_le_pack (void) { |
|||
test_le_empty_packs_to_nothing(); |
|||
test_le_pack_committed(); |
|||
int i; |
|||
for (i = 0; i < 3; i++) { |
|||
test_le_pack_provpair(i); |
|||
test_le_pack_provdel(i); |
|||
test_le_pack_both(i); |
|||
} |
|||
} |
|||
|
|||
//TODO: #1125 tests: |
|||
// Will probably have to expose ULE_S definition |
|||
// - Check memsize function is correct |
|||
// - Assert == disksize (almost useless, but go ahead) |
|||
// - Check standard accessors |
|||
// - le_latest_key_and_len |
|||
// - le_latest_key |
|||
// - le_latest_keylen |
|||
// - le_latest_val_and_len |
|||
// - le_latest_val |
|||
// - le_latest_vallen |
|||
// - le_key_and_len |
|||
// - le_key |
|||
// - le_keylen |
|||
// - le_innermost_inserted_val_and_len |
|||
// - le_innermost_inserted_val |
|||
// - le_innermost_inserted_vallen |
|||
// - Check le_outermost_uncommitted_xid |
|||
// - Check le_is_provdel |
|||
// - Check unpack+pack memcmps equal |
|||
// - Check exact memory expected (including size) for various leafentry types. |
|||
// - Check apply_msg logic |
|||
// - Known start, known expected.. various types. |
|||
// - Go through test-leafentry10.c |
|||
// - Verify we have tests for all analogous stuff. |
|||
// |
|||
// PACK |
|||
// UNPACK |
|||
// verify pack+unpack is no-op |
|||
// verify unpack+pack is no-op |
|||
// accessors |
|||
// Test apply_msg logic |
|||
// i.e. start with LE, apply message |
|||
// in parallel, construct the expected ULE manually, and pack that |
|||
// Compare the two results |
|||
// Test full_promote |
|||
|
|||
int |
|||
test_main (int argc __attribute__((__unused__)), const char *argv[] __attribute__((__unused__))) { |
|||
srandom(7); //Arbitrary seed. |
|||
test_le_offsets(); |
|||
test_le_pack(); |
|||
return 0; |
|||
} |
|||
newbrt/ule.c (1528 changes): file diff suppressed because it is too large
newbrt/ule.h: @ -0,0 +1,70 @@ |
|||
/* -*- mode: C; c-basic-offset: 4 -*- */ |
|||
#ident "Copyright (c) 2007, 2008 Tokutek Inc. All rights reserved." |
|||
|
|||
/* Purpose of this file is to provide the world with everything necessary |
|||
* to use the nested transaction logic and nothing else. No internal |
|||
* requirements of the nested transaction logic belongs here. |
|||
*/ |
|||
|
|||
#ifndef ULE_H |
|||
#define ULE_H |
|||
|
|||
//1 does much slower debugging |
|||
#define ULE_DEBUG 0 |
|||
|
|||
///////////////////////////////////////////////////////////////////////////////// |
|||
// Following data structures are the unpacked format of a leafentry. |
|||
// * ule is the unpacked leaf entry, that contains an array of unpacked |
|||
// transaction records |
|||
// * uxr is the unpacked transaction record |
|||
// |
|||
|
|||
|
|||
//Types of transaction records. |
|||
enum {XR_INSERT = 1, |
|||
XR_DELETE = 2, |
|||
XR_PLACEHOLDER = 3}; |
|||
|
|||
typedef struct { // unpacked transaction record |
|||
u_int8_t type; // delete/insert/placeholder |
|||
u_int32_t vallen; // number of bytes in value |
|||
void * valp; // pointer to value (Where is value really stored?) |
|||
TXNID xid; // transaction id |
|||
// Note: when packing ule into a new leafentry, will need |
|||
// to copy actual data from valp to new leafentry |
|||
} UXR_S, *UXR; |
|||
|
|||
// Unpacked Leaf Entry is of fixed size because it's just on the |
|||
// stack and we care about ease of access more than the memory footprint. |
|||
typedef struct { // unpacked leaf entry |
|||
u_int8_t num_uxrs; // how many of uxrs[] are valid |
|||
u_int32_t keylen; |
|||
void * keyp; |
|||
UXR_S uxrs[MAX_TRANSACTION_RECORDS]; // uxrs[0] is outermost, uxrs[num_uxrs-1] is innermost |
|||
} ULE_S, *ULE; |
|||
|
|||
int apply_msg_to_leafentry(BRT_MSG msg, |
|||
LEAFENTRY old_leafentry, // NULL if there was no stored data. |
|||
size_t *new_leafentry_memorysize, |
|||
size_t *new_leafentry_disksize, |
|||
LEAFENTRY *new_leafentry_p, |
|||
OMT omt, |
|||
struct mempool *mp, |
|||
void **maybe_free); |
|||
|
|||
////////////////////////////////////////////////////////////////////////////////////// |
|||
//Functions exported for test purposes only (used internally for non-test purposes). |
|||
void le_unpack(ULE ule, LEAFENTRY le); |
|||
int le_pack(ULE ule, // data to be packed into new leafentry |
|||
size_t *new_leafentry_memorysize, |
|||
size_t *new_leafentry_disksize, |
|||
LEAFENTRY * const new_leafentry_p, // this is what this function creates |
|||
OMT omt, |
|||
struct mempool *mp, |
|||
void **maybe_free); |
|||
|
|||
|
|||
size_t le_memsize_from_ule (ULE ule); |
|||
|
|||
#endif // ULE_H |
|||
|
|||
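For illustration only (not in the original changeset): a hedged sketch of driving apply_msg_to_leafentry, the entry point declared above. Passing NULL for the omt, mempool, and maybe_free arguments mirrors how le_pack is called in newbrt/tests/test-leafentry-nested.c and is assumed to be acceptable here as well; leafentry_memsize and toku_free are used exactly as in that test.
{{{
#include <toku_portability.h>
#include "brttypes.h"
#include "includes.h"   // same umbrella header used by the new unit test
#include "ule.h"

// Apply a message to a slot that currently holds no data (old_leafentry == NULL)
// and receive a freshly packed leafentry, or NULL if the result is an empty entry.
static void
example_apply_msg(BRT_MSG msg) {
    size_t memsize;
    size_t disksize;
    LEAFENTRY new_le = NULL;
    int r = apply_msg_to_leafentry(msg, NULL,
                                   &memsize, &disksize, &new_le,
                                   NULL, NULL, NULL);  // no OMT/mempool, as in the tests' le_pack calls
    assert(r == 0);
    if (new_le) {
        assert(leafentry_memsize(new_le) == memsize);
        toku_free(new_le);
    }
}
}}}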
newbrt/xids-internal.h: @ -0,0 +1,18 @@ |
|||
/* -*- mode: C; c-basic-offset: 4 -*- */ |
|||
#ident "Copyright (c) 2007, 2008 Tokutek Inc. All rights reserved." |
|||
|
|||
|
|||
#ifndef XIDS_INTERNAL_H |
|||
#define XIDS_INTERNAL_H |
|||
|
|||
// Variable size list of transaction ids (known in design doc as xids<>). |
|||
// ids[0] is the outermost transaction. |
|||
// ids[num_xids - 1] is the innermost transaction. |
|||
// Should only be accessed by accessor functions xids_xxx, not directly. |
|||
typedef struct xids_t { |
|||
u_int8_t num_stored_xids; // maximum value of MAX_TRANSACTION_RECORDS - 1 ... |
|||
// ... because transaction 0 is implicit |
|||
TXNID ids[]; |
|||
} XIDS_S; |
|||
|
|||
#endif |
|||
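As a quick illustration of the flexible-array layout above (the count of two stored ids is arbitrary and the helper is hypothetical, not part of this commit), the in-memory size works out as follows:
{{{
// Hypothetical: bytes occupied by an xids_t holding two stored (non-root) ids.
// This mirrors the allocation later performed by xids_create_child() in xids.c.
static size_t example_xids_bytes(void) {
    return sizeof(struct xids_t) + 2 * sizeof(TXNID);
}
}}}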
newbrt/xids.c
@ -0,0 +1,211 @@
/* -*- mode: C; c-basic-offset: 4 -*- */
#ident "Copyright (c) 2007, 2008 Tokutek Inc. All rights reserved."

/* Purpose of this file is to implement the xids list of nested transaction
 * ids.
 *
 * See design documentation for nested transactions at
 * TokuWiki/Imp/TransactionsOverview.
 *
 * NOTE: xids are always stored in disk byte order.
 *       Accessors are responsible for transposing bytes to
 *       host order.
 */

#include <errno.h>
#include <string.h>

#include <toku_portability.h>
#include "brttypes.h"
#include "xids.h"
#include "xids-internal.h"
#include "toku_assert.h"
#include "memory.h"
#include <toku_htod.h>

/////////////////////////////////////////////////////////////////////////////////
// This layer of abstraction (xids_xxx) understands xids<> and nothing else.
// It contains all the functions that understand xids<>.
//
// xids<> do not store the implicit transaction id of 0 at index 0.
// The accessor functions make the id of 0 explicit at index 0.
// The number of xids physically stored in the xids array is in
// the variable num_stored_xids.
//
// The xids struct is immutable.  The caller gets an initial version of XIDS
// by calling xids_get_root_xids(), which returns the constant struct
// representing the root transaction (id 0).  When a transaction begins,
// a new XIDS is created with the id of the current transaction appended to
// the list.
//

// This is the xids list for a transactionless environment.
// It is also the initial state of any xids list created for
// nested transactions.

XIDS
xids_get_root_xids(void) {
    static const struct xids_t root_xids = {
        .num_stored_xids = 0
    };

    XIDS rval = (XIDS)&root_xids;
    return rval;
}

// xids is immutable.  This function creates a new xids by copying the
// parent's list and then appending the xid of the new transaction.
int
xids_create_child(XIDS parent_xids,   // xids list for parent transaction
                  XIDS *xids_p,       // xids list created
                  TXNID this_xid) {   // xid of this transaction (new innermost)
    int rval;
    assert(parent_xids);
    assert(this_xid > xids_get_innermost_xid(parent_xids));
    u_int8_t num_stored_xids = parent_xids->num_stored_xids + 1;
    u_int8_t num_xids = num_stored_xids + 1;
    assert(num_xids > 0);
    assert(num_xids <= MAX_TRANSACTION_RECORDS);
    if (num_xids == MAX_TRANSACTION_RECORDS) rval = EINVAL;
    else {
        XIDS xids = toku_malloc(sizeof(*xids) + num_stored_xids*sizeof(xids->ids[0]));
        if (!xids) rval = ENOMEM;
        else {
            xids->num_stored_xids = num_stored_xids;
            memcpy(xids->ids,
                   parent_xids->ids,
                   parent_xids->num_stored_xids*sizeof(parent_xids->ids[0]));
            TXNID this_xid_disk = toku_htod64(this_xid);
            xids->ids[num_stored_xids-1] = this_xid_disk;
            *xids_p = xids;
            rval = 0;
        }
    }
    return rval;
}

void
xids_create_from_buffer(struct rbuf *rb,  // buffer to read the xids list from
                        XIDS *xids_p) {   // xids list created
    u_int8_t num_stored_xids = rbuf_char(rb);
    u_int8_t num_xids = num_stored_xids + 1;
    assert(num_xids > 0);
    assert(num_xids < MAX_TRANSACTION_RECORDS);
    XIDS xids = toku_xmalloc(sizeof(*xids) + num_stored_xids*sizeof(xids->ids[0]));
    xids->num_stored_xids = num_stored_xids;
    u_int8_t index;
    for (index = 0; index < xids->num_stored_xids; index++) {
        rbuf_TXNID(rb, &xids->ids[index]);
        if (index > 0)
            assert(xids->ids[index] > xids->ids[index-1]);
    }
    *xids_p = xids;
}

void
xids_destroy(XIDS *xids_p) {
    if (*xids_p != xids_get_root_xids()) toku_free(*xids_p);
    *xids_p = NULL;
}

// Return xid at requested position.
// If requesting an xid out of range (which will be the case if the xids array is empty)
// then return 0, the xid of the root transaction.
TXNID
xids_get_xid(XIDS xids, u_int8_t index) {
    TXNID rval = 0;
    if (index > 0) {
        assert(index < xids_get_num_xids(xids));
        rval = xids->ids[index-1];
        rval = toku_dtoh64(rval);
    }
    return rval;
}

// This function assumes that target_xid IS in the list
// of xids.
u_int8_t
xids_find_index_of_xid(XIDS xids, TXNID target_xid) {
    u_int8_t index = 0;  // search outer to inner
    TXNID current_xid = xids_get_xid(xids, index);
    while (current_xid != target_xid) {
        assert(current_xid < target_xid);
        index++;
        current_xid = xids_get_xid(xids, index); // Next inner txnid in xids.
    }
    return index;
}

u_int8_t
xids_get_num_xids(XIDS xids) {
    u_int8_t rval = xids->num_stored_xids+1; //+1 for the id of 0 made explicit by xids<> accessors
    return rval;
}

// Return innermost xid.
TXNID
xids_get_innermost_xid(XIDS xids) {
    TXNID rval = xids_get_xid(xids, xids_get_num_xids(xids)-1);
    return rval;
}

void
xids_cpy(XIDS target, XIDS source) {
    size_t size = xids_get_size(source);
    memcpy(target, source, size);
}

// Return size in bytes of the in-memory representation.
u_int32_t
xids_get_size(XIDS xids) {
    u_int32_t rval;
    u_int8_t num_stored_xids = xids->num_stored_xids;
    rval = sizeof(*xids) + num_stored_xids * sizeof(xids->ids[0]);
    return rval;
}

// Return size in bytes of the serialized representation:
// one count byte plus eight bytes per stored xid.
u_int32_t
xids_get_serialize_size(XIDS xids) {
    u_int32_t rval;
    u_int8_t num_stored_xids = xids->num_stored_xids;
    rval = 1 +                  // num stored xids
           8 * num_stored_xids;
    return rval;
}

void
toku_calc_more_murmur_xids(struct x1764 *mm, XIDS xids) {
    x1764_add(mm, &xids->num_stored_xids, 1);
    u_int8_t index;
    u_int8_t num_xids = xids_get_num_xids(xids);
    for (index = 0; index < num_xids; index++) {
        TXNID current_xid = xids_get_xid(xids, index);
        x1764_add(mm, &current_xid, 8);
    }
}

unsigned char *
xids_get_end_of_array(XIDS xids) {
    TXNID *r = xids->ids + xids->num_stored_xids;
    return (unsigned char*)r;
}

void wbuf_xids(struct wbuf *wb, XIDS xids) {
    wbuf_char(wb, (unsigned char)xids->num_stored_xids);
    u_int8_t index;
    for (index = 0; index < xids->num_stored_xids; index++) {
        wbuf_TXNID(wb, xids->ids[index]);
    }
}
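A sketch of the intended calling pattern for the functions above. The transaction ids 100 and 101 and the helper name are invented for illustration and are not part of this commit; the sketch assumes xids.h and toku_assert.h are included.
{{{
// Hypothetical usage sketch for the xids accessors defined above.
static void example_nested_xids(void) {
    XIDS root = xids_get_root_xids();            // constant list for the root transaction (id 0)
    XIDS outer, inner;
    int r;
    r = xids_create_child(root,  &outer, 100);   // outermost user transaction, xid 100
    assert(r == 0);
    r = xids_create_child(outer, &inner, 101);   // nested child; xid must exceed the parent's innermost
    assert(r == 0);
    assert(xids_get_num_xids(inner) == 3);       // implicit root plus two nested levels
    assert(xids_get_innermost_xid(inner) == 101);
    xids_destroy(&inner);                        // frees the heap copies; the root list is never freed
    xids_destroy(&outer);
}
}}}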
newbrt/xids.h
@ -0,0 +1,63 @@
/* -*- mode: C; c-basic-offset: 4 -*- */
#ident "Copyright (c) 2007, 2008 Tokutek Inc. All rights reserved."

/* Purpose of this file is to provide the world with everything necessary
 * to use the xids and nothing else.
 * Internal requirements of the xids logic do not belong here.
 *
 * xids is (abstractly) an immutable list of nested transaction ids, accessed only
 * via the functions in this file.
 *
 * See design documentation for nested transactions at
 * TokuWiki/Imp/TransactionsOverview.
 */

#ifndef XIDS_H
#define XIDS_H

#include "x1764.h"

#include "rbuf.h"
#include "wbuf.h"

/* The number of transaction ids stored in the xids structure is
 * represented by an 8-bit value, and the value 255 is reserved, so at most
 * 254 transaction records (MAX_TRANSACTION_RECORDS) can be represented.
 * One of those slots is used for the implicit root transaction (id 0),
 * which leaves MAX_NESTED_TRANSACTIONS one less than that.
 */
enum {MAX_NESTED_TRANSACTIONS = 253};
enum {MAX_TRANSACTION_RECORDS = MAX_NESTED_TRANSACTIONS + 1};

// Retrieve an XIDS representing the root transaction.
XIDS xids_get_root_xids(void);

void xids_cpy(XIDS target, XIDS source);

// Creates an XIDS representing this transaction.
// You must pass in an XIDS representing the parent of this transaction.
int xids_create_child(XIDS parent_xids, XIDS *xids_p, TXNID this_xid);
void xids_create_from_buffer(struct rbuf *rb, XIDS *xids_p);

void xids_destroy(XIDS *xids_p);

TXNID xids_get_xid(XIDS xids, u_int8_t index);

u_int8_t xids_find_index_of_xid(XIDS xids, TXNID target_xid);

u_int8_t xids_get_num_xids(XIDS xids);

TXNID xids_get_innermost_xid(XIDS xids);

// Return size in bytes.
u_int32_t xids_get_size(XIDS xids);

u_int32_t xids_get_serialize_size(XIDS xids);

void toku_calc_more_murmur_xids(struct x1764 *mm, XIDS xids);

unsigned char *xids_get_end_of_array(XIDS xids);

void wbuf_xids(struct wbuf *wb, XIDS xids);

#endif
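As a worked example of the sizes declared above: with two stored (non-root) xids, xids_get_serialize_size() returns 1 + 8*2 = 17 bytes, matching the layout wbuf_xids() writes in xids.c (one count byte followed by each stored id in disk order). A hypothetical check, not part of this commit, that ties the two together:
{{{
// Hypothetical check: serialized size equals one count byte plus
// eight bytes per stored (non-root) xid.
static void example_serialize_size(XIDS xids) {
    u_int32_t expected = 1 + 8 * (u_int32_t)(xids_get_num_xids(xids) - 1);
    assert(xids_get_serialize_size(xids) == expected);
}
}}}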
test.wiki
@ -0,0 +1,8 @@
* One
* two
1. FOO
  a. sock
  a. pizza
2. elephant

[[Include(source:toku/tokudb.1125/test2.wiki,wiki)]]
test2.wiki
@ -0,0 +1,4 @@
1. these
1. lines
1. from
1. test2.wiki