Browse Source

close[t:4298] Merge from 4298j: {{{svn merge -c41355 ../tokudb.4298i}}}. Closes #4298.

git-svn-id: file:///svn/toku/tokudb@41357 c7de825b-a66e-492c-adef-691d508d4ae1
pull/73/head
Bradley C. Kuszmaul 13 years ago
committed by Yoni Fogel
parent
commit
7ad7bb0611
  1. 2
      buildheader/Makefile
  2. 2
      db-benchmark-test/db-benchmark-test.c
  3. 2
      newbrt/brt-internal.h
  4. 17
      newbrt/brt.c
  5. 8
      newbrt/brt.h
  6. 3
      newbrt/brttypes.h
  7. 57
      newbrt/cachetable.c
  8. 10
      newbrt/log-internal.h
  9. 15
      newbrt/logformat.c
  10. 34
      newbrt/logger.c
  11. 7
      newbrt/logger.h
  12. 434
      newbrt/recover.c
  13. 21
      newbrt/recover.h
  14. 9
      newbrt/rollback.c
  15. 9
      newbrt/tdb-recover.c
  16. 2
      newbrt/tests/benchmark-test.c
  17. 4
      newbrt/tests/brt-serialize-sub-block-test.c
  18. 2
      newbrt/tests/brt-test-cursor-2.c
  19. 26
      newbrt/tests/brt-test-cursor.c
  20. 4
      newbrt/tests/brt-test-header.c
  21. 56
      newbrt/tests/brt-test.c
  22. 2
      newbrt/tests/brt-test0.c
  23. 2
      newbrt/tests/brt-test1.c
  24. 2
      newbrt/tests/brt-test2.c
  25. 2
      newbrt/tests/brt-test3.c
  26. 2
      newbrt/tests/brt-test4.c
  27. 2
      newbrt/tests/brt-test5.c
  28. 2
      newbrt/tests/brtloader-test-writer.c
  29. 2
      newbrt/tests/brtloader-test.c
  30. 8
      newbrt/tests/is_empty.c
  31. 2
      newbrt/tests/keyrange.c
  32. 4
      newbrt/tests/le-cursor-provdel.c
  33. 8
      newbrt/tests/le-cursor-right.c
  34. 4
      newbrt/tests/le-cursor-walk.c
  35. 2
      newbrt/tests/make-tree.c
  36. 2
      newbrt/tests/msnfilter.c
  37. 2
      newbrt/tests/orthopush-flush.c
  38. 9
      newbrt/tests/recovery-bad-last-entry.c
  39. 8
      newbrt/tests/recovery-cbegin-cend-hello.c
  40. 8
      newbrt/tests/recovery-cbegin-cend.c
  41. 8
      newbrt/tests/recovery-cbegin.c
  42. 8
      newbrt/tests/recovery-cend-cbegin.c
  43. 9
      newbrt/tests/recovery-datadir-is-file.c
  44. 8
      newbrt/tests/recovery-empty.c
  45. 8
      newbrt/tests/recovery-fopen-missing-file.c
  46. 8
      newbrt/tests/recovery-hello.c
  47. 8
      newbrt/tests/recovery-lsn-error-during-forward-scan.c
  48. 8
      newbrt/tests/recovery-no-datadir.c
  49. 8
      newbrt/tests/recovery-no-log.c
  50. 8
      newbrt/tests/recovery-no-logdir.c
  51. 2
      newbrt/tests/shortcut.c
  52. 2
      newbrt/tests/test-brt-overflow.c
  53. 4
      newbrt/tests/test-checkpoint-during-flush.c
  54. 4
      newbrt/tests/test-checkpoint-during-merge.c
  55. 4
      newbrt/tests/test-checkpoint-during-rebalance.c
  56. 4
      newbrt/tests/test-checkpoint-during-split.c
  57. 2
      newbrt/tests/test-del-inorder.c
  58. 2
      newbrt/tests/test-dirty-flushes-on-cleaner.c
  59. 2
      newbrt/tests/test-dump-brt.c
  60. 2
      newbrt/tests/test-flushes-on-cleaner.c
  61. 2
      newbrt/tests/test-inc-split.c
  62. 2
      newbrt/tests/test-merges-on-cleaner.c
  63. 2
      newbrt/tests/test-pick-child-to-flush.c
  64. 2
      newbrt/tests/test.h
  65. 2
      newbrt/tests/test3681.c
  66. 4
      newbrt/tests/test3856.c
  67. 12
      newbrt/tests/test3884.c
  68. 2
      newbrt/tests/test4115.c
  69. 2
      newbrt/tests/test4244.c
  70. 2
      newbrt/tests/verify-bad-msn.c
  71. 2
      newbrt/tests/verify-bad-pivots.c
  72. 2
      newbrt/tests/verify-dup-in-leaf.c
  73. 2
      newbrt/tests/verify-dup-pivots.c
  74. 2
      newbrt/tests/verify-misrouted-msgs.c
  75. 2
      newbrt/tests/verify-unsorted-leaf.c
  76. 2
      newbrt/tests/verify-unsorted-pivots.c
  77. 63
      newbrt/txn.c
  78. 10
      newbrt/txn.h
  79. 6
      newbrt/wbuf.h
  80. 2
      release/examples/db-insert.c
  81. 16
      src/tests/Makefile
  82. 102
      src/tests/test-prepare.c
  83. 123
      src/tests/test-prepare2.c
  84. 298
      src/tests/test-prepare3.c
  85. 7
      src/tests/test.h
  86. 54
      src/tests/test_txn_close_before_commit.c
  87. 57
      src/tests/test_txn_close_before_prepare_commit.c
  88. 81
      src/ydb.c
  89. 74
      src/ydb_db.c
  90. 5
      src/ydb_db.h
  91. 79
      src/ydb_txn.c
  92. 1
      src/ydb_txn.h
  93. 2
      windows/tests/test.h

2
buildheader/Makefile

@ -22,4 +22,4 @@ default: ../include/db.h
hfiles: tdb.h
clean:
rm -f *.o make_tdb make_db_h_?_? sample_offsets sample_offsets_mysql make_tdb_h
rm -f *.o make_tdb

2
db-benchmark-test/db-benchmark-test.c

@ -33,7 +33,7 @@ enum { DEFAULT_ITEMS_PER_TRANSACTION = 1<<14 };
#define DEFAULT_N_ITERATIONS (DEFAULT_N_ITEMS/DEFAULT_ITEMS_TO_INSERT_PER_ITERATION)
static void insert (long long v);
#define CKERR(r) do { if (r!=0) fprintf(stderr, "%s:%d error %d %s\n", __FILE__, __LINE__, r, db_strerror(r)); assert(r==0); } while (0)
// Check a return code: the argument is evaluated exactly once (into __r), a nonzero code is reported on stderr together with its db_strerror text, and then the code is asserted to be zero.
// Uses a statement expression ({ ... }), which is a GNU C extension.
#define CKERR(r) ({ int __r = (r); if (__r!=0) fprintf(stderr, "%s:%d error %d %s\n", __FILE__, __LINE__, __r, db_strerror(__r)); assert(__r==0); })
#define CKERR2(r,rexpect) do { if (r!=rexpect) fprintf(stderr, "%s:%d error %d %s\n", __FILE__, __LINE__, r, db_strerror(r)); assert(r==rexpect); } while (0)
/* default test parameters */

2
newbrt/brt-internal.h

@ -433,7 +433,7 @@ struct brt {
int pinned_by_checkpoint; //Keep this brt around for checkpoint, like a transaction
int was_closed; //True when this brt was closed, but is being kept around for transactions (or checkpoint).
int (*close_db)(DB*, u_int32_t);
int (*close_db)(DB*, u_int32_t, bool oplsn_valid, LSN oplsn);
u_int32_t close_flags;
struct toku_list live_brt_link;

17
newbrt/brt.c

@ -3806,10 +3806,10 @@ brt_redirect_db (BRT brt_to, BRT brt_from) {
}
static int
fake_db_brt_close_delayed(DB *db, u_int32_t UU(flags)) {
fake_db_brt_close_delayed(DB *db, u_int32_t UU(flags), bool oplsn_valid, LSN oplsn) {
BRT brt_to_close = db->api_internal;
char *error_string = NULL;
int r = toku_close_brt(brt_to_close, &error_string);
int r = toku_close_brt_lsn(brt_to_close, &error_string, oplsn_valid, oplsn);
assert_zero(r);
assert(error_string == NULL);
toku_free(db);
@ -3841,7 +3841,7 @@ toku_brt_header_close_redirected_brts(struct brt_header * h) {
assert(which == num_brts);
for (which = 0; which < num_brts; which++) {
int r;
r = toku_brt_db_delay_closed(brts[which], dbs[which], fake_db_brt_close_delayed, 0);
r = toku_brt_db_delay_closed(brts[which], dbs[which], fake_db_brt_close_delayed, 0, false, ZERO_LSN);
assert_zero(r);
}
return 0;
@ -4198,7 +4198,7 @@ brtheader_note_unpin_by_checkpoint (CACHEFILE UU(cachefile), void *header_v)
if (brt_to_unpin->was_closed && !toku_brt_zombie_needed(brt_to_unpin)) {
//Close immediately.
assert(brt_to_unpin->close_db);
r = brt_to_unpin->close_db(brt_to_unpin->db, brt_to_unpin->close_flags);
r = brt_to_unpin->close_db(brt_to_unpin->db, brt_to_unpin->close_flags, false, ZERO_LSN);
}
return r;
@ -4351,8 +4351,9 @@ toku_brtheader_close (CACHEFILE cachefile, int fd, void *header_v, char **malloc
}
int
toku_brt_db_delay_closed (BRT zombie, DB* db, int (*close_db)(DB*, u_int32_t), u_int32_t close_flags) {
//Requires: close_db needs to call toku_close_brt to delete the final reference.
toku_brt_db_delay_closed (BRT zombie, DB* db, int (*close_db)(DB*, u_int32_t, bool oplsn_valid, LSN oplsn), u_int32_t close_flags, bool oplsn_valid, LSN oplsn)
// Effect: See brt.h for the specification of this function.
{
int r;
struct brt_header *h = zombie->h;
if (zombie->was_closed) r = EINVAL;
@ -4365,7 +4366,7 @@ toku_brt_db_delay_closed (BRT zombie, DB* db, int (*close_db)(DB*, u_int32_t), u
if (!zombie->db) zombie->db = db;
if (!toku_brt_zombie_needed(zombie)) {
//Close immediately.
r = zombie->close_db(zombie->db, zombie->close_flags);
r = zombie->close_db(zombie->db, zombie->close_flags, oplsn_valid, oplsn);
}
else {
//Try to pass responsibility off.
@ -4423,7 +4424,7 @@ int toku_close_brt_lsn (BRT brt, char **error_string, BOOL oplsn_valid, LSN opls
return r;
}
int toku_close_brt (BRT brt, char **error_string) {
int toku_close_brt_nolsn (BRT brt, char **error_string) {
return toku_close_brt_lsn(brt, error_string, FALSE, ZERO_LSN);
}

8
newbrt/brt.h

@ -151,8 +151,12 @@ int toku_brt_send_insert(BRT brt, DBT *key, DBT *val, XIDS xids, enum brt_msg_ty
int toku_brt_send_delete(BRT brt, DBT *key, XIDS xids) __attribute__ ((warn_unused_result));
int toku_brt_send_commit_any(BRT brt, DBT *key, XIDS xids) __attribute__ ((warn_unused_result));
int toku_brt_db_delay_closed (BRT brt, DB* db, int (*close_db)(DB*, u_int32_t), u_int32_t close_flags) __attribute__ ((warn_unused_result));
int toku_close_brt (BRT, char **error_string) __attribute__ ((warn_unused_result));
int toku_brt_db_delay_closed (BRT brt, DB* db, int (*close_db)(DB*, u_int32_t, bool oplsn_valid, LSN oplsn), u_int32_t close_flags, bool oplsn_valid, LSN oplsn) __attribute__ ((warn_unused_result));
// Effect: Arrange to really (eventually) close a zombie DB. When it is closed the CLOSE_DB function will be called.
// Requires: close_db needs to call toku_close_brt_lsn (or toku_close_brt_nolsn) to delete the final reference.
int toku_close_brt_nolsn (BRT, char **error_string) __attribute__ ((warn_unused_result));
int toku_close_brt_lsn (BRT brt, char **error_string, BOOL oplsn_valid, LSN oplsn) __attribute__ ((warn_unused_result));
int toku_brt_set_panic(BRT brt, int panic, char *panic_string) __attribute__ ((warn_unused_result));

3
newbrt/brttypes.h

@ -2,7 +2,7 @@
#define BRTTYPES_H
#ident "$Id$"
#ident "Copyright (c) 2007-2010 Tokutek Inc. All rights reserved."
#ident "Copyright (c) 2007-2011 Tokutek Inc. All rights reserved."
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
#include <sys/types.h>
@ -48,6 +48,7 @@ typedef u_int64_t TXNID;
#define TXNID_NONE ((TXNID)0)
// Block numbers are wrapped in a struct so that we will notice type problems.
typedef struct blocknum_s {
    int64_t b;
} BLOCKNUM;
// A GID is a pointer to the gid bytes; the gid is of size [DB_GID_SIZE].
typedef struct gid_s {
    uint8_t *gid;
} GID;
#define ROLLBACK_NONE ((BLOCKNUM){0})
// Wrap a raw 64-bit integer as a BLOCKNUM.
static inline BLOCKNUM make_blocknum(int64_t b) {
    return (BLOCKNUM){ .b = b };
}

57
newbrt/cachetable.c

@ -241,7 +241,6 @@ struct cachetable {
toku_pthread_mutex_t openfd_mutex; // make toku_cachetable_openfd() single-threaded
OMT reserved_filenums;
char *env_dir;
BOOL set_env_dir; //Can only set env_dir once
// For releasing locks during I/O. These are named "ydb_lock_callback" but it could be viewed more generally as being used to release and reacquire locks while I/O is taking place.
void (*ydb_lock_callback)(void);
@ -563,10 +562,8 @@ void toku_cachetable_release_reserved_memory(CACHETABLE ct, uint64_t reserved_me
void
toku_cachetable_set_env_dir(CACHETABLE ct, const char *env_dir) {
assert(!ct->set_env_dir);
toku_free(ct->env_dir);
ct->env_dir = toku_xstrdup(env_dir);
ct->set_env_dir = TRUE;
}
void
@ -3529,19 +3526,47 @@ log_open_txn (OMTVALUE txnv, u_int32_t UU(index), void *UU(extra)) {
int r = toku_omt_iterate(txn->open_brts, set_filenum_in_array, array);
assert(r==0);
}
int r = toku_log_xstillopen(logger, NULL, 0,
toku_txn_get_txnid(txn),
toku_txn_get_txnid(toku_logger_txn_parent(txn)),
txn->rollentry_raw_count,
open_filenums,
txn->force_fsync_on_commit,
txn->num_rollback_nodes,
txn->num_rollentries,
txn->spilled_rollback_head,
txn->spilled_rollback_tail,
txn->current_rollback);
assert(r==0);
return 0;
switch (toku_txn_get_state(txn)) {
case TOKUTXN_LIVE:
case TOKUTXN_COMMITTING:
case TOKUTXN_ABORTING: {
int r = toku_log_xstillopen(logger, NULL, 0,
toku_txn_get_txnid(txn),
toku_txn_get_txnid(toku_logger_txn_parent(txn)),
txn->rollentry_raw_count,
open_filenums,
txn->force_fsync_on_commit,
txn->num_rollback_nodes,
txn->num_rollentries,
txn->spilled_rollback_head,
txn->spilled_rollback_tail,
txn->current_rollback);
assert(r==0);
return 0;
}
case TOKUTXN_PREPARING: {
GID gid;
toku_txn_get_prepared_gid(txn, &gid);
int r = toku_log_xstillopenprepared(logger, NULL, 0,
toku_txn_get_txnid(txn),
gid,
txn->rollentry_raw_count,
open_filenums,
txn->force_fsync_on_commit,
txn->num_rollback_nodes,
txn->num_rollentries,
txn->spilled_rollback_head,
txn->spilled_rollback_tail,
txn->current_rollback);
assert(r==0);
toku_free(gid.gid);
return 0;
}
case TOKUTXN_RETIRED:
return 0;
}
// default is an error
assert(0);
}
static int

10
newbrt/log-internal.h

@ -2,7 +2,7 @@
#define LOG_INTERNAL_H
#ident "$Id$"
#ident "Copyright (c) 2007-2010 Tokutek Inc. All rights reserved."
#ident "Copyright (c) 2007-2011 Tokutek Inc. All rights reserved."
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
#include "brt-internal.h"
@ -116,6 +116,8 @@ struct tokulogger {
void (*remove_finalize_callback) (DICTIONARY_ID, void*); // ydb-level callback to be called when a transaction that ...
void * remove_finalize_callback_extra; // ... deletes a file is committed or when one that creates a file is aborted.
CACHEFILE rollback_cachefile;
struct toku_list prepared_txns; // transactions that have been prepared and are unresolved, but have not been returned through txn_recover.
struct toku_list prepared_and_returned_txns; // transactions that have been prepared and unresolved, and have been returned through txn_recover. We need this list so that we can restart the recovery.
};
int toku_logger_find_next_unused_log_file(const char *directory, long long *result);
@ -159,6 +161,8 @@ struct tokutxn {
TOKUTXN_STATE state;
LSN do_fsync_lsn;
BOOL do_fsync;
GID gid; // for prepared transactions
struct toku_list prepared_txns_link; // list of prepared transactions
};
struct txninfo {
@ -204,6 +208,10 @@ static inline int toku_logsizeof_TXNID (TXNID txnid __attribute__((__unused__)))
return 8;
}
// Size used for a GID when sizing a log record: always DB_GID_SIZE, regardless of the gid's contents (the argument is unused).
static inline int toku_logsizeof_GID (GID gid __attribute__((__unused__))) {
    int gid_log_size = DB_GID_SIZE;
    return gid_log_size;
}
static inline int toku_logsizeof_FILENUMS (FILENUMS fs) {
static const FILENUM f = {0}; //fs could have .num==0 and then we cannot dereference
return 4 + fs.num * toku_logsizeof_FILENUM(f);

15
newbrt/logformat.c

@ -1,6 +1,6 @@
/* -*- mode: C; c-basic-offset: 4 -*- */
#ident "$Id$"
#ident "Copyright (c) 2007-2010 Tokutek Inc. All rights reserved."
#ident "Copyright (c) 2007-2011 Tokutek Inc. All rights reserved."
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
/* This file defines the logformat in an executable fashion.
@ -115,12 +115,25 @@ const struct logtype logtypes[] = {
{"BLOCKNUM", "spilled_rollback_tail", 0},
{"BLOCKNUM", "current_rollback", 0},
NULLFIELD}}, // record all transactions
// prepared txns need a gid
{"xstillopenprepared", 'p', FA{{"TXNID", "xid", 0},
{"GID", "gid", 0}, // prepared transactions need a gid, and have no parentxid.
{"u_int64_t", "rollentry_raw_count", 0},
{"FILENUMS", "open_filenums", 0},
{"u_int8_t", "force_fsync_on_commit", 0},
{"u_int64_t", "num_rollback_nodes", 0},
{"u_int64_t", "num_rollentries", 0},
{"BLOCKNUM", "spilled_rollback_head", 0},
{"BLOCKNUM", "spilled_rollback_tail", 0},
{"BLOCKNUM", "current_rollback", 0},
NULLFIELD}}, // record all transactions
{"suppress_rollback", 'S', FA{{"FILENUM", "filenum", 0},
{"TXNID", "xid", 0},
NULLFIELD}},
// Records produced by transactions
{"xbegin", 'b', FA{{"TXNID", "parentxid", 0},NULLFIELD}},
{"xcommit",'C', FA{{"TXNID", "xid", 0},NULLFIELD}},
{"xprepare",'P', FA{{"TXNID", "xid", 0}, {"GID", "gid", 0},NULLFIELD}},
{"xabort", 'q', FA{{"TXNID", "xid", 0},NULLFIELD}},
//TODO: #2037 Add dname
{"fcreate", 'F', FA{{"TXNID", "xid", 0},

34
newbrt/logger.c

@ -1,6 +1,6 @@
/* -*- mode: C; c-basic-offset: 4; indent-tabs-mode: nil -*- */
#ident "$Id$"
#ident "Copyright (c) 2007-2010 Tokutek Inc. All rights reserved."
#ident "Copyright (c) 2007-2011 Tokutek Inc. All rights reserved."
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
#include "includes.h"
@ -91,6 +91,8 @@ int toku_logger_create (TOKULOGGER *resultp) {
result->swap_ctr = 0;
result->rollback_cachefile = NULL;
result->output_is_available = TRUE;
toku_list_init(&result->prepared_txns);
toku_list_init(&result->prepared_and_returned_txns);
return 0;
panic:
@ -158,6 +160,10 @@ int toku_logger_open (const char *directory, TOKULOGGER logger) {
return 0;
}
// Return true iff the logger currently has a rollback cachefile (rollback_cachefile is non-NULL).
bool toku_logger_rollback_is_open (TOKULOGGER logger) {
    if (logger->rollback_cachefile == NULL) {
        return false;
    }
    return true;
}
int
toku_logger_open_rollback(TOKULOGGER logger, CACHETABLE cachetable, BOOL create) {
assert(logger->is_open);
@ -224,7 +230,7 @@ toku_logger_close_rollback(TOKULOGGER logger, BOOL recovery_failed) {
}
char *error_string_ignore = NULL;
r = toku_close_brt(brt_to_close, &error_string_ignore);
r = toku_close_brt_nolsn(brt_to_close, &error_string_ignore);
//Set as dealt with already.
logger->rollback_cachefile = NULL;
}
@ -727,16 +733,14 @@ int toku_logger_maybe_trim_log(TOKULOGGER logger, LSN trim_lsn)
}
void toku_logger_write_log_files (TOKULOGGER logger, BOOL write_log_files)
// Called only during initialization, so no locks are needed.
// Called only during initialization (or just after recovery), so no locks are needed.
{
assert(!logger->is_open);
logger->write_log_files = write_log_files;
}
void toku_logger_trim_log_files (TOKULOGGER logger, BOOL trim_log_files)
// Called only during initialization, so no locks are needed.
{
assert(logger);
logger->trim_log_files = trim_log_files;
}
@ -953,6 +957,15 @@ int toku_fread_TXNID (FILE *f, TXNID *txnid, struct x1764 *checksum, u_int32_t
return toku_fread_u_int64_t (f, txnid, checksum, len);
}
// Read a GID from f: allocates DB_GID_SIZE bytes at gid->gid and fills them one byte at a time via toku_fread_u_int8_t (which is handed checksum and len).
// On success returns 0; gid->gid is malloced data that the caller must release with toku_free.
// On failure returns the error from toku_fread_u_int8_t; the partially filled buffer is freed here and gid->gid is set to NULL, so nothing leaks and the caller has nothing to free.
int toku_fread_GID (FILE *f, GID *gid, struct x1764 *checksum, u_int32_t *len) {
    gid->gid = toku_xmalloc(DB_GID_SIZE);
    for (int i=0; i<DB_GID_SIZE; i++) {
        int r = toku_fread_u_int8_t(f, &gid->gid[i], checksum, len);
        if (r!=0) {
            // Do not hand a half-read buffer back to the caller.
            toku_free(gid->gid);
            gid->gid = NULL;
            return r;
        }
    }
    return 0;
}
// fills in the bs with malloced data.
int toku_fread_BYTESTRING (FILE *f, BYTESTRING *bs, struct x1764 *checksum, u_int32_t *len) {
int r=toku_fread_u_int32_t(f, (u_int32_t*)&bs->len, checksum, len);
@ -1003,6 +1016,17 @@ int toku_logprint_TXNID (FILE *outf, FILE *inf, const char *fieldname, struct x1
return 0;
}
// Read a GID from inf (via toku_fread_GID, which is handed checksum and len) and print it to outf as "<fieldname>=0x" followed by DB_GID_SIZE bytes in two-digit lowercase hex.
// The format argument is unused.
// Returns 0 on success, or the error from toku_fread_GID (in which case nothing is printed).
int toku_logprint_GID (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, u_int32_t *len, const char *format __attribute__((__unused__))) {
    GID v;
    int r = toku_fread_GID(inf, &v, checksum, len);
    if (r!=0) return r;
    fprintf(outf, "%s=0x", fieldname);
    // Write the bytes to outf (not stdout) so the value stays attached to its field name.
    for (int i=0; i<DB_GID_SIZE; i++) fprintf(outf, "%02x", v.gid[i]);
    toku_free(v.gid);  // the buffer was allocated by toku_fread_GID
    v.gid=NULL;
    return 0;
}
int toku_logprint_u_int8_t (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, u_int32_t *len, const char *format) {
u_int8_t v;
int r = toku_fread_u_int8_t(inf, &v, checksum, len);

7
newbrt/logger.h

@ -2,7 +2,7 @@
#define TOKU_LOGGER_H
#ident "$Id$"
#ident "Copyright (c) 2007-2010 Tokutek Inc. All rights reserved."
#ident "Copyright (c) 2007-2011 Tokutek Inc. All rights reserved."
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
#if defined(__cplusplus) || defined(__cilkplusplus)
@ -27,6 +27,7 @@ int toku_logger_shutdown(TOKULOGGER logger);
int toku_logger_close(TOKULOGGER *loggerp);
int toku_logger_open_rollback(TOKULOGGER logger, CACHETABLE cachetable, BOOL create);
int toku_logger_close_rollback(TOKULOGGER logger, BOOL recovery_failed);
bool toku_logger_rollback_is_open (TOKULOGGER); // return true iff the rollback is open.
int toku_logger_fsync (TOKULOGGER logger);
int toku_logger_fsync_if_lsn_not_fsynced(TOKULOGGER logger, LSN lsn);
@ -70,11 +71,13 @@ int toku_fread_LSN (FILE *f, LSN *lsn, struct x1764 *checksum, u_int32_t *le
int toku_fread_BLOCKNUM (FILE *f, BLOCKNUM *lsn, struct x1764 *checksum, u_int32_t *len);
int toku_fread_FILENUM (FILE *f, FILENUM *filenum, struct x1764 *checksum, u_int32_t *len);
int toku_fread_TXNID (FILE *f, TXNID *txnid, struct x1764 *checksum, u_int32_t *len);
int toku_fread_GID (FILE *f, GID *gid, struct x1764 *checksum, u_int32_t *len);
int toku_fread_BYTESTRING (FILE *f, BYTESTRING *bs, struct x1764 *checksum, u_int32_t *len);
int toku_fread_FILENUMS (FILE *f, FILENUMS *fs, struct x1764 *checksum, u_int32_t *len);
int toku_logprint_LSN (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, u_int32_t *len, const char *format __attribute__((__unused__)));
int toku_logprint_TXNID (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, u_int32_t *len, const char *format __attribute__((__unused__)));
int toku_logprint_GID (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, u_int32_t *len, const char *format __attribute__((__unused__)));
int toku_logprint_u_int8_t (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, u_int32_t *len, const char *format);
int toku_logprint_u_int32_t (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, u_int32_t *len, const char *format);
int toku_logprint_BLOCKNUM (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, u_int32_t *len, const char *format);
@ -186,6 +189,8 @@ void toku_logger_get_status(TOKULOGGER logger, LOGGER_STATUS s);
int toku_get_version_of_logs_on_disk(const char *log_dir, BOOL *found_any_logs, uint32_t *version_found);
int toku_delete_all_logs_of_version(const char *log_dir, uint32_t version_to_delete);
static const TOKULOGGER NULL_logger __attribute__((__unused__)) = NULL;
#if defined(__cplusplus) || defined(__cilkplusplus)
}
#endif

434
newbrt/recover.c

@ -56,7 +56,7 @@ static const char *scan_state_string(struct scan_state *ss) {
// File map tuple
struct file_map_tuple {
FILENUM filenum;
BRT brt;
BRT brt; // NULL brt means it's a rollback file.
char *iname;
};
@ -78,6 +78,28 @@ struct file_map {
OMT filenums;
};
// The recovery environment: the state carried through one recovery run.
// It holds the callbacks used to hand results back to the environment, the cachetable and logger in use,
// the comparison/update/row-generation functions, and the scan and file-map state.
struct recover_env {
DB_ENV *env;
keep_zombie_callback_t keep_zombie_callback; // at the end of recovery, the zombie BRTs that need to be recorded in the environment are sent this way. The iname is malloc'd and will now be owned by the environment (so the env must free it)
prepared_txn_callback_t prepared_txn_callback; // at the end of recovery, all the prepared txns are passed back to the ydb layer to make them into valid transactions.
keep_cachetable_callback_t keep_cachetable_callback; // stores the cachetable into the environment (recover_env_init calls it right after creating the cachetable). When it is non-NULL, cleanup does not close the cachetable.
setup_db_callback_t setup_db_callback; // when creating a DB, we first create a BRT and then use this to build the DB we need.
close_db_callback_t close_db_callback; // If this function is non-NULL then use it to close the DB and close the BRT. Otherwise we call toku_close_brt_nolsn ourselves.
CACHETABLE ct;
// Either the logger passed to recover_env_init or, if none was passed, one created there (see destroy_logger_at_end).
TOKULOGGER logger;
brt_compare_func bt_compare;
brt_update_func update_function;
generate_row_for_put_func generate_row_for_put;
generate_row_for_del_func generate_row_for_del;
struct scan_state ss;
struct file_map fmap;
BOOL goforward;
bool destroy_logger_at_end; // If true then destroy the logger when we are done. If false then set the logger into write-files mode when we are done with recovery.
};
typedef struct recover_env *RECOVER_ENV;
static void file_map_init(struct file_map *fmap) {
int r = toku_omt_create(&fmap->filenums);
assert(r == 0);
@ -91,7 +113,7 @@ static uint32_t file_map_get_num_dictionaries(struct file_map *fmap) {
return toku_omt_size(fmap->filenums);
}
static void file_map_close_dictionaries(struct file_map *fmap, BOOL recovery_succeeded, TOKULOGGER logger) {
static void file_map_close_dictionaries(RECOVER_ENV renv, struct file_map *fmap, BOOL recovery_succeeded, LSN oplsn) {
int r;
while (1) {
@ -110,23 +132,40 @@ static void file_map_close_dictionaries(struct file_map *fmap, BOOL recovery_suc
r = toku_brt_set_panic(tuple->brt, DB_RUNRECOVERY, "recovery failed");
assert(r==0);
}
//Logging is already back on. No need to pass LSN into close.
// Logging is on again, but we must pass the right LSN into close.
char *error_string = NULL;
DB *fake_db = tuple->brt->db; //Need to free the fake db that was malloced
if (logger->rollback_cachefile != tuple->brt->cf) {
//Rollback cachefile is closed manually at end of recovery, not here
r = toku_close_brt(tuple->brt, &error_string);
if (!recovery_succeeded) {
if (tokudb_recovery_trace)
fprintf(stderr, "%s:%d %d %s\n", __FUNCTION__, __LINE__, r, error_string);
assert(r != 0);
} else
assert(r == 0);
if (error_string)
toku_free(error_string);
}
toku_free(fake_db); //Must free the DB after the brt is closed
if (tuple->brt) { // it's a DB, not a rollback file
DB *db = tuple->brt->db; //Need to free the fake db that was malloced
if (!toku_brt_zombie_needed(tuple->brt)) {
if (renv->close_db_callback) {
r = renv->close_db_callback(tuple->brt->db, true, oplsn);
if (r!=0) error_string = toku_strdup("Cannot close DB");
db = NULL; // so it won't get freed again below.
} else {
r = toku_close_brt_nolsn(tuple->brt, &error_string);
}
if (!recovery_succeeded) {
if (tokudb_recovery_trace)
fprintf(stderr, "%s:%d %d %s\n", __FUNCTION__, __LINE__, r, error_string);
assert(r != 0);
} else
assert(r == 0);
if (error_string)
toku_free(error_string);
} else {
if (renv->keep_zombie_callback) {
renv->keep_zombie_callback(renv->env,
tuple->brt,
tuple->iname, // use iname for the dname.
true, oplsn);
tuple->iname = NULL; // so it won't be freed again below.
db = NULL; // so it won't get freed again below.
}
}
toku_free(db); //Must free the DB after the brt is closed
} else {
assert(tuple->brt==NULL);
}
file_map_tuple_destroy(tuple);
toku_free(tuple);
}
@ -172,22 +211,15 @@ static int file_map_find(struct file_map *fmap, FILENUM fnum, struct file_map_tu
return r;
}
// The recovery environment
struct recover_env {
CACHETABLE ct;
TOKULOGGER logger;
brt_compare_func bt_compare;
brt_update_func update_function;
generate_row_for_put_func generate_row_for_put;
generate_row_for_del_func generate_row_for_del;
struct scan_state ss;
struct file_map fmap;
BOOL goforward;
};
typedef struct recover_env *RECOVER_ENV;
static int recover_env_init (RECOVER_ENV renv,
const char *env_dir,
DB_ENV *env,
keep_zombie_callback_t keep_zombie_callback,
prepared_txn_callback_t prepared_txn_callback,
keep_cachetable_callback_t keep_cachetable_callback,
setup_db_callback_t setup_db_callback,
close_db_callback_t close_db_callback,
TOKULOGGER logger,
brt_compare_func bt_compare,
brt_update_func update_function,
generate_row_for_put_func generate_row_for_put,
@ -195,17 +227,30 @@ static int recover_env_init (RECOVER_ENV renv,
size_t cachetable_size) {
int r;
r = toku_brt_create_cachetable(&renv->ct, cachetable_size ? cachetable_size : 1<<25, (LSN){0}, 0);
r = toku_brt_create_cachetable(&renv->ct, cachetable_size ? cachetable_size : 1<<25, (LSN){0}, logger);
assert(r == 0);
toku_cachetable_set_env_dir(renv->ct, env_dir);
r = toku_logger_create(&renv->logger);
assert(r == 0);
if (keep_cachetable_callback) keep_cachetable_callback(env, renv->ct);
// If we are passed a logger use it, otherwise create one.
renv->destroy_logger_at_end = logger==NULL;
if (logger) {
renv->logger = logger;
} else {
r = toku_logger_create(&renv->logger);
assert(r == 0);
}
toku_logger_write_log_files(renv->logger, FALSE);
toku_logger_set_cachetable(renv->logger, renv->ct);
renv->bt_compare = bt_compare;
renv->update_function = update_function;
renv->generate_row_for_put = generate_row_for_put;
renv->generate_row_for_del = generate_row_for_del;
renv->env = env;
renv->keep_zombie_callback = keep_zombie_callback;
renv->prepared_txn_callback = prepared_txn_callback;
renv->keep_cachetable_callback = keep_cachetable_callback;
renv->setup_db_callback = setup_db_callback;
renv->close_db_callback = close_db_callback;
renv->bt_compare = bt_compare;
renv->update_function = update_function;
renv->generate_row_for_put = generate_row_for_put;
renv->generate_row_for_del = generate_row_for_del;
file_map_init(&renv->fmap);
renv->goforward = FALSE;
@ -214,19 +259,28 @@ static int recover_env_init (RECOVER_ENV renv,
return r;
}
static void recover_env_cleanup (RECOVER_ENV renv, BOOL recovery_succeeded) {
static void recover_env_cleanup (RECOVER_ENV renv, bool recovery_succeeded) {
int r;
file_map_close_dictionaries(&renv->fmap, recovery_succeeded, renv->logger);
assert(toku_omt_size(renv->fmap.filenums)==0);
//file_map_close_dictionaries(renv, &renv->fmap, recovery_succeeded, oplsn);
file_map_destroy(&renv->fmap);
r = toku_logger_close_rollback(renv->logger, !recovery_succeeded);
assert(r==0);
r = toku_logger_close(&renv->logger);
assert(r == 0);
if (renv->destroy_logger_at_end) {
r = toku_logger_close_rollback(renv->logger, !recovery_succeeded);
assert(r==0);
r = toku_logger_close(&renv->logger);
assert(r == 0);
} else {
toku_logger_write_log_files(renv->logger, true);
}
r = toku_cachetable_close(&renv->ct);
assert(r == 0);
if (renv->keep_cachetable_callback) {
renv->ct = NULL;
} else {
r = toku_cachetable_close(&renv->ct);
assert(r == 0);
}
if (tokudb_recovery_trace)
fprintf(stderr, "%s:%d\n", __FUNCTION__, __LINE__);
@ -246,9 +300,9 @@ static void recover_yield(voidfp f, void *fpthunk, void *UU(yieldthunk)) {
static int internal_recover_fopen_or_fcreate (RECOVER_ENV renv, BOOL must_create, int mode, BYTESTRING *bs_iname, FILENUM filenum, u_int32_t treeflags,
TOKUTXN txn, uint32_t nodesize, uint32_t basementnodesize, LSN max_acceptable_lsn) {
int r;
BRT brt = NULL;
char *iname = fixup_fname(bs_iname);
BRT brt = NULL;
r = toku_brt_create(&brt);
assert(r == 0);
@ -256,41 +310,51 @@ static int internal_recover_fopen_or_fcreate (RECOVER_ENV renv, BOOL must_create
assert(r == 0);
if (nodesize != 0) {
r = toku_brt_set_nodesize(brt, nodesize);
assert(r == 0);
r = toku_brt_set_nodesize(brt, nodesize);
assert(r == 0);
}
if (basementnodesize != 0) {
r = toku_brt_set_basementnodesize(brt, basementnodesize);
assert(r == 0);
r = toku_brt_set_basementnodesize(brt, basementnodesize);
assert(r == 0);
}
// set the key compare functions
if (!(treeflags & TOKU_DB_KEYCMP_BUILTIN) && renv->bt_compare) {
r = toku_brt_set_bt_compare(brt, renv->bt_compare);
r = toku_brt_set_bt_compare(brt, renv->bt_compare);
assert(r == 0);
}
if (renv->update_function) {
r = toku_brt_set_update(brt, renv->update_function);
assert(r == 0);
r = toku_brt_set_update(brt, renv->update_function);
assert(r == 0);
}
// TODO mode (FUTURE FEATURE)
mode = mode;
//Create fake DB for comparison functions.
DB *XCALLOC(fake_db);
r = toku_brt_open_recovery(brt, iname, must_create, must_create, renv->ct, txn, fake_db, filenum, max_acceptable_lsn);
//Create DB (e.g., for comparison functions and also so that when finishing recovery we can make them into zombies)
DB *db;
db = NULL;
if (renv->setup_db_callback) {
r = renv->setup_db_callback(&db, renv->env, 0, brt, true);
assert(r==0);
}
r = toku_brt_open_recovery(brt, iname, must_create, must_create, renv->ct, txn, db, filenum, max_acceptable_lsn);
if (r != 0) {
//Note: If brt_open fails, then close_brt will NOT write a header to disk.
//No need to provide lsn
int rr = toku_close_brt(brt, NULL); assert(rr == 0);
toku_free(iname);
toku_free(fake_db); //Free memory allocated for the fake db.
if (r == ENOENT) //Not an error to simply be missing.
r = 0;
return r;
//Note: If brt_open fails, then close_brt will NOT write a header to disk.
//No need to provide lsn
if (renv->close_db_callback) {
int rr = renv->close_db_callback(db, false, ZERO_LSN);
assert(rr==0);
} else {
int rr = toku_close_brt_nolsn(brt, NULL); assert(rr == 0);
toku_free(db); //Free memory allocated for the fake db.
}
toku_free(iname);
if (r == ENOENT) //Not an error to simply be missing.
r = 0;
return r;
}
file_map_insert(&renv->fmap, filenum, brt, iname);
@ -421,17 +485,19 @@ static int toku_recover_fassociate (struct logtype_fassociate *l, RECOVER_ENV re
// If rollback file, specify which checkpointed version of file we need (not just the latest)
// because we cannot use a rollback log that is later than the last complete checkpoint. See #3113.
{
BOOL rollback_file = !strcmp(fname, ROLLBACK_CACHEFILE_NAME);
BOOL rollback_file = (0==strcmp(fname, ROLLBACK_CACHEFILE_NAME));
LSN max_acceptable_lsn = MAX_LSN;
if (rollback_file)
if (rollback_file) {
max_acceptable_lsn = renv->ss.checkpoint_begin_lsn;
r = internal_recover_fopen_or_fcreate(renv, FALSE, 0, &l->iname, l->filenum, l->treeflags, NULL, 0, 0, max_acceptable_lsn);
if (r==0 && rollback_file) {
//Load rollback cachefile
r = file_map_find(&renv->fmap, l->filenum, &tuple);
assert(r==0);
renv->logger->rollback_cachefile = tuple->brt->cf;
}
BRT t;
r = toku_brt_create(&t);
assert(r==0);
r = toku_brt_open_recovery(t, ROLLBACK_CACHEFILE_NAME, false, false, renv->ct, (TOKUTXN)NULL, (DB*)NULL, l->filenum, max_acceptable_lsn);
renv->logger->rollback_cachefile = t->cf;
} else {
r = internal_recover_fopen_or_fcreate(renv, FALSE, 0, &l->iname, l->filenum, l->treeflags, NULL, 0, 0, max_acceptable_lsn);
assert(r==0);
}
}
break;
case FORWARD_NEWER_CHECKPOINT_END:
@ -480,19 +546,35 @@ recover_transaction(TOKUTXN *txnp, TXNID xid, TXNID parentxid, TOKULOGGER logger
return 0;
}
static int toku_recover_xstillopen (struct logtype_xstillopen *l, RECOVER_ENV UU(renv)) {
static int recover_xstillopen_internal (TOKUTXN *txnp,
LSN UU(lsn),
TXNID xid,
TXNID parentxid,
u_int64_t rollentry_raw_count,
FILENUMS open_filenums,
u_int8_t force_fsync_on_commit,
u_int64_t num_rollback_nodes,
u_int64_t num_rollentries,
BLOCKNUM spilled_rollback_head,
BLOCKNUM spilled_rollback_tail,
BLOCKNUM current_rollback,
u_int32_t UU(crc),
u_int32_t UU(len),
RECOVER_ENV renv) {
int r;
*txnp = NULL;
switch (renv->ss.ss) {
case FORWARD_BETWEEN_CHECKPOINT_BEGIN_END: {
renv->ss.checkpoint_num_xstillopen++;
TOKUTXN txn = NULL;
{ //Create the transaction.
r = recover_transaction(&txn, l->xid, l->parentxid, renv->logger);
r = recover_transaction(&txn, xid, parentxid, renv->logger);
assert(r==0);
assert(txn!=NULL);
*txnp = txn;
}
{ //Recover rest of transaction.
#define COPY_TO_INFO(field) .field = l->field
#define COPY_TO_INFO(field) .field = field
struct txninfo info = {
COPY_TO_INFO(rollentry_raw_count),
.num_brts = 0, //Set afterwards
@ -506,13 +588,13 @@ static int toku_recover_xstillopen (struct logtype_xstillopen *l, RECOVER_ENV UU
};
#undef COPY_TO_INFO
//Generate open_brts
BRT array[l->open_filenums.num]; //Allocate maximum possible requirement
BRT array[open_filenums.num]; //Allocate maximum possible requirement
info.open_brts = array;
uint32_t i;
for (i = 0; i < l->open_filenums.num; i++) {
for (i = 0; i < open_filenums.num; i++) {
//open_filenums.filenums[]
struct file_map_tuple *tuple = NULL;
r = file_map_find(&renv->fmap, l->open_filenums.filenums[i], &tuple);
r = file_map_find(&renv->fmap, open_filenums.filenums[i], &tuple);
if (r==0) {
info.open_brts[info.num_brts++] = tuple->brt;
}
@ -528,9 +610,10 @@ static int toku_recover_xstillopen (struct logtype_xstillopen *l, RECOVER_ENV UU
case FORWARD_NEWER_CHECKPOINT_END: {
// assert that the transaction exists
TOKUTXN txn = NULL;
r = toku_txnid2txn(renv->logger, l->xid, &txn);
r = toku_txnid2txn(renv->logger, xid, &txn);
assert(r == 0 && txn != NULL);
r = 0;
*txnp = txn;
break;
}
default:
@ -540,10 +623,56 @@ static int toku_recover_xstillopen (struct logtype_xstillopen *l, RECOVER_ENV UU
return r;
}
// Forward-recovery handler for an xstillopen log entry.
// Unpacks every field of the entry and forwards it, together with renv, to
// recover_xstillopen_internal().
// Returns whatever recover_xstillopen_internal() returns.
static int toku_recover_xstillopen (struct logtype_xstillopen *l, RECOVER_ENV renv) {
    // The shared helper reports the recovered transaction through an
    // out-parameter; this handler has no further use for it.
    TOKUTXN recovered_txn;
    int result = recover_xstillopen_internal(&recovered_txn,
                                             l->lsn,
                                             l->xid,
                                             l->parentxid,
                                             l->rollentry_raw_count,
                                             l->open_filenums,
                                             l->force_fsync_on_commit,
                                             l->num_rollback_nodes,
                                             l->num_rollentries,
                                             l->spilled_rollback_head,
                                             l->spilled_rollback_tail,
                                             l->current_rollback,
                                             l->crc,
                                             l->len,
                                             renv);
    return result;
}
// Forward-recovery handler for an xstillopenprepared log entry.
// First recovers the transaction through recover_xstillopen_internal(), then,
// if that succeeded, passes the transaction and the entry's gid to
// toku_txn_prepare_txn().
// Returns the first nonzero result, or the result of toku_txn_prepare_txn().
static int toku_recover_xstillopenprepared (struct logtype_xstillopenprepared *l, RECOVER_ENV renv) {
    TOKUTXN recovered_txn;
    int result = recover_xstillopen_internal(&recovered_txn,
                                             l->lsn,
                                             l->xid,
                                             (TXNID)0, // parent xid: always passed as 0, not read from the entry
                                             l->rollentry_raw_count,
                                             l->open_filenums,
                                             l->force_fsync_on_commit,
                                             l->num_rollback_nodes,
                                             l->num_rollentries,
                                             l->spilled_rollback_head,
                                             l->spilled_rollback_tail,
                                             l->current_rollback,
                                             l->crc,
                                             l->len,
                                             renv);
    if (result != 0) {
        // Recovery of the transaction itself failed; do not attempt to prepare it.
        return result;
    }
    return toku_txn_prepare_txn(recovered_txn, l->gid);
}
// Backward-scan handler for an xstillopen log entry.
// Both arguments are unused: the backward pass does no work for this entry
// type and always reports success (0).
static int toku_recover_backward_xstillopen (struct logtype_xstillopen *UU(l),
                                             RECOVER_ENV UU(renv)) {
    return 0;
}
// Backward-scan handler for an xstillopenprepared log entry.
// Both arguments are unused: the backward pass does no work for this entry
// type and always reports success (0).
static int toku_recover_backward_xstillopenprepared (struct logtype_xstillopenprepared *UU(l),
                                                     RECOVER_ENV UU(renv)) {
    return 0;
}
static int toku_recover_suppress_rollback (struct logtype_suppress_rollback *UU(l), RECOVER_ENV UU(renv)) {
struct file_map_tuple *tuple = NULL;
@ -604,6 +733,29 @@ static int toku_recover_backward_xcommit (struct logtype_xcommit *UU(l), RECOVER
return 0;
}
// Forward-recovery handler for an xprepare log entry.
// Looks up the transaction named by l->xid in the recovery logger and passes
// it, with the entry's gid, to toku_txn_prepare_txn().
// Any failure (lookup error, unknown transaction, prepare error) trips an
// assert; otherwise returns 0.
static int toku_recover_xprepare (struct logtype_xprepare *l, RECOVER_ENV renv) {
    // The transaction must already be known to the logger at this point.
    TOKUTXN prepared_txn = NULL;
    int lookup_r = toku_txnid2txn(renv->logger, l->xid, &prepared_txn);
    assert(lookup_r == 0);
    assert(prepared_txn != NULL);

    // Hand the transaction and its gid to the prepare routine.
    int prepare_r = toku_txn_prepare_txn(prepared_txn, l->gid);
    assert(prepare_r == 0);

    return 0;
}
// Backward-scan handler for an xprepare log entry.
// Both arguments are unused: the backward pass does no work for this entry
// type and always reports success (0).
static int toku_recover_backward_xprepare (struct logtype_xprepare *UU(l),
                                           RECOVER_ENV UU(renv)) {
    return 0;
}
static int toku_recover_xabort (struct logtype_xabort *l, RECOVER_ENV renv) {
int r;
@ -652,7 +804,7 @@ static int toku_recover_fcreate (struct logtype_fcreate *l, RECOVER_ENV renv) {
toku_free(iname);
return r;
}
assert(strcmp(iname, ROLLBACK_CACHEFILE_NAME)); //Creation of rollback cachefile never gets logged.
assert(0!=strcmp(iname, ROLLBACK_CACHEFILE_NAME)); //Creation of rollback cachefile never gets logged.
toku_free(iname_in_cwd);
toku_free(iname);
@ -680,10 +832,9 @@ static int toku_recover_fopen (struct logtype_fopen *l, RECOVER_ENV renv) {
TOKUTXN txn = NULL;
char *fname = fixup_fname(&l->iname);
if (strcmp(fname, ROLLBACK_CACHEFILE_NAME)) {
//Rollback cachefile can only be opened via fassociate.
r = internal_recover_fopen_or_fcreate(renv, must_create, 0, &l->iname, l->filenum, l->treeflags, txn, 0, 0, MAX_LSN);
}
assert(0!=strcmp(fname, ROLLBACK_CACHEFILE_NAME)); //Rollback cachefile can be opened only via fassociate.
r = internal_recover_fopen_or_fcreate(renv, must_create, 0, &l->iname, l->filenum, l->treeflags, txn, 0, 0, MAX_LSN);
toku_free(fname);
return r;
}
@ -738,13 +889,18 @@ static int toku_recover_fclose (struct logtype_fclose *l, RECOVER_ENV renv) {
char *iname = fixup_fname(&l->iname);
assert(strcmp(tuple->iname, iname) == 0); // verify that file_map has same iname as log entry
DB *fake_db = tuple->brt->db; //Need to free the fake db that was malloced
if (strcmp(iname, ROLLBACK_CACHEFILE_NAME)) {
if (0!=strcmp(iname, ROLLBACK_CACHEFILE_NAME)) {
//Rollback cachefile is closed manually at end of recovery, not here
r = toku_close_brt_lsn(tuple->brt, 0, TRUE, l->lsn);
assert(r == 0);
}
toku_free(fake_db); //Must free the DB after the brt is closed
DB *db = tuple->brt->db;
if (renv->close_db_callback) {
r = renv->close_db_callback(db, true, l->lsn);
assert(r==0);
} else {
r = toku_close_brt_lsn(tuple->brt, 0, TRUE, l->lsn);
assert(r == 0);
toku_free(db); //Must free the DB after the brt is closed
}
}
file_map_remove(&renv->fmap, l->filenum);
toku_free(iname);
}
@ -802,6 +958,8 @@ static int toku_recover_enq_insert (struct logtype_enq_insert *l, RECOVER_ENV re
toku_fill_dbt(&valdbt, l->value.data, l->value.len);
r = toku_brt_maybe_insert(tuple->brt, &keydbt, &valdbt, txn, TRUE, l->lsn, FALSE, BRT_INSERT);
assert(r == 0);
r = toku_txn_note_brt(txn, tuple->brt);
assert(r == 0);
}
return 0;
}
@ -1140,25 +1298,47 @@ static uint32_t recover_get_num_live_txns(RECOVER_ENV renv) {
return toku_omt_size(renv->logger->live_txns);
}
// Search the logger's live_txns OMT for a transaction whose state is not
// TOKUTXN_PREPARING.
// The OMT is walked from its last position down to position 0, so the
// match found is the one at the highest index.
// On success stores the transaction in *txnp and returns 0.
// Returns DB_NOTFOUND (leaving *txnp untouched) when every live transaction
// is in the TOKUTXN_PREPARING state or the OMT is empty.
static int find_an_unprepared_txn (RECOVER_ENV renv, TOKUTXN *txnp) {
    // The size is sampled once; positions [0, remaining) are still to be examined.
    u_int32_t remaining = toku_omt_size(renv->logger->live_txns);
    while (remaining > 0) {
        remaining--;
        OMTVALUE entry;
        int fetch_r = toku_omt_fetch(renv->logger->live_txns, remaining, &entry);
        assert(fetch_r==0);
        TOKUTXN candidate = (TOKUTXN) entry;
        if (candidate->state != TOKUTXN_PREPARING) {
            *txnp = candidate;
            return 0;
        }
    }
    return DB_NOTFOUND;
}
// abort all of the remaining live transactions in descending transaction id order
static void recover_abort_live_txns(RECOVER_ENV renv) {
int r;
while (1) {
u_int32_t n_live_txns = toku_omt_size(renv->logger->live_txns);
if (n_live_txns == 0)
break;
OMTVALUE v;
r = toku_omt_fetch(renv->logger->live_txns, n_live_txns-1, &v);
if (r != 0)
break;
TOKUTXN txn = (TOKUTXN) v;
// abort the transaction
r = toku_txn_abort_txn(txn, recover_yield, NULL, NULL, NULL, false);
assert(r == 0);
TOKUTXN txn;
int r = find_an_unprepared_txn (renv, &txn);
if (r==0) {
// abort the transaction
r = toku_txn_abort_txn(txn, recover_yield, NULL, NULL, NULL, false);
assert(r == 0);
// close the transaction
toku_txn_close_txn(txn);
} else if (r==DB_NOTFOUND) {
break;
} else {
abort();
}
}
// close the transaction
toku_txn_close_txn(txn);
// Now we have only prepared txns. These prepared txns don't have full DB_TXNs in them, so we need to make some.
for (u_int32_t i=0; i<toku_omt_size(renv->logger->live_txns); i++) {
OMTVALUE v;
int r = toku_omt_fetch(renv->logger->live_txns, i, &v);
assert(r==0);
TOKUTXN txn = v;
renv->prepared_txn_callback(renv->env, txn);
}
}
@ -1226,7 +1406,6 @@ static int do_recovery(RECOVER_ENV renv, const char *env_dir, const char *log_di
rr = ENOTDIR; goto errorexit;
}
}
// scan backwards
scan_state_init(&renv->ss);
tnow = time(NULL);
@ -1334,20 +1513,29 @@ static int do_recovery(RECOVER_ENV renv, const char *env_dir, const char *log_di
toku_logger_restart(renv->logger, lastlsn);
// abort the live transactions
uint32_t n = recover_get_num_live_txns(renv);
if (n > 0) {
tnow = time(NULL);
fprintf(stderr, "%.24s Tokudb recovery aborting %"PRIu32" live transaction%s\n", ctime(&tnow), n, n > 1 ? "s" : "");
{
uint32_t n = recover_get_num_live_txns(renv);
if (n > 0) {
tnow = time(NULL);
fprintf(stderr, "%.24s Tokudb recovery has %"PRIu32" live transaction%s\n", ctime(&tnow), n, n > 1 ? "s" : "");
}
}
recover_abort_live_txns(renv);
{
uint32_t n = recover_get_num_live_txns(renv);
if (n > 0) {
tnow = time(NULL);
fprintf(stderr, "%.24s Tokudb recovery has %"PRIu32" prepared transaction%s\n", ctime(&tnow), n, n > 1 ? "s" : "");
}
}
// close the open dictionaries
n = file_map_get_num_dictionaries(&renv->fmap);
uint32_t n = file_map_get_num_dictionaries(&renv->fmap);
if (n > 0) {
tnow = time(NULL);
fprintf(stderr, "%.24s Tokudb recovery closing %"PRIu32" dictionar%s\n", ctime(&tnow), n, n > 1 ? "ies" : "y");
}
file_map_close_dictionaries(&renv->fmap, TRUE, renv->logger);
file_map_close_dictionaries(renv, &renv->fmap, TRUE, lastlsn);
// write a recovery log entry
BYTESTRING recover_comment = { strlen("recover"), "recover" };
@ -1404,7 +1592,14 @@ toku_recover_unlock(int lockfd) {
int tokudb_recover(const char *env_dir, const char *log_dir,
int tokudb_recover(DB_ENV *env,
keep_zombie_callback_t keep_zombie_callback,
prepared_txn_callback_t prepared_txn_callback,
keep_cachetable_callback_t keep_cachetable_callback,
setup_db_callback_t setup_db_callback,
close_db_callback_t close_db_callback,
TOKULOGGER logger,
const char *env_dir, const char *log_dir,
brt_compare_func bt_compare,
brt_update_func update_function,
generate_row_for_put_func generate_row_for_put,
@ -1422,6 +1617,13 @@ int tokudb_recover(const char *env_dir, const char *log_dir,
struct recover_env renv;
r = recover_env_init(&renv,
env_dir,
env,
keep_zombie_callback,
prepared_txn_callback,
keep_cachetable_callback,
setup_db_callback,
close_db_callback,
logger,
bt_compare,
update_function,
generate_row_for_put,

21
newbrt/recover.h

@ -17,9 +17,22 @@
extern "C" {
#endif
// Callback types that the caller of tokudb_recover() may supply so that
// recovery can hand objects back to the environment layer.

// Called by recovery once for each transaction that is still live after all
// unprepared transactions have been aborted (see recover_abort_live_txns).
typedef void (*prepared_txn_callback_t)(DB_ENV*, TOKUTXN);
// NOTE(review): no call site is visible in this part of the change; presumably
// hands a BRT that must outlive recovery to the environment -- confirm against
// file_map_close_dictionaries.
typedef void (*keep_zombie_callback_t)(DB_ENV*, BRT, char *iname, bool oplsn_valid, LSN oplsn);
// NOTE(review): no call site is visible in this part of the change; presumably
// lets the environment take over the recovery cachetable -- confirm.
typedef void (*keep_cachetable_callback_t)(DB_ENV*, CACHETABLE);
// Creates the DB for a BRT that recovery is about to open. Recovery calls it
// (when non-NULL) with db_create_flags 0 and is_open true, and asserts that it
// returns 0. The result must not be ignored.
typedef int (*setup_db_callback_t)(DB **, DB_ENV *, u_int32_t db_create_flags, BRT, bool /*is_open*/) __attribute__ ((warn_unused_result));
// Closes a DB produced by the setup callback. Recovery passes oplsn_valid
// false with ZERO_LSN when the open failed, and oplsn_valid true with the
// fclose entry's LSN when replaying an fclose. The result must not be ignored.
typedef int (*close_db_callback_t)(DB *, bool oplsn_valid, LSN oplsn) __attribute__ ((warn_unused_result));
// Run tokudb recovery from the log
// Returns 0 if success
int tokudb_recover (const char *env_dir, const char *log_dir,
int tokudb_recover (DB_ENV *env,
keep_zombie_callback_t keep_zombie_callback,
prepared_txn_callback_t prepared_txn_callback,
keep_cachetable_callback_t keep_cachetable_callback,
setup_db_callback_t setup_db_callback,
close_db_callback_t close_db_callback,
TOKULOGGER logger,
const char *env_dir, const char *log_dir,
brt_compare_func bt_compare,
brt_update_func update_function,
generate_row_for_put_func generate_row_for_put,
@ -45,6 +58,12 @@ int toku_recover_lock (const char *lock_dir, int *lockfd);
int toku_recover_unlock(int lockfd);
// Typed NULL values, one per callback type, for callers of tokudb_recover()
// that supply no callbacks (e.g. recovery_main in tdb-recover.c). They make the
// long argument list self-describing where a row of bare NULLs would not be.
// Marked __unused__ because each file including this header gets its own copy
// and most files do not reference all of them.
static const prepared_txn_callback_t NULL_prepared_txn_callback __attribute__((__unused__)) = NULL;
static const keep_zombie_callback_t NULL_keep_zombie_callback __attribute__((__unused__)) = NULL;
static const keep_cachetable_callback_t NULL_keep_cachetable_callback __attribute__((__unused__)) = NULL;
static const setup_db_callback_t NULL_setup_db_callback __attribute__((__unused__)) = NULL;
static const close_db_callback_t NULL_close_db_callback __attribute__((__unused__)) = NULL;
#if defined(__cplusplus) || defined(__cilkplusplus)
};

9
newbrt/rollback.c

@ -760,7 +760,7 @@ int toku_txn_note_swap_brt (BRT live, BRT zombie) {
//Close immediately.
assert(zombie->close_db);
assert(!toku_brt_zombie_needed(zombie));
r = zombie->close_db(zombie->db, zombie->close_flags);
r = zombie->close_db(zombie->db, zombie->close_flags, false, ZERO_LSN);
return r;
}
@ -785,7 +785,10 @@ int toku_txn_note_close_brt (BRT brt) {
return 0;
}
static int remove_txn (OMTVALUE brtv, u_int32_t UU(idx), void *txnv) {
static int remove_txn (OMTVALUE brtv, u_int32_t UU(idx), void *txnv)
// Effect: This function is called on every open BRT that a transaction used.
// This function removes the transaction from that BRT.
{
BRT brt = brtv;
TOKUTXN txn = txnv;
OMTVALUE txnv_again=NULL;
@ -805,7 +808,7 @@ static int remove_txn (OMTVALUE brtv, u_int32_t UU(idx), void *txnv) {
if (!toku_brt_zombie_needed(brt) && brt->was_closed) {
//Close immediately.
assert(brt->close_db);
r = brt->close_db(brt->db, brt->close_flags);
r = brt->close_db(brt->db, brt->close_flags, false, ZERO_LSN);
}
return r;
}

9
newbrt/tdb-recover.c

@ -42,7 +42,14 @@ int recovery_main (int argc, const char *const argv[]) {
return(1);
}
int r = tokudb_recover(data_dir, log_dir, NULL, NULL, NULL, NULL, 0);
int r = tokudb_recover(NULL,
NULL_keep_zombie_callback,
NULL_prepared_txn_callback,
NULL_keep_cachetable_callback,
NULL_setup_db_callback,
NULL_close_db_callback,
NULL_logger,
data_dir, log_dir, NULL, NULL, NULL, NULL, 0);
if (r!=0) {
fprintf(stderr, "Recovery failed\n");
return(1);

2
newbrt/tests/benchmark-test.c

@ -39,7 +39,7 @@ static void setup (void) {
static void toku_shutdown (void) {
int r;
r = toku_close_brt(t, 0); assert(r==0);
r = toku_close_brt_nolsn(t, 0); assert(r==0);
r = toku_cachetable_close(&ct); assert(r==0);
}
static void long_long_to_array (unsigned char *a, unsigned long long l) {

4
newbrt/tests/brt-serialize-sub-block-test.c

@ -44,7 +44,7 @@ static void test_sub_block(int n) {
}
// write to the file
error = toku_close_brt(brt, 0);
error = toku_close_brt_nolsn(brt, 0);
assert(error == 0);
// verify the brt by walking a cursor through the rows
@ -71,7 +71,7 @@ static void test_sub_block(int n) {
error = toku_brt_cursor_close(cursor);
assert(error == 0);
error = toku_close_brt(brt, 0);
error = toku_close_brt_nolsn(brt, 0);
assert(error == 0);
error = toku_cachetable_close(&ct);

2
newbrt/tests/brt-test-cursor-2.c

@ -83,7 +83,7 @@ static void test_multiple_brt_cursor_dbts(int n, DB *db) {
toku_free(ptrs[i]);
}
r = toku_close_brt(brt, 0);
r = toku_close_brt_nolsn(brt, 0);
assert(r==0);
r = toku_cachetable_close(&ct);

26
newbrt/tests/brt-test-cursor.c

@ -111,7 +111,7 @@ static void test_brt_cursor_first(int n, DB *db) {
else
assert_cursor_value(brt, DB_FIRST, 0);
r = toku_close_brt(brt, 0);
r = toku_close_brt_nolsn(brt, 0);
assert(r==0);
r = toku_cachetable_close(&ct);
@ -152,7 +152,7 @@ static void test_brt_cursor_last(int n, DB *db) {
else
assert_cursor_value(brt, DB_LAST, n-1);
r = toku_close_brt(brt, 0);
r = toku_close_brt_nolsn(brt, 0);
assert(r==0);
r = toku_cachetable_close(&ct);
@ -195,7 +195,7 @@ static void test_brt_cursor_first_last(int n, DB *db) {
} else
assert_cursor_first_last(brt, 0, n-1);
r = toku_close_brt(brt, 0);
r = toku_close_brt_nolsn(brt, 0);
assert(r==0);
r = toku_cachetable_close(&ct);
@ -239,7 +239,7 @@ static void test_brt_cursor_rfirst(int n, DB *db) {
else
assert_cursor_value(brt, DB_FIRST, 0);
r = toku_close_brt(brt, 0);
r = toku_close_brt_nolsn(brt, 0);
assert(r==0);
r = toku_cachetable_close(&ct);
@ -304,7 +304,7 @@ static void test_brt_cursor_walk(int n, DB *db) {
/* walk the tree */
assert_cursor_walk(brt, n);
r = toku_close_brt(brt, 0);
r = toku_close_brt_nolsn(brt, 0);
assert(r==0);
r = toku_cachetable_close(&ct);
@ -370,7 +370,7 @@ static void test_brt_cursor_rwalk(int n, DB *db) {
/* walk the tree */
assert_cursor_rwalk(brt, n);
r = toku_close_brt(brt, 0);
r = toku_close_brt_nolsn(brt, 0);
assert(r==0);
r = toku_cachetable_close(&ct);
@ -467,7 +467,7 @@ static void test_brt_cursor_rand(int n, DB *db) {
/* walk the tree */
assert_cursor_walk_inorder(brt, n);
r = toku_close_brt(brt, 0);
r = toku_close_brt_nolsn(brt, 0);
assert(r==0);
r = toku_cachetable_close(&ct);
@ -546,7 +546,7 @@ static void test_brt_cursor_split(int n, DB *db) {
r = toku_brt_cursor_close(cursor);
assert(r==0);
r = toku_close_brt(brt, 0);
r = toku_close_brt_nolsn(brt, 0);
assert(r==0);
r = toku_cachetable_close(&ct);
@ -580,7 +580,7 @@ static void test_multiple_brt_cursors(int n, DB *db) {
assert(r == 0);
}
r = toku_close_brt(brt, 0);
r = toku_close_brt_nolsn(brt, 0);
assert(r==0);
r = toku_cachetable_close(&ct);
@ -672,7 +672,7 @@ static void test_multiple_brt_cursor_walk(int n, DB *db) {
assert(r == 0);
}
r = toku_close_brt(brt, 0);
r = toku_close_brt_nolsn(brt, 0);
assert(r==0);
r = toku_cachetable_close(&ct);
@ -744,7 +744,7 @@ static void test_brt_cursor_set(int n, int cursor_op, DB *db) {
r = toku_brt_cursor_close(cursor);
assert(r==0);
r = toku_close_brt(brt, 0);
r = toku_close_brt_nolsn(brt, 0);
assert(r==0);
r = toku_cachetable_close(&ct);
@ -808,7 +808,7 @@ static void test_brt_cursor_set_range(int n, DB *db) {
r = toku_brt_cursor_close(cursor);
assert(r==0);
r = toku_close_brt(brt, 0);
r = toku_close_brt_nolsn(brt, 0);
assert(r==0);
r = toku_cachetable_close(&ct);
@ -869,7 +869,7 @@ static void test_brt_cursor_delete(int n, DB *db) {
error = toku_brt_cursor_close(cursor);
assert(error == 0);
error = toku_close_brt(brt, 0);
error = toku_close_brt_nolsn(brt, 0);
assert(error == 0);
error = toku_cachetable_close(&ct);

4
newbrt/tests/brt-test-header.c

@ -35,7 +35,7 @@ static void test_header (void) {
h->in_memory_stats = (STAT64INFO_S) {10, 11};
h->on_disk_stats = (STAT64INFO_S) {20, 21};
h->checkpoint_staging_stats = (STAT64INFO_S) {30, 31};
r = toku_close_brt(t, 0); assert(r==0);
r = toku_close_brt_nolsn(t, 0); assert(r==0);
r = toku_cachetable_close(&ct);
assert(r==0);
@ -55,7 +55,7 @@ static void test_header (void) {
assert(h->num_blocks_to_upgrade_14 == 1014);
assert(h->in_memory_stats.numrows == expected_stats.numrows);
assert(h->on_disk_stats.numbytes == expected_stats.numbytes);
r = toku_close_brt(t, 0); assert(r==0);
r = toku_close_brt_nolsn(t, 0); assert(r==0);
r = toku_cachetable_close(&ct);
assert(r==0);

56
newbrt/tests/brt-test.c

@ -21,7 +21,7 @@ static void test_dump_empty_db (void) {
r = toku_open_brt(fname, 1, &t, 1024, 256, ct, null_txn, toku_builtin_compare_fun, null_db);
assert(r==0);
if (verbose) { r=toku_dump_brt(stdout, t); assert(r==0); }
r = toku_close_brt(t, 0); assert(r==0);
r = toku_close_brt_nolsn(t, 0); assert(r==0);
r = toku_cachetable_close(&ct); assert(r==0);
}
@ -55,8 +55,8 @@ static void test_multiple_files_of_size (int size) {
r = toku_verify_brt(t0); assert(r==0);
r = toku_verify_brt(t1); assert(r==0);
r = toku_close_brt(t0, 0); assert(r==0);
r = toku_close_brt(t1, 0); assert(r==0);
r = toku_close_brt_nolsn(t0, 0); assert(r==0);
r = toku_close_brt_nolsn(t1, 0); assert(r==0);
r = toku_cachetable_close(&ct); assert(r==0);
@ -76,8 +76,8 @@ static void test_multiple_files_of_size (int size) {
brt_lookup_and_check_nodup(t1, key, val);
}
r = toku_close_brt(t0, 0); assert(r==0);
r = toku_close_brt(t1, 0); assert(r==0);
r = toku_close_brt_nolsn(t0, 0); assert(r==0);
r = toku_close_brt_nolsn(t1, 0); assert(r==0);
r = toku_cachetable_close(&ct); assert(r==0);
}
@ -116,7 +116,7 @@ static void test_multiple_brts_one_db_one_file (void) {
brt_lookup_and_check_nodup(trees[0], k, vexpect);
}
for (i=0; i<MANYN; i++) {
r=toku_close_brt(trees[i], 0); assert(r==0);
r=toku_close_brt_nolsn(trees[i], 0); assert(r==0);
}
r = toku_cachetable_close(&ct); assert(r==0);
@ -138,7 +138,7 @@ static void test_read_what_was_written (void) {
r = toku_brt_create_cachetable(&ct, 0, ZERO_LSN, NULL_LOGGER); assert(r==0);
r = toku_open_brt(fname, 1, &brt, 1<<12, 1<<9, ct, null_txn, toku_builtin_compare_fun, null_db); assert(r==0);
r = toku_close_brt(brt, 0); assert(r==0);
r = toku_close_brt_nolsn(brt, 0); assert(r==0);
r = toku_cachetable_close(&ct); assert(r==0);
@ -154,7 +154,7 @@ static void test_read_what_was_written (void) {
assert(r==0);
}
r = toku_close_brt(brt, 0); assert(r==0);
r = toku_close_brt_nolsn(brt, 0); assert(r==0);
r = toku_cachetable_close(&ct); assert(r==0);
@ -218,7 +218,7 @@ static void test_read_what_was_written (void) {
}
}
r = toku_close_brt(brt, 0); assert(r==0);
r = toku_close_brt_nolsn(brt, 0); assert(r==0);
if (verbose) printf("%s:%d About to close %p\n", __FILE__, __LINE__, ct);
r = toku_cachetable_close(&ct); assert(r==0);
@ -238,7 +238,7 @@ static void test_read_what_was_written (void) {
}
}
r = toku_close_brt(brt, 0); assert(r==0);
r = toku_close_brt_nolsn(brt, 0); assert(r==0);
r = toku_cachetable_close(&ct); assert(r==0);
@ -274,7 +274,7 @@ static void test_cursor_last_empty(void) {
assert(pair.call_count==0);
assert(r==DB_NOTFOUND);
}
r = toku_close_brt(brt, 0);
r = toku_close_brt_nolsn(brt, 0);
//printf("%s:%d %d alloced\n", __FILE__, __LINE__, toku_get_n_items_malloced()); toku_print_malloced_items();
r = toku_cachetable_close(&ct); assert(r==0);
//printf("%s:%d %d alloced\n", __FILE__, __LINE__, toku_get_n_items_malloced()); toku_print_malloced_items();
@ -325,7 +325,7 @@ static void test_cursor_next (void) {
assert(pair.call_count==0);
}
r = toku_close_brt(brt, 0);
r = toku_close_brt_nolsn(brt, 0);
//printf("%s:%d %d alloced\n", __FILE__, __LINE__, toku_get_n_items_malloced()); toku_print_malloced_items();
r = toku_cachetable_close(&ct); assert(r==0);
//printf("%s:%d %d alloced\n", __FILE__, __LINE__, toku_get_n_items_malloced()); toku_print_malloced_items();
@ -404,7 +404,7 @@ static void test_wrongendian_compare (int wrong_p, unsigned int N) {
}
r = toku_close_brt(brt, 0);
r = toku_close_brt_nolsn(brt, 0);
}
}
@ -444,7 +444,7 @@ static void test_wrongendian_compare (int wrong_p, unsigned int N) {
toku_cachetable_verify(ct);
}
r = toku_close_brt(brt, 0);
r = toku_close_brt_nolsn(brt, 0);
assert(r==0);
}
r = toku_cachetable_close(&ct); assert(r==0);
@ -481,7 +481,7 @@ static void test_large_kv(int bsize, int ksize, int vsize) {
toku_free(k);
toku_free(v);
r = toku_close_brt(t, 0); assert(r==0);
r = toku_close_brt_nolsn(t, 0); assert(r==0);
r = toku_cachetable_close(&ct); assert(r==0);
}
@ -520,7 +520,7 @@ static void test_brt_delete_empty(void) {
r = toku_brt_delete(t, &key, null_txn);
assert(r == 0);
r = toku_close_brt(t, 0); assert(r==0);
r = toku_close_brt_nolsn(t, 0); assert(r==0);
r = toku_cachetable_close(&ct); assert(r==0);
}
@ -586,7 +586,7 @@ static void test_brt_delete_present(int n) {
r = toku_brt_cursor_close(cursor);
assert(r == 0);
r = toku_close_brt(t, 0); assert(r==0);
r = toku_close_brt_nolsn(t, 0); assert(r==0);
r = toku_cachetable_close(&ct); assert(r==0);
}
@ -632,7 +632,7 @@ static void test_brt_delete_not_present(int n) {
return value depends */
if (verbose) printf("toku_brt_delete k=%d %d\n", k, r);
r = toku_close_brt(t, 0); assert(r==0);
r = toku_close_brt_nolsn(t, 0); assert(r==0);
r = toku_cachetable_close(&ct); assert(r==0);
}
@ -719,7 +719,7 @@ static void test_brt_delete_cursor_first(int n) {
r = toku_brt_cursor_close(cursor);
assert(r == 0);
r = toku_close_brt(t, 0); assert(r==0);
r = toku_close_brt_nolsn(t, 0); assert(r==0);
r = toku_cachetable_close(&ct); assert(r==0);
}
@ -770,7 +770,7 @@ static void test_insert_delete_lookup(int n) {
}
}
r = toku_close_brt(t, 0); assert(r==0);
r = toku_close_brt_nolsn(t, 0); assert(r==0);
r = toku_cachetable_close(&ct); assert(r==0);
}
@ -812,7 +812,7 @@ static void test_new_brt_cursor_create_close (void) {
r = toku_brt_cursor_close(cursors[i]); assert(r == 0);
}
r = toku_close_brt(brt, 0); assert(r == 0);
r = toku_close_brt_nolsn(brt, 0); assert(r == 0);
}
static void test_new_brt_cursor_first(int n) {
@ -863,7 +863,7 @@ static void test_new_brt_cursor_first(int n) {
if (val.data) toku_free(val.data);
r = toku_brt_cursor_close(cursor); assert(r == 0);
r = toku_close_brt(t, 0); assert(r==0);
r = toku_close_brt_nolsn(t, 0); assert(r==0);
r = toku_cachetable_close(&ct);assert(r==0);
}
@ -916,7 +916,7 @@ static void test_new_brt_cursor_last(int n) {
if (val.data) toku_free(val.data);
r = toku_brt_cursor_close(cursor); assert(r == 0);
r = toku_close_brt(t, 0); assert(r==0);
r = toku_close_brt_nolsn(t, 0); assert(r==0);
r = toku_cachetable_close(&ct);assert(r==0);
}
@ -959,7 +959,7 @@ static void test_new_brt_cursor_next(int n) {
assert(i == n);
r = toku_brt_cursor_close(cursor); assert(r == 0);
r = toku_close_brt(t, 0); assert(r==0);
r = toku_close_brt_nolsn(t, 0); assert(r==0);
r = toku_cachetable_close(&ct);assert(r==0);
}
@ -1002,7 +1002,7 @@ static void test_new_brt_cursor_prev(int n) {
assert(i == -1);
r = toku_brt_cursor_close(cursor); assert(r == 0);
r = toku_close_brt(t, 0); assert(r==0);
r = toku_close_brt_nolsn(t, 0); assert(r==0);
r = toku_cachetable_close(&ct);assert(r==0);
}
@ -1084,7 +1084,7 @@ static void test_new_brt_cursor_current(int n) {
assert(i == n);
r = toku_brt_cursor_close(cursor); assert(r == 0);
r = toku_close_brt(t, 0); assert(r==0);
r = toku_close_brt_nolsn(t, 0); assert(r==0);
r = toku_cachetable_close(&ct);assert(r==0);
}
@ -1141,7 +1141,7 @@ static void test_new_brt_cursor_set_range(int n) {
r = toku_brt_cursor_close(cursor); assert(r==0);
r = toku_close_brt(brt, 0); assert(r==0);
r = toku_close_brt_nolsn(brt, 0); assert(r==0);
r = toku_cachetable_close(&ct); assert(r==0);
}
@ -1200,7 +1200,7 @@ static void test_new_brt_cursor_set(int n, int cursor_op, DB *db) {
r = toku_brt_cursor_close(cursor); assert(r==0);
r = toku_close_brt(brt, 0); assert(r==0);
r = toku_close_brt_nolsn(brt, 0); assert(r==0);
r = toku_cachetable_close(&ct); assert(r==0);
}

2
newbrt/tests/brt-test0.c

@ -23,7 +23,7 @@ static void test0 (void) {
assert(r==0);
//printf("%s:%d test0\n", __FILE__, __LINE__);
//printf("%s:%d n_items_malloced=%lld\n", __FILE__, __LINE__, n_items_malloced);
r = toku_close_brt(t, 0); assert(r==0);
r = toku_close_brt_nolsn(t, 0); assert(r==0);
//printf("%s:%d n_items_malloced=%lld\n", __FILE__, __LINE__, n_items_malloced);
r = toku_cachetable_close(&ct);
assert(r==0);

2
newbrt/tests/brt-test1.c

@ -28,7 +28,7 @@ static void test1 (void) {
assert(r==0);
assert(pair.call_count==1);
}
r = toku_close_brt(t, 0); assert(r==0);
r = toku_close_brt_nolsn(t, 0); assert(r==0);
r = toku_cachetable_close(&ct); assert(r==0);
if (verbose) printf("test1 ok\n");

2
newbrt/tests/brt-test2.c

@ -37,7 +37,7 @@ static void test2 (int limit) {
}
if (verbose) printf("%s:%d inserted\n", __FILE__, __LINE__);
r = toku_verify_brt(t); assert(r==0);
r = toku_close_brt(t, 0); assert(r==0);
r = toku_close_brt_nolsn(t, 0); assert(r==0);
r = toku_cachetable_close(&ct); assert(r==0);
if (verbose) printf("test2 ok\n");

2
newbrt/tests/brt-test3.c

@ -32,7 +32,7 @@ static void test3 (int nodesize, int basementnodesize, int count) {
assert(r==0);
}
r = toku_verify_brt(t); assert(r==0);
r = toku_close_brt(t, 0); assert(r==0);
r = toku_close_brt_nolsn(t, 0); assert(r==0);
r = toku_cachetable_close(&ct); assert(r==0);
gettimeofday(&t1, 0);

2
newbrt/tests/brt-test4.c

@ -32,7 +32,7 @@ static void test4 (int nodesize, int count) {
assert(r==0);
}
r = toku_verify_brt(t); assert(r==0);
r = toku_close_brt(t, 0); assert(r==0);
r = toku_close_brt_nolsn(t, 0); assert(r==0);
r = toku_cachetable_close(&ct); assert(r==0);
gettimeofday(&t1, 0);

2
newbrt/tests/brt-test5.c

@ -52,7 +52,7 @@ static void test5 (void) {
if (verbose) printf("\n");
toku_free(values);
r = toku_verify_brt(t); assert(r==0);
r = toku_close_brt(t, 0); assert(r==0);
r = toku_close_brt_nolsn(t, 0); assert(r==0);
r = toku_cachetable_close(&ct); assert(r==0);
}

2
newbrt/tests/brtloader-test-writer.c

@ -83,7 +83,7 @@ static void verify_dbfile(int n, const char *name) {
r = toku_brt_stat64(t, NULL, &s); assert(r == 0);
assert(s.nkeys == (u_int64_t)n && s.ndata == (u_int64_t)n && s.dsize == userdata);
r = toku_close_brt(t, 0); assert(r==0);
r = toku_close_brt_nolsn(t, 0); assert(r==0);
r = toku_cachetable_close(&ct);assert(r==0);
if (verbose) traceit("verify done");
}

2
newbrt/tests/brtloader-test.c

@ -276,7 +276,7 @@ static void verify_dbfile(int n, int sorted_keys[], const char *sorted_vals[], c
r = toku_brt_stat64(t, NULL, &s); assert(r == 0);
assert(s.nkeys == (u_int64_t) n && s.ndata == (u_int64_t) n && s.dsize == userdata);
r = toku_close_brt(t, 0); assert(r==0);
r = toku_close_brt_nolsn(t, 0); assert(r==0);
r = toku_cachetable_close(&ct);assert(r==0);
}

8
newbrt/tests/is_empty.c

@ -45,7 +45,7 @@ static void test_it (int N) {
toku_txn_close_txn(txn);
r = toku_checkpoint(ct, logger, NULL, NULL, NULL, NULL, CLIENT_CHECKPOINT); CKERR(r);
r = toku_close_brt(brt, NULL); CKERR(r);
r = toku_close_brt_nolsn(brt, NULL); CKERR(r);
unsigned int rands[N];
for (int i=0; i<N; i++) {
@ -67,7 +67,7 @@ static void test_it (int N) {
r = toku_checkpoint(ct, logger, NULL, NULL, NULL, NULL, CLIENT_CHECKPOINT); CKERR(r);
r = toku_close_brt(brt, NULL); CKERR(r);
r = toku_close_brt_nolsn(brt, NULL); CKERR(r);
if (verbose) printf("i=%d\n", i);
}
@ -94,7 +94,7 @@ static void test_it (int N) {
r = toku_checkpoint(ct, logger, NULL, NULL, NULL, NULL, CLIENT_CHECKPOINT); CKERR(r);
r = toku_close_brt(brt, NULL); CKERR(r);
r = toku_close_brt_nolsn(brt, NULL); CKERR(r);
if (verbose) printf("d=%d\n", i);
}
@ -110,7 +110,7 @@ static void test_it (int N) {
}
r = toku_checkpoint(ct, logger, NULL, NULL, NULL, NULL, CLIENT_CHECKPOINT); CKERR(r);
r = toku_close_brt(brt, NULL); CKERR(r);
r = toku_close_brt_nolsn(brt, NULL); CKERR(r);
r = toku_checkpoint(ct, logger, NULL, NULL, NULL, NULL, CLIENT_CHECKPOINT); CKERR(r);
r = toku_logger_close_rollback(logger, FALSE); CKERR(r);

2
newbrt/tests/keyrange.c

@ -18,7 +18,7 @@ static BRT t;
static void close_brt_and_ct (void) {
int r;
r = toku_close_brt(t, 0); assert(r==0);
r = toku_close_brt_nolsn(t, 0); assert(r==0);
r = toku_cachetable_close(&ct); assert(r==0);
}

4
newbrt/tests/le-cursor-provdel.c

@ -72,7 +72,7 @@ create_populate_tree(const char *logdir, const char *fname, int n) {
assert(error == 0);
toku_txn_close_txn(txn);
error = toku_close_brt(brt, NULL);
error = toku_close_brt_nolsn(brt, NULL);
assert(error == 0);
error = toku_checkpoint(ct, logger, NULL, NULL, NULL, NULL, CLIENT_CHECKPOINT);
@ -173,7 +173,7 @@ test_provdel(const char *logdir, const char *fname, int n) {
assert(error == 0);
toku_txn_close_txn(txn);
error = toku_close_brt(brt, NULL);
error = toku_close_brt_nolsn(brt, NULL);
assert(error == 0);
error = toku_checkpoint(ct, logger, NULL, NULL, NULL, NULL, CLIENT_CHECKPOINT);

8
newbrt/tests/le-cursor-right.c

@ -76,7 +76,7 @@ create_populate_tree(const char *logdir, const char *fname, int n) {
assert(error == 0);
toku_txn_close_txn(txn);
error = toku_close_brt(brt, NULL);
error = toku_close_brt_nolsn(brt, NULL);
assert(error == 0);
error = toku_checkpoint(ct, logger, NULL, NULL, NULL, NULL, CLIENT_CHECKPOINT);
@ -120,7 +120,7 @@ test_neg_infinity(const char *fname, int n) {
error = le_cursor_close(cursor);
assert(error == 0);
error = toku_close_brt(brt, 0);
error = toku_close_brt_nolsn(brt, 0);
assert(error == 0);
error = toku_cachetable_close(&ct);
@ -181,7 +181,7 @@ test_pos_infinity(const char *fname, int n) {
error = le_cursor_close(cursor);
assert(error == 0);
error = toku_close_brt(brt, 0);
error = toku_close_brt_nolsn(brt, 0);
assert(error == 0);
error = toku_cachetable_close(&ct);
@ -253,7 +253,7 @@ test_between(const char *fname, int n) {
error = le_cursor_close(cursor);
assert(error == 0);
error = toku_close_brt(brt, 0);
error = toku_close_brt_nolsn(brt, 0);
assert(error == 0);
error = toku_cachetable_close(&ct);

4
newbrt/tests/le-cursor-walk.c

@ -72,7 +72,7 @@ create_populate_tree(const char *logdir, const char *fname, int n) {
assert(error == 0);
toku_txn_close_txn(txn);
error = toku_close_brt(brt, NULL);
error = toku_close_brt_nolsn(brt, NULL);
assert(error == 0);
error = toku_checkpoint(ct, logger, NULL, NULL, NULL, NULL, CLIENT_CHECKPOINT);
@ -134,7 +134,7 @@ walk_tree(const char *fname, int n) {
error = le_cursor_close(cursor);
assert(error == 0);
error = toku_close_brt(brt, 0);
error = toku_close_brt_nolsn(brt, 0);
assert(error == 0);
error = toku_cachetable_close(&ct);

2
newbrt/tests/make-tree.c

@ -152,7 +152,7 @@ test_make_tree(int height, int fanout, int nperleaf, int do_verify) {
}
// flush to the file system
r = toku_close_brt(brt, 0);
r = toku_close_brt_nolsn(brt, 0);
assert(r == 0);
// shutdown the cachetable

2
newbrt/tests/msnfilter.c

@ -149,7 +149,7 @@ test_msnfilter(int do_verify) {
}
// flush to the file system
r = toku_close_brt(brt, 0);
r = toku_close_brt_nolsn(brt, 0);
assert(r == 0);
// shutdown the cachetable

2
newbrt/tests/orthopush-flush.c

@ -1164,7 +1164,7 @@ test_main (int argc, const char *argv[]) {
}
}
r = toku_close_brt(t, 0); assert(r==0);
r = toku_close_brt_nolsn(t, 0); assert(r==0);
r = toku_cachetable_close(&ct); assert(r==0);
return 0;

9
newbrt/tests/recovery-bad-last-entry.c

@ -57,7 +57,14 @@ run_test(void) {
else
break;
// run recovery
r = tokudb_recover(TESTDIR, TESTDIR, 0, 0, 0, NULL, 0);
r = tokudb_recover(NULL,
NULL_keep_zombie_callback,
NULL_prepared_txn_callback,
NULL_keep_cachetable_callback,
NULL_setup_db_callback,
NULL_close_db_callback,
NULL_logger,
TESTDIR, TESTDIR, 0, 0, 0, NULL, 0);
assert(r == 0);
trim += 1;

8
newbrt/tests/recovery-cbegin-cend-hello.c

@ -42,7 +42,13 @@ run_test(void) {
r = close(devnul); assert(r==0);
// run recovery
r = tokudb_recover(TESTDIR, TESTDIR, 0, 0, 0, NULL, 0);
r = tokudb_recover(NULL,
NULL_keep_zombie_callback,
NULL_prepared_txn_callback,
NULL_keep_cachetable_callback,
NULL_setup_db_callback,
NULL_close_db_callback,
NULL_logger, TESTDIR, TESTDIR, 0, 0, 0, NULL, 0);
assert(r == 0);
r = system("rm -rf " TESTDIR);

8
newbrt/tests/recovery-cbegin-cend.c

@ -27,7 +27,13 @@ run_test(void) {
r = toku_logger_close(&logger); assert(r == 0);
// run recovery
r = tokudb_recover(TESTDIR, TESTDIR, 0, 0, 0, NULL, 0);
r = tokudb_recover(NULL,
NULL_keep_zombie_callback,
NULL_prepared_txn_callback,
NULL_keep_cachetable_callback,
NULL_setup_db_callback,
NULL_close_db_callback,
NULL_logger, TESTDIR, TESTDIR, 0, 0, 0, NULL, 0);
assert(r == 0);
r = system("rm -rf " TESTDIR);

8
newbrt/tests/recovery-cbegin.c

@ -33,7 +33,13 @@ run_test(void) {
r = close(devnul);
assert(r==0);
r = tokudb_recover(TESTDIR, TESTDIR, 0, 0, 0, NULL, 0);
r = tokudb_recover(NULL,
NULL_keep_zombie_callback,
NULL_prepared_txn_callback,
NULL_keep_cachetable_callback,
NULL_setup_db_callback,
NULL_close_db_callback,
NULL_logger, TESTDIR, TESTDIR, 0, 0, 0, NULL, 0);
assert(r == 0);
r = system("rm -rf " TESTDIR);

8
newbrt/tests/recovery-cend-cbegin.c

@ -35,7 +35,13 @@ run_test(void) {
}
// run recovery
r = tokudb_recover(TESTDIR, TESTDIR,
r = tokudb_recover(NULL,
NULL_keep_zombie_callback,
NULL_prepared_txn_callback,
NULL_keep_cachetable_callback,
NULL_setup_db_callback,
NULL_close_db_callback,
NULL_logger, TESTDIR, TESTDIR,
toku_builtin_compare_fun,
NULL, NULL, NULL,
0);

9
newbrt/tests/recovery-datadir-is-file.c

@ -32,7 +32,14 @@ run_test(void) {
// run recovery
r = system("touch " TESTFILE); CKERR(r);
r = tokudb_recover(TESTFILE, TESTDIR, 0, 0, 0, NULL, 0);
r = tokudb_recover(NULL,
NULL_keep_zombie_callback,
NULL_prepared_txn_callback,
NULL_keep_cachetable_callback,
NULL_setup_db_callback,
NULL_close_db_callback,
NULL_logger,
TESTFILE, TESTDIR, 0, 0, 0, NULL, 0);
assert(r != 0);
r = system("rm -rf " TESTDIR " " TESTFILE); CKERR(r);

8
newbrt/tests/recovery-empty.c

@ -30,7 +30,13 @@ run_test(void) {
}
// run recovery
r = tokudb_recover(TESTDIR, TESTDIR, 0, 0, 0, NULL, 0);
r = tokudb_recover(NULL,
NULL_keep_zombie_callback,
NULL_prepared_txn_callback,
NULL_keep_cachetable_callback,
NULL_setup_db_callback,
NULL_close_db_callback,
NULL_logger, TESTDIR, TESTDIR, 0, 0, 0, NULL, 0);
assert(r == DB_RUNRECOVERY);
r = system("rm -rf " TESTDIR);

8
newbrt/tests/recovery-fopen-missing-file.c

@ -36,7 +36,13 @@ run_test(void) {
r = close(devnul); assert(r==0);
// run recovery
r = tokudb_recover(TESTDIR, TESTDIR, 0, 0, 0, NULL, 0);
r = tokudb_recover(NULL,
NULL_keep_zombie_callback,
NULL_prepared_txn_callback,
NULL_keep_cachetable_callback,
NULL_setup_db_callback,
NULL_close_db_callback,
NULL_logger, TESTDIR, TESTDIR, 0, 0, 0, NULL, 0);
assert(r == 0);
r = system("rm -rf " TESTDIR);

8
newbrt/tests/recovery-hello.c

@ -36,7 +36,13 @@ run_test(void) {
r = close(devnul); assert(r==0);
// run recovery
r = tokudb_recover(TESTDIR, TESTDIR, 0, 0, 0, NULL, 0);
r = tokudb_recover(NULL,
NULL_keep_zombie_callback,
NULL_prepared_txn_callback,
NULL_keep_cachetable_callback,
NULL_setup_db_callback,
NULL_close_db_callback,
NULL_logger, TESTDIR, TESTDIR, 0, 0, 0, NULL, 0);
assert(r == 0);
r = system("rm -rf " TESTDIR);

8
newbrt/tests/recovery-lsn-error-during-forward-scan.c

@ -66,7 +66,13 @@ run_test(void) {
toku_recover_set_callback(recover_callback_at_turnaround, NULL);
// run recovery
r = tokudb_recover(TESTDIR, TESTDIR, 0, 0, 0, NULL, 0);
r = tokudb_recover(NULL,
NULL_keep_zombie_callback,
NULL_prepared_txn_callback,
NULL_keep_cachetable_callback,
NULL_setup_db_callback,
NULL_close_db_callback,
NULL_logger, TESTDIR, TESTDIR, 0, 0, 0, NULL, 0);
assert(r != 0);
r = system("rm -rf " TESTDIR);

8
newbrt/tests/recovery-no-datadir.c

@ -30,7 +30,13 @@ run_test(void) {
r = close(devnul); assert(r==0);
// run recovery
r = tokudb_recover("/junk", TESTDIR, 0, 0, 0, NULL, 0);
r = tokudb_recover(NULL,
NULL_keep_zombie_callback,
NULL_prepared_txn_callback,
NULL_keep_cachetable_callback,
NULL_setup_db_callback,
NULL_close_db_callback,
NULL_logger, "/junk", TESTDIR, 0, 0, 0, NULL, 0);
assert(r != 0);
r = system("rm -rf " TESTDIR);

8
newbrt/tests/recovery-no-log.c

@ -22,7 +22,13 @@ run_test(void) {
r = close(devnul); assert(r==0);
// run recovery
r = tokudb_recover(TESTDIR, TESTDIR, 0, 0, 0, NULL, 0);
r = tokudb_recover(NULL,
NULL_keep_zombie_callback,
NULL_prepared_txn_callback,
NULL_keep_cachetable_callback,
NULL_setup_db_callback,
NULL_close_db_callback,
NULL_logger, TESTDIR, TESTDIR, 0, 0, 0, NULL, 0);
assert(r != 0);
r = system("rm -rf " TESTDIR);

8
newbrt/tests/recovery-no-logdir.c

@ -16,7 +16,13 @@ run_test(void) {
r = toku_os_mkdir(TESTDIR, S_IRWXU); assert(r == 0);
// run recovery
r = tokudb_recover(NULL, NULL, 0, 0, 0, NULL, 0);
r = tokudb_recover(NULL,
NULL_keep_zombie_callback,
NULL_prepared_txn_callback,
NULL_keep_cachetable_callback,
NULL_setup_db_callback,
NULL_close_db_callback,
NULL_logger, NULL, NULL, 0, 0, 0, NULL, 0);
assert(r != 0);
r = system("rm -rf " TESTDIR);

2
newbrt/tests/shortcut.c

@ -54,7 +54,7 @@ test_main (int argc __attribute__((__unused__)), const char *argv[] __attribute
}
r = toku_brt_cursor_close(cursor); assert(r==0);
r = toku_close_brt(brt, 0); assert(r==0);
r = toku_close_brt_nolsn(brt, 0); assert(r==0);
r = toku_cachetable_close(&ct); assert(r==0);
return 0;
}

2
newbrt/tests/test-brt-overflow.c

@ -35,7 +35,7 @@ test_overflow (void) {
r = toku_brt_insert(t, toku_fill_dbt(&k, key, 2), toku_fill_dbt(&v,buf,sizeof(buf)), null_txn);
assert(r==0);
}
r = toku_close_brt(t, 0); assert(r==0);
r = toku_close_brt_nolsn(t, 0); assert(r==0);
r = toku_cachetable_close(&ct); assert(r==0);
}

4
newbrt/tests/test-checkpoint-during-flush.c

@ -244,8 +244,8 @@ doit (BOOL after_child_pin) {
assert(r==0);
r = toku_close_brt(t, 0); assert(r==0);
r = toku_close_brt(c_brt, 0); assert(r==0);
r = toku_close_brt_nolsn(t, 0); assert(r==0);
r = toku_close_brt_nolsn(c_brt, 0); assert(r==0);
r = toku_cachetable_close(&ct); assert(r==0);
}

4
newbrt/tests/test-checkpoint-during-merge.c

@ -308,8 +308,8 @@ doit (int state) {
assert(r==0);
r = toku_close_brt(t, 0); assert(r==0);
r = toku_close_brt(c_brt, 0); assert(r==0);
r = toku_close_brt_nolsn(t, 0); assert(r==0);
r = toku_close_brt_nolsn(c_brt, 0); assert(r==0);
r = toku_cachetable_close(&ct); assert(r==0);
toku_free(pivots[0]);
}

4
newbrt/tests/test-checkpoint-during-rebalance.c

@ -304,8 +304,8 @@ doit (int state) {
assert(r==0);
r = toku_close_brt(t, 0); assert(r==0);
r = toku_close_brt(c_brt, 0); assert(r==0);
r = toku_close_brt_nolsn(t, 0); assert(r==0);
r = toku_close_brt_nolsn(c_brt, 0); assert(r==0);
r = toku_cachetable_close(&ct); assert(r==0);
toku_free(pivots[0]);
}

4
newbrt/tests/test-checkpoint-during-split.c

@ -299,8 +299,8 @@ doit (BOOL after_split) {
assert(r==0);
r = toku_close_brt(t, 0); assert(r==0);
r = toku_close_brt(c_brt, 0); assert(r==0);
r = toku_close_brt_nolsn(t, 0); assert(r==0);
r = toku_close_brt_nolsn(c_brt, 0); assert(r==0);
r = toku_cachetable_close(&ct); assert(r==0);
}

2
newbrt/tests/test-del-inorder.c

@ -59,7 +59,7 @@ doit (void) {
assert(r==0);
assert(pair.call_count == 1);
r = toku_close_brt(t, 0); assert(r==0);
r = toku_close_brt_nolsn(t, 0); assert(r==0);
r = toku_cachetable_close(&ct); assert(r==0);
}

2
newbrt/tests/test-dirty-flushes-on-cleaner.c

@ -264,7 +264,7 @@ doit (void) {
assert(r==0);
r = toku_close_brt(brt, 0); assert(r==0);
r = toku_close_brt_nolsn(brt, 0); assert(r==0);
r = toku_cachetable_close(&ct); assert(r==0);
toku_free(pivots[0]);

2
newbrt/tests/test-dump-brt.c

@ -29,7 +29,7 @@ test_main(int argc, const char *argv[]) {
assert(r==0);
}
r = toku_dump_brt(f, t); assert(r==0);
r = toku_close_brt(t, 0); assert(r==0);
r = toku_close_brt_nolsn(t, 0); assert(r==0);
r = toku_cachetable_close(&ct); assert(r==0);
fclose(f);
return 0;

2
newbrt/tests/test-flushes-on-cleaner.c

@ -276,7 +276,7 @@ doit (void) {
assert(r==0);
r = toku_close_brt(brt, 0); assert(r==0);
r = toku_close_brt_nolsn(brt, 0); assert(r==0);
r = toku_cachetable_close(&ct); assert(r==0);
toku_free(pivots[0]);

2
newbrt/tests/test-inc-split.c

@ -131,7 +131,7 @@ doit (int ksize __attribute__((__unused__))) {
r = toku_testsetup_root(t, anode);
assert(r==0);
r = toku_close_brt(t, 0); assert(r==0);
r = toku_close_brt_nolsn(t, 0); assert(r==0);
r = toku_cachetable_close(&ct); assert(r==0);
//printf("ksize=%d, unused\n", ksize);

2
newbrt/tests/test-merges-on-cleaner.c

@ -208,7 +208,7 @@ doit (void) {
assert(r==0);
r = toku_close_brt(brt, 0); assert(r==0);
r = toku_close_brt_nolsn(brt, 0); assert(r==0);
r = toku_cachetable_close(&ct); assert(r==0);
toku_free(pivots[0]);

2
newbrt/tests/test-pick-child-to-flush.c

@ -298,7 +298,7 @@ doit (void) {
flush_some_child(t->h, node, &fa);
assert(num_flushes_called == 2);
r = toku_close_brt(t, 0); assert(r==0);
r = toku_close_brt_nolsn(t, 0); assert(r==0);
r = toku_cachetable_close(&ct); assert(r==0);
toku_free(pivots[0]);

2
newbrt/tests/test.h

@ -14,7 +14,7 @@ extern "C" {
#endif
#define CKERR(r) do { if (r!=0) fprintf(stderr, "%s:%d error %d %s\n", __FILE__, __LINE__, r, strerror(r)); assert(r==0); } while (0)
// Check that r is 0: evaluate r exactly once, print the file/line and the strerror() text for a nonzero value, then assert.
// The captured copy is used for the message too, so an argument with side effects (e.g. a function call) is never run twice.
#define CKERR(r) ({ int __r = (r); if (__r!=0) fprintf(stderr, "%s:%d error %d %s\n", __FILE__, __LINE__, __r, strerror(__r)); assert(__r==0); })
#define CKERR2(r,r2) do { if (r!=r2) fprintf(stderr, "%s:%d error %d %s, expected %d\n", __FILE__, __LINE__, r, strerror(r), r2); assert(r==r2); } while (0)
#define CKERR2s(r,r2,r3) do { if (r!=r2 && r!=r3) fprintf(stderr, "%s:%d error %d %s, expected %d or %d\n", __FILE__, __LINE__, r, strerror(r), r2,r3); assert(r==r2||r==r3); } while (0)

2
newbrt/tests/test3681.c

@ -29,7 +29,7 @@ static void setup (void) {
static void finish (void) {
{ int r = toku_close_brt(t, 0); assert(r==0); };
{ int r = toku_close_brt_nolsn(t, 0); assert(r==0); };
{ int r = toku_cachetable_close(&ct); assert(r == 0 && ct == 0); }
}

4
newbrt/tests/test3856.c

@ -48,7 +48,7 @@ test_main (int argc __attribute__((__unused__)), const char *argv[] __attribute_
r = toku_brt_insert(t, toku_fill_dbt(&k, key, 1+strlen(key)), toku_fill_dbt(&v, val, 1+strlen(val)), null_txn);
assert(r==0);
}
r = toku_close_brt(t, 0); assert(r == 0);
r = toku_close_brt_nolsn(t, 0); assert(r == 0);
r = toku_cachetable_close(&ct); assert(r == 0);
r = toku_brt_create_cachetable(&ct, 0, ZERO_LSN, NULL_LOGGER); assert(r == 0);
@ -72,7 +72,7 @@ test_main (int argc __attribute__((__unused__)), const char *argv[] __attribute_
r = toku_brt_cursor_close(c); assert(r == 0);
}
r = toku_close_brt(t, 0); assert(r == 0);
r = toku_close_brt_nolsn(t, 0); assert(r == 0);
r = toku_cachetable_close(&ct), assert(r == 0);
return 0;

12
newbrt/tests/test3884.c

@ -177,7 +177,7 @@ test_split_on_boundary(void)
verify_basement_node_msns(nodeb, dummy_msn_3884);
toku_unpin_brtnode(brt, nodeb);
r = toku_close_brt(brt, NULL); assert(r == 0);
r = toku_close_brt_nolsn(brt, NULL); assert(r == 0);
r = toku_cachetable_close(&ct); assert(r == 0);
if (splitk.data) {
@ -247,7 +247,7 @@ test_split_with_everything_on_the_left(void)
brtleaf_split(brt->h, &sn, &nodea, &nodeb, &splitk, TRUE, 0, NULL);
toku_unpin_brtnode(brt, nodeb);
r = toku_close_brt(brt, NULL); assert(r == 0);
r = toku_close_brt_nolsn(brt, NULL); assert(r == 0);
r = toku_cachetable_close(&ct); assert(r == 0);
if (splitk.data) {
@ -322,7 +322,7 @@ test_split_on_boundary_of_last_node(void)
brtleaf_split(brt->h, &sn, &nodea, &nodeb, &splitk, TRUE, 0, NULL);
toku_unpin_brtnode(brt, nodeb);
r = toku_close_brt(brt, NULL); assert(r == 0);
r = toku_close_brt_nolsn(brt, NULL); assert(r == 0);
r = toku_cachetable_close(&ct); assert(r == 0);
if (splitk.data) {
@ -390,7 +390,7 @@ test_split_at_begin(void)
brtleaf_split(brt->h, &sn, &nodea, &nodeb, &splitk, TRUE, 0, NULL);
toku_unpin_brtnode(brt, nodeb);
r = toku_close_brt(brt, NULL); assert(r == 0);
r = toku_close_brt_nolsn(brt, NULL); assert(r == 0);
r = toku_cachetable_close(&ct); assert(r == 0);
if (splitk.data) {
@ -454,7 +454,7 @@ test_split_at_end(void)
brtleaf_split(brt->h, &sn, &nodea, &nodeb, &splitk, TRUE, 0, NULL);
toku_unpin_brtnode(brt, nodeb);
r = toku_close_brt(brt, NULL); assert(r == 0);
r = toku_close_brt_nolsn(brt, NULL); assert(r == 0);
r = toku_cachetable_close(&ct); assert(r == 0);
if (splitk.data) {
@ -511,7 +511,7 @@ test_split_odd_nodes(void)
verify_basement_node_msns(nodeb, dummy_msn_3884);
toku_unpin_brtnode(brt, nodeb);
r = toku_close_brt(brt, NULL); assert(r == 0);
r = toku_close_brt_nolsn(brt, NULL); assert(r == 0);
r = toku_cachetable_close(&ct); assert(r == 0);
if (splitk.data) {

2
newbrt/tests/test4115.c

@ -24,7 +24,7 @@ static int dont_allow_prefix (DB *db __attribute__((__unused__)), const DBT *a,
static void close_brt_and_ct (void) {
int r;
r = toku_close_brt(t, 0); assert(r==0);
r = toku_close_brt_nolsn(t, 0); assert(r==0);
r = toku_cachetable_close(&ct); assert(r==0);
}

2
newbrt/tests/test4244.c

@ -86,7 +86,7 @@ doit (void) {
assert(toku_bnc_nbytesinbuf(BNC(node, 0)) < 50*1000*1000);
toku_unpin_brtnode_off_client_thread(t->h, node);
r = toku_close_brt(t, 0); assert(r==0);
r = toku_close_brt_nolsn(t, 0); assert(r==0);
r = toku_cachetable_close(&ct); assert(r==0);
}

2
newbrt/tests/verify-bad-msn.c

@ -159,7 +159,7 @@ test_make_tree(int height, int fanout, int nperleaf, int do_verify) {
}
// flush to the file system
r = toku_close_brt(brt, 0);
r = toku_close_brt_nolsn(brt, 0);
assert(r == 0);
// shutdown the cachetable

2
newbrt/tests/verify-bad-pivots.c

@ -126,7 +126,7 @@ test_make_tree(int height, int fanout, int nperleaf, int do_verify) {
}
// flush to the file system
r = toku_close_brt(brt, 0);
r = toku_close_brt_nolsn(brt, 0);
assert(r == 0);
// shutdown the cachetable

2
newbrt/tests/verify-dup-in-leaf.c

@ -84,7 +84,7 @@ test_dup_in_leaf(int do_verify) {
}
// flush to the file system
r = toku_close_brt(brt, 0);
r = toku_close_brt_nolsn(brt, 0);
assert(r == 0);
// shutdown the cachetable

2
newbrt/tests/verify-dup-pivots.c

@ -126,7 +126,7 @@ test_make_tree(int height, int fanout, int nperleaf, int do_verify) {
}
// flush to the file system
r = toku_close_brt(brt, 0);
r = toku_close_brt_nolsn(brt, 0);
assert(r == 0);
// shutdown the cachetable

2
newbrt/tests/verify-misrouted-msgs.c

@ -141,7 +141,7 @@ test_make_tree(int height, int fanout, int nperleaf, int do_verify) {
}
// flush to the file system
r = toku_close_brt(brt, 0);
r = toku_close_brt_nolsn(brt, 0);
assert(r == 0);
// shutdown the cachetable

2
newbrt/tests/verify-unsorted-leaf.c

@ -84,7 +84,7 @@ test_dup_in_leaf(int do_verify) {
}
// flush to the file system
r = toku_close_brt(brt, 0);
r = toku_close_brt_nolsn(brt, 0);
assert(r == 0);
// shutdown the cachetable

2
newbrt/tests/verify-unsorted-pivots.c

@ -126,7 +126,7 @@ test_make_tree(int height, int fanout, int nperleaf, int do_verify) {
}
// flush to the file system
r = toku_close_brt(brt, 0);
r = toku_close_brt_nolsn(brt, 0);
assert(r == 0);
// shutdown the cachetable

63
newbrt/txn.c

@ -93,6 +93,11 @@ toku_txn_get_container_db_txn (TOKUTXN tokutxn) {
return container;
}
// Effect: Record container as the DB_TXN associated with tokutxn (stored in tokutxn->container_db_txn).
void toku_txn_set_container_db_txn (TOKUTXN tokutxn, DB_TXN*container) {
tokutxn->container_db_txn = container;
}
static int
fill_xids (OMTVALUE xev, u_int32_t idx, void *varray) {
TOKUTXN txn = xev;
@ -228,6 +233,7 @@ toku_txn_create_txn (
result->recovered_from_checkpoint = FALSE;
toku_list_init(&result->checkpoint_before_commit);
result->state = TOKUTXN_LIVE;
result->gid.gid = NULL;
result->do_fsync = FALSE;
toku_txn_ignore_init(result); // 2954
@ -415,6 +421,12 @@ int toku_txn_commit_with_lsn(TOKUTXN txn, int nosync, YIELDF yield, void *yieldv
bool release_multi_operation_client_lock)
// Effect: Among other things: if release_multi_operation_client_lock is true, then unlock that lock (even if an error path is taken)
{
if (txn->state==TOKUTXN_PREPARING) {
txn->state=TOKUTXN_LIVE;
toku_free(txn->gid.gid);
txn->gid.gid=NULL;
toku_list_remove(&txn->prepared_txns_link);
}
txn->state = TOKUTXN_COMMITTING;
if (garbage_collection_debug) {
verify_snapshot_system(txn->logger);
@ -460,6 +472,12 @@ int toku_txn_abort_with_lsn(TOKUTXN txn, YIELDF yield, void *yieldv, LSN oplsn,
bool release_multi_operation_client_lock)
// Effect: Among other things, if release_multi_operation_client_lock is true, then unlock that lock (even if an error path is taken)
{
if (txn->state==TOKUTXN_PREPARING) {
txn->state=TOKUTXN_LIVE;
toku_free(txn->gid.gid);
txn->gid.gid=NULL;
toku_list_remove(&txn->prepared_txns_link);
}
txn->state = TOKUTXN_ABORTING;
if (garbage_collection_debug) {
verify_snapshot_system(txn->logger);
@ -483,6 +501,50 @@ int toku_txn_abort_with_lsn(TOKUTXN txn, YIELDF yield, void *yieldv, LSN oplsn,
return r;
}
// Effect: Prepare a live transaction.  For a top-level transaction: mark it TOKUTXN_PREPARING, decide
//   whether it needs an fsync, keep a private copy of DB_GID_SIZE bytes of the gid, put the transaction
//   on the logger's prepared_txns list, and log an xprepare record.
//   A transaction with a parent is left untouched.
// Returns: 0 for a transaction with a parent, otherwise the result of toku_log_xprepare().
int toku_txn_prepare_txn (TOKUTXN txn, GID gid) {
    assert(txn->state==TOKUTXN_LIVE);
    // Test for a parent before changing the state.  Commit and abort undo the PREPARING state by
    // freeing the gid and unlinking prepared_txns_link, which is only meaningful for a transaction
    // that was actually pushed onto logger->prepared_txns below.
    // NOTE(review): confirm no caller relies on a child reporting TOKUTXN_PREPARING.
    if (txn->parent) return 0; // nothing to do if there's a parent.
    txn->state = TOKUTXN_PREPARING;
    // Do we need to do an fsync?
    txn->do_fsync = (txn->force_fsync_on_commit || txn->num_rollentries>0);
    txn->gid.gid = toku_memdup(gid.gid, DB_GID_SIZE);
    toku_list_push(&txn->logger->prepared_txns, &txn->prepared_txns_link);
    return toku_log_xprepare(txn->logger, &txn->do_fsync_lsn, 0, txn->txnid64, gid);
}
// Effect: Set gidp->gid to a toku_memdup() copy of DB_GID_SIZE bytes of txn->gid.gid.
//   NOTE(review): the copy is presumably owned by the caller, who must free it (txn.h says so); txn->gid.gid
//   is NULL until toku_txn_prepare_txn stores a gid, so confirm this is only called on prepared transactions.
void toku_txn_get_prepared_gid (TOKUTXN txn, GID *gidp) {
gidp->gid = toku_memdup(txn->gid.gid, DB_GID_SIZE);
}
// Effect: Report up to count prepared transactions to the caller.
//   flags==DB_FIRST restarts the scan: every transaction already reported is moved back onto the
//   logger's prepared_txns list first.  flags==DB_NEXT continues where the previous call stopped.
//   Each reported transaction is moved to prepared_and_returned_txns; its container DB_TXN and its
//   DB_GID_SIZE gid bytes are copied into the next preplist slot.
// Returns: EINVAL for any other flags value; otherwise 0, with the number of filled slots in *retp.
int toku_logger_recover_txn (TOKULOGGER logger, DB_PREPLIST preplist[/*count*/], long count, /*out*/ long *retp, u_int32_t flags) {
    struct toku_list *pending  = &logger->prepared_txns;
    struct toku_list *reported = &logger->prepared_and_returned_txns;

    if (flags!=DB_FIRST && flags!=DB_NEXT) {
        return EINVAL;
    }
    if (flags==DB_FIRST) {
        // Anything in the returned list goes back on the prepared list.
        while (!toku_list_empty(reported)) {
            struct toku_list *link = toku_list_head(reported);
            toku_list_remove(link);
            toku_list_push(pending, link);
        }
    }

    long n_filled = 0;
    while (n_filled<count && !toku_list_empty(pending)) {
        struct toku_list *link = toku_list_head(pending);
        toku_list_remove(link);
        toku_list_push(reported, link);
        TOKUTXN txn = toku_list_struct(link, struct tokutxn, prepared_txns_link);
        assert(txn->container_db_txn);
        preplist[n_filled].txn = txn->container_db_txn;
        memcpy(preplist[n_filled].gid, txn->gid.gid, DB_GID_SIZE);
        n_filled++;
    }
    *retp = n_filled;
    return 0;
}
struct txn_fsync_log_info {
TOKULOGGER logger;
LSN do_fsync_lsn;
@ -525,6 +587,7 @@ void toku_txn_destroy_txn(TOKUTXN txn) {
if (txn->open_brts)
toku_omt_destroy(&txn->open_brts);
xids_destroy(&txn->xids);
if (txn->gid.gid) toku_free(txn->gid.gid);
toku_txn_ignore_free(txn); // 2954
toku_free(txn);

10
newbrt/txn.h

@ -19,6 +19,7 @@ int toku_txn_begin_txn (
);
DB_TXN * toku_txn_get_container_db_txn (TOKUTXN tokutxn);
void toku_txn_set_container_db_txn (TOKUTXN, DB_TXN*);
// toku_txn_begin_with_xid is called from recovery and has no containing DB_TXN
int toku_txn_begin_with_xid (
@ -53,6 +54,12 @@ int toku_txn_abort_with_lsn(TOKUTXN txn, YIELDF yield, void *yieldv, LSN oplsn,
TXN_PROGRESS_POLL_FUNCTION poll, void *poll_extra,
bool release_multi_operation_client_lock);
int toku_txn_prepare_txn (TOKUTXN txn, GID gid) __attribute__((warn_unused_result));
// Effect: Do the internal work of preparing a transaction; for a top-level transaction this includes logging the xprepare record.
void toku_txn_get_prepared_gid (TOKUTXN, GID *);
// Effect: Return a pointer to the GID. The value is allocated, so you must free it.
int toku_txn_maybe_fsync_log(TOKULOGGER logger, LSN do_fsync_lsn, BOOL do_fsync, YIELDF yield, void *yieldv);
void toku_txn_get_fsync_info(TOKUTXN ttxn, BOOL* do_fsync, LSN* do_fsync_lsn);
@ -122,6 +129,7 @@ int toku_txn_ignore_contains(TOKUTXN txn, FILENUM filenum);
enum tokutxn_state {
TOKUTXN_LIVE, // initial txn state
TOKUTXN_PREPARING, // txn is preparing (or prepared)
TOKUTXN_COMMITTING, // txn in the process of committing
TOKUTXN_ABORTING, // txn in the process of aborting
TOKUTXN_RETIRED, // txn no longer exists
@ -130,6 +138,8 @@ typedef enum tokutxn_state TOKUTXN_STATE;
TOKUTXN_STATE toku_txn_get_state(TOKUTXN txn);
int toku_logger_recover_txn (TOKULOGGER logger, DB_PREPLIST preplist[/*count*/], long count, /*out*/ long *retp, u_int32_t flags);
#if defined(__cplusplus) || defined(__cilkplusplus)
}
#endif

6
newbrt/wbuf.h

@ -1,7 +1,7 @@
#ifndef WBUF_H
#define WBUF_H
#ident "$Id$"
#ident "Copyright (c) 2007-2010 Tokutek Inc. All rights reserved."
#ident "Copyright (c) 2007-2011 Tokutek Inc. All rights reserved."
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
#include "x1764.h"
@ -190,6 +190,10 @@ static inline void wbuf_TXNID (struct wbuf *w, TXNID tid) {
wbuf_ulonglong(w, tid);
}
// Append the DB_GID_SIZE bytes at gid.gid to w by way of wbuf_nocrc_literal_bytes.
static inline void wbuf_nocrc_GID (struct wbuf *w, GID gid) {
wbuf_nocrc_literal_bytes(w, gid.gid, DB_GID_SIZE);
}
static inline void wbuf_nocrc_LSN (struct wbuf *w, LSN lsn) {
wbuf_nocrc_ulonglong(w, lsn.lsn);
}

2
release/examples/db-insert.c

@ -38,7 +38,7 @@ enum { DEFAULT_ITEMS_TO_INSERT_PER_ITERATION = 1<<20 };
enum { DEFAULT_ITEMS_PER_TRANSACTION = 1<<14 };
static void insert (long long v);
#define CKERR(r) if (r!=0) fprintf(stderr, "%s:%d error %d %s\n", __FILE__, __LINE__, r, db_strerror(r)); assert(r==0);
// Check that r is 0: evaluate r exactly once, print the file/line and the db_strerror() text for a nonzero value, then assert.
// The captured copy is used for the message too, so an argument with side effects is never run twice.
#define CKERR(r) ({ int __r = (r); if (__r!=0) fprintf(stderr, "%s:%d error %d %s\n", __FILE__, __LINE__, __r, db_strerror(__r)); assert(__r==0); })
#define CKERR2(r,rexpect) if (r!=rexpect) fprintf(stderr, "%s:%d error %d %s\n", __FILE__, __LINE__, r, db_strerror(r)); assert(r==rexpect);
/* default test parameters */

16
src/tests/Makefile

@ -909,6 +909,11 @@ test-recover1.tdbrun: VGRIND=
test-recover2.tdbrun: VGRIND=
test-recover3.tdbrun: VGRIND=
# test-prepare cannot handle valgrind since the environment doesn't close properly.
test-prepare.tdbrun: VGRIND=
test-prepare2.tdbrun: VGRIND=
test-prepare3.tdbrun: VGRIND=
# filesize is too slow with vgrind.
filesize.tdbrun: VGRIND=
@ -1092,3 +1097,14 @@ clean:
rm -f *.bdb *.tdb
rm -f *.fastlog
rm -rf rundir.* recover-test_stress*.dir
# Tests which fail whenever I muck with recovery or XA -Bradley
rtests: \
recover-upgrade-db-descriptor.abortrecover \
recovery_fileops_unit.tdbrun \
recover-tablelock.abortrecover \
hotindexer-insert-committed-optimized.tdbrun \
test-prepare.tdbrun \
test-prepare2.tdbrun \
test-prepare3.tdbrun \
#Blank

102
src/tests/test-prepare.c

@ -0,0 +1,102 @@
#include "test.h"
#include <sys/wait.h>
#define ENVDIR2 ENVDIR "2"
// Remove envdir and everything below it with "rm -rf", then recreate it as an empty directory.
// Both steps are checked with CKERR.
static void clean_env (const char *envdir) {
    const int len = strlen(envdir)+100;
    char cmd[len];
    snprintf(cmd, len, "rm -rf %s", envdir);
    // Do not discard the shell status: a failed cleanup would leave stale files behind.
    int r = system(cmd);
    CKERR(r);
    // Capture the result first so the call itself is made exactly once.
    r = toku_os_mkdir(envdir, S_IRWXU+S_IRWXG+S_IRWXO);
    CKERR(r);
}
// Create an environment handle in *envp, send its error output to stderr, and open it on envdir
// with locking, logging, memory pool and transactions enabled, creating it if needed and running recovery.
// When built with TOKUDB the redzone is set to 0 before opening.
static void setup_env (DB_ENV **envp, const char *envdir) {
    CHK(db_env_create(envp, 0));
    DB_ENV *const env = *envp;
    env->set_errfile(env, stderr);
#ifdef TOKUDB
    CHK(env->set_redzone(env, 0));
#endif
    const int mode = S_IRWXU+S_IRWXG+S_IRWXO;
    CHK(env->open(env, envdir, DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_MPOOL|DB_INIT_TXN|DB_CREATE|DB_PRIVATE|DB_RECOVER, mode));
}
// Start from an empty envdir, open an environment there, create "foo.db", and put the pair
// ("foo","foo") inside a transaction that is then prepared with a gid of 42 followed by zeros.
// The db handle is closed before the prepare.  The transaction is committed only if commit is true;
// otherwise it is left prepared.
static void setup_env_and_prepare (DB_ENV **envp, const char *envdir, bool commit) {
    clean_env(envdir);
    setup_env(envp, envdir);
    DB_ENV *const env = *envp;

    DB *db;
    CKERR(db_create(&db, env, 0));
    CKERR(db->open(db, NULL, "foo.db", 0, DB_BTREE, DB_CREATE | DB_AUTO_COMMIT, S_IRWXU+S_IRWXG+S_IRWXO));

    DB_TXN *txn;
    CKERR(env->txn_begin(env, 0, &txn, 0));
    DBT key={.size=4, .data="foo"};
    CKERR(db->put(db, txn, &key, &key, 0));
    CHK(db->close(db, 0));

    // First byte 42, the remaining DB_GID_SIZE-1 bytes zero.
    u_int8_t gid[DB_GID_SIZE] = {42};
    CKERR(txn->prepare(txn, gid));
    if (commit) {
        CKERR(txn->commit(txn, 0));
    }
}
// Prepare a transaction in a child process that exits without committing it or closing the
// environment.  Then, in this process, build a second environment (ENVDIR2) in which the same
// transaction is committed, reopen ENVDIR, and verify that txn_recover hands back exactly one
// prepared transaction whose gid is 42 followed by zeros, and that it can be committed.
static void test1 (void) {
    pid_t pid = -1;
    bool do_fork = true;
    if (do_fork) {
        pid = fork();
        assert(pid >= 0); // a failed fork must not be mistaken for the parent path
    }
    if (!do_fork || pid == 0) {
        DB_ENV *env;
        setup_env_and_prepare(&env, ENVDIR, false);
        {
            DB_PREPLIST l[1];
            long count=-1;
            CKERR(env->txn_recover(env, l, 1, &count, DB_FIRST));
            printf("%s:%d count=%ld\n", __FILE__, __LINE__, count);
            assert(count==1);
            assert(l[0].gid[0]==42);
        }
        exit(0);
    }
    if (do_fork) {
        int status;
        pid_t pid2 = wait(&status);
        assert(pid2==pid);
        // The child's assertions only count if its exit status is inspected.
        assert(WIFEXITED(status) && WEXITSTATUS(status)==0);
    }

    DB_ENV *env2;
    setup_env_and_prepare(&env2, ENVDIR2, true);

    // Now we can look at env2 in the debugger to see if we managed to make it the same

    DB_ENV *env;
    setup_env(&env, ENVDIR);

    {
        DB_PREPLIST l[1];
        long count=-1;
        int r = env->txn_recover(env, l, 1, &count, DB_FIRST);
        printf("r=%d count=%ld\n", r, count);
        assert(r==0);
        assert(count==1);
        assert(l[0].gid[0]==42);
        for (int i=1; i<DB_GID_SIZE; i++) {
            assert(l[0].gid[i]==0);
        }
        CHK(l->txn->commit(l->txn, 0));
    }
    CHK(env2->close(env2, 0));
    CHK(env ->close(env, 0));
}
// Test entry point: parse the standard test arguments and run test1.
int test_main (int argc, char *const argv[]) {
default_parse_args(argc, argv);
// first test: open an environment, a db, a txn, and do a prepare.  Then call txn_recover (without even closing the environment).
test1();
// Ideas for further tests (not implemented in this function):
// second test: open environment, a db, a txn, prepare, close the environment.  Then reopen and call txn_recover.
// third test: make sure there is an fsync on txn_prepare, but not on the following commit.
// Then close the environment.  Find out what BDB does when asked for the prepared txns.
// Other tests: read prepared txns, 1 at a time. Then close it and read them again.
return 0;
}

123
src/tests/test-prepare2.c

@ -0,0 +1,123 @@
#include "test.h"
#include <sys/wait.h>
// Verify that if tokudb crashes during recovery, then the prepared transactions are still prepared.
// Remove envdir and everything below it with "rm -rf", then recreate it as an empty directory.
// Both steps are checked with CKERR.
static void clean_env (const char *envdir) {
    const int len = strlen(envdir)+100;
    char cmd[len];
    snprintf(cmd, len, "rm -rf %s", envdir);
    // Do not discard the shell status: a failed cleanup would leave stale files behind.
    int r = system(cmd);
    CKERR(r);
    // Capture the result first so the call itself is made exactly once.
    r = toku_os_mkdir(envdir, S_IRWXU+S_IRWXG+S_IRWXO);
    CKERR(r);
}
// Create an environment handle in *envp, send its error output to stderr, and open it on envdir
// with locking, logging, memory pool and transactions enabled, creating it if needed and running recovery.
// When built with TOKUDB the redzone is set to 0 before opening.
static void setup_env (DB_ENV **envp, const char *envdir) {
    CHK(db_env_create(envp, 0));
    DB_ENV *const env = *envp;
    env->set_errfile(env, stderr);
#ifdef TOKUDB
    CHK(env->set_redzone(env, 0));
#endif
    const int mode = S_IRWXU+S_IRWXG+S_IRWXO;
    CHK(env->open(env, envdir, DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_MPOOL|DB_INIT_TXN|DB_CREATE|DB_PRIVATE|DB_RECOVER, mode));
}
// Start from an empty envdir, open an environment there, create "foo.db", and put the pair
// ("foo","foo") inside a transaction that is then prepared with a gid of 42 followed by zeros.
// The db handle is closed before the prepare.  The transaction is committed only if commit is true;
// otherwise it is left prepared.
static void setup_env_and_prepare (DB_ENV **envp, const char *envdir, bool commit) {
    clean_env(envdir);
    setup_env(envp, envdir);
    DB_ENV *const env = *envp;

    DB *db;
    CKERR(db_create(&db, env, 0));
    CKERR(db->open(db, NULL, "foo.db", 0, DB_BTREE, DB_CREATE | DB_AUTO_COMMIT, S_IRWXU+S_IRWXG+S_IRWXO));

    DB_TXN *txn;
    CKERR(env->txn_begin(env, 0, &txn, 0));
    DBT key={.size=4, .data="foo"};
    CKERR(db->put(db, txn, &key, &key, 0));
    CHK(db->close(db, 0));

    // First byte 42, the remaining DB_GID_SIZE-1 bytes zero.
    u_int8_t gid[DB_GID_SIZE] = {42};
    CKERR(txn->prepare(txn, gid));
    if (commit) {
        CKERR(txn->commit(txn, 0));
    }
}
// Three phases, the first two in child processes that exit without closing the environment:
//   1. prepare a transaction (gid 42) and check that txn_recover reports it;
//   2. reopen the environment (which runs recovery), check the prepared transaction is reported, and exit;
//   3. in this process, reopen again, check the transaction is still reported, and commit it.
static void test (void) {
    pid_t pid = fork();
    assert(pid >= 0); // on failure wait() would also return -1 and status would be read uninitialized
    if (pid == 0) {
        DB_ENV *env;
        setup_env_and_prepare(&env, ENVDIR, false);
        {
            DB_PREPLIST l[1];
            long count=-1;
            CKERR(env->txn_recover(env, l, 1, &count, DB_FIRST));
            printf("%s:%d count=%ld\n", __FILE__, __LINE__, count);
            assert(count==1);
            assert(l[0].gid[0]==42);
        }
        exit(0);
    }
    {
        int status;
        pid_t pid2 = wait(&status);
        assert(pid2==pid);
        assert(WIFEXITED(status) && WEXITSTATUS(status)==0);
    }
    // Now run recovery and crash on purpose.
    pid = fork();
    assert(pid >= 0);
    if (pid == 0) {
        DB_ENV *env;
        setup_env(&env, ENVDIR);
        // make sure there is 1 prepared txn.
        {
            DB_PREPLIST l[1];
            long count=-1;
            int r = env->txn_recover(env, l, 1, &count, DB_FIRST);
            printf("r=%d count=%ld\n", r, count);
            assert(r==0);
            assert(count==1);
            assert(l[0].gid[0]==42);
            for (int i=1; i<DB_GID_SIZE; i++) {
                assert(l[0].gid[i]==0);
            }
        }
        exit(0);
    }
    {
        int status;
        pid_t pid2 = wait(&status);
        assert(pid2==pid);
        assert(WIFEXITED(status) && WEXITSTATUS(status)==0);
    }

    // Now see if recovery works the second time.
    DB_ENV *env;
    setup_env(&env, ENVDIR);
    {
        DB_PREPLIST l[1];
        long count=-1;
        int r = env->txn_recover(env, l, 1, &count, DB_FIRST);
        printf("r=%d count=%ld\n", r, count);
        assert(r==0);
        assert(count==1);
        assert(l[0].gid[0]==42);
        for (int i=1; i<DB_GID_SIZE; i++) {
            assert(l[0].gid[i]==0);
        }
        CHK(l->txn->commit(l->txn, 0));
    }
    CHK(env ->close(env, 0));
}
// Test entry point: parse the standard test arguments and run test().
int test_main (int argc, char *const argv[]) {
default_parse_args(argc, argv);
// Prepare a txn in one process, reopen the environment in a second process that exits without closing it, then reopen once more and commit the prepared txn.
test();
// NOTE(review): the notes below look like ideas carried over from test-prepare.c; none of them is implemented in this function.
// second test: open environment, a db, a txn, prepare, close the environment.  Then reopen and call txn_recover.
// third test: make sure there is an fsync on txn_prepare, but not on the following commit.
// Then close the environment.  Find out what BDB does when asked for the prepared txns.
// Other tests: read prepared txns, 1 at a time. Then close it and read them again.
return 0;
}

298
src/tests/test-prepare3.c

@ -0,0 +1,298 @@
#include "test.h"
#include <sys/wait.h>
// Verify that if we prepare a transaction, then commit a bunch more transactions so that the logs may have been rotated, then the transaction can commit or abort properly on recovery.
// Remove envdir and everything below it with "rm -rf", then recreate it as an empty directory.
// Both steps are checked with CKERR.
static void clean_env (const char *envdir) {
    const int len = strlen(envdir)+100;
    char cmd[len];
    snprintf(cmd, len, "rm -rf %s", envdir);
    // Do not discard the shell status: a failed cleanup would leave stale files behind.
    int r = system(cmd);
    CKERR(r);
    // Capture the result first so the call itself is made exactly once.
    r = toku_os_mkdir(envdir, S_IRWXU+S_IRWXG+S_IRWXO);
    CKERR(r);
}
// Create a DB_ENV handle in *envp, route its error output to stderr and open
// it on envdir with locking, logging, mpool, transactions and DB_RECOVER.
static void setup_env (DB_ENV **envp, const char *envdir) {
    const u_int32_t open_flags = DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_MPOOL|DB_INIT_TXN|DB_CREATE|DB_PRIVATE|DB_RECOVER;
    const int open_mode = S_IRWXU+S_IRWXG+S_IRWXO;

    CHK(db_env_create(envp, 0));
    DB_ENV *const env = *envp;
    env->set_errfile(env, stderr);
#ifdef TOKUDB
    CHK(env->set_redzone(env, 0));
#endif
    CHK(env->open(env, envdir, open_flags, open_mode));
}
#define NTXNS 6
// Build a fresh environment in envdir (returned through *envp), create foo.db,
// bulk-load it in one committed transaction, then run NTXNS small transactions
// that are each prepared with a gid whose first byte is 'a'+tnum.
// Transaction 0 is then committed, transaction 1 aborted, and the rest are
// left prepared. The DB handle is closed; the environment is left open.
static void setup_env_and_prepare (DB_ENV **envp, const char *envdir) {
    clean_env(envdir);
    setup_env(envp, envdir);
    DB_ENV *const env = *envp;

    DB *db;
    CKERR(db_create(&db, env, 0));
    CKERR(db->open(db, NULL, "foo.db", 0, DB_BTREE, DB_CREATE | DB_AUTO_COMMIT, S_IRWXU+S_IRWXG+S_IRWXO));

    // Bulk load: NTXNS*26 rows in a single transaction; key and value are the
    // same DSIZE-byte, space-padded, NUL-terminated string.
    {
        DB_TXN *loader;
        CKERR(env->txn_begin(env, 0, &loader, 0));
        for (int group=0; group<NTXNS; group++) {
            for (int letter=0; letter<26; letter++) {
#define DSIZE 200
                char row[DSIZE];
                memset(row, ' ', DSIZE);
                row[0]='a'+group;
                row[1]='a'+letter;
                row[DSIZE-1]=0;
                DBT pair={.size=DSIZE, .data=row};
                CKERR(db->put(db, loader, &pair, &pair, 0));
            }
        }
        CKERR(loader->commit(loader, 0));
    }

    // One small prepared transaction per tnum.
    for (int tnum=0; tnum<NTXNS; tnum++) {
        char row[3]={'a'+tnum,'_',0};
        DBT pair={.size=3, .data=row};
        u_int8_t gid[DB_GID_SIZE];
        memset(gid, 0, DB_GID_SIZE);
        gid[0]='a'+tnum;

        DB_TXN *txn;
        CKERR(env->txn_begin(env, 0, &txn, 0));
        CKERR(db->put(db, txn, &pair, &pair, 0));
        CKERR(txn->prepare(txn, gid));
        switch (tnum) {
        case 0:
            CKERR(txn->commit(txn, 0));
            break;
        case 1:
            CKERR(txn->abort(txn));
            break;
        default:
            // Drop txn on the ground, since we will commit or abort it after recovery.
            break;
        }
    }
    CHK(db->close(db, 0));
}
// Expected status of one of the NTXNS test transactions, as consumed by
// check_prepared_list() when it validates the list of prepared transactions.
enum prepared_state {
COMMITTED,       // not counted toward either bound on the list length
ABORTED,         // not counted toward either bound on the list length
MAYBE_COMMITTED, // counted only toward the upper bound on the list length
MAYBE_ABORTED,   // counted only toward the upper bound on the list length
PREPARED};       // counted toward both bounds; may appear at most once in the list
// Validate a prepared-transaction list against the expected per-transaction states.
//   ps    - expected state of each of the NTXNS transactions
//   count - number of valid entries in l
//   l     - entries whose gid[0] is 'a'+transaction number and whose remaining gid bytes are 0
// Asserts that:
//   - count lies between the number of PREPARED transactions and the number of
//     PREPARED plus MAYBE_* transactions,
//   - every entry names a valid transaction number and has a zero-padded gid,
//   - no PREPARED transaction is listed twice,
//   - every PREPARED transaction is actually present in the list.
static void check_prepared_list (enum prepared_state ps[NTXNS], long count, DB_PREPLIST *l) {
    int count_prepared=0;
    int count_maybe_prepared=0;
    for (int j=0; j<NTXNS; j++) {
        switch (ps[j]) {
        case COMMITTED:
        case ABORTED:
            goto next;
        case PREPARED:
            count_prepared++;
            /* fallthrough: a PREPARED txn also counts toward the upper bound */
        case MAYBE_COMMITTED:
        case MAYBE_ABORTED:
            count_maybe_prepared++;
            goto next;
        }
        assert(0); // ps[j] held a value that is not a prepared_state
    next:;
    }
    assert(count>=count_prepared && count<=count_maybe_prepared);
    bool found[NTXNS];
    for (int j=0; j<NTXNS; j++) {
        found[j] = (ps[j]!=PREPARED);
    }
    // now found[j] is false on those transactions that I hope to find in the prepared list.
    for (int j=0; j<count; j++) {
        int num = l[j].gid[0]-'a';
        assert(num>=0 && num<NTXNS);
        switch (ps[num]) {
        case PREPARED:
            assert(!found[num]);
            found[num]=true;
            break;
        default:;
        }
        for (int i=1; i<DB_GID_SIZE; i++) {
            assert(l[j].gid[i]==0);
        }
    }
    // Every transaction expected to be PREPARED must have shown up; without
    // this check a list of the right length but the wrong members would pass.
    for (int j=0; j<NTXNS; j++) {
        assert(found[j]);
    }
}
// Fetch up to NTXNS prepared transactions from env into l, starting from the
// beginning of the list (DB_FIRST). *count receives the number returned and
// is asserted to be non-negative.
static void get_prepared (DB_ENV *env, long *count, DB_PREPLIST *l) {
    const long capacity = NTXNS;
    CKERR(env->txn_recover(env, l, capacity, count, DB_FIRST));
    assert(0 <= *count);
}
// Read the environment's prepared-transaction list and validate it against
// the expected states in ps (see check_prepared_list).
static void check_prepared_txns (DB_ENV *env, enum prepared_state ps[NTXNS]) {
    long n_returned = -1;   // -1 so a call that never sets the count is caught below
    DB_PREPLIST entries[NTXNS];
    CKERR(env->txn_recover(env, entries, NTXNS, &n_returned, DB_FIRST));
    assert(n_returned>=0);
    check_prepared_list(ps, n_returned, entries);
}
// Open foo.db in env and verify the final outcome of every test transaction:
// the row {'a'+tnum, '_', 0} must be present (key == value) for even tnum and
// absent (DB_NOTFOUND) for odd tnum. Each lookup runs in its own transaction.
static void check_state_after_full_recovery (DB_ENV *env) {
    DB *db;
    CKERR(db_create(&db, env, 0));
    CKERR(db->open(db, NULL, "foo.db", 0, DB_BTREE, DB_CREATE | DB_AUTO_COMMIT, S_IRWXU+S_IRWXG+S_IRWXO));

    for (int tnum=0; tnum<NTXNS; tnum++) {
        const bool expect_present = (tnum%2==0);
        char name[3]={'a'+tnum,'_',0};
        DBT key = {.size=3, .data=name};
        DBT val = {.size=0, .data=0};

        DB_TXN *txn;
        CKERR(env->txn_begin(env, 0, &txn, 0));
        int r = db->get(db, txn, &key, &val, 0);
        if (!expect_present) {
            assert(r==DB_NOTFOUND);
        } else {
            assert(r==0);
            assert(val.size==3 && memcmp(val.data, name, 3)==0);
        }
        CKERR(txn->commit(txn, 0));
    }
    CKERR(db->close(db, 0));
}
// Block until a child process terminates, then assert that it was the child
// identified by pid and that it exited normally with status 0.
static void waitfor (pid_t pid) {
    int wstatus;
    const pid_t reaped = wait(&wstatus);
    assert(reaped==pid);
    assert(WIFEXITED(wstatus) && WEXITSTATUS(wstatus)==0);
}
// Abort the first transaction in l[0..count-1] whose gid starts with 'a'+num.
// Asserts (fails the test) if no such entry exists.
static void abort_number(int num, int count, DB_PREPLIST *l) {
    const int wanted = 'a'+num;
    for (int idx=0; idx<count; idx++) {
        DB_PREPLIST *entry = &l[idx];
        if (entry->gid[0]!=wanted) continue;
        CKERR(entry->txn->abort(entry->txn));
        return;
    }
    assert(0);
}
// Commit the first transaction in l[0..count-1] whose gid starts with 'a'+num.
// Asserts (fails the test) if no such entry exists.
static void commit_number(int num, int count, DB_PREPLIST *l) {
    const int wanted = 'a'+num;
    for (int idx=0; idx<count; idx++) {
        DB_PREPLIST *entry = &l[idx];
        if (entry->gid[0]!=wanted) continue;
        CKERR(entry->txn->commit(entry->txn, 0));
        return;
    }
    assert(0);
}
// Drive the whole scenario. Each stage runs in a forked child that exits
// without closing the environment (simulating a crash), so the next stage
// must run recovery when it opens the environment with DB_RECOVER.
// The parent waits for each child and requires a clean exit status.
static void test (void) {
    pid_t pid;
    // Stage 0: build the environment; txn 0 committed, txn 1 aborted, txns 2..5 prepared.
    if (0==(pid=fork())) {
        DB_ENV *env;
        setup_env_and_prepare(&env, ENVDIR);
        enum prepared_state prepared[NTXNS]={COMMITTED,ABORTED,PREPARED,PREPARED,PREPARED,PREPARED};
        check_prepared_txns(env, prepared);
        exit(0);
    }
    waitfor(pid);
    // Now run recovery and crash on purpose.
    if (0==(pid=fork())) {
        DB_ENV *env;
        setup_env(&env, ENVDIR);
        enum prepared_state prepared[NTXNS]={COMMITTED,ABORTED,PREPARED,PREPARED,PREPARED,PREPARED};
        check_prepared_txns(env, prepared);
        exit(0);
    }
    waitfor(pid);
    // Now see if recovery works the second time.
    if (0==(pid=fork())) {
        DB_ENV *env;
        setup_env(&env, ENVDIR);
        enum prepared_state prepared[NTXNS]={COMMITTED,ABORTED,PREPARED,PREPARED,PREPARED,PREPARED};
        check_prepared_txns(env, prepared);
        exit(0);
    }
    waitfor(pid);
    // Now see if recovery works the third time; this time resolve txns 3 and 2 before crashing.
    if (0==(pid=fork())) {
        DB_ENV *env;
        setup_env(&env, ENVDIR);
        enum prepared_state prepared[NTXNS]={COMMITTED,ABORTED,PREPARED,PREPARED,PREPARED,PREPARED};
        DB_PREPLIST l[NTXNS];
        long count=-1;
        get_prepared(env, &count, l);
        check_prepared_list(prepared, count, l);
        abort_number(3, count, l);
        commit_number(2, count, l); // do the commit second so it will make it to disk.
        exit(0);
    }
    waitfor(pid);
    // Now see if recovery works a fourth time, with number 2 and 3 no longer in the prepared state.
    if (0==(pid=fork())) {
        DB_ENV *env;
        setup_env(&env, ENVDIR);
        // It's a little bit funky that the committed transactions in BDB (from
        // commit_number(2,...) above) don't stay committed, hence the MAYBE_* states.
        enum prepared_state prepared[NTXNS]={COMMITTED,ABORTED,MAYBE_COMMITTED,MAYBE_ABORTED,PREPARED,PREPARED};
        DB_PREPLIST l[NTXNS];
        long count=-1;
        get_prepared(env, &count, l);
        check_prepared_list(prepared, count, l);
        exit(0);
    }
    waitfor(pid);
    // Now see if recovery works a fifth time, with number 2 and 3 no longer in the prepared state.
    // This time we'll do get_prepared with a short count: fetch one entry per
    // call, committing even-numbered and aborting odd-numbered transactions.
    if (0==(pid=fork())) {
        DB_ENV *env;
        setup_env(&env, ENVDIR);
        for (int recover_num=0; 1; recover_num++) {
            long count=-1;
            DB_PREPLIST *MALLOC_N(1, l); // use malloc so that valgrind might notice a problem
            CKERR(env->txn_recover(env, l, 1, &count, recover_num==0 ? DB_FIRST : DB_NEXT));
            if (count==0) {
                toku_free(l); // release the buffer on the exit path too, so valgrind stays quiet
                break;
            }
            if ((l[0].gid[0]-'a')%2==0) {
                CKERR(l[0].txn->commit(l[0].txn, 0));
            } else {
                CKERR(l[0].txn->abort(l[0].txn));
            }
            toku_free(l);
        }
        // Now let's see what the state is.
        check_state_after_full_recovery(env);
        CKERR(env->close(env, 0));
        exit(0);
    }
    waitfor(pid);
    // Now we should end up with nothing in the recovery list.
    {
        DB_ENV *env;
        setup_env(&env, ENVDIR);
        long count=-1;
        DB_PREPLIST l[1];
        CKERR(env->txn_recover(env, l, 1, &count, DB_FIRST));
        assert(count==0);
        check_state_after_full_recovery(env);
        CKERR(env->close(env, 0));
    }
}
// Test entry point: parses the common test arguments, then runs test().
int test_main (int argc, char *const argv[]) {
default_parse_args(argc, argv);
// Prepare several transactions, then repeatedly reopen the environment with recovery and resolve them (see test()).
test();
return 0;
}

7
src/tests/test.h

@ -42,7 +42,7 @@ int verbose=0;
* do CKERR(function_call(args)). I've added CHK macros below that are
* safer and allow this usage.
*/
#define CKERR(r) do { if (r!=0) fprintf(stderr, "%s:%d error %d %s\n", __FILE__, __LINE__, r, db_strerror(r)); assert(r==0); } while (0)
// Assert that r is 0, printing the error number and its db_strerror() text first if it is not.
// The argument is evaluated exactly once (including on the failure path), so
// CKERR(function_call(args)) never runs the call a second time.
#define CKERR(r) ({ int __r = (r); if (__r!=0) fprintf(stderr, "%s:%d error %d %s\n", __FILE__, __LINE__, __r, db_strerror(__r)); assert(__r==0); })
#define CKERR2(r,r2) do { if (r!=r2) fprintf(stderr, "%s:%d error %d %s, expected %d\n", __FILE__, __LINE__, r, db_strerror(r), r2); assert(r==r2); } while (0)
#define CKERR2s(r,r2,r3) do { if (r!=r2 && r!=r3) fprintf(stderr, "%s:%d error %d %s, expected %d or %d\n", __FILE__, __LINE__, r, db_strerror(r), r2,r3); assert(r==r2||r==r3); } while (0)
@ -128,7 +128,6 @@ print_engine_status(DB_ENV * UU(env)) {
}
#endif
#ifdef USE_TDB
static __attribute__((__unused__)) uint64_t
get_engine_status_val(DB_ENV * UU(env), char * keyname) {
@ -432,4 +431,8 @@ main(int argc, char * const argv[])
return r;
}
#ifndef DB_GID_SIZE
#define DB_GID_SIZE DB_XIDDATASIZE
#endif
#endif // __TEST_H

54
src/tests/test_txn_close_before_commit.c

@ -0,0 +1,54 @@
/* -*- mode: C; c-basic-offset: 4 -*- */
#ident "$Id$"
#ident "Copyright (c) 2007-2012 Tokutek Inc. All rights reserved."
#include "test.h"
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <memory.h>
#include <sys/stat.h>
#include <db.h>
// Recreate a mysqld crash by closing and opening a db within a transaction.
// The crash occurs when writing a dirty cachetable pair, so we insert one
// row.
// Insert one row inside a transaction, close the DB handle while that
// transaction is still open, then commit and close the environment.
// Every step, including the creation of the environment directory, must succeed.
static void
test_txn_close_before_commit (void) {
    int r;

    CHK(system("rm -rf " ENVDIR));
    // A failed mkdir would otherwise only surface later as an obscure env->open error.
    r = toku_os_mkdir(ENVDIR, S_IRWXU+S_IRWXG+S_IRWXO); assert(r == 0);

    DB_ENV *env;
    r = db_env_create(&env, 0); assert(r == 0);
    env->set_errfile(env, stdout);
    r = env->open(env, ENVDIR, DB_INIT_MPOOL + DB_INIT_LOG + DB_INIT_LOCK + DB_INIT_TXN + DB_PRIVATE + DB_CREATE, S_IRWXU+S_IRWXG+S_IRWXO);
    if (r != 0) printf("%s:%d:%d:%s\n", __FILE__, __LINE__, r, db_strerror(r));
    assert(r == 0);

    DB *db;
    r = db_create(&db, env, 0); assert(r == 0);
    r = db->open(db, NULL, "test.db", 0, DB_BTREE, DB_CREATE|DB_AUTO_COMMIT, S_IRWXU+S_IRWXG+S_IRWXO); assert(r == 0);

    DB_TXN *txn = 0;
    r = env->txn_begin(env, 0, &txn, 0); assert(r == 0);

    DBT key, val;
    int k = 1, v = 1;
    r = db->put(db, txn, dbt_init(&key, &k, sizeof k), dbt_init(&val, &v, sizeof v), 0);
    assert(r == 0);

    // Close before commit
    r = db->close(db, 0); assert(r == 0);

    r = txn->commit(txn, 0); assert(r == 0);

    r = env->close(env, 0); assert(r == 0);
}
// Test entry point; the command-line arguments are unused (marked with UU).
int
test_main(int UU(argc), char UU(*const argv[])) {
test_txn_close_before_commit();
return 0;
}

57
src/tests/test_txn_close_before_prepare_commit.c

@ -0,0 +1,57 @@
/* -*- mode: C; c-basic-offset: 4 -*- */
#ident "$Id$"
#ident "Copyright (c) 2007-2012 Tokutek Inc. All rights reserved."
#include "test.h"
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <memory.h>
#include <sys/stat.h>
#include <db.h>
// Recreate a mysqld crash by closing and opening a db within a transaction.
// The crash occurs when writing a dirty cachetable pair, so we insert one
// row.
// Insert one row inside a transaction, close the DB handle while that
// transaction is still open, then prepare the transaction (gid filled with
// bytes of value 1), commit it and close the environment.
// Every step, including the creation of the environment directory, must succeed.
static void
test_txn_close_before_prepare_commit (void) {
    int r;

    CHK(system("rm -rf " ENVDIR));
    // A failed mkdir would otherwise only surface later as an obscure env->open error.
    r = toku_os_mkdir(ENVDIR, S_IRWXU+S_IRWXG+S_IRWXO); assert(r == 0);

    DB_ENV *env;
    r = db_env_create(&env, 0); assert(r == 0);
    env->set_errfile(env, stdout);
    r = env->open(env, ENVDIR, DB_INIT_MPOOL + DB_INIT_LOG + DB_INIT_LOCK + DB_INIT_TXN + DB_PRIVATE + DB_CREATE, S_IRWXU+S_IRWXG+S_IRWXO);
    if (r != 0) printf("%s:%d:%d:%s\n", __FILE__, __LINE__, r, db_strerror(r));
    assert(r == 0);

    DB *db;
    r = db_create(&db, env, 0); assert(r == 0);
    r = db->open(db, NULL, "test.db", 0, DB_BTREE, DB_CREATE|DB_AUTO_COMMIT, S_IRWXU+S_IRWXG+S_IRWXO); assert(r == 0);

    DB_TXN *txn = 0;
    r = env->txn_begin(env, 0, &txn, 0); assert(r == 0);

    DBT key, val;
    int k = 1, v = 1;
    r = db->put(db, txn, dbt_init(&key, &k, sizeof k), dbt_init(&val, &v, sizeof v), 0);
    assert(r == 0);

    // Close before prepare and commit
    r = db->close(db, 0); assert(r == 0);

    u_int8_t gid[DB_GID_SIZE];
    memset(gid, 1, DB_GID_SIZE);
    r = txn->prepare(txn, gid); assert(r == 0);

    r = txn->commit(txn, 0); assert(r == 0);

    r = env->close(env, 0); assert(r == 0);
}
// Test entry point; the command-line arguments are unused (marked with UU).
int
test_main(int UU(argc), char UU(*const argv[])) {
test_txn_close_before_prepare_commit();
return 0;
}

81
src/ydb.c

@ -395,11 +395,37 @@ env_setup_real_tmp_dir(DB_ENV *env) {
env_setup_real_dir(env, &env->i->real_tmp_dir, env->i->tmp_dir);
}
// Callback handed to tokudb_recover(): registers the DB attached to `brt` with
// `env` as a zombie (closed by the user, still referenced internally) and
// schedules its delayed close through db_close_before_brt.
//   dname       - stored directly in the DB's internal struct (the pointer is not copied)
//   oplsn_valid - forwarded, together with oplsn, to toku_brt_db_delay_closed()
// The DB is first noted as opened and then as closed so that the environment's
// bookkeeping is in the state env_note_zombie_db() is called from here.
static void keep_zombie_callback (DB_ENV *env, BRT brt, char *dname, bool oplsn_valid, LSN oplsn) {
    if (brt->db->i==NULL) {
        XMALLOC(brt->db->i);
        // Start from an all-zero struct: only dname and the list head are set
        // explicitly below, and the calls that follow receive the whole DB.
        memset(brt->db->i, 0, sizeof *brt->db->i);
    }
    brt->db->i->dname = dname;
    toku_list_init(&brt->db->i->dbs_that_must_close_before_abort);
    env_note_db_opened(env, brt->db);
    env_note_db_closed(env, brt->db);
    brt->db->i->is_zombie = TRUE;
    env_note_zombie_db(env, brt->db);
    int r = toku_brt_db_delay_closed(brt, brt->db, db_close_before_brt, 0, oplsn_valid, oplsn);
    assert(r==0);
}
// Callback handed to tokudb_recover(): stores `cachetable` in the environment's
// internal state so that later code can find it in env->i->cachetable.
static void keep_cachetable_callback (DB_ENV *env, CACHETABLE cachetable)
{
env->i->cachetable = cachetable;
}
static int
ydb_do_recovery (DB_ENV *env) {
assert(env->i->real_log_dir);
toku_ydb_unlock();
int r = tokudb_recover(env->i->dir, env->i->real_log_dir, env->i->bt_compare,
int r = tokudb_recover(env,
keep_zombie_callback,
toku_keep_prepared_txn_callback,
keep_cachetable_callback,
toku_setup_db_internal,
toku_close_db_internal,
env->i->logger,
env->i->dir, env->i->real_log_dir, env->i->bt_compare,
env->i->update_function,
env->i->generate_row_for_put, env->i->generate_row_for_del,
env->i->cachetable_size);
@ -885,12 +911,14 @@ toku_env_open(DB_ENV * env, const char *home, u_int32_t flags, int mode) {
if (flags & (DB_INIT_TXN | DB_INIT_LOG)) {
assert(env->i->logger);
toku_logger_write_log_files(env->i->logger, (BOOL)((flags & DB_INIT_LOG) != 0));
r = toku_logger_open(env->i->real_log_dir, env->i->logger);
if (r!=0) {
toku_ydb_do_error(env, r, "Could not open logger\n");
died2:
toku_logger_close(&env->i->logger);
goto died1;
if (!toku_logger_is_open(env->i->logger)) {
r = toku_logger_open(env->i->real_log_dir, env->i->logger);
if (r!=0) {
toku_ydb_do_error(env, r, "Could not open logger\n");
died2:
toku_logger_close(&env->i->logger);
goto died1;
}
}
} else {
r = toku_logger_close(&env->i->logger); // if no logging system, then kill the logger
@ -917,8 +945,11 @@ toku_env_open(DB_ENV * env, const char *home, u_int32_t flags, int mode) {
goto cleanup;
}
r = toku_brt_create_cachetable(&env->i->cachetable, env->i->cachetable_size, ZERO_LSN, env->i->logger);
if (r!=0) goto died2;
if (env->i->cachetable==NULL) {
// If we ran recovery then the cachetable should be set here.
r = toku_brt_create_cachetable(&env->i->cachetable, env->i->cachetable_size, ZERO_LSN, env->i->logger);
if (r!=0) goto died2;
}
toku_cachetable_set_lock_unlock_for_io(env->i->cachetable, toku_ydb_lock, toku_ydb_unlock);
toku_cachetable_set_env_dir(env->i->cachetable, env->i->dir);
@ -926,12 +957,14 @@ toku_env_open(DB_ENV * env, const char *home, u_int32_t flags, int mode) {
int using_txns = env->i->open_flags & DB_INIT_TXN;
if (env->i->logger) {
// if this is a newborn env or if this is an upgrade, then create a brand new rollback file
BOOL create_new_rollback_file = newenv | upgrade_in_progress;
assert (using_txns);
toku_logger_set_cachetable(env->i->logger, env->i->cachetable);
toku_logger_set_remove_finalize_callback(env->i->logger, finalize_file_removal, env->i->ltm);
r = toku_logger_open_rollback(env->i->logger, env->i->cachetable, create_new_rollback_file);
assert_zero(r);
if (!toku_logger_rollback_is_open(env->i->logger)) {
BOOL create_new_rollback_file = newenv | upgrade_in_progress;
r = toku_logger_open_rollback(env->i->logger, env->i->cachetable, create_new_rollback_file);
assert(r==0);
}
}
DB_TXN *txn=NULL;
@ -1036,7 +1069,7 @@ toku_env_close(DB_ENV * env, u_int32_t flags) {
}
{
if (env->i->persistent_environment) {
r = toku_db_close(env->i->persistent_environment, 0);
r = toku_db_close(env->i->persistent_environment, 0, false, ZERO_LSN);
if (r) {
err_msg = "Cannot close persistent environment dictionary (DB->close error)\n";
toku_ydb_do_error(env, r, "%s", err_msg);
@ -1044,7 +1077,7 @@ toku_env_close(DB_ENV * env, u_int32_t flags) {
}
}
if (env->i->directory) {
r = toku_db_close(env->i->directory, 0);
r = toku_db_close(env->i->directory, 0, false, ZERO_LSN);
if (r) {
err_msg = "Cannot close Directory dictionary (DB->close error)\n";
toku_ydb_do_error(env, r, "%s", err_msg);
@ -1465,6 +1498,16 @@ locked_env_close(DB_ENV * env, u_int32_t flags) {
toku_ydb_lock(); int r = toku_env_close(env, flags); toku_ydb_unlock(); return r;
}
// Forward a txn_recover request to the environment's logger.
// Up to `count` entries are written to preplist and the number written is
// stored in *retp; `flags` is passed through unchanged.
// Returns whatever toku_logger_recover_txn() returns.
static int
toku_env_recover_txn (DB_ENV *env, DB_PREPLIST preplist[/*count*/], long count, /*out*/ long *retp, u_int32_t flags) {
    TOKULOGGER logger = env->i->logger;
    return toku_logger_recover_txn(logger, preplist, count, retp, flags);
}
// DB_ENV->txn_recover entry point: runs toku_env_recover_txn() while holding
// the ydb lock and returns its result.
static int
locked_env_txn_recover (DB_ENV *env, DB_PREPLIST preplist[/*count*/], long count, /*out*/ long *retp, u_int32_t flags) {
    toku_ydb_lock();
    const int result = toku_env_recover_txn(env, preplist, count, retp, flags);
    toku_ydb_unlock();
    return result;
}
static int
locked_env_log_archive(DB_ENV * env, char **list[], u_int32_t flags) {
toku_ydb_lock(); int r = toku_env_log_archive(env, list, flags); toku_ydb_unlock(); return r;
@ -2328,6 +2371,7 @@ toku_env_create(DB_ENV ** envp, u_int32_t flags) {
SENV(cleaner_get_iterations);
SENV(open);
SENV(close);
SENV(txn_recover);
SENV(log_flush);
//SENV(set_noticecall);
SENV(set_flags);
@ -2476,7 +2520,9 @@ env_note_db_opened(DB_ENV *env, DB *db) {
}
void
env_note_db_closed(DB_ENV *env, DB *db) {
env_note_db_closed(DB_ENV *env, DB *db)
// Effect: Tell the DB_ENV that the DB is no longer in use by the user of the API. The DB may still be in use by the fractal tree internals.
{
assert(db->i->dname);
assert(!db->i->is_zombie);
assert(env->i->num_open_dbs);
@ -2493,9 +2539,10 @@ env_note_db_closed(DB_ENV *env, DB *db) {
assert_zero(r);
}
// Tell env that there is a new db handle (with non-unique dname in db->i-dname)
void
env_note_zombie_db(DB_ENV *env, DB *db) {
env_note_zombie_db(DB_ENV *env, DB *db)
// Effect: Tell the DB_ENV that the the DB is a zombie. That is, the DB is closed, but there's a unresolved transaction that refers to it (or may still be in use by the fractal tree internals).
{
assert(db->i->dname); // internal (non-user) dictionary has no dname
assert(db->i->is_zombie);
int r;

74
src/ydb_db.c

@ -115,8 +115,11 @@ create_iname(DB_ENV *env, u_int64_t id, char *hint, char *mark, int n) {
static int toku_db_open(DB * db, DB_TXN * txn, const char *fname, const char *dbname, DBTYPE dbtype, u_int32_t flags, int mode);
static int
db_close_before_brt(DB *db, u_int32_t UU(flags)) {
int
db_close_before_brt(DB *db, u_int32_t UU(flags), bool oplsn_valid, LSN oplsn)
// Effect: When the BRT closes a zombie DB, this function is called to inform the YDB layer that the brt is closed (before actually closing the BRT).
// This function will actually close the brt.
{
int r;
char *error_string = NULL;
@ -124,7 +127,7 @@ db_close_before_brt(DB *db, u_int32_t UU(flags)) {
// internal (non-user) dictionary has no dname
env_note_zombie_db_closed(db->dbenv, db); // tell env that this db is no longer a zombie (it is completely closed)
}
r = toku_close_brt(db->i->brt, &error_string);
r = toku_close_brt_lsn(db->i->brt, &error_string, oplsn_valid, oplsn);
if (r) {
if (!error_string)
error_string = "Closing file\n";
@ -158,7 +161,7 @@ toku_db_release_ref(DB *db){
//DB->close()
int
toku_db_close(DB * db, u_int32_t flags) {
toku_db_close(DB * db, u_int32_t flags, bool oplsn_valid, LSN oplsn) {
int r = 0;
if (db->i->refs != 1) {
r = EBUSY;
@ -174,7 +177,7 @@ toku_db_close(DB * db, u_int32_t flags) {
if (!toku_list_empty(&db->i->dbs_that_must_close_before_abort))
toku_list_remove(&db->i->dbs_that_must_close_before_abort);
r = toku_brt_db_delay_closed(db->i->brt, db, db_close_before_brt, flags);
r = toku_brt_db_delay_closed(db->i->brt, db, db_close_before_brt, flags, oplsn_valid, oplsn);
}
return r;
}
@ -481,7 +484,7 @@ toku_db_remove(DB * db, const char *fname, const char *dbname, u_int32_t flags)
HANDLE_PANICKED_DB(db);
DB_TXN *null_txn = NULL;
int r = toku_env_dbremove(db->dbenv, null_txn, fname, dbname, flags);
int r2 = toku_db_close(db, 0);
int r2 = toku_db_close(db, 0, false, ZERO_LSN);
if (r==0) r = r2;
return r;
}
@ -491,7 +494,7 @@ toku_db_rename(DB * db, const char *fname, const char *dbname, const char *newna
HANDLE_PANICKED_DB(db);
DB_TXN *null_txn = NULL;
int r = toku_env_dbrename(db->dbenv, null_txn, fname, dbname, newname, flags);
int r2 = toku_db_close(db, 0);
int r2 = toku_db_close(db, 0, false, ZERO_LSN);
if (r==0) r = r2;
return r;
}
@ -684,7 +687,7 @@ toku_db_pre_acquire_table_lock(DB *db, DB_TXN *txn, BOOL UU(just_lock)) {
static int
locked_db_close(DB * db, u_int32_t flags) {
toku_ydb_lock();
int r = toku_db_close(db, flags);
int r = toku_db_close(db, flags, false, ZERO_LSN);
toku_ydb_unlock();
return r;
}
@ -967,10 +970,11 @@ db_pre_acquire_table_lock(DB *db, DB_TXN *txn) {
return toku_db_pre_acquire_table_lock(db, txn, TRUE);
}
int
toku_db_create(DB ** db, DB_ENV * env, u_int32_t flags) {
int r;
// Close `db` with no user flags, forwarding the optional operation LSN
// (oplsn_valid / oplsn) to toku_db_close(). Returns toku_db_close()'s result.
int toku_close_db_internal (DB * db, bool oplsn_valid, LSN oplsn) {
    const u_int32_t no_flags = 0;
    return toku_db_close(db, no_flags, oplsn_valid, oplsn);
}
int toku_setup_db_internal (DB **dbp, DB_ENV *env, u_int32_t flags, BRT brt, bool is_open) {
if (flags || env == NULL)
return EINVAL;
@ -983,6 +987,39 @@ toku_db_create(DB ** db, DB_ENV * env, u_int32_t flags) {
}
memset(result, 0, sizeof *result);
result->dbenv = env;
MALLOC(result->i);
if (result->i == 0) {
toku_free(result);
return ENOMEM;
}
memset(result->i, 0, sizeof *result->i);
toku_list_init(&result->i->dbs_that_must_close_before_abort);
result->i->brt = brt;
result->i->refs = 1;
result->i->opened = is_open;
*dbp = result;
return 0;
}
int
toku_db_create(DB ** db, DB_ENV * env, u_int32_t flags) {
if (flags || env == NULL)
return EINVAL;
if (!env_opened(env))
return EINVAL;
BRT brt;
int r;
r = toku_brt_create(&brt);
if (r!=0) return r;
r = toku_setup_db_internal(db, env, flags, brt, false);
if (r != 0) return r;
DB *result=*db;
// methods that grab the ydb lock
#define SDB(name) result->name = locked_db_ ## name
SDB(close);
@ -1026,26 +1063,11 @@ toku_db_create(DB ** db, DB_ENV * env, u_int32_t flags) {
result->dbt_pos_infty = toku_db_dbt_pos_infty;
result->dbt_neg_infty = toku_db_dbt_neg_infty;
MALLOC(result->i);
if (result->i == 0) {
toku_free(result);
return ENOMEM;
}
memset(result->i, 0, sizeof *result->i);
result->i->dict_id = DICTIONARY_ID_NONE;
result->i->opened = 0;
result->i->open_flags = 0;
result->i->open_mode = 0;
result->i->brt = 0;
result->i->indexer = NULL;
result->i->refs = 1;
toku_list_init(&result->i->dbs_that_must_close_before_abort);
r = toku_brt_create(&result->i->brt);
if (r != 0) {
toku_free(result->i);
toku_free(result);
return r;
}
*db = result;
return 0;
}

5
src/ydb_db.h

@ -40,7 +40,10 @@ int db_open_iname(DB * db, DB_TXN * txn, const char *iname, u_int32_t flags, int
int toku_db_pre_acquire_table_lock(DB *db, DB_TXN *txn, BOOL just_lock);
int toku_db_get (DB * db, DB_TXN * txn, DBT * key, DBT * data, u_int32_t flags);
int toku_db_create(DB ** db, DB_ENV * env, u_int32_t flags);
int toku_db_close(DB * db, u_int32_t flags);
int toku_db_close(DB * db, u_int32_t flags, bool oplsn_valid, LSN oplsn);
int db_close_before_brt(DB *db, u_int32_t UU(flags), bool oplsn_valid, LSN oplsn);
int toku_close_db_internal (DB * db, bool oplsn_valid, LSN oplsn);
int toku_setup_db_internal (DB **dbp, DB_ENV *env, u_int32_t flags, BRT brt, bool is_open);
int db_getf_set(DB *db, DB_TXN *txn, u_int32_t flags, DBT *key, YDB_CALLBACK_FUNCTION f, void *extra);
int autotxn_db_get(DB* db, DB_TXN* txn, DBT* key, DBT* data, u_int32_t flags);

79
src/ydb_txn.c

@ -49,6 +49,7 @@ ydb_yield (voidfp f, void *fv, void *UU(v)) {
static void
toku_txn_destroy(DB_TXN *txn) {
(void) __sync_fetch_and_sub(&txn->mgrp->i->open_txns, 1);
assert(txn->mgrp->i->open_txns>=0);
toku_txn_destroy_txn(db_txn_struct_i(txn)->tokutxn);
#if !TOKUDB_NATIVE_H
toku_free(db_txn_struct_i(txn));
@ -214,6 +215,33 @@ toku_txn_abort_only(DB_TXN * txn,
return r;
}
// Prepare a top-level transaction under the global transaction id `gid`.
// Returns EINVAL when txn is NULL or has a parent. Any live child transaction
// is committed first; if that fails the environment is panicked. After the
// prepare, the fsync information recorded on the transaction is fetched and
// handed to toku_txn_maybe_fsync_log().
// Returns the result of toku_txn_prepare_txn().
static int
toku_txn_prepare (DB_TXN *txn, u_int8_t gid[DB_GID_SIZE]) {
    if (!txn) return EINVAL;
    if (txn->parent) return EINVAL;
    HANDLE_PANICKED_ENV(txn->mgrp);
    //Recursively commit any children.
    if (db_txn_struct_i(txn)->child) {
        //commit of child sets the child pointer to NULL
        int r_child = toku_txn_commit(db_txn_struct_i(txn)->child, 0, NULL, NULL, false);
        if (r_child !=0 && !toku_env_is_panicked(txn->mgrp)) {
            // The parent operation here is a prepare, so say so in the message.
            env_panic(txn->mgrp, r_child, "Recursive child commit failed during parent prepare.\n");
        }
        //In a panicked env, the child may not be removed from the list.
        HANDLE_PANICKED_ENV(txn->mgrp);
    }
    assert(!db_txn_struct_i(txn)->child);
    TOKUTXN ttxn = db_txn_struct_i(txn)->tokutxn;
    GID gids = {gid};
    int r = toku_txn_prepare_txn(ttxn, gids);
    TOKULOGGER logger = txn->mgrp->i->logger;
    LSN do_fsync_lsn;
    bool do_fsync;
    toku_txn_get_fsync_info(ttxn, &do_fsync, &do_fsync_lsn);
    toku_txn_maybe_fsync_log(logger, do_fsync_lsn, do_fsync, ydb_yield, NULL);
    return r;
}
int
toku_txn_abort(DB_TXN * txn,
TXN_PROGRESS_POLL_FUNCTION poll, void* poll_extra,
@ -240,15 +268,15 @@ locked_txn_id(DB_TXN *txn) {
}
static int
toku_txn_stat (DB_TXN *txn, struct txn_stat **txn_stat) {
toku_txn_txn_stat (DB_TXN *txn, struct txn_stat **txn_stat) {
XMALLOC(*txn_stat);
return toku_logger_txn_rollback_raw_count(db_txn_struct_i(txn)->tokutxn, &(*txn_stat)->rollback_raw_count);
}
static int
locked_txn_stat (DB_TXN *txn, struct txn_stat **txn_stat) {
locked_txn_txn_stat (DB_TXN *txn, struct txn_stat **txn_stat) {
toku_ydb_lock();
int r = toku_txn_stat(txn, txn_stat);
int r = toku_txn_txn_stat(txn, txn_stat);
toku_ydb_unlock();
return r;
}
@ -305,6 +333,11 @@ locked_txn_abort(DB_TXN *txn) {
return r;
}
// DB_TXN->prepare entry point: runs toku_txn_prepare() while holding the ydb
// lock and returns its result.
static int
locked_txn_prepare (DB_TXN *txn, u_int8_t gid[DB_GID_SIZE]) {
    toku_ydb_lock();
    const int result = toku_txn_prepare (txn, gid);
    toku_ydb_unlock();
    return result;
}
int
toku_txn_begin(DB_ENV *env, DB_TXN * stxn, DB_TXN ** txn, u_int32_t flags, bool internal, bool holds_ydb_lock) {
HANDLE_PANICKED_ENV(env);
@ -400,8 +433,9 @@ toku_txn_begin(DB_ENV *env, DB_TXN * stxn, DB_TXN ** txn, u_int32_t flags, bool
STXN(abort_with_progress);
STXN(commit_with_progress);
STXN(id);
STXN(prepare);
STXN(txn_stat);
#undef STXN
result->txn_stat = locked_txn_stat;
result->parent = stxn;
#if !TOKUDB_NATIVE_H
@ -470,4 +504,41 @@ toku_txn_begin(DB_ENV *env, DB_TXN * stxn, DB_TXN ** txn, u_int32_t flags, bool
return 0;
}
// Callback handed to tokudb_recover(): wraps the prepared TOKUTXN `tokutxn` in
// a newly allocated, zero-initialized top-level DB_TXN owned by `env`, installs
// the locked_txn_* methods on it, links the two objects together and bumps the
// environment's open-transaction count.
// The function returns void, so an allocation failure cannot be reported to
// the caller; it is caught with an assert, like the toku_lth_create() result.
void toku_keep_prepared_txn_callback (DB_ENV *env, TOKUTXN tokutxn) {
    struct __toku_db_txn_external *XMALLOC(eresult);
    memset(eresult, 0, sizeof(*eresult));
    DB_TXN *result = &eresult->external_part;
    result->mgrp = env;
#define STXN(name) result->name = locked_txn_ ## name
    STXN(abort);
    STXN(commit);
    STXN(abort_with_progress);
    STXN(commit_with_progress);
    STXN(id);
    STXN(prepare);
    STXN(txn_stat);
#undef STXN
    result->parent = NULL;
#if !TOKUDB_NATIVE_H
    MALLOC(db_txn_struct_i(result));
    assert(db_txn_struct_i(result));
#endif
    memset(db_txn_struct_i(result), 0, sizeof *db_txn_struct_i(result));
    toku_list_init(&db_txn_struct_i(result)->dbs_that_must_close_before_abort);
    {
        int r = toku_lth_create(&db_txn_struct_i(result)->lth);
        assert(r==0);
    }
    db_txn_struct_i(result)->tokutxn = tokutxn;
    toku_txn_set_container_db_txn(tokutxn, result);
    (void) __sync_fetch_and_add(&env->i->open_txns, 1);
}

1
src/ydb_txn.h

@ -13,6 +13,7 @@ int toku_txn_commit(DB_TXN * txn, u_int32_t flags, TXN_PROGRESS_POLL_FUNCTION, v
int toku_txn_abort(DB_TXN * txn, TXN_PROGRESS_POLL_FUNCTION, void*, bool release_multi_operation_client_lock);
int locked_txn_commit(DB_TXN *txn, u_int32_t flags);
int locked_txn_abort(DB_TXN *txn);
void toku_keep_prepared_txn_callback (DB_ENV *env, TOKUTXN tokutxn);
#if defined(__cplusplus)
}

2
windows/tests/test.h

@ -1,7 +1,7 @@
#include <toku_portability.h>
#include <toku_assert.h>
#define CKERR(r) do { if (r!=0) fprintf(stderr, "%s:%d error %d %s\n", __FILE__, __LINE__, r, strerror(r)); assert(r==0); } while (0)
// Assert that r is 0, printing the error number and its strerror() text first if it is not.
// The argument is evaluated exactly once (including on the failure path), so
// CKERR(function_call(args)) never runs the call a second time.
#define CKERR(r) ({ int __r = (r); if (__r!=0) fprintf(stderr, "%s:%d error %d %s\n", __FILE__, __LINE__, __r, strerror(__r)); assert(__r==0); })
#define CKERR2(r,r2) do { if (r!=r2) fprintf(stderr, "%s:%d error %d %s, expected %d\n", __FILE__, __LINE__, r, strerror(r), r2); assert(r==r2); } while (0)
#define CKERR2s(r,r2,r3) do { if (r!=r2 && r!=r3) fprintf(stderr, "%s:%d error %d %s, expected %d or %d\n", __FILE__, __LINE__, r, strerror(r), r2,r3); assert(r==r2||r==r3); } while (0)

Loading…
Cancel
Save