diff --git a/newbrt/Makefile b/newbrt/Makefile index ede2df0867a..6a0342f3dfa 100644 --- a/newbrt/Makefile +++ b/newbrt/Makefile @@ -37,6 +37,7 @@ build default: bins libs $(TEST_NEWBRT) BRT_SOURCES = \ block_allocator \ + block_table \ bread \ brt-serialize \ brt-verify \ diff --git a/newbrt/block_allocator.h b/newbrt/block_allocator.h index f203209cbb0..f0ac705cae6 100644 --- a/newbrt/block_allocator.h +++ b/newbrt/block_allocator.h @@ -6,6 +6,12 @@ #include "brttypes.h" +#define BLOCK_ALLOCATOR_ALIGNMENT 4096 +// How much must be reserved at the beginning for the block? +// The actual header is 8+4+4+8+8_4+8+ the length of the db names + 1 pointer for each root. +// So 4096 should be enough. +#define BLOCK_ALLOCATOR_HEADER_RESERVE 4096 + // A block allocator manages the allocation of variable-sized blocks. // The translation of block numbers to addresses is handled elsewhere. // The allocation of block numbers is handled elsewhere. diff --git a/newbrt/block_table.c b/newbrt/block_table.c new file mode 100644 index 00000000000..b0ff17dd0ae --- /dev/null +++ b/newbrt/block_table.c @@ -0,0 +1,457 @@ +//TODO: What about h->block_translation_size_on_disk +//TODO: What about h->block_translation_address_on_disk +//TODO: What about h->block_allocator + +#include "toku_portability.h" +#include "brttypes.h" +#include "block_table.h" +#include "memory.h" +#include "toku_assert.h" +#include "toku_pthread.h" +#include "block_allocator.h" +#include "rbuf.h" +#include "wbuf.h" + +struct block_table { + // This is the map from block numbers to offsets + //int n_blocks, n_blocks_array_size; + //struct block_descriptor *blocks; + BLOCKNUM free_blocks; // free list for blocks. Use -1 to indicate that there are no free blocks + BLOCKNUM unused_blocks; // first unused block + + u_int64_t translated_blocknum_limit; + struct block_translation_pair *block_translation; + + // Where and how big is the block translation vector stored on disk. 
+    // The size of the on_disk buffer may no longer match the max_blocknum_translated field, since blocks may have been allocated or freed.
+    // We need to remember this old information so we can free it properly.
+    u_int64_t block_translation_size_on_disk;    // the size of the block containing the translation (4-byte checksum + sizeof(struct block_translation_pair) per entry; see update_size_on_disk)
+    u_int64_t block_translation_address_on_disk; // 0 if there is no memory allocated
+
+    // The in-memory data structure for block allocation
+    BLOCK_ALLOCATOR block_allocator;
+};
+
+static const DISKOFF diskoff_is_null = (DISKOFF)-1; // in a freelist, this indicates end of list
+static const DISKOFF size_is_free = (DISKOFF)-1;
+
+static void
+extend_block_translation(BLOCK_TABLE bt, BLOCKNUM blocknum)
+// Effect: Record a block translation.  This means extending the translation table, and setting the diskoff and size to zero in any of the unused spots.
+// Requires: blocknum.b is non-negative (asserted).  Does nothing when blocknum is already below translated_blocknum_limit.
+{
+    assert(0<=blocknum.b);
+    if (bt->translated_blocknum_limit <= (u_int64_t)blocknum.b) {
+        // A table that was never allocated must also have a limit of zero.
+        if (bt->block_translation == 0) assert(bt->translated_blocknum_limit==0);
+        u_int64_t new_limit = blocknum.b + 1;
+        u_int64_t old_limit = bt->translated_blocknum_limit;
+        u_int64_t j;
+        XREALLOC_N(new_limit, bt->block_translation);
+        // Zero only the newly added entries [old_limit, new_limit); existing entries keep their translation.
+        for (j=old_limit; j<new_limit; j++) {
+            bt->block_translation[j].diskoff = 0;
+            bt->block_translation[j].size = 0;
+        }
+        bt->translated_blocknum_limit = new_limit;
+    }
+}
+
+// Asserts that b indexes an existing entry of the translation table.
+static inline void
+verify(BLOCK_TABLE bt, BLOCKNUM b) {
+    // 0<=b<limit
+    assert(0 <= b.b);
+    assert((u_int64_t)b.b < bt->translated_blocknum_limit);
+}
+
+// One file-scope mutex guards the block table code in this file; blocktable_is_locked
+// mirrors its state so that the *_unlocked entry points can assert the lock is held.
+static toku_pthread_mutex_t blocktable_mutex = TOKU_PTHREAD_MUTEX_INITIALIZER;
+static int blocktable_is_locked=0;
+
+// Initializes blocktable_mutex; asserts on failure.
+void toku_blocktable_lock_init(void) {
+    int r = toku_pthread_mutex_init(&blocktable_mutex, NULL); assert(r == 0);
+}
+
+// Destroys blocktable_mutex; asserts on failure.
+void toku_blocktable_lock_destroy(void) {
+    int r = toku_pthread_mutex_destroy(&blocktable_mutex); assert(r == 0);
+}
+
+static inline void
+lock_for_blocktable (void) {
+    // Locks the blocktable_mutex.
+    int r = toku_pthread_mutex_lock(&blocktable_mutex);
+    assert(r==0);
+    // Record that the lock is held; the *_unlocked entry points assert on this flag.
+    blocktable_is_locked = 1;
+}
+
+// Clears blocktable_is_locked, then releases blocktable_mutex; asserts that the unlock succeeded.
+static inline void
+unlock_for_blocktable (void) {
+    blocktable_is_locked = 0;
+    int r = toku_pthread_mutex_unlock(&blocktable_mutex);
+    assert(r==0);
+}
+
+// Returns the block at disk offset `offset` to the block allocator.  Takes no lock itself.
+static void
+block_free(BLOCK_TABLE bt, u_int64_t offset) {
+    block_allocator_free_block(bt->block_allocator, offset);
+}
+
+// If block number b currently has disk space (size > 0), frees that space and
+// resets the entry's diskoff and size to zero.  b must already be in the table (verify).
+static void
+block_free_blocknum(BLOCK_TABLE bt, BLOCKNUM b) {
+    verify(bt, b);
+    if (bt->block_translation[b.b].size > 0) {
+        block_free(bt, bt->block_translation[b.b].diskoff);
+        bt->block_translation[b.b].diskoff = 0;
+        bt->block_translation[b.b].size = 0;
+    }
+}
+
+// Asks the block allocator for `size` bytes; the chosen disk offset is stored in *offset.  Takes no lock itself.
+static void
+block_alloc(BLOCK_TABLE bt, u_int64_t size, u_int64_t *offset) {
+    block_allocator_alloc_block(bt->block_allocator, size, offset);
+}
+
+// Allocates `size` bytes for block number b and records the resulting (diskoff, size)
+// in b's translation entry.  b must already be in the table (verify).
+static void
+block_alloc_and_set_translation(BLOCK_TABLE bt, BLOCKNUM b, u_int64_t size, u_int64_t *offset) {
+    verify(bt, b);
+    block_alloc(bt, size, offset);
+    bt->block_translation[b.b].diskoff = *offset;
+    bt->block_translation[b.b].size = size;
+}
+
+// Locked wrapper around block_alloc: allocates disk space that is not tied to any block number.
+void
+toku_block_alloc(BLOCK_TABLE bt, u_int64_t size, u_int64_t *offset) {
+    lock_for_blocktable();
+    block_alloc(bt, size, offset);
+    unlock_for_blocktable();
+}
+
+// Locked wrapper around block_free.
+void
+toku_block_free(BLOCK_TABLE bt, u_int64_t offset) {
+    lock_for_blocktable();
+    block_free(bt, offset);
+    unlock_for_blocktable();
+}
+
+// Recomputes block_translation_size_on_disk from the current number of table entries.
+static void
+update_size_on_disk(BLOCK_TABLE bt) {
+    bt->block_translation_size_on_disk = 4 +//4 for checksum
+        bt->translated_blocknum_limit*sizeof(bt->block_translation[0]);
+}
+
+// Gives block number b a fresh on-disk location of `size` bytes, under the lock:
+// grows the table to include b if needed, frees b's previous space if it had any,
+// then allocates new space and stores its offset in *offset.
+void
+toku_block_realloc(BLOCK_TABLE bt, BLOCKNUM b, u_int64_t size, u_int64_t *offset) {
+    lock_for_blocktable();
+    extend_block_translation(bt, b);
+    block_free_blocknum(bt, b);
+    block_alloc_and_set_translation(bt, b, size, offset);
+    unlock_for_blocktable();
+}
+
+// Takes the blocktable lock so the caller can issue a sequence of *_unlocked calls.
+void
+toku_block_lock_for_multiple_operations(void) {
+    lock_for_blocktable();
+}
+
+// Releases the lock taken by toku_block_lock_for_multiple_operations; asserts that it is held.
+void
+toku_block_unlock_for_multiple_operations(void) {
+    assert(blocktable_is_locked);
+
unlock_for_blocktable();
+}
+
+
+// Moves the on-disk home of the translation table: frees the block that currently
+// holds it (if any), recomputes the table's serialized size, and allocates a new block.
+// Requires: the caller holds the blocktable lock (asserted).
+void
+toku_block_realloc_translation_unlocked(BLOCK_TABLE bt) {
+    assert(blocktable_is_locked);
+    if (bt->block_translation_address_on_disk != 0) {
+        block_allocator_free_block(bt->block_allocator, bt->block_translation_address_on_disk);
+    }
+    update_size_on_disk(bt);
+    block_allocator_alloc_block(bt->block_allocator,
+                                bt->block_translation_size_on_disk,
+                                &bt->block_translation_address_on_disk);
+}
+
+// Writes bt->free_blocks (head of the free list) to wbuf.  Requires the blocktable lock (asserted).
+void
+toku_block_wbuf_free_blocks_unlocked(BLOCK_TABLE bt, struct wbuf *wbuf) {
+    assert(blocktable_is_locked);
+    wbuf_BLOCKNUM(wbuf, bt->free_blocks);
+}
+
+// Writes bt->unused_blocks (first never-used block number) to wbuf.  Requires the blocktable lock (asserted).
+void
+toku_block_wbuf_unused_blocks_unlocked(BLOCK_TABLE bt, struct wbuf *wbuf) {
+    assert(blocktable_is_locked);
+    wbuf_BLOCKNUM(wbuf, bt->unused_blocks);
+}
+
+// Writes bt->translated_blocknum_limit to wbuf.  Requires the blocktable lock (asserted).
+void
+toku_block_wbuf_translated_blocknum_limit_unlocked(BLOCK_TABLE bt, struct wbuf *wbuf) {
+    assert(blocktable_is_locked);
+    wbuf_ulonglong(wbuf, bt->translated_blocknum_limit);
+}
+
+// Writes bt->block_translation_address_on_disk to wbuf.  Requires the blocktable lock (asserted).
+void
+toku_block_wbuf_block_translation_address_on_disk_unlocked(BLOCK_TABLE bt, struct wbuf *wbuf) {
+    assert(blocktable_is_locked);
+    wbuf_DISKOFF(wbuf, bt->block_translation_address_on_disk);
+}
+
+// Serializes the whole translation table into a freshly toku_malloc'ed buffer wrapped by w:
+// one (diskoff, size) pair per entry, followed by the x1764 checksum.
+// *size receives the serialized length and *address the disk offset it should be written to.
+// The buffer is not released here; the caller is responsible for it.
+// Requires: the caller holds the blocktable lock (asserted).
+void
+toku_block_wbuf_init_and_fill_unlocked(BLOCK_TABLE bt, struct wbuf *w,
+                                       u_int64_t *size, u_int64_t *address) {
+    assert(blocktable_is_locked);
+    update_size_on_disk(bt);
+    u_int64_t size_translation = bt->block_translation_size_on_disk;
+    //printf("%s:%d writing translation table of size_translation %ld at %ld\n", __FILE__, __LINE__, size_translation, bt->block_translation_address_on_disk);
+    wbuf_init(w, toku_malloc(size_translation), size_translation);
+    assert(w->size==size_translation);
+    u_int64_t i;
+    for (i=0; i<bt->translated_blocknum_limit; i++) {
+        //printf("%s:%d %ld,%ld\n", __FILE__, __LINE__, bt->block_translation[i].diskoff, bt->block_translation[i].size_translation);
+        wbuf_ulonglong(w, bt->block_translation[i].diskoff);
+        wbuf_ulonglong(w, bt->block_translation[i].size);
+    }
+    u_int32_t checksum =
x1764_finish(&w->checksum);
+    wbuf_int(w, checksum);
+    *size = size_translation;
+    *address = bt->block_translation_address_on_disk;
+}
+
+// Returns the disk offset recorded for block number b.  b must be in the table (verify asserts).
+DISKOFF
+toku_block_get_offset(BLOCK_TABLE bt, BLOCKNUM b) {
+    lock_for_blocktable();
+    verify(bt, b);
+    DISKOFF r = bt->block_translation[b.b].diskoff;
+    unlock_for_blocktable();
+    return r;
+}
+
+// Returns the size recorded for block number b.  b must be in the table (verify asserts).
+DISKOFF
+toku_block_get_size(BLOCK_TABLE bt, BLOCKNUM b) {
+    lock_for_blocktable();
+    verify(bt, b);
+    DISKOFF r = bt->block_translation[b.b].size;
+    unlock_for_blocktable();
+    return r;
+}
+
+// Effect: Hand out a block number in *res.  Pops the head of the free list when it is
+//  non-empty, otherwise takes the next never-used number.  Sets *dirty to 1.
+// Returns: 0 always.
+int
+toku_allocate_diskblocknumber(BLOCK_TABLE bt, BLOCKNUM *res, int *dirty, TOKULOGGER UU(logger)) {
+    lock_for_blocktable();
+    BLOCKNUM result;
+    if (bt->free_blocks.b == diskoff_is_null) {
+        // no blocks in the free list
+        result = bt->unused_blocks;
+        bt->unused_blocks.b++;
+    } else {
+        result = bt->free_blocks;
+        // The head of the free list must be marked free.  This has to be a comparison:
+        // an assignment here would always pass and hide a corrupted free list.
+        assert(bt->block_translation[result.b].size == size_is_free);
+        bt->block_translation[result.b].size = 0;
+        bt->free_blocks.b = bt->block_translation[result.b].diskoff; // pop the freelist
+    }
+    assert(result.b>0);
+    *res = result;
+    *dirty = 1;
+    unlock_for_blocktable();
+    return 0;
+}
+////CONVERTED above already
+//TODO: Convert below
+
+
+int
+toku_free_diskblocknumber(BLOCK_TABLE bt, BLOCKNUM *b, int *dirty, TOKULOGGER UU(logger))
+// Effect: Free a diskblock
+// Watch out for the case where the disk block was never yet written to disk
+{
+    lock_for_blocktable();
+    extend_block_translation(bt, *b);
+    // If the block_translation indicates that the size is <=0
+    // then there is no disk block allocated.
+    if (bt->block_translation[b->b].size > 0) {
+        block_allocator_free_block(bt->block_allocator,
+                                   bt->block_translation[b->b].diskoff);
+    }
+    verify(bt, *b);
+    // Freeing a block number that is already on the free list is a caller bug.
+    assert(bt->block_translation[b->b].size != size_is_free);
+    // Push the entry onto the free list: size marks it free, diskoff holds the next free block number.
+    bt->block_translation[b->b].size = size_is_free;
+    bt->block_translation[b->b].diskoff = bt->free_blocks.b;
+    bt->free_blocks.b = b->b;
+    b->b = 0;
+    *dirty = 1;
+    unlock_for_blocktable();
+    return 0;
+}
+
+//Verify there are no free blocks.
+//Reads bt->free_blocks without taking the blocktable lock.
+void
+toku_block_verify_no_free_blocks(BLOCK_TABLE bt) {
+    assert(bt->free_blocks.b==-1);
+}
+
+//Verify a block has been allocated at least once.
+//That is, 0 <= b < unused_blocks.
+void
+toku_verify_diskblocknumber_allocated(BLOCK_TABLE bt, BLOCKNUM b) {
+    lock_for_blocktable();
+    assert(0 <= b.b);
+    assert(     b.b < bt->unused_blocks.b);
+    unlock_for_blocktable();
+}
+
+// Locked wrapper: returns block_allocator_allocated_limit() for this table's allocator.
+u_int64_t toku_block_allocator_allocated_limit(BLOCK_TABLE bt) {
+    lock_for_blocktable();
+    u_int64_t r = block_allocator_allocated_limit(bt->block_allocator);
+    unlock_for_blocktable();
+    return r;
+}
+
+// Prints every translation entry as " index: diskoff size" on a single line of f.
+void
+toku_block_dump_translation_table(FILE *f, BLOCK_TABLE bt) {
+    lock_for_blocktable();
+    u_int64_t i;
+    fprintf(f, "Block translation:");
+    for (i=0; i<bt->translated_blocknum_limit; i++) {
+        fprintf(f, " %"PRIu64": %"PRId64" %"PRId64"", i, bt->block_translation[i].diskoff, bt->block_translation[i].size);
+    }
+    fprintf(f, "\n");
+    unlock_for_blocktable();
+}
+
+// Prints the translation entry at index `offset` to stdout; prints nothing when the index is out of range.
+void
+toku_block_dump_translation(BLOCK_TABLE bt, u_int64_t offset) {
+    lock_for_blocktable();
+    if (offset < bt->translated_blocknum_limit) {
+        struct block_translation_pair *bx = &bt->block_translation[offset];
+        printf("%" PRIu64 ": %" PRId64 " %" PRId64 "\n", offset, bx->diskoff, bx->size);
+    }
+    unlock_for_blocktable();
+}
+
+// Overwrites the first-unused block number (used by recovery, per the name).
+void
+toku_block_recovery_set_unused_blocks(BLOCK_TABLE bt, BLOCKNUM newunused) {
+    lock_for_blocktable();
+    bt->unused_blocks = newunused;
+    unlock_for_blocktable();
+}
+
+// Overwrites the head of the free list (used by recovery, per the name).
+void
+toku_block_recovery_set_free_blocks(BLOCK_TABLE bt, BLOCKNUM newfree) {
+    lock_for_blocktable();
+    bt->free_blocks =
newfree;
+    unlock_for_blocktable();
+}
+
+// Copies the first n bytes of the in-memory translation array into p.
+// The caller chooses n; nothing here checks it against the size of the table.
+void
+toku_block_memcpy_translation_table(BLOCK_TABLE bt, size_t n, void *p) {
+    lock_for_blocktable();
+    memcpy(p, bt->block_translation, n);
+    unlock_for_blocktable();
+}
+
+// Returns the number of entries in the translation table.
+u_int64_t
+toku_block_get_translated_blocknum_limit(BLOCK_TABLE bt) {
+    lock_for_blocktable();
+    u_int64_t r = bt->translated_blocknum_limit;
+    unlock_for_blocktable();
+    return r;
+}
+
+// Returns the head of the free list (-1 when the list is empty).
+BLOCKNUM
+toku_block_get_free_blocks(BLOCK_TABLE bt) {
+    lock_for_blocktable();
+    BLOCKNUM r = bt->free_blocks;
+    unlock_for_blocktable();
+    return r;
+}
+
+// Returns the first never-used block number.
+BLOCKNUM
+toku_block_get_unused_blocks(BLOCK_TABLE bt) {
+    lock_for_blocktable();
+    BLOCKNUM r = bt->unused_blocks;
+    unlock_for_blocktable();
+    return r;
+}
+
+// Tears down a block table built by toku_blocktable_create and sets *btp to NULL.
+// Releases the translation array, the block allocator, and the BLOCK_TABLE struct itself.
+void
+toku_blocktable_destroy(BLOCK_TABLE *btp) {
+    lock_for_blocktable();
+    BLOCK_TABLE bt = *btp;
+    *btp = NULL;
+    toku_free(bt->block_translation);
+    bt->block_translation = NULL;
+    destroy_block_allocator(&bt->block_allocator);
+    unlock_for_blocktable();
+    // The struct was obtained with XMALLOC in toku_blocktable_create; without this it is leaked,
+    // since the caller's only pointer to it was just cleared.
+    toku_free(bt);
+}
+
+// Replaces the in-memory translation array with `table` (holding `limit` entries), freeing the old array.
+// The block table stores the `table` pointer as given; no copy is made.
+void
+toku_blocktable_debug_set_translation(BLOCK_TABLE bt,
+                                      u_int64_t limit,
+                                      struct block_translation_pair *table) {
+    lock_for_blocktable();
+    if (bt->block_translation) toku_free(bt->block_translation);
+    bt->translated_blocknum_limit = limit;
+    bt->block_translation = table;
+    unlock_for_blocktable();
+}
+
+// Builds a block table from header fields and, when block_translation_address_on_disk is
+// non-zero, from `buffer`, the serialized translation table read from that address.
+// The result is stored in *btp.
+void
+toku_blocktable_create(BLOCK_TABLE *btp,
+                       BLOCKNUM free_blocks,
+                       BLOCKNUM unused_blocks,
+                       u_int64_t translated_blocknum_limit,
+                       u_int64_t block_translation_address_on_disk,
+                       u_int64_t block_translation_size_on_disk,
+                       unsigned char *buffer) {
+    lock_for_blocktable();
+
+    BLOCK_TABLE bt;
+    XMALLOC(bt);
+
+    bt->free_blocks = free_blocks;
+    bt->unused_blocks = unused_blocks;
+    bt->translated_blocknum_limit = translated_blocknum_limit;
+    bt->block_translation_address_on_disk = block_translation_address_on_disk;
+    update_size_on_disk(bt);
+    // A brand-new table has nothing on disk at all, not even the 4-byte checksum.
+    if (block_translation_address_on_disk==0 && block_translation_size_on_disk == 0) {
+        bt->block_translation_size_on_disk = 0;
+    }
+
assert(block_translation_size_on_disk==bt->block_translation_size_on_disk);
+
+
+    // Set up the block translation buffer.
+    create_block_allocator(&bt->block_allocator, BLOCK_ALLOCATOR_HEADER_RESERVE, BLOCK_ALLOCATOR_ALIGNMENT);
+    if (block_translation_address_on_disk==0) {
+        // Nothing was ever written: start with no translation array.
+        bt->block_translation = NULL;
+        assert(buffer==NULL);
+    }
+    else {
+        XMALLOC_N(translated_blocknum_limit, bt->block_translation);
+        //Mark where the translation table is stored on disk.
+        block_allocator_alloc_block_at(bt->block_allocator, bt->block_translation_size_on_disk, bt->block_translation_address_on_disk);
+        //Load translations from the buffer.
+        u_int64_t i;
+        struct rbuf rt;
+        rt.buf = buffer;
+        rt.ndone = 0;
+        rt.size = bt->block_translation_size_on_disk-4;//4==checksum
+        assert(rt.size>0);
+        for (i=0; i<bt->translated_blocknum_limit; i++) {
+            bt->block_translation[i].diskoff = rbuf_diskoff(&rt);
+            bt->block_translation[i].size    = rbuf_diskoff(&rt);
+            // Only entries with a positive size own disk space; re-reserve it in the allocator.
+            if (bt->block_translation[i].size > 0)
+                block_allocator_alloc_block_at(bt->block_allocator, bt->block_translation[i].size, bt->block_translation[i].diskoff);
+            //printf("%s:%d %ld %ld\n", __FILE__, __LINE__, bt->block_translation[i].diskoff, bt->block_translation[i].size);
+        }
+
+    }
+
+    // printf("%s:%d translated_blocknum_limit=%ld, block_translation_address_on_disk=%ld\n", __FILE__, __LINE__, bt->translated_blocknum_limit, bt->block_translation_address_on_disk);
+
+    *btp = bt;
+    unlock_for_blocktable();
+}
+
+// Creates an empty block table: empty free list (-1), first unused block number 2,
+// no translation entries and nothing stored on disk.
+void
+toku_blocktable_create_new(BLOCK_TABLE *btp) {
+    toku_blocktable_create(btp,
+                           make_blocknum(-1),
+                           make_blocknum(2),
+                           0, 0, 0, NULL);
+}
+
diff --git a/newbrt/block_table.h b/newbrt/block_table.h
new file mode 100644
index 00000000000..d5c44c213f5
--- /dev/null
+++ b/newbrt/block_table.h
@@ -0,0 +1,66 @@
+/* -*- mode: C; c-basic-offset: 4 -*- */
+#ifndef BLOCKTABLE_H
+#define BLOCKTABLE_H
+#ident "Copyright (c) 2007, 2008 Tokutek Inc.  All rights reserved."
+ +typedef struct block_table *BLOCK_TABLE; + +//Needed by tests, brtdump +struct block_translation_pair { + DISKOFF diskoff; // When in free list, set to the next free block. In this case it's really a BLOCKNUM. + DISKOFF size; // set to 0xFFFFFFFFFFFFFFFF for free +}; + +void toku_blocktable_lock_init(void); +void toku_blocktable_lock_destroy(void); + +void toku_block_realloc(BLOCK_TABLE bt, BLOCKNUM b, u_int64_t size, u_int64_t *offset); +void toku_block_alloc(BLOCK_TABLE bt, u_int64_t size, u_int64_t *offset); +void toku_block_free(BLOCK_TABLE bt, u_int64_t offset); +DISKOFF toku_block_get_offset(BLOCK_TABLE bt, BLOCKNUM b); +DISKOFF toku_block_get_size(BLOCK_TABLE bt, BLOCKNUM b); +int toku_allocate_diskblocknumber(BLOCK_TABLE bt, BLOCKNUM *res, int *dirty, TOKULOGGER logger); +int toku_free_diskblocknumber(BLOCK_TABLE bt, BLOCKNUM *b, int *dirty, TOKULOGGER logger); +void toku_verify_diskblocknumber_allocated(BLOCK_TABLE bt, BLOCKNUM b); +void toku_block_verify_no_free_blocks(BLOCK_TABLE bt); +u_int64_t toku_block_allocator_allocated_limit(BLOCK_TABLE bt); +void toku_block_dump_translation_table(FILE *f, BLOCK_TABLE bt); +void toku_block_dump_translation(BLOCK_TABLE bt, u_int64_t offset); + +void toku_blocktable_destroy(BLOCK_TABLE *btp); +void toku_blocktable_debug_set_translation(BLOCK_TABLE bt, + u_int64_t limit, + struct block_translation_pair *table); +void toku_blocktable_create(BLOCK_TABLE *btp, + BLOCKNUM free_blocks, + BLOCKNUM unused_blocks, + u_int64_t translated_blocknum_limit, + u_int64_t block_translation_address_on_disk, + u_int64_t block_translation_size_on_disk, + unsigned char *buffer); +void toku_blocktable_create_new(BLOCK_TABLE *bt); + +void toku_block_recovery_set_unused_blocks(BLOCK_TABLE bt, BLOCKNUM newunused); +void toku_block_recovery_set_free_blocks(BLOCK_TABLE bt, BLOCKNUM newfree); +BLOCKNUM toku_block_get_unused_blocks(BLOCK_TABLE bt); +BLOCKNUM toku_block_get_free_blocks(BLOCK_TABLE bt); +u_int64_t 
toku_block_get_translated_blocknum_limit(BLOCK_TABLE bt); + +void toku_block_memcpy_translation_table(BLOCK_TABLE bt, size_t n, void *p); + +//Unlocked/multi ops +void toku_block_lock_for_multiple_operations(void); +void toku_block_unlock_for_multiple_operations(void); + +void toku_block_realloc_translation_unlocked(BLOCK_TABLE bt); +void toku_block_wbuf_free_blocks_unlocked(BLOCK_TABLE bt, struct wbuf *wbuf); +void toku_block_wbuf_unused_blocks_unlocked(BLOCK_TABLE bt, struct wbuf *wbuf); +void toku_block_wbuf_translated_blocknum_limit_unlocked(BLOCK_TABLE bt, struct wbuf *wbuf); +void toku_block_wbuf_block_translation_address_on_disk_unlocked(BLOCK_TABLE bt, struct wbuf *wbuf); +void toku_block_wbuf_init_and_fill_unlocked(BLOCK_TABLE bt, struct wbuf *w, + u_int64_t *size, u_int64_t *address); + + + +#endif + diff --git a/newbrt/brt-internal.h b/newbrt/brt-internal.h index 74043b2586e..9b396b2ae15 100644 --- a/newbrt/brt-internal.h +++ b/newbrt/brt-internal.h @@ -14,6 +14,7 @@ typedef void *OMTVALUE; #include "omt.h" #include "leafentry.h" +#include "block_table.h" #ifndef BRT_FANOUT #define BRT_FANOUT 16 @@ -113,11 +114,6 @@ struct remembered_hash { u_int32_t fullhash; // fullhash is the hashed value of fnum and root. }; -struct block_translation_pair { - DISKOFF diskoff; // When in free list, set to the next free block. In this case it's really a BLOCKNUM. - DISKOFF size; // set to 0xFFFFFFFFFFFFFFFF for free -}; - // The brt_header is not managed by the cachetable. Instead, it hangs off the cachefile as userdata. struct brt_header { @@ -137,23 +133,7 @@ struct brt_header { u_int64_t root_put_counter; // the generation number of the brt - // This is the map from block numbers to offsets - //int n_blocks, n_blocks_array_size; - //struct block_descriptor *blocks; - BLOCKNUM free_blocks; // free list for blocks. 
Use -1 to indicate that there are no free blocks - BLOCKNUM unused_blocks; // first unused block - - u_int64_t translated_blocknum_limit; - struct block_translation_pair *block_translation; - - // Where and how big is the block translation vector stored on disk. - // The size of the on_disk buffer may no longer match the max_blocknum_translated field, since blocks may have been allocated or freed. - // We need to remember this old information so we can free it properly. - u_int64_t block_translation_size_on_disk; // the size of the block containing the translation (i.e. 8 times the number of entries) - u_int64_t block_translation_address_on_disk; // 0 if there is no memory allocated - - // The in-memory data structure for block allocation - BLOCK_ALLOCATOR block_allocator; + BLOCK_TABLE blocktable; }; struct brt { @@ -292,12 +272,6 @@ void toku_brtheader_free (struct brt_header *h); int toku_brtheader_close (CACHEFILE cachefile, void *header_v, char **error_string); int toku_brtheader_checkpoint (CACHEFILE cachefile, void *header_v); -#define BLOCK_ALLOCATOR_ALIGNMENT 4096 -// How much must be reserved at the beginning for the block? -// The actual header is 8+4+4+8+8_4+8+ the length of the db names + 1 pointer for each root. -// So 4096 should be enough. 
-#define BLOCK_ALLOCATOR_HEADER_RESERVE 4096 - int toku_db_badformat(void); #endif diff --git a/newbrt/brt-serialize.c b/newbrt/brt-serialize.c index caf83d29557..9b0a401e82f 100644 --- a/newbrt/brt-serialize.c +++ b/newbrt/brt-serialize.c @@ -353,24 +353,17 @@ int toku_serialize_brtnode_to (int fd, BLOCKNUM blocknum, BRTNODE node, struct b int r; { lock_for_pwrite(); +//TODO: #1463 START (might not be the entire range // If the node has never been written, then write the whole buffer, including the zeros assert(blocknum.b>=0); //printf("%s:%d h=%p\n", __FILE__, __LINE__, h); //printf("%s:%d translated_blocknum_limit=%lu blocknum.b=%lu\n", __FILE__, __LINE__, h->translated_blocknum_limit, blocknum.b); //printf("%s:%d allocator=%p\n", __FILE__, __LINE__, h->block_allocator); //printf("%s:%d bt=%p\n", __FILE__, __LINE__, h->block_translation); - extend_block_translation(blocknum, h); - if (h->block_translation[blocknum.b].size > 0) { - block_allocator_free_block(h->block_allocator, h->block_translation[blocknum.b].diskoff); - h->block_translation[blocknum.b].diskoff = 0; - h->block_translation[blocknum.b].size = 0; - } h->dirty = 1; // Allocating a block dirties the header. 
size_t n_to_write = uncompressed_magic_len + compression_header_len + compressed_len; u_int64_t offset; - block_allocator_alloc_block(h->block_allocator, n_to_write, &offset); - h->block_translation[blocknum.b].diskoff = offset; - h->block_translation[blocknum.b].size = n_to_write; + toku_block_realloc(h->blocktable, blocknum, n_to_write, &offset); ssize_t n_wrote; r=toku_pwrite_extend(fd, compressed_buf, n_to_write, offset, &n_wrote); if (r) { @@ -378,6 +371,7 @@ int toku_serialize_brtnode_to (int fd, BLOCKNUM blocknum, BRTNODE node, struct b } else { r=0; } +//TODO: #1463 END unlock_for_pwrite(); } @@ -391,8 +385,7 @@ int toku_serialize_brtnode_to (int fd, BLOCKNUM blocknum, BRTNODE node, struct b int toku_deserialize_brtnode_from (int fd, BLOCKNUM blocknum, u_int32_t fullhash, BRTNODE *brtnode, struct brt_header *h) { if (0) printf("Deserializing Block %" PRId64 "\n", blocknum.b); if (h->panic) return h->panic; - assert(0 <= blocknum.b && (u_int64_t)blocknum.b < h->translated_blocknum_limit); - DISKOFF offset = h->block_translation[blocknum.b].diskoff; + DISKOFF offset = toku_block_get_offset(h->blocktable, blocknum); TAGMALLOC(BRTNODE, result); struct rbuf rc; int i; @@ -714,16 +707,19 @@ int toku_serialize_brt_header_to_wbuf (struct wbuf *wbuf, struct brt_header *h) wbuf_int (wbuf, size); wbuf_int (wbuf, BRT_LAYOUT_VERSION); wbuf_int (wbuf, h->nodesize); - wbuf_BLOCKNUM(wbuf, h->free_blocks); - wbuf_BLOCKNUM(wbuf, h->unused_blocks); + //TODO: Use 'prelocked/unlocked' versions to make this atomic +//TODO: #1463 START + + toku_block_realloc_translation_unlocked(h->blocktable); + toku_block_wbuf_free_blocks_unlocked(h->blocktable, wbuf); + toku_block_wbuf_unused_blocks_unlocked(h->blocktable, wbuf); +//TODO: #1463 END wbuf_int (wbuf, h->n_named_roots); - if (h->block_translation_address_on_disk != 0) { - block_allocator_free_block(h->block_allocator, h->block_translation_address_on_disk); - } - block_allocator_alloc_block(h->block_allocator, 4 + 
16*h->translated_blocknum_limit, &h->block_translation_address_on_disk); +//TODO: #1463 START //printf("%s:%d bta=%lu size=%lu\n", __FILE__, __LINE__, h->block_translation_address_on_disk, 4 + 16*h->translated_blocknum_limit); - wbuf_ulonglong(wbuf, h->translated_blocknum_limit); - wbuf_DISKOFF(wbuf, h->block_translation_address_on_disk); + toku_block_wbuf_translated_blocknum_limit_unlocked(h->blocktable, wbuf); + toku_block_wbuf_block_translation_address_on_disk_unlocked(h->blocktable, wbuf); +//TODO: #1463 END if (h->n_named_roots>=0) { int i; for (i=0; in_named_roots; i++) { @@ -746,18 +742,31 @@ int toku_serialize_brt_header_to (int fd, struct brt_header *h) { int rr = 0; if (h->panic) return h->panic; lock_for_pwrite(); + toku_block_lock_for_multiple_operations(); + struct wbuf w_main; + unsigned int size_main = toku_serialize_brt_header_size (h); { - struct wbuf w; - unsigned int size = toku_serialize_brt_header_size (h); - wbuf_init(&w, toku_malloc(size), size); + wbuf_init(&w_main, toku_malloc(size_main), size_main); { - int r=toku_serialize_brt_header_to_wbuf(&w, h); + int r=toku_serialize_brt_header_to_wbuf(&w_main, h); assert(r==0); } - assert(w.ndone==size); + assert(w_main.ndone==size_main); + } + struct wbuf w_translation; + u_int64_t size_translation; + u_int64_t address_translation; + { + toku_block_wbuf_init_and_fill_unlocked(h->blocktable, &w_translation, + &size_translation, &address_translation); + size_translation = w_translation.size; + } + toku_block_unlock_for_multiple_operations(); + { + //Actual Write main header ssize_t nwrote; - rr = toku_pwrite_extend(fd, w.buf, w.ndone, 0, &nwrote); - toku_free(w.buf); + rr = toku_pwrite_extend(fd, w_main.buf, w_main.ndone, 0, &nwrote); + toku_free(w_main.buf); if (rr) { if (h->panic==0) { char s[200]; @@ -767,31 +776,21 @@ int toku_serialize_brt_header_to (int fd, struct brt_header *h) { } goto finish; } - assert((u_int64_t)nwrote==size); + assert((u_int64_t)nwrote==size_main); } { - struct wbuf w; - 
u_int64_t size = 4 + h->translated_blocknum_limit * 16; // 4 for the checksum - //printf("%s:%d writing translation table of size %ld at %ld\n", __FILE__, __LINE__, size, h->block_translation_address_on_disk); - wbuf_init(&w, toku_malloc(size), size); - u_int64_t i; - for (i=0; itranslated_blocknum_limit; i++) { - //printf("%s:%d %ld,%ld\n", __FILE__, __LINE__, h->block_translation[i].diskoff, h->block_translation[i].size); - wbuf_ulonglong(&w, h->block_translation[i].diskoff); - wbuf_ulonglong(&w, h->block_translation[i].size); - } - u_int32_t checksum = x1764_finish(&w.checksum); - wbuf_int(&w, checksum); + //Actual Write translation table ssize_t nwrote; - rr = toku_pwrite_extend(fd, w.buf, size, h->block_translation_address_on_disk, &nwrote); - toku_free(w.buf); + rr = toku_pwrite_extend(fd, w_translation.buf, + size_translation, address_translation, &nwrote); if (rr) { //fprintf(stderr, "%s:%d: Error writing data to file. errno=%d (%s)\n", __FILE__, __LINE__, rr, strerror(rr)); goto finish; } - assert((u_int64_t)nwrote==size); + assert((u_int64_t)nwrote==size_translation); } finish: + toku_free(w_translation.buf); unlock_for_pwrite(); return rr; } @@ -820,49 +819,48 @@ deserialize_brtheader (u_int32_t size, int fd, DISKOFF off, struct brt_header ** h->layout_version = rbuf_int(&rc); h->nodesize = rbuf_int(&rc); assert(h->layout_version==BRT_LAYOUT_VERSION_9); - h->free_blocks = rbuf_blocknum(&rc); - h->unused_blocks = rbuf_blocknum(&rc); + BLOCKNUM free_blocks = rbuf_blocknum(&rc); + BLOCKNUM unused_blocks = rbuf_blocknum(&rc); h->n_named_roots = rbuf_int(&rc); - h->translated_blocknum_limit = rbuf_diskoff(&rc); - h->block_translation_size_on_disk = 4 + 16 * h->translated_blocknum_limit; - h->block_translation_address_on_disk = rbuf_diskoff(&rc); - // Set up the the block translation buffer. 
- create_block_allocator(&h->block_allocator, BLOCK_ALLOCATOR_HEADER_RESERVE, BLOCK_ALLOCATOR_ALIGNMENT); + u_int64_t translated_blocknum_limit = rbuf_diskoff(&rc); + u_int64_t block_translation_address_on_disk = rbuf_diskoff(&rc); + u_int64_t block_translation_size_on_disk = 4 +//4 for checksum + 16*translated_blocknum_limit; // printf("%s:%d translated_blocknum_limit=%ld, block_translation_address_on_disk=%ld\n", __FILE__, __LINE__, h->translated_blocknum_limit, h->block_translation_address_on_disk); - if (h->block_translation_address_on_disk == 0) { - h->block_translation = 0; - } else { + if (block_translation_address_on_disk == 0) { + //There is no data on the disk. + //Create empty translation table. + toku_blocktable_create(&h->blocktable, + free_blocks, unused_blocks, + translated_blocknum_limit, + block_translation_address_on_disk, + block_translation_size_on_disk, NULL); + } + else { + //Load translation table if it exists on disk. lock_for_pwrite(); - block_allocator_alloc_block_at(h->block_allocator, h->block_translation_size_on_disk, h->block_translation_address_on_disk); - XMALLOC_N(h->translated_blocknum_limit, h->block_translation); - unsigned char *XMALLOC_N(h->block_translation_size_on_disk, tbuf); + //TODO: #1463 load! + unsigned char *XMALLOC_N(block_translation_size_on_disk, tbuf); { - ssize_t r = pread(fd, tbuf, h->block_translation_size_on_disk, h->block_translation_address_on_disk); + ssize_t r = pread(fd, tbuf, block_translation_size_on_disk, block_translation_address_on_disk); // This cast is messed up in 32-bits if the block translation table is ever more than 4GB. But in that case, the translation table itself won't fit in main memory. 
- assert((u_int64_t)r==h->block_translation_size_on_disk); + assert((u_int64_t)r==block_translation_size_on_disk); } { // check the checksum - u_int32_t x1764 = x1764_memory(tbuf, h->block_translation_size_on_disk - 4); - u_int64_t offset = h->block_translation_size_on_disk - 4; - //printf("%s:%d read from %ld (x1764 offset=%ld) size=%ld\n", __FILE__, __LINE__, h->block_translation_address_on_disk, offset, h->block_translation_size_on_disk); + u_int32_t x1764 = x1764_memory(tbuf, block_translation_size_on_disk - 4); + u_int64_t offset = block_translation_size_on_disk - 4; + //printf("%s:%d read from %ld (x1764 offset=%ld) size=%ld\n", __FILE__, __LINE__, block_translation_address_on_disk, offset, block_translation_size_on_disk); u_int32_t stored_x1764 = toku_ntohl(*(int*)(tbuf + offset)); assert(x1764 == stored_x1764); } - // now read all that data. - u_int64_t i; - struct rbuf rt; - rt.buf = tbuf; - rt.ndone = 0; - rt.size = h->block_translation_size_on_disk-4; - assert(rt.size>0); - for (i=0; itranslated_blocknum_limit; i++) { - h->block_translation[i].diskoff = rbuf_diskoff(&rt); - h->block_translation[i].size = rbuf_diskoff(&rt); - if (h->block_translation[i].size > 0) - block_allocator_alloc_block_at(h->block_allocator, h->block_translation[i].size, h->block_translation[i].diskoff); - //printf("%s:%d %ld %ld\n", __FILE__, __LINE__, h->block_translation[i].diskoff, h->block_translation[i].size); - } + // Create table and read in data. 
+ toku_blocktable_create(&h->blocktable, + free_blocks, unused_blocks, + translated_blocknum_limit, + block_translation_address_on_disk, + block_translation_size_on_disk, + tbuf); unlock_for_pwrite(); toku_free(tbuf); } @@ -898,7 +896,7 @@ deserialize_brtheader (u_int32_t size, int fd, DISKOFF off, struct brt_header ** toku_free(rc.buf); { int r; - if ((r = deserialize_fifo_at(fd, block_allocator_allocated_limit(h->block_allocator), &h->fifo))) return r; + if ((r = deserialize_fifo_at(fd, toku_block_allocator_allocated_limit(h->blocktable), &h->fifo))) return r; } *brth = h; return 0; diff --git a/newbrt/brt.c b/newbrt/brt.c index 5f5f52d41f8..63313e1a6d0 100644 --- a/newbrt/brt.c +++ b/newbrt/brt.c @@ -557,10 +557,8 @@ brtheader_init(struct brt_header *h) { static void brtheader_partial_destroy(struct brt_header *h) { - toku_free(h->block_translation); - h->block_translation = 0; + toku_blocktable_destroy(&h->blocktable); toku_fifo_free(&h->fifo); - destroy_block_allocator(&h->block_allocator); } static void @@ -603,62 +601,6 @@ toku_brtheader_free (struct brt_header *h) { brtheader_free(h); } -void -extend_block_translation (BLOCKNUM blocknum, struct brt_header *h) -// Effect: Record a block translation. This means extending the translation table, and setting the diskoff and size to zero in any of the unused spots. 
-{ - if (h->translated_blocknum_limit <= (u_int64_t)blocknum.b) { - if (h->block_translation == 0) assert(h->translated_blocknum_limit==0); - u_int64_t new_limit = blocknum.b + 1; - u_int64_t old_limit = h->translated_blocknum_limit; - u_int64_t j; - XREALLOC_N(new_limit, h->block_translation); - for (j=old_limit; jblock_translation[j].diskoff = 0; - h->block_translation[j].size = 0; - } - h->translated_blocknum_limit = new_limit; - } -} - -const DISKOFF diskoff_is_null = (DISKOFF)-1; // in a freelist, this indicates end of list -const DISKOFF size_is_free = (DISKOFF)-1; - -static int -allocate_diskblocknumber (BLOCKNUM *res, BRT brt, TOKULOGGER logger __attribute__((__unused__))) { - BLOCKNUM result; - if (brt->h->free_blocks.b == diskoff_is_null) { - // no blocks in the free list - result = brt->h->unused_blocks; - brt->h->unused_blocks.b++; - } else { - result = brt->h->free_blocks; - assert(brt->h->block_translation[result.b].size = size_is_free); - brt->h->block_translation[result.b].size = 0; - brt->h->free_blocks.b = brt->h->block_translation[result.b].diskoff; // pop the freelist - } - assert(result.b>0); - *res = result; - brt->h->dirty = 1; - return 0; -} - -static int -free_diskblocknumber (BLOCKNUM *b, struct brt_header *h, TOKULOGGER logger __attribute__((__unused__))) -// Effect: Free a diskblock -// Watch out for the case where the disk block was never yet written to disk and is beyond the translated_blocknum_limit. -{ - extend_block_translation(*b, h); - assert((u_int64_t)b->b < h->translated_blocknum_limit); // as a "limit" it should be < - assert(h->block_translation[b->b].size != size_is_free); - h->block_translation[b->b].size = size_is_free; - h->block_translation[b->b].diskoff = h->free_blocks.b; - h->free_blocks.b = b->b; - b->b = 0; - h->dirty = 1; - return 0; -} - static void initialize_empty_brtnode (BRT t, BRTNODE n, BLOCKNUM nodename, int height) // Effect: Fill in N as an empty brtnode. 
@@ -712,7 +654,9 @@ brt_init_new_root(BRT brt, BRTNODE nodea, BRTNODE nodeb, DBT splitk, CACHEKEY *r int new_height = nodea->height+1; int new_nodesize = brt->h->nodesize; BLOCKNUM newroot_diskoff; - r = allocate_diskblocknumber(&newroot_diskoff, brt, logger); + r = toku_allocate_diskblocknumber(brt->h->blocktable, + &newroot_diskoff, + &brt->h->dirty, logger); assert(r==0); assert(newroot); newroot->ever_been_written = 0; @@ -780,7 +724,7 @@ int toku_create_new_brtnode (BRT t, BRTNODE *result, int height, TOKULOGGER logg TAGMALLOC(BRTNODE, n); int r; BLOCKNUM name; - r = allocate_diskblocknumber (&name, t, logger); + r = toku_allocate_diskblocknumber(t->h->blocktable, &name, &t->h->dirty, logger); assert(r==0); assert(n); assert(t->h->nodesize>0); @@ -2227,15 +2171,8 @@ brt_merge_child (BRT t, BRTNODE node, int childnum_to_merge, BOOL *did_io, TOKUL if (did_merge) { BLOCKNUM bn = childb->thisnodename; rrb = toku_cachetable_unpin_and_remove(t->cf, bn); - // If the block_translation indicates that the size is <=0 then there is no block allocated. - // The block translation might not be big enough, and that also indicates no block allocated. 
- assert(0 <= bn.b); // the blocknumber better be good - if ((unsigned)bn.b < t->h->translated_blocknum_limit) { - if (t->h->block_translation[bn.b].size > 0) { - block_allocator_free_block(t->h->block_allocator, t->h->block_translation[bn.b].diskoff); - } - } - rrb1 = free_diskblocknumber(&bn, t->h, logger); + rrb1 = toku_free_diskblocknumber(t->h->blocktable, &bn, + &t->h->dirty, logger); } else { rrb = toku_unpin_brtnode(t, childb); } @@ -2246,7 +2183,7 @@ brt_merge_child (BRT t, BRTNODE node, int childnum_to_merge, BOOL *did_io, TOKUL } verify_local_fingerprint_nonleaf(node); return r; - } +} static int brt_handle_maybe_reactive_child(BRT t, BRTNODE node, int childnum, enum reactivity re, BOOL *did_io, TOKULOGGER logger, BOOL *did_react) { @@ -2315,7 +2252,8 @@ flush_this_child (BRT t, BRTNODE node, int childnum, TOKULOGGER logger, enum rea { assert(node->height>0); BLOCKNUM targetchild = BNC_BLOCKNUM(node, childnum); - assert(targetchild.b>=0 && targetchild.b<t->h->unused_blocks.b); // This assertion could fail in a concurrent setting since another process might have bumped unused memory. + //TODO: #1463 This assert... 
+ toku_verify_diskblocknumber_allocated(t->h->blocktable, targetchild); u_int32_t childfullhash = compute_child_fullhash(t->cf, node, childnum); BRTNODE child; { @@ -2760,23 +2698,18 @@ static int brt_init_header(BRT t, TOKUTXN txn) { t->h->dirty=1; t->h->flags_array[0] = t->flags; t->h->nodesize=t->nodesize; - t->h->free_blocks = make_blocknum(-1); - t->h->unused_blocks=make_blocknum(2); - t->h->translated_blocknum_limit = 0; - t->h->block_translation = 0; - t->h->block_translation_size_on_disk = 0; - t->h->block_translation_address_on_disk = 0; - // printf("%s:%d translated_blocknum_limit=%ld, block_translation_address_on_disk=%ld\n", __FILE__, __LINE__, t->h->translated_blocknum_limit, t->h->block_translation_address_on_disk); - create_block_allocator(&t->h->block_allocator, BLOCK_ALLOCATOR_HEADER_RESERVE, BLOCK_ALLOCATOR_ALIGNMENT); + toku_blocktable_create_new(&t->h->blocktable); toku_fifo_create(&t->h->fifo); t->h->root_put_counter = global_root_put_counter++; { + BLOCKNUM free_blocks = toku_block_get_free_blocks(t->h->blocktable); + BLOCKNUM unused_blocks = toku_block_get_unused_blocks(t->h->blocktable); LOGGEDBRTHEADER lh = {.size= toku_serialize_brt_header_size(t->h), .flags = t->flags, .nodesize = t->h->nodesize, - .free_blocks = t->h->free_blocks, - .unused_blocks = t->h->unused_blocks, + .free_blocks = free_blocks, + .unused_blocks = unused_blocks, .n_named_roots = t->h->n_named_roots }; if (t->h->n_named_roots>=0) { lh.u.many.names = t->h->names; @@ -2788,7 +2721,7 @@ static int brt_init_header(BRT t, TOKUTXN txn) { } if ((r=setup_initial_brt_root_node(t, root, toku_txn_logger(txn)))!=0) { return r; } //printf("%s:%d putting %p (%d)\n", __FILE__, __LINE__, t->h, 0); - assert(t->h->free_blocks.b==-1); + toku_block_verify_no_free_blocks(t->h->blocktable); toku_cachefile_set_userdata(t->cf, t->h, toku_brtheader_close, toku_brtheader_checkpoint); return r; @@ -2940,7 +2873,7 @@ int toku_brt_open(BRT t, const char *fname, const char *fname_in_env, const 
char t->h->n_named_roots++; if ((t->h->names[t->h->n_named_roots-1] = toku_strdup(dbname)) == 0) { assert(errno==ENOMEM); r=ENOMEM; goto died_after_read_and_pin; } //printf("%s:%d t=%p\n", __FILE__, __LINE__, t); - r = allocate_diskblocknumber(&t->h->roots[t->h->n_named_roots-1], t, toku_txn_logger(txn)); + r = toku_allocate_diskblocknumber(t->h->blocktable, &t->h->roots[t->h->n_named_roots-1], &t->h->dirty, toku_txn_logger(txn)); if (r!=0) goto died_after_read_and_pin; t->h->dirty = 1; compute_and_fill_remembered_hash(t, t->h->n_named_roots-1); @@ -3074,7 +3007,9 @@ toku_brtheader_checkpoint (CACHEFILE cachefile, void *header_v) int r = toku_serialize_brt_header_to(toku_cachefile_fd(cachefile), h); if (r) return r; } - u_int64_t write_to = block_allocator_allocated_limit(h->block_allocator); // Must compute this after writing the header. + //We would want retrieving 'write_to' and writing to that point to be + //atomic. This is only done during shutdown of a BRT, so we allow it. + u_int64_t write_to = toku_block_allocator_allocated_limit(h->blocktable); // Must compute this after writing the header. 
//printf("%s:%d fifo written to %lu\n", __FILE__, __LINE__, write_to); { int r = toku_serialize_fifo_at(toku_cachefile_fd(cachefile), write_to, h->fifo); @@ -4360,12 +4295,7 @@ int toku_dump_brt (FILE *f, BRT brt) { CACHEKEY *rootp; assert(brt->h); u_int32_t fullhash; - u_int64_t i; - fprintf(f, "Block translation:"); - for (i=0; i<brt->h->translated_blocknum_limit; i++) { - fprintf(f, " %"PRIu64": %"PRId64" %"PRId64"", i, brt->h->block_translation[i].diskoff, brt->h->block_translation[i].size); - } - fprintf(f, "\n"); + toku_block_dump_translation_table(f, brt->h->blocktable); rootp = toku_calculate_root_offset_pointer(brt, &fullhash); return toku_dump_brtnode(f, brt, *rootp, 0, 0, 0, 0, 0); } @@ -4396,12 +4326,14 @@ static void toku_brt_lock_init(void) { toku_pwrite_lock_init(); toku_logger_lock_init(); toku_graceful_lock_init(); + toku_blocktable_lock_init(); } static void toku_brt_lock_destroy(void) { toku_pwrite_lock_destroy(); toku_logger_lock_destroy(); toku_graceful_lock_destroy(); + toku_blocktable_lock_destroy(); } void toku_brt_init(void) { diff --git a/newbrt/brt.h b/newbrt/brt.h index 04af3b7f70d..126f7f732a1 100644 --- a/newbrt/brt.h +++ b/newbrt/brt.h @@ -111,8 +111,6 @@ enum brt_header_flags { int toku_brt_keyrange (BRT brt, DBT *key, u_int64_t *less, u_int64_t *equal, u_int64_t *greater); -void extend_block_translation (BLOCKNUM blocknum, struct brt_header *h); - void toku_brt_init(void); void toku_brt_destroy(void); void toku_pwrite_lock_init(void); diff --git a/newbrt/brtdump.c b/newbrt/brtdump.c index b1cf5edc6e3..232bbee2e88 100644 --- a/newbrt/brtdump.c +++ b/newbrt/brtdump.c @@ -30,8 +30,10 @@ dump_header (int f, struct brt_header **header) { else printf(" layout_version=%d\n", h->layout_version); printf(" dirty=%d\n", h->dirty); printf(" nodesize=%u\n", h->nodesize); - printf(" free_blocks=%" PRId64 "\n", h->free_blocks.b); - printf(" unused_memory=%" PRId64 "\n", h->unused_blocks.b); + BLOCKNUM free_blocks = 
toku_block_get_free_blocks(h->blocktable); + BLOCKNUM unused_blocks = toku_block_get_unused_blocks(h->blocktable); + printf(" free_blocks=%" PRId64 "\n", free_blocks.b); + printf(" unused_memory=%" PRId64 "\n", unused_blocks.b); if (h->n_named_roots==-1) { printf(" unnamed_root=%" PRId64 "\n", h->roots[0].b); printf(" flags=%u\n", h->flags_array[0]); @@ -165,10 +167,7 @@ dump_node (int f, BLOCKNUM blocknum, struct brt_header *h) { static void dump_block_translation(struct brt_header *h, u_int64_t offset) { - if (offset < h->translated_blocknum_limit) { - struct block_translation_pair *bx = &h->block_translation[offset]; - printf("%" PRIu64 ": %" PRId64 " %" PRId64 "\n", offset, bx->diskoff, bx->size); - } + toku_block_dump_translation(h->blocktable, offset); } static int @@ -187,28 +186,31 @@ dump_fragmentation(int f, struct brt_header *h) { u_int64_t leafblocks = 0; u_int64_t fragsizes = 0; u_int64_t i; - for (i = 0; i < h->translated_blocknum_limit; i++) { + u_int64_t limit = toku_block_get_translated_blocknum_limit(h->blocktable); + for (i = 0; i < limit; i++) { BRTNODE n; BLOCKNUM blocknum = make_blocknum(i); int r = toku_deserialize_brtnode_from (f, blocknum, 0 /*pass zero for hash, it doesn't matter*/, &n, h); if (r != 0) continue; - blocksizes += h->block_translation[i].size; + + DISKOFF size = toku_block_get_size(h->blocktable, blocknum); + blocksizes += size; if (n->height == 0) { - leafsizes += h->block_translation[i].size; + leafsizes += size; leafblocks += 1; } toku_brtnode_free(&n); } - size_t n = h->translated_blocknum_limit * sizeof (struct block_translation_pair); + size_t n = limit * sizeof (struct block_translation_pair); struct block_translation_pair *bx = toku_malloc(n); - memcpy(bx, h->block_translation, n); - qsort(bx, h->translated_blocknum_limit, sizeof (struct block_translation_pair), bxpcmp); - for (i = 0; i < h->translated_blocknum_limit - 1; i++) { + toku_block_memcpy_translation_table(h->blocktable, n, bx); + qsort(bx, limit, sizeof 
(struct block_translation_pair), bxpcmp); + for (i = 0; i < limit - 1; i++) { // printf("%lu %lu %lu\n", i, bx[i].diskoff, bx[i].size); fragsizes += bx[i+1].diskoff - (bx[i].diskoff + bx[i].size); } toku_free(bx); - printf("translated_blocknum_limit: %" PRIu64 "\n", h->translated_blocknum_limit); + printf("translated_blocknum_limit: %" PRIu64 "\n", limit); printf("leafblocks: %" PRIu64 "\n", leafblocks); printf("blocksizes: %" PRIu64 "\n", blocksizes); printf("leafsizes: %" PRIu64 "\n", leafsizes); @@ -299,15 +301,24 @@ main (int argc, const char *argv[]) { } else { BLOCKNUM blocknum; printf("Block translation:"); - for (blocknum.b=0; blocknum.b<h->unused_blocks.b; blocknum.b++) { + + u_int64_t limit = toku_block_get_translated_blocknum_limit(h->blocktable); + BLOCKNUM unused_blocks = toku_block_get_unused_blocks(h->blocktable); + size_t bx_size = limit * sizeof (struct block_translation_pair); + struct block_translation_pair *bx = toku_malloc(bx_size); + toku_block_memcpy_translation_table(h->blocktable, bx_size, bx); + + + for (blocknum.b=0; blocknum.b< unused_blocks.b; blocknum.b++) { printf(" %" PRId64 ":", blocknum.b); - if (h->block_translation[blocknum.b].size == -1) printf("free"); - else printf("%" PRId64 ":%" PRId64, h->block_translation[blocknum.b].diskoff, h->block_translation[blocknum.b].size); + if (bx[blocknum.b].size == -1) printf("free"); + else printf("%" PRId64 ":%" PRId64, bx[blocknum.b].diskoff, bx[blocknum.b].size); } - for (blocknum.b=1; blocknum.b<h->unused_blocks.b; blocknum.b++) { - if (h->block_translation[blocknum.b].size != -1) + for (blocknum.b=1; blocknum.bflags_array); h->flags_array[0] = header.flags; h->nodesize = header.nodesize; - h->free_blocks = header.free_blocks; - h->unused_blocks = header.unused_blocks; + assert(h->blocktable /* Not initialized. Is this used? 
*/); + toku_block_recovery_set_free_blocks(h->blocktable, header.free_blocks); + toku_block_recovery_set_unused_blocks(h->blocktable, header.unused_blocks); h->n_named_roots = header.n_named_roots; r=toku_fifo_create(&h->fifo); assert(r==0); @@ -687,7 +688,7 @@ toku_recover_changeunusedmemory (LSN UU(lsn), FILENUM filenum, BLOCKNUM UU(oldun assert(r==0); assert(pair->brt); assert(pair->brt->h); - pair->brt->h->unused_blocks = newunused; + toku_block_recovery_set_unused_blocks(pair->brt->h->blocktable, newunused); } static int toku_recover_checkpoint (LSN UU(lsn)) { diff --git a/newbrt/tests/Makefile b/newbrt/tests/Makefile index c12be67f78b..ca270690f0e 100644 --- a/newbrt/tests/Makefile +++ b/newbrt/tests/Makefile @@ -84,8 +84,6 @@ REGRESSION_TESTS_RAW = \ omt-cursor-test \ omt-test \ shortcut \ - test1305 \ - test1308a \ test-assert \ test-brt-delete-both \ test-brt-overflow \ diff --git a/newbrt/tests/brt-serialize-test.c b/newbrt/tests/brt-serialize-test.c index 553b673640c..ea4621fad29 100644 --- a/newbrt/tests/brt-serialize-test.c +++ b/newbrt/tests/brt-serialize-test.c @@ -53,14 +53,13 @@ static void test_serialize(void) { memset(btps, 0, sizeof(btps)); brt->h = brt_h; brt_h->panic = 0; brt_h->panic_string = 0; - brt_h->translated_blocknum_limit = 1; - brt_h->block_translation = btps; - brt_h->block_translation[20].diskoff = 4096; - brt_h->block_translation[20].size = 100; - create_block_allocator(&brt_h->block_allocator, 4096, BLOCK_ALLOCATOR_ALIGNMENT); + toku_blocktable_create_new(&brt_h->blocktable); + toku_blocktable_debug_set_translation(brt_h->blocktable, 1, btps); + btps[20].diskoff = 4096; + btps[20].size = 100; { u_int64_t b; - block_allocator_alloc_block(brt_h->block_allocator, 100, &b); + toku_block_alloc(brt_h->blocktable, 100, &b); assert(b==4096); } @@ -120,9 +119,8 @@ static void test_serialize(void) { toku_free(sn.u.n.childinfos); toku_free(sn.u.n.childkeys); - block_allocator_free_block(brt_h->block_allocator, 4096); - 
destroy_block_allocator(&brt_h->block_allocator); - toku_free(brt_h->block_translation); + toku_block_free(brt_h->blocktable, 4096); + toku_blocktable_destroy(&brt_h->blocktable); toku_free(brt_h); toku_free(brt); } diff --git a/newbrt/wbuf.h b/newbrt/wbuf.h index 0cbce791297..fbefcfd0950 100644 --- a/newbrt/wbuf.h +++ b/newbrt/wbuf.h @@ -15,6 +15,7 @@ /* This code requires that the buffer be big enough to hold whatever you put into it. */ /* This abstraction doesn't do a good job of hiding its internals. * Why? The performance of this code is important, and we want to inline stuff */ +//Why is size here an int instead of DISKOFF like in the initializer? struct wbuf { unsigned char *buf; unsigned int size;