dm-btree.h (6531B)
1/* 2 * Copyright (C) 2011 Red Hat, Inc. 3 * 4 * This file is released under the GPL. 5 */ 6#ifndef _LINUX_DM_BTREE_H 7#define _LINUX_DM_BTREE_H 8 9#include "dm-block-manager.h" 10 11struct dm_transaction_manager; 12 13/*----------------------------------------------------------------*/ 14 15/* 16 * Annotations used to check on-disk metadata is handled as little-endian. 17 */ 18#ifdef __CHECKER__ 19# define __dm_written_to_disk(x) __releases(x) 20# define __dm_reads_from_disk(x) __acquires(x) 21# define __dm_bless_for_disk(x) __acquire(x) 22# define __dm_unbless_for_disk(x) __release(x) 23#else 24# define __dm_written_to_disk(x) 25# define __dm_reads_from_disk(x) 26# define __dm_bless_for_disk(x) 27# define __dm_unbless_for_disk(x) 28#endif 29 30/*----------------------------------------------------------------*/ 31 32/* 33 * Manipulates hierarchical B+ trees with 64-bit keys and arbitrary-sized 34 * values. 35 */ 36 37/* 38 * Information about the values stored within the btree. 39 */ 40struct dm_btree_value_type { 41 void *context; 42 43 /* 44 * The size in bytes of each value. 45 */ 46 uint32_t size; 47 48 /* 49 * Any of these methods can be safely set to NULL if you do not 50 * need the corresponding feature. 51 */ 52 53 /* 54 * The btree is making a duplicate of a run of values, for instance 55 * because previously-shared btree nodes have now diverged. 56 * @value argument is the new copy that the copy function may modify. 57 * (Probably it just wants to increment a reference count 58 * somewhere.) This method is _not_ called for insertion of a new 59 * value: It is assumed the ref count is already 1. 60 */ 61 void (*inc)(void *context, const void *value, unsigned count); 62 63 /* 64 * These values are being deleted. The btree takes care of freeing 65 * the memory pointed to by @value. Often the del function just 66 * needs to decrement a reference counts somewhere. 67 */ 68 void (*dec)(void *context, const void *value, unsigned count); 69 70 /* 71 * A test for equality between two values. When a value is 72 * overwritten with a new one, the old one has the dec method 73 * called _unless_ the new and old value are deemed equal. 74 */ 75 int (*equal)(void *context, const void *value1, const void *value2); 76}; 77 78/* 79 * The shape and contents of a btree. 80 */ 81struct dm_btree_info { 82 struct dm_transaction_manager *tm; 83 84 /* 85 * Number of nested btrees. (Not the depth of a single tree.) 86 */ 87 unsigned levels; 88 struct dm_btree_value_type value_type; 89}; 90 91/* 92 * Set up an empty tree. O(1). 93 */ 94int dm_btree_empty(struct dm_btree_info *info, dm_block_t *root); 95 96/* 97 * Delete a tree. O(n) - this is the slow one! It can also block, so 98 * please don't call it on an IO path. 99 */ 100int dm_btree_del(struct dm_btree_info *info, dm_block_t root); 101 102/* 103 * All the lookup functions return -ENODATA if the key cannot be found. 104 */ 105 106/* 107 * Tries to find a key that matches exactly. O(ln(n)) 108 */ 109int dm_btree_lookup(struct dm_btree_info *info, dm_block_t root, 110 uint64_t *keys, void *value_le); 111 112/* 113 * Tries to find the first key where the bottom level key is >= to that 114 * given. Useful for skipping empty sections of the btree. 115 */ 116int dm_btree_lookup_next(struct dm_btree_info *info, dm_block_t root, 117 uint64_t *keys, uint64_t *rkey, void *value_le); 118 119/* 120 * Insertion (or overwrite an existing value). O(ln(n)) 121 */ 122int dm_btree_insert(struct dm_btree_info *info, dm_block_t root, 123 uint64_t *keys, void *value, dm_block_t *new_root) 124 __dm_written_to_disk(value); 125 126/* 127 * A variant of insert that indicates whether it actually inserted or just 128 * overwrote. Useful if you're keeping track of the number of entries in a 129 * tree. 130 */ 131int dm_btree_insert_notify(struct dm_btree_info *info, dm_block_t root, 132 uint64_t *keys, void *value, dm_block_t *new_root, 133 int *inserted) 134 __dm_written_to_disk(value); 135 136/* 137 * Remove a key if present. This doesn't remove empty sub trees. Normally 138 * subtrees represent a separate entity, like a snapshot map, so this is 139 * correct behaviour. O(ln(n)). 140 */ 141int dm_btree_remove(struct dm_btree_info *info, dm_block_t root, 142 uint64_t *keys, dm_block_t *new_root); 143 144/* 145 * Removes a _contiguous_ run of values starting from 'keys' and not 146 * reaching keys2 (where keys2 is keys with the final key replaced with 147 * 'end_key'). 'end_key' is the one-past-the-end value. 'keys' may be 148 * altered. 149 */ 150int dm_btree_remove_leaves(struct dm_btree_info *info, dm_block_t root, 151 uint64_t *keys, uint64_t end_key, 152 dm_block_t *new_root, unsigned *nr_removed); 153 154/* 155 * Returns < 0 on failure. Otherwise the number of key entries that have 156 * been filled out. Remember trees can have zero entries, and as such have 157 * no lowest key. 158 */ 159int dm_btree_find_lowest_key(struct dm_btree_info *info, dm_block_t root, 160 uint64_t *result_keys); 161 162/* 163 * Returns < 0 on failure. Otherwise the number of key entries that have 164 * been filled out. Remember trees can have zero entries, and as such have 165 * no highest key. 166 */ 167int dm_btree_find_highest_key(struct dm_btree_info *info, dm_block_t root, 168 uint64_t *result_keys); 169 170/* 171 * Iterate through the a btree, calling fn() on each entry. 172 * It only works for single level trees and is internally recursive, so 173 * monitor stack usage carefully. 174 */ 175int dm_btree_walk(struct dm_btree_info *info, dm_block_t root, 176 int (*fn)(void *context, uint64_t *keys, void *leaf), 177 void *context); 178 179 180/*----------------------------------------------------------------*/ 181 182/* 183 * Cursor API. This does not follow the rolling lock convention. Since we 184 * know the order that values are required we can issue prefetches to speed 185 * up iteration. Use on a single level btree only. 186 */ 187#define DM_BTREE_CURSOR_MAX_DEPTH 16 188 189struct cursor_node { 190 struct dm_block *b; 191 unsigned index; 192}; 193 194struct dm_btree_cursor { 195 struct dm_btree_info *info; 196 dm_block_t root; 197 198 bool prefetch_leaves; 199 unsigned depth; 200 struct cursor_node nodes[DM_BTREE_CURSOR_MAX_DEPTH]; 201}; 202 203/* 204 * Creates a fresh cursor. If prefetch_leaves is set then it is assumed 205 * the btree contains block indexes that will be prefetched. The cursor is 206 * quite large, so you probably don't want to put it on the stack. 207 */ 208int dm_btree_cursor_begin(struct dm_btree_info *info, dm_block_t root, 209 bool prefetch_leaves, struct dm_btree_cursor *c); 210void dm_btree_cursor_end(struct dm_btree_cursor *c); 211int dm_btree_cursor_next(struct dm_btree_cursor *c); 212int dm_btree_cursor_skip(struct dm_btree_cursor *c, uint32_t count); 213int dm_btree_cursor_get_value(struct dm_btree_cursor *c, uint64_t *key, void *value_le); 214 215#endif /* _LINUX_DM_BTREE_H */