diff --git a/CMakeLists.txt b/CMakeLists.txt index 19d7642..fd5e12e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -11,4 +11,4 @@ include_directories(./invodb) add_executable(InvoDB invodb/main.cpp - invodb/main.h invodb/file/page_manager.cpp invodb/file/page_manager.h invodb/models/json.cpp invodb/models/json.h invodb/models/collection.cpp invodb/models/collection.h invodb/file/storage_page.cpp invodb/file/storage_page.h invodb/utils/logger.h invodb/utils/uuid.h invodb/btree/btree_node.h invodb/btree/btree_uuid.cpp invodb/btree/btree_uuid.h invodb/btree/btree_node.cpp) + invodb/main.h invodb/file/page_manager.cpp invodb/file/page_manager.h invodb/models/json.cpp invodb/models/json.h invodb/models/collection.cpp invodb/models/collection.h invodb/file/storage_page.cpp invodb/file/storage_page.h invodb/utils/logger.h invodb/utils/uuid.h invodb/btree/node.h invodb/btree/btree.h) diff --git a/invodb/btree/btree.h b/invodb/btree/btree.h new file mode 100644 index 0000000..8315723 --- /dev/null +++ b/invodb/btree/btree.h @@ -0,0 +1,434 @@ +// +// Created by YuhangQ on 2021/10/25. +// + +#ifndef INVODB_BTREE_H +#define INVODB_BTREE_H + +#include "btree/node.h" +#include "utils/uuid.h" + +template +class BTree { +public: + BTree(const int& address); + void insert(const KT &key, const VT &value); + void update(const KT &key, const VT &value); + void remove(const KT &key); + int getNodeSize(); + VT find(const KT &key); + int size(); +private: + void removeEntry(int curAdd, const std::string& key, const int& pointer); + bool canCoalesce(int curAdd, int sibAdd); + void coalesce(int curAdd, int sibAdd); + bool canRedistribute(int curAdd, int sibAdd); + void redistribute(int curAdd, int sibAdd); + int findNode(const KT &key); + void split(const KT &key, int address, int parentAdd, int curAdd); + void insertInternal(const KT &key, int curAdd, int lLeafAdd, int rLeafAdd); + int root; + int n_size; +}; + +// BTree BTreeNode + +template +BTree::BTree(const int& address) { + root = address; + n_size = 0; +} + +template +void BTree::insert(const KT &key, const VT &value) { + if(find(key) != -1) { + throw "keySet already exists."; + } + + n_size++; + + auto cur = BTreeNode::getNode(findNode(key)); + + // insert directly + if(cur->size < cur->m - 1) { + cur->linkSet[cur->insert(key)] = value; + cur->save(); + return; + } + + // split + split(key, value, cur->parent, cur->address); +} + +template +void BTree::update(const KT &key, const VT &value) { + if(find(key) == -1) { + throw "keySet doesn't exists."; + } + auto cur = BTreeNode::getNode(findNode(key)); + cur->linkSet[cur->findPos(key)] = value; + cur->save(); +} + +template +void BTree::remove(const KT &key) { + if(find(key) == -1) { + throw "keySet doesn't exists."; + } + n_size--; + auto cur = BTreeNode::getNode(findNode(key)); + removeEntry(cur->address, key, find(key)); +} + +template +VT BTree::find(const KT &key) { + auto cur = BTreeNode::getNode(findNode(key)); + for(int i=0; isize; i++) { + if(key == cur->keySet[i]) return cur->linkSet[i]; + } + return -1; +} + +template +int BTree::size() { + return n_size; +} + +template +void BTree::removeEntry(int curAdd, const std::string &key, const int &pointer) { + auto cur = BTreeNode::getNode(curAdd); + int pos = cur->findPos(key); + if(pos == -1) return; + for(int i=pos; isize-1; i++) { + cur->keySet[i] = cur->keySet[i + 1]; + } + for(int i=pos+(cur->linkSet[pos] != pointer); isize; i++) { + cur->linkSet[i] = cur->linkSet[i + 1]; + } + cur->size--; + cur->save(); + + if(curAdd == root && !cur->leaf && cur->size == 0) { + root = cur->linkSet[0]; + BTreeNode* root = BTreeNode::getNode(cur->linkSet[0]); + root->parent = 0; + root->save(); + cur->release(); + return; + } + + if(cur->enough() || cur->address == root) return; + + if(canCoalesce(cur->address, cur->left)) { + coalesce(cur->address, cur->left); + } else if(canCoalesce(cur->address, cur->right)) { + coalesce(cur->address, cur->right); + } else if(canRedistribute(cur->address, cur->left)) { + redistribute(cur->address, cur->left); + } else if(canRedistribute(cur->address, cur->right)) { + redistribute(cur->address, cur->right); + } else { + throw "these is a bug!"; + } +} + +template +bool BTree::canCoalesce(int curAdd, int sibAdd) { + if(sibAdd == 0) return false; + auto cur = BTreeNode::getNode(curAdd); + auto sib = BTreeNode::getNode(sibAdd); + if(cur->parent != sib->parent) return false; + return (cur->size + sib->size <= BTreeNode::m - 1 - !cur->leaf); +} + +template +void BTree::coalesce(int curAdd, int sibAdd) { + auto cur = BTreeNode::getNode(curAdd); + auto sib = BTreeNode::getNode(sibAdd); + auto parent = BTreeNode::getNode(cur->parent); + std::string *k; + for(int i=0; isize; i++) { + if((parent->linkSet[i] == curAdd && parent->linkSet[i+1] == sibAdd) + || (parent->linkSet[i] == sibAdd && parent->linkSet[i+1] == curAdd)) { + k = &parent->keySet[i]; + break; + } + } + BTreeNode* newNode = nullptr; + if(cur->left == sibAdd) { + if(!cur->leaf) sib->insert(*k); + for(int i=0; isize; i++) { + sib->linkSet[sib->insert(cur->keySet[i])] = cur->linkSet[i]; + } + sib->linkSet[sib->size] = cur->linkSet[cur->size]; + sib->right = cur->right; + if(cur->right) { + BTreeNode *right = BTreeNode::getNode(cur->right); + right->left = sib->address; + right->save(); + } + newNode = sib; + newNode->save(); + removeEntry(parent->address, *k, curAdd); + cur->release(); + } else { + if(!cur->leaf) cur->insert(*k); + for(int i=0; isize; i++) { + cur->linkSet[cur->insert(sib->keySet[i])] = sib->linkSet[i]; + } + cur->linkSet[cur->size] = sib->linkSet[sib->size]; + + cur->right = sib->right; + + if(sib->right) { + BTreeNode *right = BTreeNode::getNode(sib->right); + right->left = cur->address; + right->save(); + } + newNode = cur; + newNode->save(); + removeEntry(parent->address, *k, sibAdd); + sib->release(); + } + if(newNode->leaf) return; + for(int i=0; i<=newNode->size; i++) { + auto child = BTreeNode::getNode(newNode->linkSet[i]); + child->parent = newNode->address; + child->save(); + } +} + +template +bool BTree::canRedistribute(int curAdd, int sibAdd) { + if(sibAdd == 0) return false; + auto cur = BTreeNode::getNode(curAdd); + auto sib = BTreeNode::getNode(sibAdd); + if(cur->parent != sib->parent) return false; + return sib->size > ((sib->m - !sib->leaf) / 2); +} + +template +void BTree::redistribute(int curAdd, int sibAdd) { + auto cur = BTreeNode::getNode(curAdd); + auto sib = BTreeNode::getNode(sibAdd); + auto parent = BTreeNode::getNode(cur->parent); + std::string k; + int pos; + for(pos=0; possize; pos++) { + if((parent->linkSet[pos] == curAdd && parent->linkSet[pos+1] == sibAdd) + || (parent->linkSet[pos] == sibAdd && parent->linkSet[pos+1] == curAdd)) { + k = parent->keySet[pos]; + break; + } + } + + if(cur->left == sibAdd) { + if(cur->leaf) { + cur->linkSet[cur->insert(sib->keySet[sib->size-1])] = sib->linkSet[sib->size-1]; + parent->keySet[pos] = cur->keySet[0]; + } else{ + cur->linkSet[cur->insert(k)] = sib->linkSet[sib->size]; + parent->keySet[pos] = sib->keySet[sib->size-1]; + } + if(!cur->leaf) { + BTreeNode *child = BTreeNode::getNode(sib->linkSet[sib->size - cur->leaf]); + child->parent = cur->address; + child->save(); + } + sib->size--; + } else { + if(cur->leaf) { + cur->linkSet[cur->insert(sib->keySet[0])] = sib->linkSet[0]; + for(int i=0; isize; i++) { + sib->keySet[i] = sib->keySet[i+1]; + sib->linkSet[i] = sib->linkSet[i+1]; + } + parent->keySet[pos] = sib->keySet[0]; + } else { + if(cur->size != 0) { + cur->linkSet[cur->insert(k)+1] = sib->linkSet[0]; + } else { + cur->keySet[0] = k; + cur->linkSet[1] = sib->linkSet[0]; + cur->size++; + } + parent->keySet[pos] = sib->keySet[0]; + if(!cur->leaf) { + auto child = BTreeNode::getNode(sib->linkSet[0]); + child->parent = cur->address; + child->save(); + } + for(int i=0; isize; i++) { + sib->keySet[i] = sib->keySet[i+1]; + sib->linkSet[i] = sib->linkSet[i+1]; + } + } + sib->size--; + } + + cur->save(); + sib->save(); + parent->save(); +} + +template +int BTree::findNode(const KT &key) { + auto cur = BTreeNode::getNode(root); + while(!cur->leaf) { + for(int i=0; isize; i++) { + if(key < cur->keySet[i]) { + cur = BTreeNode::getNode(cur->linkSet[i]); + break; + } + if(i == cur->size - 1) { + cur = BTreeNode::getNode(cur->linkSet[i + 1]); + break; + } + } + } + return cur->address; +} + +template +void BTree::split(const KT &key, int address, int parentAdd, int curAdd) { + auto cur = BTreeNode::getNode(curAdd); + + cur->linkSet[cur->insert(key)] = address; + + auto lLeaf = BTreeNode::getNode(PageManager::Instance().allocate()); + auto rLeaf = BTreeNode::getNode(PageManager::Instance().allocate()); + + int mid = (cur->m / 2); + for(int i=0; ilinkSet[lLeaf->insert(cur->keySet[i])] = cur->linkSet[i]; + lLeaf->right = rLeaf->address; + lLeaf->left = cur->left; + for(int i=mid; im; i++) rLeaf->linkSet[rLeaf->insert(cur->keySet[i])] = cur->linkSet[i]; + rLeaf->left = lLeaf->address; + rLeaf->right = cur->right; + + if(cur->left) { + auto curLeft = BTreeNode::getNode(cur->left); + curLeft->right = lLeaf->address; + curLeft->save(); + } + + if(cur->right) { + auto curRight = BTreeNode::getNode(cur->right); + curRight->left = rLeaf->address; + curRight->save(); + } + + cur->release(); + + if(cur->address == root) { + auto newRoot = BTreeNode::getNode(PageManager::Instance().allocate()); + newRoot->insert(rLeaf->keySet[0]); + newRoot->linkSet[0] = lLeaf->address; + newRoot->linkSet[1] = rLeaf->address; + newRoot->leaf = false; + root = newRoot->address; + newRoot->parent = 0; + lLeaf->parent = rLeaf->parent = root; + + newRoot->save(); + lLeaf->save(); + rLeaf->save(); + } else { + lLeaf->save(); + rLeaf->save(); + insertInternal(rLeaf->keySet[0], cur->parent, lLeaf->address, rLeaf->address); + } +} + +template +void BTree::insertInternal(const KT &key, int curAdd, int lLeafAdd, int rLeafAdd) { + BTreeNode *cur = BTreeNode::getNode(curAdd); + BTreeNode *lLeaf = BTreeNode::getNode(lLeafAdd); + BTreeNode *rLeaf = BTreeNode::getNode(rLeafAdd); + + if(cur->size < cur->m - 1) { + int pos = cur->insert(key); + cur->linkSet[pos] = lLeaf->address; + cur->linkSet[pos+1] = rLeaf->address; + lLeaf->parent = rLeaf->parent = curAdd; + cur->save(); + lLeaf->save(); + rLeaf->save(); + return; + } + + auto newLChild = BTreeNode::getNode(PageManager::Instance().allocate()); + auto newRChild = BTreeNode::getNode(PageManager::Instance().allocate()); + newLChild->leaf = false; + newRChild->leaf = false; + newLChild->right = newRChild->address; + newLChild->left = cur->left; + newRChild->left = newLChild->address; + newRChild->right = cur->right; + + if(cur->left) { + auto curLeft = BTreeNode::getNode(cur->left); + curLeft->right = newLChild->address; + curLeft->save(); + } + + if(cur->right) { + auto curRight = BTreeNode::getNode(cur->right); + curRight->left = newRChild->address; + curRight->save(); + } + + int pos = cur->insert(key); + cur->linkSet[pos] = lLeaf->address; + cur->linkSet[pos+1] = rLeaf->address; + + int mid = cur->size / 2; + + for(int i=0; iinsert(cur->keySet[i]); + for(int i=0; i<=mid; i++) newLChild->linkSet[i] = cur->linkSet[i]; + + for(int i=mid+1; im; i++) newRChild->insert(cur->keySet[i]); + for(int i=mid+1; i<=cur->m; i++) newRChild->linkSet[i-mid-1] = cur->linkSet[i]; + + for(int i=0; i<=newLChild->size; i++) { + auto child = BTreeNode::getNode(newLChild->linkSet[i]); + child->parent = newLChild->address; + child->save(); + } + for(int i=0; i<=newRChild->size; i++) { + auto child = BTreeNode::getNode(newRChild->linkSet[i]); + child->parent = newRChild->address; + child->save(); + } + + cur->release(); + + if(cur->address == root) { + auto newRoot = BTreeNode::getNode(PageManager::Instance().allocate()); + newRoot->insert(cur->keySet[mid]); + newRoot->linkSet[0] = newLChild->address; + newRoot->linkSet[1] = newRChild->address; + newRoot->leaf = false; + root = newRoot->address; + newRoot->parent = 0; + newLChild->parent = newRChild->parent = root; + + newRoot->save(); + newLChild->save(); + newRChild->save(); + } else { + newLChild->save(); + newRChild->save(); + insertInternal(cur->keySet[mid], cur->parent, newLChild->address, newRChild->address); + } +} + +template +int BTree::getNodeSize() { + auto p = BTreeNode::getNode(root); + return p->save(); +} + +// BTree BTreeNode + +#endif //INVODB_BTREE_H diff --git a/invodb/btree/btree_node.cpp b/invodb/btree/btree_node.cpp deleted file mode 100644 index ed47849..0000000 --- a/invodb/btree/btree_node.cpp +++ /dev/null @@ -1,106 +0,0 @@ -// -// Created by YuhangQ on 2021/10/25. -// - -#include "btree_node.h" - -std::map NodeUUID::map; - -NodeUUID *NodeUUID::getNode(const int &address) { - if(address == 0) { - throw "fuck"; - } - if(map.count(address) == 0) { - delete map[address]; - map[address] = new NodeUUID(address); - } -// if(map.count(address) == 0) { -// map[address] = new NodeUUID(address); -// } - return map[address]; -} - -NodeUUID::NodeUUID(const int& address):address(address) { - clear(); - StoragePage page = PageManager::Instance().getPage(address); - int p = 0; - size = page.getIntStartFrom(p); p += 4; - parent = page.getIntStartFrom(p); p += 4; - left = page.getIntStartFrom(p); p += 4; - right = page.getIntStartFrom(p); p += 4; - leaf = !page.getIntStartFrom(p); p += 4; - for(int i=0; i key[pos]) pos++; - val[size + 1] = val[size]; - for(int i=size; i>pos; i--) { - val[i] = val[i - 1]; - key[i] = key[i - 1]; - } - key[pos] = uuid; - size++; - return pos; -} - -void NodeUUID::print() { - printf("---------BTreeNode---------\n"); - for(int i=0; iaddress); -} - -NodeUUID *NodeUUID::release(const int &address) { - return nullptr; -} - - - diff --git a/invodb/btree/btree_node.h b/invodb/btree/btree_node.h deleted file mode 100644 index 534b375..0000000 --- a/invodb/btree/btree_node.h +++ /dev/null @@ -1,57 +0,0 @@ -// -// Created by i on 2021/10/24. -// - -#ifndef INVODB_BTREE_NODE_H -#define INVODB_BTREE_NODE_H - -#include -#include -#include -#include -#include "file/page_manager.h" - -/** - * m = 27 - * value string max - * (32 + 4)*28 + 5 = 1013 - */ - - -class NodeUUID { -public: - static NodeUUID* getNode(const int& address); - static NodeUUID* release(const int& address); - int insert(const std::string& uuid); - int findPos(const std::string& uuid); - void print(); - void release(); - void clear(); - void save(); - //static const int m = 27; - static const int m = 27; - static const int maxCount = m - 1; - static const int minLeafCount = m / 2; - static const int minLinkCount = (m - 1) / 2; - bool enough() { - if(leaf) return size >= minLeafCount; - else return size >= minLinkCount; - } - bool full() { - return size == maxCount; - } - std::string key[m+1]; - int val[m+1]; - int parent; - int left; - int right; - bool leaf; - int size; - int address; -private: - NodeUUID(const int& address); - static std::map map; - -}; - -#endif //INVODB_BTREE_NODE_H diff --git a/invodb/btree/btree_uuid.cpp b/invodb/btree/btree_uuid.cpp deleted file mode 100644 index 1b0739b..0000000 --- a/invodb/btree/btree_uuid.cpp +++ /dev/null @@ -1,487 +0,0 @@ -// -// Created by YuhangQ on 2021/10/25. -// - -#include "btree_uuid.h" - -BTreeUUID::BTreeUUID(const int& address) { - root = address; - n_size = 0; -} - -int BTreeUUID::find(const std::string& uuid) { - NodeUUID* cur = NodeUUID::getNode(findNode(uuid)); - for(int i=0; isize; i++) { - if(uuid == cur->key[i]) return cur->val[i]; - } - return -1; -} - -int BTreeUUID::findNode(const std::string& uuid) { - NodeUUID* cur = NodeUUID::getNode(root); - while(!cur->leaf) { - for(int i=0; isize; i++) { - if(uuid < cur->key[i]) { - cur = NodeUUID::getNode(cur->val[i]); - break; - } - if(i == cur->size - 1) { - cur = NodeUUID::getNode(cur->val[i + 1]); - break; - } - } - } - return cur->address; -} - -void BTreeUUID::update(const std::string &uuid, int address) { - if(find(uuid) == -1) { - throw "key doesn't exists."; - } - NodeUUID* cur = NodeUUID::getNode(findNode(uuid)); - cur->val[cur->findPos(uuid)] = address; - cur->save(); -} - -void BTreeUUID::insert(const std::string& uuid, int address) { - if(find(uuid) != -1) { - throw "key already exists."; - } - - n_size++; - - NodeUUID* cur = NodeUUID::getNode(findNode(uuid)); - - // insert directly - if(cur->size < cur->m - 1) { - cur->val[cur->insert(uuid)] = address; - cur->save(); - return; - } - - // split - split(uuid, address, cur->parent, cur->address); -} - -void BTreeUUID::split(const std::string& uuid, int address, int parentAddr, int curAddr) { - - NodeUUID* cur = NodeUUID::getNode(curAddr); - - cur->val[cur->insert(uuid)] = address; - - NodeUUID* lLeaf = NodeUUID::getNode(PageManager::Instance().allocate()); - NodeUUID* rLeaf = NodeUUID::getNode(PageManager::Instance().allocate()); - - int mid = (cur->m / 2); - for(int i=0; ival[lLeaf->insert(cur->key[i])] = cur->val[i]; - lLeaf->right = rLeaf->address; - lLeaf->left = cur->left; - for(int i=mid; im; i++) rLeaf->val[rLeaf->insert(cur->key[i])] = cur->val[i]; - rLeaf->left = lLeaf->address; - rLeaf->right = cur->right; - - if(cur->left) { - NodeUUID* curLeft = NodeUUID::getNode(cur->left); - curLeft->right = lLeaf->address; - curLeft->save(); - } - - if(cur->right) { - NodeUUID* curRight = NodeUUID::getNode(cur->right); - curRight->left = rLeaf->address; - curRight->save(); - } - - cur->release(); - - if(cur->address == root) { - NodeUUID* newRoot = NodeUUID::getNode(PageManager::Instance().allocate()); - newRoot->insert(rLeaf->key[0]); - newRoot->val[0] = lLeaf->address; - newRoot->val[1] = rLeaf->address; - newRoot->leaf = false; - root = newRoot->address; - newRoot->parent = 0; - lLeaf->parent = rLeaf->parent = root; - - newRoot->save(); - lLeaf->save(); - rLeaf->save(); - } else { - lLeaf->save(); - rLeaf->save(); - insertInternal(rLeaf->key[0], cur->parent, lLeaf->address, rLeaf->address); - } -} - -void BTreeUUID::insertInternal(const std::string& uuid, int curAddr, int lLeafAddr, int rLeafAddr) { - - NodeUUID *cur = NodeUUID::getNode(curAddr); - NodeUUID *lLeaf = NodeUUID::getNode(lLeafAddr); - NodeUUID *rLeaf = NodeUUID::getNode(rLeafAddr); - - if(cur->size < cur->m - 1) { - int pos = cur->insert(uuid); - cur->val[pos] = lLeaf->address; - cur->val[pos+1] = rLeaf->address; - lLeaf->parent = rLeaf->parent = curAddr; - cur->save(); - lLeaf->save(); - rLeaf->save(); - return; - } - - NodeUUID* newLChild = NodeUUID::getNode(PageManager::Instance().allocate()); - NodeUUID* newRChild = NodeUUID::getNode(PageManager::Instance().allocate()); - newLChild->leaf = false; - newRChild->leaf = false; - newLChild->right = newRChild->address; - newLChild->left = cur->left; - newRChild->left = newLChild->address; - newRChild->right = cur->right; - - if(cur->left) { - NodeUUID* curLeft = NodeUUID::getNode(cur->left); - curLeft->right = newLChild->address; - curLeft->save(); - } - - if(cur->right) { - NodeUUID* curRight = NodeUUID::getNode(cur->right); - curRight->left = newRChild->address; - curRight->save(); - } - - int pos = cur->insert(uuid); - cur->val[pos] = lLeaf->address; - cur->val[pos+1] = rLeaf->address; - - int mid = cur->size / 2; - - for(int i=0; iinsert(cur->key[i]); - for(int i=0; i<=mid; i++) newLChild->val[i] = cur->val[i]; - - for(int i=mid+1; im; i++) newRChild->insert(cur->key[i]); - for(int i=mid+1; i<=cur->m; i++) newRChild->val[i-mid-1] = cur->val[i]; - - for(int i=0; i<=newLChild->size; i++) { - NodeUUID* child = NodeUUID::getNode(newLChild->val[i]); - child->parent = newLChild->address; - child->save(); - } - for(int i=0; i<=newRChild->size; i++) { - NodeUUID* child = NodeUUID::getNode(newRChild->val[i]); - child->parent = newRChild->address; - child->save(); - } - - cur->release(); - - if(cur->address == root) { - NodeUUID* newRoot = NodeUUID::getNode(PageManager::Instance().allocate()); - newRoot->insert(cur->key[mid]); - newRoot->val[0] = newLChild->address; - newRoot->val[1] = newRChild->address; - newRoot->leaf = false; - root = newRoot->address; - newRoot->parent = 0; - newLChild->parent = newRChild->parent = root; - - newRoot->save(); - newLChild->save(); - newRChild->save(); - } else { - newLChild->save(); - newRChild->save(); - insertInternal(cur->key[mid], cur->parent, newLChild->address, newRChild->address); - } -} - -/* -void BTreeUUID::print() { - innerPrint(NodeUUID::getNode(root)); -} - -void BTreeUUID::innerPrint(NodeUUID *cur) { - if(cur->address == root) { - cnt = 0; - } - if(cur->leaf) cnt += cur->size; - printf("---------%d(%d)count=%d&sum=%d---l:%d,r:%d-parent:%d----\n", cur->address, cur->leaf, cur->size, cnt, cur->left, cur->right, cur->parent); - for(int i=0; isize; i++) { - printf("%d:%s ", i, cur->key[i].substr(0, 6).c_str()); - } - printf("\n"); - for(int i=0; i<=cur->size; i++) { - printf("%d:%d ", i, cur->val[i]); - } - printf("\n"); - - - if(cur->leaf) return; - for(int i=0; i<=cur->size; i++) { - - if(NodeUUID::getNode(cur->val[i])->parent != cur->address) { - printf("FUCK\n"); - //exit(0); - } - innerPrint(NodeUUID::getNode(cur->val[i])); - } - -} - */ - -void BTreeUUID::remove(const std::string &uuid) { - if(find(uuid) == -1) { - throw "key doesn't exists."; - } - n_size--; - NodeUUID* cur = NodeUUID::getNode(findNode(uuid)); - removeEntry(cur->address, uuid, find(uuid)); -} - -void BTreeUUID::removeEntry(int curAddr, const std::string& uuid, const int& pointer) { - //printf("---removeEntry: %d %s %d\n", curAddr, uuid.c_str(), pointer); - - NodeUUID* cur = NodeUUID::getNode(curAddr); - int pos = cur->findPos(uuid); - if(pos == -1) return; - for(int i=pos; isize-1; i++) { - cur->key[i] = cur->key[i + 1]; - } - for(int i=pos+(cur->val[pos] != pointer); isize; i++) { - cur->val[i] = cur->val[i + 1]; - } - cur->size--; - cur->save(); - - if(curAddr == root && !cur->leaf && cur->size == 0) { - root = cur->val[0]; - NodeUUID* root = NodeUUID::getNode(cur->val[0]); - root->parent = 0; - root->save(); - - cur->release(); - return; - } - - if(cur->enough() || cur->address == root) return; - - - //printf("em %d %d\n", cur->address, cur->size); - - - if(canCoalesce(cur->address, cur->left)) { - coalesce(cur->address, cur->left); - } else if(canCoalesce(cur->address, cur->right)) { - coalesce(cur->address, cur->right); - } else if(canRedistribute(cur->address, cur->left)) { - redistribute(cur->address, cur->left); - } else if(canRedistribute(cur->address, cur->right)) { - redistribute(cur->address, cur->right); - } else { - throw "these is a bug!"; - } -} - - -bool BTreeUUID::canCoalesce(int curAddr, int sibAddr) { - if(sibAddr == 0) return false; - NodeUUID* cur = NodeUUID::getNode(curAddr); - NodeUUID* sib = NodeUUID::getNode(sibAddr); - if(cur->parent != sib->parent) return false; - return (cur->size + sib->size <= NodeUUID::m - 1 - !cur->leaf); -} - -void BTreeUUID::coalesce(int curAddr, int sibAddr) { - - //printf("coalesce %d and %d\n", curAddr, sibAddr); - - NodeUUID* cur = NodeUUID::getNode(curAddr); - NodeUUID* sib = NodeUUID::getNode(sibAddr); - NodeUUID* parent = NodeUUID::getNode(cur->parent); - std::string *k; - for(int i=0; isize; i++) { - if((parent->val[i] == curAddr && parent->val[i+1] == sibAddr) - || (parent->val[i] == sibAddr && parent->val[i+1] == curAddr)) { - k = &parent->key[i]; - break; - } - } - NodeUUID* newNode = nullptr; - if(cur->left == sibAddr) { - if(!cur->leaf) sib->insert(*k); - for(int i=0; isize; i++) { - sib->val[sib->insert(cur->key[i])] = cur->val[i]; - } - sib->val[sib->size] = cur->val[cur->size]; - sib->right = cur->right; - if(cur->right) { - NodeUUID *right = NodeUUID::getNode(cur->right); - right->left = sib->address; - right->save(); - } - newNode = sib; - newNode->save(); - removeEntry(parent->address, *k, curAddr); - cur->release(); - } else { - if(!cur->leaf) cur->insert(*k); - for(int i=0; isize; i++) { - cur->val[cur->insert(sib->key[i])] = sib->val[i]; - } - cur->val[cur->size] = sib->val[sib->size]; - - cur->right = sib->right; - - if(sib->right) { - NodeUUID *right = NodeUUID::getNode(sib->right); - right->left = cur->address; - right->save(); - } - newNode = cur; - newNode->save(); - removeEntry(parent->address, *k, sibAddr); - sib->release(); - } - if(newNode->leaf) return; - for(int i=0; i<=newNode->size; i++) { - NodeUUID* child = NodeUUID::getNode(newNode->val[i]); - child->parent = newNode->address; - child->save(); - } -} - -bool BTreeUUID::canRedistribute(int curAddr, int sibAddr) { - if(sibAddr == 0) return false; - NodeUUID* cur = NodeUUID::getNode(curAddr); - NodeUUID* sib = NodeUUID::getNode(sibAddr); - if(cur->parent != sib->parent) return false; - return sib->size > ((sib->m - !sib->leaf) / 2); -} - -void BTreeUUID::redistribute(int curAddr, int sibAddr) { - - //printf("redistribute %d from %d\n", curAddr, sibAddr); - - NodeUUID* cur = NodeUUID::getNode(curAddr); - NodeUUID* sib = NodeUUID::getNode(sibAddr); - NodeUUID* parent = NodeUUID::getNode(cur->parent); - std::string k; - int pos; - for(pos=0; possize; pos++) { - if((parent->val[pos] == curAddr && parent->val[pos+1] == sibAddr) - || (parent->val[pos] == sibAddr && parent->val[pos+1] == curAddr)) { - k = parent->key[pos]; - break; - } - } - - if(cur->left == sibAddr) { - if(cur->leaf) { - cur->val[cur->insert(sib->key[sib->size-1])] = sib->val[sib->size-1]; - parent->key[pos] = cur->key[0]; - } else{ - cur->val[cur->insert(k)] = sib->val[sib->size]; - parent->key[pos] = sib->key[sib->size-1]; - } - if(!cur->leaf) { - NodeUUID *child = NodeUUID::getNode(sib->val[sib->size - cur->leaf]); - child->parent = cur->address; - child->save(); - } - sib->size--; - } else { - if(cur->leaf) { - cur->val[cur->insert(sib->key[0])] = sib->val[0]; - for(int i=0; isize; i++) { - sib->key[i] = sib->key[i+1]; - sib->val[i] = sib->val[i+1]; - } - parent->key[pos] = sib->key[0]; - } else { - if(cur->size != 0) { - cur->val[cur->insert(k)+1] = sib->val[0]; - } else { - cur->key[0] = k; - cur->val[1] = sib->val[0]; - cur->size++; - } - parent->key[pos] = sib->key[0]; - if(!cur->leaf) { - NodeUUID* child = NodeUUID::getNode(sib->val[0]); - child->parent = cur->address; - child->save(); - } - for(int i=0; isize; i++) { - sib->key[i] = sib->key[i+1]; - sib->val[i] = sib->val[i+1]; - } - } - sib->size--; - } - - cur->save(); - sib->save(); - parent->save(); -} - -int BTreeUUID::size() { - return n_size; -} - -void BTreeUUID::testAndBenchmark(const int& n) { - - clock_t start = clock(); - - std::map map; - - for(int i=0; ifirst; - int addr = rand(); - map[uuid] = addr; - update(uuid, addr); - } - // remove - else { - if(map.size() == 0) continue; - auto it = map.begin(); - std::advance(it, rand() % map.size()); - std::string uuid = it->first; - map.erase(uuid); - remove(uuid); - } - } - - if(map.size() != size()) { - printf("%d %d\n", map.size(), size()); - printf("BTree has BUG!\n"); - exit(0); - } - - for(auto it=map.begin(); it != map.end(); it++) { - if(find(it->first) != it->second) { - printf("BTree has BUG!\n"); - exit(0); - } - } - - clock_t end = clock(); - - printf("BTree pass the test with n=%d, time=%fs!\n", n, (double)(end - start) / CLOCKS_PER_SEC); -} - - diff --git a/invodb/btree/btree_uuid.h b/invodb/btree/btree_uuid.h deleted file mode 100644 index 9ae723f..0000000 --- a/invodb/btree/btree_uuid.h +++ /dev/null @@ -1,36 +0,0 @@ -// -// Created by YuhangQ on 2021/10/25. -// - -#ifndef INVODB_BTREE_UUID_H -#define INVODB_BTREE_UUID_H - -#include "btree/btree_node.h" -#include "utils/uuid.h" - -class BTreeUUID { -public: - BTreeUUID(const int& address); - void insert(const std::string& uuid, int address); - void update(const std::string& uuid, int address); - void remove(const std::string& uuid); - int find(const std::string& uuid); - void print(); - void testAndBenchmark(const int& n); - int size(); -private: - void removeEntry(int curAddr, const std::string& uuid, const int& pointer); - bool canCoalesce(int curAddr, int sibAddr); - void coalesce(int curAddr, int sibAddr); - bool canRedistribute(int curAddr, int sibAddr); - void redistribute(int curAddr, int sibAddr); - void innerPrint(NodeUUID* cur); - int findNode(const std::string& uuid); - void split(const std::string& uuid, int address, int parentAddr, int curAddr); - void insertInternal(const std::string& uuid, int curAddr, int lLeafAddr, int rLeafAddr); - int root; - int n_size; -}; - - -#endif //INVODB_BTREE_UUID_H diff --git a/invodb/btree/node.h b/invodb/btree/node.h new file mode 100644 index 0000000..6a3c63b --- /dev/null +++ b/invodb/btree/node.h @@ -0,0 +1,194 @@ +// +// Created by i on 2021/10/24. +// + +#ifndef INVODB_NODE_H +#define INVODB_NODE_H + +#include +#include +#include +#include +#include +#include "file/page_manager.h" + +template +class BTreeNode { +public: + static BTreeNode* getNode(const int &address); + static BTreeNode* release(const int &address); + int insert(KT const &key); + int findPos(KT const &key); + void release(); + void clear(); + int save(); + static const int m = M_SIZE; + static const int maxCount = m - 1; + static const int minLeafCount = m / 2; + static const int minLinkCount = (m - 1) / 2; + bool enough() { + if(leaf) return size >= minLeafCount; + else return size >= minLinkCount; + } + bool full() { + return size == maxCount; + } + KT keySet[m + 1]; + VT linkSet[m + 1]; + int parent; + int left; + int right; + bool leaf; + int size; + int address; +private: + BTreeNode(const int& address); +}; + +template +BTreeNode::BTreeNode(const int& address): address(address) { + clear(); + StoragePage page = PageManager::Instance().getPage(address); + int p = 0; + size = page.getIntStartFrom(p); p += 4; + parent = page.getIntStartFrom(p); p += 4; + left = page.getIntStartFrom(p); p += 4; + right = page.getIntStartFrom(p); p += 4; + leaf = !page.getIntStartFrom(p); p += 4; + + if(std::is_same::value) { + for(int i=0; ipush_back(c); + } + } + } else { + for(int i=0; i<=m; i++) { + keySet[i] = *(KT*)(&page[p]); + p += K_SIZE; + } + } + + if(std::is_same::value) { + for(int i=0; ipush_back(c); + } + } + } else { + for (int i = 0; i <= m; i++) { + linkSet[i] = *(VT*)(&page[p]); + p += V_SIZE; + } + } + +} + +template +BTreeNode *BTreeNode::getNode(const int &address) { + if(address < 4) { + throw "invalid address!"; + } + static std::map*> map; + if(map.count(address) == 0) { + delete map[address]; + map[address] = new BTreeNode(address); + } + return map[address]; +} + +template +BTreeNode *BTreeNode::release(const int &address) { + return nullptr; +} + +template +int BTreeNode::insert(const KT &key) { + int pos = 0; + while(pos < size && key > keySet[pos]) pos++; + linkSet[size + 1] = linkSet[size]; + for(int i=size; i>pos; i--) { + linkSet[i] = linkSet[i - 1]; + keySet[i] = keySet[i - 1]; + } + keySet[pos] = key; + size++; + return pos; +} + +template +int BTreeNode::findPos(const KT &key) { + int pos = std::lower_bound(keySet, keySet+size, key) - keySet; + if(pos == size || keySet[pos] != key) return -1; + return pos; +} + +template +void BTreeNode::release() { + BTreeNode::release(this->address); +} + +template +void BTreeNode::clear() { + for(int i=0; i +int BTreeNode::save() { + StoragePage page(address); + int p = 0; + page.setIntStartFrom(p, size); p += 4; + page.setIntStartFrom(p, parent); p += 4; + page.setIntStartFrom(p, left); p += 4; + page.setIntStartFrom(p, right); p += 4; + page.setIntStartFrom(p, !leaf); p += 4; + + if(std::is_same::value) { + for(int i=0; ic_str(), str->size()); + p += K_SIZE; + } + } else { + for(int i=0; i<=m; i++) { + page.setStartFrom(p, &keySet[i], K_SIZE); + p += K_SIZE; + } + } + + if(std::is_same::value) { + for(int i=0; ic_str(), str->size()); + p += V_SIZE; + } + } else { + for (int i = 0; i <= m; i++) { + page.setStartFrom(p, &linkSet[i], V_SIZE); + p += V_SIZE; + } + } + + if(p >= 1024) { + throw "too big page!"; + } + + page.save(); + + return p; +} + +#endif //INVODB_NODE_H diff --git a/invodb/file/storage_page.cpp b/invodb/file/storage_page.cpp index 9dc3913..fc2d88f 100644 --- a/invodb/file/storage_page.cpp +++ b/invodb/file/storage_page.cpp @@ -43,8 +43,12 @@ void StoragePage::setIntStartFrom(const int& index, const int& value) { *((int *)&page[index]) = value; } -void StoragePage::setStringStartFrom(const int &index, const char *str) { - for(int i=0; iaddress = id; } char& operator[] (int index) { if(index>=1024 || index < 0) throw "overflow"; else return this->page[index]; } diff --git a/invodb/main.cpp b/invodb/main.cpp index 7fdd054..416300d 100644 --- a/invodb/main.cpp +++ b/invodb/main.cpp @@ -4,6 +4,9 @@ #include "main.h" + +void testAndBenchmark(int n); + int main() { int t = time(0); //srand(1635418590); @@ -28,8 +31,71 @@ int main() { JSON json("{\"hello\": 1}"); col->insert(json); - BTreeUUID *btree = new BTreeUUID(PageManager::Instance().allocate()); - btree->testAndBenchmark(100000); + testAndBenchmark(100000); + + //btree->testAndBenchmark(100000); return 0; -} \ No newline at end of file +} + +void testAndBenchmark(int n) { + + + auto btree = new BTree<15, std::string, 32, double, 8>(PageManager::Instance().allocate()); + printf("nodeSize: %d\n", btree->getNodeSize()); + + + clock_t start = clock(); + + std::map map; + + for(int i=0; iinsert(uuid, addr); + map[uuid] = addr; + } + // update + else if(opt == 2) { + if(map.size() == 0) continue; + auto it = map.begin(); + std::advance(it, rand() % map.size()); + std::string uuid = it->first; + double addr = (double)rand() / 100; + map[uuid] = addr; + btree->update(uuid, addr); + } + // remove + else { + if(map.size() == 0) continue; + auto it = map.begin(); + std::advance(it, rand() % map.size()); + std::string uuid = it->first; + map.erase(uuid); + btree->remove(uuid); + } + } + + if(map.size() != btree->size()) { + printf("%d %d\n", map.size(), btree->size()); + printf("BTree has BUG!\n"); + exit(0); + } + + printf("test res k-v: %d\n", map.size()); + + for(auto it=map.begin(); it != map.end(); it++) { + printf("%llf %llf\n", btree->find(it->first), it->second); + if(btree->find(it->first) != it->second) { + printf("BTree has BUG!\n"); + //exit(0); + } + } + + clock_t end = clock(); + + printf("BTree pass the test with n=%d, time=%fs!\n", n, (double)(end - start) / CLOCKS_PER_SEC); +} diff --git a/invodb/models/collection.h b/invodb/models/collection.h index b4e8ca7..082735b 100644 --- a/invodb/models/collection.h +++ b/invodb/models/collection.h @@ -7,7 +7,7 @@ #include "file/page_manager.h" #include "utils/logger.h" -#include "btree/btree_uuid.h" +#include "btree/btree.h" #include "json.h" #include #include