diff --git a/invodb/btree/btree_node.cpp b/invodb/btree/btree_node.cpp index 2fea8c2..f90790e 100644 --- a/invodb/btree/btree_node.cpp +++ b/invodb/btree/btree_node.cpp @@ -4,32 +4,28 @@ #include "btree_node.h" -std::map BTreeNodeUUID::map; +std::map NodeUUID::map; -BTreeNodeUUID *BTreeNodeUUID::getNode(const int &address) { - if(true || map.count(address) == 0) { - map[address] = new BTreeNodeUUID(address); +NodeUUID *NodeUUID::getNode(const int &address) { + if(map.count(address) == 0) { + delete map[address]; + map[address] = new NodeUUID(address); } return map[address]; } -BTreeNodeUUID::BTreeNodeUUID(const int& address):address(address) { +NodeUUID::NodeUUID(const int& address):address(address) { clear(); StoragePage page = PageManager::Instance().getPage(address); int p = 0; - size = page.getIntStartFrom(p); - p += 4; - parent = page.getIntStartFrom(p); - p += 4; - left = page.getIntStartFrom(p); - p += 4; - right = page.getIntStartFrom(p); - p += 4; - leaf = !page.getIntStartFrom(p); - p += 4; + size = page.getIntStartFrom(p); p += 4; + parent = page.getIntStartFrom(p); p += 4; + left = page.getIntStartFrom(p); p += 4; + right = page.getIntStartFrom(p); p += 4; + leaf = !page.getIntStartFrom(p); p += 4; for(int i=0; i key[pos]) pos++; val[size + 1] = val[size]; @@ -51,34 +47,29 @@ int BTreeNodeUUID::insert(const std::string uuid) { return pos; } -void BTreeNodeUUID::print() { +void NodeUUID::print() { printf("---------BTreeNode---------\n"); for(int i=0; iaddress); +} + +NodeUUID *NodeUUID::release(const int &address) { + return nullptr; +} diff --git a/invodb/btree/btree_node.h b/invodb/btree/btree_node.h index 3d2c439..d060a1b 100644 --- a/invodb/btree/btree_node.h +++ b/invodb/btree/btree_node.h @@ -18,14 +18,28 @@ */ -class BTreeNodeUUID { +class NodeUUID { public: - static BTreeNodeUUID* getNode(const int& address); - int insert(const std::string uuid); + static NodeUUID* getNode(const int& address); + static NodeUUID* release(const int& address); + int insert(const std::string& uuid); + int findPos(const std::string& uuid); void print(); + void release(); void clear(); void save(); - static const int m = 27; + //static const int m = 27; + static const int m = 3; + static const int maxCount = m - 1; + static const int minLeafCount = m / 2; + static const int minLinkCount = (m - 1) / 2; + bool enough() { + if(leaf) return size >= minLeafCount; + else return size >= minLinkCount; + } + bool full() { + return size == maxCount; + } std::string key[m+1]; int val[m+1]; int parent; @@ -35,8 +49,8 @@ public: int size; int address; private: - BTreeNodeUUID(const int& address); - static std::map map; + NodeUUID(const int& address); + static std::map map; }; diff --git a/invodb/btree/btree_uuid.cpp b/invodb/btree/btree_uuid.cpp index a761e5c..a1df79c 100644 --- a/invodb/btree/btree_uuid.cpp +++ b/invodb/btree/btree_uuid.cpp @@ -8,25 +8,52 @@ BTreeUUID::BTreeUUID(const int& address) { root = address; } -void BTreeUUID::insert(const char *uuid, int address) { +int BTreeUUID::find(const std::string& uuid) { + NodeUUID* cur = NodeUUID::getNode(findNode(uuid)); + for(int i=0; isize; i++) { + if(uuid == cur->key[i]) return cur->val[i]; + } + return -1; +} - BTreeNodeUUID* cur = BTreeNodeUUID::getNode(root); - BTreeNodeUUID* parent = nullptr; +int BTreeUUID::findNode(const std::string& uuid) { + NodeUUID* cur = NodeUUID::getNode(root); + while(!cur->leaf) { + int parent = cur->address; + for(int i=0; isize; i++) { + if(uuid < cur->key[i]) { + cur = NodeUUID::getNode(cur->val[i]); + break; + } + if(i == cur->size - 1) { + cur = NodeUUID::getNode(cur->val[i + 1]); + break; + } + } + //if(cur->parent != parent) cur->parent = parent; + //cur->save(); + } + return cur->address; +} + + +void BTreeUUID::insert(const std::string& uuid, int address) { + + NodeUUID* cur = NodeUUID::getNode(root); + NodeUUID* parent = nullptr; while(!cur->leaf) { parent = cur; for(int i=0; isize; i++) { if(uuid < cur->key[i]) { - cur = BTreeNodeUUID::getNode(cur->val[i]); + cur = NodeUUID::getNode(cur->val[i]); break; } if(i == cur->size - 1) { - cur = BTreeNodeUUID::getNode(cur->val[i + 1]); + cur = NodeUUID::getNode(cur->val[i + 1]); break; } } - cur->parent = parent->address; - cur->save(); } // insert directly @@ -41,31 +68,46 @@ void BTreeUUID::insert(const char *uuid, int address) { else split(uuid, address, parent->address, cur->address); } -void BTreeUUID::split(std::string uuid, int address, int parentAddr, int curAddr) { +void BTreeUUID::split(const std::string& uuid, int address, int parentAddr, int curAddr) { - BTreeNodeUUID* parent = BTreeNodeUUID::getNode(parentAddr); - BTreeNodeUUID* cur = BTreeNodeUUID::getNode(curAddr); + NodeUUID* parent = NodeUUID::getNode(parentAddr); + NodeUUID* cur = NodeUUID::getNode(curAddr); cur->val[cur->insert(uuid)] = address; - BTreeNodeUUID* lLeaf = BTreeNodeUUID::getNode(PageManager::Instance().allocate()); - BTreeNodeUUID* rLeaf = BTreeNodeUUID::getNode(PageManager::Instance().allocate()); + NodeUUID* lLeaf = NodeUUID::getNode(PageManager::Instance().allocate()); + NodeUUID* rLeaf = NodeUUID::getNode(PageManager::Instance().allocate()); int mid = (cur->m / 2); for(int i=0; ival[lLeaf->insert(cur->key[i])] = cur->val[i]; lLeaf->right = rLeaf->address; lLeaf->left = cur->left; for(int i=mid; im; i++) rLeaf->val[rLeaf->insert(cur->key[i])] = cur->val[i]; - rLeaf->left = rLeaf->address; + rLeaf->left = lLeaf->address; rLeaf->right = cur->right; + if(cur->left) { + NodeUUID* curLeft = NodeUUID::getNode(cur->left); + curLeft->right = lLeaf->address; + curLeft->save(); + } + + if(cur->right) { + NodeUUID* curRight = NodeUUID::getNode(cur->right); + curRight->left = rLeaf->address; + curRight->save(); + } + + cur->release(); + if(cur->address == root) { - BTreeNodeUUID* newRoot = BTreeNodeUUID::getNode(PageManager::Instance().allocate()); + NodeUUID* newRoot = NodeUUID::getNode(PageManager::Instance().allocate()); newRoot->insert(rLeaf->key[0]); newRoot->val[0] = lLeaf->address; newRoot->val[1] = rLeaf->address; newRoot->leaf = false; root = newRoot->address; + newRoot->parent = 0; lLeaf->parent = rLeaf->parent = root; newRoot->save(); @@ -78,27 +120,43 @@ void BTreeUUID::split(std::string uuid, int address, int parentAddr, int curAddr } } -void BTreeUUID::insertInternal(std::string uuid, int curAddr, int lLeafAddr, int rLeafAddr) { +void BTreeUUID::insertInternal(const std::string& uuid, int curAddr, int lLeafAddr, int rLeafAddr) { - BTreeNodeUUID *cur = BTreeNodeUUID::getNode(curAddr); - BTreeNodeUUID *lLeaf = BTreeNodeUUID::getNode(lLeafAddr); - BTreeNodeUUID *rLeaf = BTreeNodeUUID::getNode(rLeafAddr); + NodeUUID *cur = NodeUUID::getNode(curAddr); + NodeUUID *lLeaf = NodeUUID::getNode(lLeafAddr); + NodeUUID *rLeaf = NodeUUID::getNode(rLeafAddr); if(cur->size < cur->m - 1) { int pos = cur->insert(uuid); cur->val[pos] = lLeaf->address; cur->val[pos+1] = rLeaf->address; - lLeaf->parent = rLeaf->parent = root; + lLeaf->parent = rLeaf->parent = curAddr; cur->save(); lLeaf->save(); rLeaf->save(); return; } - BTreeNodeUUID* newLChild = BTreeNodeUUID::getNode(PageManager::Instance().allocate()); - BTreeNodeUUID* newRChild = BTreeNodeUUID::getNode(PageManager::Instance().allocate()); + NodeUUID* newLChild = NodeUUID::getNode(PageManager::Instance().allocate()); + NodeUUID* newRChild = NodeUUID::getNode(PageManager::Instance().allocate()); newLChild->leaf = false; newRChild->leaf = false; + newLChild->right = newRChild->address; + newLChild->left = cur->left; + newRChild->left = newLChild->address; + newRChild->right = cur->right; + + if(cur->left) { + NodeUUID* curLeft = NodeUUID::getNode(cur->left); + curLeft->right = newLChild->address; + curLeft->save(); + } + + if(cur->right) { + NodeUUID* curRight = NodeUUID::getNode(cur->right); + curRight->left = newRChild->address; + curRight->save(); + } int pos = cur->insert(uuid); cur->val[pos] = lLeaf->address; @@ -112,16 +170,27 @@ void BTreeUUID::insertInternal(std::string uuid, int curAddr, int lLeafAddr, int for(int i=mid+1; im; i++) newRChild->insert(cur->key[i]); for(int i=mid+1; i<=cur->m; i++) newRChild->val[i-mid-1] = cur->val[i]; - lLeaf->save(); - rLeaf->save(); + for(int i=0; i<=newLChild->size; i++) { + NodeUUID* child = NodeUUID::getNode(newLChild->val[i]); + child->parent = newLChild->address; + child->save(); + } + for(int i=0; i<=newRChild->size; i++) { + NodeUUID* child = NodeUUID::getNode(newRChild->val[i]); + child->parent = newRChild->address; + child->save(); + } + + cur->release(); if(cur->address == root) { - BTreeNodeUUID* newRoot = BTreeNodeUUID::getNode(PageManager::Instance().allocate()); + NodeUUID* newRoot = NodeUUID::getNode(PageManager::Instance().allocate()); newRoot->insert(cur->key[mid]); newRoot->val[0] = newLChild->address; newRoot->val[1] = newRChild->address; newRoot->leaf = false; root = newRoot->address; + newRoot->parent = 0; newLChild->parent = newRChild->parent = root; newRoot->save(); @@ -130,23 +199,23 @@ void BTreeUUID::insertInternal(std::string uuid, int curAddr, int lLeafAddr, int } else { newLChild->save(); newRChild->save(); - if(cur->parent == 0) throw "fuck"; insertInternal(cur->key[mid], cur->parent, newLChild->address, newRChild->address); } } + void BTreeUUID::print() { - innerPrint(BTreeNodeUUID::getNode(root)); + innerPrint(NodeUUID::getNode(root)); } -void BTreeUUID::innerPrint(BTreeNodeUUID *cur) { +void BTreeUUID::innerPrint(NodeUUID *cur) { if(cur->address == root) { cnt = 0; } if(cur->leaf) cnt += cur->size; - printf("---------%d(%d)count=%d&sum=%d---l:%d,r:%d-----\n", cur->address, cur->leaf, cur->size, cnt, cur->left, cur->right); + printf("---------%d(%d)count=%d&sum=%d---l:%d,r:%d-parent:%d----\n", cur->address, cur->leaf, cur->size, cnt, cur->left, cur->right, cur->parent); for(int i=0; isize; i++) { - printf("%d:%s ", i, cur->key[i].substr(0, 4).c_str()); + printf("%d:%s ", i, cur->key[i].substr(0, 6).c_str()); } printf("\n"); for(int i=0; i<=cur->size; i++) { @@ -157,64 +226,216 @@ void BTreeUUID::innerPrint(BTreeNodeUUID *cur) { if(cur->leaf) return; for(int i=0; i<=cur->size; i++) { - innerPrint(BTreeNodeUUID::getNode(cur->val[i])); - } - - /* - BTreeNodeUUID test = *cur; - test.address = PageManager::Instance().allocate(); - test.print(); - - - test.save(); - - BTreeNodeUUID test2 = *BTreeNodeUUID::getNode(test.address); - - printf("test: size:%d l:%d r:%d\n", test2.size, test2.left, test2.right); - test2.print(); - */ - - -} - -int BTreeUUID::find(std::string uuid) { - BTreeNodeUUID* cur = BTreeNodeUUID::getNode(root); - while(!cur->leaf) { - for(int i=0; isize; i++) { - if(uuid < cur->key[i]) { - cur = BTreeNodeUUID::getNode(cur->val[i]); - break; - } - if(i == cur->size - 1) { - cur = BTreeNodeUUID::getNode(cur->val[i + 1]); - break; - } + if(NodeUUID::getNode(cur->val[i])->parent != cur->address) { + printf("FUCK\n"); + //exit(0); } + innerPrint(NodeUUID::getNode(cur->val[i])); } - for(int i=0; isize; i++) { - if(uuid == cur->key[i]) return cur->val[i]; - } - return -1; + } -/* -void BTreeUUID::innerInsert(BTreeNodeUUID* &p, BTreeNodeUUID* f, const char *uuid, int address) { - if(p == nullptr) { - p = new BTreeNodeUUID(PageManager::Instance().allocate()); - p->insert(uuid, address); + +void BTreeUUID::remove(const std::string &uuid) { + NodeUUID* cur = NodeUUID::getNode(findNode(uuid)); + if(find(uuid) == -1) printf("ohFUCK\n"); + removeEntry(cur->address, uuid, find(uuid)); +} + +void BTreeUUID::removeEntry(int curAddr, const std::string& uuid, const int& pointer) { + printf("---removeEntry: %d %s %d\n", curAddr, uuid.c_str(), pointer); + + NodeUUID* cur = NodeUUID::getNode(curAddr); + int pos = cur->findPos(uuid); + if(pos == -1) return; + for(int i=pos; isize-1; i++) { + cur->key[i] = cur->key[i + 1]; + } + for(int i=pos+(cur->val[pos] != pointer); isize; i++) { + cur->val[i] = cur->val[i + 1]; + } + cur->size--; + cur->save(); + + if(curAddr == root && !cur->leaf && cur->size == 0) { + root = cur->val[0]; + NodeUUID* root = NodeUUID::getNode(cur->val[0]); + root->parent = 0; + root->save(); + + cur->release(); return; } - p->insert(uuid, address); - p->print(); - // full - if(p->size() == p->m) { - int mid = p->m / 2; - BTreeNodeUUID* lnode = new BTreeNodeUUID(PageManager::Instance().allocate()); - BTreeNodeUUID* rnode = new BTreeNodeUUID(PageManager::Instance().allocate()); - for(int i=0; iinsert(p->link[i].key, p->link[i].value); - for(int i=mid; im; i++) rnode->insert(p->link[i].key, p->link[i].value); + + if(cur->enough() || cur->address == root) return; + + + printf("em %d %d\n", cur->address, cur->size); + + + if(canCoalesce(cur->address, cur->left)) { + coalesce(cur->address, cur->left); + } else if(canCoalesce(cur->address, cur->right)) { + coalesce(cur->address, cur->right); + } else if(canRedistribute(cur->address, cur->left)) { + redistribute(cur->address, cur->left); + } else if(canRedistribute(cur->address, cur->right)) { + redistribute(cur->address, cur->right); + } else { + throw "these is a bug!"; } } -*/ + + +bool BTreeUUID::canCoalesce(int curAddr, int sibAddr) { + if(sibAddr == 0) return false; + NodeUUID* cur = NodeUUID::getNode(curAddr); + NodeUUID* sib = NodeUUID::getNode(sibAddr); + if(cur->parent != sib->parent) return false; + return (cur->size + sib->size <= NodeUUID::m - 1 - !cur->leaf); +} + +void BTreeUUID::coalesce(int curAddr, int sibAddr) { + + printf("coalesce %d and %d\n", curAddr, sibAddr); + + NodeUUID* cur = NodeUUID::getNode(curAddr); + NodeUUID* sib = NodeUUID::getNode(sibAddr); + NodeUUID* parent = NodeUUID::getNode(cur->parent); + std::string *k; + for(int i=0; isize; i++) { + if((parent->val[i] == curAddr && parent->val[i+1] == sibAddr) + || (parent->val[i] == sibAddr && parent->val[i+1] == curAddr)) { + k = &parent->key[i]; + break; + } + } + NodeUUID* newNode = nullptr; + if(cur->left == sibAddr) { + if(!cur->leaf) sib->insert(*k); + for(int i=0; isize; i++) { + sib->val[sib->insert(cur->key[i])] = cur->val[i]; + } + sib->val[sib->size] = cur->val[cur->size]; + sib->right = cur->right; + if(cur->right) { + NodeUUID *right = NodeUUID::getNode(cur->right); + right->left = sib->address; + right->save(); + } + newNode = sib; + newNode->save(); + removeEntry(parent->address, *k, curAddr); + cur->release(); + } else { + if(!cur->leaf) cur->insert(*k); + for(int i=0; isize; i++) { + cur->val[cur->insert(sib->key[i])] = sib->val[i]; + } + cur->val[cur->size] = sib->val[sib->size]; + + cur->right = sib->right; + + if(sib->right) { + NodeUUID *right = NodeUUID::getNode(sib->right); + right->left = cur->address; + right->save(); + } + newNode = cur; + newNode->save(); + removeEntry(parent->address, *k, sibAddr); + sib->release(); + } + if(newNode->leaf) return; + for(int i=0; i<=newNode->size; i++) { + NodeUUID* child = NodeUUID::getNode(newNode->val[i]); + child->parent = newNode->address; + child->save(); + } +} + +bool BTreeUUID::canRedistribute(int curAddr, int sibAddr) { + if(sibAddr == 0) return false; + NodeUUID* cur = NodeUUID::getNode(curAddr); + NodeUUID* sib = NodeUUID::getNode(sibAddr); + if(cur->parent != sib->parent) return false; + return sib->size > ((sib->m - !sib->leaf) / 2); +} + +void BTreeUUID::redistribute(int curAddr, int sibAddr) { + + printf("redistribute %d from %d\n", curAddr, sibAddr); + + NodeUUID* cur = NodeUUID::getNode(curAddr); + NodeUUID* sib = NodeUUID::getNode(sibAddr); + NodeUUID* parent = NodeUUID::getNode(cur->parent); + std::string k; + int pos; + for(pos=0; possize; pos++) { + if((parent->val[pos] == curAddr && parent->val[pos+1] == sibAddr) + || (parent->val[pos] == sibAddr && parent->val[pos+1] == curAddr)) { + k = parent->key[pos]; + break; + } + } + + if(cur->left == sibAddr) { + if(cur->leaf) { + cur->val[cur->insert(sib->key[sib->size-1])] = sib->val[sib->size-1]; + parent->key[pos] = cur->key[0]; + } else{ + cur->val[cur->insert(k)] = sib->val[sib->size]; + parent->key[pos] = sib->key[sib->size-1]; + } + if(!cur->leaf) { + NodeUUID *child = NodeUUID::getNode(sib->val[sib->size - cur->leaf]); + child->parent = cur->address; + child->save(); + } + sib->size--; + } else { + if(cur->leaf) { + cur->val[cur->insert(sib->key[0])] = sib->val[0]; + for(int i=0; isize; i++) { + sib->key[i] = sib->key[i+1]; + sib->val[i] = sib->val[i+1]; + } + parent->key[pos] = sib->key[0]; + } else { + if(cur->size != 0) { + cur->val[cur->insert(k)+1] = sib->val[0]; + } else { + cur->key[0] = k; + cur->val[1] = sib->val[0]; + cur->size++; + } + parent->key[pos] = sib->key[0]; + if(!cur->leaf) { + NodeUUID* child = NodeUUID::getNode(sib->val[0]); + child->parent = cur->address; + child->save(); + } + for(int i=0; isize; i++) { + sib->key[i] = sib->key[i+1]; + sib->val[i] = sib->val[i+1]; + } + } + sib->size--; + } + + cur->save(); + sib->save(); + parent->save(); +} + +int BTreeUUID::test() { + NodeUUID* cur = NodeUUID::getNode(findNode("\0")); + int sum = cur->size; + while(cur->right) { + cur = NodeUUID::getNode(cur->right); + sum += cur->size; + } + return sum; +} diff --git a/invodb/btree/btree_uuid.h b/invodb/btree/btree_uuid.h index fc4ef6e..5d0c98d 100644 --- a/invodb/btree/btree_uuid.h +++ b/invodb/btree/btree_uuid.h @@ -5,19 +5,27 @@ #ifndef INVODB_BTREE_UUID_H #define INVODB_BTREE_UUID_H -#include +#include "btree/btree_node.h" +#include "utils/uuid.h" class BTreeUUID { public: BTreeUUID(const int& address); - void insert(const char* uuid, int address); - int find(std::string uuid); + void insert(const std::string& uuid, int address); + void remove(const std::string& uuid); + int find(const std::string& uuid); void print(); + int test(); private: - void innerPrint(BTreeNodeUUID* cur); - - void split(std::string uuid, int address, int parentAddr, int curAddr); - void insertInternal(std::string uuid, int curAddr, int lLeafAddr, int rLeafAddr); + void removeEntry(int curAddr, const std::string& uuid, const int& pointer); + bool canCoalesce(int curAddr, int sibAddr); + void coalesce(int curAddr, int sibAddr); + bool canRedistribute(int curAddr, int sibAddr); + void redistribute(int curAddr, int sibAddr); + void innerPrint(NodeUUID* cur); + int findNode(const std::string& uuid); + void split(const std::string& uuid, int address, int parentAddr, int curAddr); + void insertInternal(const std::string& uuid, int curAddr, int lLeafAddr, int rLeafAddr); int root; int cnt; }; diff --git a/invodb/main.cpp b/invodb/main.cpp index 7270a4f..c84521f 100644 --- a/invodb/main.cpp +++ b/invodb/main.cpp @@ -4,8 +4,40 @@ #include "main.h" + +void benchmark() { + BTreeUUID *btree = new BTreeUUID(PageManager::Instance().allocate()); + char uuid[33]; uuid[32] = '\0'; + + std::vector> v; + + const int n = 1000000; + + for(int i=0; iinsert(uuid, addr); + } + + for(int i=0; i<1000000; i++) { + std::swap(v[rand()%v.size()], v[rand()%v.size()]); + } + + for(int i=0; ifind(v[0].first); + if(addr != v[0].second) { + printf("fuck\n"); + exit(0); + } + } +} + int main() { - srand(time(NULL)); + int t = time(0); + //srand(1635418590); + srand(1635423140); + printf("seed: %d\n", t); system("rm -rf test.invodb && touch test.invodb"); @@ -25,20 +57,7 @@ int main() { JSON json("{\"hello\": 1}"); col->insert(json); - BTreeUUID *btree = new BTreeUUID(PageManager::Instance().allocate()); - char uuid[32]; - - std::vector v; - - for(int i=0; i<10000; i++) { - generateUUID(uuid); - v.push_back(std::string(uuid, 32)); - btree->insert(uuid, PageManager::Instance().allocate()); - } - - btree->print(); - - printf("%d\n", btree->find("123")); + benchmark(); return 0; } \ No newline at end of file diff --git a/invodb/utils/uuid.h b/invodb/utils/uuid.h index d73c5f9..119cdbf 100644 --- a/invodb/utils/uuid.h +++ b/invodb/utils/uuid.h @@ -15,4 +15,25 @@ inline void generateUUID(char *uuid) { } } +inline std::string appropriateString(const std::string& s, const int& offset) { + int a[s.size()]; + for(int i=0; i=0; i--) { + res += a[i]; + } + return res; +} + #endif //INVODB_UUID_H