From a9d5c2c942991c68018c270a6064a4fe5a251c46 Mon Sep 17 00:00:00 2001 From: YuhangQ Date: Wed, 3 Nov 2021 22:05:13 +0800 Subject: [PATCH] done --- invodb/btree/btree.h | 117 ++++++++++++++++++++++++++++------- invodb/btree/list.h | 3 + invodb/btree/node.h | 34 +++++++--- invodb/file/page_manager.cpp | 6 +- invodb/main.cpp | 52 ++++++++++------ invodb/models/cache.h | 10 +-- invodb/models/collection.cpp | 47 +++++++++----- invodb/models/collection.h | 3 + 8 files changed, 202 insertions(+), 70 deletions(-) diff --git a/invodb/btree/btree.h b/invodb/btree/btree.h index 1a90982..20eed9e 100644 --- a/invodb/btree/btree.h +++ b/invodb/btree/btree.h @@ -22,8 +22,11 @@ public: int firstNode(); std::vector keySet(); std::vector> all(); + void print(); + void innerPrint(const int& t); int size(); private: + void removeEntry(int curAdd, const KT& key, const int& pointer); bool canCoalesce(int curAdd, int sibAdd); void coalesce(int curAdd, int sibAdd); @@ -115,11 +118,18 @@ void BTree::removeEntry(int curAdd, const KT &key, const int &pointe cur->save(); if(curAdd == root && !cur->leaf && cur->size == 0) { - root = cur->linkSet[0]; - auto root = BTreeNode::getNode(cur->linkSet[0]); - root->parent = 0; - root->save(); - cur->release(); +// root = cur->linkSet[0]; +// auto root = BTreeNode::getNode(cur->linkSet[0]); +// root->parent = 0; +// root->save(); +// cur->release(); + auto rootNode = BTreeNode::getNode(cur->linkSet[0]); + *cur = *rootNode; + rootNode->release(); + cur->parent = 0; + cur->address = root; + cur->update(); + cur->save(); return; } @@ -160,7 +170,7 @@ void BTree::coalesce(int curAdd, int sibAdd) { break; } } - BTreeNode* newNode = nullptr; + if(cur->left == sibAdd) { if(!cur->leaf) sib->insert(*k); for(int i=0; isize; i++) { @@ -169,12 +179,12 @@ void BTree::coalesce(int curAdd, int sibAdd) { sib->linkSet[sib->size] = cur->linkSet[cur->size]; sib->right = cur->right; if(cur->right) { - BTreeNode *right = BTreeNode::getNode(cur->right); + auto right = BTreeNode::getNode(cur->right); right->left = sib->address; right->save(); } - newNode = sib; - newNode->save(); + sib->update(); + sib->save(); removeEntry(parent->address, *k, curAdd); cur->release(); } else { @@ -187,21 +197,24 @@ void BTree::coalesce(int curAdd, int sibAdd) { cur->right = sib->right; if(sib->right) { - BTreeNode *right = BTreeNode::getNode(sib->right); + auto right = BTreeNode::getNode(sib->right); right->left = cur->address; right->save(); } - newNode = cur; - newNode->save(); + cur->update(); + cur->save(); + removeEntry(parent->address, *k, sibAdd); sib->release(); } + /* if(newNode->leaf) return; for(int i=0; i<=newNode->size; i++) { auto child = BTreeNode::getNode(newNode->linkSet[i]); child->parent = newNode->address; child->save(); } + */ } template @@ -237,7 +250,7 @@ void BTree::redistribute(int curAdd, int sibAdd) { parent->keySet[pos] = sib->keySet[sib->size-1]; } if(!cur->leaf) { - BTreeNode *child = BTreeNode::getNode(sib->linkSet[sib->size - cur->leaf]); + auto child = BTreeNode::getNode(sib->linkSet[sib->size - cur->leaf]); child->parent = cur->address; child->save(); } @@ -327,18 +340,32 @@ void BTree::split(const KT &key, int address, int parentAdd, int cur cur->release(); if(cur->address == root) { - auto newRoot = BTreeNode::getNode(PageManager::Instance().allocate()); +// auto newRoot = BTreeNode::getNode(PageManager::Instance().allocate()); +// newRoot->insert(rLeaf->keySet[0]); +// newRoot->linkSet[0] = lLeaf->address; +// newRoot->linkSet[1] = rLeaf->address; +// newRoot->leaf = false; +// root = newRoot->address; +// newRoot->parent = 0; +// lLeaf->parent = rLeaf->parent = root; +// +// newRoot->save(); +// lLeaf->save(); +// rLeaf->save(); + + auto newRoot = BTreeNode::getNode(root); + newRoot->clear(); newRoot->insert(rLeaf->keySet[0]); newRoot->linkSet[0] = lLeaf->address; newRoot->linkSet[1] = rLeaf->address; newRoot->leaf = false; - root = newRoot->address; newRoot->parent = 0; lLeaf->parent = rLeaf->parent = root; newRoot->save(); lLeaf->save(); rLeaf->save(); + } else { lLeaf->save(); rLeaf->save(); @@ -346,11 +373,41 @@ void BTree::split(const KT &key, int address, int parentAdd, int cur } } +template +void BTree::print() { + innerPrint(root); +} + +template +void BTree::innerPrint(const int& t) { + auto p = BTreeNode::getNode(t); + printf("----------Node: %d----Parent: %d----------\n", p->address, p->parent); + for(int i=0; isize; i++) { + std::cout << p->keySet[i] << " "; + } + std::cout << std::endl; + for(int i=0; i<=p->size; i++) { + std::cout << p->linkSet[i] << " "; + } + std::cout << std::endl; + if(p->leaf) return; + for(int i=0; i<=p->size; i++) { + auto p2 = BTreeNode::getNode(p->linkSet[i]); + if(p2->parent != p->address) { + printf(">>>>>>>>>>>>>>>>>>FUCK"); + exit(0); + } + + innerPrint(p->linkSet[i]); + } +} + + template void BTree::insertInternal(const KT &key, int curAdd, int lLeafAdd, int rLeafAdd) { - BTreeNode *cur = BTreeNode::getNode(curAdd); - BTreeNode *lLeaf = BTreeNode::getNode(lLeafAdd); - BTreeNode *rLeaf = BTreeNode::getNode(rLeafAdd); + auto cur = BTreeNode::getNode(curAdd); + auto lLeaf = BTreeNode::getNode(lLeafAdd); + auto rLeaf = BTreeNode::getNode(rLeafAdd); if(cur->size < cur->m - 1) { int pos = cur->insert(key); @@ -410,18 +467,34 @@ void BTree::insertInternal(const KT &key, int curAdd, int lLeafAdd, cur->release(); if(cur->address == root) { - auto newRoot = BTreeNode::getNode(PageManager::Instance().allocate()); - newRoot->insert(cur->keySet[mid]); +// auto newRoot = BTreeNode::getNode(PageManager::Instance().allocate()); +// newRoot->insert(cur->keySet[mid]); +// newRoot->linkSet[0] = newLChild->address; +// newRoot->linkSet[1] = newRChild->address; +// newRoot->leaf = false; +// root = newRoot->address; +// newRoot->parent = 0; +// newLChild->parent = newRChild->parent = root; +// +// newRoot->save(); +// newLChild->save(); +// newRChild->save(); + + KT key = cur->keySet[mid]; + + auto newRoot = BTreeNode::getNode(root); + newRoot->clear(); + newRoot->insert(key); newRoot->linkSet[0] = newLChild->address; newRoot->linkSet[1] = newRChild->address; newRoot->leaf = false; - root = newRoot->address; newRoot->parent = 0; newLChild->parent = newRChild->parent = root; newRoot->save(); newLChild->save(); newRChild->save(); + } else { newLChild->save(); newRChild->save(); @@ -449,7 +522,7 @@ std::vector BTree::keySet() { std::vector v; while(true) { for(int i=0; isize; i++) { - v.push_back(p->keySet[0]); + v.push_back(p->keySet[i]); } if(p->right == 0) break; p = BTreeNode::getNode(p->right); diff --git a/invodb/btree/list.h b/invodb/btree/list.h index d7e6d59..ac69c9d 100644 --- a/invodb/btree/list.h +++ b/invodb/btree/list.h @@ -16,6 +16,9 @@ public: void remove(T const& value) { tree->remove(value); } + void print() { + tree->print(); + } bool exists(T const& value) { return tree->exists(value); } diff --git a/invodb/btree/node.h b/invodb/btree/node.h index da496c1..c99c615 100644 --- a/invodb/btree/node.h +++ b/invodb/btree/node.h @@ -11,14 +11,23 @@ #include #include #include "file/page_manager.h" +#include "models/cache.h" template class BTreeNode { public: - static BTreeNode* getNode(const int &index); + static std::shared_ptr> getNode(const int &index); static BTreeNode* release(const int &index); int insert(KT const &key); int findPos(KT const &key); + void update() { + if(leaf) return; + for(int i=0; i<=size; i++) { + auto node = getNode(linkSet[i]); + node->parent = address; + node->save(); + } + } void release(); void clear(); int save(); @@ -82,16 +91,21 @@ BTreeNode::BTreeNode(const int& address): address(address) { } template -BTreeNode *BTreeNode::getNode(const int &index) { +std::shared_ptr> BTreeNode::getNode(const int &index) { if(index == 0) { throw "invalid address!"; } - static std::map*> map; - if(map.count(index) == 0) { - delete map[index]; - map[index] = new BTreeNode(index); + return std::make_shared>( BTreeNode(index)); + static LRUCache> cache(1000000); + if(!cache.exist(index)) { + auto p = std::make_shared>( BTreeNode(index)); + cache.put(index, p); + return p; + } else { + auto p = cache.get(index); + cache.put(index, p); + return p; } - return map[index]; } template @@ -128,7 +142,7 @@ void BTreeNode::release() { template void BTreeNode::clear() { - for(int i=0; i::value) { ((std::string *)&keySet[i])->clear(); } @@ -144,8 +158,10 @@ void BTreeNode::clear() { linkSet[i] = 0; } size = 0; - leaf = false; + leaf = true; parent = 0; + left = 0; + right = 0; } template diff --git a/invodb/file/page_manager.cpp b/invodb/file/page_manager.cpp index 5c86110..67349d8 100644 --- a/invodb/file/page_manager.cpp +++ b/invodb/file/page_manager.cpp @@ -21,9 +21,11 @@ int PageManager::loadDatabase(const char *filename) { StoragePage PageManager::getPage(const int &index) { + /* if(cache.exist(index)) { return cache.get(index); } + */ StoragePage page(index); // 调整指针位置 @@ -34,7 +36,7 @@ StoragePage PageManager::getPage(const int &index) { } void PageManager::setPage(const int &index, const StoragePage &page) { - cache.put(index, page); + //cache.put(index, page); stream.clear(); stream.seekg(index * 1024); stream.write(page, 1024); @@ -48,6 +50,8 @@ int PageManager::allocate() { } void PageManager::release(const int &index, const bool &next) { + + return; auto page = getPage(index); freeList->insert(page.getAddress()); if(next) { diff --git a/invodb/main.cpp b/invodb/main.cpp index f523155..d05392a 100644 --- a/invodb/main.cpp +++ b/invodb/main.cpp @@ -9,11 +9,11 @@ void testAndBenchmark(int n); int main() { int t = time(0); - //srand(1635418590); - srand(t); + srand(1635418590); + //srand(t); printf("seed: %d\n", t); - system("rm -rf test.invodb && touch test.invodb"); + //system("rm -rf test.invodb && touch test.invodb"); PageManager::loadDatabase("test.invodb"); @@ -30,34 +30,44 @@ int main() { } +// freopen("qq.txt", "r", stdin); +// char qq[100], phone[100]; +// for(int i=0; i<400000; i++) { +// if(i % 1000 == 0) printf("[%d/%d] Inserting!\n", i, 400000); +// scanf("%s%s", qq, phone); +// nlohmann::json json; +// json["qq"] = qq; +// json["phone"] = phone; +// col->insert(json); +// } + + col->test(); - nlohmann::json j = nlohmann::json::parse(R"( -{ - "string": "this is a string!", - "double": 3.1415, - "int": 25565, - "bool": true, - "child": { - "id": 3 - }, - "array": ["1", "2", "3"] -} - )"); +// nlohmann::json j = nlohmann::json::parse(R"( +//{ +// "string": "this is a string!", +// "double": 3.1415, +// "int": 25565, +// "bool": true, +// "child": { +// "id": 3 +// }, +// "array": ["1", "2", "3"] +//} +// )"); - col->insert(j); + //testAndBenchmark(100000); - col->remove(j); - testAndBenchmark(100000); return 0; } void testAndBenchmark(int n) { - auto btree = new BTree(PageManager::Instance().allocate()); + auto btree = new BTree(PageManager::Instance().allocate()); printf("nodeSize: %d\n", btree->getNodeSize()); @@ -67,6 +77,7 @@ void testAndBenchmark(int n) { for(int i=0; iremove(uuid); } + + //printf("opt: %d\n", opt); + //btree->print(); } if(map.size() != btree->size()) { diff --git a/invodb/models/cache.h b/invodb/models/cache.h index 4805a44..afcf954 100644 --- a/invodb/models/cache.h +++ b/invodb/models/cache.h @@ -17,11 +17,11 @@ public: return hash.find(key) != hash.end(); } - VT get(KT const &key) { + std::shared_ptr get(KT const &key) { if (hash.find(key) == hash.end()) throw "cache error"; else { - VT value = hash[key]->second; + std::shared_ptr value = hash[key]->second; ls.erase(hash[key]); ls.push_front(std::make_pair(key, value)); hash[key] = ls.begin(); @@ -29,7 +29,7 @@ public: } } - void put(KT const &key, VT const &value) { + void put(KT const &key, std::shared_ptr const &value) { if (hash.find(key) != hash.end()) { ls.erase(hash[key]); } @@ -43,8 +43,8 @@ public: private: int capacity; - std::list> ls; - std::unordered_map>::iterator> hash; + std::list>> ls; + std::unordered_map>>::iterator> hash; }; diff --git a/invodb/models/collection.cpp b/invodb/models/collection.cpp index 5e0d87a..f87d455 100644 --- a/invodb/models/collection.cpp +++ b/invodb/models/collection.cpp @@ -45,19 +45,17 @@ Collection::Collection(const std::string &name, const int &firstPage) { void Collection::insert(nlohmann::json &json) { - if(json["__INVO_ID__"].empty()) { json["__INVO_ID__"] = generateUUID(); } else { remove(json); } - - std::string id = json["__INVO_ID__"].get(); int add = PageManager::Instance().saveJSONToFile(json); uuid->insert(id, add); - Logger::info("INSERT ", json.dump()); + + //Logger::info("INSERT ", json.dump()); // add index indexJSON("", json, add); @@ -84,11 +82,13 @@ void Collection::remove(const nlohmann::json &json) { void Collection::indexJSON(const std::string prefix, const nlohmann::json &json, const int& address) { // even easier with structured bindings (C++17) for (auto& [key, value] : json.items()) { - std::cout << prefix << key << " : " << value << "\n"; - if(value.is_string()) insertIndex(prefix + key, value.get(), address); - if(value.is_number()) insertIndex(prefix + key, value.get(), address); + //std::cout << prefix << key << " : " << value << "\n"; + if(key == "__INVO_ID__") continue; + if(value.is_boolean()) insertIndex(prefix + key, value.get(), address); - if(value.is_object()) indexJSON(prefix + key + ".", value.get(),address); + if(value.is_number()) insertIndex(prefix + key, value.get(), address); + if(value.is_string()) insertIndex(prefix + key, value.get(), address); + //if(value.is_object()) indexJSON(prefix + key + ".", value.get(),address); if(value.is_array()) { for(auto& element : value.get()) { if(element.is_string()) insertIndex(prefix + key, element.get(), address); @@ -100,9 +100,12 @@ void Collection::indexJSON(const std::string prefix, const nlohmann::json &json, } void Collection::insertIndex(const std::string indexName, const std::string indexValue, const int &address) { - printf("INDEX: %s = \"%s\" add:(%d)\n", indexName.c_str(), indexValue.c_str(), address); std::string treeName = indexName + "$string"; + + + //printf("INDEX TO %s: %s = \"%s\" add:(%d)\n", treeName.c_str(), indexName.c_str(), indexValue.c_str(), address); + if(!index->exists(treeName)) { index->insert(treeName, PageManager::Instance().allocate()); } @@ -115,10 +118,12 @@ void Collection::insertIndex(const std::string indexName, const std::string inde List list(indexTree.find(indexValue)); list.insert(address); + //printf("INSERT %d INTO %d\n", address, indexTree.find(indexValue)); + //list.print(); } void Collection::insertIndex(const std::string indexName, double indexValue, const int &address) { - printf("INDEX: %s = %f add:(%d)\n", indexName.c_str(), indexValue, address); + //printf("INDEX: %s = %f add:(%d)\n", indexName.c_str(), indexValue, address); std::string treeName = indexName + "$number"; if(!index->exists(treeName)) { @@ -136,7 +141,7 @@ void Collection::insertIndex(const std::string indexName, double indexValue, con } void Collection::insertIndex(const std::string indexName, bool indexValue, const int &address) { - printf("INDEX: %s = %s add:(%d)\n", indexName.c_str(), indexValue ? "true" : "false", address); + //printf("INDEX: %s = %s add:(%d)\n", indexName.c_str(), indexValue ? "true" : "false", address); std::string treeName = indexName + "$boolean"; if(!index->exists(treeName)) { @@ -171,7 +176,7 @@ void Collection::clearIndex(const std::string prefix, const nlohmann::json &json } void Collection::removeIndex(const std::string indexName, const std::string indexValue, const int &address) { - printf("REMOVE: %s = \"%s\" add:(%d)\n", indexName.c_str(), indexValue.c_str(), address); + //printf("REMOVE: %s = \"%s\" add:(%d)\n", indexName.c_str(), indexValue.c_str(), address); std::string treeName = indexName + "$string"; if(!index->exists(treeName)) { @@ -189,7 +194,7 @@ void Collection::removeIndex(const std::string indexName, const std::string inde } void Collection::removeIndex(const std::string indexName, double indexValue, const int &address) { - printf("REMOVE: %s = %f add:(%d)\n", indexName.c_str(), indexValue, address); + //printf("REMOVE: %s = %f add:(%d)\n", indexName.c_str(), indexValue, address); std::string treeName = indexName + "$number"; if(!index->exists(treeName)) { @@ -207,7 +212,7 @@ void Collection::removeIndex(const std::string indexName, double indexValue, con } void Collection::removeIndex(const std::string indexName, bool indexValue, const int &address) { - printf("REMOVE: %s = %s add:(%d)\n", indexName.c_str(), indexValue ? "true" : "false", address); + //printf("REMOVE: %s = %s add:(%d)\n", indexName.c_str(), indexValue ? "true" : "false", address); std::string treeName = indexName + "$boolean"; if(!index->exists(treeName)) { @@ -223,3 +228,17 @@ void Collection::removeIndex(const std::string indexName, bool indexValue, const List list(indexTree.find(indexValue)); list.remove(address); } + +void Collection::test() { + index->print(); + auto qq = new BTree(8); + while(true) { + std::string q; + std::cin >> q; + List list(qq->find(q)); + //list.print(); + for(auto& add : list.all()) { + std::cout << ">> " << PageManager::Instance().readJSONFromFile(add).dump() << std::endl; + } + } +} diff --git a/invodb/models/collection.h b/invodb/models/collection.h index d98d75a..2459499 100644 --- a/invodb/models/collection.h +++ b/invodb/models/collection.h @@ -24,6 +24,9 @@ public: static void loadCollections(); static Collection& getCollection(const std::string& name); static Collection& createCollection(const std::string& name); + + void test(); + private: void indexJSON(const std::string prefix, const nlohmann::json &json, const int& address);