diff --git a/CMakeLists.txt b/CMakeLists.txt index 771d8e3..c21cb0d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,7 +1,7 @@ cmake_minimum_required(VERSION 3.16.3) project(InvoDB) -set(CMAKE_CXX_STANDARD 14) +set(CMAKE_CXX_STANDARD 17) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O0") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O0") @@ -11,4 +11,4 @@ include_directories(./invodb) add_executable(InvoDB invodb/main.cpp - invodb/main.h invodb/file/page_manager.cpp invodb/file/page_manager.h invodb/models/collection.cpp invodb/models/collection.h invodb/file/storage_page.cpp invodb/file/storage_page.h invodb/utils/logger.h invodb/utils/uuid.h invodb/btree/node.h invodb/btree/btree.h) + invodb/main.h invodb/file/page_manager.cpp invodb/file/page_manager.h invodb/models/collection.cpp invodb/models/collection.h invodb/file/storage_page.cpp invodb/file/storage_page.h invodb/utils/logger.h invodb/utils/uuid.h invodb/btree/node.h invodb/btree/btree.h invodb/index/index.cpp invodb/index/index.h invodb/btree/list.h) diff --git a/invodb/btree/btree.h b/invodb/btree/btree.h index eb4f9b3..78a2fd0 100644 --- a/invodb/btree/btree.h +++ b/invodb/btree/btree.h @@ -8,7 +8,7 @@ #include "btree/node.h" #include "utils/uuid.h" -template +template class BTree { public: BTree(const int& address); @@ -18,37 +18,35 @@ public: bool exists(const KT &key); int getNodeSize(); int find(const KT &key); + std::vector keySet(); + std::vector> all(); int size(); - private: + int findNode(const KT &key); void removeEntry(int curAdd, const KT& key, const int& pointer); bool canCoalesce(int curAdd, int sibAdd); void coalesce(int curAdd, int sibAdd); bool canRedistribute(int curAdd, int sibAdd); void redistribute(int curAdd, int sibAdd); - int findNode(const KT &key); void split(const KT &key, int address, int parentAdd, int curAdd); void insertInternal(const KT &key, int curAdd, int lLeafAdd, int rLeafAdd); int root; int n_size; + static const int M_SIZE = 1000 / (K_SIZE + 4); }; -// BTree BTreeNode - - -template -BTree::BTree(const int& address) { +template +BTree::BTree(const int& address) { root = address; - n_size = 0; } -template -void BTree::insert(const KT &key, const int &value) { - +template +void BTree::insert(const KT &key, const int &value) { if(exists(key)) { - throw "keySet already exists."; + update(key, value); + return; } n_size++; @@ -66,28 +64,29 @@ void BTree::insert(const KT &key, const int &value) { split(key, value, cur->parent, cur->address); } -template -void BTree::update(const KT &key, const int &value) { +template +void BTree::update(const KT &key, const int &value) { if(!exists(key)) { - throw "keySet doesn't exists."; + insert(key, value); + return; } auto cur = BTreeNode::getNode(findNode(key)); cur->linkSet[cur->findPos(key)] = value; cur->save(); } -template -void BTree::remove(const KT &key) { +template +void BTree::remove(const KT &key) { if(!exists(key)) { - throw "keySet already exists."; + return; } n_size--; auto cur = BTreeNode::getNode(findNode(key)); removeEntry(cur->address, key, find(key)); } -template -int BTree::find(const KT &key) { +template +int BTree::find(const KT &key) { auto cur = BTreeNode::getNode(findNode(key)); for(int i=0; isize; i++) { if(key == cur->keySet[i]) return cur->linkSet[i]; @@ -95,13 +94,13 @@ int BTree::find(const KT &key) { return -1; } -template -int BTree::size() { +template +int BTree::size() { return n_size; } -template -void BTree::removeEntry(int curAdd, const KT &key, const int &pointer) { +template +void BTree::removeEntry(int curAdd, const KT &key, const int &pointer) { auto cur = BTreeNode::getNode(curAdd); int pos = cur->findPos(key); if(pos == -1) return; @@ -138,8 +137,8 @@ void BTree::removeEntry(int curAdd, const KT &key, const int } } -template -bool BTree::canCoalesce(int curAdd, int sibAdd) { +template +bool BTree::canCoalesce(int curAdd, int sibAdd) { if(sibAdd == 0) return false; auto cur = BTreeNode::getNode(curAdd); auto sib = BTreeNode::getNode(sibAdd); @@ -147,8 +146,8 @@ bool BTree::canCoalesce(int curAdd, int sibAdd) { return (cur->size + sib->size <= BTreeNode::m - 1 - !cur->leaf); } -template -void BTree::coalesce(int curAdd, int sibAdd) { +template +void BTree::coalesce(int curAdd, int sibAdd) { auto cur = BTreeNode::getNode(curAdd); auto sib = BTreeNode::getNode(sibAdd); auto parent = BTreeNode::getNode(cur->parent); @@ -204,8 +203,8 @@ void BTree::coalesce(int curAdd, int sibAdd) { } } -template -bool BTree::canRedistribute(int curAdd, int sibAdd) { +template +bool BTree::canRedistribute(int curAdd, int sibAdd) { if(sibAdd == 0) return false; auto cur = BTreeNode::getNode(curAdd); auto sib = BTreeNode::getNode(sibAdd); @@ -213,8 +212,8 @@ bool BTree::canRedistribute(int curAdd, int sibAdd) { return sib->size > ((sib->m - !sib->leaf) / 2); } -template -void BTree::redistribute(int curAdd, int sibAdd) { +template +void BTree::redistribute(int curAdd, int sibAdd) { auto cur = BTreeNode::getNode(curAdd); auto sib = BTreeNode::getNode(sibAdd); auto parent = BTreeNode::getNode(cur->parent); @@ -277,10 +276,8 @@ void BTree::redistribute(int curAdd, int sibAdd) { parent->save(); } -template -int BTree::findNode(const KT &key) { - - +template +int BTree::findNode(const KT &key) { auto cur = BTreeNode::getNode(root); while(!cur->leaf) { for(int i=0; isize; i++) { @@ -297,8 +294,8 @@ int BTree::findNode(const KT &key) { return cur->address; } -template -void BTree::split(const KT &key, int address, int parentAdd, int curAdd) { +template +void BTree::split(const KT &key, int address, int parentAdd, int curAdd) { auto cur = BTreeNode::getNode(curAdd); cur->linkSet[cur->insert(key)] = address; @@ -348,8 +345,8 @@ void BTree::split(const KT &key, int address, int parentAdd, } } -template -void BTree::insertInternal(const KT &key, int curAdd, int lLeafAdd, int rLeafAdd) { +template +void BTree::insertInternal(const KT &key, int curAdd, int lLeafAdd, int rLeafAdd) { BTreeNode *cur = BTreeNode::getNode(curAdd); BTreeNode *lLeaf = BTreeNode::getNode(lLeafAdd); BTreeNode *rLeaf = BTreeNode::getNode(rLeafAdd); @@ -431,16 +428,50 @@ void BTree::insertInternal(const KT &key, int curAdd, int lL } } -template -int BTree::getNodeSize() { +template +int BTree::getNodeSize() { auto p = BTreeNode::getNode(root); return p->save(); } -template -bool BTree::exists(const KT &key) { +template +bool BTree::exists(const KT &key) { return find(key) != -1; } +template +std::vector BTree::keySet() { + auto p = BTreeNode::getNode(root); + while(!p->leaf) { + p = BTreeNode::getNode(p->linkSet[0]); + } + std::vector v; + while(true) { + for(int i=0; isize; i++) { + v.push_back(p->keySet[0]); + } + if(p->right == 0) break; + p = BTreeNode::getNode(p->right); + } + return v; +} + +template +std::vector> BTree::all() { + auto p = BTreeNode::getNode(root); + while(!p->leaf) { + p = BTreeNode::getNode(p->linkSet[0]); + } + std::vector> v; + while(true) { + for(int i=0; isize; i++) { + v.push_back(std::make_pair(p->keySet[i], p->linkSet[i])); + } + if(p->right == 0) break; + p = BTreeNode::getNode(p->right); + } + return v; +} + #endif //INVODB_BTREE_H diff --git a/invodb/btree/list.h b/invodb/btree/list.h new file mode 100644 index 0000000..d7e6d59 --- /dev/null +++ b/invodb/btree/list.h @@ -0,0 +1,34 @@ +// +// Created by YuhangQ on 2021/11/1. +// + +#ifndef INVODB_LIST_H +#define INVODB_LIST_H + +#include "btree.h" + +template +class List { +public: + void insert(T const& value) { + tree->insert(value, 0); + } + void remove(T const& value) { + tree->remove(value); + } + bool exists(T const& value) { + return tree->exists(value); + } + std::vector all() { + return tree->keySet(); + } + List(const int& address) { + tree = new BTree(address); + } + +private: + BTree* tree; +}; + + +#endif //INVODB_LIST_H diff --git a/invodb/btree/node.h b/invodb/btree/node.h index 3b5d7e3..9b8f056 100644 --- a/invodb/btree/node.h +++ b/invodb/btree/node.h @@ -15,8 +15,8 @@ template class BTreeNode { public: - static BTreeNode* getNode(const int &address); - static BTreeNode* release(const int &address); + static BTreeNode* getNode(const int &index); + static BTreeNode* release(const int &index); int insert(KT const &key); int findPos(KT const &key); void release(); @@ -68,7 +68,7 @@ BTreeNode::BTreeNode(const int& address): address(address) { } } } else { - for(int i=0; i<=m; i++) { + for(int i=0; i::BTreeNode(const int& address): address(address) { } template -BTreeNode *BTreeNode::getNode(const int &address) { - - std::cout << address << std::endl; - - - if(address < 4) { +BTreeNode *BTreeNode::getNode(const int &index) { + if(index < 4) { throw "invalid address!"; } static std::map*> map; - if(map.count(address) == 0) { - delete map[address]; - map[address] = new BTreeNode(address); + if(map.count(index) == 0) { + delete map[index]; + map[index] = new BTreeNode(index); } - return map[address]; + return map[index]; } template -BTreeNode *BTreeNode::release(const int &address) { +BTreeNode *BTreeNode::release(const int &index) { return nullptr; } @@ -131,7 +127,21 @@ void BTreeNode::release() { template void BTreeNode::clear() { - for(int i=0; i::value) { + ((std::string *)&keySet[i])->clear(); + } + if(std::is_same::value) { + *((double *)&keySet[i]) = 0; + } + if(std::is_same::value) { + *((bool *)&keySet[i]) = 0; + } + if(std::is_same::value) { + *((int *)&keySet[i]) = 0; + } + linkSet[i] = 0; + } size = 0; leaf = false; parent = 0; @@ -139,6 +149,8 @@ void BTreeNode::clear() { template int BTreeNode::save() { + + StoragePage page(address); int p = 0; page.setIntStartFrom(p, size); p += 4; @@ -154,7 +166,7 @@ int BTreeNode::save() { p += K_SIZE; } } else { - for(int i=0; i<=m; i++) { + for(int i=0; i::save() { p += 4; } - if(p >= 1024) { + if(p > 1024) { throw "too big page!"; } diff --git a/invodb/index/index.cpp b/invodb/index/index.cpp new file mode 100644 index 0000000..2eb27af --- /dev/null +++ b/invodb/index/index.cpp @@ -0,0 +1,5 @@ +// +// Created by YuhangQ on 2021/11/1. +// + +#include "index.h" diff --git a/invodb/index/index.h b/invodb/index/index.h new file mode 100644 index 0000000..1d88f44 --- /dev/null +++ b/invodb/index/index.h @@ -0,0 +1,25 @@ +// +// Created by YuhangQ on 2021/11/1. +// + +#ifndef INVODB_INDEX_H +#define INVODB_INDEX_H + +#include "btree/btree.h" +#include "json/json.hpp" + +class Index { +public: + void insert(const nlohmann::json& json); +private: + template + void insertElement(T const& key, const int& add) { + BTree treeString; + BTree treeBool; + BTree treeDouble; + BTree treeInt; + } +}; + + +#endif //INVODB_INDEX_H diff --git a/invodb/main.cpp b/invodb/main.cpp index 7db0050..2dda153 100644 --- a/invodb/main.cpp +++ b/invodb/main.cpp @@ -13,9 +13,10 @@ int main() { srand(t); printf("seed: %d\n", t); - system("rm -rf test.invodb && touch test.invodb"); + //system("rm -rf test.invodb && touch test.invodb"); PageManager::loadDatabase("test.invodb"); + Collection::loadCollections(); PageManager& manager = PageManager::Instance(); @@ -25,28 +26,36 @@ int main() { col = &Collection::getCollection("hello"); } catch(const char *error) { Collection::createCollection("hello"); + col = &Collection::getCollection("hello"); } std::string test; for(int i=0; i<100; i++) { test += generateUUID(); } - - nlohmann::json j; - j["hello"] = test; + nlohmann::json j = nlohmann::json::parse(R"( +{ + "string": "this is a string!", + "double": 3.1415, + "int": 25565, + "bool": true, + "child": { + "id": 3 + }, + "array": ["1", "2", "3"] +} + )"); col->insert(j); - //testAndBenchmark(100000); - - //btree->testAndBenchmark(100000); + col->remove(j); return 0; } void testAndBenchmark(int n) { - auto btree = new BTree<27, std::string, 32>(PageManager::Instance().allocate()); + auto btree = new BTree(PageManager::Instance().allocate()); printf("nodeSize: %d\n", btree->getNodeSize()); diff --git a/invodb/main.h b/invodb/main.h index b39729d..e51a415 100644 --- a/invodb/main.h +++ b/invodb/main.h @@ -9,7 +9,8 @@ #include #include #include -#include +#include "models/collection.h" +#include "btree/list.h" #endif //INVODB_MAIN_H diff --git a/invodb/models/collection.cpp b/invodb/models/collection.cpp index 16f268f..0dad326 100644 --- a/invodb/models/collection.cpp +++ b/invodb/models/collection.cpp @@ -4,28 +4,6 @@ #include "collection.h" -Collection::Collection(const std::string &name, const int &firstPage) { - Logger::info("load Collection: ", name); - tree = new BTree<3, std::string, 32>(firstPage); -} - -void Collection::insert(nlohmann::json &json) { - - //printf("fuck:%d\n", tree); - if(json["__INVO_ID__"].empty()) { - json["__INVO_ID__"] = generateUUID(); - } - - int add = PageManager::Instance().saveJSONToFile(json); - - std::string id = json["__INVO_ID__"].get(); - - tree->insert(id, add); - - auto tjson = PageManager::Instance().readJSONFromFile(add); - Logger::info("INSERT ", tjson.dump()); -} - std::map Collection::map; std::set Collection::free; @@ -41,7 +19,7 @@ void Collection::loadCollections() { int firstPage = page.getIntStartFrom(p + 28); // if free if (firstPage == 0) free.insert(id * 32 + i); - // not free + // not free else map.insert(make_pair(name, new Collection(name, firstPage))); } } @@ -71,7 +49,6 @@ Collection& Collection::createCollection(const std::string &name) { page.setStringStartFrom(id*32, name.c_str()); page.setIntStartFrom(id*32+28, collectionPage.getAddress()); - page.print(); page.save(); Collection *col = new Collection(name, collectionPage.getAddress()); @@ -88,3 +65,185 @@ Collection &Collection::getCollection(const std::string &name) { return *map[name]; } +Collection::Collection(const std::string &name, const int &firstPage) { + Logger::info("load Collection: ", name); + index = new BTree(firstPage); + if(!index->exists("__INVO_ID__")) { + index->insert("__INVO_ID__", PageManager::Instance().allocate()); + } + uuid = new BTree(index->find("__INVO_ID__")); +} + +void Collection::insert(nlohmann::json &json) { + if(json["__INVO_ID__"].empty()) { + json["__INVO_ID__"] = generateUUID(); + } else { + remove(json); + } + std::string id = json["__INVO_ID__"].get(); + int add = PageManager::Instance().saveJSONToFile(json); + uuid->insert(id, add); + Logger::info("INSERT ", json.dump()); + + // add index + indexJSON("", json, add); +} + +void Collection::remove(const nlohmann::json &json) { + if(json["__INVO_ID__"].empty()) { + throw "no invo_id"; + } + std::string id = json["__INVO_ID__"].get(); + uuid->remove(id); + + int address = uuid->find(id); + + nlohmann::json jsonInDisk = PageManager::Instance().readJSONFromFile(address); + + clearIndex("", json, address); + + PageManager::Instance().release(address); +} + +void Collection::indexJSON(const std::string prefix, const nlohmann::json &json, const int& address) { + // even easier with structured bindings (C++17) + for (auto& [key, value] : json.items()) { + std::cout << prefix << key << " : " << value << "\n"; + if(value.is_string()) insertIndex(prefix + key, value.get(), address); + if(value.is_number()) insertIndex(prefix + key, value.get(), address); + if(value.is_boolean()) insertIndex(prefix + key, value.get(), address); + if(value.is_object()) indexJSON(prefix + key + ".", value.get(),address); + if(value.is_array()) { + for(auto& element : value.get()) { + if(element.is_string()) insertIndex(prefix + key, element.get(), address); + if(element.is_number()) insertIndex(prefix + key, element.get(), address); + if(element.is_boolean()) insertIndex(prefix + key, element.get(), address); + } + } + } +} + +void Collection::insertIndex(const std::string indexName, const std::string indexValue, const int &address) { + printf("INDEX: %s = \"%s\" add:(%d)\n", indexName.c_str(), indexValue.c_str(), address); + + std::string treeName = indexName + "$string"; + if(!index->exists(treeName)) { + index->insert(treeName, PageManager::Instance().allocate()); + } + + BTree indexTree(index->find(treeName)); + + if(!indexTree.exists(indexValue)) { + indexTree.insert(indexValue, PageManager::Instance().allocate()); + } + + List list(indexTree.find(indexValue)); + list.insert(address); +} + +void Collection::insertIndex(const std::string indexName, double indexValue, const int &address) { + printf("INDEX: %s = %f add:(%d)\n", indexName.c_str(), indexValue, address); + + std::string treeName = indexName + "$number"; + if(!index->exists(treeName)) { + index->insert(treeName, PageManager::Instance().allocate()); + } + + BTree indexTree(index->find(treeName)); + + if(!indexTree.exists(indexValue)) { + indexTree.insert(indexValue, PageManager::Instance().allocate()); + } + + List list(indexTree.find(indexValue)); + list.insert(address); +} + +void Collection::insertIndex(const std::string indexName, bool indexValue, const int &address) { + printf("INDEX: %s = %s add:(%d)\n", indexName.c_str(), indexValue ? "true" : "false", address); + + std::string treeName = indexName + "$boolean"; + if(!index->exists(treeName)) { + index->insert(treeName, PageManager::Instance().allocate()); + } + + BTree indexTree(index->find(treeName)); + + if(!indexTree.exists(indexValue)) { + indexTree.insert(indexValue, PageManager::Instance().allocate()); + } + + List list(indexTree.find(indexValue)); + list.insert(address); +} + +void Collection::clearIndex(const std::string prefix, const nlohmann::json &json, const int &address) { + for (auto& [key, value] : json.items()) { + std::cout << prefix << key << " : " << value << "\n"; + if(value.is_string()) removeIndex(prefix + key, value.get(), address); + if(value.is_number()) removeIndex(prefix + key, value.get(), address); + if(value.is_boolean()) removeIndex(prefix + key, value.get(), address); + if(value.is_object()) clearIndex(prefix + key + ".", value.get(),address); + if(value.is_array()) { + for(auto& element : value.get()) { + if(element.is_string()) removeIndex(prefix + key, element.get(), address); + if(element.is_number()) removeIndex(prefix + key, element.get(), address); + if(element.is_boolean()) removeIndex(prefix + key, element.get(), address); + } + } + } +} + +void Collection::removeIndex(const std::string indexName, const std::string indexValue, const int &address) { + printf("REMOVE: %s = \"%s\" add:(%d)\n", indexName.c_str(), indexValue.c_str(), address); + + std::string treeName = indexName + "$string"; + if(!index->exists(treeName)) { + index->insert(treeName, PageManager::Instance().allocate()); + } + + BTree indexTree(index->find(treeName)); + + if(!indexTree.exists(indexValue)) { + indexTree.insert(indexValue, PageManager::Instance().allocate()); + } + + List list(indexTree.find(indexValue)); + list.remove(address); +} + +void Collection::removeIndex(const std::string indexName, double indexValue, const int &address) { + printf("REMOVE: %s = %f add:(%d)\n", indexName.c_str(), indexValue, address); + + std::string treeName = indexName + "$number"; + if(!index->exists(treeName)) { + index->insert(treeName, PageManager::Instance().allocate()); + } + + BTree indexTree(index->find(treeName)); + + if(!indexTree.exists(indexValue)) { + indexTree.insert(indexValue, PageManager::Instance().allocate()); + } + + List list(indexTree.find(indexValue)); + list.remove(address); +} + +void Collection::removeIndex(const std::string indexName, bool indexValue, const int &address) { + printf("REMOVE: %s = %s add:(%d)\n", indexName.c_str(), indexValue ? "true" : "false", address); + + std::string treeName = indexName + "$boolean"; + if(!index->exists(treeName)) { + index->insert(treeName, PageManager::Instance().allocate()); + } + + BTree indexTree(index->find(treeName)); + + if(!indexTree.exists(indexValue)) { + indexTree.insert(indexValue, PageManager::Instance().allocate()); + } + + List list(indexTree.find(indexValue)); + list.remove(address); +} diff --git a/invodb/models/collection.h b/invodb/models/collection.h index 8d5686c..7f11968 100644 --- a/invodb/models/collection.h +++ b/invodb/models/collection.h @@ -14,19 +14,32 @@ #include #include #include "utils/uuid.h" +#include "btree/list.h" class Collection { public: void insert(nlohmann::json &json); + void remove(const nlohmann::json &json); static void loadCollections(); static Collection& getCollection(const std::string& name); static Collection& createCollection(const std::string& name); private: + + void indexJSON(const std::string prefix, const nlohmann::json &json, const int& address); + void insertIndex(const std::string indexName, const std::string indexValue, const int& address); + void insertIndex(const std::string indexName, double indexValue, const int& address); + void insertIndex(const std::string indexName, bool indexValue, const int& address); + void clearIndex(const std::string prefix, const nlohmann::json &json, const int& address); + void removeIndex(const std::string indexName, const std::string indexValue, const int& address); + void removeIndex(const std::string indexName, double indexValue, const int& address); + void removeIndex(const std::string indexName, bool indexValue, const int& address); + static std::map map; static std::set free; - BTree<3, std::string, 32> *tree; + BTree *uuid; + BTree *index; Collection(const std::string& name,const int& firstPage); Collection() {}