From 56075f3a15c382f530056936d210d7d2940cdda2 Mon Sep 17 00:00:00 2001
From: YuhangQ <i@yuhangq.com>
Date: Mon, 1 Nov 2021 15:11:33 +0800
Subject: [PATCH] add index tree

---
 CMakeLists.txt               |   4 +-
 invodb/btree/btree.h         | 121 ++++++++++++--------
 invodb/btree/list.h          |  34 ++++++
 invodb/btree/node.h          |  46 +++++---
 invodb/index/index.cpp       |   5 +
 invodb/index/index.h         |  25 +++++
 invodb/main.cpp              |  25 +++--
 invodb/main.h                |   3 +-
 invodb/models/collection.cpp | 207 +++++++++++++++++++++++++++++++----
 invodb/models/collection.h   |  15 ++-
 10 files changed, 387 insertions(+), 98 deletions(-)
 create mode 100644 invodb/btree/list.h
 create mode 100644 invodb/index/index.cpp
 create mode 100644 invodb/index/index.h

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 771d8e3..c21cb0d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,7 +1,7 @@
 cmake_minimum_required(VERSION 3.16.3)
 project(InvoDB)
 
-set(CMAKE_CXX_STANDARD 14)
+set(CMAKE_CXX_STANDARD 17)
 
 set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O0")
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O0")
@@ -11,4 +11,4 @@ include_directories(./invodb)
 
 add_executable(InvoDB
         invodb/main.cpp
-        invodb/main.h invodb/file/page_manager.cpp invodb/file/page_manager.h invodb/models/collection.cpp invodb/models/collection.h invodb/file/storage_page.cpp invodb/file/storage_page.h invodb/utils/logger.h invodb/utils/uuid.h invodb/btree/node.h invodb/btree/btree.h)
+        invodb/main.h invodb/file/page_manager.cpp invodb/file/page_manager.h invodb/models/collection.cpp invodb/models/collection.h invodb/file/storage_page.cpp invodb/file/storage_page.h invodb/utils/logger.h invodb/utils/uuid.h invodb/btree/node.h invodb/btree/btree.h invodb/index/index.cpp invodb/index/index.h invodb/btree/list.h)
diff --git a/invodb/btree/btree.h b/invodb/btree/btree.h
index eb4f9b3..78a2fd0 100644
--- a/invodb/btree/btree.h
+++ b/invodb/btree/btree.h
@@ -8,7 +8,7 @@
 #include "btree/node.h"
 #include "utils/uuid.h"
 
-template<int M_SIZE, typename KT, int K_SIZE>
+template<typename KT, int K_SIZE>
 class BTree {
 public:
     BTree(const int& address);
@@ -18,37 +18,35 @@ public:
     bool exists(const KT &key);
     int getNodeSize();
     int find(const KT &key);
+    std::vector<KT> keySet();
+    std::vector<std::pair<KT, int>> all();
     int size();
-
 private:
+    int findNode(const KT &key);
     void removeEntry(int curAdd, const KT& key, const int& pointer);
     bool canCoalesce(int curAdd, int sibAdd);
     void coalesce(int curAdd, int sibAdd);
     bool canRedistribute(int curAdd, int sibAdd);
     void redistribute(int curAdd, int sibAdd);
-    int findNode(const KT &key);
     void split(const KT &key, int address, int parentAdd, int curAdd);
     void insertInternal(const KT &key, int curAdd, int lLeafAdd, int rLeafAdd);
     int root;
     int n_size;
+    static const int M_SIZE = 1000 / (K_SIZE + 4);
 };
 
-// BTree<M_SIZE, KT, K_SIZE> BTreeNode<M_SIZE, KT, K_SIZE>
-
-
-template<int M_SIZE, typename KT, int K_SIZE>
-BTree<M_SIZE, KT, K_SIZE>::BTree(const int& address) {
+template<typename KT, int K_SIZE>
+BTree<KT, K_SIZE>::BTree(const int& address) {
     root = address;
-
     n_size = 0;
 }
 
-template<int M_SIZE, typename KT, int K_SIZE>
-void BTree<M_SIZE, KT, K_SIZE>::insert(const KT &key, const int &value) {
-
+template<typename KT, int K_SIZE>
+void BTree<KT, K_SIZE>::insert(const KT &key, const int &value) {
 
     if(exists(key)) {
-        throw "keySet already exists.";
+        update(key, value);
+        return;
     }
 
     n_size++;
@@ -66,28 +64,29 @@ void BTree<M_SIZE, KT, K_SIZE>::insert(const KT &key, const int &value) {
     split(key, value, cur->parent, cur->address);
 }
 
-template<int M_SIZE, typename KT, int K_SIZE>
-void BTree<M_SIZE, KT, K_SIZE>::update(const KT &key, const int &value) {
+template<typename KT, int K_SIZE>
+void BTree<KT, K_SIZE>::update(const KT &key, const int &value) {
     if(!exists(key)) {
-        throw "keySet doesn't exists.";
+        insert(key, value);
+        return;
     }
     auto cur = BTreeNode<M_SIZE, KT, K_SIZE>::getNode(findNode(key));
     cur->linkSet[cur->findPos(key)] = value;
     cur->save();
 }
 
-template<int M_SIZE, typename KT, int K_SIZE>
-void BTree<M_SIZE, KT, K_SIZE>::remove(const KT &key) {
+template<typename KT, int K_SIZE>
+void BTree<KT, K_SIZE>::remove(const KT &key) {
     if(!exists(key)) {
-        throw "keySet already exists.";
+        return;
     }
     n_size--;
     auto cur = BTreeNode<M_SIZE, KT, K_SIZE>::getNode(findNode(key));
     removeEntry(cur->address, key, find(key));
 }
 
-template<int M_SIZE, typename KT, int K_SIZE>
-int BTree<M_SIZE, KT, K_SIZE>::find(const KT &key) {
+template<typename KT, int K_SIZE>
+int BTree<KT, K_SIZE>::find(const KT &key) {
     auto cur = BTreeNode<M_SIZE, KT, K_SIZE>::getNode(findNode(key));
     for(int i=0; i<cur->size; i++) {
         if(key == cur->keySet[i]) return cur->linkSet[i];
@@ -95,13 +94,13 @@ int BTree<M_SIZE, KT, K_SIZE>::find(const KT &key) {
     return -1;
 }
 
-template<int M_SIZE, typename KT, int K_SIZE>
-int BTree<M_SIZE, KT, K_SIZE>::size() {
+template<typename KT, int K_SIZE>
+int BTree<KT, K_SIZE>::size() {
     return n_size;
 }
 
-template<int M_SIZE, typename KT, int K_SIZE>
-void BTree<M_SIZE, KT, K_SIZE>::removeEntry(int curAdd, const KT &key, const int &pointer) {
+template<typename KT, int K_SIZE>
+void BTree<KT, K_SIZE>::removeEntry(int curAdd, const KT &key, const int &pointer) {
     auto cur = BTreeNode<M_SIZE, KT, K_SIZE>::getNode(curAdd);
     int pos = cur->findPos(key);
     if(pos == -1) return;
@@ -138,8 +137,8 @@ void BTree<M_SIZE, KT, K_SIZE>::removeEntry(int curAdd, const KT &key, const int
     }
 }
 
-template<int M_SIZE, typename KT, int K_SIZE>
-bool BTree<M_SIZE, KT, K_SIZE>::canCoalesce(int curAdd, int sibAdd) {
+template<typename KT, int K_SIZE>
+bool BTree<KT, K_SIZE>::canCoalesce(int curAdd, int sibAdd) {
     if(sibAdd == 0) return false;
     auto cur = BTreeNode<M_SIZE, KT, K_SIZE>::getNode(curAdd);
     auto sib = BTreeNode<M_SIZE, KT, K_SIZE>::getNode(sibAdd);
@@ -147,8 +146,8 @@ bool BTree<M_SIZE, KT, K_SIZE>::canCoalesce(int curAdd, int sibAdd) {
     return (cur->size + sib->size <= BTreeNode<M_SIZE, KT, K_SIZE>::m - 1 - !cur->leaf);
 }
 
-template<int M_SIZE, typename KT, int K_SIZE>
-void BTree<M_SIZE, KT, K_SIZE>::coalesce(int curAdd, int sibAdd) {
+template<typename KT, int K_SIZE>
+void BTree<KT, K_SIZE>::coalesce(int curAdd, int sibAdd) {
     auto cur = BTreeNode<M_SIZE, KT, K_SIZE>::getNode(curAdd);
     auto sib = BTreeNode<M_SIZE, KT, K_SIZE>::getNode(sibAdd);
     auto parent = BTreeNode<M_SIZE, KT, K_SIZE>::getNode(cur->parent);
@@ -204,8 +203,8 @@ void BTree<M_SIZE, KT, K_SIZE>::coalesce(int curAdd, int sibAdd) {
     }
 }
 
-template<int M_SIZE, typename KT, int K_SIZE>
-bool BTree<M_SIZE, KT, K_SIZE>::canRedistribute(int curAdd, int sibAdd) {
+template<typename KT, int K_SIZE>
+bool BTree<KT, K_SIZE>::canRedistribute(int curAdd, int sibAdd) {
     if(sibAdd == 0) return false;
     auto cur = BTreeNode<M_SIZE, KT, K_SIZE>::getNode(curAdd);
     auto sib = BTreeNode<M_SIZE, KT, K_SIZE>::getNode(sibAdd);
@@ -213,8 +212,8 @@ bool BTree<M_SIZE, KT, K_SIZE>::canRedistribute(int curAdd, int sibAdd) {
     return sib->size > ((sib->m - !sib->leaf) / 2);
 }
 
-template<int M_SIZE, typename KT, int K_SIZE>
-void BTree<M_SIZE, KT, K_SIZE>::redistribute(int curAdd, int sibAdd) {
+template<typename KT, int K_SIZE>
+void BTree<KT, K_SIZE>::redistribute(int curAdd, int sibAdd) {
     auto cur = BTreeNode<M_SIZE, KT, K_SIZE>::getNode(curAdd);
     auto sib = BTreeNode<M_SIZE, KT, K_SIZE>::getNode(sibAdd);
     auto parent = BTreeNode<M_SIZE, KT, K_SIZE>::getNode(cur->parent);
@@ -277,10 +276,8 @@ void BTree<M_SIZE, KT, K_SIZE>::redistribute(int curAdd, int sibAdd) {
     parent->save();
 }
 
-template<int M_SIZE, typename KT, int K_SIZE>
-int BTree<M_SIZE, KT, K_SIZE>::findNode(const KT &key) {
-
-
+template<typename KT, int K_SIZE>
+int BTree<KT, K_SIZE>::findNode(const KT &key) {
     auto cur = BTreeNode<M_SIZE, KT, K_SIZE>::getNode(root);
     while(!cur->leaf) {
         for(int i=0; i<cur->size; i++) {
@@ -297,8 +294,8 @@ int BTree<M_SIZE, KT, K_SIZE>::findNode(const KT &key) {
     return cur->address;
 }
 
-template<int M_SIZE, typename KT, int K_SIZE>
-void BTree<M_SIZE, KT, K_SIZE>::split(const KT &key, int address, int parentAdd, int curAdd) {
+template<typename KT, int K_SIZE>
+void BTree<KT, K_SIZE>::split(const KT &key, int address, int parentAdd, int curAdd) {
     auto cur = BTreeNode<M_SIZE, KT, K_SIZE>::getNode(curAdd);
 
     cur->linkSet[cur->insert(key)] = address;
@@ -348,8 +345,8 @@ void BTree<M_SIZE, KT, K_SIZE>::split(const KT &key, int address, int parentAdd,
     }
 }
 
-template<int M_SIZE, typename KT, int K_SIZE>
-void BTree<M_SIZE, KT, K_SIZE>::insertInternal(const KT &key, int curAdd, int lLeafAdd, int rLeafAdd) {
+template<typename KT, int K_SIZE>
+void BTree<KT, K_SIZE>::insertInternal(const KT &key, int curAdd, int lLeafAdd, int rLeafAdd) {
     BTreeNode<M_SIZE, KT, K_SIZE> *cur = BTreeNode<M_SIZE, KT, K_SIZE>::getNode(curAdd);
     BTreeNode<M_SIZE, KT, K_SIZE> *lLeaf = BTreeNode<M_SIZE, KT, K_SIZE>::getNode(lLeafAdd);
     BTreeNode<M_SIZE, KT, K_SIZE> *rLeaf = BTreeNode<M_SIZE, KT, K_SIZE>::getNode(rLeafAdd);
@@ -431,16 +428,50 @@ void BTree<M_SIZE, KT, K_SIZE>::insertInternal(const KT &key, int curAdd, int lL
     }
 }
 
-template<int M_SIZE, typename KT, int K_SIZE>
-int BTree<M_SIZE, KT, K_SIZE>::getNodeSize() {
+template<typename KT, int K_SIZE>
+int BTree<KT, K_SIZE>::getNodeSize() {
     auto p = BTreeNode<M_SIZE, KT, K_SIZE>::getNode(root);
     return p->save();
 }
 
-template<int M_SIZE, typename KT, int K_SIZE>
-bool BTree<M_SIZE, KT, K_SIZE>::exists(const KT &key) {
+template<typename KT, int K_SIZE>
+bool BTree<KT, K_SIZE>::exists(const KT &key) {
     return find(key) != -1;
 }
 
+template<typename KT, int K_SIZE>
+std::vector<KT> BTree<KT, K_SIZE>::keySet() { // Returns every key, read leaf by leaf via the `right` links.
+    auto p = BTreeNode<M_SIZE, KT, K_SIZE>::getNode(root);
+    while(!p->leaf) { // descend along the first link until a leaf is reached
+        p = BTreeNode<M_SIZE, KT, K_SIZE>::getNode(p->linkSet[0]);
+    }
+    std::vector<KT> v;
+    while(true) {
+        for(int i=0; i<p->size; i++) {
+            v.push_back(p->keySet[i]); // was keySet[0], which repeated the leaf's first key `size` times
+        }
+        if(p->right == 0) break; // a zero `right` link ends the walk
+        p = BTreeNode<M_SIZE, KT, K_SIZE>::getNode(p->right);
+    }
+    return v;
+}
+
+template<typename KT, int K_SIZE>
+std::vector<std::pair<KT, int>> BTree<KT, K_SIZE>::all() { // Lists each (key, link) pair stored in the leaves.
+    using Node = BTreeNode<M_SIZE, KT, K_SIZE>;
+    std::vector<std::pair<KT, int>> entries;
+    auto node = Node::getNode(root);
+    while(!node->leaf) { // follow the first link of every inner node down to a leaf
+        node = Node::getNode(node->linkSet[0]);
+    }
+    for(;;) {
+        for(int slot = 0; slot < node->size; ++slot) {
+            entries.emplace_back(node->keySet[slot], node->linkSet[slot]);
+        }
+        if(node->right == 0) return entries; // no right sibling: the walk is complete
+        node = Node::getNode(node->right);
+    }
+}
+
 
 #endif //INVODB_BTREE_H
diff --git a/invodb/btree/list.h b/invodb/btree/list.h
new file mode 100644
index 0000000..d7e6d59
--- /dev/null
+++ b/invodb/btree/list.h
@@ -0,0 +1,34 @@
+//
+// Created by YuhangQ on 2021/11/1.
+//
+
+#ifndef INVODB_LIST_H
+#define INVODB_LIST_H
+
+#include "btree.h"
+
+// Set-like container of T values backed by a BTree rooted at the given address.
+// Values are stored as tree keys; the mapped link is unused and always written as 0.
+template<typename T, int T_SIZE>
+class List {
+public:
+    List(const int& address) : tree(address) {}
+    void insert(T const& value) {
+        tree.insert(value, 0);
+    }
+    void remove(T const& value) {
+        tree.remove(value);
+    }
+    bool exists(T const& value) {
+        return tree.exists(value);
+    }
+    std::vector<T> all() {
+        return tree.keySet();
+    }
+
+private:
+    BTree<T, T_SIZE> tree; // held by value: the original heap-allocated tree was never freed
+};
+
+
+#endif //INVODB_LIST_H
diff --git a/invodb/btree/node.h b/invodb/btree/node.h
index 3b5d7e3..9b8f056 100644
--- a/invodb/btree/node.h
+++ b/invodb/btree/node.h
@@ -15,8 +15,8 @@
 template<int M_SIZE, typename KT, int K_SIZE>
 class BTreeNode {
 public:
-    static BTreeNode<M_SIZE, KT, K_SIZE>* getNode(const int &address);
-    static BTreeNode<M_SIZE, KT, K_SIZE>* release(const int &address);
+    static BTreeNode<M_SIZE, KT, K_SIZE>* getNode(const int &index);
+    static BTreeNode<M_SIZE, KT, K_SIZE>* release(const int &index);
     int insert(KT const &key);
     int findPos(KT const &key);
     void release();
@@ -68,7 +68,7 @@ BTreeNode<M_SIZE, KT, K_SIZE>::BTreeNode(const int& address): address(address) {
             }
         }
     } else {
-        for(int i=0; i<=m; i++) {
+        for(int i=0; i<m; i++) {
             keySet[i] = *(KT*)(&page[p]);
             p += K_SIZE;
         }
@@ -82,24 +82,20 @@ BTreeNode<M_SIZE, KT, K_SIZE>::BTreeNode(const int& address): address(address) {
 }
 
 template<int M_SIZE, typename KT, int K_SIZE>
-BTreeNode<M_SIZE, KT, K_SIZE> *BTreeNode<M_SIZE, KT, K_SIZE>::getNode(const int &address) {
-
-    std::cout << address << std::endl;
-
-
-    if(address < 4) {
+BTreeNode<M_SIZE, KT, K_SIZE> *BTreeNode<M_SIZE, KT, K_SIZE>::getNode(const int &index) {
+    if(index < 4) {
         throw "invalid address!";
     }
     static std::map<int, BTreeNode<M_SIZE, KT, K_SIZE>*> map;
-    if(map.count(address) == 0) {
-        delete map[address];
-        map[address] = new BTreeNode<M_SIZE, KT, K_SIZE>(address);
+    if(map.count(index) == 0) {
+        delete map[index];
+        map[index] = new BTreeNode<M_SIZE, KT, K_SIZE>(index);
     }
-    return map[address];
+    return map[index];
 }
 
 template<int M_SIZE, typename KT, int K_SIZE>
-BTreeNode<M_SIZE, KT, K_SIZE> *BTreeNode<M_SIZE, KT, K_SIZE>::release(const int &address) {
+BTreeNode<M_SIZE, KT, K_SIZE> *BTreeNode<M_SIZE, KT, K_SIZE>::release(const int &index) {
     return nullptr;
 }
 
@@ -131,7 +127,21 @@ void BTreeNode<M_SIZE, KT, K_SIZE>::release() {
 
 template<int M_SIZE, typename KT, int K_SIZE>
 void BTreeNode<M_SIZE, KT, K_SIZE>::clear() {
-    for(int i=0; i<m+1; i++) keySet[i].clear(), linkSet[i] = 0;
+    for(int i=0; i<m+1; i++) {
+        if(std::is_same<KT, std::string>::value) {
+            ((std::string *)&keySet[i])->clear();
+        }
+        if(std::is_same<KT, double>::value) {
+            *((double *)&keySet[i]) = 0;
+        }
+        if(std::is_same<KT, bool>::value) {
+            *((bool *)&keySet[i]) = 0;
+        }
+        if(std::is_same<KT, int>::value) {
+            *((int *)&keySet[i]) = 0;
+        }
+        linkSet[i] = 0;
+    }
     size = 0;
     leaf = false;
     parent = 0;
@@ -139,6 +149,8 @@ void BTreeNode<M_SIZE, KT, K_SIZE>::clear() {
 
 template<int M_SIZE, typename KT, int K_SIZE>
 int BTreeNode<M_SIZE, KT, K_SIZE>::save() {
+
+
     StoragePage page(address);
     int p = 0;
     page.setIntStartFrom(p, size); p += 4;
@@ -154,7 +166,7 @@ int BTreeNode<M_SIZE, KT, K_SIZE>::save() {
             p += K_SIZE;
         }
     } else {
-        for(int i=0; i<=m; i++) {
+        for(int i=0; i<m; i++) {
             page.setStartFrom(p, &keySet[i], K_SIZE);
             p += K_SIZE;
         }
@@ -165,7 +177,7 @@ int BTreeNode<M_SIZE, KT, K_SIZE>::save() {
         p += 4;
     }
 
-    if(p >= 1024) {
+    if(p > 1024) {
         throw "too big page!";
     }
 
diff --git a/invodb/index/index.cpp b/invodb/index/index.cpp
new file mode 100644
index 0000000..2eb27af
--- /dev/null
+++ b/invodb/index/index.cpp
@@ -0,0 +1,5 @@
+//
+// Created by YuhangQ on 2021/11/1.
+//
+
+#include "index.h"
diff --git a/invodb/index/index.h b/invodb/index/index.h
new file mode 100644
index 0000000..1d88f44
--- /dev/null
+++ b/invodb/index/index.h
@@ -0,0 +1,25 @@
+//
+// Created by YuhangQ on 2021/11/1.
+//
+
+#ifndef INVODB_INDEX_H
+#define INVODB_INDEX_H
+
+#include "btree/btree.h"
+#include "json/json.hpp"
+
+class Index { // NOTE(review): stub — nothing in this header implements any indexing yet.
+public:
+    void insert(const nlohmann::json& json); // declared only; no definition appears in this header
+private:
+    template<typename T>
+    void insertElement(T const& key, const int& add) { // NOTE(review): placeholder — key and add are never used
+        BTree<T, 128> treeString; // NOTE(review): the visible BTree declaration shows only BTree(const int&),
+        BTree<T, 1> treeBool; //   so these argument-less locals will presumably fail to compile once
+        BTree<T, 8> treeDouble; //   this template is instantiated — confirm and pass a root address.
+        BTree<T, 4> treeInt;
+    }
+};
+
+
+#endif //INVODB_INDEX_H
diff --git a/invodb/main.cpp b/invodb/main.cpp
index 7db0050..2dda153 100644
--- a/invodb/main.cpp
+++ b/invodb/main.cpp
@@ -13,9 +13,10 @@ int main() {
     srand(t);
     printf("seed: %d\n", t);
 
-    system("rm -rf test.invodb && touch test.invodb");
+    //system("rm -rf test.invodb && touch test.invodb");
 
     PageManager::loadDatabase("test.invodb");
+
     Collection::loadCollections();
 
     PageManager& manager = PageManager::Instance();
@@ -25,28 +26,36 @@ int main() {
        col = &Collection::getCollection("hello");
     } catch(const char *error) {
         Collection::createCollection("hello");
+        col = &Collection::getCollection("hello");
     }
 
     std::string test;
     for(int i=0; i<100; i++) {
         test += generateUUID();
     }
-
-    nlohmann::json j;
-    j["hello"] = test;
+    nlohmann::json j = nlohmann::json::parse(R"(
+{
+    "string": "this is a string!",
+    "double": 3.1415,
+    "int": 25565,
+    "bool": true,
+    "child": {
+        "id": 3
+    },
+    "array": ["1", "2", "3"]
+}
+    )");
 
     col->insert(j);
 
-    //testAndBenchmark(100000);
-
-    //btree->testAndBenchmark(100000);
+    col->remove(j);
 
     return 0;
 }
 
 void testAndBenchmark(int n) {
 
-    auto btree = new BTree<27, std::string, 32>(PageManager::Instance().allocate());
+    auto btree = new BTree<std::string, 32>(PageManager::Instance().allocate());
 
     printf("nodeSize: %d\n", btree->getNodeSize());
 
diff --git a/invodb/main.h b/invodb/main.h
index b39729d..e51a415 100644
--- a/invodb/main.h
+++ b/invodb/main.h
@@ -9,7 +9,8 @@
 #include <cstdlib>
 #include <ctime>
 #include <vector>
-#include <models/collection.h>
+#include "models/collection.h"
+#include "btree/list.h"
 
 
 #endif //INVODB_MAIN_H
diff --git a/invodb/models/collection.cpp b/invodb/models/collection.cpp
index 16f268f..0dad326 100644
--- a/invodb/models/collection.cpp
+++ b/invodb/models/collection.cpp
@@ -4,28 +4,6 @@
 
 #include "collection.h"
 
-Collection::Collection(const std::string &name, const int &firstPage) {
-    Logger::info<std::string, std::string>("load Collection: ", name);
-    tree = new BTree<3, std::string, 32>(firstPage);
-}
-
-void Collection::insert(nlohmann::json &json) {
-
-    //printf("fuck:%d\n", tree);
-    if(json["__INVO_ID__"].empty()) {
-        json["__INVO_ID__"] = generateUUID();
-    }
-
-    int add = PageManager::Instance().saveJSONToFile(json);
-
-    std::string id = json["__INVO_ID__"].get<std::string>();
-
-    tree->insert(id, add);
-
-    auto tjson = PageManager::Instance().readJSONFromFile(add);
-    Logger::info<std::string, std::string>("INSERT ", tjson.dump());
-}
-
 std::map<std::string, Collection*> Collection::map;
 std::set<int> Collection::free;
 
@@ -41,7 +19,7 @@ void Collection::loadCollections() {
             int firstPage = page.getIntStartFrom(p + 28);
             // if free
             if (firstPage == 0) free.insert(id * 32 + i);
-                // not free
+            // not free
             else map.insert(make_pair(name, new Collection(name, firstPage)));
         }
     }
@@ -71,7 +49,6 @@ Collection& Collection::createCollection(const std::string &name) {
 
     page.setStringStartFrom(id*32, name.c_str());
     page.setIntStartFrom(id*32+28, collectionPage.getAddress());
-    page.print();
     page.save();
 
     Collection *col = new Collection(name, collectionPage.getAddress());
@@ -88,3 +65,185 @@ Collection &Collection::getCollection(const std::string &name) {
     return *map[name];
 }
 
+Collection::Collection(const std::string &name, const int &firstPage) { // Opens the index tree at firstPage and the id tree it references.
+    Logger::info<std::string, std::string>("load Collection: ", name);
+    const std::string idTreeKey = "__INVO_ID__";
+    index = new BTree<std::string, 128>(firstPage);
+    // When the index has no entry for the id tree yet, allocate a page to root it.
+    if(index->exists(idTreeKey) == false) index->insert(idTreeKey, PageManager::Instance().allocate());
+    uuid = new BTree<std::string, 32>(index->find(idTreeKey));
+}
+
+void Collection::insert(nlohmann::json &json) { // Saves the document, records its address by id, then indexes its fields.
+    nlohmann::json &idField = json["__INVO_ID__"];
+    if(!idField.empty()) {
+        remove(json); // the caller supplied an id: drop what is stored under it first
+    } else {
+        idField = generateUUID();
+    }
+    const std::string documentId = json["__INVO_ID__"].get<std::string>();
+    const int pageAddress = PageManager::Instance().saveJSONToFile(json);
+    uuid->insert(documentId, pageAddress);
+    Logger::info<std::string, std::string>("INSERT ", json.dump());
+    // Register the document's fields in the index under its page address.
+    indexJSON("", json, pageAddress);
+}
+
+void Collection::remove(const nlohmann::json &json) { // Deletes the stored document whose id matches json's "__INVO_ID__".
+    // Throws the C string "no invo_id" when the id is absent or null; ids not present in the tree are ignored.
+    if(json.count("__INVO_ID__") == 0 || json["__INVO_ID__"].empty()) {
+        throw "no invo_id";
+    }
+    std::string id = json["__INVO_ID__"].get<std::string>();
+    // Resolve the address while the id is still in the tree; the original called find() after uuid->remove(id) and always got -1.
+    int address = uuid->find(id);
+    if(address == -1) return; // find() reports a missing key as -1
+    // Un-index the stored copy rather than the caller's json, which may differ from what was indexed.
+    nlohmann::json jsonInDisk = PageManager::Instance().readJSONFromFile(address);
+    clearIndex("", jsonInDisk, address);
+    uuid->remove(id);
+    PageManager::Instance().release(address);
+}
+
+void Collection::indexJSON(const std::string prefix, const nlohmann::json &json, const int& address) { // Indexes each scalar member under its dotted path.
+    for (auto& [name, field] : json.items()) {
+        const std::string path = prefix + name;
+        std::cout << prefix << name << " : " << field << "\n";
+        if(field.is_string()) insertIndex(path, field.get<std::string>(), address);
+        else if(field.is_number()) insertIndex(path, field.get<double>(), address);
+        else if(field.is_boolean()) insertIndex(path, field.get<bool>(), address);
+        else if(field.is_object()) indexJSON(path + ".", field, address);
+        else if(field.is_array()) {
+            for(auto& item : field) { // array scalars share the array's own path; nested containers are skipped
+                if(item.is_string()) insertIndex(path, item.get<std::string>(), address);
+                else if(item.is_number()) insertIndex(path, item.get<double>(), address);
+                else if(item.is_boolean()) insertIndex(path, item.get<bool>(), address);
+            }
+        }
+    }
+}
+
+void Collection::insertIndex(const std::string indexName, const std::string indexValue, const int &address) {
+    // Adds address to the address list kept for (indexName, string indexValue).
+    printf("INDEX: %s = \"%s\" add:(%d)\n", indexName.c_str(), indexValue.c_str(), address);
+
+    // The collection index maps "<field>$string" to the root page of that field's value tree.
+    const std::string valueTreeKey = indexName + "$string";
+    if(index->exists(valueTreeKey) == false) {
+        const int valueTreeRoot = PageManager::Instance().allocate();
+        index->insert(valueTreeKey, valueTreeRoot);
+    }
+    BTree<std::string, 128> valueTree(index->find(valueTreeKey));
+
+    // The value tree maps each string to the root page of its address list, created on first use.
+    if(valueTree.exists(indexValue) == false) valueTree.insert(indexValue, PageManager::Instance().allocate());
+    List<int, 4> addresses(valueTree.find(indexValue));
+    addresses.insert(address);
+}
+
+void Collection::insertIndex(const std::string indexName, double indexValue, const int &address) {
+    // Adds address to the address list kept for (indexName, numeric indexValue).
+    printf("INDEX: %s = %f add:(%d)\n", indexName.c_str(), indexValue, address);
+
+    // The collection index maps "<field>$number" to the root page of that field's value tree.
+    const std::string valueTreeKey = indexName + "$number";
+    if(index->exists(valueTreeKey) == false) {
+        const int valueTreeRoot = PageManager::Instance().allocate();
+        index->insert(valueTreeKey, valueTreeRoot);
+    }
+    BTree<double, 8> valueTree(index->find(valueTreeKey));
+
+    // The value tree maps each number to the root page of its address list, created on first use.
+    if(valueTree.exists(indexValue) == false) valueTree.insert(indexValue, PageManager::Instance().allocate());
+    List<int, 4> addresses(valueTree.find(indexValue));
+    addresses.insert(address);
+}
+
+void Collection::insertIndex(const std::string indexName, bool indexValue, const int &address) {
+    // Adds address to the address list kept for (indexName, boolean indexValue).
+    printf("INDEX: %s = %s add:(%d)\n", indexName.c_str(), indexValue ? "true" : "false", address);
+
+    // The collection index maps "<field>$boolean" to the root page of that field's value tree.
+    const std::string valueTreeKey = indexName + "$boolean";
+    if(index->exists(valueTreeKey) == false) {
+        const int valueTreeRoot = PageManager::Instance().allocate();
+        index->insert(valueTreeKey, valueTreeRoot);
+    }
+    BTree<bool, 1> valueTree(index->find(valueTreeKey));
+
+    // The value tree maps true/false to the root page of its address list, created on first use.
+    if(valueTree.exists(indexValue) == false) valueTree.insert(indexValue, PageManager::Instance().allocate());
+    List<int, 4> addresses(valueTree.find(indexValue));
+    addresses.insert(address);
+}
+
+void Collection::clearIndex(const std::string prefix, const nlohmann::json &json, const int &address) {
+    // Visits the same members as indexJSON and removes address from each scalar's index entry.
+    for (auto& [name, field] : json.items()) {
+        const std::string path = prefix + name;
+        std::cout << prefix << name << " : " << field << "\n";
+        if(field.is_string()) removeIndex(path, field.get<std::string>(), address);
+        else if(field.is_number()) removeIndex(path, field.get<double>(), address);
+        else if(field.is_boolean()) removeIndex(path, field.get<bool>(), address);
+        else if(field.is_object()) clearIndex(path + ".", field, address);
+        else if(field.is_array()) for(auto& item : field) { // array scalars were indexed under the array's own path
+            if(item.is_string()) removeIndex(path, item.get<std::string>(), address);
+            else if(item.is_number()) removeIndex(path, item.get<double>(), address);
+            else if(item.is_boolean()) removeIndex(path, item.get<bool>(), address);
+        }
+    }
+}
+
+void Collection::removeIndex(const std::string indexName, const std::string indexValue, const int &address) {
+    // Removes address from the list stored for (indexName, string indexValue), if such a list exists.
+    printf("REMOVE: %s = \"%s\" add:(%d)\n", indexName.c_str(), indexValue.c_str(), address);
+
+    std::string treeName = indexName + "$string";
+    // The original copied insertIndex and allocated a fresh page whenever the field tree or the
+    // value's list was missing; a removal must not create index entries, so absence now just returns.
+    if(!index->exists(treeName)) {
+        return;
+    }
+
+    BTree<std::string, 128> indexTree(index->find(treeName));
+    if(!indexTree.exists(indexValue)) return;
+
+    List<int, 4> list(indexTree.find(indexValue));
+    list.remove(address);
+}
+
+void Collection::removeIndex(const std::string indexName, double indexValue, const int &address) {
+    // Removes address from the list stored for (indexName, numeric indexValue), if such a list exists.
+    printf("REMOVE: %s = %f add:(%d)\n", indexName.c_str(), indexValue, address);
+
+    std::string treeName = indexName + "$number";
+    // The original copied insertIndex and allocated a fresh page whenever the field tree or the
+    // value's list was missing; a removal must not create index entries, so absence now just returns.
+    if(!index->exists(treeName)) {
+        return;
+    }
+
+    BTree<double, 8> indexTree(index->find(treeName));
+    if(!indexTree.exists(indexValue)) return;
+
+    List<int, 4> list(indexTree.find(indexValue));
+    list.remove(address);
+}
+
+void Collection::removeIndex(const std::string indexName, bool indexValue, const int &address) {
+    // Removes address from the list stored for (indexName, boolean indexValue), if such a list exists.
+    printf("REMOVE: %s = %s add:(%d)\n", indexName.c_str(), indexValue ? "true" : "false", address);
+
+    std::string treeName = indexName + "$boolean";
+    // The original copied insertIndex and allocated a fresh page whenever the field tree or the
+    // value's list was missing; a removal must not create index entries, so absence now just returns.
+    if(!index->exists(treeName)) {
+        return;
+    }
+
+    BTree<bool, 1> indexTree(index->find(treeName));
+    if(!indexTree.exists(indexValue)) return;
+
+    List<int, 4> list(indexTree.find(indexValue));
+    list.remove(address);
+}
diff --git a/invodb/models/collection.h b/invodb/models/collection.h
index 8d5686c..7f11968 100644
--- a/invodb/models/collection.h
+++ b/invodb/models/collection.h
@@ -14,19 +14,32 @@
 #include <algorithm>
 #include <cstring>
 #include "utils/uuid.h"
+#include "btree/list.h"
 
 class Collection {
 public:
     void insert(nlohmann::json &json);
+    void remove(const nlohmann::json &json);
 
     static void loadCollections();
     static Collection& getCollection(const std::string& name);
     static Collection& createCollection(const std::string& name);
 private:
+
+    void indexJSON(const std::string prefix, const nlohmann::json &json, const int& address);
+    void insertIndex(const std::string indexName, const std::string indexValue, const int& address);
+    void insertIndex(const std::string indexName, double indexValue, const int& address);
+    void insertIndex(const std::string indexName, bool indexValue, const int& address);
+    void clearIndex(const std::string prefix, const nlohmann::json &json, const int& address);
+    void removeIndex(const std::string indexName, const std::string indexValue, const int& address);
+    void removeIndex(const std::string indexName, double indexValue, const int& address);
+    void removeIndex(const std::string indexName, bool indexValue, const int& address);
+
     static std::map<std::string, Collection*> map;
     static std::set<int> free;
 
-    BTree<3, std::string, 32> *tree;
+    BTree<std::string, 32> *uuid;
+    BTree<std::string, 128> *index;
 
     Collection(const std::string& name,const int& firstPage);
     Collection() {}