Merge branch 'main' of gitea.yuhangq.com:YuhangQ/cloud-sat into main

同步代码，树型传播
2023-04-20 13:02:43 +08:00 · 2023-04-20 13:02:42 +08:00
6 changed files with 176 additions and 68 deletions
--- a/run.sh
+++ b/run.sh
@ -27,6 +27,6 @@ INSTANCE=04157f716c1e9606c6a530657bf8f957-Kakuro-easy-125-ext.xml.hg_4.cnf

 # make -j 16 && mpirun --bind-to none -np 5 --allow-run-as-root ./light -i $DIR/$INSTANCE --share=1 --threads=4 --times=3600

-make -j 16 && mpirun --bind-to none -np 9 --allow-run-as-root ./light -i ./data/hard1.cnf --share=1 --threads=16 --times=3600
+make -j 16 && mpirun --bind-to none -np 9 --allow-run-as-root valgrind ./light -i ./data/hard1.cnf --share=1 --threads=2 --times=3600

 #./light -i $DIR/$INSTANCE --share=1 --threads=16 --times=3600
--- a/src/light.hpp
+++ b/src/light.hpp
@ -45,8 +45,8 @@ public:
    vec<int> model;

    sharer* s;
+    std::vector<std::vector<int>> sharing_groups;
    enum { SAT, UNSAT, DEFAULT } worker_type;
-    int next_node;

    MPI_Request terminal_request;

@ -74,15 +74,15 @@ public:
        boost::mutex::scoped_lock lock(winner_mtx);
        return winner_period;
    }
+
    void arg_parse(int argc, char **argv);
    void init_workers();
    void diversity_workers();
+    void seperate_groups();
    void parse_input();
    int  run();
-    //void share();
    int  solve();
    void terminate_workers();
-    void print_model();
    void configure_from_file(const char*);
 };

--- a/src/main.cpp
+++ b/src/main.cpp
@ -21,50 +21,10 @@ int main(int argc, char **argv) {
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    light* S = new light();
+    S->num_procs = num_procs;
+    S->rank = rank;
    S->arg_parse(argc, argv);

-    // 初始化环形数据结构
-
-    int worker_procs = num_procs - 1;
-    if(worker_procs >= 8) {
-        // 分出一些 worker 跑其他策略
-        int sat_procs = worker_procs / 8;
-        int unsat_procs = sat_procs;
-        int default_procs = worker_procs - sat_procs - unsat_procs;
-
-        S->next_node = rank + 1;
-
-        // [1, sat_procs] for sat
-        if(rank >= 1 && rank <= sat_procs) {
-            S->worker_type = light::SAT;
-            if(S->next_node > sat_procs) {
-                S->next_node = 1;
-            }
-        }
-        // [sat_procs+1, sat_procs+unsat_procs] for unsat
-        if(rank >= sat_procs+1 && rank <= sat_procs+unsat_procs) {
-            S->worker_type = light::UNSAT;
-            if(S->next_node > sat_procs+unsat_procs) {
-                S->next_node = sat_procs+1;
-            }
-        }
-        // [sat_procs+unsat_procs+1, worker_procs]
-        if(rank >= sat_procs+unsat_procs+1 && rank <= worker_procs) {
-            S->worker_type = light::DEFAULT;
-            if(S->next_node > worker_procs) {
-                S->next_node = sat_procs+unsat_procs+1;
-            }
-        }
-
-    } else {
-        // 总线程太小就跑默认策略
-        S->worker_type = light::DEFAULT;
-        S->next_node = rank + 1;
-        if(S->next_node > worker_procs) {
-            S->next_node = 1;
-        }
-    }
-
    // leader
    if(rank == 0) leader_main(S, num_procs, rank);
    else worker_main(S, num_procs, rank);
--- a/src/sharer.cpp
+++ b/src/sharer.cpp
@ -12,7 +12,7 @@ double share_time = 0;
 int num_procs, rank;

 const int BUF_SIZE = 100 * 1024 * 1024;
-std::vector<std::pair<MPI_Request*, int*>> send_data_struct;
+std::vector<std::pair<MPI_Request*, shared_ptr<int[]>>> send_data_struct;
 MPI_Request receive_request;
 int buf[BUF_SIZE];

@ -26,11 +26,10 @@ std::unordered_map<int, bool> clause_imported;
 void sharer::share_clauses_to_next_node(int from, const std::vector<shared_ptr<clause_store>> &cls) {

    // 环形传递，数据来源如果是目的地，说明数据已轮转一圈，停止发送。
-    if(from == S->next_node) return;
+    if(OPT(share_method) == 0 && from == next_node) return;

    // 定义发送数据
    MPI_Request *send_request = new MPI_Request();
-    int *send_buf;
    int send_length = 1;

    // 初始化发送数据
@ -38,7 +37,7 @@ void sharer::share_clauses_to_next_node(int from, const std::vector<shared_ptr<c
        send_length += (cls[i]->size + 2);
    }

-    send_buf = new int[send_length];
+    shared_ptr<int[]> send_buf = std::make_shared<int[]>(send_length);

    int index = 0;

@ -57,7 +56,18 @@ void sharer::share_clauses_to_next_node(int from, const std::vector<shared_ptr<c

    // 调用 MPI 发送共享子句

-    MPI_Isend(send_buf, send_length, MPI_INT, S->next_node, SHARE_CLAUSES_TAG, MPI_COMM_WORLD, send_request);
+    if(OPT(share_method)) {
+        // 树形传递
+        if(son[from][rank][0] != -1) {
+            MPI_Isend(send_buf.get(), send_length, MPI_INT, son[from][rank][0], SHARE_CLAUSES_TAG, MPI_COMM_WORLD, send_request);
+        }
+        if(son[from][rank][1] != -1) {
+            MPI_Isend(send_buf.get(), send_length, MPI_INT, son[from][rank][1], SHARE_CLAUSES_TAG, MPI_COMM_WORLD, send_request);
+        }
+    } else {
+        // 环形传递
+        MPI_Isend(send_buf.get(), send_length, MPI_INT, next_node, SHARE_CLAUSES_TAG, MPI_COMM_WORLD, send_request);
+    }

    send_data_struct.push_back(std::make_pair(send_request, send_buf));

@ -69,7 +79,6 @@ void sharer::share_clauses_to_next_node(int from, const std::vector<shared_ptr<c
        int flag;
        if(MPI_Test(send_data_struct[i].first, &flag, MPI_STATUS_IGNORE) == MPI_SUCCESS && flag == 1) {
            delete send_data_struct[i].first;
-            delete []send_data_struct[i].second;
            // 与数组最后一个交换，然后 pop_back;
            std::swap(send_data_struct[i], send_data_struct[send_data_struct.size()-1]);
            send_data_struct.pop_back();
@ -97,7 +106,8 @@ int sharer::receive_clauses_from_last_node(std::vector<shared_ptr<clause_store>>
        if(transmitter == -1) {
            transmitter = status.MPI_SOURCE;
        }
-        assert(transmitter == status.MPI_SOURCE);
+
+        //assert(transmitter == status.MPI_SOURCE);

        from = buf[index++];

@ -128,10 +138,29 @@ int sharer::receive_clauses_from_last_node(std::vector<shared_ptr<clause_store>>
    return from;
 }

-void sharer::clause_sharing_init() {
+void sharer::clause_sharing_init(std::vector<std::vector<int>> &sharing_groups) {
+
    MPI_Comm_size(MPI_COMM_WORLD, &num_procs);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+
+    printf("c sharing groups: ");
+    for(int i=0; i<sharing_groups.size(); i++) {
+        printf(" [ ");
+        for(int j=0; j<sharing_groups[i].size(); j++) {
+            printf("%d ", sharing_groups[i][j]);
+        }
+        printf("]");
+    }
+    printf("\n");
+
+    if(OPT(share_method)) {
+        init_tree_transmission(sharing_groups);
+    } else {
+        init_circular_transmission(sharing_groups);
+    }
+
    MPI_Irecv(buf, BUF_SIZE, MPI_INT, MPI_ANY_SOURCE, SHARE_CLAUSES_TAG, MPI_COMM_WORLD, &receive_request);
+
 }

 void sharer::clause_sharing_end() {
@ -156,7 +185,7 @@ void sharer::do_clause_sharing() {
    int transmitter;
    int from = receive_clauses_from_last_node(clauses, transmitter);
    if(from != -1 && clauses.size() > 0) {
-        printf("c node%d(%d)->%d get %d exported clauses from node-%d\n", rank, S->worker_type, S->next_node, clauses.size(), transmitter);
+        printf("c node%d(%d) get %d exported clauses from node-%d\n", rank, S->worker_type, clauses.size(), transmitter);
        // printf("c [node-%d] sharing unique reduce percentage: %.2f%%\n", rank, (double) num_skip_clauses_by_network / num_received_clauses_by_network * 100);

        for (int j = 0; j < consumers.size(); j++) {
@ -308,3 +337,60 @@ int sharer::sort_clauses(int x) {
 //         }
 //     }   
 // }
+
+
+
+void sharer::init_tree_transmission(std::vector<std::vector<int>> &sharing_groups) {
+
+    srand(19260817);
+
+    son = new int**[num_procs];
+    for(int i=0; i<num_procs; i++) {
+        son[i] = new int*[num_procs];
+        for(int j=0; j<num_procs; j++) {
+            son[i][j] = new int[2];
+        }
+    }
+    
+    for(int i=0; i<sharing_groups.size(); i++) {
+
+        // create binary trees for every group;
+        std::vector<int> &group = sharing_groups[i];
+
+        for(int j=0; j<group.size(); j++) {
+            // create a binary tree with root group[j];
+
+            // radom shuffle
+            std::vector<int> rs = group;
+            // keep group[j] as the first
+            std::swap(group[j], group[0]);
+            for(int k=1; k<rs.size(); k++) {
+                std::swap(rs[k], rs[rand()%(rs.size()-1)+1]);
+            }
+
+            // build a tree from array
+            for(int k=0; k<rs.size(); k++) {
+                // printf("son[%d] [%d]\n", rs[0], rs[k]);
+
+                // son[1][1][0] =213;
+
+                son[rs[0]][rs[k]][0] = ( 2 * k + 1 < rs.size() ) ? rs[ 2 * k + 1 ] : -1;
+                son[rs[0]][rs[k]][1] = ( 2 * k + 2 < rs.size() ) ? rs[ 2 * k + 2 ] : -1;
+            }
+
+        }
+    }
+
+}
+
+void sharer::init_circular_transmission(std::vector<std::vector<int>> &sharing_groups) {
+
+    for(int i=0; i<sharing_groups.size(); i++) {
+        for(int j=0; j<sharing_groups[i].size(); j++) {
+            if(sharing_groups[i][j] == S->rank) {
+                next_node = sharing_groups[i][(j+1)%sharing_groups[i].size()];
+                return;
+            }
+        }
+    }
+}
--- a/src/sharer.hpp
+++ b/src/sharer.hpp
@ -14,10 +14,23 @@ public:

    sharer(light *S):S(S) {}

+    void clause_sharing_init(std::vector<std::vector<int>> &sharing_groups);
+
    void do_clause_sharing();
-    void clause_sharing_init();
+
    void clause_sharing_end();

+    // next node in circle mode
+    int next_node;
+
+    // son of this node in tree broadcast mode
+    // left  son -> son[root][0]
+    // right son -> son[root][1]
+    // when son[root][x] == -1, no son
+    int ***son;
+
+    void init_circular_transmission(std::vector<std::vector<int>> &sharing_groups);
+    void init_tree_transmission(std::vector<std::vector<int>> &sharing_groups);

    void share_clauses_to_next_node(int from, const std::vector<shared_ptr<clause_store>> &cls);
    int receive_clauses_from_last_node(std::vector<shared_ptr<clause_store>> &clauses, int &transmitter);
@ -27,8 +40,6 @@ public:

 private:
    light* S;    
-
-    
 };

 #endif
--- a/src/solve.cpp
+++ b/src/solve.cpp
@ -68,11 +68,6 @@ void light::init_workers() {
 }

 void light::diversity_workers() {
-
-    int num_procs, rank;
-    MPI_Comm_size(MPI_COMM_WORLD, &num_procs);
-    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-
    for (int i = 0; i < OPT(threads); i++) {
        if(worker_type == SAT) {

@ -163,7 +158,6 @@ void light::diversity_workers() {
                else
                    workers[i]->configure("phase", 1);
            }
-
        }

        
@ -216,7 +210,7 @@ int light::solve() {
            s->consumers.push(workers[j]);
            workers[j]->in_sharer = s;
        }
-        s->clause_sharing_init();
+        s->clause_sharing_init(sharing_groups);
    }

    while (!terminated) {
@ -264,6 +258,8 @@ int light::solve() {

 int light::run() {

+    seperate_groups();
+
    init_workers();
    diversity_workers();

@ -274,6 +270,61 @@ int light::run() {
    return res;
 }

+void light::seperate_groups() {
+    // split distribute nodes to groups(SAT MODE、UNSAT MODE、DEFAULT MODE)
+    int worker_procs = num_procs - 1;
+
+    if(worker_procs >= 8) {
+        int sat_procs = worker_procs / 8;
+        int unsat_procs = sat_procs;
+        int default_procs = worker_procs - sat_procs - unsat_procs;
+        // [1, sat_procs] for sat
+        if(rank >= 1 && rank <= sat_procs) {
+            worker_type = light::SAT;
+        }
+
+        std::vector<int> tmp;
+        for(int i=1; i<=sat_procs; i++) {
+            tmp.push_back(i);
+        }
+        sharing_groups.push_back(tmp);
+
+        
+        // [sat_procs+1, sat_procs+unsat_procs] for unsat
+        if(rank >= sat_procs+1 && rank <= sat_procs+unsat_procs) {
+            worker_type = light::UNSAT;
+        }
+
+        tmp.clear();
+        for(int i=sat_procs+1; i<=sat_procs+unsat_procs; i++) {
+            tmp.push_back(i);
+        }
+        sharing_groups.push_back(tmp);
+
+        // [sat_procs+unsat_procs+1, worker_procs]
+        if(rank >= sat_procs+unsat_procs+1 && rank <= worker_procs) {
+            worker_type = light::DEFAULT;
+        }
+
+        tmp.clear();
+        for(int i=sat_procs+unsat_procs+1; i<=worker_procs; i++) {
+            tmp.push_back(i);
+        }
+        sharing_groups.push_back(tmp);
+
+    } else {
+        // 总线程太小就跑默认策略
+        worker_type = light::DEFAULT;
+        std::vector<int> tmp;
+        for(int i=1; i<=worker_procs; i++) {
+            tmp.push_back(i);
+        }
+        sharing_groups.push_back(tmp);
+    }
+
+    printf("sg: %d\n", sharing_groups[0].size());
+}
+
 void print_model(vec<int> &model) {
    printf("v");
    for (int i = 0; i < model.size(); i++) {
Author	SHA1	Message	Date
YuhangQ	1b1c0c81c2	Merge branch 'main' of gitea.yuhangq.com:YuhangQ/cloud-sat into main	2023-04-20 13:02:43 +08:00
YuhangQ	f53d7764b6	同步代码，树型传播	2023-04-20 13:02:42 +08:00