同步代码

This commit is contained in:
YuhangQ 2023-04-07 09:22:40 +00:00
parent 47d72f3b23
commit 8406414f06
11 changed files with 200 additions and 7814 deletions

65
bug.txt Normal file
View File

@ -0,0 +1,65 @@
==2320428==
==2320428== Process terminating with default action of signal 15 (SIGTERM)
==2320428== at 0x4DF912F: poll (poll.c:29)
==2320428== by 0x50E09B8: ??? (in /usr/lib/x86_64-linux-gnu/libevent_core-2.1.so.7.0.1)
==2320428== by 0x50DBCEC: event_base_loop (in /usr/lib/x86_64-linux-gnu/libevent_core-2.1.so.7.0.1)
==2320428== by 0x4F1AAC5: ??? (in /usr/lib/x86_64-linux-gnu/libopen-pal.so.40.30.2)
==2320428== by 0x4D78401: start_thread (pthread_create.c:442)
==2320428== by 0x4E06743: clone (clone.S:100)
==2320428==
==2320428== HEAP SUMMARY:
==2320428== in use at exit: 5,263,216,314 bytes in 121,653 blocks
==2320428== total heap usage: 230,366 allocs, 108,713 frees, 5,284,993,033 bytes allocated
==2320428==
==2320426==
==2320426== Process terminating with default action of signal 15 (SIGTERM)
==2320426== at 0x4DF912F: poll (poll.c:29)
==2320426== by 0x50E09B8: ??? (in /usr/lib/x86_64-linux-gnu/libevent_core-2.1.so.7.0.1)
==2320426== by 0x50DBCEC: event_base_loop (in /usr/lib/x86_64-linux-gnu/libevent_core-2.1.so.7.0.1)
==2320426== by 0x4F1AAC5: ??? (in /usr/lib/x86_64-linux-gnu/libopen-pal.so.40.30.2)
==2320426== by 0x4D78401: start_thread (pthread_create.c:442)
==2320426== by 0x4E06743: clone (clone.S:100)
==2320426==
==2320426== HEAP SUMMARY:
==2320426== in use at exit: 164,740,285 bytes in 19,687 blocks
==2320426== total heap usage: 61,284 allocs, 41,597 frees, 167,994,848 bytes allocated
==2320426==
==2320426== LEAK SUMMARY:
==2320426== definitely lost: 5,056 bytes in 12 blocks
==2320426== indirectly lost: 0 bytes in 0 blocks
==2320426== possibly lost: 160,111,368 bytes in 4,232 blocks
==2320426== still reachable: 4,623,861 bytes in 15,443 blocks
==2320426== of which reachable via heuristic:
==2320426== newarray : 1,040 bytes in 2 blocks
==2320426== suppressed: 0 bytes in 0 blocks
==2320426== Rerun with --leak-check=full to see details of leaked memory
==2320426==
==2320426== Use --track-origins=yes to see where uninitialised values come from
==2320426== For lists of detected and suppressed errors, rerun with: -s
==2320426== ERROR SUMMARY: 4 errors from 4 contexts (suppressed: 0 from 0)
==2320427==
==2320427== Process terminating with default action of signal 15 (SIGTERM)
==2320427== at 0x4DF912F: poll (poll.c:29)
==2320427== by 0x50E09B8: ??? (in /usr/lib/x86_64-linux-gnu/libevent_core-2.1.so.7.0.1)
==2320427== by 0x50DBCEC: event_base_loop (in /usr/lib/x86_64-linux-gnu/libevent_core-2.1.so.7.0.1)
==2320427== by 0x4F1AAC5: ??? (in /usr/lib/x86_64-linux-gnu/libopen-pal.so.40.30.2)
==2320427== by 0x4D78401: start_thread (pthread_create.c:442)
==2320427== by 0x4E06743: clone (clone.S:100)
==2320427==
==2320427== HEAP SUMMARY:
==2320427== in use at exit: 5,263,151,743 bytes in 121,171 blocks
==2320427== total heap usage: 230,206 allocs, 109,035 frees, 5,285,767,114 bytes allocated
==2320427==
==2320429==
==2320429== Process terminating with default action of signal 15 (SIGTERM)
==2320429== at 0x4DF912F: poll (poll.c:29)
==2320429== by 0x50E09B8: ??? (in /usr/lib/x86_64-linux-gnu/libevent_core-2.1.so.7.0.1)
==2320429== by 0x50DBCEC: event_base_loop (in /usr/lib/x86_64-linux-gnu/libevent_core-2.1.so.7.0.1)
==2320429== by 0x4F1AAC5: ??? (in /usr/lib/x86_64-linux-gnu/libopen-pal.so.40.30.2)
==2320429== by 0x4D78401: start_thread (pthread_create.c:442)
==2320429== by 0x4E06743: clone (clone.S:100)
==2320429==
==2320429== HEAP SUMMARY:
==2320429== in use at exit: 5,262,646,978 bytes in 121,271 blocks
==2320429== total heap usage: 230,016 allocs, 108,745 frees, 5,284,059,066 bytes allocated
==2320429==

View File

@ -4,7 +4,7 @@ running configure, to aid debugging if configure makes a mistake.
It was created by m4ri configure 20140914, which was It was created by m4ri configure 20140914, which was
generated by GNU Autoconf 2.69. Invocation command line was generated by GNU Autoconf 2.69. Invocation command line was
$ ./configure $ ./configure --no-create --no-recursion
## --------- ## ## --------- ##
## Platform. ## ## Platform. ##
@ -363,7 +363,7 @@ configure:11789: gcc -o conftest -g -O2 conftest.c >&5
configure:11789: $? = 0 configure:11789: $? = 0
configure:11789: ./conftest configure:11789: ./conftest
configure:11789: $? = 0 configure:11789: $? = 0
configure:11799: result: 90675:1100800:7ffafbbf:bfebfbff configure:11799: result: 90675:7100800:7ffafbbf:bfebfbff
configure:11811: checking whether mmx is supported configure:11811: checking whether mmx is supported
configure:11823: result: yes configure:11823: result: yes
configure:11826: checking whether sse is supported configure:11826: checking whether sse is supported
@ -697,30 +697,6 @@ configure:13835: checking that generated files are newer than configure
configure:13841: result: done configure:13841: result: done
configure:13868: creating ./config.status configure:13868: creating ./config.status
## ---------------------- ##
## Running config.status. ##
## ---------------------- ##
This file was extended by m4ri config.status 20140914, which was
generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES =
CONFIG_HEADERS =
CONFIG_LINKS =
CONFIG_COMMANDS =
$ ./config.status
on code-server
config.status:1117: creating Makefile
config.status:1117: creating testsuite/Makefile
config.status:1117: creating m4ri/m4ri_config.h
config.status:1117: creating m4ri.pc
config.status:1117: creating m4ri/config.h
config.status:1298: m4ri/config.h is unchanged
config.status:1346: executing depfiles commands
config.status:1346: executing libtool commands
## ---------------- ## ## ---------------- ##
## Cache variables. ## ## Cache variables. ##
## ---------------- ## ## ---------------- ##
@ -801,7 +777,7 @@ ax_cv_c_flags__msse3=yes
ax_cv_c_flags__msse=yes ax_cv_c_flags__msse=yes
ax_cv_cpu_vendor=Intel ax_cv_cpu_vendor=Intel
ax_cv_func_posix_memalign_works=yes ax_cv_func_posix_memalign_works=yes
ax_cv_gcc_x86_cpuid_0x00000001=90675:1100800:7ffafbbf:bfebfbff ax_cv_gcc_x86_cpuid_0x00000001=90675:7100800:7ffafbbf:bfebfbff
ax_cv_gcc_x86_cpuid_0x0=20:756e6547:6c65746e:49656e69 ax_cv_gcc_x86_cpuid_0x0=20:756e6547:6c65746e:49656e69
ax_cv_have_mmx_ext=yes ax_cv_have_mmx_ext=yes
ax_cv_have_sse2_ext=yes ax_cv_have_sse2_ext=yes
@ -1017,3 +993,27 @@ target_alias=''
#define NDEBUG 1 #define NDEBUG 1
configure: exit 0 configure: exit 0
## ---------------------- ##
## Running config.status. ##
## ---------------------- ##
This file was extended by m4ri config.status 20140914, which was
generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES =
CONFIG_HEADERS =
CONFIG_LINKS =
CONFIG_COMMANDS =
$ ./config.status
on code-server
config.status:1117: creating Makefile
config.status:1117: creating testsuite/Makefile
config.status:1117: creating m4ri/m4ri_config.h
config.status:1117: creating m4ri.pc
config.status:1117: creating m4ri/config.h
config.status:1298: m4ri/config.h is unchanged
config.status:1346: executing depfiles commands
config.status:1346: executing libtool commands

2
run.sh
View File

@ -4,4 +4,4 @@
#valgrind #valgrind
make -j 16 && mpirun -np 4 --allow-run-as-root valgrind ./light -i data/class_1_easy_10_0.cnf --share=1 --threads=32 --times=60 make -j 16 && mpirun -np 4 --allow-run-as-root valgrind ./light -i data/class_1_easy_10_0.cnf --share=1 --threads=32 --times=1000

View File

@ -8,7 +8,6 @@
#include "comm_tag.h" #include "comm_tag.h"
#include "../light.hpp" #include "../light.hpp"
#include "../utils/cmdline.h" #include "../utils/cmdline.h"
#include "../utils/easylogging++.h"
#include "../paras.hpp" #include "../paras.hpp"
#include "heartbeat.h" #include "heartbeat.h"
@ -16,15 +15,9 @@ void leader_main(light* S, int num_procs, int rank) {
auto clk_st = std::chrono::high_resolution_clock::now(); auto clk_st = std::chrono::high_resolution_clock::now();
el::Configurations defaultConf;
defaultConf.setGlobally(el::ConfigurationType::Format, "c %level [leader] %msg");
el::Loggers::reconfigureLogger("default", defaultConf);
LOG(INFO) << "start logging powered by easylogging++";
S->opt->print_change(); S->opt->print_change();
LOG(INFO) << "c [leader] preprocess(simplify) input data"; printf("c [leader] preprocess(simplify) input data\n");
// 进行化简 // 进行化简
auto pre = new preprocess(); auto pre = new preprocess();
@ -38,7 +31,7 @@ void leader_main(light* S, int num_procs, int rank) {
// preprocess 证明了UNSAT 则不需要启动云计算 // preprocess 证明了UNSAT 则不需要启动云计算
if(!start) { if(!start) {
LOGGER->info("UNSAT!!!!!! by preprocess"); printf("c [leader] UNSAT!!!!!! by preprocess\n");
return; return;
} }
@ -55,7 +48,7 @@ void leader_main(light* S, int num_procs, int rank) {
const auto& str_ref = ss.str(); const auto& str_ref = ss.str();
char* cstr = const_cast<char *>(str_ref.c_str()); char* cstr = const_cast<char *>(str_ref.c_str());
LOGGER->info("hand out length of cnf instance to all nodes"); printf("c [leader] hand out length of cnf instance to all nodes\n");
int cnf_length = str_ref.size(); int cnf_length = str_ref.size();
@ -65,13 +58,13 @@ void leader_main(light* S, int num_procs, int rank) {
MPI_Barrier(MPI_COMM_WORLD); MPI_Barrier(MPI_COMM_WORLD);
LOGGER->info("hand out cnf instance to all nodes"); printf("c [leader] hand out cnf instance to all nodes\n");
MPI_Bcast(cstr, cnf_length, MPI_CHAR, 0, MPI_COMM_WORLD); MPI_Bcast(cstr, cnf_length, MPI_CHAR, 0, MPI_COMM_WORLD);
MPI_Barrier(MPI_COMM_WORLD); MPI_Barrier(MPI_COMM_WORLD);
LOGGER->info("hand out done!"); printf("c [leader] hand out done!\n");
int is_sat; int is_sat;
MPI_Request solved; MPI_Request solved;
@ -88,7 +81,7 @@ void leader_main(light* S, int num_procs, int rank) {
auto clk_now = std::chrono::high_resolution_clock::now(); auto clk_now = std::chrono::high_resolution_clock::now();
int solve_time = std::chrono::duration_cast<std::chrono::seconds>(clk_now - clk_st).count(); int solve_time = std::chrono::duration_cast<std::chrono::seconds>(clk_now - clk_st).count();
if (solve_time >= S->opt->times) { if (solve_time >= S->opt->times) {
LOGGER->info("solve time out"); printf("c [leader] solve time out\n");
break; break;
} }
@ -109,15 +102,15 @@ void leader_main(light* S, int num_procs, int rank) {
if(is_sat) { if(is_sat) {
res = 10; res = 10;
LOGGER->info("recived model size: %v", pre->vars); printf("c [leader] received model size: %d\n", pre->vars);
LOGGER->info("SAT!!!!!!"); printf("c SAT!!!!!!\n");
MPI_Send(NULL, 0, MPI_INT, status.MPI_SOURCE, MODEL_REPORT_TAG, MPI_COMM_WORLD); MPI_Send(NULL, 0, MPI_INT, status.MPI_SOURCE, MODEL_REPORT_TAG, MPI_COMM_WORLD);
sol = new int[pre->vars]; sol = new int[pre->vars];
MPI_Recv(sol, pre->vars, MPI_INT, status.MPI_SOURCE, MODEL_REPORT_TAG, MPI_COMM_WORLD, MPI_STATUS_IGNORE); MPI_Recv(sol, pre->vars, MPI_INT, status.MPI_SOURCE, MODEL_REPORT_TAG, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
} else { } else {
res = 20; res = 20;
LOGGER->info("UNSAT!!!!!!"); printf("c UNSAT!!!!!!\n");
} }
break; break;
} }
@ -142,6 +135,4 @@ void leader_main(light* S, int num_procs, int rank) {
printf("s UNKNOWN\n"); printf("s UNKNOWN\n");
} }
} }

View File

@ -6,7 +6,6 @@
#include "../light.hpp" #include "../light.hpp"
#include "../utils/cmdline.h" #include "../utils/cmdline.h"
#include "../utils/easylogging++.h"
#include "../paras.hpp" #include "../paras.hpp"
#include "comm_tag.h" #include "comm_tag.h"
@ -14,17 +13,11 @@ void worker_main(light* S, int num_procs, int rank) {
auto clk_st = std::chrono::high_resolution_clock::now(); auto clk_st = std::chrono::high_resolution_clock::now();
el::Configurations defaultConf;
char tmp_format[1024];
sprintf(tmp_format, "c %%level [worker%d] %%msg", rank);
defaultConf.setGlobally(el::ConfigurationType::Format, tmp_format);
el::Loggers::reconfigureLogger("default", defaultConf);
// 阻塞接收初始化信号 // 阻塞接收初始化信号
int start; int start;
MPI_Recv(&start, 1, MPI_INT, 0, START_TAG, MPI_COMM_WORLD, MPI_STATUS_IGNORE); MPI_Recv(&start, 1, MPI_INT, 0, START_TAG, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
if(!start) { if(!start) {
LOGGER->info("I has no need to start"); printf("c [worker%d] I has no need to start\n", rank);
return; return;
} }
@ -49,15 +42,16 @@ void worker_main(light* S, int num_procs, int rank) {
int res = S->run(); int res = S->run();
LOGGER->info("kissat exit with result: %v", res); printf("c [worker%d] kissat exit with result: %d\n", rank, res);
MPI_Request solved_request, model_request; MPI_Request *solved_request = new MPI_Request();
MPI_Request *model_request = new MPI_Request();
MPI_Irecv(NULL, 0, MPI_INT, 0, MODEL_REPORT_TAG, MPI_COMM_WORLD, &model_request); MPI_Irecv(NULL, 0, MPI_INT, 0, MODEL_REPORT_TAG, MPI_COMM_WORLD, model_request);
if(res == 10) { if(res == 10) {
int is_sat = 1; int is_sat = 1;
MPI_Isend(&is_sat, 1, MPI_INT, 0, SOLVED_REPORT_TAG, MPI_COMM_WORLD, &solved_request); MPI_Isend(&is_sat, 1, MPI_INT, 0, SOLVED_REPORT_TAG, MPI_COMM_WORLD, solved_request);
while(true) { while(true) {
@ -66,21 +60,20 @@ void worker_main(light* S, int num_procs, int rank) {
auto clk_now = std::chrono::high_resolution_clock::now(); auto clk_now = std::chrono::high_resolution_clock::now();
int solve_time = std::chrono::duration_cast<std::chrono::seconds>(clk_now - clk_st).count(); int solve_time = std::chrono::duration_cast<std::chrono::seconds>(clk_now - clk_st).count();
if (solve_time >= S->opt->times) { if (solve_time >= S->opt->times) {
LOGGER->info("solve time out"); printf("c [worker%d] solve time out\n", rank);
break; break;
} }
// when getting terminate signal // when getting terminate signal
if(MPI_Test(&S->terminal_request, &flag, MPI_STATUS_IGNORE) == MPI_SUCCESS && flag == 1) { if(MPI_Test(&S->terminal_request, &flag, MPI_STATUS_IGNORE) == MPI_SUCCESS && flag == 1) {
printf("c [worker%d] getting terminate signal\n", rank);
LOGGER->info("getting terminate signal");
break; break;
} }
// when getting model signal // when getting model signal
if(MPI_Test(&model_request, &flag, MPI_STATUS_IGNORE) == MPI_SUCCESS && flag == 1) { if(MPI_Test(model_request, &flag, MPI_STATUS_IGNORE) == MPI_SUCCESS && flag == 1) {
LOGGER->info("getting send model signal"); printf("c [worker%d] getting send model signal\n", rank);
// send model and break; // send model and break;
MPI_Send(S->model.data, S->model.size(), MPI_INT, 0, MODEL_REPORT_TAG, MPI_COMM_WORLD); MPI_Send(S->model.data, S->model.size(), MPI_INT, 0, MODEL_REPORT_TAG, MPI_COMM_WORLD);
@ -91,12 +84,12 @@ void worker_main(light* S, int num_procs, int rank) {
} else if(res == 20) { } else if(res == 20) {
int flag; int flag;
int is_sat = 0; int is_sat = 0;
MPI_Isend(&is_sat, 1, MPI_INT, 0, SOLVED_REPORT_TAG, MPI_COMM_WORLD, &solved_request); MPI_Isend(&is_sat, 1, MPI_INT, 0, SOLVED_REPORT_TAG, MPI_COMM_WORLD, solved_request);
} else { } else {
// when unknown do nothing. // when unknown do nothing.
} }
delete(S); //delete(S);
MPI_Barrier(MPI_COMM_WORLD); MPI_Barrier(MPI_COMM_WORLD);
} }

View File

@ -1,7 +1,6 @@
#ifndef _light_hpp_INCLUDED #ifndef _light_hpp_INCLUDED
#define _light_hpp_INCLUDED #define _light_hpp_INCLUDED
#include "utils/easylogging++.h"
#include "utils/parse.hpp" #include "utils/parse.hpp"
#include "preprocess/preprocess.hpp" #include "preprocess/preprocess.hpp"
#include "paras.hpp" #include "paras.hpp"
@ -14,8 +13,6 @@
#include <mpi.h> #include <mpi.h>
typedef long long ll; typedef long long ll;
#define LOGGER el::Loggers::getLogger("default")
class basesolver; class basesolver;
class sharer; class sharer;
@ -42,10 +39,16 @@ public:
vec<int> solver_type; vec<int> solver_type;
vec<basesolver *> workers; vec<basesolver *> workers;
vec<sharer *> sharers; vec<sharer *> sharers;
pthread_t *sharer_ptrs;
vec<int> model; vec<int> model;
MPI_Request terminal_request; MPI_Request terminal_request;
int num_procs;
int rank;
int next_node;
int last_node;
char* filename; char* filename;
char* instance; char* instance;

View File

@ -11,8 +11,6 @@
#include "distributed/leader.hpp" #include "distributed/leader.hpp"
#include "distributed/worker.hpp" #include "distributed/worker.hpp"
INITIALIZE_EASYLOGGINGPP
int main(int argc, char **argv) { int main(int argc, char **argv) {
int num_procs, rank; int num_procs, rank;
MPI_Init(&argc, &argv); MPI_Init(&argc, &argv);
@ -22,6 +20,16 @@ int main(int argc, char **argv) {
light* S = new light(); light* S = new light();
S->arg_parse(argc, argv); S->arg_parse(argc, argv);
// 初始化节点关系
S->rank = rank;
S->num_procs = num_procs;
if(rank == 0) {
S->last_node = S->next_node = 0;
} else {
S->last_node = (rank - 2 + num_procs - 1) % (num_procs - 1) + 1;
S->next_node = rank % (num_procs - 1) + 1;
}
// leader // leader
if(rank == 0) leader_main(S, num_procs, rank); if(rank == 0) leader_main(S, num_procs, rank);
else worker_main(S, num_procs, rank); else worker_main(S, num_procs, rank);

View File

@ -7,7 +7,7 @@
#include <algorithm> #include <algorithm>
#include <mutex> #include <mutex>
#include "mpi.h" #include "mpi.h"
#include "utils/easylogging++.h"
auto clk_st = std::chrono::high_resolution_clock::now(); auto clk_st = std::chrono::high_resolution_clock::now();
char* worker_sign = ""; char* worker_sign = "";
@ -141,7 +141,6 @@ void light::parse_input() {
int light::solve() { int light::solve() {
// printf("c -----------------solve start----------------------\n"); // printf("c -----------------solve start----------------------\n");
LOG(INFO) << "solve start";
pthread_t *ptr = new pthread_t[OPT(threads)]; pthread_t *ptr = new pthread_t[OPT(threads)];
for (int i = 0; i < OPT(threads); i++) { for (int i = 0; i < OPT(threads); i++) {
@ -175,6 +174,15 @@ int light::solve() {
for (int i = 0; i < OPT(threads); i++) { for (int i = 0; i < OPT(threads); i++) {
pthread_join(ptr[i], NULL); pthread_join(ptr[i], NULL);
} }
// 等待 share 进程结束
if(OPT(share)) {
//pthread_create(&ptr[i], NULL, share_worker,);
//pthread_join(&sharers[0], NULL);
}
// printf("ending join\n"); // printf("ending join\n");
if (result == 10) if (result == 10)
@ -198,6 +206,10 @@ int light::run() {
parse_input(); parse_input();
if (OPT(share)) share(); if (OPT(share)) share();
int res = solve(); int res = solve();
if(OPT(share)) {
pthread_join(sharer_ptrs[0], NULL);
}
return res; return res;
} }

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -7,7 +7,7 @@
#include <boost/thread/thread.hpp> #include <boost/thread/thread.hpp>
const int BUF_SIZE = 1024 * 1024; const int BUF_SIZE = 1024 * 1024;
std::vector<std::pair<MPI_Request, int*>> send_data_struct; std::vector<std::pair<MPI_Request*, int*>> send_data_struct;
MPI_Request receive_request; MPI_Request receive_request;
int buf[BUF_SIZE]; int buf[BUF_SIZE];
@ -20,18 +20,19 @@ void share_clauses_to_next_node(const vec<clause_store *> &cls) {
// 已完成发送,释放内存空间 // 已完成发送,释放内存空间
int flag; int flag;
if(MPI_Test(&request, &flag, MPI_STATUS_IGNORE) == MPI_SUCCESS & flag == 1) { if(MPI_Test(request, &flag, MPI_STATUS_IGNORE) == MPI_SUCCESS && flag == 1) {
//MPI_Request_free(&request); delete request;
delete []send_buf; delete []send_buf;
} // 与数组最后一个交换,然后 pop_back;
std::swap(send_data_struct[i], send_data_struct[send_data_struct.size()-1]);
send_data_struct.pop_back();
// 与数组最后一个交换,然后 pop_back; printf("c [worker] free send request, now: %d\n", send_data_struct.size());
std::swap(send_data_struct[i], send_data_struct[send_data_struct.size()-1]); }
send_data_struct.pop_back();
} }
// 定义发送数据 // 定义发送数据
MPI_Request send_request; MPI_Request *send_request = new MPI_Request();
int *send_buf; int *send_buf;
int send_length = 0; int send_length = 0;
@ -62,25 +63,24 @@ void share_clauses_to_next_node(const vec<clause_store *> &cls) {
int target = rank % (num_procs - 1) + 1; int target = rank % (num_procs - 1) + 1;
MPI_Isend(send_buf, send_length, MPI_INT, target, SHARE_CLAUSES_TAG, MPI_COMM_WORLD, &send_request); MPI_Isend(send_buf, send_length, MPI_INT, target, SHARE_CLAUSES_TAG, MPI_COMM_WORLD, send_request);
send_data_struct.push_back(std::make_pair(send_request, send_buf)); send_data_struct.push_back(std::make_pair(send_request, send_buf));
LOGGER->info("send clauses: %v", send_length); printf("c [worker] send clauses: %d\n", send_length);
} }
bool receive_clauses_from_last_node(vec<clause_store*> &clauses) { bool receive_clauses_from_last_node(vec<clause_store*> &clauses) {
clauses.clear(); clauses.clear();
int flag; int flag;
MPI_Status status; MPI_Status status;
int index = 0; bool received = false;
// 已接收完数据 // 已接收完数据
if(MPI_Test(&receive_request, &flag, &status) == MPI_SUCCESS && flag == 1) { while(MPI_Test(&receive_request, &flag, &status) == MPI_SUCCESS && flag == 1) {
int index = 0;
int count; int count;
MPI_Get_count(&status, MPI_INT, &count); MPI_Get_count(&status, MPI_INT, &count);
@ -103,15 +103,14 @@ bool receive_clauses_from_last_node(vec<clause_store*> &clauses) {
int from = (rank - 2 + num_procs - 1) % (num_procs - 1) + 1; int from = (rank - 2 + num_procs - 1) % (num_procs - 1) + 1;
LOGGER->info("receive clauses: %v", count); //LOGGER->info("receive clauses: %v", count);
MPI_Irecv(buf, BUF_SIZE, MPI_INT, from, SHARE_CLAUSES_TAG, MPI_COMM_WORLD, &receive_request); MPI_Irecv(buf, BUF_SIZE, MPI_INT, from, SHARE_CLAUSES_TAG, MPI_COMM_WORLD, &receive_request);
return true; received = true;
} }
return false; return received;
} }
void * share_worker(void *arg) { void * share_worker(void *arg) {
@ -120,50 +119,61 @@ void * share_worker(void *arg) {
auto clk_st = std::chrono::high_resolution_clock::now(); auto clk_st = std::chrono::high_resolution_clock::now();
double share_time = 0; double share_time = 0;
// int num_procs, rank; int num_procs, rank;
// MPI_Comm_size(MPI_COMM_WORLD, &num_procs); MPI_Comm_size(MPI_COMM_WORLD, &num_procs);
// MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_rank(MPI_COMM_WORLD, &rank);
// int from = (rank - 2 + num_procs - 1) % (num_procs - 1) + 1; int from = (rank - 2 + num_procs - 1) % (num_procs - 1) + 1;
// MPI_Irecv(buf, BUF_SIZE, MPI_INT, from, SHARE_CLAUSES_TAG, MPI_COMM_WORLD, &receive_request); MPI_Irecv(buf, BUF_SIZE, MPI_INT, from, SHARE_CLAUSES_TAG, MPI_COMM_WORLD, &receive_request);
while (true) { while (true) {
++nums; ++nums;
usleep(sq->share_intv); usleep(sq->share_intv);
auto clk_now = std::chrono::high_resolution_clock::now(); auto clk_now = std::chrono::high_resolution_clock::now();
int solve_time = std::chrono::duration_cast<std::chrono::milliseconds>(clk_now - clk_st).count(); int solve_time = std::chrono::duration_cast<std::chrono::milliseconds>(clk_now - clk_st).count();
LOGGER->info("round %v, time: %v.%v", nums, solve_time / 1000, solve_time % 1000); //LOGGER->info("round %v, time: %v.%v", nums, solve_time / 1000, solve_time % 1000);
if (terminated) break;
printf("c [worker] round %d, time: %d.%d\n", nums, solve_time / 1000, solve_time % 1000);
if (terminated) {
MPI_Cancel(&receive_request);
for(int i=0; i<send_data_struct.size(); i++) {
auto& request = send_data_struct[i].first;
auto& send_buf = send_data_struct[i].second;
MPI_Cancel(request);
delete []send_buf;
}
break;
}
// printf("start sharing %d\n", sq->share_intv); // printf("start sharing %d\n", sq->share_intv);
for (int i = 0; i < sq->producers.size(); i++) { for (int i = 0; i < sq->producers.size(); i++) {
sq->cls.clear(); sq->cls.clear();
sq->producers[i]->export_clauses_to(sq->cls); sq->producers[i]->export_clauses_to(sq->cls);
//printf("c size %d\n", sq->cls.size()); //printf("c size %d\n", sq->cls.size());
int number = sq->cls.size(); int number = sq->cls.size();
LOGGER->info("thread-%v: get %v exported clauses", i, number); printf("c [worker] thread-%d: get %d exported clauses\n", i, number);
// //分享当前节点产生的子句
// if(sq->cls.size() > 0) share_clauses_to_next_node(sq->cls);
// // 导入外部网络传输的子句 //分享当前节点产生的子句
// vec<clause_store*> clauses; if(sq->cls.size() > 0) share_clauses_to_next_node(sq->cls);
// if(receive_clauses_from_last_node(clauses)) {
// for (int j = 0; j < sq->consumers.size(); j++) {
// for (int k = 0; k < clauses.size(); k++)
// clauses[k]->increase_refs(1);
// sq->consumers[j]->import_clauses_from(clauses);
// }
// // 传递外部网络传输的子句给下个节点 // 导入外部网络传输的子句
// share_clauses_to_next_node(clauses); vec<clause_store*> clauses;
if(receive_clauses_from_last_node(clauses)) {
for (int j = 0; j < sq->consumers.size(); j++) {
for (int k = 0; k < clauses.size(); k++)
clauses[k]->increase_refs(1);
sq->consumers[j]->import_clauses_from(clauses);
}
// for (int k = 0; k < clauses.size(); k++) { // 传递外部网络传输的子句给下个节点
// clauses[k]->free_clause(); share_clauses_to_next_node(clauses);
// }
// } for (int k = 0; k < clauses.size(); k++) {
clauses[k]->free_clause();
}
}
// 导入当前节点产生的子句 // 导入当前节点产生的子句
int percent = sq->sort_clauses(i); int percent = sq->sort_clauses(i);
@ -278,9 +288,9 @@ void light::share() {
sharers.push(s); sharers.push(s);
} }
pthread_t *ptr = new pthread_t[sharers_number]; sharer_ptrs = new pthread_t[sharers_number];
for (int i = 0; i < sharers_number; i++) { for (int i = 0; i < sharers_number; i++) {
pthread_create(&ptr[i], NULL, share_worker, sharers[i]); pthread_create(&sharer_ptrs[i], NULL, share_worker, sharers[i]);
} }
} }
} }