NeoPZ
numatst.cpp
Go to the documentation of this file.
1 
8 #ifdef HAVE_CONFIG_H
9 #include <pz_config.h>
10 #endif
11 
12 #include <iostream>
13 #include <cstdlib>
14 
15 #include "pzbfilestream.h" // TPZBFileStream, TPZFileStream
16 #include "pzmd5stream.h"
17 
18 #include "pzlog.h"
19 
20 #include <fstream>
21 #include <string>
22 
23 #ifdef LOG4CXX
24 static LoggerPtr loggerconverge(Logger::getLogger("pz.converge"));
25 static LoggerPtr logger(Logger::getLogger("main"));
26 #endif
27 
28 #include "pzskylmat.h"
29 
30 //#include "timing_analysis.h"
31 #include "arglib.h"
32 #include "run_stats_table.h"
33 
34 #ifdef HAS_GETRUSAGE
35 #include <sys/resource.h> // getrusage
36 #endif
37 
38 #ifdef USING_TBB
39 #include "tbb/task_scheduler_init.h"
40 using namespace tbb;
41 // If you have issues with: dyld: Library not loaded: libtbb.dylib
42 // try setting the LD path. Ex:
43 // export DYLD_FALLBACK_LIBRARY_PATH=/Users/borin/Desktop/neopz/tbb40_297oss/lib/
44 #endif
45 
46 void help(const char* prg)
47 {
48  cout << "Compute the Decompose_LDLt method for the matrix" << endl;
49  cout << endl;
50  cout << "Usage: " << prg << "-if file [-v verbose_level] [-b] "
51  << "[-tot_rdt rdt_file] [-op matrix_operation] [-h]" << endl << endl;
52  cout << "matrix_operation:" << endl;
53  cout << " 0: Decompose_LDLt()" << endl;
54  cout << " 1: Decompose_LDLt2() -- deprecated (not working)" << endl;
55  cout << " 2: Decompose_Cholesky()" << endl;
56  clarg::arguments_descriptions(cout, " ", "\n");
57 }
58 
59 clarg::argString ifn("-ifn", "input matrix file name (use -bi to read from binary files)", "matrix.txt");
60 clarg::argInt affinity("-af", "affinity mode (0=no affinity, 1=heuristi 1)", 0);
61 clarg::argInt verb_level("-v", "verbosity level", 0);
62 int verbose = 0;
63 /* Verbose macro. */
64 #define VERBOSE(level,...) if (level <= verbose) cout << __VA_ARGS__
65 
66 clarg::argInt mop("-op", "Matrix operation", 1);
67 clarg::argBool br("-br", "binary reference. Reference decomposed matrix file format == binary.", false);
68 clarg::argBool bi("-bi", "binary input. Input file format == binary.", false);
69 clarg::argBool bd("-bd", "binary dump. Dump file format == binary.", false);
70 clarg::argBool h("-h", "help message", false);
71 clarg::argBool copy_matrix_inside_thread("-cot", "copy on thread - copy matrix inside thread.", false);
72 clarg::argInt mstats("-mstats", "Matrix statistics vebosity level.", 0);
73 clarg::argInt maxcol("-maxcol", "Limit computation to max column (Use Resize(maxcol)).", 0);
74 clarg::argString gen_dm_sig("-gen_dm_md5", "generates MD5 signature for decomposed matrix into file.", "decomposed_matrix.md5");
75 clarg::argString chk_dm_sig("-chk_dm_md5", "compute MD5 signature for decomposed matrix and check against MD5 at file.", "decomposed_matrix.md5");
76 clarg::argString chk_dm_error("-chk_dm_error", "check the decomposed matrix error against a reference matrix. (use -br to read from binary files)", "ref_decomposed_matrix.txt");
77 clarg::argDouble error_tol("-error_tol", "error tolerance.", 1.e-12);
78 clarg::argString dump_dm("-dump_dm", "dump decomposed matrix. (use -bd for binary format)", "dump_matrix.txt");
79 clarg::argInt cholesky_blk("-chol_blk", "Cholesky blocking factor", 256);
80 
81 /* Run statistics. */
82 RunStatsTable total_rst("-tot_rdt",
83  "Whole program (total) statistics raw data table");
84 
85 clarg::argInt nmats("-nmats", "Number of matrizes to decompose simultaneously.", 1);
86 
88 {
89 public:
91  {}
93 
94  void OpenWrite(const std::string& fn)
95  {
96  if (binary)
97  bfs.OpenWrite(fn);
98  else
99  fs.OpenWrite(fn);
100  }
101 
102  void OpenRead(const std::string& fn)
103  {
104  if (binary)
105  bfs.OpenRead(fn);
106  else
107  fs.OpenRead(fn);
108  }
109 
110  operator TPZStream&()
111  {
112  if (binary)
113  return bfs;
114  else
115  return fs;
116  }
117 
118 protected:
119 
120  bool binary;
121  TPZFileStream fs;
122  TPZBFileStream bfs;
123 };
124 
125 #include <sched.h> //sched_getcpu
126 
127 std::vector< TPZSkylMatrix<REAL>* > matrices;
128 
129 #ifdef USING_LIBNUMA
130 cpu_set_t dies_mask_array[8];
131 cpu_set_t mask_core0;
132 cpu_set_t mask_L20;
133 cpu_set_t mask_die0;
134 cpu_set_t mask_proc0;
135 cpu_set_t mask_oddcores;
136 cpu_set_t mask_evencores;
137 
138 
/**
 * Write CPUs 0..63 of an affinity mask to standard output as a row of
 * '1' (CPU in the set) and '0' (CPU not in the set) characters, CPU 0 first.
 * No newline is emitted.
 *
 * @param mask CPU set to print.
 */
void print_mask(cpu_set_t* mask)
{
    for (int cpu = 0; cpu < 64; cpu++) {
        // The parentheses matter: `<<` binds tighter than `?:`, so without
        // them the raw CPU_ISSET() value is streamed and the "1"/"0"
        // selection is evaluated and discarded.
        std::cout << (CPU_ISSET(cpu, mask) ? "1" : "0");
    }
}
145 
146 #endif
147 
148 
149 // 4 processors
150 // 2 dies / processor
151 // 4 L2 caches per die
152 // 2 cores per L2 cache
153 
154 // FPU is shared among the 2 cores under the same L2 cache
155 
156 // Best Assign policy
157 // # of threads : policy
158 // 1 : any core
159 // 2, 3, 4 : different processors
160 // 5 - 16 : different dies.
/**
 * Fill the file-level CPU masks that set_affinity() chooses from:
 * eight 8-CPU masks in dies_mask_array, the CPU ranges 0-15, 0-7, 0-1 and 0
 * (mask_proc0, mask_die0, mask_L20, mask_core0), and the even/odd CPU masks
 * over CPUs 0..63. When `verbose` >= 1 most of these masks are printed.
 * Without USING_LIBNUMA the function does nothing.
 */
void setup_masks()
{
/* Clear *mskp, then add CPUs start .. end-1. Wrapped in do/while(0) so the
 * two steps always execute together as one statement. */
#define SET_RANGE(mskp,start,end) do { \
    CPU_ZERO(mskp); \
    for (int i=(start); i<(end); i++) CPU_SET(i,mskp); \
} while (0)

#ifdef USING_LIBNUMA
    SET_RANGE(&dies_mask_array[0],0,8);
    SET_RANGE(&dies_mask_array[1],8,16);
    SET_RANGE(&dies_mask_array[2],16,24);
    SET_RANGE(&dies_mask_array[3],24,32);
    SET_RANGE(&dies_mask_array[4],32,40);
    SET_RANGE(&dies_mask_array[5],40,48);
    SET_RANGE(&dies_mask_array[6],48,56);
    SET_RANGE(&dies_mask_array[7],56,64);

    SET_RANGE(&mask_proc0,0,16);
    SET_RANGE(&mask_die0,0,8);
    SET_RANGE(&mask_L20,0,2);
    SET_RANGE(&mask_core0,0,1);

    CPU_ZERO(&mask_oddcores);
    CPU_ZERO(&mask_evencores);
    for(int i=0; i<64; i+=2) {
        CPU_SET(i,&mask_evencores);
        CPU_SET(i+1,&mask_oddcores);
    }

    if (verbose >= 1) {
        cout << "mask core 0 : "; print_mask(&mask_core0); cout << endl;
        cout << "mask core L2 0 : "; print_mask(&mask_L20); cout << endl;
        cout << "mask core die 0 : "; print_mask(&mask_die0); cout << endl;
        cout << "mask core proc 0: "; print_mask(&mask_proc0); cout << endl;
        cout << "mask evencores : "; print_mask(&mask_evencores); cout << endl;
        cout << "mask oddcores : "; print_mask(&mask_oddcores); cout << endl;
    }
#endif
}
198 
199 
200 // CPU_SET(cpus[idx],&mask);
201 //int cpus[] = {0, 16, 32, 48, 8, 24, 40, 54};
/**
 * Restrict the calling thread to one of the CPU masks prepared by
 * setup_masks(). Without USING_LIBNUMA the function does nothing.
 *
 * @param af   affinity mode:
 *             0 = leave the affinity untouched,
 *             1 = dies_mask_array[tidx mod 8],
 *             2 = mask_proc0, 3 = mask_die0, 4 = mask_L20, 5 = mask_core0,
 *             6 = mask_evencores, 7 = mask_oddcores.
 *             Any other value only produces a warning (verbosity >= 2).
 * @param tidx index of the calling thread; used by mode 1 and for messages.
 */
void set_affinity(int af, int tidx)
{
#ifdef USING_LIBNUMA
    cpu_set_t* msk = NULL;
    switch (af) {
        case 0: // Do not set affinity
            return;
        case 1:
            // Normalized so that a negative tidx cannot index before the array.
            msk = dies_mask_array + (((tidx % 8) + 8) % 8);
            break;
        case 2:
            msk = &mask_proc0;
            break;
        case 3:
            msk = &mask_die0;
            break;
        case 4:
            msk = &mask_L20;
            break;
        case 5:
            msk = &mask_core0;
            break;
        case 6:
            msk = &mask_evencores;
            break;
        case 7:
            msk = &mask_oddcores;
            break;
        default: {
            VERBOSE(2, "Warning: -af " << af
                    << " has not been defined. Not setting affinity");
            return;
        }
    }

    if (verbose >= 2) {
        std::cout << "Thread " << tidx << " affinity mask = ";
        print_mask(msk);
        std::cout << std::endl;
    }

    // A pid of 0 addresses the calling thread. A failure is reported instead
    // of being silently ignored; the run continues without the affinity.
    if (sched_setaffinity(0, sizeof(cpu_set_t), msk) != 0) {
        std::cerr << "Warning: sched_setaffinity failed for thread " << tidx
                  << " (-af " << af << ")." << std::endl;
    }
#endif
}
254 
256 
257 void init_decompose(int idx)
258 {
259  // cpu_set_t mask;
260  // CPU_ZERO(&mask);
261  // CPU_SET(cpus[idx],&mask);
262  if (affinity.get_value() > 0) {
263  set_affinity(affinity.get_value(), idx);
264  }
265  matrices[idx] = new TPZSkylMatrix<REAL>(matrix);
266 }
267 
268 void compute_decompose(int idx)
269 {
270 
271  TPZSkylMatrix<REAL>* matrix = matrices[idx];
272 
273 #define CASE_OP(opid,method) \
274 case opid: \
275 matrix->method; \
276 break
277 
278  switch (mop.get_value()) {
279  CASE_OP(0,Decompose_LDLt());
280  case 1:
281  std::cerr << "ERROR: deprecated operation -- decompose LDLt2 is no longer implemented." << std::endl;
282  break;
283  CASE_OP(2,Decompose_Cholesky());
284  CASE_OP(3,Decompose_Cholesky_blk(cholesky_blk.get_value()));
285  default:
286  std::cerr << "ERROR: Invalid matrix operation type." << std::endl;
287  }
288 }
289 
290 #include <pthread.h>
291 
/**
 * Wall-clock stopwatch with millisecond resolution, based on gettimeofday().
 * start() and stop() record time stamps; the getters report them and their
 * difference. Both stamps are zero until the first start()/stop().
 */
class thread_timer_t
{
public:
    thread_timer_t() : start_time(0), stop_time(0) {}

    /** Record the current time as the start stamp. */
    void start()
    {start_time = getms();}
    /** Record the current time as the stop stamp. */
    void stop()
    {stop_time = getms();}
    /** @return start stamp in milliseconds. */
    uint64_t get_start() const {return start_time; }
    /** @return stop stamp in milliseconds. */
    uint64_t get_stop() const {return stop_time; }
    /** @return stop stamp minus start stamp, in milliseconds. */
    uint64_t get_elapsed() const {return stop_time-start_time; }
private:
    /** @return current time of day in milliseconds. */
    uint64_t getms()
    {
        timeval t;
        gettimeofday(&t,NULL);
        // Widen before multiplying so that the seconds-to-milliseconds
        // conversion cannot overflow a narrower time_t.
        return (static_cast<uint64_t>(t.tv_sec)*1000)
             + (static_cast<uint64_t>(t.tv_usec)/1000);
    }
    uint64_t start_time;
    uint64_t stop_time;
};
313 
317 std::vector<thread_timer_t> thread_timer;
318 pthread_cond_t cond=PTHREAD_COND_INITIALIZER;
319 pthread_cond_t main_cond=PTHREAD_COND_INITIALIZER;
320 pthread_mutex_t glob_mutex=PTHREAD_MUTEX_INITIALIZER;
321 pthread_mutex_t main_mutex=PTHREAD_MUTEX_INITIALIZER;
323 
325 {
326 public:
328  {
329  run_parallel=false;
330  init_routine=NULL;
331  parallel_routine=NULL;
332  nthreads=0;
333  nthreads_initialized = 0;
334  wait_for_all_init = true;
335  }
336 
337  void execute_n_threads(unsigned n,
338  void (*init_routine)(int),
339  void (*parallel_routine)(int));
340 
342  {
343  thread_arg_t(int t,void (*ir)(int), void (*pr)(int),
344  pthread_mutex_t* mt, pthread_cond_t* cd,
345  pthread_cond_t* mcd) :
346  tid(t), init_routine(ir), parallel_routine(pr),
347  glob_mutex(mt), cond(cd), main_cond(mcd)
348  {}
349  int tid;
350  void (*init_routine)(int);
351  void (*parallel_routine)(int);
352  pthread_mutex_t* glob_mutex;
353  pthread_cond_t* cond;
354  pthread_cond_t* main_cond;
355  };
356 
357 private:
358 
360  void (*init_routine)(int);
362  void (*parallel_routine)(int);
363  std::vector<pthread_t> threads;
364 
365 };
366 
367 void *threadfunc(void *parm)
368 {
371 
372  int tid = args->tid;
373 
374  pthread_mutex_lock(args->glob_mutex);
375  if (args->init_routine) {
376 #ifdef _GNU_SOURCE
377  VERBOSE(1,"Thread " << tid << " calling init routine on CPU "
378  << (int) sched_getcpu() << endl);
379 #endif
380  (*args->init_routine)(tid);
381  }
382  nthreads_initialized++;
383  if (nthreads_initialized == nthreads) {
384  wait_for_all_init = false;
385  /* Release main thread */
386  pthread_cond_signal(args->main_cond);
387  }
388 
389  /* Wait for main to sync */
390  while (!run_parallel) {
391  pthread_cond_wait(args->cond, args->glob_mutex);
392  }
393 #ifdef _GNU_SOURCE
394  VERBOSE(1,"Thread " << tid << " calling parallel routine on CPU "
395  << (int) sched_getcpu() << endl);
396 #endif
397 
398  pthread_mutex_unlock(args->glob_mutex);
399 
400  thread_timer[tid].start();
401 
402  if (args->parallel_routine) {
403  (*args->parallel_routine)(tid);
404  }
405 
406  thread_timer[tid].stop();
407 
408  return NULL;
409 }
410 
411 void
413  void (*init_routine)(int),
414  void (*parallel_routine)(int))
415 {
416  nthreads = n;
417  nthreads_initialized = 0;
418  threads.resize(nthreads);
419  thread_timer.resize(nthreads);
420 
421  for (int i=0; i<nthreads; i++) {
422  synchronized_threads_t::thread_arg_t arg(i,init_routine,parallel_routine,
423  &glob_mutex, &cond, &main_cond);
424  PZ_PTHREAD_CREATE(&threads[i],NULL,threadfunc,(void*) &i, __FUNCTION__);
425  }
426 
427  /* Wait for all to be initialized */
428  pthread_mutex_lock(&main_mutex);
429  while (wait_for_all_init) {
430  pthread_cond_wait(&main_cond, &main_mutex);
431  }
432  pthread_mutex_unlock(&main_mutex);
433 
434  /* Signall all to start together. */
435  total_rst.start();
436  run_parallel = true;
437  pthread_cond_broadcast(&cond);
438 
439  /* Wait for all to finish. */
440  for (unsigned i=0; i<nthreads; i++) {
441  PZ_PTHREAD_JOIN(threads[i], NULL, __FUNCTION__);
442  }
443  total_rst.stop();
444 
445  if (verbose >= 2) {
446  printf("%7s,%10s,%10s,%10s\n", "thread", "elapsed", "start", "stop");
447  for (unsigned i=0; i<nthreads; i++) {
448  printf("%7d,%10lld,%10lld,%10lld\n", i,
449  thread_timer[i].get_elapsed(),
450  thread_timer[i].get_start(),
451  thread_timer[i].get_stop());
452  }
453  }
454 }
455 
456 
457 int main(int argc, char *argv[])
458 {
459 #ifdef USING_TBB
460  task_scheduler_init init;
461 #endif
462  setup_masks();
463 
464  /* Parse the arguments */
465  if (clarg::parse_arguments(argc, argv)) {
466  cerr << "Error when parsing the arguments!" << endl;
467  return 1;
468  }
469 
470  verbose = verb_level.get_value();
471 
472  if (h.get_value() == true) {
473  help(argv[0]);
474  return 1;
475  }
476 
477  if (nmats.get_value() < 1) {
478  cerr << "Error, nmats must be >= 1" << endl;
479  return 1;
480  }
481 
482  if (verbose >= 1) {
483  std::cout << "- Arguments -----------------------" << std::endl;
484  clarg::values(std::cout, false);
485  std::cout << "-----------------------------------" << std::endl;
486  }
487 
488  synchronized_threads_t thread_exec;
489 
490  /* Read the matrix. */
491  VERBOSE(1,"Reading input file: " << ifn.get_value() << std::endl);
492  FileStreamWrapper input_file(bi.get_value());
493  input_file.OpenRead(ifn.get_value());
494  matrix.Read(input_file,0);
495  VERBOSE(1,"Reading input file: " << ifn.get_value()
496  << " [DONE]" << std::endl);
497 
498  if (maxcol.was_set())
499  matrix.Resize(maxcol.get_value(),0);
500 
501  int nthreads = nmats.get_value();
502 
503  thread_exec.execute_n_threads(nthreads, init_decompose,
505 
506  if (mstats.get_value() > 0) {
507  unsigned n = matrix.Dim();
508  uint64_t n_sky_items = 0;
509  uint64_t max_height = 0;
510  for (unsigned i=0; i<n; i++) {
511  unsigned height = matrix.SkyHeight(i);
512  if (mstats.get_value() > 1) {
513  cout << "col " << i << " height = " << height << endl;
514  }
515  n_sky_items += height;
516  if (height > max_height) max_height = height;
517  }
518  uint64_t n2 = n * n;
519  double av_height = (double) n_sky_items / (double) n;
520  cout << "N = " << n << endl;
521  cout << "N^2 = " << n2 << endl;
522  cout << "Sky items = " << n_sky_items << endl;
523  cout << "N^2 / Sky items = " << (double) n2 / (double) n_sky_items << endl;
524  cout << "Avg. Height = " << av_height << endl;
525  cout << "Max. Height = " << max_height << endl;
526  }
527 
529  if (dump_dm.was_set()) {
530  VERBOSE(1, "Dumping decomposed matrix into: " <<
531  dump_dm.get_value() << endl);
532  FileStreamWrapper dump_file(bd.get_value());
533  dump_file.OpenWrite(dump_dm.get_value());
534  matrix.Write(dump_file, 0);
535  }
536 
537  /* Gen/Check MD5 signature */
538  if (gen_dm_sig.was_set() || chk_dm_sig.was_set()) {
539  TPZMD5Stream sig;
540  matrix.Write(sig, 1);
541  int ret;
542  if (chk_dm_sig.was_set()) {
543  if ((ret=sig.CheckMD5(chk_dm_sig.get_value()))) {
544  cerr << "ERROR(ret=" << ret << ") : MD5 Signature for "
545  << "decomposed matrixdoes not match." << endl;
546  return 1;
547  }
548  else {
549  cout << "Checking decomposed matrix MD5 signature: [OK]" << endl;
550  }
551  }
552  if (gen_dm_sig.was_set()) {
553  if ((ret=sig.WriteMD5(gen_dm_sig.get_value()))) {
554  cerr << "ERROR (ret=" << ret << ") when writing the "
555  << "decomposed matrix MD5 signature to file: "
556  << gen_dm_sig.get_value() << endl;
557  return 1;
558  }
559  }
560  }
561 
562  int ret=0; // Ok
563 
565  if (chk_dm_error.was_set()) {
566  VERBOSE(1, "Checking decomposed matrix error: " <<
567  chk_dm_error.get_value() << endl);
568  FileStreamWrapper ref_file(br.get_value());
569  ref_file.OpenRead(chk_dm_error.get_value());
570  /* Reference matrix. */
571  TPZSkylMatrix<REAL> ref_matrix;
572  ref_matrix.Read(ref_file,0);
573  int max_j = matrix.Cols();
574  if (max_j != ref_matrix.Cols()) {
575  cerr << "Decomposed matrix has " << max_j
576  << " cols while reference matrix has "
577  << ref_matrix.Cols() << endl;
578  return 1;
579  }
580  REAL error_tolerance = error_tol.get_value();
581  REAL max_error = 0.0;
582  for (int j=0; j<max_j; j++) {
583  int col_height = matrix.SkyHeight(j);
584  if (col_height != ref_matrix.SkyHeight(j)) {
585  cerr << "Column " << j << " of decomposed matrix has " << col_height
586  << " non zero rows while reference matrix has "
587  << ref_matrix.SkyHeight(j) << endl;
588  return 1;
589  }
590  int min_i = (j+1) - col_height;
591  for (int i=min_i; i<=j; i++) {
592 
593  REAL dm_ij = matrix.s(i,j);
594  REAL rm_ij = ref_matrix.s(i,j);
595  if (dm_ij != rm_ij) {
596  REAL diff = abs(dm_ij - rm_ij);
597  if (diff >= error_tolerance) {
598  VERBOSE(1, "diff(" << diff << ") tolerance (" << error_tolerance
599  << "). dm[" << i << "][" << j << "] (" << dm_ij
600  << ") != rm[" << i << "][" << j << "] (" << rm_ij
601  << ")." << endl);
602  ret = 1;
603  max_error = (max_error < diff)?diff:max_error;
604  }
605  }
606  }
607  }
608  if (ret != 0) {
609  cerr << "Error ("<< max_error <<") > error tolerance ("
610  << error_tolerance <<")" << endl;
611  }
612  }
613 
614  return ret;
615 }
616 
617 
618 
619 
Contains a class to record running statistics on CSV tables.
void set_affinity(int af, int tidx)
Definition: numatst.cpp:202
thread_arg_t(int t, void(*ir)(int), void(*pr)(int), pthread_mutex_t *mt, pthread_cond_t *cd, pthread_cond_t *mcd)
Definition: numatst.cpp:343
int verbose
Definition: numatst.cpp:62
int Resize(const int64_t newDim, const int64_t) override
Resizes a matrix, keeping the previous values.
Definition: pzskylmat.cpp:2487
Contains the definitions of LOGPZ_DEBUG, LOGPZ_INFO, LOGPZ_WARN, LOGPZ_ERROR and LOGPZ_FATAL, and the implementation of the inline InitializePZLOG(string) function, with or without the log4cxx library. It must be called outside the "#ifdef LOG4CXX" scope.
Contains declaration of the TPZMD5Stream class which implements the interface to write and check md5 ...
list threads
Definition: test.py:140
clarg::argString chk_dm_sig("-chk_dm_md5", "compute MD5 signature for decomposed matrix and check against MD5 at file.", "decomposed_matrix.md5")
void values(ostream &os, bool defined_only)
Definition: arglib.cpp:183
std::vector< thread_timer_t > thread_timer
Definition: numatst.cpp:317
void setup_masks()
Definition: numatst.cpp:161
uint64_t start_time
Definition: numatst.cpp:310
void OpenRead(const std::string &fn)
Definition: numatst.cpp:102
clarg::argString gen_dm_sig("-gen_dm_md5", "generates MD5 signature for decomposed matrix into file.", "decomposed_matrix.md5")
bool wait_for_all_init
Definition: numatst.cpp:316
void init_decompose(int idx)
Definition: numatst.cpp:257
uint64_t get_start()
Definition: numatst.cpp:300
#define VERBOSE(level,...)
Definition: numatst.cpp:64
pthread_cond_t main_cond
Definition: numatst.cpp:319
void Write(TPZStream &buf, int withclassid) const override
Packs the object structure in a stream of bytes.
Definition: pzskylmat.cpp:3287
clarg::argDouble error_tol("-error_tol", "error tolerance.", 1.e-12)
int64_t SkyHeight(int64_t col)
return the height of the skyline for a given column
Definition: pzskylmat.h:422
clarg::argBool h("-h", "help message", false)
TinyFad< 8, T > abs(const TinyFad< 8, T > &in)
Definition: tinyfadeight.h:846
void OpenWrite(const std::string &fn)
Definition: numatst.cpp:94
clarg::argInt maxcol("-maxcol", "Limit computation to max column (Use Resize(maxcol)).", 0)
clarg::argString dump_dm("-dump_dm", "dump decomposed matrix. (use -bd for binary format)", "dump_matrix.txt")
clarg::argBool br("-br", "binary reference. Reference decomposed matrix file format == binary.", false)
clarg::argString ifn("-ifn", "input matrix file name (use -bi to read from binary files)", "matrix.txt")
void compute_decompose(int idx)
Definition: numatst.cpp:268
Implements a skyline storage format. A Skyline matrix is symmetric so square. Matrix.
Definition: pzskylmat.h:394
fn
Definition: test.py:253
pthread_mutex_t main_mutex
Definition: numatst.cpp:321
TPZSkylMatrix< REAL > matrix
Definition: numatst.cpp:255
void execute_n_threads(unsigned n, void(*init_routine)(int), void(*parallel_routine)(int))
Definition: numatst.cpp:412
RunStatsTable total_rst("-tot_rdt", "Whole program (total) statistics raw data table")
void help(const char *prg)
Definition: numatst.cpp:46
#define PZ_PTHREAD_JOIN(thread, val, fn)
Definition: pz_pthread.h:34
int main(int argc, char *argv[])
Definition: numatst.cpp:457
std::vector< pthread_t > threads
Definition: numatst.cpp:363
virtual TVar & s(const int64_t row, const int64_t col) override
The operators check on the bounds if the DEBUG variable is defined.
Definition: pzskylmat.cpp:1871
pthread_mutex_t glob_mutex
Definition: numatst.cpp:320
clarg::argInt nmats("-nmats", "Number of matrizes to decompose simultaneously.", 1)
int CheckMD5(const std::string &filename)
Check Stream MD5 signature against MD5 signature store on file.
Definition: pzmd5stream.h:65
bool run_parallel
Definition: numatst.cpp:322
clarg::argBool copy_matrix_inside_thread("-cot", "copy on thread - copy matrix inside thread.", false)
int nthreads
Definition: numatst.cpp:315
Implements reading from and writing to an ascii file. Persistency.
Definition: TPZFileStream.h:15
uint64_t get_stop()
Definition: numatst.cpp:301
clarg::argString chk_dm_error("-chk_dm_error", "check the decomposed matrix error against a reference matrix. (use -br to read from binary files)", "ref_decomposed_matrix.txt")
int parse_arguments(int argc, char *argv[])
Definition: arglib.cpp:195
clarg::argBool bi("-bi", "binary input. Input file format == binary.", false)
std::vector< TPZSkylMatrix< REAL > *> matrices
Definition: numatst.cpp:127
#define SET_RANGE(mskp, start, end)
void start()
Definition: numatst.cpp:296
clarg::argInt affinity("-af", "affinity mode (0=no affinity, 1=heuristi 1)", 0)
clarg::argInt mstats("-mstats", "Matrix statistics vebosity level.", 0)
void * threadfunc(void *parm)
Definition: numatst.cpp:367
uint64_t stop_time
Definition: numatst.cpp:311
Contains TPZSkyline class which implements a skyline storage format.
void arguments_descriptions(ostream &os, string prefix, string suffix)
Definition: arglib.cpp:189
virtual int64_t Dim() const
Returns the dimension of the matrix if the matrix is square.
Definition: pzmatrix.h:892
#define CASE_OP(opid, method)
uint64_t getms()
Definition: numatst.cpp:304
clarg::argInt verb_level("-v", "verbosity level", 0)
void Read(TPZStream &buf, void *context) override
Unpacks the object structure from a stream of bytes.
Definition: pzskylmat.cpp:3270
int WriteMD5(const std::string &filename)
Write computed MD5 signature to file.
Definition: pzmd5stream.h:121
FileStreamWrapper(bool b)
Definition: numatst.cpp:90
bool was_set() const
Definition: arglib.h:138
int nthreads_initialized
Definition: numatst.cpp:314
const T & get_value() const
Definition: arglib.h:177
#define PZ_PTHREAD_CREATE(thread, attr, routine, args, fn)
Definition: pz_pthread.h:31
int64_t Cols() const
Returns number of cols.
Definition: pzmatrix.h:809
pthread_cond_t cond
Definition: numatst.cpp:318
clarg::argBool bd("-bd", "binary dump. Dump file format == binary.", false)
Defines the interface for saving and reading data. Persistency.
Definition: TPZStream.h:50
clarg::argInt mop("-op", "Matrix operation", 1)
uint64_t get_elapsed()
Definition: numatst.cpp:302
clarg::argInt cholesky_blk("-chol_blk", "Cholesky blocking factor", 256)
Implements the interface to write and check MD5 files. Persistency.
Definition: pzmd5stream.h:22