NeoPZ
pzskylmat.cpp
Go to the documentation of this file.
1 //EBORIN: Uncomment the following lines to enable matrices do be dumped before decompose/subst
2 //#define DUMP_BEFORE_DECOMPOSE
3 //#define DUMP_BEFORE_SUBST
4 
5 #include "arglib.h"
6 
7 
8 #ifdef USING_NEW_SKYLMAT
9 
15 #include <math.h>
16 #include <stdlib.h>
17 
18 #ifdef BLAS
19 extern "C" {
20 #include <cblas.h>
21 }
22 #endif
23 
24 #include "pzfmatrix.h"
25 #include "pzskylmat.h"
26 
27 #include <sstream>
28 #include "pzlog.h"
29 #ifdef LOG4CXX
30 static LoggerPtr logger(Logger::getLogger("pz.matrix.tpzskylmatrix"));
31 #endif
32 
33 using namespace std;
34 
35 /**************************** PUBLIC ****************************/
36 template<class TVar>
37 TPZSkylMatrix<TVar>::TPZSkylMatrix(const int64_t dim ) :
38 TPZRegisterClassId(&TPZSkylMatrix::ClassId),TPZMatrix<TVar>( dim, dim ), fElem(dim+1), fStorage(0)
39 {
40  // Initializes the diagonal with zeros.
41  fElem.Fill(0);
42 }
43 
44 template<class TVar>
45 TPZSkylMatrix<TVar>::TPZSkylMatrix(const int64_t dim, const TPZVec<int64_t> &skyline ) :
46 TPZRegisterClassId(&TPZSkylMatrix::ClassId),TPZMatrix<TVar>( dim, dim ), fElem(dim+1), fStorage(0)
47 {
48  fElem.Fill(0);
49  InitializeElem(skyline,fStorage,fElem);
50 }
51 
52 template<class TVar>
54 {
55  fElem.Fill(0);
56  InitializeElem(skyline,fStorage,fElem);
57 }
58 
59 template<class TVar>
61 #ifdef PZDEBUG
62  {
63  int64_t size = this->fElem.NElements();
64  if(size != B.fElem.NElements()){
65  PZError << "\nFATAL ERROR at " << __PRETTY_FUNCTION__ << "\n";
66  PZError.flush();
67  DebugStop();
68  }
69  for(int64_t i = 0; i < size; i++){
70  if((this->fElem[i]-this->fElem[0]) != (B.fElem[i]-B.fElem[0])){
71  PZError << "\nFATAL ERROR at " << __PRETTY_FUNCTION__ << "\n";
72  PZError.flush();
73  DebugStop();
74  }
75  }
76  }
77 #endif
78 
79  const int64_t n = this->fStorage.NElements();
80  for(int64_t i = 0; i < n; i++)
81  this->fStorage[i] += TVar(k) * B.fStorage[i];
82 }
83 
84 template<class TVar>
86 {
87  TPZMatrix<TVar> *matrix = mat.operator->();
88  TPZSkylMatrix<TVar> *skylmat = dynamic_cast<TPZSkylMatrix<TVar> *>(matrix);
89  if (!skylmat) {
90  DebugStop();
91  }
92  if (fStorage.NElements() != skylmat->fStorage.NElements()) {
93  DebugStop();
94  }
95  memcpy(&fStorage[0], &(skylmat->fStorage[0]) , fStorage.NElements()*sizeof(TVar));
96  this->fDecomposed = skylmat->fDecomposed;
97  this->fDefPositive = skylmat->fDefPositive;
98 }
99 
100 template<class TVar>
101 int
102 TPZSkylMatrix<TVar>::PutVal(const int64_t r,const int64_t c,const TVar & value )
103 {
104  // Adjusting row and col to work with superior triangular
105  if (r > c)
106  return PutVal(c, r, value);
107 
108  // Column array index
109  int64_t sz = Size(c);
110  if ( (c-r) >= sz) {
111  if (IsZero(value)) {
112  return 1; // Return OK
113  }
114  else {
115  cerr << "TPZSkylMatrix::PutVal Size" << sz;
116  TPZMatrix<TVar>::Error(__PRETTY_FUNCTION__,"Index out of range");
117  return 0;
118  }
119  }
120 
121  // See pzskylmat.h for more info on how to compute the index.
122  int64_t index = r + sz - 1 - c;
123  fElem[c][index] = value;
124  return 1;
125 }
126 
127 template<class TVar>
128 const TVar &
129 TPZSkylMatrix<TVar>::GetVal(const int64_t r,const int64_t c ) const
130 {
131  if (r > c)
132  return GetVal(c,r);
133 
134 #ifdef PZDEBUG
135  unsigned dim = this->Dim();
136 
137  if(r >= dim || c >= dim || r < 0 || c < 0) {
138  cerr << "TPZSkylMatrix::GetVal index out of range row = " << r
139  << " col = " << c << endl;
140  return this->gZero;
141  }
142 #endif
143 
144  int64_t sz = Size(c);
145  if ( (c-r) < sz ) {
146  int64_t index = r + sz - 1 - c;
147  return (fElem[c][index]);
148  }
149  else {
150 #ifdef PZDEBUG
151  if(this->gZero != TVar(0.)) {
152  cerr << "TPZSkylMatrix gZero = " << this->gZero << endl;
153  DebugStop();
154  }
155 #endif
156  return(this->gZero );
157  }
158 }
159 
160 template<class TVar>
161 TVar &
162 TPZSkylMatrix<TVar>::operator()(int64_t ri, int64_t ci)
163 {
164  int64_t r = ri;
165  int64_t c = ci;
166  if (ri > ci) {
167  r = ci;
168  c = ri;
169  }
170 
171  int64_t sz = Size(c);
172 
173  if ( (c-r) >= sz ) {
174  TPZMatrix<TVar>::Error(__PRETTY_FUNCTION__,"Index out of range");
175  DebugStop();
176  }
177 
178  int64_t index = r + sz - 1 - c;
179 
180  return fElem[c][index];
181 }
182 
183 template<class TVar>
185  const TVar alpha,const TVar beta ,const int opt) const
186 {
187  // Computes z = beta * y + alpha * opt(this)*x
188  // z and x cannot overlap in memory
189 
190  if (this->fDecomposed != ENoDecompose) {
191  //TODO: EBORIN: Figure out why the next line was commented.
192  // DebugStop();
193  }
194 
195  if ((!opt && this->Cols() != x.Rows()) || this->Rows() != x.Rows()) {
196  TPZMatrix<TVar>::Error(__PRETTY_FUNCTION__," <matrix with incompatible dimensions>" );
197  }
198 
199  if(z.Rows() != x.Rows() || z.Cols() != x.Cols())
200  z.Redim(x.Rows(),x.Cols());
201 
202  if(x.Cols() != y.Cols() || x.Cols() != z.Cols() || x.Rows() != y.Rows() || x.Rows() != z.Rows()) {
203  cerr << "x.Cols = " << x.Cols() << " y.Cols()"<< y.Cols() << " z.Cols() " << z.Cols()
204  << " x.Rows() " << x.Rows() << " y.Rows() "<< y.Rows() << " z.Rows() "<< z.Rows() << endl;
205  TPZMatrix<TVar>::Error(__PRETTY_FUNCTION__," incompatible dimensions\n");
206  }
207 
208  this->PrepareZ(y,z,beta,opt);
209 
210  int64_t rows = this->Rows();
211  int64_t xcols = x.Cols();
212  int64_t ic, r;
213  for (ic = 0; ic < xcols; ic++) {
214  for( r = 0 ; r < rows ; r++ ) {
215  int64_t offset = Size(r);
216  TVar val = 0.;
217  const TVar *p = &x.g((r-offset+1),ic);
218  TVar *diag = fElem[r];
219  TVar *diaglast = fElem[r+1]-1;
220  while( diag < diaglast ) {
221  val += *diag++ * *p;
222  p ++;
223  }
224  if( diag == diaglast )
225  val += *diag * *p;
226 
227  z(r,ic) += val*alpha;
228 
229  TVar *zp = &z((r-offset+1),ic);
230  val = x.g(r,ic);
231  diag = fElem[r];
232  while( diag < diaglast ) {
233  *zp += alpha * *diag++ * val;
234  zp ++;
235  }
236  }
237  }
238 }
239 
240 template<class TVar>
243 {
244  Clear();
245  Copy(A);
246  return(*this);
247 }
248 
249 template<class TVar>
252 {
253  if ( this->Dim() != A.Dim() )
254  TPZMatrix<TVar>::Error(__PRETTY_FUNCTION__,"<incompatible dimensions>" );
255 
256  TPZVec<int64_t> skylinesize(this->Dim());
257  ComputeMaxSkyline(*this,A,skylinesize);
258  TPZSkylMatrix res( this->fRow, skylinesize );
259 
260  TVar *elemMax;
261  TVar *elemMin;
262  int64_t sizeMax;
263  int64_t sizeMin;
264 
265  for (int64_t col = 0; col < this->Dim(); col++) {
266  // Select the size and the elements of the smallest and highest column.
267  if ( Size(col) > A.Size(col) ) {
268  sizeMax = Size(col);
269  elemMax = fElem[col];
270  sizeMin = A.Size(col);
271  elemMin = A.fElem[col];
272  }
273  else {
274  sizeMax = A.Size(col);
275  elemMax = A.fElem[col];
276  sizeMin = Size(col);
277  elemMin = fElem[col];
278  }
279 
280  TVar *dest = res.fElem[col];
281  int64_t i = 0;
282  for ( ; i < sizeMin; i++ )
283  *dest++ = (*elemMax++) + (*elemMin++);
284  for ( ; i < sizeMax; i++ )
285  *dest++ = *elemMax++;
286  }
287 
288  return( res );
289 }
290 
291 template<class TVar>
294 {
295  if ( this->Dim() != A.Dim() )
296  TPZMatrix<TVar>::Error(__PRETTY_FUNCTION__, "operator-( TPZSkylMatrix ) <incompatible dimensions>" );
297 
298  TPZVec<int64_t> skylinesize(this->Dim());
299  ComputeMaxSkyline(*this,A,skylinesize);
300  TPZSkylMatrix<TVar> res( this->fRow, skylinesize );
301 
302  for ( int64_t col = 0; col < this->fRow; col++ ) {
303  int64_t sizeThis = Size(col);
304  TVar *elemThis = fElem[col];
305  int64_t sizeA = A.Size(col);
306  TVar *elemA = A.fElem[col];
307 
308  TVar *dest = res.fElem[col];
309  int64_t i;
310  for ( i = 0; (i < sizeThis) && (i < sizeA); i++ )
311  *dest++ = (*elemThis++) - (*elemA++);
312 
313  if ( i == sizeA )
314  for ( ; i < sizeThis; i++ ) *dest++ = *elemThis++;
315  else
316  for ( ; i < sizeA; i++ ) *dest++ = -(*elemA++);
317  }
318 
319  return( res );
320 }
321 
322 template<class TVar>
325 {
326  if ( this->Dim() != A.Dim() )
327  TPZMatrix<TVar>::Error(__PRETTY_FUNCTION__,"operator+=( TPZSkylMatrix ) <incompatible dimensions>" );
328 
329  TPZSkylMatrix res((*this)+A);
330  *this = res;
331  return *this;
332 }
333 
334 template<class TVar>
337 {
338  if ( this->Dim() != A.Dim() )
339  TPZMatrix<TVar>::Error(__PRETTY_FUNCTION__,"operator-=( TPZSkylMatrix ) <incompatible dimensions>" );
340 
341  TPZSkylMatrix res(*this-A);
342  *this = res;
343  return *this;
344 }
345 
346 template<class TVar>
348 TPZSkylMatrix<TVar>::operator*(const TVar value ) const
349 {
350  TPZSkylMatrix res( *this );
351  int64_t nelem = res.fStorage.NElements();
352  TVar *elemRes = res.fElem[0];
353 
354  for (int64_t i=0; i<nelem; i++) {
355  *elemRes++ *= value;
356  }
357 
358  return( res );
359 }
360 
361 template<class TVar>
363 TPZSkylMatrix<TVar>::operator*=(const TVar value )
364 {
365  if ( IsZero( value ) ) {
366  Zero();
367  return( *this );
368  }
369 
370  int64_t nelem = fStorage.NElements();
371  TVar *elemRes = fElem[0];
372 
373  for (int64_t i=0; i<nelem; i++) {
374  *elemRes++ *= value;
375  }
376 
377  this->fDecomposed = 0;
378  return( *this );
379 }
380 
381 /*** Resize ***/
382 // Change the dimensions of the matrix, but keep the old values. New elements are zeroed.
383 template<class TVar>
384 int TPZSkylMatrix<TVar>::Resize( int64_t newDim ,int64_t )
385 {
386  if ( newDim == this->Dim() )
387  return( 1 );
388 
389  fElem.Resize(newDim+1);
390 
391  // Copy the elements into the new matrix.
392  int64_t min = MIN( newDim, this->Dim() );
393  for (int64_t i = min+1; i <= newDim; i++ )
394  fElem[i] = fElem[i-1];
395 
396  // Set the remaining positions with zero.
397  fStorage.Resize(fElem[newDim]-fElem[0]);
398  this->fRow = this->fCol = newDim;
399  this->fDecomposed = 0;
400  return( 1 );
401 }
402 
403 template<class TVar>
404 int TPZSkylMatrix<TVar>::Redim( int64_t newDim , int64_t)
405 {
406  if ( newDim == this->Dim() ) {
407  Zero();
408  return( 1 );
409  }
410 
411  Clear();
412  fElem.Resize(newDim);
413  fElem.Fill(0);
414  this->fRow = this->fCol = newDim;
415  this->fDecomposed = 0;
416  return( 1 );
417 }
418 
419 template<class TVar>
421 {
422  fStorage.Fill(0.);
423  this->fDecomposed = 0;
424  this->fDefPositive = 0;
425  return( 1 );
426 }
427 
428 template<class TVar>
430 {
431  int64_t dim = skyline.NElements();
432 
433  int64_t nelem=0;
434  for(int64_t i=0; i<dim; i++) {
435  nelem += i-skyline[i]+1;
436  }
437 
438  return nelem;
439 }
440 
441 template<class TVar>
443 {
444  int64_t dim = skyline.NElements();
445  int64_t nel = NumElements(skyline);
446  storage.Resize(nel);
447  storage.Fill(0.);
448  point.Resize(dim+1);
449  if(dim) {
450  point[0] = &storage[0];
451  point[dim] = &storage[0]+nel;
452  }
453  else {
454  point[0] = 0;
455  }
456 
457  for(int64_t i=1; i<dim+1; i++) {
458  point[i] = point[i-1] + (i-1)-skyline[i-1]+1;
459  }
460 }
461 
462 template<class TVar>
464  const TPZSkylMatrix<TVar> &second, TPZVec<int64_t> &res)
465 {
466  if (first.Rows() != second.Rows()) {
467  cout<<"ComputeMaxSkyline : incompatible dimension";
468  return;
469  }
470  int64_t dim = first.Rows();
471  res.Resize(dim);
472 
473  for(int64_t i=0; i<dim; i++) {
474  int64_t sz_first = first.Size(i);
475  int64_t sz_secon = second.Size(i);
476  if (sz_first > sz_secon)
477  res[i] = i-(sz_first-1);
478  else
479  res[i] = i-(sz_secon-1);
480  }
481 }
482 
483 template<class TVar>
484 void TPZSkylMatrix<TVar>::SolveSOR(int64_t & numiterations,const TPZFMatrix<TVar> &F,
485  TPZFMatrix<TVar> &result, TPZFMatrix<TVar> *residual, TPZFMatrix<TVar> &scratch,const REAL overrelax,
486  REAL &tol,const int FromCurrent,const int direction)
487 {
488  if(residual == &F) {
489  cout << "TPZMatrix::SolveSOR called with residual and F equal, no solution\n";
490  return;
491  }
492  REAL res = 2.*tol+1.;
493  if(residual) res = Norm(*residual);
494  if(!FromCurrent) {
495  result.Zero();
496  }
497  TVar over = overrelax;
498  int64_t r = this->Dim();
499  int64_t c = F.Cols();
500  int64_t i,ifirst = 0, ilast = r, iinc = 1;
501  if(direction == -1) {
502  ifirst = r-1;
503  ilast = 0;
504  iinc = -1;
505  }
506  int64_t it;
507  for(it=0; it<numiterations && res > tol; it++) {
508  res = 0.;
509  scratch = F;
510  for(int64_t ic=0; ic<c; ic++) {
511  if(direction == 1) {
512  //
513  // compute the upper triangular part first and put into the scractch vector
514  //
515  for(i=ifirst; i!=ilast; i+= iinc) {
516  //TPZColuna *mydiag = &fDiag[i];
517  int64_t offset = Size(i);
518  TVar val;
519  TVar *diaglast = (fElem[i+1]-1);
520  TVar *scratchp = &scratch(i-offset+1,ic);
521  val = result(i,ic);
522  TVar *p = fElem[i];
523  int64_t lastid = diaglast-p;
524  int64_t id;
525  for(id=0; id<=lastid; id++)
526  *scratchp++ -= *p++ * val;
527  /* codeguard fix
528  while( diag >= diaglast ) *scratchp++ -= *diag-- * val;
529  */
530  }
531  //
532  // perform the SOR operation
533  //
534  for(i=ifirst; i!=ilast; i+= iinc) {
535  //TPZColuna *mydiag = &fDiag[i];
536  int64_t offset = Size(i);
537  TVar val = scratch(i,ic);
538  TVar *p = &result(i-offset+1,ic);
539  TVar *diag = fElem[i];
540  TVar *diaglast = (fElem[i+1]-1);
541  while( diag < diaglast )
542  val -= *diag++ * *p++;
543  res += abs(val*val);
544  result(i,ic) += val*over/ (*diag);
545  }
546  } else {
547  //
548  // the direction is upward
549  //
550  // put the lower triangular part of the multiplication into the scratch vector
551  //
552  for(i=ifirst; i!=ilast; i+= iinc) {
553  //TPZColuna *mydiag = &fDiag[i];
554  int64_t offset = Size(i);
555  TVar val = scratch(i,ic);
556  TVar *p = &result(i-offset+1,ic);
557  TVar *diag = fElem[i];
558  TVar *diaglast = (fElem[i+1]-1);
559  while( diag < diaglast )
560  val -= *diag++ * *p++;
561  // res += val*val;
562  scratch(i,ic) = val;
563  }
564  //
565  // perform the SOR operation
566  //
567  for(i=ifirst; i!=ilast; i+= iinc) {
568  //TPZColuna *mydiag = &fDiag[i];
569  int64_t offset = Size(i);
570  // TVar val = scratch(i,ic);
571  TVar *diaglast = (fElem[i+1]-1);
572  TVar *scratchp = &scratch(i-offset+1,ic);
573  //val= result(i,ic);
574  TVar val = scratch(i,ic);
575  val -= *diaglast * result(i,ic);
576  res += abs(val*val);
577  val = over * val / *diaglast;
578  result(i,ic) += val;
579  val = result(i,ic);
580  TVar *diag = fElem[i];
581  while( diag < diaglast )
582  *scratchp++ -= *diag++ * val;
583  }
584  }
585  }
586  res = sqrt(res);
587  }
588  if(residual) {
589  this->Residual(result,F,*residual);
590  }
591  numiterations = it;
592  tol = res;
593 }
594 
595 template<class TVar>
597  TPZVec<int64_t> &source,
598  TPZVec<int64_t> &destination)
599 {
600 
601  std::cout << "Passei por aqui\n";
602  int64_t nelem = source.NElements();
603  int64_t icoef,jcoef,ieq,jeq,ieqs,jeqs;
604  for(icoef=0; icoef<nelem; icoef++) {
605  ieq = destination[icoef];
606  ieqs = source[icoef];
607  for(jcoef=icoef; jcoef<nelem; jcoef++) {
608  jeq = destination[jcoef];
609  jeqs = source[jcoef];
610  int64_t row(ieq), col(jeq);
611  // invertendo linha-coluna para triangular superior
612  if (row > col)
613  this->Swap(&row, &col);
614 #ifdef PZDEBUG
615  // checando limites
616  if(row >= this->Dim() || col >= this->Dim()) {
617  cout << "TPZSkylMatrix::GetVal index out of range row = " <<
618  row << " col = " << col << endl;
619  DebugStop();
620  }
621 #endif
622  int64_t sz = Size(col);
623 
624  int64_t index = row + sz - 1 - col;
625 #pragma omp atomic
626  fElem[col][index] += elmat(ieqs,jeqs);
627 
628 
629  }
630  }
631 }
632 
633 template<>
635  TPZVec<int64_t> &source,
636  TPZVec<int64_t> &destination)
637 {
638  int64_t nelem = source.NElements();
639  int64_t icoef,jcoef,ieq,jeq,ieqs,jeqs;
640  for(icoef=0; icoef<nelem; icoef++) {
641  ieq = destination[icoef];
642  ieqs = source[icoef];
643  for(jcoef=icoef; jcoef<nelem; jcoef++) {
644  jeq = destination[jcoef];
645  jeqs = source[jcoef];
646  int64_t row(ieq), col(jeq);
647  // invertendo linha-coluna para triangular superior
648  if (row > col)
649  this->Swap(&row, &col);
650 #ifdef PZDEBUG
651  // checando limites
652  if(row >= this->Dim() || col >= this->Dim()) {
653  cout << "TPZSkylMatrix::GetVal index out of range row = " <<
654  row << " col = " << col << endl;
655  DebugStop();
656  }
657 #endif
658  int64_t sz = Size(col);
659 
660  int64_t index = row + sz - 1 - col;
661 #pragma omp atomic
662  fElem[col][index] += elmat(ieqs,jeqs);
663  }
664  }
665 }
666 
667 
668 template<>
670  TPZVec<int64_t> &source,
671  TPZVec<int64_t> &destination)
672 {
673  int64_t nelem = source.NElements();
674  int64_t icoef,jcoef,ieq,jeq,ieqs,jeqs;
675  for(icoef=0; icoef<nelem; icoef++) {
676  ieq = destination[icoef];
677  ieqs = source[icoef];
678  for(jcoef=icoef; jcoef<nelem; jcoef++) {
679  jeq = destination[jcoef];
680  jeqs = source[jcoef];
681  int64_t row(ieq), col(jeq);
682  // invertendo linha-coluna para triangular superior
683  if (row > col)
684  this->Swap(&row, &col);
685 #ifdef PZDEBUG
686  // checando limites
687  if(row >= this->Dim() || col >= this->Dim()) {
688  cout << "TPZSkylMatrix::GetVal index out of range row = " <<
689  row << " col = " << col << endl;
690  DebugStop();
691  }
692 #endif
693  // executando contribuição
694  int64_t sz = Size(col);
695 
696  int64_t index = row + sz - 1 - col;
697 #pragma omp atomic
698  fElem[col][index] += elmat(ieqs,jeqs);
699 
700 
701  }
702  }
703 }
704 
705 template<>
706 int TPZSkylMatrix<std::complex<float> >::Decompose_Cholesky(std::list<int64_t> &singular)
707 {
708  DebugStop();
709  return -1;
710 }
711 
712 template<>
713 int TPZSkylMatrix<std::complex<double> >::Decompose_Cholesky(std::list<int64_t> &singular)
714 {
715  DebugStop();
716  return -1;
717 }
718 
719 template<>
720 int TPZSkylMatrix<std::complex<long double> >::Decompose_Cholesky(std::list<int64_t> &singular)
721 {
722  DebugStop();
723  return -1;
724 }
725 
726 //EBORIN: Define these if you want to use the experimental version.
727 //#define DECOMPOSE_CHOLESKY_OPT2
728 //#define SKYLMATRIX_GETVAL_OPT1
729 
730 /**************************/
731 /*** Decompose Cholesky ***/
732 template<class TVar>
733 int TPZSkylMatrix<TVar>::Decompose_Cholesky(std::list<int64_t> &singular)
734 {
735  if(this->fDecomposed == ECholesky)
736  return 1;
737 
738  if (this->fDecomposed )
739  TPZMatrix<TVar>::Error(__PRETTY_FUNCTION__, "Decompose_Cholesky <Matrix already Decomposed>" );
740 
741 #ifdef DUMP_BEFORE_DECOMPOSE
742  dump_matrix(this, "TPZSkylMatrix::Decompose_Cholesky()");
743 #endif
744 
745  singular.clear();
746 
747  TVar pivot;
748  int64_t dimension = this->Dim();
749 
750  for ( int64_t k = 0; k < dimension; k++ ) {
751 
752  if ( Size(k) == 0 )
753  return 0;
754 
755  // sum = Sum( A(k,p) * A(k,p) ), p = 1, ..., k-1.
756  TVar sum = 0.0;
757  TVar *elem_k = fElem[k];
758  TVar *end_k = fElem[k+1]-1;
759 
760 #pragma clang loop vectorize_width(2)
761  for ( ; elem_k < end_k; elem_k++ )
762  sum += (*elem_k) * (*elem_k);
763 
764  elem_k = fElem[k+1]-1; // Diagonal element.
765  TVar* first_k = fElem[k]; // First element at row k
766  TVar* last_k = elem_k; // Last element at row k (diagonal element)
767  int64_t k_sz = last_k - first_k;
768 
769  // Faz A(k,k) = sqrt( A(k,k) - sum ).
770  pivot = *elem_k - sum;
771 
772  //EBORIN: FIXME: Shouldn't this be IsZero(pivot)???
773  if (pivot < TVar(1.e-10)) {
774  singular.push_back(k);
775  pivot = 1.;
776  }
777 
778  pivot = sqrt( pivot );
779  *elem_k = pivot;
780 
781  // Loop para i = k+1 ... Dim().
782  int64_t i=k+1;
783 
784  for ( int64_t j = 2; i<dimension; j++,i++ ) {
785 
786  TVar* upd_elem = fElem[i+1] - j; // Element to update (row i, column k)
787  TVar* first_i = fElem[i]; // First element at row i (also the first element at rowi)
788  TVar* last_i = upd_elem;
789 
790  if (first_i < last_i) {
791 
792  // Faz sum = SOMA( A(i,p) * A(k,p) ), p = 1,..,k-1.
793  sum = 0.0;
794 
795  int64_t min_sz = last_i - first_i;
796  if (min_sz > k_sz)
797  min_sz = k_sz;
798 
799  TVar* ip = last_i - min_sz;
800  TVar* kp = last_k - min_sz;
801 
802 #pragma clang loop vectorize_width(2)
803  for(unsigned l=0; l<min_sz; l++)
804  sum += (*ip++) * (*kp++);
805 
806  // A(i,k) = (A(i,k) - sum) / A(k,k)
807  *upd_elem = (*upd_elem -sum) / pivot;
808  }
809  else if (first_i == last_i) {
810  // sum == 0.0
811  *upd_elem = *upd_elem / pivot;
812  }
813  // else { no elements to update }
814  }
815  }
816 
817  if(this->Rows() && (GetVal(this->Rows()-1,this->Rows()-1)) < TVar(1.e-15)) {
818  singular.push_back(this->Rows()-1);
819  PutVal(this->Rows()-1,this->Rows()-1,1.);
820  }
821 
822  this->fDecomposed = ECholesky;
823  this->fDefPositive = 1;
824  return( 1 );
825 }
826 
827 template<>
828 int TPZSkylMatrix<std::complex<float> >::Decompose_Cholesky()
829 {
830  DebugStop();
831  return -1;
832 }
833 template<>
834 int TPZSkylMatrix<std::complex<double> >::Decompose_Cholesky()
835 {
836  DebugStop();
837  return -1;
838 }
839 template<>
840 int TPZSkylMatrix<std::complex<long double> >::Decompose_Cholesky()
841 {
842  DebugStop();
843  return -1;
844 }
845 
846 clarg::argBool clk_mig("-skl_chk_pm", "Migrate Skyline pages before Cholesky Decomposition");
847 clarg::argBool clk_rea("-skl_chk_rea", "Reallocate Skyline data before Cholesky Decomposition");
848 
849 template<class TVar>
851 {
852  if(this->fDecomposed == ECholesky)
853  return 1;
854 
855  if (this->fDecomposed )
856  TPZMatrix<TVar>::Error(__PRETTY_FUNCTION__, "Decompose_Cholesky <Matrix already Decomposed>" );
857 
858 #ifdef DUMP_BEFORE_DECOMPOSE
859  dump_matrix(this, "TPZSkylMatrix::Decompose_Cholesky()");
860 #endif
861 
862  if (clk_mig.was_set())
863  MigratePages();
864  if (clk_rea.was_set())
865  ReallocForNuma();
866 
867  TVar pivot;
868  TVar minpivot = 10000.;
869  int64_t dimension = this->Dim();
870 
871  for ( int64_t k = 0; k < dimension; k++ ) {
872 
873  if ( Size(k) == 0 )
874  return 0;
875 
876  // sum = Sum( A(k,p) * A(k,p) ), p = 1, ..., k-1.
877  TVar sum = 0.0;
878  TVar *elem_k = fElem[k];
879  TVar *end_k = fElem[k+1]-1;
880 #pragma clang loop vectorize_width(2)
881  for ( ; elem_k < end_k; elem_k++ )
882  sum += (*elem_k) * (*elem_k);
883 
884  elem_k = fElem[k+1]-1; // Diagonal element.
885  TVar* first_k = fElem[k]; // First element at row k
886  TVar* last_k = elem_k; // Last element at row k (diagonal element)
887  int64_t k_sz = last_k - first_k;
888 
889  // Faz A(k,k) = sqrt( A(k,k) - sum ).
890  pivot = *elem_k - sum;
891  minpivot = minpivot < pivot ? minpivot : pivot;
892  if ( pivot < TVar(0.) || IsZero(pivot) ) {
893  cout << "TPZSkylMatrix::DecomposeCholesky! Matrix is not positive definite" << pivot << endl;
894  return 0;
895  }
896 
897  pivot = sqrt( pivot );
898  *elem_k = pivot;
899 
900  // Loop para i = k+1 ... Dim().
901  int64_t i=k+1;
902 
903  for ( int64_t j = 2; i<dimension; j++,i++ ) {
904 
905  TVar* upd_elem = fElem[i+1] - j; // Element to update (row i, column k)
906  TVar* first_i = fElem[i]; // First element at row i (also the first element at rowi)
907  TVar* last_i = upd_elem;
908 
909  if (first_i < last_i) {
910 
911  // Faz sum = SOMA( A(i,p) * A(k,p) ), p = 1,..,k-1.
912  sum = 0.0;
913 
914  int64_t min_sz = last_i - first_i;
915  if (min_sz > k_sz)
916  min_sz = k_sz;
917 
918  TVar* ip = last_i - min_sz;
919  TVar* kp = last_k - min_sz;
920 
921 #pragma clang loop vectorize_width(2)
922  for(unsigned l=0; l<min_sz; l++)
923  sum += (*ip++) * (*kp++);
924 
925  // A(i,k) = (A(i,k) - sum) / A(k,k)
926  *upd_elem = (*upd_elem -sum) / pivot;
927  }
928  else if (first_i == last_i) {
929  // sum == 0.0
930  *upd_elem = *upd_elem / pivot;
931  }
932  // else { no elements to update }
933  }
934  }
935 
936  this->fDecomposed = ECholesky;
937  this->fDefPositive = 1;
938 #ifdef PZDEBUG
939  std::cout << __PRETTY_FUNCTION__ << " minpivot " << minpivot << std::endl;
940 #endif
941  return( 1 );
942 }
943 
944 template<>
945 int TPZSkylMatrix<std::complex<float> >::Decompose_Cholesky_blk(int64_t blk_sz)
946 {
947  DebugStop();
948  return -1;
949 }
950 
951 template<>
952 int TPZSkylMatrix<std::complex<double> >::Decompose_Cholesky_blk(int64_t blk_sz)
953 {
954  DebugStop();
955  return -1;
956 }
957 
958 template<>
959 int TPZSkylMatrix<std::complex<long double> >::Decompose_Cholesky_blk(int64_t blk_sz)
960 {
961  DebugStop();
962  return -1;
963 }
964 
965 template<class TVar>
967 {
968  DebugStop();
969  return -1;
970 }
971 
972 template<class TVar>
973 int
974 TPZSkylMatrix<TVar>::Decompose_LDLt(std::list<int64_t> &singular)
975 {
976  if( this->fDecomposed == ELDLt)
977  return 1;
978 
979  if( this->fDecomposed ) {
980  TPZMatrix<TVar>::Error(__PRETTY_FUNCTION__, " Decompose_LDLt <Matrix already Decomposed with different decomposition>" );
981  }
982 
983 #ifdef DUMP_BEFORE_DECOMPOSE
984  dump_matrix(this, "TPZSkylMatrix::Decompose_LDLt(singular)");
985 #endif
986 
987  singular.clear();
988 
989  // Third try
990  TVar *elj,*ell;
991  int64_t j,l,minj,minl,minrow,dimension = this->Dim();
992  TVar sum;
993  j = 1;
994  while(j < dimension) {
995 
996  minj = j-Size(j)+1;
997  l = minj;
998 
999  while(l <= j) {
1000  minl = l-Size(l)+1;
1001  minrow = (minj<minl)? minl:minj;
1002  int64_t k = minrow;
1003 
1004  // DiagkPtr = fDiag+minrow;
1005  elj = fElem[j+1] - (j+1) + minrow ; // fElem[j]+j-minrow;
1006  ell = fElem[l+1] - (l+1) + minrow ; // fElem[l]+l-minrow;
1007  sum = 0.;
1008 
1009  while(k < l) {
1010  sum += *elj++ * *ell++ * *(fElem[k+1]-1);
1011  k++;
1012  }
1013 
1014  *elj -= sum;
1015 
1016  if(ell != elj) {
1017  *elj /= *ell;
1018  }
1019  else if(IsZero(*elj)) {
1020  singular.push_back(l);
1021  *elj = 1.;
1022  }
1023  l++;
1024  }
1025  j++;
1026  }
1027 
1028  if(this->Rows() && IsZero(GetVal(this->Rows()-1,this->Rows()-1))) {
1029  singular.push_back(this->Rows()-1);
1030  PutVal(this->Rows()-1,this->Rows()-1,1.);
1031  }
1032 
1033  this->fDecomposed = ELDLt;
1034  this->fDefPositive = 0;
1035  //if(Dim() > 100) cout << endl;
1036  return( 1 );
1037 }
1038 
1039 template<class TVar>
1041 {
1042  if( this->fDecomposed == ELDLt)
1043  return 1;
1044  if ( this->fDecomposed )
1045  TPZMatrix<TVar>::Error(__PRETTY_FUNCTION__, " Decompose_LDLt <Matrix already Decomposed with different decomposition>" );
1046 
1047 #ifdef DUMP_BEFORE_DECOMPOSE
1048  dump_matrix(this, "TPZSkylMatrix::Decompose_LDLt()");
1049 #endif
1050 
1051  // Third try
1052  TVar *elj,*ell;
1053  int64_t j,l,minj,minl,minrow,dimension = this->Dim();
1054  TPZVec<TVar> diag(dimension);
1055  for(j=0; j<dimension; j++)
1056  diag[j] = *(fElem[j+1]-1);
1057 
1058  //std::cout << "TPZSkylMatrix<TVar>::Decompose_LDLt: dimension = " << dimension << std::endl;
1059 
1060  TVar sum;
1061  j = 1;
1062  while(j < dimension) {
1063  /* if(!(j%100) && Dim() > 100) {
1064  cout << j << ' ';
1065  cout.flush();
1066  }
1067  if(!(j%1000)) cout << endl;*/
1068  minj = j-Size(j)+1;
1069  l = minj;
1070  while(l <= j) {
1071  minl = l-Size(l)+1;
1072  minrow = (minj<minl)? minl:minj;
1073  int64_t k = minrow;
1074  // DiagkPtr = fDiag+minrow;
1075  elj = fElem[j]+minrow-minj; // fElem[j]+minrow-(j-Size(j)+1); // Pointer to A[minrow,j]
1076  ell = fElem[l]+minrow-minl; // fElem[l]+minrow-(l-Size(l)+1); // Pointer to A[minrow,l]
1077  TVar *diagptr = &diag[k];
1078  sum = 0.;
1079  while(k < l) {
1080  sum += *elj++ * *ell++ * *diagptr++;
1081  k++;
1082  }
1083  *elj -= sum;
1084  if(ell != elj) *elj /= *ell;
1085  else if(IsZero(*elj)) {
1086 #ifdef LOG4CXX
1087  std::stringstream sout;
1088  sout << "col = " << j << " diagonal " << *elj;
1089  LOGPZ_DEBUG(logger,sout.str())
1090 #endif
1091 
1092  *diagptr = *elj;
1093  cout << "TPZSkylMatrix pivot = " << *elj << endl;
1094  cout << "TPZSkylMatrix::DecomposeLDLt zero pivot\n";
1095  cout << "j = " << j << " l = " << l << endl;
1096  }
1097  else {
1098  *diagptr = *elj;
1099  }
1100  l++;
1101  }
1102  j++;
1103  }
1104  this->fDecomposed = ELDLt;
1105  this->fDefPositive = 0;
1106  //if(Dim() > 100) cout << endl;
1107  return( 1 );
1108 }
1109 
1110 #ifdef USING_MKL
1111 
1112 #include <mkl.h>
1113 
1114 /*********************/
1115 /*** Subst Forward ***/
1116 //
1117 // Faz Ax = b, onde A e' triangular superior.
1118 // Utilizando MKL
1119 template<>
1120 int
1122 {
1123  if ( (B->Rows() != this->Dim()) || this->fDecomposed != ECholesky)
1124  TPZMatrix<double>::Error(__PRETTY_FUNCTION__,"TPZSkylMatrix::Subst_Forward not decomposed with cholesky");
1125 
1126  int n = this->Dim();
1127  TPZVec<int> pntr(n+1);
1128 
1129  for (int i=0; i<n+1; i++)
1130  pntr[i] = fElem[i] - &fStorage[0] + 1;
1131 
1132  char desc[4] = { 'T', 'L', 'N', 'F' };
1133  char trans = 'N';
1134  double alfa = 1.0;
1135 
1136  for(int j=0; j<B->Cols(); j++)
1137  mkl_dskysv(&trans, &n, &alfa, desc, &fStorage[0], &pntr[0], &(*B)(0,j), &(*B)(0,j));
1138 
1139  return 1;
1140 }
1141 
1142 /*** Subst Backward ***/
1143 // Perform Ax = b, where A is triangular inferior.
1144 // Utilizando MKL
1145 template<>
1147 {
1148  if ( (B->Rows() != this->Dim()) || this->fDecomposed != ECholesky)
1149  TPZMatrix<double>::Error(__PRETTY_FUNCTION__,"TPZSkylMatrix::Subst_Backward not decomposed with cholesky");
1150 
1151  int n = this->Dim();
1152  TPZVec<int> pntr(n+1);
1153 
1154  for (int i=0; i<n+1; i++)
1155  pntr[i] = fElem[i] - &fStorage[0] + 1;
1156 
1157  char desc[4] = { 'T', 'L', 'N', 'F' };
1158  char trans = 'T';
1159  double alfa = 1.0;
1160 
1161  for(int j=0; j<B->Cols(); j++)
1162  mkl_dskysv(&trans, &n, &alfa, desc, &fStorage[0], &pntr[0], &(*B)(0,j), &(*B)(0,j));
1163 
1164  return 1;
1165 }
1166 template<>
1167 int
1169 {
1170  if ( (B->Rows() != this->Dim()) || this->fDecomposed != ECholesky)
1171  TPZMatrix<float>::Error(__PRETTY_FUNCTION__,"TPZSkylMatrix::Subst_Forward not decomposed with cholesky");
1172 
1173  int n = this->Dim();
1174  TPZVec<int> pntr(n+1);
1175 
1176  for (int i=0; i<n+1; i++)
1177  pntr[i] = fElem[i] - &fStorage[0] + 1;
1178 
1179  char desc[4] = { 'T', 'L', 'N', 'F' };
1180  char trans = 'N';
1181  float alfa = 1.0;
1182 
1183  for(int j=0; j<B->Cols(); j++)
1184  mkl_sskysv(&trans, &n, &alfa, desc, &fStorage[0], &pntr[0], &(*B)(0,j), &(*B)(0,j));
1185 
1186  return 1;
1187 }
1188 
1189 /*** Subst Backward ***/
1190 // Perform Ax = b, where A is triangular inferior.
1191 // Utilizando MKL
1192 template<>
1194 {
1195  if ( (B->Rows() != this->Dim()) || this->fDecomposed != ECholesky)
1196  TPZMatrix<float>::Error(__PRETTY_FUNCTION__,"TPZSkylMatrix::Subst_Backward not decomposed with cholesky");
1197 
1198  int n = this->Dim();
1199  TPZVec<int> pntr(n+1);
1200 
1201  for (int i=0; i<n+1; i++)
1202  pntr[i] = fElem[i] - &fStorage[0] + 1;
1203 
1204  char desc[4] = { 'T', 'L', 'N', 'F' };
1205  char trans = 'T';
1206  float alfa = 1.0;
1207 
1208  for(int j=0; j<B->Cols(); j++)
1209  mkl_sskysv(&trans, &n, &alfa, desc, &fStorage[0], &pntr[0], &(*B)(0,j), &(*B)(0,j));
1210 
1211  return 1;
1212 }
1213 #endif
1214 
1215 /*** Subst Forward ***/
1216 // Perform Ax = b, where A is triangular inferior.
1217 template<class TVar>
1219 {
1220  if ( (B->Rows() != this->Dim()) || this->fDecomposed != ECholesky)
1221  TPZMatrix<TVar>::Error(__PRETTY_FUNCTION__," TPZSkylMatrix::Subst_Forward not decomposed with cholesky");
1222 
1223 #ifdef DUMP_BEFORE_SUBST
1224  dump_matrices(this, B, "TPZSkylMatrix::Subst_Forward(B)");
1225 #endif
1226 
1227  // std::cout << "SubstForward this " << (void *) this << " neq " << Dim() << " normb " << Norm(*B) << std::endl;
1228  int64_t dimension=this->Dim();
1229  for ( int64_t j = 0; j < B->Cols(); j++ ) {
1230  int64_t k=0;
1231 
1232  // Find the first non-zero element at column j;
1233  while (k<dimension && (*B)(k,j) == TVar(0)) k++;
1234 
1235  // std::cout << "kstart " << k << std::endl;
1236  for (; k < dimension; k++ ) {
1237 
1238  // sum = SUM( A[k,i] * B[i,j] ), for i = 1, ..., k-1.
1239  TVar sum = 0.0;
1240  TVar *elem_ki = fElem[k];
1241  TVar *end_ki = fElem[k+1]-1;
1242  int64_t array_sz = end_ki - elem_ki;
1243  TVar* BPtr = &(*B)(k,j) - array_sz;
1244 
1245  for (unsigned l=0; l<array_sz; l++)
1246  sum += (*elem_ki++) * (*BPtr++);
1247 
1248  // B[k,j] = (B[k,j] - sum) / A[k,k].
1249  BPtr = &(*B)(k,j);
1250  *BPtr = (*BPtr - sum) / fElem[k+1][-1];
1251  }
1252  }
1253 
1254  return 1;
1255 }
1256 
1257 /*** Subst Backward ***/
1258 // Perform Ax = b, where A is triangular inferior.
1259 template<class TVar>
1261 {
1262  if ( (B->Rows() != this->Dim()) || this->fDecomposed != ECholesky)
1263  TPZMatrix<TVar>::Error(__PRETTY_FUNCTION__,"TPZSkylMatrix::Subst_Backward not decomposed with cholesky");
1264 
1265 #ifdef DUMP_BEFORE_SUBST
1266  dump_matrices(this, B, "TPZSkylMatrix::Subst_Backward(B)");
1267 #endif
1268 
1269  int64_t Dimension = this->Dim();
1270  if(!Dimension)
1271  return 1; // nothing to do
1272 
1273  for (int64_t j = 0; j < B->Cols(); j++ ) {
1274 
1275  int64_t k = Dimension-1;
1276 
1277  // Find the last non-zero element at column j of B.
1278  while (k>0 && (*B)(k,j) == TVar(0.)) k--;
1279 
1280  // std::cout << "kstart " << k << std::endl;
1281  for ( ; k > 0; k--) {
1282  // sum = SUM( A[k,i] * B[i,j] ), for i = 1, ..., k-1.
1283 
1284  TVar *elem_ki = fElem[k];
1285  TVar *end_ki = fElem[k+1]-1; // diagonal(k)
1286  int64_t array_sz = end_ki - elem_ki;
1287 
1288  TVar *BPtr = &(*B)(k,j);
1289  *BPtr = *BPtr / *end_ki;
1290  TVar val = *BPtr;
1291 
1292  BPtr = BPtr - array_sz;
1293 
1294  for (unsigned l=0; l<array_sz; l++) {
1295  *BPtr++ -= ((*elem_ki++) * val);
1296  }
1297  }
1298  }
1299 
1300  for(int64_t j = 0; j < B->Cols(); j++)
1301  (*B)(0,j) /= fElem[0][0];
1302 
1303  return 1;
1304 }
1305 
1306 /*** Subst L Forward ***/
1307 // Forward Substitution suposing diagonal elements are equal to 1.
1308 template<class TVar>
1309 int
1311 {
1312  if ( (B->Rows() != this->Dim()) || (this->fDecomposed != ELDLt && this->fDecomposed != ELU) )
1313  return( 0 );
1314 
1315  int64_t dimension =this->Dim();
1316  for ( int64_t k = 0; k < dimension; k++ ) {
1317  for ( int64_t j = 0; j < B->Cols(); j++ ) {
1318 
1319  // sum = SUM( A[k,i] * B[i,j] ), for i = 1, ..., k-1.
1320  TVar sum = 0.0;
1321  TVar *elem_ki = fElem[k];
1322  TVar *end_ki = fElem[k+1]-1;
1323  int64_t array_sz = end_ki - elem_ki;
1324  TVar* BPtr = &(*B)(k,j) - array_sz;
1325  for (unsigned l=0; l<array_sz; l++)
1326  sum += (*elem_ki++) * (*BPtr++);
1327 
1328  //EBORIN: Not sure if the code or the commentary is incorrect.
1329  // Faz B[k,j] = (B[k,j] - sum) / A[k,k].
1330  BPtr = &(*B)(k,j);
1331  *BPtr-= sum;
1332  }
1333  }
1334 
1335  return 1 ;
1336 }
1337 
1338 template<class TVar>
1340 {
1341  if ( (B->Rows() != this->Dim()) || this->fDecomposed != ELDLt)
1342  return 0;
1343 
1344  int64_t dimension = this->Dim();
1345 
1346  for (int64_t j = 0; j < B->Cols(); j++ ) {
1347  TVar *BPtr = &(*B)(0,j);
1348  int64_t k=0;
1349 
1350  while(k < dimension) {
1351  //*BPtr = *BPtr / diagonal(k)
1352  *BPtr++ /= *(fElem[k+1]-1);
1353  k++;
1354  }
1355  }
1356 
1357  return 1;
1358 }
1359 
1360 template<class TVar>
1362 {
1363  if ( (B->Rows() != this->Dim()) || !this->fDecomposed || this->fDecomposed == ECholesky) {
1364  TPZMatrix<TVar>::Error(__PRETTY_FUNCTION__,"TPZSkylMatrix::Subst_LBackward not decomposed properly");
1365  }
1366 
1367  int64_t Dimension = this->Dim();
1368 
1369  for ( int64_t k = Dimension-1; k > 0; k-- ) {
1370  for ( int64_t j = 0; j < B->Cols(); j++ ) {
1371 
1372  TVar *elem_ki = fElem[k];
1373  TVar *end_ki = fElem[k+1]-1; // diagonal(k)
1374  int64_t array_sz = end_ki - elem_ki;
1375  TVar* BPtr = &(*B)(k,j) - array_sz;
1376  TVar val = (*B)(k,j);
1377 
1378  for (unsigned l=0; l<array_sz; l++) {
1379  *BPtr++ -= (*elem_ki++) * val;
1380  }
1381  }
1382  }
1383 
1384  return 1;
1385 }
1386 
1387 /**************************** PRIVATE ****************************/
1388 template<class TVar>
1389 int
1391 {
1392  this->fStorage.Resize(0);
1393  this->fElem.Resize(0);
1394  this->fRow = this->fCol = 0;
1395  this->fDecomposed = 0;
1396  return( 1 );
1397 }
1398 
1399 template<class TVar>
1401 {
1402  int64_t dimension = A.Dim();
1403  this->fRow = this->fCol = dimension;
1404  fElem.Resize(A.fElem.NElements());
1405  fStorage = A.fStorage;
1406  TVar *firstp = 0;
1407  if(fStorage.NElements())
1408  firstp = &fStorage[0];
1409 
1410  for(int64_t i=0; i<=dimension; i++)
1411  fElem[i]=firstp+(A.fElem[i]-A.fElem[0]);
1412 
1413  this->fDecomposed = A.fDecomposed;
1414  this->fDefPositive = A.fDefPositive;
1415 }
1416 
1417 template <class TVar>
1418 void TPZSkylMatrix<TVar>::Read(TPZStream &buf, void *context )
1419 {
1420  TPZMatrix<TVar>::Read(buf, context);
1421  buf.Read( fStorage);
1422  TPZVec<int64_t> skyl(this->Rows()+1,0);
1423  buf.Read( skyl);
1424  TVar *ptr = 0;
1425  if (this->Rows()) {
1426  ptr = &fStorage[0];
1427  }
1428  fElem.Resize(this->Rows()+1);
1429  for (int64_t i=0; i<this->Rows()+1; i++) {
1430  fElem[i] = skyl[i] + ptr;
1431  }
1432 }
1433 
1434 template <class TVar>
1435 void TPZSkylMatrix<TVar>::Write( TPZStream &buf, int withclassid ) const
1436 {
1437  TPZMatrix<TVar>::Write(buf,withclassid);
1438  buf.Write( fStorage);
1439  TPZVec<int64_t> skyl(this->Rows()+1,0);
1440  TVar *ptr = 0;
1441  if (this->Rows()) {
1442  ptr = &fStorage[0];
1443  }
1444  for (int64_t i=0; i<this->Rows()+1; i++) {
1445  skyl[i] = fElem[i] - ptr;
1446  }
1447  buf.Write( skyl);
1448 }
1449 
1450 template <class TVar>
1451 void TPZSkylMatrix<TVar>::Write( TPZStream &buf, int withclassid )
1452 {
1453  TPZMatrix<TVar>::Write(buf,withclassid);
1454  buf.Write( fStorage);
1455  TPZVec<int64_t> skyl(this->Rows()+1,0);
1456  TVar *ptr = 0;
1457  if (this->Rows()) {
1458  ptr = &fStorage[0];
1459  }
1460  for (int64_t i=0; i<this->Rows()+1; i++) {
1461  skyl[i] = fElem[i] - ptr;
1462  }
1463  buf.Write( skyl);
1464 }
1465 
1466 template<class TVar>
1467 void TPZSkylMatrix<TVar>::DecomposeColumn(int64_t col, int64_t prevcol)
1468 {
1469  TVar *ptrprev; //Pointer to prev column
1470  TVar *ptrcol; //Pointer to col column
1471  int64_t skprev, skcol; //prev and col Skyline height respectively
1472  int64_t minline;
1473 
1474  skprev = SkyHeight(prevcol);
1475  skcol = SkyHeight(col);
1476 
1477  ptrprev = Diag(prevcol);
1478  ptrcol = Diag(col);
1479 
1480  if((prevcol-skprev) > (col-skcol)){
1481  minline = prevcol - skprev;
1482  }
1483  else {
1484  minline = col - skcol;
1485  }
1486  if(minline > prevcol) {
1487  cout << "error condition\n";
1488  cout.flush();
1489  return;
1490  }
1491  TVar *run1 = ptrprev - (prevcol-minline);
1492  TVar *run2 = ptrcol - (col-minline);
1493  TVar sum = 0;
1494 
1495  while(run1 != ptrprev)
1496  sum += (*run1++)*(*run2++);
1497  *run2-=sum;
1498  if(run1 != run2){
1499  *run2 /= *run1;
1500  }
1501  else{
1502  *run2=sqrt(*run2);
1503  }
1504 }
1505 
1506 template<>
1507 void TPZSkylMatrix<std::complex<float> >::DecomposeColumn(int64_t col, int64_t prevcol,std::list<int64_t> &singular)
1508 {
1509  DebugStop();
1510 }
1511 
1512 template<>
1513 void TPZSkylMatrix<std::complex<double> >::DecomposeColumn(int64_t col, int64_t prevcol,std::list<int64_t> &singular)
1514 {
1515  DebugStop();
1516 }
1517 
1518 template<>
1519 void TPZSkylMatrix<std::complex<long double> >::DecomposeColumn(int64_t col, int64_t prevcol,std::list<int64_t> &singular)
1520 {
1521  DebugStop();
1522 }
1523 
1524 template<class TVar>
1525 void TPZSkylMatrix<TVar>::DecomposeColumn(int64_t col, int64_t prevcol,std::list<int64_t> &singular)
1526 {
1527  TVar *ptrprev; //Pointer to prev column
1528  TVar *ptrcol; //Pointer to col column
1529  int64_t skprev, skcol; //prev and col Skyline height respectively
1530  int64_t minline;
1531 
1532  skprev = SkyHeight(prevcol);
1533  skcol = SkyHeight(col);
1534 
1535  ptrprev = Diag(prevcol);
1536  ptrcol = Diag(col);
1537 
1538  if((prevcol-skprev) > (col-skcol)){
1539  minline = prevcol - skprev;
1540  }
1541  else {
1542  minline = col - skcol;
1543  }
1544  if(minline > prevcol) {
1545  cout << "error condition\n";
1546  cout.flush();
1547  return;
1548  }
1549  TVar *run1 = ptrprev - (prevcol-minline);
1550  TVar *run2 = ptrcol - (col-minline);
1551  TVar sum = 0;
1552 
1553  while(run1 != ptrprev)
1554  sum += (*run1++)*(*run2++);
1555  *run2-=sum;
1556  if(run1 != run2){
1557  *run2 /= *run1;
1558  }
1559  else{
1560  TVar pivot = *run2;
1561  if ( pivot < TVar(1.e-10) ) {
1562 #ifdef LOG4CXX
1563  std::stringstream sout;
1564  sout << "equation " << col << " is singular pivot " << pivot;
1565  LOGPZ_WARN(logger,sout.str())
1566 #endif
1567  singular.push_back(col);
1568  pivot = 1.;
1569  }
1570  *run2=sqrt(pivot);
1571  }
1572 }
1573 
1574 template<>
1575 void TPZSkylMatrix<std::complex<float> >::DecomposeColumn2(int64_t col, int64_t prevcol)
1576 {
1577  DebugStop();
1578 }
1579 
1580 template<>
1581 void TPZSkylMatrix<std::complex<double> >::DecomposeColumn2(int64_t col, int64_t prevcol)
1582 {
1583  DebugStop();
1584 }
1585 
1586 template<>
1587 void TPZSkylMatrix<std::complex<long double> >::DecomposeColumn2(int64_t col, int64_t prevcol)
1588 {
1589  DebugStop();
1590 }
1591 
1592 template<class TVar>
1593 void TPZSkylMatrix<TVar>::DecomposeColumn2(int64_t col, int64_t prevcol)
1594 {
1595  //Cholesky Decomposition
1596  TVar *ptrprev; //Pointer to prev column
1597  TVar *ptrcol; //Pointer to col column
1598  int64_t skprev, skcol; //prev and col Skyline height respectively
1599  int64_t minline;
1600 
1601  skprev = SkyHeight(prevcol);
1602  skcol = SkyHeight(col);
1603 
1604  ptrprev = Diag(prevcol);
1605  ptrcol = Diag(col);
1606 
1607  if((prevcol-skprev) > (col-skcol)){
1608  minline = prevcol - skprev;
1609  }
1610  else {
1611  minline = col - skcol;
1612  }
1613  if(minline > prevcol) {
1614  cout << "error condition\n";
1615  cout.flush();
1616  return;
1617  }
1618  //EBORIN: TODO: Improve this code (change run1 and run2 so that dot product increment both)
1619  TVar *run1 = ptrprev - 1;
1620  TVar *run2 = ptrcol - ((col-prevcol)+1);
1621  TVar *lastptr = ptrprev - (prevcol-minline+1);
1622  TVar sum = 0;
1623  TVar *modify = ptrcol-(col-prevcol);
1624 #ifndef BLAS
1625  while(run1 > lastptr)
1626  sum += (*run1--)*(*run2--);
1627 #else
1628  int64_t n=lastptr-run1;
1629  sum = cblas_ddot(n,run1-(n-1),1,run2-(n-1),1);
1630 #endif
1631  *modify-=sum;
1632  if(col != prevcol){
1633  *modify /= *ptrprev;
1634  }
1635  else{
1636  if ( *modify < TVar(1.e-25) ) {
1637  cout << "TPZSkylMatrix::DecomposeCholesky a matrix nao e positiva definida" << *modify << endl;
1638  *modify = 1.e-10;
1639  }
1640  *modify=sqrt(*modify);
1641  }
1642 }
1643 
1644 template <class TVar>
1645 void TPZSkylMatrix<TVar>::AutoFill(int64_t nrow, int64_t ncol, int symmetric) {
1646  if (nrow != ncol || !symmetric)
1647  {
1648  DebugStop();
1649  }
1650  TPZMatrix<TVar>::Redim(nrow,ncol);
1651  TPZVec<int64_t> skyline(nrow);
1652  fElem.resize(nrow+1);
1653  fElem.Fill(0);
1654  for (int64_t i=0; i<nrow; i++) {
1655  skyline[i]=(i*(rand()+RAND_MAX/2))/RAND_MAX;
1656  }
1657  InitializeElem(skyline,fStorage,fElem);
1658 
1659  for (int64_t i=0; i<nrow; i++) {
1660  TVar sum = 0.;
1661  for (int64_t j=skyline[i]; j<i; j++) {
1662  TVar val = ((TVar)rand())/RAND_MAX;
1663  PutVal(j,i,val);
1664  sum += fabs(val);
1665  }
1666  PutVal(i,i,sum+1.);
1667  }
1668 }
1669 
1670 template class TPZSkylMatrix<float>;
1671 template class TPZSkylMatrix<std::complex<float> >;
1672 
1673 template class TPZSkylMatrix<double>;
1674 template class TPZSkylMatrix<std::complex<double> >;
1675 
1676 #ifndef BORLAND
1677 template class TPZRestoreClass<TPZSkylMatrix<float>>;
1680 
1684 #endif
1685 
1686 template class TPZSkylMatrix<long double>;
1688 
1689 template class TPZSkylMatrix<TPZFlopCounter>;
1690 
1691 #else // USING_NEW_SKYLMAT
1692 
1698 #include <math.h>
1699 #include <stdlib.h>
1700 
1701 #ifdef BLAS
1702 extern "C" {
1703 #include <cblas.h>
1704 }
1705 #endif
1706 
1707 #include "pzfmatrix.h"
1708 #include "pzskylmat.h"
1709 
1710 #include <sstream>
1711 #include "pzlog.h"
1712 #include "tpzverysparsematrix.h"
1713 #ifdef LOG4CXX
1714 static LoggerPtr logger(Logger::getLogger("pz.matrix.tpzskylmatrix"));
1715 #endif
1716 
1717 using namespace std;
1718 
1719 /*******************/
1720 /*** TPZSkylMatrix ***/
1721 
1722 
1723 /**************************** PUBLIC ****************************/
1724 
1725 /*****************************/
1726 /*** Construtor (int) ***/
1727 
1728 template<class TVar>
1730 : TPZRegisterClassId(&TPZSkylMatrix::ClassId),TPZMatrix<TVar>( dim, dim ), fElem(dim+1), fStorage(0)
1731 {
1732 
1733  // Inicializa a diagonal (vazia).
1734  fElem.Fill(0);
1735 }
1736 template<class TVar>
1737 TPZSkylMatrix<TVar>::TPZSkylMatrix(const int64_t dim, const TPZVec<int64_t> &skyline )
1738 : TPZRegisterClassId(&TPZSkylMatrix::ClassId),TPZMatrix<TVar>( dim, dim ), fElem(dim+1), fStorage(0)
1739 {
1740 
1741  // Inicializa a diagonal (vazia).
1742  fElem.Fill(0);
1743  InitializeElem(skyline,fStorage,fElem);
1744 }
1745 
1746 template<class TVar>
1748 #ifdef PZDEBUG
1749  {
1750  int64_t size = this->fElem.NElements();
1751  if(size != B.fElem.NElements()){
1752  PZError << "\nFATAL ERROR at " << __PRETTY_FUNCTION__ << "\n";
1753  PZError.flush();
1754  DebugStop();
1755  }
1756  for(int64_t i = 0; i < size; i++){
1757  if((this->fElem[i]-this->fElem[0]) != (B.fElem[i]-B.fElem[0])){
1758  PZError << "\nFATAL ERROR at " << __PRETTY_FUNCTION__ << "\n";
1759  PZError.flush();
1760  DebugStop();
1761  }
1762  }
1763  }
1764 #endif
1765 
1766  const int64_t n = this->fStorage.NElements();
1767  for(int64_t i = 0L; i < n; i++) this->fStorage[i] += TVar(k)*B.fStorage[i];
1768 
1769 }
1770 
1774 template<class TVar>
1776 {
1777  TPZMatrix<TVar> *matrix = mat.operator->();
1778  TPZSkylMatrix<TVar> *skylmat = dynamic_cast<TPZSkylMatrix<TVar> *>(matrix);
1779  if (!skylmat) {
1780  DebugStop();
1781  }
1782  if (fStorage.NElements() != skylmat->fStorage.NElements()) {
1783  DebugStop();
1784  }
1785  memcpy(&fStorage[0], &(skylmat->fStorage[0]) , fStorage.NElements()*sizeof(TVar));
1786  this->fDecomposed = skylmat->fDecomposed;
1787  this->fDefPositive = skylmat->fDefPositive;
1788 }
1789 
1790 
1791 template<class TVar>
1793 {
1794 #ifdef PZDEBUG
1795  for (int64_t i = 0 ; i < this->Rows() ; i++){
1796  if (skyline[i] < 0 || skyline[i] > i) DebugStop();
1797  }
1798 #endif
1799  fElem.Fill(0);
1800  InitializeElem(skyline,fStorage,fElem);
1801 }
1802 template<class TVar>
1804  int64_t dim = skyline.NElements();
1805  int64_t i;
1806  int64_t nelem=0;
1807  for(i=0; i<dim; i++) {
1808  nelem += i-skyline[i]+1;
1809  }
1810  return nelem;
1811 }
1812 
1813 template<class TVar>
1814 void TPZSkylMatrix<TVar>::InitializeElem(const TPZVec<int64_t> &skyline, TPZVec<TVar> &storage, TPZVec<TVar *> &point) { // JORGE 2013 OUTUBRO ???
1815  int64_t dim = skyline.NElements();
1816  int64_t nel = NumElements(skyline);
1817 #ifdef PZDEBUG
1818  // std::cout << "Skyline Matrix, Number of elements : " << nel << " in floating point " << nel*sizeof(TVar) << std::endl;
1819 #endif
1820  storage.Resize(nel);
1821  storage.Fill(0.);
1822  int64_t i;
1823  point.Resize(dim+1);
1824  if(dim) {
1825  point[0] = &storage[0];
1826  point[dim] = &storage[0]+nel;
1827  } else {
1828  point[0] = 0;
1829  }
1830  for(i=1; i<dim+1; i++)
1831  point[i] = point[i-1]+(i-1)-skyline[i-1]+1;
1832 }
1833 
1837 template<class TVar>
1839 
1840  if (first.Rows() != second.Rows()) {
1841  cout<<"ComputeMaxSkyline : incompatible dimension";
1842  return;
1843  }
1844  int64_t i, dim = first.Rows();
1845  res.Resize(dim+1);
1846 
1847  for(i=1; i<dim+1; i++) {
1848 
1849  int64_t aux = ( first.Size(i) > second.Size(i) ) ? first.Size(i) : second.Size(i);
1850  res[i] = i-aux-1;
1851  }
1852 }
1853 
1854 template<class TVar>
1855 TVar &
1856 TPZSkylMatrix<TVar>::operator()(const int64_t r, const int64_t c) {
1857  int64_t row(r),col(c);
1858  if ( row > col ) this->Swap( &row, &col );
1859 
1860  // Indice do vetor coluna.
1861  int64_t index = col - row;
1862  if ( index >= Size(col) ) {
1863  //Error("TPZSkylMatrix::operator()","Index out of range");
1864  TPZMatrix<TVar>::Error(__PRETTY_FUNCTION__,"Index out of range");
1865  DebugStop();
1866  }
1867  return fElem[col][index];
1868 }
1869 
1870 template<class TVar>
1871 TVar &TPZSkylMatrix<TVar>::s(const int64_t row, const int64_t col) {
1872  return operator()(row,col);
1873 }
1874 
1875 template<class TVar>
1876 TVar &
1878  return operator()(r,r);
1879 }
1880 
1881 //EBORIN: Define these if you want to use the experimental version.
1882 #define DECOMPOSE_CHOLESKY_OPT2
1883 //#define SKYLMATRIX_PUTVAL_OPT1
1884 //#define SKYLMATRIX_GETVAL_OPT1
1885 
1886 #ifdef SKYLMATRIX_PUTVAL_OPT1
1887 #pragma message ( "warning: Using experimental version of TPZSkylMatrix<TVAr>::PutVal(...)" )
1888 /**************/
1889 /*** PutVal ***/
1890 template<class TVar>
1891 int
1892 TPZSkylMatrix<TVar>::PutVal(const int64_t r,const int64_t c,const TVar & value )
1893 {
1894  // inicializando row e col para trabalhar com a triangular superior
1895  if (r > c) return PutVal(c, r, value);
1896 
1897  // Indice do vetor coluna.
1898  int64_t index = c - r;
1899  // Se precisar redimensionar o vetor.
1900  //EBORIN: Do we really need to check this?
1901  if (index >= Size(c)) {
1902  if (!IsZero(value)) {
1903  cout << "TPZSkylMatrix::PutVal Size" << Size(c);
1904  cout.flush();
1905  TPZMatrix<TVar>::Error(__PRETTY_FUNCTION__,"Index out of range");
1906  }
1907  else
1908  return 1;
1909  }
1910 
1911  fElem[c][index] = value;
1912  this->fDecomposed = 0;
1913  return (1);
1914 }
1915 #else
1916 /**************/
1917 /*** PutVal ***/
1918 template<class TVar>
1919 int
1920 TPZSkylMatrix<TVar>::PutVal(const int64_t r,const int64_t c,const TVar & value )
1921 {
1922  // inicializando row e col para trabalhar com a triangular superior
1923  int64_t row(r),col(c);
1924  if ( row > col )
1925  this->Swap( &row, &col );
1926 
1927  // Indice do vetor coluna.
1928  int64_t index = col - row;
1929  // Se precisar redimensionar o vetor.
1930  //EBORIN: Do we really need to check this?
1931  if ( index >= Size(col) && !IsZero(value)) {
1932  cout << "TPZSkylMatrix::PutVal Size" << Size(col);
1933  cout.flush();
1934  TPZMatrix<TVar>::Error(__PRETTY_FUNCTION__,"Index out of range");
1935  } else if(index >= Size(col)) return 1;
1936  fElem[col][index] = value;
1937  // delete[]newVet;
1938  this->fDecomposed = 0;
1939  return( 1 );
1940 }
1941 #endif
1942 
1943 template<class TVar>
1945  const TVar alpha,const TVar beta ,const int opt) const {
1946  // Computes z = beta * y + alpha * opt(this)*x
1947  // z and x cannot overlap in memory
1948 
1949  if (this->fDecomposed != ENoDecompose) {
1950  // DebugStop();
1951  }
1952  if ((!opt && this->Cols() != x.Rows()) || this->Rows() != x.Rows())
1953  TPZMatrix<TVar>::Error(__PRETTY_FUNCTION__," <matrixs with incompatible dimensions>" );
1954  if(z.Rows() != x.Rows() || z.Cols() != x.Cols()) z.Redim(x.Rows(),x.Cols());
1955  if(x.Cols() != y.Cols() || x.Cols() != z.Cols() || x.Rows() != y.Rows() || x.Rows() != z.Rows()) {
1956  cout << "x.Cols = " << x.Cols() << " y.Cols()"<< y.Cols() << " z.Cols() " << z.Cols() << " x.Rows() " << x.Rows() << " y.Rows() "<< y.Rows() << " z.Rows() "<< z.Rows() << endl;
1957  TPZMatrix<TVar>::Error(__PRETTY_FUNCTION__," incompatible dimensions\n");
1958  }
1959  this->PrepareZ(y,z,beta,opt);
1960  int64_t rows = this->Rows();
1961  int64_t xcols = x.Cols();
1962  int64_t ic, r;
1963  for (ic = 0; ic < xcols; ic++) {
1964  for( r = 0 ; r < rows ; r++ ) {
1965  int64_t offset = Size(r);
1966  TVar val = 0.;
1967  const TVar *p = &x.g((r-offset+1),ic);
1968  TVar *diag = fElem[r] + offset-1;
1969  TVar *diaglast = fElem[r];
1970  while( diag > diaglast ) {
1971  val += *diag-- * *p;
1972  p ++;
1973  }
1974  if( diag == diaglast ) val += *diag * *p;
1975  z(r,ic) += val*alpha;
1976  TVar *zp = &z((r-offset+1),ic);
1977  val = x.g(r,ic);
1978  diag = fElem[r] + offset-1;
1979  while( diag > diaglast ) {
1980  *zp += alpha * *diag-- * val;
1981  zp ++;
1982  }
1983  }
1984  }
1985 }
1986 
1987 template<class TVar>
1988 void TPZSkylMatrix<TVar>::SolveSOR(int64_t & numiterations,const TPZFMatrix<TVar> &F,
1989  TPZFMatrix<TVar> &result, TPZFMatrix<TVar> *residual, TPZFMatrix<TVar> &scratch,const REAL overrelax,
1990  REAL &tol,const int FromCurrent,const int direction) {
1991 
1992  if(residual == &F) {
1993  cout << "TPZMatrix::SolveSOR called with residual and F equal, no solution\n";
1994  return;
1995  }
1996  REAL res = 2.*tol+1.;
1997  if(residual) res = Norm(*residual);
1998  if(!FromCurrent) {
1999  result.Zero();
2000  }
2001  TVar over = overrelax;
2002  int64_t r = this->Dim();
2003  int64_t c = F.Cols();
2004  int64_t i,ifirst = 0, ilast = r, iinc = 1;
2005  if(direction == -1) {
2006  ifirst = r-1;
2007  ilast = 0;
2008  iinc = -1;
2009  }
2010  int64_t it;
2011  for(it=0; it<numiterations && res > tol; it++) {
2012  res = 0.;
2013  scratch = F;
2014  for(int64_t ic=0; ic<c; ic++) {
2015  if(direction == 1) {
2016  //
2017  // compute the upper triangular part first and put into the scractch vector
2018  //
2019  for(i=ifirst; i!=ilast; i+= iinc) {
2020  //TPZColuna *mydiag = &fDiag[i];
2021  int64_t offset = Size(i);
2022  TVar val;
2023  TVar *diag;
2024  TVar *diaglast = fElem[i];
2025  TVar *scratchp = &scratch(i-offset+1,ic);
2026  val = result(i,ic);
2027  diag = fElem[i] + offset-1;
2028  int64_t lastid = diag-diaglast;
2029  int64_t id;
2030  for(id=0; id<=lastid; id++) *(scratchp+id) -= *(diag-id) * val;
2031  /* codeguard fix
2032  while( diag >= diaglast ) *scratchp++ -= *diag-- * val;
2033  */
2034  }
2035  //
2036  // perform the SOR operation
2037  //
2038  for(i=ifirst; i!=ilast; i+= iinc) {
2039  //TPZColuna *mydiag = &fDiag[i];
2040  int64_t offset = Size(i);
2041  TVar val = scratch(i,ic);
2042  TVar *p = &result(i-offset+1,ic);
2043  TVar *diag = fElem[i] + offset-1;
2044  TVar *diaglast = fElem[i];
2045  while( diag > diaglast ) val -= *diag-- * *p++;
2046  res += abs(val*val);
2047  result(i,ic) += val*over/ (*diag);
2048  }
2049  } else {
2050  //
2051  // the direction is upward
2052  //
2053  // put the lower triangular part of the multiplication into the scratch vector
2054  //
2055  for(i=ifirst; i!=ilast; i+= iinc) {
2056  //TPZColuna *mydiag = &fDiag[i];
2057  int64_t offset = Size(i);
2058  TVar val = scratch(i,ic);
2059  TVar *p = &result(i-offset+1,ic);
2060  TVar *diag = fElem[i] + offset-1;
2061  TVar *diaglast = fElem[i];
2062  while( diag > diaglast ) val -= *diag-- * *p++;
2063  // res += val*val;
2064  scratch(i,ic) = val;
2065  }
2066  //
2067  // perform the SOR operation
2068  //
2069  for(i=ifirst; i!=ilast; i+= iinc) {
2070  //TPZColuna *mydiag = &fDiag[i];
2071  int64_t offset = Size(i);
2072  // TVar val = scratch(i,ic);
2073  TVar *diag;
2074  TVar *diaglast = fElem[i];
2075  TVar *scratchp = &scratch(i-offset+1,ic);
2076  //val= result(i,ic);
2077  TVar val = scratch(i,ic);
2078  val -= *diaglast * result(i,ic);
2079  res += abs(val*val);
2080  val = over * val / *diaglast;
2081  result(i,ic) += val;
2082  val = result(i,ic);
2083  diag = fElem[i] + offset-1;
2084  while( diag > diaglast ) *scratchp++ -= *diag-- * val;
2085  }
2086  }
2087  }
2088  res = sqrt(res);
2089  }
2090  if(residual) {
2091  this->Residual(result,F,*residual);
2092  }
2093  numiterations = it;
2094  tol = res;
2095 }
2096 
2097 #ifdef SKYLMATRIX_GETVAL_OPT1
2098 #pragma message ( "warning: Using experimental version of TPZSkylMatrix<TVAr>::GetVal(...)" )
2099 template<class TVar>
2100 const TVar &
2101 TPZSkylMatrix<TVar>::GetVal(const int64_t r,const int64_t c ) const
2102 {
2103  if (r > c) return GetVal(c,r);
2104  unsigned dim = this->Dim();
2105  //EBORIN: Do we really need to do this? May only when running debug version.
2106  if(r >= dim || c >= dim || r < 0 || c < 0) {
2107  cout << "TPZSkylMatrix::GetVal index out of range row = " << r
2108  << " col = " << c << endl;
2109  return this->gZero;
2110  }
2111 
2112  // Indice do vetor coluna.
2113  int64_t index = c - r;
2114  if ( index < Size(c) ) {
2115  return (fElem[c][index]);
2116  }
2117  else {
2118  if(this->gZero != TVar(0.)) {
2119  cout << "TPZSkylMatrix gZero = " << this->gZero << endl;
2120  DebugStop();
2121  }
2122  return(this->gZero );
2123  }
2124 }
2125 #else
2126 /**************/
2127 /*** GetVal ***/
2128 template<class TVar>
2129 const TVar &
2130 TPZSkylMatrix<TVar>::GetVal(const int64_t r,const int64_t c ) const
2131 {
2132  // inicializando row e col para trabalhar com a triangular superior
2133  int64_t row(r),col(c);
2134  if ( row > col )
2135  this->Swap( &row, &col );
2136 
2137  if(row >= this->Dim() || col >= this->Dim() || row < 0 || col<0) {
2138  cout << "TPZSkylMatrix::GetVal index out of range row = " << row
2139  << " col = " << col << endl;
2140  return this->gZero;
2141  }
2142  // Indice do vetor coluna.
2143  int64_t index = col - row;
2144  //TPZColuna *pCol = &fDiag[col];
2145 
2146  if ( index < Size(col) )
2147  return( fElem[col][index] );
2148  else {
2149  if(this->gZero != TVar(0.)) {
2150  cout << "TPZSkylMatrix gZero = " << this->gZero << endl;
2151  DebugStop();
2152  }
2153  return(this->gZero );
2154  }
2155 }
2156 #endif
2157 /******** Operacoes com matrizes SKY LINE ********/
2158 
2159 /******************/
2160 /*** Operator = ***/
2161 
2162 template<class TVar>
2165 {
2166  Clear();
2167  Copy( A );
2168  return( *this );
2169 }
2170 
2171 /******************/
2172 /*** Operator + ***/
2173 
2174 template<class TVar>
2177 {
2178  if ( this->Dim() != A.Dim() )
2179  TPZMatrix<TVar>::Error(__PRETTY_FUNCTION__,"<incompatible dimensions>" );
2180 
2181  TPZVec<int64_t> skylinesize(this->Dim());
2182  ComputeMaxSkyline(*this,A,skylinesize);
2183  TPZSkylMatrix res( this->fRow, skylinesize );
2184 
2185  TVar *elemMax;
2186  TVar *elemMin;
2187  int64_t sizeMax;
2188  int64_t sizeMin;
2189 
2190  for ( int64_t col = 0; col < this->Dim(); col++ )
2191  {
2192  // Define o tamanho e os elementos da maior e da menor
2193  // coluna.
2194  if ( Size(col) > A.Size(col) )
2195  {
2196  sizeMax = Size(col);
2197  elemMax = fElem[col];
2198  sizeMin = A.Size(col);
2199  elemMin = A.fElem[col];
2200  }
2201  else
2202  {
2203  sizeMax = A.Size(col);
2204  elemMax = A.fElem[col];
2205  sizeMin = Size(col);
2206  elemMin = fElem[col];
2207  }
2208 
2209  // Inicializa coluna da matriz resultado.
2210 
2211  // Efetua a SOMA.
2212  TVar *dest = res.fElem[col];
2213  int64_t i;
2214  for ( i = 0; i < sizeMin; i++ )
2215  *dest++ = (*elemMax++) + (*elemMin++);
2216  for ( ; i < sizeMax; i++ )
2217  *dest++ = *elemMax++;
2218  }
2219 
2220  return( res );
2221 }
2222 
2223 /******************/
2224 /*** Operator - ***/
2225 
2226 template<class TVar>
2229 {
2230  if ( this->Dim() != A.Dim() )
2231  TPZMatrix<TVar>::Error(__PRETTY_FUNCTION__, "operator-( TPZSkylMatrix ) <incompatible dimensions>" );
2232 
2233  TPZVec<int64_t> skylinesize(this->Dim());
2234  ComputeMaxSkyline(*this,A,skylinesize);
2235  TPZSkylMatrix<TVar> res( this->fRow, skylinesize );
2236 
2237  for ( int64_t col = 0; col < this->fRow; col++ )
2238  {
2239  // Define o tamanho e os elementos das colunas das 2 matrizes.
2240  int64_t sizeThis = Size(col);
2241  TVar *elemThis = fElem[col];
2242  int64_t sizeA = A.Size(col);
2243  TVar *elemA = A.fElem[col];
2244 
2245  // Inicializa coluna da matriz resultado.
2246 
2247  // Efetua a SUBTRACAO.
2248  TVar *dest = res.fElem[col];
2249  int64_t i;
2250  for ( i = 0; (i < sizeThis) && (i < sizeA); i++ ) *dest++ = (*elemThis++) - (*elemA++);
2251  if ( i == sizeA ) for ( ; i < sizeThis; i++ ) *dest++ = *elemThis++;
2252  else for ( ; i < sizeA; i++ ) *dest++ = -(*elemA++);
2253  }
2254 
2255  return( res );
2256 }
2257 
2258 template<class TVar>
2260  TPZVec<int64_t> &source,
2261  TPZVec<int64_t> &destination)
2262 {
2263  int64_t nelem = source.NElements();
2264  int64_t icoef,jcoef,ieq,jeq,ieqs,jeqs;
2265  for(icoef=0; icoef<nelem; icoef++) {
2266  ieq = destination[icoef];
2267  ieqs = source[icoef];
2268  for(jcoef=icoef; jcoef<nelem; jcoef++) {
2269  jeq = destination[jcoef];
2270  jeqs = source[jcoef];
2271  int64_t row(ieq), col(jeq);
2272  // invertendo linha-coluna para triangular superior
2273  if (row > col)
2274  this->Swap(&row, &col);
2275 #ifdef PZDEBUG
2276  // checando limites
2277  if(row >= this->Dim() || col >= this->Dim()) {
2278  cout << "TPZSkylMatrix::GetVal index out of range row = " <<
2279  row << " col = " << col << endl;
2280  DebugStop();
2281  }
2282 #endif
2283  // indice do vetor coluna
2284  int64_t index = col - row;
2285 #ifdef PZDEBUG
2286  // checando limite da coluna
2287  if (index >= Size(col)) {
2288  cerr << "Try TPZSkylMatrix gZero." << endl;
2289  DebugStop();
2290  }
2291 #endif
2292  // executando contribuição
2293  fElem[col][index] += elmat(ieqs,jeqs);
2294 
2295  }
2296  }
2297 }
2298 
2299 template<>
2301  TPZVec<int64_t> &source,
2302  TPZVec<int64_t> &destination)
2303 {
2304  int64_t nelem = source.NElements();
2305  int64_t icoef,jcoef,ieq,jeq,ieqs,jeqs;
2306  for(icoef=0; icoef<nelem; icoef++) {
2307  ieq = destination[icoef];
2308  ieqs = source[icoef];
2309  for(jcoef=icoef; jcoef<nelem; jcoef++) {
2310  jeq = destination[jcoef];
2311  jeqs = source[jcoef];
2312  int64_t row(ieq), col(jeq);
2313  // invertendo linha-coluna para triangular superior
2314  if (row > col)
2315  this->Swap(&row, &col);
2316 #ifdef PZDEBUG
2317  // checando limites
2318  if(row >= this->Dim() || col >= this->Dim()) {
2319  cout << "TPZSkylMatrix::GetVal index out of range row = " <<
2320  row << " col = " << col << endl;
2321  DebugStop();
2322  }
2323 #endif
2324  // indice do vetor coluna
2325  int64_t index = col - row;
2326 #ifdef PZDEBUG
2327  // checando limite da coluna
2328  if (index >= Size(col)) {
2329  std::cout << "Skyline wrongly configured " << " row " << row << " col " << col << " Size(col) " << Size(col) << std::endl;
2330  cerr << "Try TPZSkylMatrix gZero." << endl;
2331  std::cout << destination << std::endl;
2332  DebugStop();
2333  }
2334 #endif
2335  // executando contribuição
2336 #pragma omp atomic
2337  fElem[col][index] += elmat(ieqs,jeqs);
2338 
2339  }
2340  }
2341 }
2342 
2343 
2344 template<>
2346  TPZVec<int64_t> &source,
2347  TPZVec<int64_t> &destination)
2348 {
2349  int64_t nelem = source.NElements();
2350  int64_t icoef,jcoef,ieq,jeq,ieqs,jeqs;
2351  for(icoef=0; icoef<nelem; icoef++) {
2352  ieq = destination[icoef];
2353  ieqs = source[icoef];
2354  for(jcoef=icoef; jcoef<nelem; jcoef++) {
2355  jeq = destination[jcoef];
2356  jeqs = source[jcoef];
2357  int64_t row(ieq), col(jeq);
2358  // invertendo linha-coluna para triangular superior
2359  if (row > col)
2360  this->Swap(&row, &col);
2361 #ifdef PZDEBUG
2362  // checando limites
2363  if(row >= this->Dim() || col >= this->Dim()) {
2364  cout << "TPZSkylMatrix::GetVal index out of range row = " <<
2365  row << " col = " << col << endl;
2366  DebugStop();
2367  }
2368 #endif
2369  // indice do vetor coluna
2370  int64_t index = col - row;
2371 #ifdef PZDEBUG
2372  // checando limite da coluna
2373  if (index >= Size(col)) {
2374  cerr << "Try TPZSkylMatrix gZero." << endl;
2375  DebugStop();
2376  }
2377 #endif
2378  // executando contribuição
2379 #pragma omp atomic
2380  fElem[col][index] += elmat(ieqs,jeqs);
2381 
2382  }
2383  }
2384 }
2385 
2386 /*******************/
2387 /*** Operator += ***/
2388 
2389 template<class TVar>
2392 {
2393  if ( this->Dim() != A.Dim() )
2394  TPZMatrix<TVar>::Error(__PRETTY_FUNCTION__,"operator+=( TPZSkylMatrix ) <incompatible dimensions>" );
2395 
2396  TPZSkylMatrix res((*this)+A);
2397  *this = res;
2398  return *this;
2399 }
2400 
2401 /*******************/
2402 /*** Operator -= ***/
2403 
2404 template<class TVar>
2407 {
2408  if ( this->Dim() != A.Dim() )
2409  TPZMatrix<TVar>::Error(__PRETTY_FUNCTION__,"operator-=( TPZSkylMatrix ) <incompatible dimensions>" );
2410 
2411  TPZSkylMatrix res(*this-A);
2412  *this = res;
2413  return *this;
2414 }
2415 
2416 
2417 
2418 /******** Operacoes com valores NUMERICOS ********/
2419 //
2420 // As operacoes com valores numericos sao efetuadas apenas nos
2421 // elementos alocados. Em especial, as operacoes A = 0.0 e A *= 0.0
2422 // desalocam todos os elementos da matriz.
2423 //
2424 
2425 
2426 /*****************************/
2427 /*** Operator * ( TVar ) ***/
2428 
2429 template<class TVar>
2431 TPZSkylMatrix<TVar>::operator*(const TVar value ) const
2432 {
2433  TPZSkylMatrix res( *this );
2434 
2435  for ( int64_t col = 0; col < this->Dim(); col++ )
2436  {
2437  // Aloca nova coluna para o resultado.
2438  int64_t colsize = Size(col);
2439  // Efetua a SOMA.
2440  TVar *elemRes = res.fElem[col];
2441  for ( int64_t i = 0; i < colsize; i++ )
2442  *elemRes++ *= value;
2443  }
2444 
2445  return( res );
2446 }
2447 
2448 
2449 
2450 
2451 
2452 /******************************/
2453 /*** Operator *= ( TVar ) ***/
2454 
2455 template<class TVar>
2458 {
2459  if ( IsZero( value ) )
2460  {
2461  Zero();
2462  return( *this );
2463  }
2464 
2465  int64_t col, colmax = this->Dim();
2466  for (col=0; col<colmax; col++ )
2467  {
2468  // Efetua a MULTIPLICACAO.
2469  TVar *elem = fElem[col];
2470  TVar *end = fElem[col+1];
2471  while ( elem < end ) *elem++ *= value;
2472  }
2473 
2474  this->fDecomposed = 0;
2475  return( *this );
2476 }
2477 
2478 
2479 
2480 /**************/
2481 /*** Resize ***/
2482 //
2483 // Muda as dimensoes da matriz, mas matem seus valores antigos. Novas
2484 // posicoes sao criadas com ZEROS.
2485 //
2486 template<class TVar>
2487 int TPZSkylMatrix<TVar>::Resize( int64_t newDim ,int64_t ) {
2488  if ( newDim == this->Dim() )
2489  return( 1 );
2490 
2491  fElem.Resize(newDim+1);
2492  // Cria nova matrix.
2493 
2494  // Copia os elementos para a nova matriz.
2495  int64_t min = MIN( newDim, this->Dim() );
2496  int64_t i;
2497  for ( i = min+1; i <= newDim; i++ )
2498  fElem[i] = fElem[i-1];
2499 
2500  // Zera as posicoes que sobrarem (se sobrarem)
2501  fStorage.Resize(fElem[newDim]-fElem[0]);
2502  this->fRow = this->fCol = newDim;
2503  this->fDecomposed = 0;
2504  return( 1 );
2505 }
2506 
2507 
2508 
2509 /*************/
2510 /*** Redim ***/
2511 //
2512 // Muda as dimensoes da matriz e ZERA seus elementos.
2513 //
2514 template<class TVar>
2515 int
2516 TPZSkylMatrix<TVar>::Redim( int64_t newDim , int64_t)
2517 {
2518  if ( newDim == this->Dim() )
2519  {
2520  Zero();
2521  return( 1 );
2522  }
2523 
2524  Clear();
2525  fElem.Resize(newDim);
2526  fElem.Fill(0);
2527  this->fRow = this->fCol = newDim;
2528  this->fDecomposed = 0;
2529  return( 1 );
2530 }
2531 
2532 template<>
2533 int
2535 {
2536  DebugStop();
2537  return -1;
2538 }
2539 template<>
2540 int
2542 {
2543  DebugStop();
2544  return -1;
2545 }
2546 template<>
2547 int
2549 {
2550  DebugStop();
2551  return -1;
2552 }
2553 /**************************/
2554 /*** Decompose Cholesky ***/
2555 template<class TVar>
2556 int
2557 TPZSkylMatrix<TVar>::Decompose_Cholesky(std::list<int64_t> &singular)
2558 {
2559  if(this->fDecomposed == ECholesky) return 1;
2560  if ( this->fDecomposed ) TPZMatrix<TVar>::Error(__PRETTY_FUNCTION__, "Decompose_Cholesky <Matrix already Decomposed>" );
2561 
2562 #ifdef DUMP_BEFORE_DECOMPOSE
2563  dump_matrix(this, "TPZSkylMatrix::Decompose_Cholesky(singular)");
2564 #endif
2565 
2566  singular.clear();
2567  TVar pivot;
2568  TVar Tol;
2569  ZeroTolerance(Tol);
2570  int64_t dimension = this->Dim();
2571  /* if(Dim() > 100) {
2572  cout << "\nTPZSkylMatrix Cholesky decomposition Dim = " << Dim() << endl;
2573  cout.flush();
2574  }*/
2575  for ( int64_t k = 0; k < dimension; k++ )
2576  {
2577  /* if(!(k%100) && Dim() > 100) {
2578  cout << k << ' ';
2579  cout.flush();
2580  }
2581  if(!(k%1000)) cout << endl;*/
2582  if ( Size(k) == 0 ) return( 0 );
2583 
2584  // Faz sum = SOMA( A(k,p) * A(k,p) ), p = 1, ..., k-1.
2585  //
2586  TVar sum = 0.0;
2587  TVar *elem_k = fElem[k]+1;
2588  TVar *end_k = fElem[k]+Size(k);
2589 #pragma clang loop vectorize_width(2)
2590  for ( ; elem_k < end_k; elem_k++ ) sum += (*elem_k) * (*elem_k);
2591 
2592  // Faz A(k,k) = sqrt( A(k,k) - sum ).
2593  //
2594  pivot = fElem[k][0] - sum;
2595  // if ( pivot < ((TVar)1.e-9) ) {
2596  if(pivot < Tol) {
2597  singular.push_back(k);
2598  std::cout << __FUNCTION__ << " Singular equation pivot " << pivot << " k " << k << std::endl;
2599  pivot = 1.;
2600  }
2601  // A matriz nao e' definida positiva.
2602 
2603  pivot = fElem[k][0] = sqrt( pivot );
2604 
2605  // Loop para i = k+1 ... Dim().
2606  //
2607  int64_t i=k+1;
2608  for ( int64_t j = 2; i<dimension; j++,i++ ) {
2609  // Se tiverem elementos na linha 'i' cuja coluna e'
2610  // menor do que 'K'...
2611  if ( Size(i) > j ) {
2612  // Faz sum = SOMA( A(i,p) * A(k,p) ), p = 1,..,k-1.
2613  sum = 0.0;
2614  TVar *elem_i = &fElem[i][j];
2615  TVar *end_i = fElem[i+1];
2616  elem_k = &(fElem[k][1]);
2617  // Vectorizable loop
2618  unsigned max_l = end_i - elem_i;
2619  unsigned tmp = end_k - elem_k;
2620  if (tmp < max_l) max_l = tmp;
2621 #pragma clang loop vectorize_width(2)
2622  for(unsigned l=0; l<max_l; l++)
2623  sum += (*elem_i++) * (*elem_k++);
2624  // Faz A(i,k) = (A(i,k) - sum) / A(k,k)
2625  fElem[i][j-1] = (fElem[i][j-1] -sum) / pivot;
2626  } else if ( Size(i) == j ) fElem[i][j-1] /= pivot;
2627 
2628  // Se nao tiverem estes elementos, sum = 0.0.
2629 
2630  // Se nao existir nem o elemento A(i,k), nao faz nada.
2631  }
2632  }
2633 
2634  if(this->Rows() && fabs(GetVal(this->Rows()-1,this->Rows()-1)) < fabs((TVar)1.e-15))
2635  {
2636  singular.push_back(this->Rows()-1);
2637  PutVal(this->Rows()-1,this->Rows()-1,1.);
2638  }
2639  this->fDecomposed = ECholesky;
2640  this->fDefPositive = 1;
2641  return( 1 );
2642 }
2643 
2644 template<>
2646 {
2647  DebugStop();
2648  return -1;
2649 }
2650 template<>
2652 {
2653  DebugStop();
2654  return -1;
2655 }
2656 template<>
2658 {
2659  DebugStop();
2660  return -1;
2661 }
2662 
2663 clarg::argBool clk_mig("-skl_chk_pm", "Migrate Skyline pages before Cholesky Decomposition");
2664 clarg::argBool clk_rea("-skl_chk_rea", "Reallocate Skyline data before Cholesky Decomposition");
2665 
2666 template<class TVar>
2667 int
2669 {
2670  if(this->fDecomposed == ECholesky) return 1;
2671  if (this->fDecomposed ) TPZMatrix<TVar>::Error(__PRETTY_FUNCTION__, "Decompose_Cholesky <Matrix already Decomposed>" );
2672 
2673 #ifdef DUMP_BEFORE_DECOMPOSE
2674  dump_matrix(this, "TPZSkylMatrix::Decompose_Cholesky()");
2675 #endif
2676 
2677  TVar pivot;
2678  TVar minpivot = 10000.;
2679  int64_t dimension = this->Dim();
2680  /* if(Dim() > 100) {
2681  cout << "\nTPZSkylMatrix Cholesky decomposition Dim = " << Dim() << endl;
2682  cout.flush();
2683  }*/
2684 
2685  if (clk_mig.was_set())
2686  MigratePages();
2687  if (clk_rea.was_set())
2688  ReallocForNuma();
2689 
2690  // #define DECOMPOSE_CHOLESKY_OPT2 // EBORIN: Optimization 2 -- See bellow
2691 #ifdef DECOMPOSE_CHOLESKY_OPT2
2692 //#pragma message ( "warning: Using experimental (last_col check) version of TPZSkylMatrix<TVar>::Decompose_Cholesky()" )
2693  PZError << "warning: Using experimental (last_col check) version of TPZSkylMatrix<TVar>::Decompose_Cholesky()" << std::endl;
2694  TPZVec<int64_t> last_col(dimension);
2695  {
2696  int64_t y = dimension-1;
2697  for (int64_t k=(dimension-1); k>=0; k--) {
2698  int64_t min_row = k-Size(k)+1;
2699  while(y>=min_row) {
2700  last_col[y--] = k;
2701  }
2702  }
2703  }
2704 #endif
2705 
2706  for ( int64_t k = 0; k < dimension; k++ )
2707  {
2708  /* if(!(k%100) && Dim() > 100) {
2709  cout << k << ' ';
2710  cout.flush();
2711  }
2712  if(!(k%1000)) cout << endl;*/
2713  if ( Size(k) == 0 ) return( 0 );
2714 
2715  // Faz sum = SOMA( A(k,p) * A(k,p) ), p = 1, ..., k-1.
2716  //
2717 
2718  TVar sum = 0.0;
2719  TVar *elem_k = fElem[k]+1;
2720  TVar *end_k = fElem[k]+Size(k);
2721 #pragma clang loop vectorize_width(2)
2722  for ( ; elem_k < end_k; elem_k++ ) sum += (*elem_k) * (*elem_k);
2723 
2724  // Faz A(k,k) = sqrt( A(k,k) - sum ).
2725  //
2726  pivot = fElem[k][0] - sum;
2727  minpivot = minpivot < pivot ? minpivot : pivot;
2728  if ( pivot < ((TVar)0.) || IsZero(pivot) ) {
2729  cout << "TPZSkylMatrix::DecomposeCholesky a matrix nao e positiva definida" << pivot << endl;
2730  return( 0 );
2731  }
2732  // A matriz nao e' definida positiva.
2733 
2734  pivot = fElem[k][0] = sqrt( pivot );
2735 
2736  // Loop para i = k+1 ... Dim().
2737  //
2738  int64_t i=k+1;
2739 #ifdef DECOMPOSE_CHOLESKY_OPT2
2740  //EBORIN: Este laco computa os elementos da linha k e pode ser computado em paralelo...
2741  // Cada iteracao computa L[k,i] em funcao de A[k,i], L[0...k-1,k] x L[0...k-1,i]
2742  // - Apenas a porcao L[a...k-1,k] e L[b...k-1,i] nao zero
2743  // (representada na skyline) precisa ser computada => Numero variavel de
2744  // multiplicacoes no laco interno
2745  // - L[a...k-1,k] e reaproveitada (i-k) vezes.
2746  // Idea: Substituir i<dimension por i<last_col(k), onde last_col(k) é a última
2747  // coluna da matriz que possi dados na linha k.
2748  int64_t max_i = last_col[k];
2749  for ( int64_t j = 2; i <= max_i; j++,i++ ) {
2750 #else
2751  for ( int64_t j = 2; i<dimension; j++,i++ ) {
2752 #endif
2753  // Se tiverem elementos na linha 'i' cuja coluna e'
2754  // menor do que 'K'...
2755  if ( Size(i) > j ) {
2756  // Faz sum = SOMA( A(i,p) * A(k,p) ), p = 1,..,k-1.
2757  sum = 0.0;
2758  TVar *elem_i = &fElem[i][j];
2759  TVar *end_i = fElem[i+1];
2760  elem_k = &(fElem[k][1]);
2761  // Vectorizable loop
2762  unsigned max_l = end_i - elem_i;
2763  unsigned tmp = end_k - elem_k;
2764  if (tmp < max_l) max_l = tmp;
2765 #pragma clang loop vectorize_width(2)
2766  for(unsigned l=0; l<max_l; l++)
2767  sum += (*elem_i++) * (*elem_k++);
2768  // Faz A(i,k) = (A(i,k) - sum) / A(k,k)
2769  fElem[i][j-1] = (fElem[i][j-1] -sum) / pivot;
2770  } else if ( Size(i) == j ) fElem[i][j-1] /= pivot;
2771 
2772  // Se nao tiverem estes elementos, sum = 0.0.
2773 
2774  // Se nao existir nem o elemento A(i,k), nao faz nada.
2775  }
2776 
2777  }
2778 
2779 
2780 
2781  this->fDecomposed = ECholesky;
2782  this->fDefPositive = 1;
2783 #ifdef PZDEBUG
2784  std::cout << __PRETTY_FUNCTION__ << " minpivot " << minpivot << std::endl;
2785 #endif
2786  return( 1 );
2787 }
2788 
2789 template<>
2791 {
2792  DebugStop();
2793  return -1;
2794 }
2795 template<>
2797 {
2798  DebugStop();
2799  return -1;
2800 }
2801 template<>
2803 {
2804  DebugStop();
2805  return -1;
2806 }
2807 
2808 
2809 template<class TVar>
2810 int
2812 {
2813  if(this->fDecomposed == ECholesky)
2814  return 1;
2815  if (this->fDecomposed )
2816  TPZMatrix<TVar>::Error(__PRETTY_FUNCTION__, "Decompose_Cholesky <Matrix already Decomposed>" );
2817 
2818  TVar pivot = (TVar)0.;
2819  TVar minpivot = 10000.;
2820  int64_t dimension = this->Dim();
2821 
2822  for (int64_t blk_i = 0; blk_i < dimension; blk_i += blk_sz) {
2823  int64_t first_row = blk_i;
2824  int64_t first_col = blk_i;
2825  int64_t last_row = blk_i + MIN(dimension,blk_i+blk_sz);
2826 
2827  // Compute band inner nodes
2828  for ( int64_t j = first_col; j < dimension; j++) {
2829 
2830  if (Size(j) == 0)
2831  return( 0 ); // Size of column cannot be zero.
2832 
2833  int64_t first_i = MAX(first_row, (j+1)-Size(j));
2834  int64_t last_i = MIN(last_row, j); // j-1 becase we do not need to compute u(j,j)
2835  for ( int64_t i = first_i; i < last_i; i++) {
2836 
2837  // Compute u(i,j) = (a_ij - SUM_k_1_to_i-1 (u_ki * u_kj) ) / uii
2838 
2839  // TVar u_ii = fElem[i][0];
2840  int64_t I = j-i; // fElem[j][I] = A(i,j) I = j-i
2841  TVar* u_ij = &fElem[j][I];
2842 
2843  TVar sum = 0.0;
2844  TVar *elem_kj = u_ij+1;
2845  TVar *end_kj = fElem[j+1];
2846  TVar *elem_ki = &fElem[i][1];
2847  TVar *end_ki = fElem[i+1];
2848 
2849  // Faz sum = SOMA( A(i,p) * A(k,p) ), p = 1,..,k-1.
2850  sum = 0.0;
2851 
2852  unsigned max_l = end_kj - elem_kj;
2853  unsigned tmp = end_ki - elem_ki;
2854  if (tmp < max_l) max_l = tmp;
2855 #pragma clang loop vectorize_width(2)
2856  for(unsigned l=0; l<max_l; l++)
2857  sum += (*elem_kj++) * (*elem_ki++);
2858 
2859  *u_ij = (*u_ij - sum) / pivot;
2860  }
2861 
2862  // After computing all the elements of this column, compute the diagonal (ujj)
2863  if (last_i == j)
2864  {
2865  TVar sum = 0.0;
2866  TVar* u_jj = &fElem[j][0];
2867  TVar *elem_k = fElem[j]+1;
2868  TVar *end_k = fElem[j+1];
2869 #pragma clang loop vectorize_width(2)
2870  for ( ; elem_k < end_k; elem_k++ ) sum += (*elem_k) * (*elem_k);
2871  pivot = *u_jj - sum;
2872 
2873  minpivot = minpivot < pivot ? minpivot : pivot;
2874 
2875  if ( pivot < ((TVar)0.) || IsZero(pivot) ) {
2876  cout << "TPZSkylMatrix::DecomposeCholesky a matrix nao e positiva definida" << pivot << endl;
2877  return( 0 );
2878  }
2879  // Valor do elemento da diagonal k,k
2880  *u_jj = sqrt( pivot );
2881  }
2882  }
2883  }
2884  this->fDecomposed = ECholesky;
2885  this->fDefPositive = 1;
2886  return( 1 );
2887 }
2888 
2889 /**********************/
2890 /*** Decompose LDLt ***/
2891 template<class TVar>
2892 int
2893 TPZSkylMatrix<TVar>::Decompose_LDLt(std::list<int64_t> &singular)
2894 {
2895  if( this->fDecomposed == ELDLt) return 1;
2896  if ( this->fDecomposed )
2897  {
2898  TPZMatrix<TVar>::Error(__PRETTY_FUNCTION__, "Decompose_LDLt <Matrix already Decomposed with different decomposition>" );
2899  }
2900 
2901 #ifdef DUMP_BEFORE_DECOMPOSE
2902  dump_matrix(this, "TPZSkylMatrix::Decompose_LDLt(singular)");
2903 #endif
2904 
2905  singular.clear();
2906 
2907  // Third try
2908  TVar *elj,*ell;
2909  int64_t j,l,minj,minl,minrow,dimension = this->Dim();
2910  TVar sum;
2911  j = 1;
2912  while(j < dimension) {
2913  minj = j-Size(j)+1;
2914  l = minj;
2915  while(l <= j) {
2916  minl = l-Size(l)+1;
2917  minrow = (minj<minl)? minl:minj;
2918  int64_t k = minrow;
2919  // DiagkPtr = fDiag+minrow;
2920  elj = fElem[j]+j-minrow;
2921  ell = fElem[l]+l-minrow;
2922  sum = 0.;
2923  //EBORIN:
2924  // Is this a hot spot?
2925  while(k < l) {
2926  sum += *elj-- * *ell-- * *(fElem[k++]);
2927  }
2928  *elj -= sum;
2929  if(ell != elj) *elj /= *ell;
2930  else if(IsZero(*elj)) {
2931  singular.push_back(l);
2932  *elj = 1.;
2933  }
2934  l++;
2935  }
2936  j++;
2937  }
2938 
2939  if(this->Rows() && IsZero(GetVal(this->Rows()-1,this->Rows()-1)))
2940  {
2941  singular.push_back(this->Rows()-1);
2942  PutVal(this->Rows()-1,this->Rows()-1,1.);
2943  }
2944  this->fDecomposed = ELDLt;
2945  this->fDefPositive = 0;
2946  //if(Dim() > 100) cout << endl;
2947  return( 1 );
2948 }
2949 
2950 template<class TVar>
2951 int
2953 {
2954 
2955  if( this->fDecomposed == ELDLt) return 1;
2956  if ( this->fDecomposed )
2957  TPZMatrix<TVar>::Error(__PRETTY_FUNCTION__, "Decompose_LDLt <Matrix already Decomposed with different decomposition>" );
2958 
2959 #ifdef DUMP_BEFORE_DECOMPOSE
2960  dump_matrix(this, "TPZSkylMatrix::Decompose_LDLt()");
2961 #endif
2962 
2963  // Third try
2964  TVar *elj,*ell;
2965  int64_t j,l,minj,minl,minrow,dimension = this->Dim();
2966  TPZVec<TVar> diag(dimension);
2967  for(j=0; j<dimension; j++)
2968  {
2969  diag[j] = *fElem[j];
2970  }
2971 
2972  //std::cout << "TPZSkylMatrix<TVar>::Decompose_LDLt: dimension = " << dimension << std::endl;
2973 
2974  TVar sum;
2975  j = 1;
2976  while(j < dimension) {
2977  /* if(!(j%100) && Dim() > 100) {
2978  cout << j << ' ';
2979  cout.flush();
2980  }
2981  if(!(j%1000)) cout << endl;*/
2982  minj = j-Size(j)+1;
2983  l = minj;
2984  while(l <= j) {
2985  minl = l-Size(l)+1;
2986  minrow = (minj<minl)? minl:minj;
2987  int64_t k = minrow;
2988  // DiagkPtr = fDiag+minrow;
2989  elj = fElem[j]+j-minrow;
2990  ell = fElem[l]+l-minrow;
2991  TVar *diagptr = &diag[k];
2992  sum = 0.;
2993  while(k < l) {
2994  // sum += *elj-- * *ell-- * *(fElem[k++]);
2995  //EBORIN: trocar *diagptr++ por *diagptr-- ajuda na vetorização?
2996  sum += *elj-- * *ell-- * *diagptr++;
2997  k++;
2998  }
2999  *elj -= sum;
3000  if(ell != elj) *elj /= *ell;
3001  else if(IsZero(*elj)) {
3002 #ifdef LOG4CXX
3003  std::stringstream sout;
3004  sout << "col = " << j << " diagonal " << *elj;
3005  LOGPZ_DEBUG(logger,sout.str())
3006 #endif
3007 
3008  *diagptr = *elj;
3009  cout << "TPZSkylMatrix pivot = " << *elj << endl;
3010  cout << "TPZSkylMatrix::DecomposeLDLt zero pivot\n";
3011  cout << "j = " << j << " l = " << l << endl;
3012  }
3013  else
3014  {
3015  *diagptr = *elj;
3016  }
3017  l++;
3018  }
3019  j++;
3020  }
3021  this->fDecomposed = ELDLt;
3022  this->fDefPositive = 0;
3023  //if(Dim() > 100) cout << endl;
3024  return( 1 );
3025 }
3026 
3027 /*********************/
3028 /*** Subst Forward ***/
3029 //
3030 // Faz Ax = b, onde A e' triangular inferior.
3031 //
3032 template<class TVar>
3033 int
3035 {
3036  if ( (B->Rows() != this->Dim()) || this->fDecomposed != ECholesky)
3037  TPZMatrix<TVar>::Error(__PRETTY_FUNCTION__,"TPZSkylMatrix::Subst_Forward not decomposed with cholesky");
3038 
3039 #ifdef DUMP_BEFORE_SUBST
3040  dump_matrices(this, B, "TPZSkylMatrix::Subst_Forward(B)");
3041 #endif
3042 
3043  // std::cout << "SubstForward this " << (void *) this << " neq " << Dim() << " normb " << Norm(*B) << std::endl;
3044  int64_t dimension=this->Dim();
3045  for ( int64_t j = 0; j < B->Cols(); j++ )
3046  {
3047  int64_t k=0;
3048  while (k<dimension && (*B)(k,j) == TVar(0)) {
3049  k++;
3050  }
3051  // std::cout << "kstart " << k << std::endl;
3052  for (; k < dimension; k++ )
3053  {
3054  // Faz sum = SOMA( A[k,i] * B[i,j] ), para i = 1, ..., k-1.
3055  //
3056  TVar sum = 0.0;
3057  TVar *elem_ki = fElem[k]+1;
3058  TVar *end_ki = fElem[k+1];
3059  TVar* BPtr = &(*B)(k,j); //(k-1,j)
3060  // for ( int i = k-1; elem_ki < end_ki; i-- )
3061  // sum += (*elem_ki++) * B->GetVal( i, j );
3062 
3063  //EBORIN:
3064  // Is this a hot-spot?
3065  // Is it vectorized?
3066  while(elem_ki < end_ki) sum += (*elem_ki++) * (*--BPtr);//(*BPtr--)
3067  // Faz B[k,j] = (B[k,j] - sum) / A[k,k].
3068  //
3069  // B->PutVal( k, j, (B->GetVal(k, j) - sum) / row_k->pElem[0] );
3070  BPtr = &(*B)(k,j);
3071  *BPtr-= sum;
3072  *BPtr /= fElem[k][0];
3073  }
3074  }
3075 
3076  return( 1 );
3077 }
3078 
3079 /*********************/
3080 /*** Subst Backward ***/
3081 //
3082 // Faz Ax = b, onde A e' triangular superior.
3083 //
3084 template<class TVar>
3085 int
3087 {
3088  if ( (B->Rows() != this->Dim()) || this->fDecomposed != ECholesky)
3089  TPZMatrix<TVar>::Error(__PRETTY_FUNCTION__,"TPZSkylMatrix::Subst_Backward not decomposed with cholesky");
3090 
3091 #ifdef DUMP_BEFORE_SUBST
3092  dump_matrices(this, B, "TPZSkylMatrix::Subst_Backward(B)");
3093 #endif
3094  int64_t Dimension = this->Dim();
3095  if(!Dimension) return 1; // nothing to do
3096  int64_t j;
3097  for ( j = 0; j < B->Cols(); j++ )
3098  {
3099  int64_t k = Dimension-1;
3100  while (k>0 && (*B)(k,j) == TVar(0.)) {
3101  k--;
3102  }
3103  // std::cout << "kstart " << k << std::endl;
3104 
3105  for (;k > 0; k-- )
3106  {
3107  // Faz sum = SOMA( A[k,i] * B[i,j] ), para i = 1, ..., k-1.
3108  //
3109  TVar val;
3110  TVar *elem_ki = fElem[k];
3111  TVar *end_ki = fElem[k+1];
3112  TVar *BPtr = &(*B)(k,j);
3113  *BPtr /= *elem_ki++;
3114  val = *BPtr;
3115  // BPtr;
3116  // substract the column of the skyline matrix from the vector.
3117  //EBORIN:
3118  // Is this a hot-spot?
3119  // Is it vectorized?
3120  while(elem_ki < end_ki) *--BPtr -= (*elem_ki++) * val;
3121  }
3122  }
3123  for( j = 0; j< B->Cols(); j++) (*B)(0,j) /= fElem[0][0];
3124  return( 1 );
3125 }
3126 
3127 /***********************/
3128 /*** Subst L Forward ***/
3129 //
3130 // Faz a "Forward substitution" assumindo que os elementos
3131 // da diagonal sao todos iguais a 1.
3132 //
3133 template<class TVar>
3134 int
3136  if ( (B->Rows() != this->Dim()) || (this->fDecomposed != ELDLt && this->fDecomposed != ELU) )
3137  return( 0 );
3138 
3139  int64_t dimension =this->Dim();
3140  for ( int64_t k = 0; k < dimension; k++ ) {
3141  for ( int64_t j = 0; j < B->Cols(); j++ ) {
3142  // Faz sum = SOMA( A[k,i] * B[i,j] ), para i = 1, ..., k-1.
3143  //
3144  TVar sum = 0.0;
3145  TVar *elem_ki = fElem[k]+1;
3146  TVar *end_ki = fElem[k+1];
3147  TVar *BPtr = &(*B)(k,j);
3148  while(elem_ki < end_ki) sum += (*elem_ki++) * (*--BPtr);
3149 
3150  // Faz B[k,j] = (B[k,j] - sum) / A[k,k].
3151  //
3152  // B->PutVal( k, j, (B->GetVal(k, j) - sum) / row_k->pElem[0] );
3153  BPtr = &(*B)(k,j);
3154  *BPtr-= sum;
3155  }
3156  }
3157  return( 1 );
3158 }
3159 
3160 /******************/
3161 /*** Subst Diag ***/
3162 //
3163 // Faz Ax = b, sendo que A e' assumida ser uma matriz diagonal.
3164 //
3165 template<class TVar>
3166 int
3168  if ( (B->Rows() != this->Dim()) || this->fDecomposed != ELDLt) return( 0 );
3169  int64_t dimension = this->Dim();
3170  if (!dimension) {
3171  return 1;
3172  }
3173  for ( int64_t j = 0; j < B->Cols(); j++ ) {
3174  TVar *BPtr = &(*B)(0,j);
3175  int64_t k=0;
3176  while(k < dimension) *BPtr++ /= *(fElem[k++]);
3177  }
3178  return( 1 );
3179 }
3180 
3181 /*********************/
3182 /*** Subst Backward ***/
3183 //
3184 // Faz Ax = b, onde A e' triangular superior.
3185 //
3186 template<class TVar>
3187 int
3189 {
3190  if ( (B->Rows() != this->Dim()) || !this->fDecomposed || this->fDecomposed == ECholesky)
3191  TPZMatrix<TVar>::Error(__PRETTY_FUNCTION__,"TPZSkylMatrix::Subst_LBackward not decomposed properly");
3192 
3193  int64_t Dimension = this->Dim();
3194  for ( int64_t k = Dimension-1; k > 0; k-- ) {
3195  for ( int64_t j = 0; j < B->Cols(); j++ ) {
3196  // Faz sum = SOMA( A[k,i] * B[i,j] ), para i = 1, ..., k-1.
3197  //
3198  // TVar val = 0.0;
3199  TVar *elem_ki = fElem[k]+1;
3200  TVar *end_ki = fElem[k+1];
3201  TVar *BPtr = &(*B)(k,j);
3202  // substract the column of the skyline matrix from the vector.
3203 
3204  TVar val = *BPtr;
3205  while(elem_ki < end_ki) *--BPtr -= (*elem_ki++) * val;
3206  }
3207  }
3208  return( 1 );
3209 }
3210 
3211 
3212 /**************************** PRIVATE ****************************/
3213 
3214 
3215 /***************/
3216 /*** PutZero ***/
3217 
3218 template<class TVar>
3219 int
3221 {
3222 
3223  fStorage.Fill(0.);
3224  this->fDecomposed = 0;
3225  this->fDefPositive = 0;
3226  return( 1 );
3227 }
3228 
3229 /*************/
3230 /*** CLear ***/
3231 
3232 template<class TVar>
3233 int
3235 {
3236  this->fStorage.Resize(0);
3237  // fStorage.Shrink();
3238  this->fElem.Resize(0);
3239  this->fRow = this->fCol = 0;
3240  this->fDecomposed = 0;
3241  return( 1 );
3242 }
3243 
3244 /************/
3245 /*** Copy ***/
3246 
3247 template<class TVar>
3248 void
3250 {
3251  int64_t dimension = A.Dim();
3252  this->fRow = this->fCol = dimension;
3253  fElem.Resize(A.fElem.NElements());
3254  fStorage = A.fStorage;
3255  int64_t i;
3256  TVar *firstp = 0;
3257  if(fStorage.NElements()) firstp = &fStorage[0];
3258  for(i=0; i<=dimension; i++)
3259  fElem[i]=firstp+(A.fElem[i]-A.fElem[0]);
3260  this->fDecomposed = A.fDecomposed;
3261  this->fDefPositive = A.fDefPositive;
3262 
3263 }
3264 
3265 template<class TVar>
3268 
3269 template <class TVar>
3270 void TPZSkylMatrix<TVar>::Read(TPZStream &buf, void *context )
3271 {
3272  TPZMatrix<TVar>::Read(buf, context);
3273  buf.Read( fStorage);
3274  TPZVec<int64_t> skyl(this->Rows()+1,0);
3275  buf.Read( skyl);
3276  TVar *ptr = 0;
3277  if (this->Rows()) {
3278  ptr = &fStorage[0];
3279  }
3280  fElem.Resize(this->Rows()+1);
3281  for (int64_t i=0; i<this->Rows()+1; i++) {
3282  fElem[i] = skyl[i] + ptr;
3283  }
3284 }
3285 
3286 template <class TVar>
3287 void TPZSkylMatrix<TVar>::Write( TPZStream &buf, int withclassid ) const
3288 {
3289  TPZMatrix<TVar>::Write(buf,withclassid);
3290  buf.Write( fStorage);
3291  TPZVec<int64_t> skyl(this->Rows()+1,0);
3292  TVar *ptr = 0;
3293  if (this->Rows()) {
3294  ptr = &fStorage[0];
3295  }
3296  for (int64_t i=0; i<this->Rows()+1; i++) {
3297  skyl[i] = fElem[i] - ptr;
3298  }
3299  buf.Write( skyl);
3300 }
3301 
3302 
3303 template<class TVar>
3304 void TPZSkylMatrix<TVar>::DecomposeColumn(int64_t col, int64_t prevcol){
3305  TVar *ptrprev; //Pointer to prev column
3306  TVar *ptrcol; //Pointer to col column
3307  int64_t skprev, skcol; //prev and col Skyline height respectively
3308  int64_t minline;
3309 
3310  skprev = SkyHeight(prevcol);
3311  skcol = SkyHeight(col);
3312 
3313  ptrprev = Diag(prevcol);
3314  ptrcol = Diag(col);
3315 
3316  if((prevcol-skprev) > (col-skcol)){
3317  minline = prevcol - skprev;
3318  }else
3319  {
3320  minline = col - skcol;
3321  }
3322  if(minline > prevcol) {
3323  cout << "error condition\n";
3324  cout.flush();
3325  return;
3326  }
3327  TVar *run1 = ptrprev + (prevcol-minline);
3328  TVar *run2 = ptrcol + (col-minline);
3329  TVar sum = 0;
3330 
3331  while(run1 != ptrprev) sum += (*run1--)*(*run2--);
3332  *run2-=sum;
3333  if(run1 != run2){
3334  *run2 /= *run1;
3335  }else{
3336  *run2=sqrt(*run2);
3337  }
3338 
3339 }
3340 template<>
3341 void TPZSkylMatrix<std::complex<float> >::DecomposeColumn(int64_t col, int64_t prevcol,std::list<int64_t> &singular)
3342 {
3343  DebugStop();
3344 }
3345 template<>
3346 void TPZSkylMatrix<std::complex<double> >::DecomposeColumn(int64_t col, int64_t prevcol,std::list<int64_t> &singular)
3347 {
3348  DebugStop();
3349 }
3350 template<>
3351 void TPZSkylMatrix<std::complex<long double> >::DecomposeColumn(int64_t col, int64_t prevcol,std::list<int64_t> &singular)
3352 {
3353  DebugStop();
3354 }
3355 
3356 template<class TVar>
3357 void TPZSkylMatrix<TVar>::DecomposeColumn(int64_t col, int64_t prevcol,std::list<int64_t> &singular){
3358  TVar *ptrprev; //Pointer to prev column
3359  TVar *ptrcol; //Pointer to col column
3360  int64_t skprev, skcol; //prev and col Skyline height respectively
3361  int64_t minline;
3362 
3363  skprev = SkyHeight(prevcol);
3364  skcol = SkyHeight(col);
3365 
3366  ptrprev = Diag(prevcol);
3367  ptrcol = Diag(col);
3368 
3369  if((prevcol-skprev) > (col-skcol)){
3370  minline = prevcol - skprev;
3371  }else
3372  {
3373  minline = col - skcol;
3374  }
3375  if(minline > prevcol) {
3376  cout << "error condition\n";
3377  cout.flush();
3378  return;
3379  }
3380  TVar *run1 = ptrprev + (prevcol-minline);
3381  TVar *run2 = ptrcol + (col-minline);
3382  TVar sum = 0;
3383 
3384  while(run1 != ptrprev) sum += (*run1--)*(*run2--);
3385  *run2-=sum;
3386  if(run1 != run2){
3387  *run2 /= *run1;
3388  }else{
3389  TVar pivot = *run2;
3390  if ( fabs(pivot) < fabs((TVar)1.e-10) ) {
3391 #ifdef LOG4CXX
3392  std::stringstream sout;
3393  sout << "equation " << col << " is singular pivot " << pivot;
3394  LOGPZ_WARN(logger,sout.str())
3395 #endif
3396  singular.push_back(col);
3397  pivot = 1.;
3398  }
3399 
3400  *run2=sqrt(pivot);
3401  }
3402 
3403 }
3404 
3405 template<>
3406 void TPZSkylMatrix<std::complex<float> >::DecomposeColumn2(int64_t col, int64_t prevcol)
3407 {
3408  DebugStop();
3409 }
3410 template<>
3411 void TPZSkylMatrix<std::complex<double> >::DecomposeColumn2(int64_t col, int64_t prevcol)
3412 {
3413  DebugStop();
3414 }
3415 
3416 template<>
3418 {
3419  DebugStop();
3420 }
3421 
3422 
3423 template<class TVar>
3424 void TPZSkylMatrix<TVar>::DecomposeColumn2(int64_t col, int64_t prevcol){
3425 
3426  //Cholesky Decomposition
3427  TVar *ptrprev; //Pointer to prev column
3428  TVar *ptrcol; //Pointer to col column
3429  int64_t skprev, skcol; //prev and col Skyline height respectively
3430  int64_t minline;
3431 
3432  skprev = SkyHeight(prevcol);
3433  skcol = SkyHeight(col);
3434 
3435  ptrprev = Diag(prevcol);
3436  ptrcol = Diag(col);
3437 
3438  if((prevcol-skprev) > (col-skcol)){
3439  minline = prevcol - skprev;
3440  }else
3441  {
3442  minline = col - skcol;
3443  }
3444  if(minline > prevcol) {
3445  cout << "error condition\n";
3446  cout.flush();
3447  return;
3448  }
3449  TVar *run1 = ptrprev + 1;
3450  TVar *run2 = ptrcol + (col-prevcol)+1;
3451  TVar *lastptr = ptrprev + prevcol-minline+1;
3452  TVar sum = 0;
3453  TVar *modify = ptrcol+(col-prevcol);
3454 #ifndef BLAS
3455  while(run1 != lastptr) sum += (*run1++)*(*run2++);
3456 
3457 #else
3458  int64_t n=lastptr-run1;
3459  sum = cblas_ddot(n,run1,1,run2,1);
3460 #endif
3461  *modify-=sum;
3462  if(col != prevcol){
3463  *modify /= *ptrprev;
3464  }else{
3465  if ( fabs(*modify) < fabs((TVar)1.e-25) ) {
3466  cout << "TPZSkylMatrix::DecomposeCholesky a matrix nao e positiva definida" << *modify << endl;
3467  *modify = ((TVar)1.e-10);
3468  }
3469  *modify=sqrt(*modify);
3470  }
3471 }
3472 
3473 template <class TVar>
3474 void TPZSkylMatrix<TVar>::AutoFill(int64_t nrow, int64_t ncol, int symmetric) {
3475  if (nrow != ncol || !symmetric)
3476  {
3477  DebugStop();
3478  }
3479  TPZMatrix<TVar>::Redim(nrow,ncol);
3480  TPZVec<int64_t> skyline(nrow);
3481  fElem.resize(nrow+1);
3482  fElem.Fill(0);
3483  for (int64_t i=0; i<nrow; i++) {
3484  skyline[i]=(i*(rand()))/RAND_MAX;
3485  }
3486 // std::cout << "skyline " << skyline << std::endl;
3487  InitializeElem(skyline,fStorage,fElem);
3488 
3489  for (int64_t i=0; i<nrow; i++) {
3490  TVar sum = 0.;
3491  for (int64_t j=skyline[i]; j<i; j++) {
3492  TVar val = ((TVar)rand())/((TVar)RAND_MAX);
3493  PutVal(j,i,val);
3494  sum += fabs(val);
3495  }
3496  PutVal(i,i,sum+(TVar)1.);
3497  }
3498  for (int64_t i=0; i<nrow; i++) {
3499  TVar sum = (TVar)0.;
3500  for (int64_t j=0; j<nrow; j++) {
3501  TVar val;
3502  val = GetVal(j,i);
3503  sum += fabs(val);
3504  }
3505  PutVal(i,i,sum+(TVar)1.);
3506  }
3507 
3508 }
3509 
3510 template class TPZSkylMatrix<float>;
3511 template class TPZSkylMatrix<std::complex<float> >;
3512 
3513 template class TPZSkylMatrix<double>;
3514 template class TPZSkylMatrix<std::complex<double> >;
3515 
3516 #ifndef BORLAND
3517 template class TPZRestoreClass<TPZSkylMatrix<float>>;
3520 
3524 #endif
3525 
3526 template class TPZSkylMatrix<long double>;
3528 
3529 template class TPZSkylMatrix<TPZFlopCounter>;
3530 
3531 #endif // USING_NEW_SKYLMAT
3532 
3533 #if (defined DUMP_BEFORE_DECOMPOSE) || (defined DUMP_BEFORE_SUBST)
3534 
3535 pthread_mutex_t dump_matrix_mutex = PTHREAD_MUTEX_INITIALIZER;
3536 unsigned matrix_unique_id = 0;
3537 clarg::argString dm_prefix("-dm_prefix",
3538  "Filename prefix for matrices dumped before decompose/subst",
3539  "matrix_");
3540 
3541 template<class TVar>
3542 void dump_matrix(TPZMatrix<TVar>* m, const char* fn_annotation)
3543 {
3544  if (!dm_prefix.was_set()) return;
3545  PZ_PTHREAD_MUTEX_LOCK(&dump_matrix_mutex, "dump_matrix");
3546  std::stringstream fname;
3547  fname << dm_prefix.get_value() << fn_annotation << "_" << matrix_unique_id++ << ".bin";
3548  std::cout << "Dump matrix before... (file: " << fname << ")" << std::endl;
3549  TPZBFileStream fs;
3550  fs.OpenWrite(fname.str());
3551  m->Write(fs, 0);
3552  std::cout << "Dump matrix before... [Done]" << std::endl;
3553  PZ_PTHREAD_MUTEX_UNLOCK(&dump_matrix_mutex, "dump_matrix");
3554 }
3555 
3556 template<class TVar>
3557 void dump_matrices(const TPZMatrix<TVar>* a, const TPZMatrix<TVar>* b, const char* fn_annotation)
3558 {
3559  if (!dm_prefix.was_set()) return;
3560  PZ_PTHREAD_MUTEX_LOCK(&dump_matrix_mutex, "dump_matrices");
3561  std::stringstream fname;
3562  fname << dm_prefix.get_value() << fn_annotation << "_" << matrix_unique_id++ << ".bin";
3563  std::cout << "Dump matrix before... (file: " << fname << ")" << std::endl;
3564  TPZBFileStream fs;
3565  fs.OpenWrite(fname.str());
3566  a->Write(fs, 0);
3567  b->Write(fs, 0);
3568  std::cout << "Dump matrix before... [Done]" << std::endl;
3569  PZ_PTHREAD_MUTEX_UNLOCK(&dump_matrix_mutex, "dump_matrices");
3570 }
3571 #endif
3572 
int Subst_LForward(TPZFMatrix< TVar > *b) const override
Computes B = Y, where A*Y = B, A is lower triangular with A(i,i)=1.
Definition: pzskylmat.cpp:3135
#define PZ_PTHREAD_MUTEX_UNLOCK(mutex, fn)
Definition: pz_pthread.h:79
expr_ expr_ expr_ expr_ expr_ expr_ expr_ expr_ expr_ expr_ expr_ expr_ expr_ expr_ expr_ expr_ expr_ expr_ expr_ expr_ expr_ expr_ expr_ expr_ expr_ expr_ expr_ expr_ expr_ expr_ fabs
Definition: tfadfunc.h:140
int Resize(const int64_t newDim, const int64_t) override
Redimensions a matriz keeping the previous values.
Definition: pzskylmat.cpp:2487
void AddKel(TPZFMatrix< TVar > &elmat, TPZVec< int64_t > &sourceindex, TPZVec< int64_t > &destinationindex) override
Add a contribution of a stiffness matrix.
Definition: pzskylmat.cpp:2259
virtual void Residual(const TPZFMatrix< TVar > &x, const TPZFMatrix< TVar > &rhs, TPZFMatrix< TVar > &res)
Computes res = rhs - this * x.
Definition: pzmatrix.h:827
Definition: pzmatrix.h:52
bool IsZero(long double a)
Returns if the value a is close Zero as the allowable tolerance.
Definition: pzreal.h:668
Contains definitions to LOGPZ_DEBUG, LOGPZ_INFO, LOGPZ_WARN, LOGPZ_ERROR and LOGPZ_FATAL, and the implementation of the inline InitializePZLOG(string) function using log4cxx library or not. It must to be called out of "#ifdef LOG4CXX" scope.
void ReallocForNuma()
Definition: pzskylmat.h:603
void Write(TPZStream &buf, int withclassid) const override
Packs the object structure in a stream of bytes.
Definition: pzmatrix.cpp:1420
const TVar & GetVal(const int64_t row, const int64_t col) const override
Get values without bounds checking This method is faster than "Get" if DEBUG is defined.
Definition: pzskylmat.cpp:2130
static int Error(const char *msg, const char *msg2=0)
Returns error messages.
Definition: pzmatrix.cpp:1402
virtual void resize(const int64_t newsize)
Definition: pzvec.h:213
clarg::argBool clk_rea("-skl_chk_rea", "Reallocate Skyline data before Cholesky Decomposition")
TVar & operator()(const int64_t row, const int64_t col)
Definition: pzskylmat.cpp:1856
clarg::argInt dimension("-d", "Matrices dimension M x M", 1000)
TPZVec< TVar > fStorage
fStorage is a unique vector which contains all the data of the skyline matrix
Definition: pzskylmat.h:625
static void ComputeMaxSkyline(const TPZSkylMatrix< TVar > &first, const TPZSkylMatrix< TVar > &second, TPZVec< int64_t > &res)
Computes the highest skyline of both objects.
Definition: pzskylmat.cpp:1838
int64_t fRow
Number of rows in matrix.
Definition: pzmatrix.h:779
void Write(TPZStream &buf, int withclassid) const override
Packs the object structure in a stream of bytes.
Definition: pzskylmat.cpp:3287
int Decompose_Cholesky() override
Decomposes the current matrix using Cholesky method. The current matrix has to be symmetric...
Definition: pzskylmat.cpp:2668
void PrepareZ(const TPZFMatrix< TVar > &y, TPZFMatrix< TVar > &z, const TVar beta, const int opt) const
Is an auxiliar method used by MultiplyAdd.
Definition: pzmatrix.cpp:135
char fDecomposed
Decomposition type used to decompose the current matrix.
Definition: pzmatrix.h:783
TPZSkylMatrix & operator-=(const TPZSkylMatrix< TVar > &A)
Definition: pzskylmat.cpp:2406
int64_t SkyHeight(int64_t col)
return the height of the skyline for a given column
Definition: pzskylmat.h:422
TinyFad< 8, T > abs(const TinyFad< 8, T > &in)
Definition: tinyfadeight.h:846
REAL val(STATE &number)
Returns value of the variable.
Definition: pzartdiff.h:23
#define LOGPZ_WARN(A, B)
Define log for warnings.
Definition: pzlog.h:91
#define MAX(a, b)
Gets maxime value between a and b.
Definition: pzreal.h:81
AutoPointerMutexArrayInit tmp
void Copy(const TPZSkylMatrix< TVar > &)
Definition: pzskylmat.cpp:3249
int Subst_Diag(TPZFMatrix< TVar > *b) const override
Computes B = Y, where A*Y = B, A is diagonal matrix.
Definition: pzskylmat.cpp:3167
TPZSkylMatrix & operator*=(TVar value)
Definition: pzskylmat.cpp:2457
void MigratePages()
Definition: pzskylmat.h:598
char fDefPositive
Definite Posistiveness of current matrix.
Definition: pzmatrix.h:785
static TVar gZero
Initializing value to static variable.
Definition: pzmatrix.h:786
Implements a skyline storage format. A Skyline matrix is symmetric so square. Matrix.
Definition: pzskylmat.h:394
TPZSkylMatrix< REAL > matrix
Definition: numatst.cpp:255
TVar * Diag(int64_t col)
This method returns a pointer to the diagonal element of the matrix of the col column.
Definition: pzskylmat.h:577
TPZSkylMatrix operator*(const TVar v) const
Definition: pzskylmat.cpp:2431
virtual void UpdateFrom(TPZAutoPointer< TPZMatrix< TVar > > mat) override
Updates the values of the matrix based on the values of the matrix.
Definition: pzskylmat.cpp:1775
virtual TVar & s(const int64_t row, const int64_t col) override
The operators check on the bounds if the DEBUG variable is defined.
Definition: pzskylmat.cpp:1871
int Zero() override
Makes Zero all the elements.
Definition: pzfmatrix.h:651
virtual void Resize(const int64_t newsize, const T &object)
Resizes the vector object reallocating the necessary storage, copying the existing objects to the new...
Definition: pzvec.h:373
int Subst_Forward(TPZFMatrix< TVar > *b) const override
Computes B = Y, where A*Y = B, A is lower triangular.
Definition: pzskylmat.cpp:3034
static const double tol
Definition: pzgeoprism.cpp:23
void SetSkyline(const TPZVec< int64_t > &skyline)
modify the skyline of the matrix, throwing away its values skyline indicates the minimum row number w...
Definition: pzskylmat.cpp:1792
#define PZ_PTHREAD_MUTEX_LOCK(mutex, fn)
Definition: pz_pthread.h:76
virtual void Write(const bool val)
Definition: TPZStream.cpp:8
TPZSkylMatrix & operator+=(const TPZSkylMatrix< TVar > &A)
Definition: pzskylmat.cpp:2391
TVar Norm(const TPZFMatrix< TVar > &A)
Returns the norm of the matrix A.
Contains TPZMatrixclass which implements full matrix (using column major representation).
#define DebugStop()
Returns a message to user put a breakpoint in.
Definition: pzerror.h:20
#define LOGPZ_DEBUG(A, B)
Define log for debug info.
Definition: pzlog.h:87
TPZSkylMatrix & operator=(const TPZSkylMatrix< TVar > &A)
Definition: pzskylmat.cpp:2164
Definition: pzmatrix.h:52
int64_t Rows() const
Returns number of rows.
Definition: pzmatrix.h:803
ci
Definition: test.py:262
int Zero() override
Zeroes the matrix.
Definition: pzskylmat.cpp:3220
int Decompose_LDLt() override
Decomposes the current matrix using LDLt.
Definition: pzskylmat.cpp:2952
expr_ expr_ expr_ expr_ expr_ expr_ expr_ expr_ expr_ expr_ expr_ expr_ expr_ expr_ expr_ expr_ expr_ expr_ expr_ expr_ expr_ expr_ sqrt
Definition: tfadfunc.h:120
string res
Definition: test.py:151
static int64_t NumElements(const TPZVec< int64_t > &skyline)
Definition: pzskylmat.cpp:1803
Contains TPZSkyline class which implements a skyline storage format.
int64_t Size(const int64_t column) const
Definition: pzskylmat.h:558
void Read(TPZStream &buf, void *context) override
Unpacks the object structure from a stream of bytes.
Definition: pzmatrix.cpp:1413
#define MIN(a, b)
Gets minime value between a and b.
Definition: pzreal.h:85
TPZVec< TVar * > fElem
Storage to keep the first elements to each equation.
Definition: pzskylmat.h:620
virtual int Redim(const int64_t newRows, const int64_t newCols)
Redimensions the matrix reinitializing it with zero.
Definition: pzmatrix.h:289
Full matrix class. Matrix.
Definition: pzfmatrix.h:32
int Subst_Backward(TPZFMatrix< TVar > *b) const override
Computes B = Y, where A*Y = B, A is upper triangular.
Definition: pzskylmat.cpp:3086
int Redim(const int64_t newRows, const int64_t newCols) override
Redimension a matrix and ZERO your elements.
Definition: pzfmatrix.h:616
virtual int64_t Dim() const
Returns the dimension of the matrix if the matrix is square.
Definition: pzmatrix.h:892
virtual void MultAdd(const TPZFMatrix< TVar > &x, const TPZFMatrix< TVar > &y, TPZFMatrix< TVar > &z, const TVar alpha, const TVar beta, const int opt=0) const override
It computes z = beta * y + alpha * opt(this)*x but z and x can not overlap in memory.
Definition: pzskylmat.cpp:1944
int Clear() override
It clears data structure.
Definition: pzskylmat.cpp:3234
int Redim(const int64_t newDim, const int64_t) override
Redimensions the matrix reinitializing it with zero.
Definition: pzskylmat.cpp:2516
TPZSkylMatrix operator-() const
Definition: pzskylmat.cpp:3267
int64_t fCol
Number of cols in matrix.
Definition: pzmatrix.h:781
void Read(TPZStream &buf, void *context) override
Unpacks the object structure from a stream of bytes.
Definition: pzskylmat.cpp:3270
virtual void SolveSOR(int64_t &numiterations, const TPZFMatrix< TVar > &F, TPZFMatrix< TVar > &result, TPZFMatrix< TVar > *residual, TPZFMatrix< TVar > &scratch, const REAL overrelax, REAL &tol, const int FromCurrent=0, const int direction=1) override
Solves the linear system using Successive Over Relaxation method (Gauss Seidel). ...
Definition: pzskylmat.cpp:1988
TPZSkylMatrix operator+(const TPZSkylMatrix< TVar > &A) const
Definition: pzskylmat.cpp:2176
void Copy(TinyVector< T, Num > &y, const TinyVector< T, Num > &x)
Definition: tinyveccpy.h:87
int PutVal(const int64_t row, const int64_t col, const TVar &element) override
Put values without bounds checking This method is faster than "Put" if DEBUG is defined.
Definition: pzskylmat.cpp:1920
static void InitializeElem(const TPZVec< int64_t > &skyline, TPZVec< TVar > &storage, TPZVec< TVar *> &elem)
Definition: pzskylmat.cpp:1814
bool was_set() const
Definition: arglib.h:138
int64_t Cols() const
Definition: pzshtmat.h:47
int64_t Rows() const
Definition: pzshtmat.h:45
const T & get_value() const
Definition: arglib.h:177
static void Swap(int64_t *a, int64_t *b)
Swaps contents of a in b and b in a.
Definition: pzmatrix.h:949
void AddSameStruct(TPZSkylMatrix< TVar > &B, double k=1.)
Add a skyline matrix B with same structure of this It makes this += k * B.
Definition: pzskylmat.cpp:1747
void Fill(const T &copy, const int64_t from=0, const int64_t numelem=-1)
Will fill the elements of the vector with a copy object.
Definition: pzvec.h:460
void DecomposeColumn(int64_t col, int64_t prevcol)
Definition: pzskylmat.cpp:3304
int Decompose_Cholesky_blk(int64_t blk_sz)
Definition: pzskylmat.cpp:2811
Contains TPZVerySparseMatrix class which implements a matrix whose nonzero elements are stored in bin...
int64_t Cols() const
Returns number of cols.
Definition: pzmatrix.h:809
int ClassId() const override
Define the class id associated with the class.
Definition: pzskylmat.h:629
void OpenWrite(const std::string &fileName)
Defines the interface for saving and reading data. Persistency.
Definition: TPZStream.h:50
int64_t NElements() const
Returns the number of elements of the vector.
Definition: pzvec.h:190
clarg::argString m("-m", "input matrix file name (text format)", "matrix.txt")
clarg::argBool clk_mig("-skl_chk_pm", "Migrate Skyline pages before Cholesky Decomposition")
REAL ZeroTolerance()
Returns the tolerance to Zero value. Actually: .
Definition: pzreal.h:633
TVar & g(const int64_t row, const int64_t col) const
Definition: pzfmatrix.h:594
Implements an interface to register a class id and a restore function. Persistence.
Definition: TPZSavable.h:150
#define PZError
Defines the output device to error messages and the DebugStop() function.
Definition: pzerror.h:15
void DecomposeColumn2(int64_t col, int64_t prevcol)
Definition: pzskylmat.cpp:3424
virtual void Read(bool &val)
Definition: TPZStream.cpp:91
int Subst_LBackward(TPZFMatrix< TVar > *b) const override
Computes B = Y, where A*Y = B, A is upper triangular with A(i,i)=1.
Definition: pzskylmat.cpp:3188
virtual void AutoFill(int64_t nrow, int64_t ncol, int symmetric)
Definition: pzskylmat.cpp:3474
Root matrix class (abstract). Matrix.
Definition: pzmatrix.h:60
This class implements a reference counter mechanism to administer a dynamically allocated object...