|
mops.h00001 // 00002 // mops.h --- block matrix operations 00003 // 00004 // Copyright (C) 1997 Limit Point Systems, Inc. 00005 // 00006 // Author: Edward Seidl <seidl@janed.com> 00007 // Maintainer: LPS 00008 // 00009 // This file is part of the SC Toolkit. 00010 // 00011 // The SC Toolkit is free software; you can redistribute it and/or modify 00012 // it under the terms of the GNU Library General Public License as published by 00013 // the Free Software Foundation; either version 2, or (at your option) 00014 // any later version. 00015 // 00016 // The SC Toolkit is distributed in the hope that it will be useful, 00017 // but WITHOUT ANY WARRANTY; without even the implied warranty of 00018 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00019 // GNU Library General Public License for more details. 00020 // 00021 // You should have received a copy of the GNU Library General Public License 00022 // along with the SC Toolkit; see the file COPYING.LIB. If not, write to 00023 // the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 00024 // 00025 // The U.S. Government is granted a limited license as per AL 91-7. 00026 // 00027 00028 #ifndef _math_scmat_mops_h 00029 #define _math_scmat_mops_h 00030 00031 #define D1 32 00032 00033 // copy a chunk of rectangular matrix source into dest. dest is D1xD1, and is 00034 // padded with zeros 00035 00036 static inline void 00037 copy_block(double **dest, double **source, 00038 int istart, int ni, int jstart, int nj) 00039 { 00040 int ii,jj; 00041 00042 for (ii=0; ii < ni; ii++) { 00043 double *di = dest[ii]; 00044 double *si = &source[istart+ii][jstart]; 00045 for (jj=0; jj < nj; jj++) 00046 di[jj] = si[jj]; 00047 for (; jj < D1; jj++) 00048 di[jj] = 0; 00049 } 00050 00051 int left=D1-ii; 00052 if (left) 00053 memset(dest[ii], 0, sizeof(double)*left*D1); 00054 } 00055 00056 static inline void 00057 copy_trans_block(double **dest, double **source, 00058 int istart, int ni, int jstart, int nj) 00059 { 00060 int ii,jj; 00061 00062 memset(dest[0], 0, sizeof(double)*D1*D1); 00063 00064 for (jj=0; jj < nj; jj++) { 00065 double *sj = &source[jstart+jj][istart]; 00066 for (ii=0; ii < ni; ii++) 00067 dest[ii][jj] = sj[ii]; 00068 } 00069 } 00070 00071 // copy a chunk of symmetric matrix source into dest. dest is D1xD1, and is 00072 // padded with zeros 00073 static inline void 00074 copy_sym_block(double **dest, double **source, 00075 int istart, int ni, int jstart, int nj) 00076 { 00077 int ii,jj; 00078 00079 for (ii=0; ii < ni; ii++) { 00080 double *di = dest[ii]; 00081 double *si = &source[istart+ii][jstart]; 00082 00083 if (jstart < istart) 00084 for (jj=0; jj < nj; jj++) 00085 di[jj] = si[jj]; 00086 else if (jstart==istart) 00087 for (jj=0; jj <= ii; jj++) 00088 di[jj] = dest[jj][ii] = si[jj]; 00089 else 00090 for (jj=0; jj < nj; jj++) 00091 di[jj] = source[jstart+jj][istart+ii]; 00092 00093 for (jj=nj; jj < D1; jj++) 00094 di[jj] = 0; 00095 } 00096 00097 int left=D1-ii; 00098 if (left) 00099 memset(dest[ii], 0, sizeof(double)*left*D1); 00100 } 00101 00102 static inline void 00103 return_block(double **dest, double **source, 00104 int istart, int ni, int jstart, int nj) 00105 { 00106 int ii,jj; 00107 00108 for (ii=0; ii < ni; ii++) 00109 for (jj=0; jj < nj; jj++) 00110 dest[istart+ii][jstart+jj] = source[ii][jj]; 00111 } 00112 00113 // a, b, and c are all D1xD1 blocks 00114 static inline void 00115 mult_block(double **a, double **b, double **c, int ni, int nj, int nk) 00116 { 00117 int ii,jj,kk; 00118 double t00,t10,t20,t30; 00119 double *a0, *a1, *a2, *a3; 00120 double *c0, *c1, *c2, *c3; 00121 00122 for (ii=0; ii < ni; ii += 4) { 00123 a0=a[ii]; a1=a[ii+1]; a2=a[ii+2]; a3=a[ii+3]; 00124 c0=c[ii]; c1=c[ii+1]; c2=c[ii+2]; c3=c[ii+3]; 00125 00126 for (jj=0; jj < nj; jj++) { 00127 double *bt = b[jj]; 00128 t00=c0[jj]; t10=c1[jj]; t20=c2[jj]; t30=c3[jj]; 00129 00130 for (kk=0; kk < nk; kk += 2) { 00131 register double b0=bt[kk], b1=bt[kk+1]; 00132 t00 += a0[kk]*b0 + a0[kk+1]*b1; 00133 t10 += a1[kk]*b0 + a1[kk+1]*b1; 00134 t20 += a2[kk]*b0 + a2[kk+1]*b1; 00135 t30 += a3[kk]*b0 + a3[kk+1]*b1; 00136 } 00137 00138 c0[jj]=t00; 00139 c1[jj]=t10; 00140 c2[jj]=t20; 00141 c3[jj]=t30; 00142 } 00143 } 00144 } 00145 00146 #endif 00147 00148 // Local Variables: 00149 // mode: c++ 00150 // c-file-style: "ETS" 00151 // End: Generated at Fri Jan 10 08:14:09 2003 for MPQC 2.1.3 using the documentation package Doxygen 1.2.14. |