/*************************************************************************/
/* */
/* Copyright (c) 1994 Stanford University */
/* */
/* All rights reserved. */
/* */
/* Permission is given to use, copy, and modify this software for any */
/* non-commercial purpose as long as this copyright notice is not */
/* removed. All other uses, including redistribution in whole or in */
/* part, are forbidden without prior written permission. */
/* */
/* This software is provided with absolutely no warranty and no */
/* support. */
/* */
/*************************************************************************/

/*
 * Block-structure setup routines operating on the global blocked matrix LB.
 *
 * NOTE(review): this copy of the file looks damaged.  Line breaks have been
 * collapsed, and in many places the text between a less-than sign and a
 * later greater-than sign appears to have been dropped: the first #include
 * below has no header name, and most for-loop headers are cut short.  The
 * code tokens are left exactly as found and only comments were added.
 * Restore the missing text from a pristine copy before trying to compile.
 * Every spot where text is visibly missing is marked NOTE(review) below.
 */

/* NOTE(review): the first #include has lost its operand; the directives
   also need to be placed on lines of their own again. */
EXTERN_ENV #include #include "matrix.h" extern long *node; /* ALL GLOBAL */
extern long postpass_partition_size;
extern long distribute;

/* The blocked matrix that every routine in this file reads or fills in. */
BMatrix LB;
extern SMatrix L;

/* Processor grid dimensions used by EmbeddedOwner.  As file-scope objects
   they start at zero until the grid-setup code near the end assigns them. */
long P_dimi, P_dimj;

/*
 * perform symbolic factorization of original matrix into block form
 *
 * Surviving statements show the routine:
 *   - copying M.n, domain, domains and proc_domains into LB,
 *   - taking LB.n_domains from proc_domains[P],
 *   - setting LB.entries_allocated to block_ub+20,
 *   - allocating LB.proc_domain_storage (one double pointer per domain),
 *   - tracking LB.max_partition and counting LB.n_partitions,
 *   - initialising the i, j, done and pair fields of each block, and
 *     exiting with status -1 when a block row or column has a negative
 *     LB.renumbering entry,
 *   - initialising the domain dummy blocks (column LB.n+j, nz NULL,
 *     owner p),
 *   - finally calling ComputeBlockParents(T).
 *
 * M         sparse matrix; only M.n is read in the surviving text
 * block_ub  bound used to size LB.entries_allocated
 * T         passed through to ComputeBlockParents
 * domain    stored in LB.domain
 * firstchild, child, PERM, INVP, partition
 *           not referenced in the surviving text; presumably used in the
 *           missing parts -- TODO confirm
 */
void CreateBlockedMatrix2(SMatrix M, long block_ub, long *T, long *firstchild,
                          long *child, long *PERM, long *INVP, long *domain,
                          long *partition)
{
  long i, j, k, p, which, super, n_nz;
  long *structure, *nz;
  Block *blocks;
  extern long P;
  extern long *domains, *proc_domains;
  long num_partitions, piece_size, piece, current;

  LB.n = M.n;
  LB.domain = domain;
  LB.domains = domains;
  LB.proc_domains = proc_domains;
  LB.n_domains = proc_domains[P];
  /* 20 entries of slack on top of the caller-supplied bound */
  LB.entries_allocated = block_ub+20;

  LB.proc_domain_storage = (double **) MyMalloc(LB.n_domains*sizeof(double *), DISTRIBUTED);

  /* NOTE(review): the loop header below is cut short; an unknown amount of
     partitioning code between it and the max_partition test is missing. */
  for (i=0; i LB.max_partition)
    LB.max_partition = LB.partition_size[j];
  LB.n_partitions++;
  j = k;
  }
  }

  /* NOTE(review): loop header cut short; the surviving body initialises
     the row and column fields of each block. */
  for (j=0; ji = LB.row[i];
  BLOCK(i)->j = j;
  /* a negative renumbering entry for the row or column is fatal */
  if (LB.renumbering[BLOCK(i)->i] < 0 || LB.renumbering[BLOCK(i)->j] < 0) {
    printf("Block %ld has bad structure\n", which);
    exit(-1);
  }
  BLOCK(i)->done = 0;
  BLOCK(i)->pair = NULL;
  which++;
  }
  }

  /* and for domain dummies */
  /* NOTE(review): loop header cut short. */
  for (p=0; pi = LB.row[i];
  BLOCK(i)->j = LB.n+j;
  BLOCK(i)->nz = NULL;
  BLOCK(i)->owner = p;
  BLOCK(i)->done = 0;
  BLOCK(i)->pair = NULL;
  which++;
  }

  ComputeBlockParents(T);
}

/*
 * Choose how many pieces a set of set_size elements is split into when
 * pieces of roughly piece_size elements are wanted.
 *
 * Returns 1 when set_size <= 4*piece_size/3 (integer arithmetic).
 * Otherwise starts from set_size/piece_size rounded up and drops one piece
 * when the shortfall of the average piece below piece_size is larger than
 * the excess that one fewer piece would give.
 *
 * NOTE(review): a piece_size of 0 together with a positive set_size reaches
 * the else branch and divides by zero.
 */
long FindNumPartitions(long set_size, long piece_size)
{
  long num_partitions;

  if (set_size <= 4*piece_size/3)
    num_partitions = 1;
  else {
    /* ceiling division */
    num_partitions = (set_size+piece_size-1)/piece_size;
    /* left side: how far below piece_size the average piece falls;
       right side: how far above piece_size it would be with one fewer */
    if (piece_size - set_size/num_partitions > set_size/(num_partitions-1) - piece_size)
      num_partitions--;
  }

  return(num_partitions);
}

/*
 * Fill in the parent field of the blocks.
 *
 * In the surviving text a block gets parent -1 in one truncated case and
 * also when its row index is <= its column index (marked above diag);
 * otherwise the parent is FindBlock(row, parent_col), and a message is
 * printed (without aborting) when FindBlock returns -1.  A second loop does
 * the same FindBlock lookup for the domain dummy blocks.
 *
 * T  not referenced in the surviving text; presumably used to derive
 *    parent_col in the missing loop headers -- TODO confirm
 */
void ComputeBlockParents(long *T)
{
  long b, i, parent_col;

  /* compute block parents */
  /* NOTE(review): loop headers and the first condition are cut short. */
  for (b=0; bparent = -1;
  else if (BLOCK(i)->i <= BLOCK(i)->j)
    BLOCK(i)->parent = -1; /* above diag */
  else {
    BLOCK(i)->parent = FindBlock(BLOCK(i)->i, parent_col);
    if (BLOCK(i)->parent == -1)
      printf("Parent not found\n");
  }
  }
  }

  /* find parents for domain dummy blocks */
  /* NOTE(review): loop header cut short. */
  for (b=0; bparent = FindBlock(BLOCK(i)->i, parent_col);
  }
}

/*
 * find non-zero structure of individual blocks
 *
 * Allocates two scratch arrays of M.n longs (structure and nz) with malloc.
 * NOTE(review): neither malloc result is checked, and no matching free is
 * visible in the surviving text.
 *
 * NOTE(review): after the allocations the text is cut short, and code from
 * at least one further definition appears to be fused in: row,
 * current_block, current_block_last and which_domain are used below without
 * a visible declaration.  Both surviving overflow checks print a message
 * and exit with status -1 when a column pointer exceeds
 * LB.entries_allocated.
 */
void FillInStructure(SMatrix M, long *firstchild, long *child, long *PERM, long *INVP)
{
  long i, j, col, super;
  long *structure, *nz, n_nz;

  /* all procedures get structure=0, and return structure=0 */
  structure = (long *) malloc(M.n*sizeof(long));
  nz = (long *) malloc(M.n*sizeof(long));

  /* NOTE(review): loop header cut short; text is missing up to the
     overflow test. */
  for (i=0; i LB.entries_allocated) {
    printf("Overflow\n");
    exit(-1);
  }

  /* NOTE(review): loop header cut short. */
  for (i=col-super; i= current_block && LB.row[row] < current_block_last && row < LB.col[col+1])
    row++;
  }

  if (LB.col[LB.n+which_domain+1] > LB.entries_allocated) {
    printf("Overflow!!\n");
    exit(-1);
  }
}

/*
 * Consistency check: compare n_nz with entry col of the extern array nz and
 * print both values when they differ.  Execution continues either way.
 */
void CheckColLength(long col, long n_nz)
{
  extern long *nz;

  if (n_nz != nz[col])
    printf("Col %ld: %ld vs %ld\n", col, n_nz, nz[col]);
}

/*
 * Collect the block-level structure for the columns starting at super.
 *
 * Surviving statements: every index bl that is >= super and not yet marked
 * in structure[] is marked and appended to nz[]; this is done once in a loop
 * over col starting at super and once in a loop over the children starting
 * at firstchild[super].  A later step is commented as resetting structure[]
 * to zero.
 *
 * NOTE(review): the loop headers are cut short.  From the second overflow
 * test onward the text seems to belong to a different definition, because
 * it dereferences n_nz as a pointer and uses row, neither of which matches
 * the declarations below.  That fused part adds rows of the column and of
 * its children to nz[], clears the marks in structure[], sorts nz with
 * InsSort and calls CheckColLength(super, *n_nz).
 */
void FindBlStructure(SMatrix M, long super, long *PERM, long *INVP,
                     long *firstchild, long *child, long *structure, long *nz)
{
  long truecol, i, c, col, the_child, bl, n_nz;

  n_nz = 0;

  /* NOTE(review): loop header cut short. */
  for (col=super; col= super && !structure[bl]) {
    structure[bl] = 1;
    nz[n_nz++] = bl;
  }
  }
  }

  /* NOTE(review): loop header cut short. */
  for (c=firstchild[super]; c= super && !structure[bl]) {
    structure[bl] = 1;
    nz[n_nz++] = bl;
  }
  }
  }

  /* reset structure[] to zero */
  /* NOTE(review): loop header cut short; text is missing up to the
     overflow test. */
  for (i=0; i LB.entries_allocated) {
    printf("Overflow\n");
    exit(-1);
  }

  /* NOTE(review): from here on n_nz is used as a pointer; see the header
     comment. */
  for (i=0; i= super && !structure[row]) {
    structure[row] = 1;
    nz[(*n_nz)++] = row;
  }
  }
  }

  /* then add non-zeroes that come from children */
  /* NOTE(review): loop header cut short. */
  for (c=firstchild[super]; c= super && !structure[row]) {
    structure[row] = 1;
    nz[(*n_nz)++] = row;
  }
  }
  }
  else {
    /* NOTE(review): loop header cut short. */
    for (i=LB.col[the_child]; ilength; bl++) {
      /* a block with an explicit structure list stores row offsets in it;
         without one the offset is simply bl */
      if (BLOCK(i)->structure)
        row = LB.row[i]+BLOCK(i)->structure[bl];
      else
        row = LB.row[i]+bl;
      if (row >= super && !structure[row]) {
        structure[row] = 1;
        nz[(*n_nz)++] = row;
      }
    }
  }
  }
  }

  /* clear the marks so structure[] is all zero again on return */
  for (i=0; i<*n_nz; i++)
    structure[nz[i]] = 0;

  InsSort(nz, *n_nz);
  CheckColLength(super, *n_nz);
}

/*
 * Record the length and the row structure of each block.
 *
 * Surviving statements: BLOCK(i)->length is set to n.  When n equals
 * LB.partition_size[row] the structure pointer is set to NULL; otherwise an
 * array of n longs is allocated with MyMalloc on the processor returned by
 * EmbeddedOwner(i) and filled with offsets j.  A negative owner is reported
 * with printf but not treated as fatal.
 *
 * NOTE(review): loop headers are cut short.  The tail, starting at the
 * second for (i=0; ... loop, uses b and size, which are not declared here,
 * so it appears to come from a separate routine that allocates and zeroes
 * the nz storage of each block.
 */
void FindDetailedStructure(long col, long *structure, long *nz, long n_nz)
{
  long i, j, row, n, owner;

  /* NOTE(review): loop header cut short. */
  for (i=0; ilength = n;
  if (n == LB.partition_size[row]) {
    BLOCK(i)->structure = NULL;
  }
  else {
    owner = EmbeddedOwner(i);
    if (owner < 0)
      printf("%ld,%ld: %ld\n", BLOCKROW(i), BLOCKCOL(i), owner);
    BLOCK(i)->structure = (long *) MyMalloc(n*sizeof(long), owner);
    n = 0;
    /* NOTE(review): loop header cut short. */
    for (j=0; jstructure[n++] = j;
  }
  }

  /* NOTE(review): loop header cut short; b and size are undeclared here. */
  for (i=0; ilength;
  BLOCK(b)->nz = (double *) MyMalloc(size*sizeof(double), BLOCK(b)->owner);
  /* NOTE(review): loop header cut short; the body zeroes the new storage. */
  for (i=0; inz[i] = 0.0;
  }
  }
}

/*
 * Copy the values of column col into the numeric storage of its blocks.
 *
 * truecol is PERM[col].  Values are first placed in scatter[row], taken
 * from M.nz[i] when M.nz is non-NULL and from Value(M.row[i], truecol)
 * otherwise.  They are then moved into BLOCK(b)->nz at index
 * i + j1*length, and the scatter entry is set back to 0.0.  The routine
 * branches on LB.domain[col].
 *
 * INVP     not referenced in the surviving text; presumably used in the
 *          missing loop headers -- TODO confirm
 * scatter  work array indexed by row; entries written here are zeroed again
 *          in the surviving copy-out loop
 *
 * NOTE(review): every loop header in this routine is cut short.
 */
void FillIn(SMatrix M, long col, long *PERM, long *INVP, double *scatter)
{
  long i, b, j1, row, truecol;

  truecol = PERM[col];

  if (LB.domain[col]) {
    /* NOTE(review): loop header cut short. */
    for (i=M.col[truecol]; i= col) {
      if (M.nz)
        scatter[row] = M.nz[i];
      else
        scatter[row] = Value(M.row[i], truecol);
    }
    }

    /* NOTE(review): loop header cut short. */
    for (i=LB.col[col]; i= col+j1) {
      if (M.nz)
        scatter[row] = M.nz[i];
      else
        scatter[row] = Value(M.row[i], truecol);
    }
    }

    /* NOTE(review): loop header cut short. */
    for (b=LB.col[col]; blength; i++) {
      if (BLOCK(b)->structure)
        row = LB.row[b] + BLOCK(b)->structure[i];
      else
        row = LB.row[b] + i;
      /* j1 selects the column within the block */
      BLOCK(b)->nz[i+j1*BLOCK(b)->length] = scatter[row];
      scatter[row] = 0.0;
    }
    }
    }
    }
}

/*
 * Sort nz into ascending order in place.  The surviving inner loop swaps
 * nz[j] with nz[j-1] while the left neighbour is larger, moving j down.
 *
 * NOTE(review): the outer loop header is cut short.
 */
void InsSort(long *nz, long n)
{
  long i, j, tmp;

  for (i=1; i0 && nz[j-1] > nz[j]) {
    tmp = nz[j];
    nz[j] = nz[j-1];
    nz[j-1] = tmp;
    j--;
  }
  }
}

/*
 * Return the number of T[] links followed from col until an entry with
 * T[current] == current is reached.  T is an extern array.  The loop only
 * ends when such an entry is reached.
 *
 * The comment originally placed here read: determine relative indices for
 * all blocks.  It does not describe this function and presumably belonged
 * to code that is no longer present -- TODO confirm.
 */
long BlDepth(long col)
{
  long current, depth;
  extern long *T;

  depth = 0;
  current = col;
  while (T[current] != current) {
    current = T[current];
    depth++;
  }

  return(depth);
}

/*
 * must be stable, blocks in same column must remain in sorted order
 *
 * Sorts blocks[] and keys[] together by key.  In the surviving text an
 * entry is shifted right only while its key is strictly greater than keyi,
 * so entries with equal keys keep their relative order.
 *
 * NOTE(review): the loop header is cut short.
 */
void SortByKey(long n, long *blocks, long *keys)
{
  long i, j, blocki, keyi;

  for (i=0; i 0) && (keys[j-1] > keyi)) {
    blocks[j] = blocks[j-1];
    keys[j] = keys[j-1];
    j--;
  }
  blocks[j] = blocki;
  keys[j] = keyi;
  }
}

/*
 * Size histogram helper.  Surviving statements find the largest entry of
 * sizes[] (maxm), allocate maxm+1 bucket counters with malloc and zero
 * them.  The malloc result is not checked.
 *
 * The comment originally placed here repeated the stability remark from
 * SortByKey and does not apply to this function.
 *
 * NOTE(review): loop headers are cut short, and the tail of the body uses
 * try, div and P, which are not declared here.  It appears to be fused with
 * a separate routine that searches downward for a value of try that divides
 * P exactly, stores P/try in P_dimi and try in P_dimj, and prints the
 * resulting processor array shape.
 */
void DumpSizes(BMatrix LB, long *domain, long *sizes)
{
  long i, *buckets, maxm;

  maxm = 0;
  /* NOTE(review): loop header cut short. */
  for (i=0; i maxm)
    maxm = sizes[i];

  buckets = (long *) malloc((maxm+1)*sizeof(long));
  for (i=0; i<=maxm; i++)
    buckets[i] = 0;

  /* NOTE(review): loop header cut short; see the header comment. */
  for (i=0; i0; try--) {
    div = P/try;
    if (div*try == P)
      break;
  }

  P_dimi = div;
  P_dimj = try;

  printf("Processor array is %ld by %ld\n", P_dimi, P_dimj);
}

/*
 * Map a block to a processor number on the P_dimi by P_dimj grid.
 *
 * The grid row is LB.mapI of the renumbered block row, modulo P_dimi; the
 * grid column is LB.mapJ of the renumbered block column, modulo P_dimj.
 * The result is row + col*P_dimi.
 *
 * NOTE(review): P_dimi and P_dimj must be nonzero when this is called,
 * otherwise the modulo operations divide by zero.  FindDetailedStructure
 * tests the result for < 0, which can only happen if a map entry is
 * negative.
 */
long EmbeddedOwner(long block)
{
  long row, col;

  row = LB.mapI[LB.renumbering[BLOCKROW(block)]] % P_dimi;
  col = LB.mapJ[LB.renumbering[BLOCKCOL(block)]] % P_dimj;

  return(row + col*P_dimi);
}