SYSTEM  systolic_array;
(* Compute the matrix product "c := a * b" for n x n REAL matrices. *)
CONST q      = 2;          (* exponent: the matrix dimension is 2 ** q *)
      n      = 2 ** q;     (* matrix dimension; 4 for q = 2 *)
TYPE  matrix = ARRAY [1..n],[1..n] OF REAL;

(* Two configurations are declared:
     grid  - three dimensions of extent n         (n*n*n = 64 positions for q = 2)
     hyper - six dimensions of extent 2           (2**6  = 64 positions)
   The sizes agree only while 3*q = 6, i.e. q = 2; "hyper" is written out by
   hand and must be edited together with q.
   NOTE(review): presumably both configurations describe the same set of
   processing elements viewed with different index schemes - confirm against
   the Parallaxis manual. *)
CONFIGURATION  grid [n],[n],[n];
               hyper[2],[2],[2],[2],[2],[2];

(* Hypercube links: d(x) joins every PE to the PE whose x-th coordinate is
   flipped ((coord+1) mod 2 toggles 0 <-> 1).  Each link arrives at the
   neighbour's port of the same number, so d(x) is its own reverse direction.
   d(0) previously arrived at d(1), leaving d(1) with two incoming links and
   d(0) with none. *)
CONNECTION     d(0): hyper[h,i,j,k,l,m] -> hyper[(h+1) mod 2,i,j,k,l,m].d(0);
               d(1): hyper[h,i,j,k,l,m] -> hyper[h,(i+1) mod 2,j,k,l,m].d(1);
               d(2): hyper[h,i,j,k,l,m] -> hyper[h,i,(j+1) mod 2,k,l,m].d(2);
               d(3): hyper[h,i,j,k,l,m] -> hyper[h,i,j,(k+1) mod 2,l,m].d(3);
               d(4): hyper[h,i,j,k,l,m] -> hyper[h,i,j,k,(l+1) mod 2,m].d(4);
               d(5): hyper[h,i,j,k,l,m] -> hyper[h,i,j,k,l,(m+1) mod 2].d(5);


(* Module-level scalar data. *)
SCALAR i,j         : INTEGER;   (* loop indices used by the main body *)
       a,b,c       : matrix;    (* a, b: input matrices; c: passed to matrix_mult as result *)


PROCEDURE matrix_mult(SCALAR VAR a,b,c : matrix);
(* Intended to compute c := a * b on the "grid" / "hyper" configurations.

   Parameters (all passed by reference):
     a, b - input matrices; loaded into grid plane [*],[*],[0]
     c    - receives the contents of vc from grid plane [*],[*],[0]

   Side effect: the debug section prints every plane of va via "out".

   NOTE(review): only "step 1" and "step 4" are present.  As written, step 4
   forms the per-PE product va * vb, which looks like an element-wise product
   of the two loaded planes rather than a matrix product - steps 2 and 3
   still appear to be missing; confirm before relying on c. *)
SCALAR m,dir,k : INTEGER;
       dump    : matrix;     (* scratch target for the debug dump, so that
                                the caller's matrices are never overwritten *)
VECTOR va,vb,vc: REAL;
BEGIN
  (* clear the operand vectors on every PE of the grid *)
  PARALLEL grid
    va := 0.0;
    vb := 0.0;
  ENDPARALLEL;

  (* place both input matrices on the plane with third index 0 *)
  LOAD grid[*],[*],[0] (va,a);
  LOAD grid[*],[*],[0] (vb,b);

  (* step 1: send va along hypercube links d(q+2), d(q+3), ... ; in round m
     the senders are the PEs whose bit m of dim3 is 0.
     NOTE(review): presumably this copies va from plane 0 to every plane
     along the third grid dimension - confirm the grid/hyper index mapping. *)
  dir := q+2;
  FOR m := q-1 TO 0 BY -1 DO
    PARALLEL grid[*],[*],[(dim3 DIV 2**m) MOD 2 = 0]
      SEND hyper.d(dir) (va) TO hyper.d(dir) (va);
    ENDPARALLEL;
    (* scalar bookkeeping kept outside the parallel block: next link *)
    dir := dir+1;
  END;

  (* test: print va plane by plane.  Uses a local index and a local scratch
     matrix instead of the module-level "i" and the VAR parameter "a". *)
  FOR k := 0 TO n-1 DO
    WriteCard(k,4); WriteLn;
    STORE grid[*],[*],[k] (va,dump);
    out(dump);
  END;

  (* step 4: per-PE product, result taken from plane 0 *)
  PARALLEL grid
    vc := va * vb;
  ENDPARALLEL;
  STORE grid[*],[*],[0] (vc,c);
END matrix_mult;


PROCEDURE out(SCALAR VAR a: matrix);
(* Print matrix "a" row by row: every element in fixed-point format
   (field width 10, 2 decimals), one text line per matrix row, followed by
   one empty line after the last row. *)
SCALAR row,col: CARDINAL;
BEGIN
  FOR row := 1 TO n DO
    FOR col := 1 TO n DO
      WriteFixPt(a[row,col], 10,2)
    END;
    WriteLn                        (* end of this matrix row *)
  END;
  WriteLn                          (* blank separator line *)
END out;

BEGIN
  (* Preset the input matrices "a" and "b" (they could be read from a file
     instead).  Each element is the last decimal digit of its row-major
     position; "b" is shifted by one relative to "a". *)
  FOR i := 1 TO n DO
    FOR j := 1 TO n DO
      a[i,j] := FLOAT( (n*(i-1) + j)     MOD 10 );
      b[i,j] := FLOAT( (n*(i-1) + j + 1) MOD 10 );
    END;
  END;

  (* show both operands *)
  out(a);
  out(b);

  matrix_mult(a,b,c);

  (* print result matrix "c" *)
  out(c);

END systolic_array.
