Paragon Program Listing
program filtered_bak
C****************************************************************
C PROGRAM FOR CARRYING OUT TOMOGRAPHIC RECONSTRUCTION
C FOR 2d PROJECTION DATA FOR PARALLEL BEAM PROJECTIONS
C ON THE INTEL PARAGON.
C
C PROGRAMMER: RAMAN RAO
C DATE OF LAST REVISION: 1/2/95
C
C VIRGINIA POLYTECHNIC INSTITUTE AND STATE UNIVERSITY
C BLACKSBURG, VA 24061
C***************************************************************
C
C u = # of rows per processor(=# of angles/nproc )
C nproc = np = number of processors used
C np1 = np-1
C nnodes=number ofnodes (generated by the system)
C iam = my processor number
C ptype = process type
C szreal = number of bytes in type real in fortran
C power2 = length of the final image in the power of 2
C MAX = actual length of the final image
C rays = # of rays used in the forward projection
C rows=number of anglesa
C cols = number of detectors(rays)
C odim = object dimension
C rdim = reconstruction dimension (=xdim=ydim in this object only)
C xdim, ydim = size of the final image
C etime = elapsed time
C etime1-6 = elapsed times for intermediate stages of the program
C step = step size for generating the checker board
C blank = density allocated inside the object
C den = same as the above
C object = object to be projected and then backprojected
C r=xcos(theta)+ysin(theta) normalized to lie in the range (-1,1)
C pwr2=power of 2 (real), ipower2=int(pwr2)
C mat= initial projection matrix
C pmat = projection matrix on each processor. This is the same
C as the matrix 'mat' except that the data contained in
C 'pmat' is different on each processor (contains data
C pertinant to that processor)
C nbytes_pmat=number of bytes in the 'pmat' matrix
C H = row matrix of the filter/response function
C nbytes_h = number of bytes in the H matrix
C P = temporary row matrix for storing the rows of 'pmat'
C lb = low bin; hb = high bin; mb = mid bin
C i,j,k,iall,jall,ii1,l1,ii,ii2,ik: all are temporary variables
C for running the 'do loops'
C tmp_xy= 2D array for storing the partial summations in the
C back-projection stage
C sum_xy = 2D array for storing the summation for all (x,y)
C msg_xy = 2D array which is the same as tmp_xy but is a
C message sent from some processor to the root
C or zeroth processor
C f = the final matrix containing the reconstructed densities
implicit none
include 'fnx.h'
integer max,angles,rays,xdim,ydim,u,nproc,power2,iix,ij
integer odim,step,iproc
parameter(MAX=64,angles=128,rays=16,xdim=64,ydim=64)
parameter(odim=16,step=4)
parameter(u=128,nproc=1)
parameter(power2=6)
complex xx,H,pmat,mat,P,tmp2
real tau,theta,r,beta,frac,x_max,y_max,mb,mp,tw
real delq,tmp_xy,T,sum,f,msg,pwr2,pi,x_cen,y_cen
real tot_angle_sum,proc_angle_sum,sum_s,msg_s,const
real proc_zero_ang_sum,sum_xy,msg_xy
double precision stime,etime,etime1,etime2,etime3,etime4
double precision etime5,etime6
integer rows,cols,n1,k1,ptype,nnodes,iam,x,y,int_val
integer iall,jall,i,j,k,nbytes_pmat,ii1,l1,ii,ii2,ik
integer ipwr2,szreal,npp,np,nbytes_p,nbytes_h,lb,hb
integer blank,den,i1,j1,np1
real x_cen_o,y_cen_o,object
dimension xx(16),H(MAX),mat(angles,rays),pmat(u,MAX)
dimension P(MAX),T(30),f(xdim,ydim),sum(ydim),msg(ydim)
dimension object(odim,odim)
dimension proc_zero_ang_sum(MAX),proc_angle_sum(MAX)
dimension tot_angle_sum(MAX),tmp_xy(xdim,ydim)
dimension sum_xy(xdim,ydim),msg_xy(xdim,ydim)
C begin initializations
etime=0.0
etime1=0.0
etime2=0.0
etime3=0.0
etime4=0.0
etime5=0.0
etime6=0.0
stime=0.0
PI = acos(-1.0)
c stime = dclock()
c write(*,*) 'PI=',PI
np=nproc
np1 = nproc-1
write(*,*) 'np1=',np1
szreal = 4
rows = angles
cols = rays
tau = 1.0
npp = rows/np
nbytes_pmat = MAX*u*szreal*2
nbytes_h = MAX*szreal*2
x_max = real(xdim)
y_max = real(ydim)
x_cen = x_max/2.
y_cen = y_max/2.
x_cen_o=real(odim)/2.0
y_cen_o=real(odim)/2.0
mp = real(rays)/2.
tw = 1./(1.15*sqrt(2.))
const = PI/real(rows)
write(*,*) 'const=',const
ipwr2 = power2
C initialize system variables on each node
iam = mynode()
nnodes = numnodes()
ptype = myptype()
C find the start time
stime = dclock()
C init pmat in all processors to zero: this step takes care
C of the additional padding with zero's which is required later
do iall=1,u
do jall = 1,MAX
pmat(iall,jall)=(0.,0.)
enddo
enddo
etime1 = dclock()-stime
write(*,*) 'etime1=',etime1
do iall=1,odim
do jall=1,odim
object(iall,jall)=0.
enddo
enddo
if (iam. eq. 0) then
write(*,*) 'Number of processors is',np
write(*,*) 'angles=',rows,'rays=',cols
C generating the checker board (object to be reconstructed)
blank =0
do i=1,odim,step
if (blank .eq. 0) then
blank = 255
else
blank = 0
endif
do j=1,odim,step
if (blank .eq. 0) then
blank = 255
else
blank = 0
endif
do i1=i,i+step
do j1=j,j+step
object(i1,j1)=real(blank)
c write(*,*) object(i1,j1),'i1=',i1,'j1=',j1
enddo
enddo
enddo
enddo
etime2 = dclock()-stime
write(*,*) 'etime2=',etime2
write(*,*) 'finished generating the object'
C uncomment the following lines if you need to write the object to a file
c open (unit = 19,file ='object.fil',status='old',form='formatted')
c do iix=1,odim
c do ii=1,odim
c write(19,*) object(iix,ii)
c enddo
c enddo
c close(19)
c write(*,*) 'finished writing the object.fil'
C Evaluate the response function, find its fft
C and send this data to the other nodes (p. 72 of kak)
call evalh2(H,1.0,cols)
write(*,*) 'H evaluated'
C uncomment the following line if you need to write H to a file
c open (unit = 15,file ='h.fil',status='old',form='formatted')
c do i=1,MAX
c write(15,*) 'i=',i,'H=',H(i)
c enddo
c close(15)
C broadcast the object and H to all the processors
call csend(20,object,odim*odim*szreal,-1,ptype)
call csend(20,H,nbytes_h,-1,ptype)
else
C Give the corresponding receives to synchronize send's and receive's
call crecv(20,object,odim*odim*szreal,0,ptype)
call crecv(20,H,nbytes_h,0,ptype)
end if
etime3 = dclock()-stime
write(*,*) 'etime3=',etime3
C forward project the object.
C This step directly generates the 'pmat' matrices
C for all the processors
C finding the forward projections in parallel on all the nodes
do i=1,odim
do j=1,odim
do k=1,u
theta = 2.0*real((u*iam)+k)*pi/real(angles)
r = cos(theta)*((real(i)-x_cen_o)/x_cen_o)
$ +sin(theta)*((real(j)-y_cen_o)/y_cen_o)
mb=(r*mp*tw)+mp
lb=int(mb)
c write(*,*) 'lb=',lb
hb=lb+1
frac=mb-lb
pmat(k,lb)=pmat(k,lb)+cmplx((1.0-frac)*object(i,j))
pmat(k,hb)=pmat(k,hb)+cmplx(frac*object(i,j))
enddo
enddo
enddo
write(*,*) 'finished the forward projections'
etime4 = dclock()-stime
write(*,*) 'etime4=',etime4
C Now find the FFT of the input matrix row-wise on all the nodes
C this is done in parallel but row-wise serial on all the nodes
C The results of the FFT are again stored in the pmat matrix
do i=1,u
C write a row of PMAT to P
do j=1,MAX
P(j) = PMAT(i,j)
enddo
C find the FFT
call diffft(P,ipwr2)
C write back P into PMAT
do l1=1,MAX
PMAT(i,l1) = P(l1)
enddo
enddo
C uncomment if you need to verify this step
c open (unit = 18,file ='pmat1.fil',status='old'
c $,form='formatted')
c do i =1,np
c if (iam .eq. i-1) then
c do ii=1,u
c write(18,*) 'iam=',iam,'row=',ii
c write(18,*) (pmat(ii,ij), ij=1,MAX)
c enddo
c endif
c enddo
c close(18)
C multiply the two funs H and and pmat. Multiply here
C means scalar multiplication of equivalent elements in the
C same position (becomes convolution in the time domain).
C This part is executed in parallel on all the nodes
C ref( p. 75 kak). Multiplication with a Hamming function
C is also done right here
do i=1,u
do j=1,MAX
PMAT(i,j) = PMAT(i,j) * H(j) *
$ (0.54-0.46*cos((2.0*(real(j-1))*pi)/real(MAX)))
c write(*,*) 'i=',i,'j=',j,'pmat=',PMAT(i,j)
enddo
enddo
C uncomment if you need to verify this step
c open (unit = 17,file ='pmat2.fil',status='old',form='formatted')
c do iix=1,nproc
c call gsync()
c if (iam .eq. iix-1) then
c do ii=1,u
c write(17,*) 'iam=',iam,'row=',ii
c write(17,*) (pmat(ii,ij), ij=1,MAX)
c enddo
c endif
c enddo
c write(*,*) 'I finished the point to point multiply','iam=',iam
c write(*,*) 'iam=',iam
c close(17)
C find the row-wise IFFT of the new matrix
C IFFT is found by: cmplx conj. of the data row by row, finding the
C forward transform, again cmplx conj. and then multiply by the
C scale factor. This step proceeds in parallel on all the
C nodes(ref. any d.s.p. book or d.s.p. by Oppenheim and Schafer.
C for all rows on the processor do:
do i=1,u
C move a row from PMAT to P
do j=1,MAX
P(j) = PMAT(i,j)
enddo
C find the complex conj. of each element
do ij=1,MAX
tmp2 = P(ij)
P(ij) = conjg(tmp2)
enddo
C find the FFT
call diffft(P,ipwr2)
C find the complex conj. of each element
do ij=1,MAX
tmp2 = P(ij)
P(ij) = conjg(tmp2)
enddo
C normalize by the scale factor
do ik =1,MAX
tmp2= P(ik)
P(ik) = (tmp2)/real(MAX)
enddo
C put it bak in the original matrix
do j=1,MAX
PMAT(i,j) = P(j)
enddo
enddo
write(*,*) 'Finished calculating the filtered vals'
C synchronize all processors: This is done to time the filtered
C projections
call gsync()
etime5 = dclock() - stime
write(*,*) 'etime5=',etime5
etime = etime1 + (etime5-etime4)
if (iam .eq. 0) then
write(*,*) 'time for calculating filtered projections is',etime
write(*,*) 'iam=',iam
endif
C uncomment if you need to verify results at this stage
c open (unit = 13,file ='pmat3.fil',status='old',form='formatted')
c do iix=1,nproc
c call gsync()
c if (iam .eq. iix-1) then
c do ii=1,u
c write(13,*) 'iam=',iam,'row=',ii
c write(13,*) (PMAT(ii,ij), ij=1,MAX)
c enddo
c endif
c enddo
c close(13)
c do i=1,u
c theta = real((u*iam)+i)*PI/real(angles)
c write(*,*) 'theta=',theta,'iam=',iam
c enddo
C Backprojection begins here
C
C initialize
do x=1,xdim
do y=1,ydim
tmp_xy(x,y)=0.0
msg_xy(x,y)=0.0
sum_xy(x,y)=0.0
enddo
enddo
C now carrying out the summation in the backprojection stage
C (ref. p.67 kak. The summation on p.67 is split into smaller
C summations on each processor. Then these smaller summations
C are transferred to the zeroth processor for finding the
C total sum.
C
C finds the tmp_xy's for each x and y
C for all x and y do:
do x = 1,xdim
do y= 1,ydim
C fo all rows on each processor do:
do i= 1,u
C theta evaluated: Note 2.0*PI may not be required
C
C Just PI will suffice as the multiplier
theta = 2.0*PI*real((u*iam)+i)/real(angles)
C find the value of 't'(ref. p49 kak)
r = cos(theta)*((x-x_cen)/x_cen)
$ + sin(theta)*((y-y_cen)/y_cen)
C
C find the mid bin, low bin and high bin
mb= (r*mp*tw) + mp
lb = int(mb)
hb = int(mb)+1
C
C find the fractional value of the mid bin
frac = mb-lb
tmp_xy(x,y)=tmp_xy(x,y)+
$ real(PMAT(i,lb))*(1.0-frac) +
$ frac*real(pmat(i,hb))
c write(*,*) 'mb=',mb,'lb=',lb,'hb=',hb,'frac=',frac
enddo
enddo
enddo
C transfer all the tmp_xy's to the zeroth processor
C and add them all up.
if (iam .ne. 0) then
call csend(20,tmp_xy,xdim*ydim*szreal,0,ptype)
else
C if iam the zeoro'th processor:
C initialize the sum to zero
do x=1,xdim
do y=1,ydim
sum_xy(x,y) = 0.0
enddo
enddo
C for all the processors on the system other than zero do:
do ii1 = 1,np1
C
C receive the tmp_xy sent earlier
call crecv(20,msg_xy,xdim*ydim*szreal,ii1,ptype)
C
C add them all up as they are received
do x=1,xdim
do y=1,ydim
sum_xy(x,y)=sum_xy(x,y)+msg_xy(x,y)
enddo
enddo
enddo
C find the density matrix
do x=1,xdim
do y=1,ydim
f(x,y)=const*(sum_xy(x,y)+tmp_xy(x,y))
enddo
enddo
C endif for the zero'th processor
endif
C synchronize all the processors for getting the timing information
call gsync()
if (iam .eq. 0) then
etime6 = dclock() - stime
write(*,*) 'etime6=',etime6
etime = etime1+(etime5-etime4)+(etime6-etime5)
write(*,*) 'total elapsed time=',etime,'iam=',iam
C
C write the densities to a file
open (unit = 11,file ='f.fil',status='old',
$ form='formatted')
write(11,*), ((f(i,j),j=1,ydim),i=1,xdim)
close(11)
endif
C end of main program
stop
end
C finds the FFT. The method used is 'decimation in time'
SUBROUTINE DIFFFT(X,NU)
COMPLEX X(1024),U,W,T
N=2**NU
PI=3.14159265358979
DO 20 L=1,NU
LE=2**(NU+1-L)
LE1=LE/2
U=(1.,0.)
W=CMPLX(COS(PI/FLOAT(LE1)),-SIN(PI/FLOAT(LE1)))
DO 20 J=1,LE1
DO 10 I=J,N,LE
IP=I+LE1
T=X(I)+X(IP)
X(IP)=(X(I)-X(IP))*U
10 X(I)=T
20 U=U*W
NV2=N/2
NM1=N-1
J=1
DO 30 I=1,NM1
IF (I .GE. J) GO TO 25
T=X(J)
X(J)=X(I)
X(I)=T
25 K=NV2
26 IF (K .GE. J) GO TO 30
J=J-K
K=K/2
GO TO 26
30 J=J+K
RETURN
END
C evaluates the filter (or response function) in the
C frequency domain.
subroutine evalh2(H,tau,N)
real tau, PI
integer N,j,i,k,ii,MAX,HMAX
parameter(MAX=64,HMAX=32)
complex H(MAX),a1(HMAX)
PI = acos(-1.0)
do j=1,MAX
H(j) = (0.,0.)
enddo
do i=1,HMAX
a1(i) = real(i)
enddo
do i =1,MAX
if (i .le. HMAX) then
H(i) = real(i)
else
H(i) = real(MAX)+1.0 -real(i)
endif
enddo
C Optional: can multiply with a Hamming window function here.
C Not required really, but can be used to get better(?) images.
C Note that the filter function C changes from a ramp on using
C this Hamming multiply.
c do i = 1,MAX
c H(i) = H(i)*(.54 + .46*cos(2.0*pi*real(i)/(real(MAX))))
c enddo
return
end