Hello,
I would like to speed up the execution of my parallelized fortran program. I think it's not relevant that it's parallelized.
My program calls many times in the main and in a subroutine a big array (2,200,000 rows x 6 columns, >200 Megabytes) and then interpolates into the array; the array represents a cubic mesh with a vectorial field defined in every cell, so there are 3 space variables (xT,yT,zT) and 3 vector components (Bx,By,Bz) at every cell. I use an external module to declare the array and I would like not to call the array every time I call the subroutine "array". Below an excerpt:
module variables
real*8, dimension), allocatable:: xT,yT,zT
real*8, dimension), allocatable:: Bx,By,Bz
end module variables
Program main
include 'mpif.h'
integer*4 i,npart,k,nstep
integer*4 ic, npartj
integer*4 mpierr,npe,mype
integer*4 j,lfile,imin
…..
character(LEN=80)::filename_B
….
common /B/ B0,Btot,P0,conv,lfile,imin
call MPI_Init(mpierr)
call MPI_Comm_size(MPI_COMM_WORLD, npe, mpierr)
call MPI_Comm_rank(MPI_COMM_WORLD, mype, mpierr)
open(5,file=filename_B)
do i=1,10000000
read(5,*,end=198) t,t,t,t,t,t
lfile = lfile + 1
198 enddo
close(5)
print*,"lfile = ", lfile
open(5,file=filename_B,action='read',status='old')
allocate(xT(lfile),yT(lfile),zT(lfile))
allocate(Bx(lfile),By(lfile),Bz(lfile))
….
! Loop on particles
do 102 npartj=1,npart_local
Xin = 1.
Yin = 2.
Zin = 2.
…..
call array (Xin,Yin,Zin)
…...
do 203 i=1,nstep
…..
call odeint(y,nvar,t1,t2,eps,h1,hmin,nok,nbad,derivs,bsstep)
….
call magfield3d (y(1), y(2), y(3))
…..
203 continue
…
102 continue
call MPI_Barrier (MPI_COMM_WORLD, mpierr)
deallocate(Bx,By,Bz)
deallocate(xT,yT,zT)
print*, "Done"
close(5)
call MPI_Finalize(mpierr)
end
! Subroutine for the interpolation of the array
subroutine array(x,y,z)
use variables
implicit none
integer*4 j,lfile,imin
real*8 B0,Btot,P0,conv,PB,Bmax
real*8 x,y,z
common /B/ B0,Btot,P0,conv,lfile,imin
do j=1,lfile
read(5,*) xT(j),yT(j),zT(j),Bx(j),By(j),Bz(j)
dist(j)=dsqrt((x-xT(j))**2.+(y-yT(j))**2.+(z-zT(j))**2.)
dist2(j)=dsqrt((Bx(j))**2.+(By(j))**2.+(Bz(j))**2.)
enddo
P0 = minval(dist) !interpolate particle position on the grid
PB = maxval(dist2)
imin = minloc(dist,DIM=1)
Bmax = maxloc(dist2,DIM=1)
Btot = dsqrt(Bx(imin)**2.+By(imin)**2.+Bz(imin)**2.) ! in gauss
rewind(5)
return
end
I would like to speed up the execution of my parallelized fortran program. I think it's not relevant that it's parallelized.
My program calls many times in the main and in a subroutine a big array (2,200,000 rows x 6 columns, >200 Megabytes) and then interpolates into the array; the array represents a cubic mesh with a vectorial field defined in every cell, so there are 3 space variables (xT,yT,zT) and 3 vector components (Bx,By,Bz) at every cell. I use an external module to declare the array and I would like not to call the array every time I call the subroutine "array". Below an excerpt:
module variables
real*8, dimension), allocatable:: xT,yT,zT
real*8, dimension), allocatable:: Bx,By,Bz
end module variables
Program main
include 'mpif.h'
integer*4 i,npart,k,nstep
integer*4 ic, npartj
integer*4 mpierr,npe,mype
integer*4 j,lfile,imin
…..
character(LEN=80)::filename_B
….
common /B/ B0,Btot,P0,conv,lfile,imin
call MPI_Init(mpierr)
call MPI_Comm_size(MPI_COMM_WORLD, npe, mpierr)
call MPI_Comm_rank(MPI_COMM_WORLD, mype, mpierr)
open(5,file=filename_B)
do i=1,10000000
read(5,*,end=198) t,t,t,t,t,t
lfile = lfile + 1
198 enddo
close(5)
print*,"lfile = ", lfile
open(5,file=filename_B,action='read',status='old')
allocate(xT(lfile),yT(lfile),zT(lfile))
allocate(Bx(lfile),By(lfile),Bz(lfile))
….
! Loop on particles
do 102 npartj=1,npart_local
Xin = 1.
Yin = 2.
Zin = 2.
…..
call array (Xin,Yin,Zin)
…...
do 203 i=1,nstep
…..
call odeint(y,nvar,t1,t2,eps,h1,hmin,nok,nbad,derivs,bsstep)
….
call magfield3d (y(1), y(2), y(3))
…..
203 continue
…
102 continue
call MPI_Barrier (MPI_COMM_WORLD, mpierr)
deallocate(Bx,By,Bz)
deallocate(xT,yT,zT)
print*, "Done"
close(5)
call MPI_Finalize(mpierr)
end
! Subroutine for the interpolation of the array
subroutine array(x,y,z)
use variables
implicit none
integer*4 j,lfile,imin
real*8 B0,Btot,P0,conv,PB,Bmax
real*8 x,y,z
common /B/ B0,Btot,P0,conv,lfile,imin
do j=1,lfile
read(5,*) xT(j),yT(j),zT(j),Bx(j),By(j),Bz(j)
dist(j)=dsqrt((x-xT(j))**2.+(y-yT(j))**2.+(z-zT(j))**2.)
dist2(j)=dsqrt((Bx(j))**2.+(By(j))**2.+(Bz(j))**2.)
enddo
P0 = minval(dist) !interpolate particle position on the grid
PB = maxval(dist2)
imin = minloc(dist,DIM=1)
Bmax = maxloc(dist2,DIM=1)
Btot = dsqrt(Bx(imin)**2.+By(imin)**2.+Bz(imin)**2.) ! in gauss
rewind(5)
return
end