CP2K 9.1
Webpage
Version
9.1 (9.1.0)
Build Environment
- Intel Parallel Studio 2020 Update 2 (only MPI)
- GCC 9.3.1 (devtoolset-9)
- cmake 3.16.3
Files Required
- cp2k-9.1.0.tar.gz
- tc-install-intelmpi.sh.diff (forces the toolchain to use mpicc, mpicxx, and mpif90 instead of mpiicc, mpiicpc, and mpiifort)
--- scripts/stage1/install_intelmpi.sh.org 2022-01-26 09:13:02.023215170 +0900
+++ scripts/stage1/install_intelmpi.sh 2022-01-26 09:14:03.485735051 +0900
@@ -32,15 +32,9 @@
__SYSTEM__)
echo "==================== Finding Intel MPI from system paths ===================="
check_command mpirun "intelmpi" && MPIRUN="$(command -v mpirun)" || exit
- check_command mpiicc "intelmpi" && MPICC="$(command -v mpiicc)" || exit
- check_command mpiifort "intelmpi" && MPIFC="$(command -v mpiifort)" || exit
- if [ $(command -v mpiicpc >&- 2>&-) ]; then
- check_command mpiicpc "intelmpi" && MPICXX="$(command -v mpiicpc)"
- elif [ $(command -v mpic++ >&- 2>&-) ]; then
- check_command mpic++ "intelmpi" && MPICXX="$(command -v mpic++)"
- else
- check_command mpicxx "intelmpi" && MPICXX="$(command -v mpicxx)" || exit
- fi
+ check_command mpicc "intelmpi" && MPICC="$(command -v mpicc)" || exit
+ check_command mpif90 "intelmpi" && MPIFC="$(command -v mpif90)" || exit
+ check_command mpicxx "intelmpi" && MPICXX="$(command -v mpicxx)" || exit
add_include_from_paths INTELMPI_CFLAGS "mpi.h" $INCLUDE_PATHS
add_lib_from_paths INTELMPI_LDFLAGS "libmpi.*" $LIB_PATHS
check_lib -lmpi "intelmpi"
- tc-install-libint.sh.diff (appends -lstdc++ after FCLIBS in libint's fortran/Makefile to avoid a build error)
--- scripts/stage3/install_libint.sh.org 2022-01-26 09:34:28.922143775 +0900
+++ scripts/stage3/install_libint.sh 2022-01-26 09:41:44.440708192 +0900
@@ -90,6 +90,7 @@
# Fix bug in makefile for Fortran module
sed -i "s/\$(CXX) \$(CXXFLAGS)/\$(FC) \$(FCFLAGS)/g" fortran/Makefile
fi
+ sed -i 's/FCLIBS)/FCLIBS) -lstdc++/' fortran/Makefile
make -j $(get_nprocs) > make.log 2>&1
make install > install.log 2>&1
Build Procedure
#!/bin/sh
#
# Build CP2K (psmp) from the GitHub tarball with GCC and Intel MPI.
#
# The tarball is unpacked directly into INSTDIR, the two toolchain patches
# are applied, the toolchain is run, DBCSR is fetched with git, and finally
# the cp2k binaries and libcp2k are built with the generated "rccs" arch file.
#
# Critical steps abort the script on failure so that later steps never run
# in the wrong directory or against a half-prepared source tree.

INSTDIR=/local/apl/lx/cp2k910
GITHUB_VERSION=9.1.0
VERSION=9.1.0
DBCSR_VERSION=v2.2.0
SOURCE_ROOT=/home/users/${USER}/Software/CP2K/${GITHUB_VERSION}
TARBALL=${SOURCE_ROOT}/cp2k-${VERSION}.tar.gz
TC_PATCH_1_1=${SOURCE_ROOT}/tc-install-intelmpi.sh.diff
TC_PATCH_3_1=${SOURCE_ROOT}/tc-install-libint.sh.diff
PARALLEL=12

#---------------------------------------------------------------------------
umask 0022
export LANG=C
export LC_ALL=C
ulimit -s unlimited

module purge
module load scl/devtoolset-9
module load mpi/intelmpi/2019.8.254
module load cmake/3.16.3

# Everything below unpacks into, and deletes from, the current directory,
# so stop right away if the install directory cannot be entered.
cd "${INSTDIR}" || exit 1

# Move a leftover extraction directory out of the way and delete it in the
# background while the fresh tarball is being unpacked.
if [ -d "cp2k-${VERSION}" ]; then
  mv "cp2k-${VERSION}" cp2k-erase
  rm -rf cp2k-erase &
fi

tar zxf "${TARBALL}" || exit 1
sleep 5
mv "cp2k-${VERSION}"/* .
sleep 5
# "*" above does not match dotfiles; remove the remaining ones explicitly
# (no brace expansion, which plain sh does not provide) so rmdir can succeed.
rm -rf "cp2k-${VERSION}/.dockerignore" "cp2k-${VERSION}/.github"
rmdir "cp2k-${VERSION}"

cd "${INSTDIR}/tools/toolchain" || exit 1

# apply patches
patch -p0 < "${TC_PATCH_1_1}" || exit 1
patch -p0 < "${TC_PATCH_3_1}" || exit 1

export CC=gcc
export CXX=g++
export FC=gfortran
export MPICC=mpicc
export MPICXX=mpicxx
export MPIFC=mpif90

./install_cp2k_toolchain.sh --mpi-mode=intelmpi \
  --math-mode=openblas \
  --with-gcc=system \
  --with-cmake=system \
  --with-openmpi=no \
  --with-mpich=no \
  --with-intelmpi=system \
  --with-libxc=install \
  --with-libint=install \
  --with-fftw=install \
  --with-acml=no \
  --with-mkl=system \
  --with-openblas=install \
  --with-scalapack=install \
  --with-libsmm=no \
  --with-libxsmm=install \
  --with-elpa=install \
  --with-ptscotch=install \
  --with-superlu=install \
  --with-pexsi=install \
  --with-quip=install \
  --with-plumed=install \
  --with-sirius=install \
  --with-gsl=install \
  --with-libvdwxc=install \
  --with-spglib=install \
  --with-hdf5=install \
  --with-spfft=install \
  --with-spla=install \
  --with-cosma=no \
  --with-libvori=install \
  -j "${PARALLEL}" || exit 1

## -Werror is no longer a problem!
#sed -e "s/-Werror / /g" install/arch/local.psmp > ../../arch/rccs.psmp
# Use the arch file generated by the toolchain under the site name "rccs".
cp install/arch/local.psmp ../../arch/rccs.psmp || exit 1

# prep dbcsr
# Replace the exts/dbcsr directory from the tarball with a git checkout of
# the requested DBCSR tag. rmdir/clone are left unchecked so that a rerun
# with an existing clone still proceeds to the checkout below.
cd "${INSTDIR}/exts" || exit 1
rmdir dbcsr
git clone https://github.com/cp2k/dbcsr.git
cd dbcsr || exit 1
git checkout "refs/tags/${DBCSR_VERSION}" || exit 1
git submodule update --init --recursive || exit 1

# Build the cp2k executables first, then the libcp2k library.
cd "${INSTDIR}" || exit 1
make -j "${PARALLEL}" ARCH=rccs VERSION=psmp || exit 1
make -j "${PARALLEL}" ARCH=rccs VERSION=psmp libcp2k
(In hindsight, --with-mkl should have been set to "no"; MKL is not actually used in this build, though.)
Tests
#!/bin/sh
#PBS -l select=1:ncpus=16:mpiprocs=16:ompthreads=1:jobtype=core
#PBS -l walltime=12:00:00
#
# Run the CP2K regression tests for the rccs/psmp build with six
# combinations of MPI ranks and OpenMP threads. Each run writes its output
# to regtest_mpi<ranks>_omp<threads>.log in the regtesting directory.

export LC_ALL=C
export LANG=""
export OMP_STACKSIZE=64M

module purge
module load scl/devtoolset-9
module load mpi/intelmpi/2019.8.254
module load cmake/3.16.3

CP2K=/local/apl/lx/cp2k910
CP2K_ARCH=rccs
CP2K_VER=psmp
TIMEOUT=600
PARALLEL=16

ulimit -s unlimited

# run_regtest RANKS THREADS
#   Run do_regtest once with the given number of MPI ranks and OpenMP
#   threads, capture stdout and stderr in a per-combination log file, and
#   remove the LAST-* directory afterwards so the next run starts clean.
#   Uses the portable "> file 2>&1" form instead of the bash-only ">& file".
run_regtest() {
  rt_ranks=$1
  rt_threads=$2
  ../../../tools/regtesting/do_regtest \
    -nobuild \
    -arch "${CP2K_ARCH}" \
    -version "${CP2K_VER}" \
    -mpiranks "${rt_ranks}" \
    -ompthreads "${rt_threads}" \
    -jobmaxtime "${TIMEOUT}" \
    -cp2kdir ../../../ \
    -maxtasks "${PARALLEL}" > "regtest_mpi${rt_ranks}_omp${rt_threads}.log" 2>&1
  rm -rf "LAST-${CP2K_ARCH}-${CP2K_VER}"
}

# The relative paths and the rm -rf below depend on this directory.
cd "${CP2K}/regtesting/${CP2K_ARCH}/${CP2K_VER}" || exit 1
rm -rf "LAST-${CP2K_ARCH}-${CP2K_VER}"

# serial test
run_regtest 1 1

# omp test
run_regtest 1 2

# mpi test
run_regtest 2 1

# mpi/openmp test
run_regtest 2 2

# yet another mpi test
run_regtest 8 1

# yet another mpi/openmp test
run_regtest 8 2
test result
[qf7@ccfep8 9.1.0]$ cd /local/apl/lx/cp2k910/regtesting/rccs/psmp/
[qf7@ccfep8 psmp]$ grep "GREPME" *.log
regtest_mpi1_omp1.log:GREPME 0 0 3596 0 3596 X
regtest_mpi1_omp2.log:GREPME 0 0 3596 0 3596 X
regtest_mpi2_omp1.log:GREPME 0 0 3652 0 3652 X
regtest_mpi2_omp2.log:GREPME 0 0 3652 0 3652 X
regtest_mpi8_omp1.log:GREPME 3 20 3643 0 3666 X
regtest_mpi8_omp2.log:GREPME 3 20 3643 0 3666 X
Some errors and wrong results occurred, but only in the 8 MPI cases.
- 8 MPI 1 OMP
- h2o_f35.inp: RUNTIME FAIL
- Can not add blocks to matrix with no rows. (dbcsr_block_access.F:555)
- HeH-mixed-cdft-5.inp & HeH-mixed-cdft-7.inp: RUNTIME FAIL
- Load balancing error: too much data to redistribute. (mixed_cdft_methods.F:3041)
- may be fixed by changing LOAD_SCALE?
- H2O-32-dftb-ls-2_mult.inp: WRONG 11 (ENERGY| Total FORCE_EVAL error: 3.15639644e-04 tolerance: 1e-12)
- H2O-32-dftb-ls-2.inp: WRONG 11 (ENERGY| Total FORCE_EVAL error: 3.15639644e-04 tolerance: 1e-12)
- H2O-OT-ASPC-1.inp: WRONG 1 (Total energy error 2.09579836e-11 tolerance 4e-14)
- H2O_grad_gpw.inp: WRONG 11 (ENERGY| Total FORCE_EVAL error 1.31898310e-09 tolerance: 7e-11)
- H2O-6.inp: WRONG 1 (Total energy error: 3.34421827e-11 tolerance: 2e-14)
- h2o_v01.inp: WRONG 8 (VIB|Frequency error: 2.36308063e-02 tolerance: 1.0E-07)
- H2O-debug-5.inp: WRONG 86 (DIPOLE : CheckSum error: 2.20748333e-07 tolerance: 4e-10)
- H2O-debug-6.inp: WRONG 86 (DIPOLE : CheckSum error: 2.20751799e-07 tolerance: 4e-10)
- H2O_grad_mme.inp: WRONG 11 (ENERGY| Total FORCE_EVAL error: 6.67224387e-09 tolerance: 6e-09)
- H2O_grad_ri-hfx.inp: WRONG 11 (ENERGY| Total FORCE_EVAL error: 6.40217238e-09 tolerance: 6e-09)
- ethene-vib-mode-sel-int.inp: WRONG 21 (VIB| error: 5.18090909e+02 tolerance: 1.0E-14)
- N3dye_vib_bfgs2.inp: WRONG 18 (MS| TRACKED FREQUENCY error: 2.61646572e+00 tolerance: 2e-08)
- N3dye_vib_inv_atoms.inp: WRONG 18 (MS| TRACKED FREQUENCY error: 2.11358036e+00 tolerance: 1.0E-14)
- N3dye_vib_restart_vec3.inp: WRONG 18 (MS| TRACKED FREQUENCY error: 1.42820600e-03 tolerance: 1.0E-14)
- N3dye_vib_restart_vec2.inp: WRONG 18 (MS| TRACKED FREQUENCY error: 2.04088385e+00 tolerance: 1.0E-14)
- N3dye_vib_restart_vec.inp: WRONG 18 (MS| TRACKED FREQUENCY error: 1.42831089e-03 tolerance: 1.0E-14)
- N3dye_vib_restart_vec4.inp: WRONG 18 (MS| TRACKED FREQUENCY error: 3.26939470e+00 tolerance: 1.0E-14)
- H2O-VIB-MS-INT-1.inp: WRONG 21 (VIB| error: 6.54403214e-02 tolerance: 1.0E-14)
- H2O-VIB-MS-INT-2.inp: WRONG 21 (VIB| error: 6.54403214e-02 tolerance: 1.0E-14)
- vib-mixed.inp: WRONG 8 (VIB|Frequency error: 1.57503770e-02 tolerance: 1e-14)
- h2o_f35.inp: RUNTIME FAIL
- 8 MPI 2 OMP
- the same errors as in the non-OpenMP case (apart from very minor numerical differences).
Benchmark
H2O-64.inp was used as in the case of 8.2. Elapsed time was obtained by 'grep "CP2K " *.log'.
Each benchmark was run 20 times and the average of the last 19 runs was used. (The first run was excluded because it is a bit unstable.)
jobtype | #cores (#nodes) | MPI | OMP | GPU | elapse(sec) |
core | 18 (1) | 18 | 1 | - | 59.63 |
small | 40 (1) | 40 | 1 | - | 45.74 |
small | 80 (2) | 80 | 1 | - | 29.37 |
small | 160 (4) | 160 | 1 | - | 22.4 |
- For "core" benchmark, whole node was reserved to get a stable result.
- There are no significant differences from version 8.2.
- OpenMP is not so effective at this size of benchmark.
Notes
- OpenMPI 3.x version failed on some more tests. We thus used Intel MPI.
- OpenMPI 4.x is even worse than 3.x. For OpenMPI, 3.x was recommended.
- https://github.com/cp2k/cp2k/issues/1274#issuecomment-753291518
- GCC10 or later cannot be used with Intel MPI of Parallel Studio 2020 update 2 (corresponding mpi.mod does not exist?)
- Other environments such as MVAPICH + GCC10 were not tested.
- COSMA was intentionally disabled; there could be some pitfalls.
- Intel compilers were not tested.
- The MKL version showed slightly worse performance than the OpenBLAS one. (not very sure about this)
- libcp2k.a was also built. (Collaboration between CP2K and Gromacs?)
- Very occasionally, performance of a certain run becomes very slow (probably regardless of input data).
- It happened only once; we couldn't reproduce it...