#!/bin/bash
# Description: AlphaFold non-docker version
# Author: Sanjay Kumar Srikakulam
#
#
# RCCS notes:
# This script was customized for RCCS by M. Kamiya (IMS).
# original: https://github.com/kalininalab/alphafold_non_docker
# This script is for AlphaFold 2.1.x!
# (2.1.0 was released on Nov. 2, 2021)
# Former AlphaFold versions may not be compatible with this script!
#
# Overview:
#   1. Parse command-line options (see usage below).
#   2. Validate the presets and locate run_alphafold.py under the AF2 root.
#   3. Assemble the tool-binary and database flags for the chosen presets.
#   4. Run run_alphafold.py; the script exits with python's exit status.

# RCCS default value
af2root="/local/apl/lx/alphafold2/alphafold-20211106"
data_dir="/local/apl/lx/alphafold2/databases-20211105"
max_template_date="2021-11-05"
benchmark=false
db_preset="full_dbs"
model_preset="monomer"

# No defaults: -o and -f are required, -G is optional.
output_dir=""
fasta_path=""
gpu_devices=""

#######################################
# Print the help text and terminate the script.
# Arguments: none
# Outputs:   help text on stderr
# Returns:   never returns; exits with status 1
#######################################
usage() {
  {
    echo ""
    echo "Usage: $0 <OPTIONS>"
    echo "Required Parameters:"
    echo "-o <output_dir>         Path to a directory that will store the results."
    echo "-f <fasta_path>         Path to a FASTA file containing one sequence"
    echo ""
    echo "Optional Parameters:"
    echo "-a <af2_root>           Path to alphafold code"
    echo "-d <data_dir>           Path to directory of supporting data"
    echo "-t <max_template_date>  Maximum template release date to consider (ISO-8601 format - i.e. YYYY-MM-DD). Important if folding historical test sets (default: 2021-11-05)"
    echo "-Q                      show also pTM score etc. (alias of -m monomer_ptm)"
    echo "-b                      Run multiple JAX model evaluations to obtain a timing that excludes the compilation time, which should be more indicative of the time required for inferencing many proteins (default: 'False')"
    #echo "-g                      Enable NVIDIA runtime to run with GPUs (default: 'True')"
    echo "-G <gpu_devices>        Comma separated list of devices to pass to 'CUDA_VISIBLE_DEVICES' (default: '')"
    echo "-p <db_preset>          Choose db preset - no ensembling (full_dbs), reduced version of dbs (reduced_dbs) (default: 'full_dbs')"
    echo "-m <model_preset>       Choose model preset - monomer model (monomer), monomer with extra ensembling (monomer_casp14), monomer model with pTM head (monomer_ptm), or multimer model (multimer) (default: 'monomer')"
    echo ""
  } >&2
  exit 1
}

# -a selects the AlphaFold root. The GPU device list used to share the letter
# -a, which made it unreachable; it now has its own option, -G.
while getopts ":a:d:o:f:t:G:p:m:bgQ" opt; do
  case "${opt}" in
    a)
      echo "INFO: set AF2 root to $OPTARG"
      af2root=$OPTARG
      ;;
    d)
      echo "INFO: set database root to $OPTARG"
      data_dir=$OPTARG
      ;;
    o)
      output_dir=$OPTARG
      ;;
    f)
      fasta_path=$OPTARG
      ;;
    t)
      max_template_date=$OPTARG
      ;;
    b)
      benchmark=true
      ;;
    g)
      # Accepted for compatibility with the original script, but ignored.
      #use_gpu=true
      ;;
    Q)
      echo "INFO: set model_preset=monomer_ptm"
      model_preset="monomer_ptm"
      ;;
    G)
      gpu_devices=$OPTARG
      ;;
    p)
      db_preset=$OPTARG
      ;;
    m)
      model_preset=$OPTARG
      ;;
    :)
      echo "ERROR: option -$OPTARG requires an argument" >&2
      usage
      ;;
    \?)
      echo "ERROR: unknown option -$OPTARG" >&2
      usage
      ;;
  esac
done

# Parse input and set defaults
if [[ -z "$data_dir" || -z "$output_dir" || -z "$fasta_path" ]]; then
  usage
fi

# Unknown presets are not fatal: warn and fall back to the default.
if [[ "$db_preset" != "full_dbs" && "$db_preset" != "reduced_dbs" ]]; then
  echo "Unknown db_preset! Using default ('full_dbs')"
  db_preset="full_dbs"
fi

case "$model_preset" in
  monomer | monomer_casp14 | monomer_ptm | multimer) ;;
  *)
    echo "Unknown model_preset! Using default ('monomer')"
    model_preset="monomer"
    ;;
esac

alphafold_script="$af2root/run_alphafold.py"
if [[ ! -f "$alphafold_script" ]]; then
  echo "Alphafold python script $alphafold_script does not exist."
  exit 1
fi

if [[ -n "$gpu_devices" ]]; then
  export CUDA_VISIBLE_DEVICES=$gpu_devices
fi

export TF_FORCE_UNIFIED_MEMORY='1'
export XLA_PYTHON_CLIENT_MEM_FRACTION='4.0'

# Extra flags for run_alphafold.py. An array keeps every flag a single word
# even when a path contains whitespace.
af2_opts=()

# Binary path (change me if required)
hhblits_binary_path=$(command -v hhblits)
hhsearch_binary_path=$(command -v hhsearch)
jackhmmer_binary_path=$(command -v jackhmmer)
kalign_binary_path=$(command -v kalign)
af2_opts+=(
  "--hhblits_binary_path=$hhblits_binary_path"
  "--hhsearch_binary_path=$hhsearch_binary_path"
  "--jackhmmer_binary_path=$jackhmmer_binary_path"
  "--kalign_binary_path=$kalign_binary_path"
)

# bfd path
if [[ "$db_preset" == "reduced_dbs" ]]; then
  # uniclust30 not necessary
  small_bfd_database_path="$data_dir/small_bfd/bfd-first_non_consensus_sequences.fasta"
  af2_opts+=("--small_bfd_database_path=$small_bfd_database_path")
else
  bfd_database_path="$data_dir/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt"
  af2_opts+=("--bfd_database_path=$bfd_database_path")

  # uniclust30 required: prefer a UniRef* entry directly under data_dir,
  # otherwise fall back to the uniclust30_2018_08 layout. If several UniRef*
  # entries exist, the first one in byte-wise sorted order is used.
  uniclust30_database_path=""
  uniref_new=$(find "$data_dir" -maxdepth 1 -name 'UniRef*' | LC_ALL=C sort | head -n 1)
  if [[ -n "$uniref_new" ]]; then
    uniref_name=${uniref_new##*/}
    uniclust30_database_path="$data_dir/$uniref_name/$uniref_name"
  elif [[ -d "$data_dir/uniclust30" ]]; then
    uniclust30_database_path="$data_dir/uniclust30/uniclust30_2018_08/uniclust30_2018_08"
  fi
  if [[ -z "$uniclust30_database_path" ]]; then
    echo "ERROR: no UniRef*/uniclust30 database found under $data_dir (required for db_preset=full_dbs)" >&2
    exit 1
  fi
  af2_opts+=("--uniclust30_database_path=$uniclust30_database_path")
fi

# Path and user config (change me if required)
mgnify_database_path="$data_dir/mgnify/mgy_clusters.fa"
template_mmcif_dir="$data_dir/pdb_mmcif/mmcif_files"
obsolete_pdbs_path="$data_dir/pdb_mmcif/obsolete.dat"
uniref90_database_path="$data_dir/uniref90/uniref90.fasta"
af2_opts+=(
  "--mgnify_database_path=$mgnify_database_path"
  "--template_mmcif_dir=$template_mmcif_dir"
  "--obsolete_pdbs_path=$obsolete_pdbs_path"
  "--uniref90_database_path=$uniref90_database_path"
)

# for multimer (pdb70 must not be specified this case)
if [[ "$model_preset" == "multimer" ]]; then
  echo "INFO: appending database paths for multimer model..."
  uniprot_database_path="$data_dir/uniprot/uniprot.fasta"
  pdb_seqres_database_path="$data_dir/pdb_seqres/pdb_seqres.txt"
  af2_opts+=(
    "--uniprot_database_path=$uniprot_database_path"
    "--pdb_seqres_database_path=$pdb_seqres_database_path"
  )
else
  pdb70_database_path="$data_dir/pdb70/pdb70"
  af2_opts+=("--pdb70_database_path=$pdb70_database_path")
fi

#printf '%q ' "${af2_opts[@]}"; echo

# Run AlphaFold with required parameters.
# Invoked directly, not inside $( ... ): command substitution would execute
# whatever python prints on stdout as a shell command. As the last command,
# its exit status becomes the exit status of this script.
python "$alphafold_script" \
  --data_dir="$data_dir" \
  --output_dir="$output_dir" \
  --fasta_paths="$fasta_path" \
  --max_template_date="$max_template_date" \
  --db_preset="$db_preset" \
  --model_preset="$model_preset" \
  --benchmark="$benchmark" \
  --logtostderr \
  "${af2_opts[@]}"