#!/bin/bash
# Description: AlphaFold non-docker version
# Author: Sanjay Kumar Srikakulam
#
# RCCS notes:
#   This script was customized for RCCS by M. Kamiya (IMS).
#   original: https://github.com/kalininalab/alphafold_non_docker
#
# Purpose:
#   Thin launcher for run_alphafold.py. It parses the command-line options,
#   derives all database paths from the database root (-d), looks up the
#   external tools (hhblits, hhsearch, jackhmmer, kalign) on PATH and then
#   runs the prediction. The exit status is that of run_alphafold.py.
#
# Option letters:
#   -a selects the AlphaFold code directory. The GPU device list is taken
#   from -c; it used to share the letter -a, which made it unreachable.

# RCCS default values
readonly default_max_template_date="2021-08-20"
af2root="/local/apl/lx/alphafold2/alphafold-20210819"
data_dir="/local/apl/lx/alphafold2/databases-20210819"
max_template_date=$default_max_template_date
benchmark=false
preset="full_dbs"
model_names="model_1,model_2,model_3,model_4,model_5"

# Settings without a default; -o and -f are mandatory.
output_dir=""
fasta_path=""
gpu_devices=""

# Print the help text to stdout and terminate with exit status 1.
usage() {
  echo ""
  echo "Usage: $0 <OPTIONS>"
  echo "Required Parameters:"
  echo "-o <output_dir>   Path to a directory that will store the results."
  echo "-f <fasta_path>   Path to a FASTA file containing one sequence"
  echo "Optional Parameters:"
  echo "-a <af2root>      Path to alphafold code"
  echo "-d <data_dir>     Path to directory of supporting data"
  echo "-t <max_template_date> Maximum template release date to consider (ISO-8601 format - i.e. YYYY-MM-DD). Important if folding historical test sets (default: $default_max_template_date)"
  echo "-Q                show also pTM score etc. (modify default model names to model_1_ptm,...)"
  echo "-m <model_names>  Names of models to use (a comma separated list)"
  echo "-b                Run multiple JAX model evaluations to obtain a timing that excludes the compilation time, which should be more indicative of the time required for inferencing many proteins (default: 'False')"
  #echo "-g Enable NVIDIA runtime to run with GPUs (default: 'True')"
  echo "-c <gpu_devices>  Comma separated list of devices to pass to 'CUDA_VISIBLE_DEVICES' (default: '')"
  echo "-p <preset>       Choose preset model configuration - no ensembling (full_dbs), reduced version of dbs (reduced_dbs), or 8 model ensemblings (casp14) (default: 'full_dbs')"
  echo ""
  exit 1
}

# The leading ':' puts getopts into silent mode, so missing arguments and
# unknown options are reported by the ':' and '?' arms below.
# Options are applied in command-line order: a later -m overrides an earlier
# -Q and vice versa.
while getopts ":a:d:o:m:f:t:c:p:bgQ" i; do
  case "${i}" in
    a)
      echo "INFO: set AF2 root to $OPTARG"
      af2root=$OPTARG
      ;;
    d)
      echo "INFO: set database root to $OPTARG"
      data_dir=$OPTARG
      ;;
    o)
      output_dir=$OPTARG
      ;;
    m)
      model_names=$OPTARG
      ;;
    f)
      fasta_path=$OPTARG
      ;;
    t)
      max_template_date=$OPTARG
      ;;
    b)
      benchmark=true
      ;;
    g)
      # Accepted for compatibility with older command lines; has no effect.
      #use_gpu=true
      ;;
    Q)
      echo "INFO: use _ptm models"
      model_names="model_1_ptm,model_2_ptm,model_3_ptm,model_4_ptm,model_5_ptm"
      ;;
    c)
      gpu_devices=$OPTARG
      ;;
    p)
      preset=$OPTARG
      ;;
    :)
      echo "ERROR: option -$OPTARG requires an argument" >&2
      usage
      ;;
    \?)
      echo "ERROR: unknown option -$OPTARG" >&2
      usage
      ;;
  esac
done

# Parse input and set defaults
if [[ -z "$data_dir" || -z "$output_dir" || -z "$model_names" || -z "$fasta_path" ]]; then
  usage
fi

if [[ "$preset" != "full_dbs" && "$preset" != "casp14" && "$preset" != "reduced_dbs" ]]; then
  echo "Unknown preset! Using default ('full_dbs')"
  preset="full_dbs"
fi

alphafold_script="$af2root/run_alphafold.py"

if [[ ! -f "$alphafold_script" ]]; then
  echo "Alphafold python script $alphafold_script does not exist."
  exit 1
fi

if [[ -n "$gpu_devices" ]]; then
  export CUDA_VISIBLE_DEVICES=$gpu_devices
fi

export TF_FORCE_UNIFIED_MEMORY='1'
export XLA_PYTHON_CLIENT_MEM_FRACTION='4.0'

# Path and user config (change me if required)
mgnify_database_path="$data_dir/mgnify/mgy_clusters.fa"
template_mmcif_dir="$data_dir/pdb_mmcif/mmcif_files"
obsolete_pdbs_path="$data_dir/pdb_mmcif/obsolete.dat"
pdb70_database_path="$data_dir/pdb70/pdb70"
uniref90_database_path="$data_dir/uniref90/uniref90.fasta"

# uniclust30 path: prefer a "UniRef*" directory directly below the database
# root and fall back to the 2018_08 uniclust30 layout. Glob results are
# sorted, so when several UniRef* directories exist the last one
# (lexicographically) is used.
uniclust30_database_path=""
uniref_dir=""
for candidate in "$data_dir"/UniRef*; do
  [[ -d "$candidate" ]] || continue
  uniref_dir=$candidate
done

if [[ -n "$uniref_dir" ]]; then
  uniref_name=${uniref_dir##*/}
  uniclust30_database_path="$data_dir/$uniref_name/$uniref_name"
elif [[ -d "$data_dir/uniclust30" ]]; then
  uniclust30_database_path="$data_dir/uniclust30/uniclust30_2018_08/uniclust30_2018_08"
fi

# bfd path: reduced_dbs passes only the small BFD; the other presets pass the
# full BFD together with uniclust30. Arrays are used so that an absent option
# expands to no argument at all.
bfd_database_option=()
uniclust30_database_option=()
if [[ "$preset" == "reduced_dbs" ]]; then
  bfd_database_option=("--small_bfd_database_path=$data_dir/small_bfd/bfd-first_non_consensus_sequences.fasta")
else
  if [[ -z "$uniclust30_database_path" ]]; then
    echo "ERROR: no UniRef*/uniclust30 database found in $data_dir (needed for preset '$preset')" >&2
    exit 1
  fi
  bfd_database_path="$data_dir/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt"
  bfd_database_option=("--bfd_database_path=$bfd_database_path")
  uniclust30_database_option=("--uniclust30_database_path=$uniclust30_database_path")
fi

# Binary path (change me if required)
hhblits_binary_path=$(command -v hhblits)
hhsearch_binary_path=$(command -v hhsearch)
jackhmmer_binary_path=$(command -v jackhmmer)
kalign_binary_path=$(command -v kalign)

for tool in hhblits hhsearch jackhmmer kalign; do
  if ! command -v "$tool" >/dev/null 2>&1; then
    echo "WARNING: $tool was not found in PATH" >&2
  fi
done

# Run AlphaFold with required parameters.
# The command is run directly (not inside a command substitution) so that
# nothing it prints to stdout is ever interpreted as a shell command.
python "$alphafold_script" \
  "--hhblits_binary_path=$hhblits_binary_path" \
  "--hhsearch_binary_path=$hhsearch_binary_path" \
  "--jackhmmer_binary_path=$jackhmmer_binary_path" \
  "--kalign_binary_path=$kalign_binary_path" \
  "${bfd_database_option[@]}" \
  "--mgnify_database_path=$mgnify_database_path" \
  "--template_mmcif_dir=$template_mmcif_dir" \
  "--obsolete_pdbs_path=$obsolete_pdbs_path" \
  "--pdb70_database_path=$pdb70_database_path" \
  "${uniclust30_database_option[@]}" \
  "--uniref90_database_path=$uniref90_database_path" \
  "--data_dir=$data_dir" \
  "--output_dir=$output_dir" \
  "--fasta_paths=$fasta_path" \
  "--model_names=$model_names" \
  "--max_template_date=$max_template_date" \
  "--preset=$preset" \
  "--benchmark=$benchmark" \
  --logtostderr