import subprocess
import os
# Batch driver: runs PSI-BLAST once per FASTA file found in `fasta_path` and
# stores one ASCII PSSM per query under `pssm_dir`.

# Local PSI-BLAST installation path
path_to_psiblast = 'C:\\Program Files\\NCBI\\blast-2.7.1+\\bin\\psiblast.exe'
# Directory holding the query proteins in FASTA format (one file per query)
fasta_path = 'processed_fastas/mouse_train/'
# Directory that receives the generated PSSM files
pssm_dir = 'pssm/mouse_train/'
from os import listdir
from os.path import isfile, join
# Keep regular files only; sub-directories inside fasta_path are skipped
onlyfiles = [f for f in listdir(fasta_path) if isfile(join(fasta_path, f))]
# Create the output directory up front so the first run does not fail on a
# missing path
os.makedirs(pssm_dir, exist_ok=True)
# Equivalent command line for a single query:
# psiblast -query A0JNU3.fasta -db uniref50/uniref50.fasta -num_iterations 3 -evalue 0.001 -num_threads 8 -out_ascii_pssm A0JNU3.fasta.pssm
for i in onlyfiles:
    query_fasta = join(fasta_path, i)
    # Output filename for each PSSM: the full input filename plus '.pssm'
    output_pssm = join(pssm_dir, i + '.pssm')
    # Arguments are passed as a list (no shell), so paths containing spaces
    # need no extra quoting
    return_code = subprocess.call([
        path_to_psiblast,
        '-query', query_fasta,
        '-db', 'uniref50/uniref50.fasta',
        '-num_iterations', '3',
        '-evalue', '0.001',
        '-num_threads', '8',
        '-out_ascii_pssm', output_pssm,
    ])
    # Best effort: report a failed query and continue with the remaining ones
    if return_code != 0:
        print('psiblast failed for {} (exit code {})'.format(query_fasta, return_code))
/ncbi-blast-2.9.0+/bin$ psiblast \
-db /data/BlastDB/uniref50.fasta \
-query Test_1999.fasta \
-evalue 0.001 \
-out /add_valid_path/Test.txt \
-out_ascii_pssm /add_valid_path/seq.1.pssm \
-num_threads 8 \
-num_iterations 2
- PSSM 은 여전히 강력한 피쳐임
- 길이, 다른 시퀀스 특징을 고려해서 파라미터를 조정해야 함.
'Resources > Personal Projects' 카테고리의 다른 글
[UniProt Challenge] MMseqs2 : 10,000x faster BLAST & profile generator (0) | 2022.09.27 |
---|---|
[UniProt Challenge] Protein database download (0) | 2022.09.27 |
[UniProt Challenge] CNN + Attention (0) | 2022.09.17 |
[UniProt Challenge] Amino acid binding / pocket signature (0) | 2022.09.15 |
[UniProt Challenge] PSSM : Binding residue representation (1) | 2022.09.15 |
댓글