BioPython snippets

Jan. 3, 2021

Biopython stuff

These scripts are grouped by usage scenario. Some of them favor readability over efficiency.

Fasta IO

# Load every record of a FASTA file into a dict.
# file_name must already hold the path of the input file.
from Bio import SeqIO

fasta_records = SeqIO.parse(file_name, "fasta")
seq_dict = SeqIO.to_dict(fasta_records)

# Write the sequences held in r_gene_seq_list out as a FASTA file.
SeqIO.write(r_gene_seq_list, "r_gene_aa_v1_frozen.fa", "fasta")

Blast

Requires BLAST+ to be installed.

# Run a blastp search through Biopython's command-line wrapper and save
# the result to an XML file.
# genome_name and work_db must already be defined before this snippet runs.
# NOTE(review): recent Biopython releases deprecate the Bio.Application
# command-line wrappers in favour of calling the tool via subprocess --
# confirm against the installed Biopython version.
from Bio.Blast.Applications import NcbiblastpCommandline

output_file = f'r_gene_{genome_name}.xml'
blastp_cline = NcbiblastpCommandline(
    query="r_gene_merge.fa",
    db=work_db,
    evalue=0.001,
    outfmt=5,  # 5 = BLAST XML output
    out=output_file,
)
# Printing the wrapper shows the command line it will execute.
print(blastp_cline)
# Calling the wrapper runs blastp; results go to output_file.
stdout, stderr = blastp_cline()

# Collect the ID of every hit found in a BLAST XML result file.
# xml_file must hold the path of the XML file; hit_list is not created
# here, so it must already exist before this snippet runs.
from Bio import SearchIO

# Each item produced by SearchIO.parse is one query result;
# iterating a query result gives its hits.
for qresult in SearchIO.parse(xml_file, 'blast-xml'):
    hit_list.extend(hit.id for hit in qresult)