Biopython stuff
These scripts are grouped by usage scenario. Some of them are easier to understand but not very efficient.
# read fasta file
from Bio import SeqIO

# Parse the FASTA file and index the records in a dict
# (SeqIO.to_dict keys each record by its record.id by default).
fasta_records = SeqIO.parse(file_name, "fasta")
seq_dict = SeqIO.to_dict(fasta_records)

# Write a list of records back out as a FASTA file.
SeqIO.write(r_gene_seq_list, "r_gene_aa_v1_frozen.fa", "fasta")
Requires NCBI BLAST+ to be installed.
# blast
from Bio.Blast.Applications import NcbiblastpCommandline

# Run blastp with the merged R-gene queries against `work_db`.
# outfmt=5 asks BLAST+ for XML output, written to `output_file`.
# NOTE(review): Bio.Blast.Applications appears to be deprecated in recent
# Biopython releases -- confirm against the installed version and consider
# calling blastp through subprocess instead.
output_file = f'r_gene_{genome_name}.xml'
blastp_cline = NcbiblastpCommandline(
    query="r_gene_merge.fa",
    db=work_db,
    evalue=0.001,
    outfmt=5,
    out=output_file,
)
print(blastp_cline)  # show the exact command line that will be executed
stdout, stderr = blastp_cline()
# parse blast
from Bio import SearchIO

# Collect the ID of every hit across all queries in the BLAST XML output
# (duplicates are kept when the same subject is hit by several queries).
hit_list = []
blast_qresult = SearchIO.parse(xml_file, 'blast-xml')
for qresult in blast_qresult:
    # len(qresult) gives the number of hits for this query (handy for debugging).
    hit_list.extend(hit.id for hit in qresult)