Translate DNA sequences¶
from cogent3 import get_code
standard_code = get_code(1)
standard_code.translate("TTTGCAAAC")
'FAN'
Conversion to a ProteinSequence from a DnaSequence is shown in "Translate a DnaSequence to protein".
Translate all six frames¶
from cogent3 import get_code, make_seq
standard_code = get_code(1)
seq = make_seq("ATGCTAACATAAA", moltype="dna")
translations = standard_code.sixframes(seq)
print(translations)
['MLT*', 'C*HK', 'ANI', 'FMLA', 'LC*H', 'YVS']
Find out how many stops in a frame¶
from cogent3 import get_code, make_seq
standard_code = get_code(1)
seq = make_seq("ATGCTAACATAAA", moltype="dna")
stops_frame1 = standard_code.get_stop_indices(seq, start=0)
stops_frame1
stop_index = stops_frame1[0]
seq[stop_index : stop_index + 3]
     0
None TAA
3 DnaSequence
Translate a codon¶
from cogent3 import get_code, make_seq
standard_code = get_code(1)
standard_code["TTT"]
'F'
Or get the codons for a single amino acid:
standard_code["A"]
['GCT', 'GCC', 'GCA', 'GCG']
Look up the amino acid corresponding to a single codon¶
from cogent3 import get_code
standard_code = get_code(1)
standard_code["TTT"]
'F'
Get all the codons for one amino acid¶
from cogent3 import get_code
standard_code = get_code(1)
standard_code["A"]
['GCT', 'GCC', 'GCA', 'GCG']
Get all the codons for a group of amino acids¶
targets = ["A", "C"]
codons = [standard_code[aa] for aa in targets]
codons
flat_list = sum(codons, [])
flat_list
['GCT', 'GCC', 'GCA', 'GCG', 'TGT', 'TGC']
Converting the CodonAlphabet to codon series¶
from cogent3 import make_seq
my_seq = make_seq("AGTACACTGGTT", moltype="dna")
sorted(my_seq.codon_alphabet())
len(my_seq.codon_alphabet())
61
Obtaining the codons from a DnaSequence object¶
Use the method get_in_motif_size().
from cogent3 import make_seq
my_seq = make_seq("ATGCACTGGTAA", name="my_gene", moltype="dna")
codons = my_seq.get_in_motif_size(3)
print(codons)
['ATG', 'CAC', 'TGG', 'TAA']
Translating a DNA sequence with a terminating stop codon¶
You can’t translate a sequence that contains a stop codon: calling get_translation() on it raises an AlphabetError naming the stop codon.
pep = my_seq.get_translation()
---------------------------------------------------------------------------
AlphabetError Traceback (most recent call last)
Cell In[12], line 1
----> 1 pep = my_seq.get_translation()
File ~/work/cogent3.github.io/cogent3.github.io/.venv/lib/python3.9/site-packages/cogent3/core/sequence.py:1316, in NucleicAcidSequence.get_translation(self, gc, incomplete_ok)
1314 orig_codon = self._seq[posn : posn + 3]
1315 try:
-> 1316 resolved = codon_alphabet.resolve_ambiguity(orig_codon)
1317 except AlphabetError:
1318 if not incomplete_ok or "-" not in orig_codon:
File ~/work/cogent3.github.io/cogent3.github.io/.venv/lib/python3.9/site-packages/cogent3/core/alphabet.py:710, in Alphabet.resolve_ambiguity(self, ambig_motif)
707 motif_set = [motif for motif in motif_set if motif in self._quick_motifset]
709 if not motif_set:
--> 710 raise AlphabetError(ambig_motif)
712 return tuple(motif_set)
AlphabetError: TAA
By removing the trailing stop codon first¶
from cogent3 import make_seq
my_seq = make_seq("ATGCACTGGTAA", name="my_gene", moltype="dna")
seq = my_seq.trim_stop_codon()
pep = seq.get_translation()
print(pep.to_fasta())
print(type(pep))
>my_gene
MHW
<class 'cogent3.core.sequence.ProteinSequence'>
By slicing the DnaSequence first¶
from cogent3 import make_seq
my_seq = make_seq("CAAATGTATTAA", name="my_gene", moltype="dna")
pep = my_seq[:-3].get_translation()
print(pep.to_fasta())
>my_gene
QMY