Module protkit.seq.protein_sequence
Implements class `ProteinSequence` to represent a protein sequence.
Expand source code
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
# Authors: Fred Senekal (FS)
# Contact: fred@silicogenesis.com
# License: GPLv3
"""
Implements class `ProteinSequence` to represent a protein sequence.
"""
from typing import Optional, List, Union
from protkit.seq.sequence import Sequence
class ProteinSequence(Sequence):
    """
    A `Sequence` whose elements are amino acid residue names.

    Residue names may be written as single-letter codes (e.g. "A") or as
    three-letter codes (e.g. "ALA"). The class carries lookup tables between
    the two notations and methods that convert the stored sequence in place.
    """

    # Maps upper-case three-letter residue codes to single-letter codes.
    THREE_TO_ONE = {
        # Standard Amino Acids
        "ALA": "A",
        "CYS": "C",
        "ASP": "D",
        "GLU": "E",
        "PHE": "F",
        "GLY": "G",
        "HIS": "H",
        "ILE": "I",
        "LYS": "K",
        "LEU": "L",
        "MET": "M",
        "ASN": "N",
        "PRO": "P",
        "GLN": "Q",
        "ARG": "R",
        "SER": "S",
        "THR": "T",
        "VAL": "V",
        "TRP": "W",
        "TYR": "Y",

        # Non-standard Amino Acids
        "SEC": "U",  # Selenocysteine
        "PYL": "O",  # Pyrrolysine
        "ASX": "B",  # Asparagine or Aspartic Acid
        "GLX": "Z",  # Glutamine or Glutamic Acid
        "XLE": "J",  # Leucine or Isoleucine
        "UNK": "X",  # Unknown
        "XAA": "X",  # Alternative three-letter code that also maps to "X"
    }

    # Maps upper-case single-letter residue codes to three-letter codes.
    # "X" maps to "UNK" only, so "XAA" does not survive a round trip.
    ONE_TO_THREE = {
        # Standard Amino Acids
        "A": "ALA",
        "C": "CYS",
        "D": "ASP",
        "E": "GLU",
        "F": "PHE",
        "G": "GLY",
        "H": "HIS",
        "I": "ILE",
        "K": "LYS",
        "L": "LEU",
        "M": "MET",
        "N": "ASN",
        "P": "PRO",
        "Q": "GLN",
        "R": "ARG",
        "S": "SER",
        "T": "THR",
        "V": "VAL",
        "W": "TRP",
        "Y": "TYR",

        # Non-standard Amino Acids
        "U": "SEC",
        "O": "PYL",
        "B": "ASX",
        "Z": "GLX",
        "J": "XLE",
        "X": "UNK",
    }

    def __init__(self,
                 sequence: Union[str, List[str]],
                 description: Optional[str] = None,
                 chain_id: Optional[str] = None
                 ) -> None:
        """
        Constructor.

        All arguments are forwarded unchanged to the `Sequence` constructor.

        Args:
            sequence (Union[str, List[str]]): The sequence of residue names.
            description (Optional[str]): Optional description associated with the sequence.
            chain_id (Optional[str]): Optional chain ID associated with the sequence.

        Returns:
            None
        """
        super().__init__(sequence, description, chain_id)

    @staticmethod
    def from_sequence(seq: Sequence) -> "ProteinSequence":
        """
        Creates a ProteinSequence object from a Sequence object.

        The new object is built from the `sequence`, `description` and
        `chain_id` attributes of `seq`.

        Args:
            seq (Sequence): The Sequence object.

        Returns:
            ProteinSequence: The ProteinSequence object.
        """
        # NOTE(review): whether the residue data is copied or shared with
        # `seq` depends on `Sequence`, which is not visible here - confirm.
        return ProteinSequence(seq.sequence, seq.description, seq.chain_id)

    def to_single_letter(self) -> None:
        """
        Converts the sequence, in place, to single-letter residue names.

        Names are matched case-insensitively. Three-letter codes found in
        `THREE_TO_ONE` are translated. Names that already are valid
        single-letter codes are kept (upper-cased), so converting a sequence
        that is already in single-letter form does not wipe it. Every other
        name is replaced by "X" (unknown).

        Args:
            None

        Returns:
            None
        """
        three_to_one = ProteinSequence.THREE_TO_ONE
        one_to_three = ProteinSequence.ONE_TO_THREE
        # NOTE(review): assumes `self._sequence` is a mutable list of
        # residue-name strings set up by `Sequence` - confirm.
        for i, name in enumerate(self._sequence):
            res = name.upper()
            if res in three_to_one:
                self._sequence[i] = three_to_one[res]
            elif res in one_to_three:
                # Already a valid single-letter code: keep it rather than
                # degrading it to "X".
                self._sequence[i] = res
            else:
                self._sequence[i] = "X"

    def to_triple_letter(self) -> None:
        """
        Converts the sequence, in place, to three-letter residue names.

        Names are matched case-insensitively. Single-letter codes found in
        `ONE_TO_THREE` are translated. Names that already are valid
        three-letter codes (the keys of `THREE_TO_ONE`, including "XAA") are
        kept (upper-cased), so converting a sequence that is already in
        three-letter form does not wipe it. Every other name is replaced by
        "UNK" (unknown).

        Args:
            None

        Returns:
            None
        """
        three_to_one = ProteinSequence.THREE_TO_ONE
        one_to_three = ProteinSequence.ONE_TO_THREE
        # NOTE(review): assumes `self._sequence` is a mutable list of
        # residue-name strings set up by `Sequence` - confirm.
        for i, name in enumerate(self._sequence):
            res = name.upper()
            if res in one_to_three:
                self._sequence[i] = one_to_three[res]
            elif res in three_to_one:
                # Already a valid three-letter code: keep it rather than
                # degrading it to "UNK".
                self._sequence[i] = res
            else:
                self._sequence[i] = "UNK"
Classes
class ProteinSequence (sequence: Union[str, List[str]], description: Optional[str] = None, chain_id: Optional[str] = None)
-
Constructor.
Args
sequence
:Union[str, List[str]]
- The sequence of residue names.
description
:Optional[str]
- Optional description associated with the sequence.
chain_id
:Optional[str]
- Optional chain ID associated with the sequence.
Returns
None
Expand source code
class ProteinSequence(Sequence):
    """
    A `Sequence` whose elements are amino acid residue names.

    Residue names may be written as single-letter codes (e.g. "A") or as
    three-letter codes (e.g. "ALA"). The class carries lookup tables between
    the two notations and methods that convert the stored sequence in place.
    """

    # Maps upper-case three-letter residue codes to single-letter codes.
    THREE_TO_ONE = {
        # Standard Amino Acids
        "ALA": "A",
        "CYS": "C",
        "ASP": "D",
        "GLU": "E",
        "PHE": "F",
        "GLY": "G",
        "HIS": "H",
        "ILE": "I",
        "LYS": "K",
        "LEU": "L",
        "MET": "M",
        "ASN": "N",
        "PRO": "P",
        "GLN": "Q",
        "ARG": "R",
        "SER": "S",
        "THR": "T",
        "VAL": "V",
        "TRP": "W",
        "TYR": "Y",

        # Non-standard Amino Acids
        "SEC": "U",  # Selenocysteine
        "PYL": "O",  # Pyrrolysine
        "ASX": "B",  # Asparagine or Aspartic Acid
        "GLX": "Z",  # Glutamine or Glutamic Acid
        "XLE": "J",  # Leucine or Isoleucine
        "UNK": "X",  # Unknown
        "XAA": "X",  # Alternative three-letter code that also maps to "X"
    }

    # Maps upper-case single-letter residue codes to three-letter codes.
    # "X" maps to "UNK" only, so "XAA" does not survive a round trip.
    ONE_TO_THREE = {
        # Standard Amino Acids
        "A": "ALA",
        "C": "CYS",
        "D": "ASP",
        "E": "GLU",
        "F": "PHE",
        "G": "GLY",
        "H": "HIS",
        "I": "ILE",
        "K": "LYS",
        "L": "LEU",
        "M": "MET",
        "N": "ASN",
        "P": "PRO",
        "Q": "GLN",
        "R": "ARG",
        "S": "SER",
        "T": "THR",
        "V": "VAL",
        "W": "TRP",
        "Y": "TYR",

        # Non-standard Amino Acids
        "U": "SEC",
        "O": "PYL",
        "B": "ASX",
        "Z": "GLX",
        "J": "XLE",
        "X": "UNK",
    }

    def __init__(self,
                 sequence: Union[str, List[str]],
                 description: Optional[str] = None,
                 chain_id: Optional[str] = None
                 ) -> None:
        """
        Constructor.

        All arguments are forwarded unchanged to the `Sequence` constructor.

        Args:
            sequence (Union[str, List[str]]): The sequence of residue names.
            description (Optional[str]): Optional description associated with the sequence.
            chain_id (Optional[str]): Optional chain ID associated with the sequence.

        Returns:
            None
        """
        super().__init__(sequence, description, chain_id)

    @staticmethod
    def from_sequence(seq: Sequence) -> "ProteinSequence":
        """
        Creates a ProteinSequence object from a Sequence object.

        The new object is built from the `sequence`, `description` and
        `chain_id` attributes of `seq`.

        Args:
            seq (Sequence): The Sequence object.

        Returns:
            ProteinSequence: The ProteinSequence object.
        """
        # NOTE(review): whether the residue data is copied or shared with
        # `seq` depends on `Sequence`, which is not visible here - confirm.
        return ProteinSequence(seq.sequence, seq.description, seq.chain_id)

    def to_single_letter(self) -> None:
        """
        Converts the sequence, in place, to single-letter residue names.

        Names are matched case-insensitively. Three-letter codes found in
        `THREE_TO_ONE` are translated. Names that already are valid
        single-letter codes are kept (upper-cased), so converting a sequence
        that is already in single-letter form does not wipe it. Every other
        name is replaced by "X" (unknown).

        Args:
            None

        Returns:
            None
        """
        three_to_one = ProteinSequence.THREE_TO_ONE
        one_to_three = ProteinSequence.ONE_TO_THREE
        # NOTE(review): assumes `self._sequence` is a mutable list of
        # residue-name strings set up by `Sequence` - confirm.
        for i, name in enumerate(self._sequence):
            res = name.upper()
            if res in three_to_one:
                self._sequence[i] = three_to_one[res]
            elif res in one_to_three:
                # Already a valid single-letter code: keep it rather than
                # degrading it to "X".
                self._sequence[i] = res
            else:
                self._sequence[i] = "X"

    def to_triple_letter(self) -> None:
        """
        Converts the sequence, in place, to three-letter residue names.

        Names are matched case-insensitively. Single-letter codes found in
        `ONE_TO_THREE` are translated. Names that already are valid
        three-letter codes (the keys of `THREE_TO_ONE`, including "XAA") are
        kept (upper-cased), so converting a sequence that is already in
        three-letter form does not wipe it. Every other name is replaced by
        "UNK" (unknown).

        Args:
            None

        Returns:
            None
        """
        three_to_one = ProteinSequence.THREE_TO_ONE
        one_to_three = ProteinSequence.ONE_TO_THREE
        # NOTE(review): assumes `self._sequence` is a mutable list of
        # residue-name strings set up by `Sequence` - confirm.
        for i, name in enumerate(self._sequence):
            res = name.upper()
            if res in one_to_three:
                self._sequence[i] = one_to_three[res]
            elif res in three_to_one:
                # Already a valid three-letter code: keep it rather than
                # degrading it to "UNK".
                self._sequence[i] = res
            else:
                self._sequence[i] = "UNK"
Ancestors
Subclasses
Class variables
var ONE_TO_THREE
var THREE_TO_ONE
Static methods
def from_sequence(seq: Sequence)
-
Creates a ProteinSequence object from a Sequence object.
Args
seq
:Sequence
- The Sequence object.
Returns
ProteinSequence
- The ProteinSequence object.
Expand source code
@staticmethod
def from_sequence(seq: Sequence) -> "ProteinSequence":
    """
    Builds a ProteinSequence from an existing Sequence object.

    The residue data, description and chain ID are read from `seq` and
    handed to the ProteinSequence constructor.

    Args:
        seq (Sequence): The Sequence object to convert.

    Returns:
        ProteinSequence: The newly constructed ProteinSequence object.
    """
    return ProteinSequence(
        sequence=seq.sequence,
        description=seq.description,
        chain_id=seq.chain_id,
    )
Methods
def to_single_letter(self)
-
Converts the sequence to single-letter residue names.
Args
None
Returns
None
Expand source code
def to_single_letter(self) -> None:
    """
    Converts the sequence, in place, to single-letter residue names.

    Names are matched case-insensitively. Three-letter codes found in
    `ProteinSequence.THREE_TO_ONE` are translated. Names that already are
    valid single-letter codes are kept (upper-cased), so converting a
    sequence that is already in single-letter form does not wipe it. Every
    other name is replaced by "X" (unknown).

    Args:
        None

    Returns:
        None
    """
    three_to_one = ProteinSequence.THREE_TO_ONE
    one_to_three = ProteinSequence.ONE_TO_THREE
    # NOTE(review): assumes `self._sequence` is a mutable list of
    # residue-name strings set up by `Sequence` - confirm.
    for i, name in enumerate(self._sequence):
        res = name.upper()
        if res in three_to_one:
            self._sequence[i] = three_to_one[res]
        elif res in one_to_three:
            # Already a valid single-letter code: keep it rather than
            # degrading it to "X".
            self._sequence[i] = res
        else:
            self._sequence[i] = "X"
def to_triple_letter(self)
-
Converts the sequence to three-letter residue names.
Args
None
Returns
None
Expand source code
def to_triple_letter(self) -> None:
    """
    Converts the sequence, in place, to three-letter residue names.

    Names are matched case-insensitively. Single-letter codes found in
    `ProteinSequence.ONE_TO_THREE` are translated. Names that already are
    valid three-letter codes (the keys of `ProteinSequence.THREE_TO_ONE`,
    including "XAA") are kept (upper-cased), so converting a sequence that
    is already in three-letter form does not wipe it. Every other name is
    replaced by "UNK" (unknown).

    Args:
        None

    Returns:
        None
    """
    three_to_one = ProteinSequence.THREE_TO_ONE
    one_to_three = ProteinSequence.ONE_TO_THREE
    # NOTE(review): assumes `self._sequence` is a mutable list of
    # residue-name strings set up by `Sequence` - confirm.
    for i, name in enumerate(self._sequence):
        res = name.upper()
        if res in one_to_three:
            self._sequence[i] = one_to_three[res]
        elif res in three_to_one:
            # Already a valid three-letter code: keep it rather than
            # degrading it to "UNK".
            self._sequence[i] = res
        else:
            self._sequence[i] = "UNK"
Inherited members