Module protkit.seq.sequence

Implements class Sequence to represent a sequence of residues.

This class provides a representation of a sequence. The class provides core functionality for sequence alignment and comparison, that is applicable to both nucleotide and protein sequences.

Specific functionality for nucleotide and protein sequences is provided by the NucleotideSequence and ProteinSequence classes.

This class should be able to handle sequences of any length, and of any type (nucleotide or protein). The underlying representation of the sequence should be able to handle any type of sequence.

Expand source code
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
# Authors:  Fred Senekal (FS)
# Contact:  fred@silicogenesis.com
# License:  GPLv3

"""
Implements class `Sequence` to represent a sequence of residues.

This class provides a representation of a sequence. The class
provides core functionality for sequence alignment and comparison,
that is applicable to both nucleotide and protein sequences.

Specific functionality for nucleotide and protein sequences is
provided by the NucleotideSequence and ProteinSequence classes.

This class should be able to handle sequences of any length, and
of any type (nucleotide or protein). The underlying representation
of the sequence should be able to handle any type of sequence.
"""

from typing import Optional, List, Union
from protkit.core.extend_attributes import ExtendedAttributes


class Sequence(ExtendedAttributes):
    """
    Ordered sequence of residue codes with optional description and chain ID.

    The residues are held as a list of codes. A string passed to the
    constructor or to the `sequence` setter is split into single-character
    codes; any other value is stored as given.
    """

    def __init__(self,
                 sequence: Union[str, List[str]],
                 description: Optional[str] = None,
                 chain_id: Optional[str] = None):
        """
        Constructor.

        Args:
            sequence (Union[str, List[str]]): The sequence of residue names.
            description (Optional[str]): Optional description associated with the sequence.
            chain_id (Optional[str]): Optional chain ID associated with the sequence.

        Returns:
            None

        Notes:
            The sequence can be provided as a string or a list of strings.
            If provided as a string, the string will be converted to a list of strings.

            A sequence can be represented by a string of single letters, such as "AGILE",
            or a list of three-letter codes such as ["ALA", "GLY", "ILE", "LEU", "GLU"]. For
            consistency, the sequence is always represented as a list of codes.

            A list argument is stored by reference, not copied.
        """
        super().__init__()

        # isinstance (rather than an exact type check) so that str subclasses
        # are also split into a mutable list of single-letter codes.
        self._sequence = list(sequence) if isinstance(sequence, str) else sequence
        self._chain_id = chain_id
        self._description = description

    def __str__(self):
        """
        Returns a string representation of the sequence.

        Single-character codes are concatenated directly; longer codes are
        separated by a single space. The first residue decides which form
        is used.

        Returns:
            str: The string representation of the sequence, or "" if the
                sequence is None or empty.
        """
        if not self._sequence:
            return ""

        separator = "" if len(self._sequence[0]) == 1 else " "
        return separator.join(self._sequence)

    def to_string(self, start_index: int, end_index: int) -> str:
        """
        Returns a string representation of a subsequence.

        Args:
            start_index (int): The start index of the subsequence (inclusive).
                Values below 0 are clamped to 0.
            end_index (int): The end index of the subsequence (inclusive).
                Values past the last residue are clamped to the last residue.

        Returns:
            str: The string representation of the subsequence, or "" if the
                sequence is None or empty, or the requested range is empty.
        """
        if not self._sequence:
            return ""

        # Clamp both bounds to the valid range. Without the lower clamp a
        # negative start would index from the end of the list and silently
        # prepend residues from the tail of the sequence.
        start = max(start_index, 0)
        stop = min(end_index + 1, len(self._sequence))
        residues = self._sequence[start:stop] if stop > start else []

        # The first residue of the full sequence decides the separator.
        separator = "" if len(self._sequence[0]) == 1 else " "
        return separator.join(residues)

    def __len__(self):
        """
        Returns the length of the sequence.

        Returns:
            int: The number of residues; 0 if the sequence is None.
        """
        # Treat a missing sequence as empty, as __str__ and to_string do.
        return 0 if self._sequence is None else len(self._sequence)

    def __getitem__(self, index):
        """
        Returns the residue at the specified index.

        Args:
            index (int): The index of the residue to return. The value is
                passed unchanged to the underlying container.

        Returns:
            str: The residue at the specified index.
        """
        return self._sequence[index]

    def __setitem__(self, index, value):
        """
        Sets the residue at the specified index.

        Args:
            index (int): The index of the residue to set.
            value (str): The residue to set.

        Returns:
            None
        """
        self._sequence[index] = value

    @property
    def sequence(self):
        """
        Returns the sequence.

        Returns:
            List[str]: The stored residue codes. This is the internal
                object, not a copy.
        """
        return self._sequence

    @sequence.setter
    def sequence(self, sequence: Union[str, List[str]]):
        """
        Sets the sequence.

        Args:
            sequence (Union[str, List[str]]): The sequence. A string is
                converted to a list of single-character codes; any other
                value is stored as given.

        Returns:
            None
        """
        self._sequence = list(sequence) if isinstance(sequence, str) else sequence

    @property
    def description(self):
        """
        Returns the description.

        Returns:
            Optional[str]: The description.
        """
        return self._description

    @description.setter
    def description(self, description):
        """
        Sets the description.

        Args:
            description (str): The description.

        Returns:
            None
        """
        self._description = description

    @property
    def chain_id(self):
        """
        Returns the chain ID.

        Returns:
            Optional[str]: The chain ID.
        """
        return self._chain_id

    @chain_id.setter
    def chain_id(self, chain_id):
        """
        Sets the chain ID.

        Args:
            chain_id (str): The chain ID.

        Returns:
            None
        """
        self._chain_id = chain_id

    @property
    def length(self):
        """
        Returns the length of the sequence.

        Returns:
            int: The number of residues; 0 if the sequence is None.
        """
        return 0 if self._sequence is None else len(self._sequence)

Classes

class Sequence (sequence: Union[str, List[str]], description: Optional[str] = None, chain_id: Optional[str] = None)

Constructor.

Args

sequence : Union[str, List[str]]
The sequence of residue names.
description : Optional[str]
Optional description associated with the sequence.
chain_id : Optional[str]
Optional chain ID associated with the sequence.

Returns

None

Notes

The sequence can be provided as a string or a list of strings. If provided as a string, the string will be converted to a list of strings.

A sequence can be represented by a string of single letters, such as "AGILE", or a list of three-letter codes such as ["ALA", "GLY", "ILE", "LEU", "GLU"]. For consistency, the sequence is always represented as a list of codes.

Expand source code
class Sequence(ExtendedAttributes):
    """
    Ordered sequence of residue codes with optional description and chain ID.

    The residues are held as a list of codes. A string passed to the
    constructor or to the `sequence` setter is split into single-character
    codes; any other value is stored as given.
    """

    def __init__(self,
                 sequence: Union[str, List[str]],
                 description: Optional[str] = None,
                 chain_id: Optional[str] = None):
        """
        Constructor.

        Args:
            sequence (Union[str, List[str]]): The sequence of residue names.
            description (Optional[str]): Optional description associated with the sequence.
            chain_id (Optional[str]): Optional chain ID associated with the sequence.

        Returns:
            None

        Notes:
            The sequence can be provided as a string or a list of strings.
            If provided as a string, the string will be converted to a list of strings.

            A sequence can be represented by a string of single letters, such as "AGILE",
            or a list of three-letter codes such as ["ALA", "GLY", "ILE", "LEU", "GLU"]. For
            consistency, the sequence is always represented as a list of codes.

            A list argument is stored by reference, not copied.
        """
        super().__init__()

        # isinstance (rather than an exact type check) so that str subclasses
        # are also split into a mutable list of single-letter codes.
        self._sequence = list(sequence) if isinstance(sequence, str) else sequence
        self._chain_id = chain_id
        self._description = description

    def __str__(self):
        """
        Returns a string representation of the sequence.

        Single-character codes are concatenated directly; longer codes are
        separated by a single space. The first residue decides which form
        is used.

        Returns:
            str: The string representation of the sequence, or "" if the
                sequence is None or empty.
        """
        if not self._sequence:
            return ""

        separator = "" if len(self._sequence[0]) == 1 else " "
        return separator.join(self._sequence)

    def to_string(self, start_index: int, end_index: int) -> str:
        """
        Returns a string representation of a subsequence.

        Args:
            start_index (int): The start index of the subsequence (inclusive).
                Values below 0 are clamped to 0.
            end_index (int): The end index of the subsequence (inclusive).
                Values past the last residue are clamped to the last residue.

        Returns:
            str: The string representation of the subsequence, or "" if the
                sequence is None or empty, or the requested range is empty.
        """
        if not self._sequence:
            return ""

        # Clamp both bounds to the valid range. Without the lower clamp a
        # negative start would index from the end of the list and silently
        # prepend residues from the tail of the sequence.
        start = max(start_index, 0)
        stop = min(end_index + 1, len(self._sequence))
        residues = self._sequence[start:stop] if stop > start else []

        # The first residue of the full sequence decides the separator.
        separator = "" if len(self._sequence[0]) == 1 else " "
        return separator.join(residues)

    def __len__(self):
        """
        Returns the length of the sequence.

        Returns:
            int: The number of residues; 0 if the sequence is None.
        """
        # Treat a missing sequence as empty, as __str__ and to_string do.
        return 0 if self._sequence is None else len(self._sequence)

    def __getitem__(self, index):
        """
        Returns the residue at the specified index.

        Args:
            index (int): The index of the residue to return. The value is
                passed unchanged to the underlying container.

        Returns:
            str: The residue at the specified index.
        """
        return self._sequence[index]

    def __setitem__(self, index, value):
        """
        Sets the residue at the specified index.

        Args:
            index (int): The index of the residue to set.
            value (str): The residue to set.

        Returns:
            None
        """
        self._sequence[index] = value

    @property
    def sequence(self):
        """
        Returns the sequence.

        Returns:
            List[str]: The stored residue codes. This is the internal
                object, not a copy.
        """
        return self._sequence

    @sequence.setter
    def sequence(self, sequence: Union[str, List[str]]):
        """
        Sets the sequence.

        Args:
            sequence (Union[str, List[str]]): The sequence. A string is
                converted to a list of single-character codes; any other
                value is stored as given.

        Returns:
            None
        """
        self._sequence = list(sequence) if isinstance(sequence, str) else sequence

    @property
    def description(self):
        """
        Returns the description.

        Returns:
            Optional[str]: The description.
        """
        return self._description

    @description.setter
    def description(self, description):
        """
        Sets the description.

        Args:
            description (str): The description.

        Returns:
            None
        """
        self._description = description

    @property
    def chain_id(self):
        """
        Returns the chain ID.

        Returns:
            Optional[str]: The chain ID.
        """
        return self._chain_id

    @chain_id.setter
    def chain_id(self, chain_id):
        """
        Sets the chain ID.

        Args:
            chain_id (str): The chain ID.

        Returns:
            None
        """
        self._chain_id = chain_id

    @property
    def length(self):
        """
        Returns the length of the sequence.

        Returns:
            int: The number of residues; 0 if the sequence is None.
        """
        return 0 if self._sequence is None else len(self._sequence)

Ancestors

Subclasses

Instance variables

var chain_id

Returns the chain ID.

Returns

str
The chain ID.
Expand source code
@property
def chain_id(self):
    """
    Chain identifier associated with this sequence.

    Returns:
        Optional[str]: The value currently stored as the chain ID.
    """
    stored_chain_id = self._chain_id
    return stored_chain_id
var description

Returns the description.

Returns

str
The description.
Expand source code
@property
def description(self):
    """
    Free-text description associated with this sequence.

    Returns:
        Optional[str]: The value currently stored as the description.
    """
    stored_description = self._description
    return stored_description
var length

Returns the length of the sequence.

Returns

int
The length of the sequence.
Expand source code
@property
def length(self) -> int:
    """
    Returns the length of the sequence.

    Returns:
        int: The number of residues; 0 if the sequence is None.
    """
    residues = self._sequence
    # A missing sequence counts as empty instead of raising TypeError.
    return 0 if residues is None else len(residues)
var sequence

Returns the sequence.

Returns

List[str]
The sequence.
Expand source code
@property
def sequence(self):
    """
    Residue codes that make up this sequence.

    Returns:
        List[str]: The stored residue codes. This is the internal object,
            not a copy.
    """
    residues = self._sequence
    return residues

Methods

def to_string(self, start_index: int, end_index: int) ‑> str

Returns a string representation of a subsequence.

Args

start_index : int
The start index of the subsequence.
end_index : int
The end index of the subsequence.

Returns

str
The string representation of the subsequence.
Expand source code
def to_string(self, start_index: int, end_index: int) -> str:
    """
    Returns a string representation of a subsequence.

    Single-character codes are concatenated directly; longer codes are
    separated by a single space. The first residue of the full sequence
    decides which form is used.

    Args:
        start_index (int): The start index of the subsequence (inclusive).
            Values below 0 are clamped to 0.
        end_index (int): The end index of the subsequence (inclusive).
            Values past the last residue are clamped to the last residue.

    Returns:
        str: The string representation of the subsequence, or "" if the
            sequence is None or empty, or the requested range is empty.
    """
    if not self._sequence:
        return ""

    # Clamp both bounds to the valid range. Without the lower clamp a
    # negative start would index from the end of the list and silently
    # prepend residues from the tail of the sequence.
    start = max(start_index, 0)
    stop = min(end_index + 1, len(self._sequence))
    residues = self._sequence[start:stop] if stop > start else []

    separator = "" if len(self._sequence[0]) == 1 else " "
    return separator.join(residues)

Inherited members