Module protkit.seq.sequence_alignment

Implements class SequenceAlignment to align two sequences.

  • local_align() to align two sequences using the Smith-Waterman algorithm.
  • global_align() to align two sequences using the Needleman-Wunsch algorithm.
Expand source code
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
# Authors:  Fred Senekal (FS)
# Contact:  fred@silicogenesis.com
# License:  GPLv3

"""
Implements class `SequenceAlignment` to align two sequences.

- `local_align()` to align two sequences using the Smith-Waterman algorithm.
- `global_align()` to align two sequences using the Needleman-Wunsch algorithm.
"""

from typing import List
from protkit.seq.sequence import Sequence


class SequenceAlignment:
    @staticmethod
    def local_align(
            sequence1: Sequence,
            sequence2: Sequence,
            match_value: int = 2,
            mismatch_penalty: int = -1,
            gap_penalty: int = -1) -> (List[str], List[str]):
        """
        Aligns two sequences using the Smith-Waterman algorithm.

        Args:
            sequence1 (Sequence): The first sequence.
            sequence2 (Sequence): The second sequence.
            match_value (int): The score for a match.
            mismatch_penalty (int): The penalty for a mismatch.
            gap_penalty (int): The penalty for a gap.

        Returns:
            List[str], List[str]: The aligned sequences.

        Raises:
            None
        """
        sequence1 = sequence1.sequence
        sequence2 = sequence2.sequence

        # Initialize the matrix.
        rows = len(sequence1) + 1
        cols = len(sequence2) + 1
        matrix = [[0] * cols for _ in range(rows)]

        # Fill the matrix.
        for i in range(1, rows):
            for j in range(1, cols):
                match_score = matrix[i - 1][j - 1] + (match_value if sequence1[i - 1] == sequence2[j - 1] else mismatch_penalty)
                delete_score = matrix[i - 1][j] + gap_penalty
                insert_score = matrix[i][j - 1] + gap_penalty
                matrix[i][j] = max(0, match_score, delete_score, insert_score)

        # Find the maximum score.
        max_i = 0
        max_j = 0
        max_score = -1
        alignment1 = []
        alignment2 = []
        for i in range(1, rows):
            for j in range(1, cols):
                if matrix[i][j] > max_score:
                    max_score = matrix[i][j]
                    max_i = i
                    max_j = j

        # Traceback
        i = max_i
        j = max_j
        while i > 0 and j > 0 and matrix[i][j] > 0:
            if matrix[i][j] == matrix[i - 1][j - 1] + (match_value if sequence1[i - 1] == sequence2[j - 1] else mismatch_penalty):
                alignment1.append(sequence1[i - 1])
                alignment2.append(sequence2[j - 1])
                i -= 1
                j -= 1
            elif matrix[i][j] == matrix[i - 1][j] + gap_penalty:
                alignment1.append(sequence1[i - 1])
                alignment2.append("-")
                i -= 1
            else:
                alignment1.append("-")
                alignment2.append(sequence2[j - 1])
                j -= 1

        alignment1.reverse()
        alignment2.reverse()

        return alignment1, alignment2

    @staticmethod
    def global_align(
            sequence1: Sequence,
            sequence2: Sequence,
            match_value: int = 2,
            mismatch_penalty: int = -1,
            gap_penalty: int = -1
    ) -> (List[str], List[str]):
        """
        Aligns two sequences using the Needleman-Wunsch algorithm.

        Args:
            sequence1 (Sequence): The first sequence.
            sequence2 (Sequence): The second sequence.
            match_value (int): The score for a match.
            mismatch_penalty (int): The penalty for a mismatch.
            gap_penalty (int): The penalty for a gap.
        Returns:
            List[str], List[str]: The aligned sequences.

        Raises:
            None
        """
        sequence1 = sequence1.sequence
        sequence2 = sequence2.sequence

        # Initialize the scoring matrix.
        rows = len(sequence1) + 1
        cols = len(sequence2) + 1
        matrix = [[0] * cols for _ in range(rows)]

        # Assign gap penalties.
        for i in range(1, rows):
            matrix[i][0] = i * gap_penalty
        for j in range(1, cols):
            matrix[0][j] = j * gap_penalty

        # Fill the scoring matrix.
        for i in range(1, rows):
            for j in range(1, cols):
                match_score = matrix[i - 1][j - 1] + (match_value if sequence1[i - 1] == sequence2[j - 1] else mismatch_penalty)
                delete_score = matrix[i - 1][j] + gap_penalty
                insert_score = matrix[i][j - 1] + gap_penalty
                matrix[i][j] = max(match_score, delete_score, insert_score)

        # Traceback.
        i = rows - 1
        j = cols - 1
        alignment1 = []
        alignment2 = []

        while i > 0 or j > 0:
            if i > 0 and j > 0 and matrix[i][j] == matrix[i - 1][j - 1] + (match_value if sequence1[i - 1] == sequence2[j - 1] else mismatch_penalty):
                alignment1.append(sequence1[i - 1])
                alignment2.append(sequence2[j - 1])
                i -= 1
                j -= 1
            elif i > 0 and matrix[i][j] == matrix[i - 1][j] + gap_penalty:
                alignment1.append(sequence1[i - 1])
                alignment2.append("-")
                i -= 1
            else:
                alignment1.append("-")
                alignment2.append(sequence2[j - 1])
                j -= 1

        alignment1.reverse()
        alignment2.reverse()

        return alignment1, alignment2

Classes

class SequenceAlignment
Expand source code
class SequenceAlignment:
    @staticmethod
    def local_align(
            sequence1: Sequence,
            sequence2: Sequence,
            match_value: int = 2,
            mismatch_penalty: int = -1,
            gap_penalty: int = -1) -> (List[str], List[str]):
        """
        Aligns two sequences using the Smith-Waterman algorithm.

        Args:
            sequence1 (Sequence): The first sequence.
            sequence2 (Sequence): The second sequence.
            match_value (int): The score for a match.
            mismatch_penalty (int): The penalty for a mismatch.
            gap_penalty (int): The penalty for a gap.

        Returns:
            List[str], List[str]: The aligned sequences.

        Raises:
            None
        """
        sequence1 = sequence1.sequence
        sequence2 = sequence2.sequence

        # Initialize the matrix.
        rows = len(sequence1) + 1
        cols = len(sequence2) + 1
        matrix = [[0] * cols for _ in range(rows)]

        # Fill the matrix.
        for i in range(1, rows):
            for j in range(1, cols):
                match_score = matrix[i - 1][j - 1] + (match_value if sequence1[i - 1] == sequence2[j - 1] else mismatch_penalty)
                delete_score = matrix[i - 1][j] + gap_penalty
                insert_score = matrix[i][j - 1] + gap_penalty
                matrix[i][j] = max(0, match_score, delete_score, insert_score)

        # Find the maximum score.
        max_i = 0
        max_j = 0
        max_score = -1
        alignment1 = []
        alignment2 = []
        for i in range(1, rows):
            for j in range(1, cols):
                if matrix[i][j] > max_score:
                    max_score = matrix[i][j]
                    max_i = i
                    max_j = j

        # Traceback
        i = max_i
        j = max_j
        while i > 0 and j > 0 and matrix[i][j] > 0:
            if matrix[i][j] == matrix[i - 1][j - 1] + (match_value if sequence1[i - 1] == sequence2[j - 1] else mismatch_penalty):
                alignment1.append(sequence1[i - 1])
                alignment2.append(sequence2[j - 1])
                i -= 1
                j -= 1
            elif matrix[i][j] == matrix[i - 1][j] + gap_penalty:
                alignment1.append(sequence1[i - 1])
                alignment2.append("-")
                i -= 1
            else:
                alignment1.append("-")
                alignment2.append(sequence2[j - 1])
                j -= 1

        alignment1.reverse()
        alignment2.reverse()

        return alignment1, alignment2

    @staticmethod
    def global_align(
            sequence1: Sequence,
            sequence2: Sequence,
            match_value: int = 2,
            mismatch_penalty: int = -1,
            gap_penalty: int = -1
    ) -> (List[str], List[str]):
        """
        Aligns two sequences using the Needleman-Wunsch algorithm.

        Args:
            sequence1 (Sequence): The first sequence.
            sequence2 (Sequence): The second sequence.
            match_value (int): The score for a match.
            mismatch_penalty (int): The penalty for a mismatch.
            gap_penalty (int): The penalty for a gap.
        Returns:
            List[str], List[str]: The aligned sequences.

        Raises:
            None
        """
        sequence1 = sequence1.sequence
        sequence2 = sequence2.sequence

        # Initialize the scoring matrix.
        rows = len(sequence1) + 1
        cols = len(sequence2) + 1
        matrix = [[0] * cols for _ in range(rows)]

        # Assign gap penalties.
        for i in range(1, rows):
            matrix[i][0] = i * gap_penalty
        for j in range(1, cols):
            matrix[0][j] = j * gap_penalty

        # Fill the scoring matrix.
        for i in range(1, rows):
            for j in range(1, cols):
                match_score = matrix[i - 1][j - 1] + (match_value if sequence1[i - 1] == sequence2[j - 1] else mismatch_penalty)
                delete_score = matrix[i - 1][j] + gap_penalty
                insert_score = matrix[i][j - 1] + gap_penalty
                matrix[i][j] = max(match_score, delete_score, insert_score)

        # Traceback.
        i = rows - 1
        j = cols - 1
        alignment1 = []
        alignment2 = []

        while i > 0 or j > 0:
            if i > 0 and j > 0 and matrix[i][j] == matrix[i - 1][j - 1] + (match_value if sequence1[i - 1] == sequence2[j - 1] else mismatch_penalty):
                alignment1.append(sequence1[i - 1])
                alignment2.append(sequence2[j - 1])
                i -= 1
                j -= 1
            elif i > 0 and matrix[i][j] == matrix[i - 1][j] + gap_penalty:
                alignment1.append(sequence1[i - 1])
                alignment2.append("-")
                i -= 1
            else:
                alignment1.append("-")
                alignment2.append(sequence2[j - 1])
                j -= 1

        alignment1.reverse()
        alignment2.reverse()

        return alignment1, alignment2

Static methods

def global_align(sequence1: Sequence, sequence2: Sequence, match_value: int = 2, mismatch_penalty: int = -1, gap_penalty: int = -1) ‑> (typing.List[str], typing.List[str])

Aligns two sequences using the Needleman-Wunsch algorithm.

Args

sequence1 : Sequence
The first sequence.
sequence2 : Sequence
The second sequence.
match_value : int
The score for a match.
mismatch_penalty : int
The penalty for a mismatch.
gap_penalty : int
The penalty for a gap.

Returns

List[str], List[str]
The aligned sequences.

Raises

None

Expand source code
@staticmethod
def global_align(
        sequence1: Sequence,
        sequence2: Sequence,
        match_value: int = 2,
        mismatch_penalty: int = -1,
        gap_penalty: int = -1
) -> (List[str], List[str]):
    """
    Aligns two sequences using the Needleman-Wunsch algorithm.

    Args:
        sequence1 (Sequence): The first sequence.
        sequence2 (Sequence): The second sequence.
        match_value (int): The score for a match.
        mismatch_penalty (int): The penalty for a mismatch.
        gap_penalty (int): The penalty for a gap.
    Returns:
        List[str], List[str]: The aligned sequences.

    Raises:
        None
    """
    sequence1 = sequence1.sequence
    sequence2 = sequence2.sequence

    # Initialize the scoring matrix.
    rows = len(sequence1) + 1
    cols = len(sequence2) + 1
    matrix = [[0] * cols for _ in range(rows)]

    # Assign gap penalties.
    for i in range(1, rows):
        matrix[i][0] = i * gap_penalty
    for j in range(1, cols):
        matrix[0][j] = j * gap_penalty

    # Fill the scoring matrix.
    for i in range(1, rows):
        for j in range(1, cols):
            match_score = matrix[i - 1][j - 1] + (match_value if sequence1[i - 1] == sequence2[j - 1] else mismatch_penalty)
            delete_score = matrix[i - 1][j] + gap_penalty
            insert_score = matrix[i][j - 1] + gap_penalty
            matrix[i][j] = max(match_score, delete_score, insert_score)

    # Traceback.
    i = rows - 1
    j = cols - 1
    alignment1 = []
    alignment2 = []

    while i > 0 or j > 0:
        if i > 0 and j > 0 and matrix[i][j] == matrix[i - 1][j - 1] + (match_value if sequence1[i - 1] == sequence2[j - 1] else mismatch_penalty):
            alignment1.append(sequence1[i - 1])
            alignment2.append(sequence2[j - 1])
            i -= 1
            j -= 1
        elif i > 0 and matrix[i][j] == matrix[i - 1][j] + gap_penalty:
            alignment1.append(sequence1[i - 1])
            alignment2.append("-")
            i -= 1
        else:
            alignment1.append("-")
            alignment2.append(sequence2[j - 1])
            j -= 1

    alignment1.reverse()
    alignment2.reverse()

    return alignment1, alignment2
def local_align(sequence1: Sequence, sequence2: Sequence, match_value: int = 2, mismatch_penalty: int = -1, gap_penalty: int = -1) ‑> (typing.List[str], typing.List[str])

Aligns two sequences using the Smith-Waterman algorithm.

Args

sequence1 : Sequence
The first sequence.
sequence2 : Sequence
The second sequence.
match_value : int
The score for a match.
mismatch_penalty : int
The penalty for a mismatch.
gap_penalty : int
The penalty for a gap.

Returns

List[str], List[str]
The aligned sequences.

Raises

None

Expand source code
@staticmethod
def local_align(
        sequence1: Sequence,
        sequence2: Sequence,
        match_value: int = 2,
        mismatch_penalty: int = -1,
        gap_penalty: int = -1) -> (List[str], List[str]):
    """
    Aligns two sequences using the Smith-Waterman algorithm.

    Args:
        sequence1 (Sequence): The first sequence.
        sequence2 (Sequence): The second sequence.
        match_value (int): The score for a match.
        mismatch_penalty (int): The penalty for a mismatch.
        gap_penalty (int): The penalty for a gap.

    Returns:
        List[str], List[str]: The aligned sequences.

    Raises:
        None
    """
    sequence1 = sequence1.sequence
    sequence2 = sequence2.sequence

    # Initialize the matrix.
    rows = len(sequence1) + 1
    cols = len(sequence2) + 1
    matrix = [[0] * cols for _ in range(rows)]

    # Fill the matrix.
    for i in range(1, rows):
        for j in range(1, cols):
            match_score = matrix[i - 1][j - 1] + (match_value if sequence1[i - 1] == sequence2[j - 1] else mismatch_penalty)
            delete_score = matrix[i - 1][j] + gap_penalty
            insert_score = matrix[i][j - 1] + gap_penalty
            matrix[i][j] = max(0, match_score, delete_score, insert_score)

    # Find the maximum score.
    max_i = 0
    max_j = 0
    max_score = -1
    alignment1 = []
    alignment2 = []
    for i in range(1, rows):
        for j in range(1, cols):
            if matrix[i][j] > max_score:
                max_score = matrix[i][j]
                max_i = i
                max_j = j

    # Traceback
    i = max_i
    j = max_j
    while i > 0 and j > 0 and matrix[i][j] > 0:
        if matrix[i][j] == matrix[i - 1][j - 1] + (match_value if sequence1[i - 1] == sequence2[j - 1] else mismatch_penalty):
            alignment1.append(sequence1[i - 1])
            alignment2.append(sequence2[j - 1])
            i -= 1
            j -= 1
        elif matrix[i][j] == matrix[i - 1][j] + gap_penalty:
            alignment1.append(sequence1[i - 1])
            alignment2.append("-")
            i -= 1
        else:
            alignment1.append("-")
            alignment2.append(sequence2[j - 1])
            j -= 1

    alignment1.reverse()
    alignment2.reverse()

    return alignment1, alignment2