Module protkit.properties.circular_variance

Implements class CircularVariance to calculate the circular variance of a protein.

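Circular variance is a measure of how uniformly a set of directions is spread over a sphere. For a given point it is calculated as 1 - |sum(unit_vectors)| / n, where n is the number of neighbouring points within the chosen radius and unit_vectors are the unit-length vectors pointing from the point to each of those neighbours. Values near 0 mean the neighbours all lie in roughly the same direction; values near 1 mean their directions largely cancel out.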

The circular variance of a protein can be calculated for each atom or residue. For atoms, the circular variance is calculated using the coordinates of the atom. For residues, the circular variance is calculated using the coordinates of the alpha carbon atom of the residue.

Expand source code
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
# Authors:  Fred Senekal (FS)
# Contact:  fred@silicogenesis.com
# License:  GPLv3

"""
Implements class `CircularVariance` to calculate the circular variance of a protein.

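Circular variance is a measure of how uniformly a set of directions is spread
over a sphere. For a given point it is calculated as 1 - |sum(unit_vectors)| / n,
where n is the number of neighbouring points within the chosen radius and
unit_vectors are the unit-length vectors pointing from the point to each of
those neighbours. Values near 0 mean the neighbours all lie in roughly the same
direction; values near 1 mean their directions largely cancel out.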

The circular variance of a protein can be calculated for each atom or residue.
For atoms, the circular variance is calculated using the coordinates of the atom.
For residues, the circular variance is calculated using the coordinates of the alpha carbon
atom of the residue.
"""

from typing import List
import numpy as np

from protkit.structure.protein import Protein
from protkit.geometry.space_query import SpaceQuery


class CircularVariance:
    """
    Namespace of static methods that calculate circular variance.

    The circular variance of a point is 1 - |sum(unit_vectors)| / n, where
    unit_vectors are the unit vectors from the point to each of its n
    neighbours within a given radius.
    """

    @staticmethod
    def circular_variance(coordinates: List[List[float]], radius: float = 5.0) -> List[float]:
        """
        Calculate the circular variance of each point relative to its neighbours.

        Args:
            coordinates (List[List[float]]): The coordinates of the points.
            radius (float): The distance within which other points count as neighbours.

        Returns:
            List[float]: The circular variance of each point. The value is NaN
                for a point that has no neighbour within the radius, because
                the quantity is undefined there. An empty input yields an
                empty list.
        """
        # Nothing to query; do not construct a SpaceQuery over no points.
        if len(coordinates) == 0:
            return []

        # Set up the space query.
        space_query = SpaceQuery(coordinates)
        points = space_query.coordinates

        circular_variance = []
        for point in points:
            # Determine the neighbours within the radius and their distances.
            neighbours, distances = space_query.query_distance_np([point], radius)
            neighbours = neighbours[0]
            distances = distances[0]

            # Drop the point itself (distance 0) and anything closer than 0.01,
            # which cannot be normalised to a meaningful direction.
            is_neighbour = distances > 0.01
            neighbour_indices = neighbours[is_neighbour]
            neighbour_count = neighbour_indices.shape[0]

            if neighbour_count == 0:
                # 1 - |0| / 0 is undefined: report NaN explicitly rather than
                # relying on a division by zero (and its RuntimeWarning).
                circular_variance.append(float("nan"))
                continue

            # Unit vectors from the point to each neighbour.
            unit_vectors = points[neighbour_indices] - np.array([point])
            unit_vectors /= distances[is_neighbour].reshape(-1, 1)

            cv = 1.0 - np.linalg.norm(np.sum(unit_vectors, axis=0)) / neighbour_count
            circular_variance.append(float(cv))

        return circular_variance

    @staticmethod
    def circular_variance_by_atom(protein: Protein,
                                  radius: float = 5.0,
                                  assign_attribute: bool = False,
                                  key: str = "cv_atom") -> List[float]:
        """
        Calculate the circular variance of a protein for each atom.

        Args:
            protein (Protein): The protein for which to calculate the circular variance.
            radius (float): The radius to use for the calculation.
            assign_attribute (bool): Whether to assign the circular variance to the atoms.
            key (str): The key to use for the attribute.

        Returns:
            List[float]: The circular variance of each atom in the protein.
        """
        # Materialise the atoms once so the coordinates and the optional
        # attribute assignment are guaranteed to walk the same sequence.
        atoms = list(protein.atoms)
        atom_coordinates = [(atom.x, atom.y, atom.z) for atom in atoms]

        # Calculate circular variance
        circular_variance_by_atom = CircularVariance.circular_variance(atom_coordinates, radius)

        # Assign circular variance to atoms
        if assign_attribute:
            for atom, cv in zip(atoms, circular_variance_by_atom):
                atom.set_attribute(key, cv)

        return circular_variance_by_atom

    @staticmethod
    def circular_variance_by_residue(protein: Protein,
                                     radius: float = 5.0,
                                     assign_attribute: bool = False,
                                     key: str = "cv_residue") -> List[float]:
        """
        Calculate the circular variance of a protein for each residue.

        Each residue is represented by the coordinates of its alpha carbon ("CA") atom.

        Args:
            protein (Protein): The protein for which to calculate the circular variance.
            radius (float): The radius to use for the calculation.
            assign_attribute (bool): Whether to assign the circular variance to the residues.
            key (str): The key to use for the attribute.

        Returns:
            List[float]: The circular variance of each residue in the protein.
        """
        # Materialise the residues once so the coordinates and the optional
        # attribute assignment are guaranteed to walk the same sequence.
        residues = list(protein.filter_residues())

        # Look up each alpha carbon a single time.
        # NOTE(review): assumes every residue has a "CA" atom - confirm how
        # get_atom behaves when it is missing.
        alpha_carbons = [residue.get_atom("CA") for residue in residues]
        residue_coordinates = [(ca.x, ca.y, ca.z) for ca in alpha_carbons]

        # Calculate circular variance
        circular_variance_by_residue = CircularVariance.circular_variance(residue_coordinates, radius)

        # Assign circular variance to residues
        if assign_attribute:
            for residue, cv in zip(residues, circular_variance_by_residue):
                residue.set_attribute(key, cv)

        return circular_variance_by_residue

Classes

class CircularVariance
Expand source code
class CircularVariance:
    """
    Namespace of static methods that calculate circular variance.

    The circular variance of a point is 1 - |sum(unit_vectors)| / n, where
    unit_vectors are the unit vectors from the point to each of its n
    neighbours within a given radius.
    """

    @staticmethod
    def circular_variance(coordinates: List[List[float]], radius: float = 5.0) -> List[float]:
        """
        Calculate the circular variance of each point relative to its neighbours.

        Args:
            coordinates (List[List[float]]): The coordinates of the points.
            radius (float): The distance within which other points count as neighbours.

        Returns:
            List[float]: The circular variance of each point. The value is NaN
                for a point that has no neighbour within the radius, because
                the quantity is undefined there. An empty input yields an
                empty list.
        """
        # Nothing to query; do not construct a SpaceQuery over no points.
        if len(coordinates) == 0:
            return []

        # Set up the space query.
        space_query = SpaceQuery(coordinates)
        points = space_query.coordinates

        circular_variance = []
        for point in points:
            # Determine the neighbours within the radius and their distances.
            neighbours, distances = space_query.query_distance_np([point], radius)
            neighbours = neighbours[0]
            distances = distances[0]

            # Drop the point itself (distance 0) and anything closer than 0.01,
            # which cannot be normalised to a meaningful direction.
            is_neighbour = distances > 0.01
            neighbour_indices = neighbours[is_neighbour]
            neighbour_count = neighbour_indices.shape[0]

            if neighbour_count == 0:
                # 1 - |0| / 0 is undefined: report NaN explicitly rather than
                # relying on a division by zero (and its RuntimeWarning).
                circular_variance.append(float("nan"))
                continue

            # Unit vectors from the point to each neighbour.
            unit_vectors = points[neighbour_indices] - np.array([point])
            unit_vectors /= distances[is_neighbour].reshape(-1, 1)

            cv = 1.0 - np.linalg.norm(np.sum(unit_vectors, axis=0)) / neighbour_count
            circular_variance.append(float(cv))

        return circular_variance

    @staticmethod
    def circular_variance_by_atom(protein: Protein,
                                  radius: float = 5.0,
                                  assign_attribute: bool = False,
                                  key: str = "cv_atom") -> List[float]:
        """
        Calculate the circular variance of a protein for each atom.

        Args:
            protein (Protein): The protein for which to calculate the circular variance.
            radius (float): The radius to use for the calculation.
            assign_attribute (bool): Whether to assign the circular variance to the atoms.
            key (str): The key to use for the attribute.

        Returns:
            List[float]: The circular variance of each atom in the protein.
        """
        # Materialise the atoms once so the coordinates and the optional
        # attribute assignment are guaranteed to walk the same sequence.
        atoms = list(protein.atoms)
        atom_coordinates = [(atom.x, atom.y, atom.z) for atom in atoms]

        # Calculate circular variance
        circular_variance_by_atom = CircularVariance.circular_variance(atom_coordinates, radius)

        # Assign circular variance to atoms
        if assign_attribute:
            for atom, cv in zip(atoms, circular_variance_by_atom):
                atom.set_attribute(key, cv)

        return circular_variance_by_atom

    @staticmethod
    def circular_variance_by_residue(protein: Protein,
                                     radius: float = 5.0,
                                     assign_attribute: bool = False,
                                     key: str = "cv_residue") -> List[float]:
        """
        Calculate the circular variance of a protein for each residue.

        Each residue is represented by the coordinates of its alpha carbon ("CA") atom.

        Args:
            protein (Protein): The protein for which to calculate the circular variance.
            radius (float): The radius to use for the calculation.
            assign_attribute (bool): Whether to assign the circular variance to the residues.
            key (str): The key to use for the attribute.

        Returns:
            List[float]: The circular variance of each residue in the protein.
        """
        # Materialise the residues once so the coordinates and the optional
        # attribute assignment are guaranteed to walk the same sequence.
        residues = list(protein.filter_residues())

        # Look up each alpha carbon a single time.
        # NOTE(review): assumes every residue has a "CA" atom - confirm how
        # get_atom behaves when it is missing.
        alpha_carbons = [residue.get_atom("CA") for residue in residues]
        residue_coordinates = [(ca.x, ca.y, ca.z) for ca in alpha_carbons]

        # Calculate circular variance
        circular_variance_by_residue = CircularVariance.circular_variance(residue_coordinates, radius)

        # Assign circular variance to residues
        if assign_attribute:
            for residue, cv in zip(residues, circular_variance_by_residue):
                residue.set_attribute(key, cv)

        return circular_variance_by_residue

Static methods

def circular_variance(coordinates: List[List[float]], radius: float = 5.0) ‑> List[float]
Expand source code
@staticmethod
def circular_variance(coordinates: List[List[float]], radius: float = 5.0) -> List[float]:
    """
    Calculate the circular variance of each point relative to its neighbours.

    For every point, the neighbours within `radius` are looked up and the
    circular variance is computed as 1 - |sum(unit_vectors)| / n, where
    unit_vectors are the unit vectors from the point to each of its n
    neighbours.

    Args:
        coordinates (List[List[float]]): The coordinates of the points.
        radius (float): The distance within which other points count as neighbours.

    Returns:
        List[float]: The circular variance of each point. The value is NaN
            for a point that has no neighbour within the radius, because
            the quantity is undefined there. An empty input yields an
            empty list.
    """
    # Nothing to query; do not construct a SpaceQuery over no points.
    if len(coordinates) == 0:
        return []

    # Set up the space query.
    space_query = SpaceQuery(coordinates)
    points = space_query.coordinates

    circular_variance = []
    for point in points:
        # Determine the neighbours within the radius and their distances.
        neighbours, distances = space_query.query_distance_np([point], radius)
        neighbours = neighbours[0]
        distances = distances[0]

        # Drop the point itself (distance 0) and anything closer than 0.01,
        # which cannot be normalised to a meaningful direction.
        is_neighbour = distances > 0.01
        neighbour_indices = neighbours[is_neighbour]
        neighbour_count = neighbour_indices.shape[0]

        if neighbour_count == 0:
            # 1 - |0| / 0 is undefined: report NaN explicitly rather than
            # relying on a division by zero (and its RuntimeWarning).
            circular_variance.append(float("nan"))
            continue

        # Unit vectors from the point to each neighbour.
        unit_vectors = points[neighbour_indices] - np.array([point])
        unit_vectors /= distances[is_neighbour].reshape(-1, 1)

        cv = 1.0 - np.linalg.norm(np.sum(unit_vectors, axis=0)) / neighbour_count
        circular_variance.append(float(cv))

    return circular_variance
def circular_variance_by_atom(protein: Protein, radius: float = 5.0, assign_attribute: bool = False, key: str = 'cv_atom') ‑> List[float]

Calculate the circular variance of a protein for each atom.

Args

protein : Protein
The protein for which to calculate the circular variance.
radius : float
The radius to use for the calculation.
assign_attribute : bool
Whether to assign the circular variance to the atoms.
key : str
The key to use for the attribute.

Returns

List[float]
The circular variance of each atom in the protein.
Expand source code
@staticmethod
def circular_variance_by_atom(protein: Protein,
                              radius: float = 5.0,
                              assign_attribute: bool = False,
                              key: str = "cv_atom") -> List[float]:
    """
    Compute the per-atom circular variance of a protein.

    Args:
        protein (Protein): The protein whose atoms are evaluated.
        radius (float): The neighbourhood radius passed on to the calculation.
        assign_attribute (bool): When True, store each value on its atom.
        key (str): The attribute name under which the value is stored.

    Returns:
        List[float]: One circular variance value per atom of the protein.
    """
    # Gather one (x, y, z) tuple per atom.
    positions = []
    for atom in protein.atoms:
        positions.append((atom.x, atom.y, atom.z))

    # Delegate the numerical work to the coordinate-based routine.
    values = CircularVariance.circular_variance(positions, radius)

    # Without assignment there is nothing left to do.
    if not assign_attribute:
        return values

    # Attach each value to the corresponding atom.
    for atom, value in zip(protein.atoms, values):
        atom.set_attribute(key, value)

    return values
def circular_variance_by_residue(protein: Protein, radius: float = 5.0, assign_attribute: bool = False, key: str = 'cv_residue') ‑> List[float]

Calculate the circular variance of a protein for each residue.

Args

protein : Protein
The protein for which to calculate the circular variance.
radius : float
The radius to use for the calculation.
assign_attribute : bool
Whether to assign the circular variance to the residues.
key : str
The key to use for the attribute.

Returns

List[float]
The circular variance of each residue in the protein.
Expand source code
@staticmethod
def circular_variance_by_residue(protein: Protein,
                                 radius: float = 5.0,
                                 assign_attribute: bool = False,
                                 key: str = "cv_residue") -> List[float]:
    """
    Calculate the circular variance of a protein for each residue.

    Each residue is represented by the coordinates of its alpha carbon ("CA") atom.

    Args:
        protein (Protein): The protein for which to calculate the circular variance.
        radius (float): The radius to use for the calculation.
        assign_attribute (bool): Whether to assign the circular variance to the residues.
        key (str): The key to use for the attribute.

    Returns:
        List[float]: The circular variance of each residue in the protein.
    """
    # Materialise the residues once so the coordinates and the optional
    # attribute assignment are guaranteed to walk the same sequence.
    residues = list(protein.filter_residues())

    # Look up each alpha carbon a single time instead of once per coordinate.
    # NOTE(review): assumes every residue has a "CA" atom - confirm how
    # get_atom behaves when it is missing.
    alpha_carbons = [residue.get_atom("CA") for residue in residues]
    residue_coordinates = [(ca.x, ca.y, ca.z) for ca in alpha_carbons]

    # Calculate circular variance
    circular_variance_by_residue = CircularVariance.circular_variance(residue_coordinates, radius)

    # Assign circular variance to residues
    if assign_attribute:
        for residue, cv in zip(residues, circular_variance_by_residue):
            residue.set_attribute(key, cv)

    return circular_variance_by_residue