Module protkit.download.download

Implements the Download class to download biological data from the internet.

Currently, downloading of the following data is supported:

  • PDB files from the RCSB and Sabdab
  • CIF files from the RCSB
  • Binary CIF files from the RCSB
  • FASTA files from the RCSB and Uniprot

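For more information about the various data sources, see the following URLs:

  • RCSB: https://www.rcsb.org/
  • Uniprot: https://www.uniprot.org/
  • Sabdab: https://opig.stats.ox.ac.uk/webapps/sabdab-sabpred/sabdab/

A list of download services provided by RCSB is available at:

  • https://www.rcsb.org/docs/programmatic-access/file-download-services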

Expand source code
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
# Authors:  Fred Senekal (FS)
# Contact:  fred@silicogenesis.com
# License:  GPLv3

"""
Implements the `Download` class to download biological data from the internet.

Currently, downloading of the following data is supported:

- PDB files from the RCSB and Sabdab
- CIF files from the RCSB
- Binary CIF files from the RCSB
- FASTA files from the RCSB and Uniprot

For more information about the various data sources, see the following URLs:

- RCSB: https://www.rcsb.org/
- Uniprot: https://www.uniprot.org/
- Sabdab: https://opig.stats.ox.ac.uk/webapps/sabdab-sabpred/sabdab/

A list of download services provided by RCSB is available at:

- https://www.rcsb.org/docs/programmatic-access/file-download-services

"""
import os.path

import requests
from joblib import Parallel, delayed
from typing import List, Union


class Download:
    """
    Class `Download` is a base class for downloading
    biological data from the internet.

    All methods are static. The single-file helpers delegate to
    `download_file`; the multi-file helpers delegate to `parallel_download`.
    """

    # URL templates shared by the single-file and multi-file helpers, so the
    # two variants of each download cannot drift apart.
    _RCSB_FASTA_URL = "https://www.rcsb.org/fasta/entry/{}"
    _UNIPROT_FASTA_URL = "https://www.uniprot.org/uniprot/{}.fasta"
    _RCSB_PDB_URL = "https://files.rcsb.org/download/{}.pdb"
    _SABDAB_PDB_URL = "https://opig.stats.ox.ac.uk/webapps/sabdab-sabpred/sabdab/pdb/{}/?raw=true"
    _RCSB_CIF_URL = "https://files.rcsb.org/download/{}.cif"
    _RCSB_BCIF_URL = "https://models.rcsb.org/{}.bcif"

    @staticmethod
    def _ensure_parent_directory(file_path: str) -> None:
        """Create the directory that will contain `file_path`, if it has one."""
        directory = os.path.dirname(file_path)
        if directory:
            # exist_ok avoids a check-then-create race when several parallel
            # workers write into the same, not yet existing, directory.
            os.makedirs(directory, exist_ok=True)

    @staticmethod
    def _resolve_file_path(file_path_or_directory: str, file_name: str) -> str:
        """Return the file to write: `file_name` inside a directory, else the path itself."""
        is_directory = os.path.isdir(file_path_or_directory)
        # A non-empty path ending in a separator (e.g. "out/") can only denote
        # a directory, even if that directory does not exist yet.
        if not is_directory and file_path_or_directory:
            is_directory = os.path.basename(file_path_or_directory) == ""
        if is_directory:
            return os.path.join(file_path_or_directory, file_name)
        return file_path_or_directory

    @staticmethod
    def download_file(url: str, file_path: str) -> None:
        """
        Downloads a single file from the internet.

        Uses the `requests` library to download the file. The parent
        directory of `file_path` is created if it does not exist.

        Responses whose status code is not 200 are ignored: nothing is
        written and no exception is raised.

        Args:
            url (str): The URL of the file.
            file_path (str): The path to the file.

        Returns:
            None

        Raises:
            Exception: Any error raised by `requests` while fetching the URL,
                or by the OS while creating the directory or writing the file.
        """
        # NOTE: no timeout is passed, so the call waits for as long as the
        # server keeps the connection open.
        response = requests.get(url)
        if response.status_code != 200:
            return

        Download._ensure_parent_directory(file_path)

        # Write the file to disk.
        with open(file_path, "wb") as file:
            file.write(response.content)

    @staticmethod
    def _download_file(url: str, file_path: str) -> None:
        """
        Downloads a single file from the internet.

        Uses the `urllib` library to download the file. The parent
        directory of `file_path` is created if it does not exist, matching
        the behaviour of `download_file`.

        Args:
            url (str): The URL of the file.
            file_path (str): The path to the file.

        Returns:
            None

        Raises:
            Exception: If the file could not be downloaded or saved.
        """
        import urllib.request

        with urllib.request.urlopen(url) as response:
            content = response.read()

        Download._ensure_parent_directory(file_path)

        with open(file_path, "wb") as file:
            file.write(content)

    @staticmethod
    def parallel_download(
            urls: List[str],
            file_paths: List[str],
            n_jobs: int = -1) -> None:
        """
        Downloads multiple files from the internet in parallel.

        Each URL is paired with the file path at the same position and
        fetched with `download_file`.

        Args:
            urls (List[str]): The URLs of the files.
            file_paths (List[str]): The paths to the files.
            n_jobs (int): The number of jobs to run in parallel.
                If -1, the number of jobs is set to the number of CPU cores.

        Returns:
            None

        Raises:
            ValueError: If `urls` and `file_paths` differ in length.
            Exception: If the files could not be downloaded or saved.
        """
        urls = list(urls)
        file_paths = list(file_paths)
        # zip() would silently drop the unmatched tail; fail loudly instead.
        if len(urls) != len(file_paths):
            raise ValueError(
                f"urls and file_paths must have the same length "
                f"(got {len(urls)} and {len(file_paths)})")

        Parallel(n_jobs=n_jobs)(delayed(Download.download_file)(url, file_path)
                                for url, file_path in zip(urls, file_paths))

    @staticmethod
    def download_fasta_file_from_rcsb(
            pdb_id: str,
            file_path_or_directory: str) -> None:
        """
        Downloads a single FASTA file from the RCSB.

        Args:
            pdb_id (str): The ID of the PDB file.
            file_path_or_directory (str): The path where the FASTA file should be saved.
                If the path is a directory, the file is saved in the directory
                with the name <pdb_id>.fasta.
        """
        url = Download._RCSB_FASTA_URL.format(pdb_id)
        file_path = Download._resolve_file_path(file_path_or_directory, f"{pdb_id}.fasta")
        Download.download_file(url, file_path)

    @staticmethod
    def download_fasta_files_from_rcsb(
            pdb_ids: List[str],
            directory: str,
            n_jobs: int = -1) -> None:
        """
        Downloads multiple FASTA files from the RCSB.

        Args:
            pdb_ids (List[str]): The IDs of the PDB files.
            directory (str): The directory where the FASTA files should be saved.
                Each file is named <pdb_id>.fasta.
            n_jobs (int): The number of jobs to run in parallel.
                If -1, the number of jobs is set to the number of CPU cores.

        Returns:
            None
        """
        urls = [Download._RCSB_FASTA_URL.format(pdb_id) for pdb_id in pdb_ids]
        file_paths = [os.path.join(directory, f"{pdb_id}.fasta") for pdb_id in pdb_ids]
        Download.parallel_download(urls, file_paths, n_jobs=n_jobs)

    @staticmethod
    def download_fasta_file_from_uniprot(
            uniprot_id: str,
            file_path_or_directory: str) -> None:
        """
        Downloads a single FASTA file from Uniprot.

        Args:
            uniprot_id (str): The ID of the UniProt file.
            file_path_or_directory (str): The path where the FASTA file should be saved.
                If the path is a directory, the file is saved in the directory
                with the name <uniprot_id>.fasta.
        """
        url = Download._UNIPROT_FASTA_URL.format(uniprot_id)
        file_path = Download._resolve_file_path(file_path_or_directory, f"{uniprot_id}.fasta")
        Download.download_file(url, file_path)

    @staticmethod
    def download_fasta_files_from_uniprot(
            uniprot_ids: List[str],
            directory: str,
            n_jobs: int = -1) -> None:
        """
        Downloads multiple FASTA files from Uniprot.

        Args:
            uniprot_ids (List[str]): The IDs of the UniProt files.
            directory (str): The directory where the FASTA files should be saved.
                Each file is named <uniprot_id>.fasta.
            n_jobs (int): The number of jobs to run in parallel.
                If -1, the number of jobs is set to the number of CPU cores.

        Returns:
            None
        """
        urls = [Download._UNIPROT_FASTA_URL.format(uniprot_id) for uniprot_id in uniprot_ids]
        file_paths = [os.path.join(directory, f"{uniprot_id}.fasta") for uniprot_id in uniprot_ids]
        Download.parallel_download(urls, file_paths, n_jobs=n_jobs)

    @staticmethod
    def download_pdb_file_from_rcsb(
            pdb_id: str,
            file_path_or_directory: str) -> None:
        """
        Downloads a single PDB file from the RCSB.

        Args:
            pdb_id (str): The ID of the PDB file.
            file_path_or_directory (str): The path where the PDB file should be saved.
                If the path is a directory, the file is saved in the directory
                with the name <pdb_id>.pdb.
        """
        download_url = Download._RCSB_PDB_URL.format(pdb_id)
        file_path = Download._resolve_file_path(file_path_or_directory, f"{pdb_id}.pdb")
        Download.download_file(download_url, file_path)

    @staticmethod
    def download_pdb_files_from_rcsb(
            pdb_ids: List[str],
            directory: str,
            n_jobs: int = -1) -> None:
        """
        Downloads multiple PDB files from the RCSB.

        Args:
            pdb_ids (List[str]): The IDs of the PDB files.
            directory (str): The directory where the PDB files should be saved.
                Each file is named <pdb_id>.pdb.
            n_jobs (int): The number of jobs to run in parallel.
                If -1, the number of jobs is set to the number of CPU cores.

        Returns:
            None
        """
        urls = [Download._RCSB_PDB_URL.format(pdb_id) for pdb_id in pdb_ids]
        file_paths = [os.path.join(directory, f"{pdb_id}.pdb") for pdb_id in pdb_ids]
        Download.parallel_download(urls, file_paths, n_jobs=n_jobs)

    @staticmethod
    def download_pdb_file_from_sabdab(
            pdb_id: str,
            file_path_or_directory: str) -> None:
        """
        Downloads a single PDB file from Sabdab.

        The ID is lower-cased in the request URL; the saved file name keeps
        the ID exactly as given.

        Args:
            pdb_id (str): The ID of the PDB file.
            file_path_or_directory (str): The path where the PDB file should be saved.
                If the path is a directory, the file is saved in the directory
                with the name <pdb_id>.pdb.
        """
        download_url = Download._SABDAB_PDB_URL.format(pdb_id.lower())
        file_path = Download._resolve_file_path(file_path_or_directory, f"{pdb_id}.pdb")
        Download.download_file(download_url, file_path)

    @staticmethod
    def download_pdb_files_from_sabdab(
            pdb_ids: List[str],
            directory: str,
            n_jobs: int = -1) -> None:
        """
        Downloads multiple PDB files from Sabdab.

        The IDs are lower-cased in the request URLs; the saved file names
        keep the IDs exactly as given.

        Args:
            pdb_ids (List[str]): The IDs of the PDB files.
            directory (str): The directory where the PDB files should be saved.
                Each file is named <pdb_id>.pdb.
            n_jobs (int): The number of jobs to run in parallel.
                If -1, the number of jobs is set to the number of CPU cores.

        Returns:
            None
        """
        urls = [Download._SABDAB_PDB_URL.format(pdb_id.lower()) for pdb_id in pdb_ids]
        file_paths = [os.path.join(directory, f"{pdb_id}.pdb") for pdb_id in pdb_ids]
        Download.parallel_download(urls, file_paths, n_jobs=n_jobs)

    @staticmethod
    def download_cif_file_from_rcsb(
            pdb_id: str,
            file_path_or_directory: str) -> None:
        """
        Downloads a single CIF file from the RCSB.

        Args:
            pdb_id (str): The ID of the CIF file.
            file_path_or_directory (str): The path where the CIF file should be saved.
                If the path is a directory, the file is saved in the directory
                with the name <pdb_id>.cif.
        """
        download_url = Download._RCSB_CIF_URL.format(pdb_id)
        file_path = Download._resolve_file_path(file_path_or_directory, f"{pdb_id}.cif")
        Download.download_file(download_url, file_path)

    @staticmethod
    def download_cif_files_from_rcsb(
            pdb_ids: List[str],
            directory: str,
            n_jobs: int = -1) -> None:
        """
        Downloads multiple CIF files from the RCSB.

        Args:
            pdb_ids (List[str]): The IDs of the CIF files.
            directory (str): The directory where the CIF files should be saved.
                Each file is named <pdb_id>.cif.
            n_jobs (int): The number of jobs to run in parallel.
                If -1, the number of jobs is set to the number of CPU cores.

        Returns:
            None
        """
        urls = [Download._RCSB_CIF_URL.format(pdb_id) for pdb_id in pdb_ids]
        file_paths = [os.path.join(directory, f"{pdb_id}.cif") for pdb_id in pdb_ids]
        Download.parallel_download(urls, file_paths, n_jobs=n_jobs)

    @staticmethod
    def download_binary_cif_file_from_rcsb(
            pdb_id: str,
            file_path_or_directory: str) -> None:
        """
        Downloads a single Binary CIF file from the RCSB.

        Args:
            pdb_id (str): The ID of the Binary CIF file.
            file_path_or_directory (str): The path where the Binary CIF file should be saved.
                If the path is a directory, the file is saved in the directory
                with the name <pdb_id>.bcif.
        """
        download_url = Download._RCSB_BCIF_URL.format(pdb_id)
        file_path = Download._resolve_file_path(file_path_or_directory, f"{pdb_id}.bcif")
        Download.download_file(download_url, file_path)

    @staticmethod
    def download_binary_cif_files_from_rcsb(
            pdb_ids: List[str],
            directory: str,
            n_jobs: int = -1) -> None:
        """
        Downloads multiple Binary CIF files from the RCSB.

        Args:
            pdb_ids (List[str]): The IDs of the Binary CIF files.
            directory (str): The directory where the Binary CIF files should be saved.
                Each file is named <pdb_id>.bcif.
            n_jobs (int): The number of jobs to run in parallel.
                If -1, the number of jobs is set to the number of CPU cores.

        Returns:
            None
        """
        urls = [Download._RCSB_BCIF_URL.format(pdb_id) for pdb_id in pdb_ids]
        file_paths = [os.path.join(directory, f"{pdb_id}.bcif") for pdb_id in pdb_ids]
        Download.parallel_download(urls, file_paths, n_jobs=n_jobs)

Classes

class Download

Class Download is a base class for downloading biological data from the internet.

Expand source code
class Download:
    """
    Class `Download` is a base class for downloading
    biological data from the internet.

    All methods are static. The single-file helpers delegate to
    `download_file`; the multi-file helpers delegate to `parallel_download`.
    """

    # URL templates shared by the single-file and multi-file helpers, so the
    # two variants of each download cannot drift apart.
    _RCSB_FASTA_URL = "https://www.rcsb.org/fasta/entry/{}"
    _UNIPROT_FASTA_URL = "https://www.uniprot.org/uniprot/{}.fasta"
    _RCSB_PDB_URL = "https://files.rcsb.org/download/{}.pdb"
    _SABDAB_PDB_URL = "https://opig.stats.ox.ac.uk/webapps/sabdab-sabpred/sabdab/pdb/{}/?raw=true"
    _RCSB_CIF_URL = "https://files.rcsb.org/download/{}.cif"
    _RCSB_BCIF_URL = "https://models.rcsb.org/{}.bcif"

    @staticmethod
    def _ensure_parent_directory(file_path: str) -> None:
        """Create the directory that will contain `file_path`, if it has one."""
        directory = os.path.dirname(file_path)
        if directory:
            # exist_ok avoids a check-then-create race when several parallel
            # workers write into the same, not yet existing, directory.
            os.makedirs(directory, exist_ok=True)

    @staticmethod
    def _resolve_file_path(file_path_or_directory: str, file_name: str) -> str:
        """Return the file to write: `file_name` inside a directory, else the path itself."""
        is_directory = os.path.isdir(file_path_or_directory)
        # A non-empty path ending in a separator (e.g. "out/") can only denote
        # a directory, even if that directory does not exist yet.
        if not is_directory and file_path_or_directory:
            is_directory = os.path.basename(file_path_or_directory) == ""
        if is_directory:
            return os.path.join(file_path_or_directory, file_name)
        return file_path_or_directory

    @staticmethod
    def download_file(url: str, file_path: str) -> None:
        """
        Downloads a single file from the internet.

        Uses the `requests` library to download the file. The parent
        directory of `file_path` is created if it does not exist.

        Responses whose status code is not 200 are ignored: nothing is
        written and no exception is raised.

        Args:
            url (str): The URL of the file.
            file_path (str): The path to the file.

        Returns:
            None

        Raises:
            Exception: Any error raised by `requests` while fetching the URL,
                or by the OS while creating the directory or writing the file.
        """
        # NOTE: no timeout is passed, so the call waits for as long as the
        # server keeps the connection open.
        response = requests.get(url)
        if response.status_code != 200:
            return

        Download._ensure_parent_directory(file_path)

        # Write the file to disk.
        with open(file_path, "wb") as file:
            file.write(response.content)

    @staticmethod
    def _download_file(url: str, file_path: str) -> None:
        """
        Downloads a single file from the internet.

        Uses the `urllib` library to download the file. The parent
        directory of `file_path` is created if it does not exist, matching
        the behaviour of `download_file`.

        Args:
            url (str): The URL of the file.
            file_path (str): The path to the file.

        Returns:
            None

        Raises:
            Exception: If the file could not be downloaded or saved.
        """
        import urllib.request

        with urllib.request.urlopen(url) as response:
            content = response.read()

        Download._ensure_parent_directory(file_path)

        with open(file_path, "wb") as file:
            file.write(content)

    @staticmethod
    def parallel_download(
            urls: List[str],
            file_paths: List[str],
            n_jobs: int = -1) -> None:
        """
        Downloads multiple files from the internet in parallel.

        Each URL is paired with the file path at the same position and
        fetched with `download_file`.

        Args:
            urls (List[str]): The URLs of the files.
            file_paths (List[str]): The paths to the files.
            n_jobs (int): The number of jobs to run in parallel.
                If -1, the number of jobs is set to the number of CPU cores.

        Returns:
            None

        Raises:
            ValueError: If `urls` and `file_paths` differ in length.
            Exception: If the files could not be downloaded or saved.
        """
        urls = list(urls)
        file_paths = list(file_paths)
        # zip() would silently drop the unmatched tail; fail loudly instead.
        if len(urls) != len(file_paths):
            raise ValueError(
                f"urls and file_paths must have the same length "
                f"(got {len(urls)} and {len(file_paths)})")

        Parallel(n_jobs=n_jobs)(delayed(Download.download_file)(url, file_path)
                                for url, file_path in zip(urls, file_paths))

    @staticmethod
    def download_fasta_file_from_rcsb(
            pdb_id: str,
            file_path_or_directory: str) -> None:
        """
        Downloads a single FASTA file from the RCSB.

        Args:
            pdb_id (str): The ID of the PDB file.
            file_path_or_directory (str): The path where the FASTA file should be saved.
                If the path is a directory, the file is saved in the directory
                with the name <pdb_id>.fasta.
        """
        url = Download._RCSB_FASTA_URL.format(pdb_id)
        file_path = Download._resolve_file_path(file_path_or_directory, f"{pdb_id}.fasta")
        Download.download_file(url, file_path)

    @staticmethod
    def download_fasta_files_from_rcsb(
            pdb_ids: List[str],
            directory: str,
            n_jobs: int = -1) -> None:
        """
        Downloads multiple FASTA files from the RCSB.

        Args:
            pdb_ids (List[str]): The IDs of the PDB files.
            directory (str): The directory where the FASTA files should be saved.
                Each file is named <pdb_id>.fasta.
            n_jobs (int): The number of jobs to run in parallel.
                If -1, the number of jobs is set to the number of CPU cores.

        Returns:
            None
        """
        urls = [Download._RCSB_FASTA_URL.format(pdb_id) for pdb_id in pdb_ids]
        file_paths = [os.path.join(directory, f"{pdb_id}.fasta") for pdb_id in pdb_ids]
        Download.parallel_download(urls, file_paths, n_jobs=n_jobs)

    @staticmethod
    def download_fasta_file_from_uniprot(
            uniprot_id: str,
            file_path_or_directory: str) -> None:
        """
        Downloads a single FASTA file from Uniprot.

        Args:
            uniprot_id (str): The ID of the UniProt file.
            file_path_or_directory (str): The path where the FASTA file should be saved.
                If the path is a directory, the file is saved in the directory
                with the name <uniprot_id>.fasta.
        """
        url = Download._UNIPROT_FASTA_URL.format(uniprot_id)
        file_path = Download._resolve_file_path(file_path_or_directory, f"{uniprot_id}.fasta")
        Download.download_file(url, file_path)

    @staticmethod
    def download_fasta_files_from_uniprot(
            uniprot_ids: List[str],
            directory: str,
            n_jobs: int = -1) -> None:
        """
        Downloads multiple FASTA files from Uniprot.

        Args:
            uniprot_ids (List[str]): The IDs of the UniProt files.
            directory (str): The directory where the FASTA files should be saved.
                Each file is named <uniprot_id>.fasta.
            n_jobs (int): The number of jobs to run in parallel.
                If -1, the number of jobs is set to the number of CPU cores.

        Returns:
            None
        """
        urls = [Download._UNIPROT_FASTA_URL.format(uniprot_id) for uniprot_id in uniprot_ids]
        file_paths = [os.path.join(directory, f"{uniprot_id}.fasta") for uniprot_id in uniprot_ids]
        Download.parallel_download(urls, file_paths, n_jobs=n_jobs)

    @staticmethod
    def download_pdb_file_from_rcsb(
            pdb_id: str,
            file_path_or_directory: str) -> None:
        """
        Downloads a single PDB file from the RCSB.

        Args:
            pdb_id (str): The ID of the PDB file.
            file_path_or_directory (str): The path where the PDB file should be saved.
                If the path is a directory, the file is saved in the directory
                with the name <pdb_id>.pdb.
        """
        download_url = Download._RCSB_PDB_URL.format(pdb_id)
        file_path = Download._resolve_file_path(file_path_or_directory, f"{pdb_id}.pdb")
        Download.download_file(download_url, file_path)

    @staticmethod
    def download_pdb_files_from_rcsb(
            pdb_ids: List[str],
            directory: str,
            n_jobs: int = -1) -> None:
        """
        Downloads multiple PDB files from the RCSB.

        Args:
            pdb_ids (List[str]): The IDs of the PDB files.
            directory (str): The directory where the PDB files should be saved.
                Each file is named <pdb_id>.pdb.
            n_jobs (int): The number of jobs to run in parallel.
                If -1, the number of jobs is set to the number of CPU cores.

        Returns:
            None
        """
        urls = [Download._RCSB_PDB_URL.format(pdb_id) for pdb_id in pdb_ids]
        file_paths = [os.path.join(directory, f"{pdb_id}.pdb") for pdb_id in pdb_ids]
        Download.parallel_download(urls, file_paths, n_jobs=n_jobs)

    @staticmethod
    def download_pdb_file_from_sabdab(
            pdb_id: str,
            file_path_or_directory: str) -> None:
        """
        Downloads a single PDB file from Sabdab.

        The ID is lower-cased in the request URL; the saved file name keeps
        the ID exactly as given.

        Args:
            pdb_id (str): The ID of the PDB file.
            file_path_or_directory (str): The path where the PDB file should be saved.
                If the path is a directory, the file is saved in the directory
                with the name <pdb_id>.pdb.
        """
        download_url = Download._SABDAB_PDB_URL.format(pdb_id.lower())
        file_path = Download._resolve_file_path(file_path_or_directory, f"{pdb_id}.pdb")
        Download.download_file(download_url, file_path)

    @staticmethod
    def download_pdb_files_from_sabdab(
            pdb_ids: List[str],
            directory: str,
            n_jobs: int = -1) -> None:
        """
        Downloads multiple PDB files from Sabdab.

        The IDs are lower-cased in the request URLs; the saved file names
        keep the IDs exactly as given.

        Args:
            pdb_ids (List[str]): The IDs of the PDB files.
            directory (str): The directory where the PDB files should be saved.
                Each file is named <pdb_id>.pdb.
            n_jobs (int): The number of jobs to run in parallel.
                If -1, the number of jobs is set to the number of CPU cores.

        Returns:
            None
        """
        urls = [Download._SABDAB_PDB_URL.format(pdb_id.lower()) for pdb_id in pdb_ids]
        file_paths = [os.path.join(directory, f"{pdb_id}.pdb") for pdb_id in pdb_ids]
        Download.parallel_download(urls, file_paths, n_jobs=n_jobs)

    @staticmethod
    def download_cif_file_from_rcsb(
            pdb_id: str,
            file_path_or_directory: str) -> None:
        """
        Downloads a single CIF file from the RCSB.

        Args:
            pdb_id (str): The ID of the CIF file.
            file_path_or_directory (str): The path where the CIF file should be saved.
                If the path is a directory, the file is saved in the directory
                with the name <pdb_id>.cif.
        """
        download_url = Download._RCSB_CIF_URL.format(pdb_id)
        file_path = Download._resolve_file_path(file_path_or_directory, f"{pdb_id}.cif")
        Download.download_file(download_url, file_path)

    @staticmethod
    def download_cif_files_from_rcsb(
            pdb_ids: List[str],
            directory: str,
            n_jobs: int = -1) -> None:
        """
        Downloads multiple CIF files from the RCSB.

        Args:
            pdb_ids (List[str]): The IDs of the CIF files.
            directory (str): The directory where the CIF files should be saved.
                Each file is named <pdb_id>.cif.
            n_jobs (int): The number of jobs to run in parallel.
                If -1, the number of jobs is set to the number of CPU cores.

        Returns:
            None
        """
        urls = [Download._RCSB_CIF_URL.format(pdb_id) for pdb_id in pdb_ids]
        file_paths = [os.path.join(directory, f"{pdb_id}.cif") for pdb_id in pdb_ids]
        Download.parallel_download(urls, file_paths, n_jobs=n_jobs)

    @staticmethod
    def download_binary_cif_file_from_rcsb(
            pdb_id: str,
            file_path_or_directory: str) -> None:
        """
        Downloads a single Binary CIF file from the RCSB.

        Args:
            pdb_id (str): The ID of the Binary CIF file.
            file_path_or_directory (str): The path where the Binary CIF file should be saved.
                If the path is a directory, the file is saved in the directory
                with the name <pdb_id>.bcif.
        """
        download_url = Download._RCSB_BCIF_URL.format(pdb_id)
        file_path = Download._resolve_file_path(file_path_or_directory, f"{pdb_id}.bcif")
        Download.download_file(download_url, file_path)

    @staticmethod
    def download_binary_cif_files_from_rcsb(
            pdb_ids: List[str],
            directory: str,
            n_jobs: int = -1) -> None:
        """
        Downloads multiple Binary CIF files from the RCSB.

        Args:
            pdb_ids (List[str]): The IDs of the Binary CIF files.
            directory (str): The directory where the Binary CIF files should be saved.
                Each file is named <pdb_id>.bcif.
            n_jobs (int): The number of jobs to run in parallel.
                If -1, the number of jobs is set to the number of CPU cores.

        Returns:
            None
        """
        urls = [Download._RCSB_BCIF_URL.format(pdb_id) for pdb_id in pdb_ids]
        file_paths = [os.path.join(directory, f"{pdb_id}.bcif") for pdb_id in pdb_ids]
        Download.parallel_download(urls, file_paths, n_jobs=n_jobs)

Static methods

def download_binary_cif_file_from_rcsb(pdb_id: str, file_path_or_directory: str) ‑> None

Downloads a single Binary CIF file from the RCSB.

Args

pdb_id : str
The ID of the Binary CIF file.
file_path_or_directory : str
The path where the Binary CIF file should be saved. If the path is a directory, the file is saved in the directory with the name <pdb_id>.bcif.
Expand source code
@staticmethod
def download_binary_cif_file_from_rcsb(
        pdb_id: str,
        file_path_or_directory: str) -> None:
    """
    Fetch one Binary CIF file from the RCSB (models.rcsb.org) and save it.

    Args:
        pdb_id (str): The ID of the Binary CIF file.
        file_path_or_directory (str): Where to save the Binary CIF file.
            When this is an existing directory, the file is written inside
            it as <pdb_id>.bcif; otherwise it is used as the file path.
    """
    download_url = f"https://models.rcsb.org/{pdb_id}.bcif"

    # An existing directory gets the default file name appended.
    destination = (
        os.path.join(file_path_or_directory, f"{pdb_id}.bcif")
        if os.path.isdir(file_path_or_directory)
        else file_path_or_directory
    )

    Download.download_file(download_url, destination)
def download_binary_cif_files_from_rcsb(pdb_ids: List[str], directory: str, n_jobs: int = -1) ‑> None

Downloads multiple Binary CIF files from the RCSB.

Args

pdb_ids : List[str]
The IDs of the Binary CIF files.
directory : str
The directory where the CIF files should be saved.
n_jobs : int
The number of jobs to run in parallel. If -1, the number of jobs is set to the number of CPU cores.

Returns

None

Expand source code
@staticmethod
def download_binary_cif_files_from_rcsb(
        pdb_ids: List[str],
        directory: str,
        n_jobs: int = -1) -> None:
    """
    Fetch several Binary CIF files from the RCSB (models.rcsb.org) in parallel.

    Args:
        pdb_ids (List[str]): The IDs of the Binary CIF files.
        directory (str): The directory that receives the files; each one is
            saved as <pdb_id>.bcif.
        n_jobs (int): The number of jobs to run in parallel.
            If -1, the number of jobs is set to the number of CPU cores.

    Returns:
        None
    """
    source_urls = [f"https://models.rcsb.org/{entry}.bcif" for entry in pdb_ids]
    targets = [os.path.join(directory, f"{entry}.bcif") for entry in pdb_ids]
    Download.parallel_download(source_urls, targets, n_jobs=n_jobs)
def download_cif_file_from_rcsb(pdb_id: str, file_path_or_directory: str) ‑> None

Downloads a single CIF file from the RCSB.

Args

pdb_id : str
The ID of the CIF file.
file_path_or_directory : str
The path where the CIF file should be saved. If the path is a directory, the file is saved in the directory with the name <pdb_id>.cif.
Expand source code
@staticmethod
def download_cif_file_from_rcsb(
        pdb_id: str,
        file_path_or_directory: str) -> None:
    """
    Fetch one CIF file from the RCSB (files.rcsb.org) and save it.

    Args:
        pdb_id (str): The ID of the CIF file.
        file_path_or_directory (str): Where to save the CIF file.
            When this is an existing directory, the file is written inside
            it as <pdb_id>.cif; otherwise it is used as the file path.
    """
    download_url = f"https://files.rcsb.org/download/{pdb_id}.cif"

    # An existing directory gets the default file name appended.
    destination = (
        os.path.join(file_path_or_directory, f"{pdb_id}.cif")
        if os.path.isdir(file_path_or_directory)
        else file_path_or_directory
    )

    Download.download_file(download_url, destination)
def download_cif_files_from_rcsb(pdb_ids: List[str], directory: str, n_jobs: int = -1) ‑> None

Downloads multiple CIF files from the RCSB.

Args

pdb_ids : List[str]
The IDs of the CIF files.
directory : str
The directory where the CIF files should be saved.
n_jobs : int
The number of jobs to run in parallel. If -1, the number of jobs is set to the number of CPU cores.

Returns

None

Expand source code
@staticmethod
def download_cif_files_from_rcsb(
        pdb_ids: List[str],
        directory: str,
        n_jobs: int = -1) -> None:
    """
    Fetch several CIF files from the RCSB (files.rcsb.org) in parallel.

    Args:
        pdb_ids (List[str]): The IDs of the CIF files.
        directory (str): The directory that receives the files; each one is
            saved as <pdb_id>.cif.
        n_jobs (int): The number of jobs to run in parallel.
            If -1, the number of jobs is set to the number of CPU cores.

    Returns:
        None
    """
    source_urls = [f"https://files.rcsb.org/download/{entry}.cif" for entry in pdb_ids]
    targets = [os.path.join(directory, f"{entry}.cif") for entry in pdb_ids]
    Download.parallel_download(source_urls, targets, n_jobs=n_jobs)
def download_fasta_file_from_rcsb(pdb_id: str, file_path_or_directory: str) ‑> None

Downloads a single FASTA file from the RCSB.

Args

pdb_id : str
The ID of the PDB file.
file_path_or_directory : str
The path where the FASTA file should be saved. If the path is a directory, the file is saved in the directory with the name <pdb_id>.fasta.
Expand source code
@staticmethod
def download_fasta_file_from_rcsb(
        pdb_id: str,
        file_path_or_directory: str) -> None:
    """
    Fetch the FASTA file of one PDB entry from the RCSB and save it.

    Args:
        pdb_id (str): The ID of the PDB file.
        file_path_or_directory (str): Where to save the FASTA file.
            When this is an existing directory, the file is written inside
            it as <pdb_id>.fasta; otherwise it is used as the file path.
    """
    url = f"https://www.rcsb.org/fasta/entry/{pdb_id}"

    # An existing directory gets the default file name appended.
    destination = (
        os.path.join(file_path_or_directory, f"{pdb_id}.fasta")
        if os.path.isdir(file_path_or_directory)
        else file_path_or_directory
    )

    Download.download_file(url, destination)
def download_fasta_file_from_uniprot(uniprot_id: str, file_path_or_directory: str) ‑> None

Downloads a single FASTA file from Uniprot.

Args

uniprot_id : str
The ID of the UniProt file.
file_path_or_directory : str
The path where the FASTA file should be saved. If the path is a directory, the file is saved in the directory with the name <uniprot_id>.fasta.
Expand source code
@staticmethod
def download_fasta_file_from_uniprot(
        uniprot_id: str,
        file_path_or_directory: str) -> None:
    """
    Fetch the FASTA file of one UniProt entry and save it.

    Args:
        uniprot_id (str): The ID of the UniProt file.
        file_path_or_directory (str): Where to save the FASTA file.
            When this is an existing directory, the file is written inside
            it as <uniprot_id>.fasta; otherwise it is used as the file path.
    """
    url = f"https://www.uniprot.org/uniprot/{uniprot_id}.fasta"

    # An existing directory gets the default file name appended.
    destination = (
        os.path.join(file_path_or_directory, f"{uniprot_id}.fasta")
        if os.path.isdir(file_path_or_directory)
        else file_path_or_directory
    )

    Download.download_file(url, destination)
def download_fasta_files_from_rcsb(pdb_ids: List[str], directory: str, n_jobs: int = -1) ‑> None

Downloads multiple FASTA files from the RCSB.

Args

pdb_ids : List[str]
The IDs of the PDB files.
directory : str
The directory where the FASTA files should be saved.
n_jobs : int
The number of jobs to run in parallel. If -1, the number of jobs is set to the number of CPU cores.

Returns

None

Expand source code
@staticmethod
def download_fasta_files_from_rcsb(
        pdb_ids: List[str],
        directory: str,
        n_jobs: int = -1) -> None:
    """
    Fetches the FASTA files of several RCSB entries in parallel.

    Each file is saved as <pdb_id>.fasta inside `directory`.

    Args:
        pdb_ids (List[str]): The IDs of the PDB entries.
        directory (str): The directory where the FASTA files should be saved.
        n_jobs (int): The number of jobs to run in parallel.
            If -1, the number of jobs is set to the number of CPU cores.

    Returns:
        None
    """
    # The i-th URL is paired with the i-th destination path.
    Download.parallel_download(
        [f"https://www.rcsb.org/fasta/entry/{entry_id}" for entry_id in pdb_ids],
        [os.path.join(directory, f"{entry_id}.fasta") for entry_id in pdb_ids],
        n_jobs=n_jobs)
def download_fasta_files_from_uniprot(uniprot_ids: List[str], directory: str, n_jobs: int = -1) ‑> None

Downloads multiple FASTA files from Uniprot.

Args

uniprot_ids : List[str]
The IDs of the UniProt files.
directory : str
The directory where the FASTA files should be saved.
n_jobs : int
The number of jobs to run in parallel. If -1, the number of jobs is set to the number of CPU cores.

Returns

None

Expand source code
@staticmethod
def download_fasta_files_from_uniprot(
        uniprot_ids: List[str],
        directory: str,
        n_jobs: int = -1) -> None:
    """
    Fetches the FASTA files of several UniProt entries in parallel.

    Each file is saved as <uniprot_id>.fasta inside `directory`.

    Args:
        uniprot_ids (List[str]): The IDs of the UniProt entries.
        directory (str): The directory where the FASTA files should be saved.
        n_jobs (int): The number of jobs to run in parallel.
            If -1, the number of jobs is set to the number of CPU cores.

    Returns:
        None
    """
    # The i-th URL is paired with the i-th destination path.
    Download.parallel_download(
        [f"https://www.uniprot.org/uniprot/{entry_id}.fasta" for entry_id in uniprot_ids],
        [os.path.join(directory, f"{entry_id}.fasta") for entry_id in uniprot_ids],
        n_jobs=n_jobs)
def download_file(url: str, file_path: str) ‑> None

Downloads a single file from the internet.

Uses the requests library to download the file.

Args

url : str
The URL of the file.
file_path : str
The path to the file.

Returns

None

Raises

Exception
If the file could not be downloaded or saved.
Expand source code
@staticmethod
def download_file(url: str, file_path: str) -> None:
    """
    Downloads a single file from the internet.

    Uses the `requests` library to fetch `url` and writes the response body
    to `file_path` in binary mode. Missing parent directories of `file_path`
    are created first.

    Args:
        url (str): The URL of the file.
        file_path (str): The path to the file.

    Returns:
        None

    Raises:
        requests.HTTPError: If the server answers with a status code other
            than 200. Nothing is written to disk in that case.
        Exception: Any other error raised while performing the request or
            while creating the directory / writing the file is propagated
            unchanged.
    """
    response = requests.get(url)
    if response.status_code != 200:
        # Fail loudly instead of returning as if the download had succeeded;
        # otherwise the caller cannot tell that no file was written.
        raise requests.HTTPError(
            f"Download of {url} failed with HTTP status {response.status_code}.",
            response=response)

    # Create the directory if it does not exist. exist_ok avoids a
    # FileExistsError when several parallel downloads create the same
    # directory at the same time.
    directory = os.path.dirname(file_path)
    if directory != "":
        os.makedirs(directory, exist_ok=True)

    # Write the file to disk.
    with open(file_path, "wb") as file:
        file.write(response.content)
def download_pdb_file_from_rcsb(pdb_id: str, file_path_or_directory: str) ‑> None

Downloads a single PDB file from the RCSB.

Args

pdb_id : str
The ID of the PDB file.
file_path_or_directory : str
The path where the PDB file should be saved. If the path is a directory, the file is saved in the directory with the name <pdb_id>.pdb.
Expand source code
@staticmethod
def download_pdb_file_from_rcsb(
        pdb_id: str,
        file_path_or_directory: str) -> None:
    """
    Fetches one PDB file from the RCSB and stores it on disk.

    Args:
        pdb_id (str): The ID of the PDB file.
        file_path_or_directory (str): Where the PDB file should be saved.
            If this is an existing directory, the file is written inside it
            with the name <pdb_id>.pdb; otherwise the value is used as the
            file path as given.

    Returns:
        None
    """
    target_path = file_path_or_directory
    if os.path.isdir(target_path):
        # A directory was supplied: derive the file name from the ID.
        target_path = os.path.join(target_path, f"{pdb_id}.pdb")

    Download.download_file(f"https://files.rcsb.org/download/{pdb_id}.pdb", target_path)
def download_pdb_file_from_sabdab(pdb_id: str, file_path_or_directory: str) ‑> None

Downloads a single PDB file from Sabdab.

Args

pdb_id : str
The ID of the PDB file.
file_path_or_directory : str
The path where the PDB file should be saved. If the path is a directory, the file is saved in the directory with the name <pdb_id>.pdb.
Expand source code
@staticmethod
def download_pdb_file_from_sabdab(
        pdb_id: str,
        file_path_or_directory: str) -> None:
    """
    Fetches one PDB file from Sabdab and stores it on disk.

    The ID is lower-cased when building the request URL; the file name
    derived for a directory target keeps the ID exactly as passed in.

    Args:
        pdb_id (str): The ID of the PDB file.
        file_path_or_directory (str): Where the PDB file should be saved.
            If this is an existing directory, the file is written inside it
            with the name <pdb_id>.pdb; otherwise the value is used as the
            file path as given.

    Returns:
        None
    """
    source_url = f"https://opig.stats.ox.ac.uk/webapps/sabdab-sabpred/sabdab/pdb/{pdb_id.lower()}/?raw=true"

    target_path = file_path_or_directory
    if os.path.isdir(target_path):
        # A directory was supplied: derive the file name from the ID.
        target_path = os.path.join(target_path, f"{pdb_id}.pdb")

    Download.download_file(source_url, target_path)
def download_pdb_files_from_rcsb(pdb_ids: List[str], directory: str, n_jobs: int = -1) ‑> None

Downloads multiple PDB files from the RCSB.

Args

pdb_ids : List[str]
The IDs of the PDB files.
directory : str
The directory where the PDB files should be saved.
n_jobs : int
The number of jobs to run in parallel. If -1, the number of jobs is set to the number of CPU cores.

Returns

None

Expand source code
@staticmethod
def download_pdb_files_from_rcsb(
        pdb_ids: List[str],
        directory: str,
        n_jobs: int = -1) -> None:
    """
    Fetches several PDB files from the RCSB in parallel.

    Each file is saved as <pdb_id>.pdb inside `directory`.

    Args:
        pdb_ids (List[str]): The IDs of the PDB files.
        directory (str): The directory where the PDB files should be saved.
        n_jobs (int): The number of jobs to run in parallel.
            If -1, the number of jobs is set to the number of CPU cores.

    Returns:
        None
    """
    # The i-th URL is paired with the i-th destination path.
    Download.parallel_download(
        [f"https://files.rcsb.org/download/{entry_id}.pdb" for entry_id in pdb_ids],
        [os.path.join(directory, f"{entry_id}.pdb") for entry_id in pdb_ids],
        n_jobs=n_jobs)
def download_pdb_files_from_sabdab(pdb_ids: List[str], directory: str, n_jobs: int = -1) ‑> None

Downloads multiple PDB files from Sabdab.

Args

pdb_ids : List[str]
The IDs of the PDB files.
directory : str
The directory where the PDB files should be saved.
n_jobs : int
The number of jobs to run in parallel. If -1, the number of jobs is set to the number of CPU cores.

Returns

None

Expand source code
@staticmethod
def download_pdb_files_from_sabdab(
        pdb_ids: List[str],
        directory: str,
        n_jobs: int = -1) -> None:
    """
    Fetches several PDB files from Sabdab in parallel.

    IDs are lower-cased when building the request URLs; each file is saved
    as <pdb_id>.pdb inside `directory`, using the ID exactly as passed in.

    Args:
        pdb_ids (List[str]): The IDs of the PDB files.
        directory (str): The directory where the PDB files should be saved.
        n_jobs (int): The number of jobs to run in parallel.
            If -1, the number of jobs is set to the number of CPU cores.

    Returns:
        None
    """
    # The i-th URL is paired with the i-th destination path.
    Download.parallel_download(
        [f"https://opig.stats.ox.ac.uk/webapps/sabdab-sabpred/sabdab/pdb/{entry_id.lower()}/?raw=true"
         for entry_id in pdb_ids],
        [os.path.join(directory, f"{entry_id}.pdb") for entry_id in pdb_ids],
        n_jobs=n_jobs)
def parallel_download(urls: List[str], file_paths: List[str], n_jobs: int = -1) ‑> None

Downloads multiple files from the internet in parallel.

Args

urls : List[str]
The URLs of the files.
file_paths : List[str]
The paths to the files.
n_jobs : int
The number of jobs to run in parallel. If -1, the number of jobs is set to the number of CPU cores.

Returns

None

Raises

Exception
If the files could not be downloaded or saved.
Expand source code
@staticmethod
def parallel_download(
        urls: List[str],
        file_paths: List[str],
        n_jobs: int = -1) -> None:
    """
    Downloads multiple files from the internet in parallel.

    URLs and file paths are paired by position, and one
    `Download.download_file` task is scheduled per pair through joblib.

    Args:
        urls (List[str]): The URLs of the files.
        file_paths (List[str]): The paths to the files, in the same order
            as `urls`.
        n_jobs (int): The number of jobs to run in parallel.
            If -1, the number of jobs is set to the number of CPU cores.

    Returns:
        None

    Raises:
        Exception: If the files could not be downloaded or saved.
    """
    runner = Parallel(n_jobs=n_jobs)
    # One lazily-built task per (url, destination) pair.
    tasks = (
        delayed(Download.download_file)(source, destination)
        for source, destination in zip(urls, file_paths)
    )
    runner(tasks)