#! /usr/bin/env python
# -*- coding: utf-8 -*-
"""
Protein Blocks related data --- :mod:`pbxplore.PB`
==================================================
Protein blocks definition
-------------------------
* :data:`REFERENCES`
The definition of each block as a dictionary; each key is a block name (as
a lower case letter), and each value is a list of the dihedral angle values
that define the block.
* :data:`NAMES`
The names of all the protein blocks.
Substitution matrix
-------------------
* :data:`SUBSTITUTION_MATRIX_NAME`
The absolute path to the file that contains the substitution matrix
.. autofunction:: load_substitution_matrix
Exceptions
----------
.. autoexception:: SizeError
.. autoexception:: InvalidBlockError
"""
from __future__ import print_function, absolute_import
# Standard modules
import os
# Third-party modules
import numpy
# Python2/Python3 compatibility
# In python 3, range behaves like python 2's xrange: it produces its values
# lazily instead of building a list (use list(range(...)) to get a list).
# Here we alias range to xrange on python 2 so that range is lazy under both
# versions; on python 3 xrange does not exist and range is left untouched.
try:
    range = xrange
except NameError:
    pass
# Data
# Protein Blocks reference angles
# taken from A. G. de Brevern, C. Etchebest and S. Hazout.
# "Bayesian probabilistic approach for predicting backbone structures
# in terms of protein blocks"
# Proteins, 41: 271-288 (2000)
#
# Each key is a one-letter block name ('a' to 'p') and each value lists the
# eight dihedral angles that define the block (presumably in degrees; the
# unit is not stated here -- TODO confirm), in the following column order:
# PB psi(n-2) phi(n-1) psi(n-1) phi(n) psi(n) phi(n+1) psi(n+1) phi(n+2)
REFERENCES = {
'a': [ 41.14, 75.53, 13.92, -99.80, 131.88, -96.27, 122.08, -99.68],
'b': [108.24, -90.12, 119.54, -92.21, -18.06, -128.93, 147.04, -99.90],
'c': [-11.61, -105.66, 94.81, -106.09, 133.56, -106.93, 135.97, -100.63],
'd': [141.98, -112.79, 132.20, -114.79, 140.11, -111.05, 139.54, -103.16],
'e': [133.25, -112.37, 137.64, -108.13, 133.00, -87.30, 120.54, 77.40],
'f': [116.40, -105.53, 129.32, -96.68, 140.72, -74.19, -26.65, -94.51],
'g': [ 0.40, -81.83, 4.91, -100.59, 85.50, -71.65, 130.78, 84.98],
'h': [119.14, -102.58, 130.83, -67.91, 121.55, 76.25, -2.95, -90.88],
'i': [130.68, -56.92, 119.26, 77.85, 10.42, -99.43, 141.40, -98.01],
'j': [114.32, -121.47, 118.14, 82.88, -150.05, -83.81, 23.35, -85.82],
'k': [117.16, -95.41, 140.40, -59.35, -29.23, -72.39, -25.08, -76.16],
'l': [139.20, -55.96, -32.70, -68.51, -26.09, -74.44, -22.60, -71.74],
'm': [-39.62, -64.73, -39.52, -65.54, -38.88, -66.89, -37.76, -70.19],
'n': [-35.34, -65.03, -38.12, -66.34, -29.51, -89.10, -2.91, 77.90],
'o': [-45.29, -67.44, -27.72, -87.27, 5.13, 77.49, 30.71, -93.23],
'p': [-27.09, -86.14, 0.30, 59.85, 21.51, -96.30, 132.67, -92.91],
}
# One-letter names of the 16 PBs.
NAMES = 'abcdefghijklmnop'

# The substitution matrix file is looked up in the directory of this module.
SUBSTITUTION_MATRIX_NAME = os.path.join(
    os.path.dirname(__file__),
    "PBs_substitution_matrix.dat",
)
class InvalidBlockError(ValueError):
    """
    Exception raised when encountering an invalid protein block.

    Parameters
    ----------
    block : optional
        The offending block. Leave it to ``None`` (the default) when the
        block is not known.

    Attributes
    ----------
    block
        The value given at construction.
    """

    def __init__(self, block=None):
        # Store the block itself in ``args``. Passing ``self`` there, as was
        # done before, makes ``str(error)`` recurse until a RecursionError
        # because the default ``__str__`` renders ``args[0]``.
        if block is None:
            super(InvalidBlockError, self).__init__()
        else:
            super(InvalidBlockError, self).__init__(block)
        self.block = block

    def __str__(self):
        # Built on demand so that it always reflects the current ``block``.
        if self.block is None:
            return "Invalid block"
        return "Invalid block '{}'".format(self.block)

    # ``repr`` has always been the human readable message; keep it that way.
    __repr__ = __str__
class SizeError(AssertionError):
    """
    Exception raised when a sequence does not have the expected length.

    It derives from :exc:`AssertionError`, so code catching
    ``AssertionError`` also catches it.
    """
def load_substitution_matrix(name):
    """
    Load PB substitution matrix.

    The matrix must be 16x16 and symmetric.

    Parameters
    ----------
    name : str
        Name of the file containing the PBs substitution matrix.
        The first two lines of the file are skipped.

    Returns
    -------
    mat : numpy array
        Array of floats.

    Raises
    ------
    AssertionError
        the matrix is not 16x16
    ValueError
        the matrix is not symmetric
    """
    mat = numpy.loadtxt(name, dtype=float, skiprows=2)

    # Raise explicitly instead of using ``assert`` so that the check is not
    # stripped when python runs with ``-O``; the exception type is unchanged.
    if mat.shape != (16, 16):
        raise AssertionError('wrong substitution matrix size')

    # ``argwhere`` lists the offending cells in row-major order, so the first
    # one is the same pair a row-by-row scan would have reported.
    mismatches = numpy.argwhere(mat != mat.T)
    if len(mismatches) > 0:
        row, col = mismatches[0]
        raise ValueError(
            "Matrix is not symmetric - idx {} and {}".format(int(row), int(col))
        )
    return mat