Source code for mgkit.counts.glm

"""
.. versionadded:: 0.3.3

GLM models with metagenomes and metatranscriptomes. Experimental
"""

from __future__ import division
from builtins import range
import statsmodels.api as sm
from scipy import interpolate
from scipy import optimize
import pandas as pd


[docs]def lowess_ci_bootstrap(endog, exog, num=100, frac=.2, it=3, alpha=.05, delta=0., min_alpha=10**-3, kind='slinear'): """ Performance increase with the value of *delta*. """ data = pd.DataFrame( { 'endog': endog, 'exog': exog } ).sort_values(by=['exog', 'endog'], ascending=True) boots = [] for idx in range(num): sample = data.sample(n=data.index.size, replace=True) lw = sm.nonparametric.lowess( sample.endog, sample.exog, is_sorted=False, frac=frac, it=it, return_sorted=True, delta=delta ) boots.append(pd.DataFrame({ 'endog': lw[:, 1], 'exog': lw[:, 0] })) alpha = alpha / 2 boots = pd.concat(boots) q1 = boots.groupby('exog').quantile( alpha, interpolation='nearest' ).sort_index() q1 = q1.endog[q1.endog > 0].fillna(min_alpha) q1 = interpolate.interp1d(q1.index, q1, kind='slinear', fill_value='extrapolate') q2 = boots.groupby('exog').quantile( 1 - alpha, interpolation='nearest' ).sort_index() q2 = q2.endog[q2.endog > 0].fillna(min_alpha) q2 = interpolate.interp1d(q2.index, q2, kind='slinear', fill_value='extrapolate') m = fit_lowess_interpolate(data.endog, data.exog, frac=frac, it=it, kind=kind) return q1, q2, m
[docs]def fit_lowess_interpolate(endog, exog, frac=.2, it=3, kind='slinear'): data = pd.DataFrame( { 'endog': endog, 'exog': exog } ).sort_values(by=['exog', 'endog'], ascending=True) lw = sm.nonparametric.lowess( data.endog, data.exog, frac=frac, it=it, ) return interpolate.interp1d( lw[:, 0], lw[:, 1], kind=kind, fill_value='extrapolate' )
[docs]def variance_to_alpha(mu, func, min_alpha=10**-3): """ Based on the variance defined in the Negative Binomial in statsmodels var = mu + alpha * (mu ** 2) """ alpha = (func(mu) - mu) / (mu ** 2) return min_alpha if alpha <= 0 else alpha