##
## biorpy plotting wrappers
##
## nspies in ze house
##
import collections
import numpy
import pandas
import rpy2
#from rpy2.robjects import r
from biorpy.betteR import BetteR
import rpy2.robjects.numpy2ri
from rpy2 import robjects as robj
from rpy2.rlike.container import TaggedList
# rpy2.robjects.numpy2ri.activate()
# Module-level BetteR instance; the plotting functions in this module issue
# their R calls (r.plot, r.lines, r.legend, ...) through this object.
r = BetteR()
def _setdefaults(toupdate, defaults):
""" calls dict.setdefault() multiple times """
for key, val in defaults.iteritems():
toupdate.setdefault(key, val)
def plotMulti(xs, ys, names, colors=None, legendWhere="bottomright", xlab="", ylab="", plotArgs=None, lineArgs=None, **kwdargs):
    """ Plot multiple lines on the same axes; convenience function for calling
    r.plot() and then r.lines() (possibly multiple times) and adding an r.legend()

    Args:
        xs: a list of vectors of x values, one vector for each dataset to be plotted;
            if its length differs from that of ``ys`` it is treated as a single
            x vector shared by every dataset
        ys: a list of vectors of y values, as above, in the same order
        names: the names of each dataset, used for putting together the legend
        colors: an optional list of colors; cycled through if there are more
            datasets than colors
        legendWhere: the location parameter used to specify positioning of the legend (a combination
            of bottom/top and right/left)
        xlab: default x axis label (can be overridden through plotArgs/kwdargs)
        ylab: default y axis label (can be overridden through plotArgs/kwdargs)
        plotArgs: an optional dictionary of arguments to r.plot(), for example xlim=[0,3]
        lineArgs: an optional dictionary of arguments to r.lines()
        kwdargs: optional R plotting arguments can be passed in as keyword arguments [ie, plotMulti(xs, ys, names, lty=3)]
            to specify parameters for both the r.plot() and r.lines() commands; these
            take precedence over plotArgs and lineArgs

    The dictionaries passed as ``plotArgs`` and ``lineArgs`` are not modified.
    """
    assert len(ys) == len(names)
    if len(xs) != len(ys):
        # a single x vector was given; reuse it for every dataset
        xs = [xs] * len(names)
    assert len(xs) == len(ys)

    if colors is None:
        colors = ["red", "blue", "green", "orange", "brown", "purple", "black"]

    # default axis limits span all of the datasets
    xlim = [min(min(x) for x in xs), max(max(x) for x in xs)]
    ylim = [min(min(y) for y in ys), max(max(y) for y in ys)]

    # Build the argument dictionaries as fresh copies so that the caller's
    # dictionaries are left untouched.  Precedence, lowest to highest:
    # built-in defaults, plotArgs/lineArgs, keyword arguments.
    mergedPlotArgs = {"xlab": xlab, "ylab": ylab, "xlim": xlim, "ylim": ylim, "type": "l"}
    if plotArgs is not None:
        mergedPlotArgs.update(plotArgs)
    mergedPlotArgs.update(kwdargs)

    mergedLineArgs = {"type": "l"}
    if lineArgs is not None:
        mergedLineArgs.update(lineArgs)
    mergedLineArgs.update(kwdargs)

    # one color per dataset, wrapping around when colors run out
    datasetColors = [colors[i % len(colors)] for i in range(len(names))]

    for i, (x, y) in enumerate(zip(xs, ys)):
        if i == 0:
            # the first dataset sets up the axes
            r.plot(x, y, col=datasetColors[i], **mergedPlotArgs)
        else:
            r.lines(x, y, col=datasetColors[i], **mergedLineArgs)

    r.legend(legendWhere, legend=names, lty=1, lwd=2, col=datasetColors, bg="white")
def plotWithCor(x, y, method="spearman", main="", **kwdargs):
    """ Draw a scatterplot whose title includes the correlation coefficient

    Args:
        x: the x values
        y: the y values
        method: passed as ``method`` to r.cor()
        main: text placed in the title before the coefficient
        kwdargs: passed directly to r.plot()
    """
    coefficient = r.cor(x, y, method=method)[0]
    title = "{} rs = {}".format(main, coefficient)
    r.plot(x, y, main=title, **kwdargs)
def plotWithFit(x, y, main="", fitkwdargs=None, **plotkwdargs):
    """ Draw a scatterplot and overlay its linear least-squares fit line

    The title is ``main`` followed by the r and p values reported by
    scipy.stats.linregress().

    Args:
        x: the x values
        y: the y values
        main: text placed in the title before the fit statistics
        fitkwdargs: a dictionary of drawing parameters for the fit line,
            passed to r.abline()
        plotkwdargs: additional keyword arguments are passed directly to r.plot()
    """
    import scipy.stats

    slope, intercept, corr, pval, _stderr = scipy.stats.linregress(x, y)

    title = "{} r={:.2g} p={:.2g}".format(main, corr, pval)
    r.plot(x, y, main=title, **plotkwdargs)

    lineOptions = {} if fitkwdargs is None else fitkwdargs
    r.abline(a=intercept, b=slope, **lineOptions)
def errbars(x=None, y=None, x_lower=None, x_upper=None, y_lower=None, y_upper=None, length=0.08, *args, **kwdargs):
    """ Draws error bars on top of an existing plot

    specify EITHER: (y, x_lower and x_upper) for horizontal bars
    OR (x, y_lower, y_upper) for vertical bars; if both sets are complete,
    the horizontal form is used.

    Args:
        x: the x coordinate of each data point (vertical bars)
        y: the y coordinate of each data point (horizontal bars)
        x_lower: the left coordinate of the error bar
        x_upper: the right coordinate of the error bar
        y_lower: the bottom coordinate of the error bar
        y_upper: the top coordinate of the error bar
        length: passed as ``length`` to r.arrows()

    uses the r.arrows() command (with angle=90, code=3), and passes any
    additional positional and keyword args to r.arrows()

    Raises:
        ValueError: if neither complete set of coordinates was provided
    """
    if y is not None and x_lower is not None and x_upper is not None:
        r.arrows(x_lower, y, x_upper, y, angle=90, code=3, length=length, *args, **kwdargs)
    elif x is not None and y_lower is not None and y_upper is not None:
        r.arrows(x, y_lower, x, y_upper, angle=90, code=3, length=length, *args, **kwdargs)
    else:
        # ValueError is a subclass of Exception, so callers that caught the
        # previous generic Exception keep working.
        raise ValueError("must define either (y, x_lower, x_upper) or (x, y_lower, y_upper)")
def ecdf(vectors, labels, colors=None,
         xlab="", ylab="cumulative fraction", main="", legendWhere="topleft",
         lty=1, lwd=1, **ecdfKwdArgs):
    """ Plot the empirical CDF of each vector on the same axes, with a legend

    Args:
        vectors: a list of lists of values, one per dataset
        labels: one label per dataset; the legend shows each label followed
            by the number of points in the corresponding vector
        colors: an optional list of colors, cycled through if there are more
            vectors than colors (default: red, blue, orange, violet, green, brown)
        xlab, ylab, main: axis labels and title for the plot
        legendWhere: the location parameter for r.legend()
        lty: line type; either a single value (number or R line type name such
            as "dashed") used for every dataset, or a sequence with one entry
            per dataset
        lwd: line width; a single value or a sequence, as for ``lty``
        ecdfKwdArgs: additional keyword arguments passed to the first r.plot()
            call; if ``xlim`` is missing or None it is set to span all vectors
    """
    if colors is None:
        colors = ["red", "blue", "orange", "violet", "green", "brown"]

    count = len(vectors)
    # str/bytes/unicode across Python 2 and 3
    stringTypes = (str, bytes, type(u""))

    def _expand(item):
        # A string is iterable, but here it is a single R value (e.g.
        # lty="dashed"), so it must be repeated rather than indexed by character.
        if isinstance(item, stringTypes):
            return [item] * count
        try:
            return list(item)
        except TypeError:
            return [item] * count

    ltys = _expand(lty)
    lwds = _expand(lwd)

    # Per-dataset styles; each wraps around if fewer entries than vectors were given.
    vectorColors = [colors[i % len(colors)] for i in range(count)]
    vectorLtys = [ltys[i % len(ltys)] for i in range(count)]
    vectorLwds = [lwds[i % len(lwds)] for i in range(count)]

    def _style(i):
        return {"verticals": True, "do.points": False,
                "col.hor": vectorColors[i], "col.vert": vectorColors[i],
                "lty": vectorLtys[i], "lwd": vectorLwds[i]}

    ecdfKwdArgs.update(_style(0))

    if ecdfKwdArgs.get("xlim") is None:
        ecdfKwdArgs["xlim"] = [min(min(vector) for vector in vectors),
                               max(max(vector) for vector in vectors)]

    # the first dataset sets up the axes; the rest are added on top
    r.plot(r.ecdf(vectors[0]), main=main, xlab=xlab, ylab=ylab, **ecdfKwdArgs)
    for i in range(1, count):
        r.plot(r.ecdf(vectors[i]), add=True, **_style(i))

    labelsWithN = [label + " (n=%d)" % len(vectors[i]) for i, label in enumerate(labels)]

    # legend lines are drawn at twice the plotted width
    r.legend(legendWhere, legend=labelsWithN, lty=vectorLtys,
             lwd=[width * 2 for width in vectorLwds],
             col=vectorColors, cex=0.7, bg="white")
def boxPlot(dict_, keysInOrder=None, **kwdargs):
    """ Plot a boxplot

    Args:
        dict_: a dictionary of group_name -> vector, where vector is the data points to be plotted for each group;
            use a collections.OrderedDict() to easily convey the order of the groups
        keysInOrder: an optional ordering of the keys in dict_ (alternate option to using collections.OrderedDict)

    additional ``kwdargs`` are passed directly to ``r.boxplot()``

    Returns:
        whatever ``r.boxplot()`` returns
    """
    # Materialise the keys as a list: on Python 3 dict.keys() is a view, and
    # the keys are both iterated here and handed to R as the group names.
    if keysInOrder:
        keysInOrder = list(keysInOrder)
    else:
        keysInOrder = list(dict_.keys())

    groups = TaggedList([])
    for key in keysInOrder:
        groups.append(robj.FloatVector(dict_[key]), "X:" + str(key))

    return r.boxplot(groups, names=keysInOrder, **kwdargs)
def barPlot(dict_, keysInOrder=None, printCounts=True, ylim=None, *args, **kwdargs):
    """ Plot a bar plot

    Args:
        dict_: a dictionary of name -> value, where value is the height of the bar
            use a collections.OrderedDict() to easily convey the order of the groups
        keysInOrder: an optional ordering of the keys in dict_ (alternate option to using collections.OrderedDict)
        printCounts: option to print the counts on top of each bar
        ylim: optional y axis limits; when None, limits are chosen that cover
            zero and every bar, with 10% headroom above the tallest bar if
            ``printCounts`` is set

    additional args and kwdargs are passed directly to r.barplot()

    Returns:
        whatever ``r.barplot()`` returns (also used as the x positions for
        the count labels)
    """
    # Materialise the keys as a list: on Python 3 dict.keys() is a view, and
    # the keys are both iterated here and handed to R as names.arg.
    if keysInOrder:
        keysInOrder = list(keysInOrder)
    else:
        keysInOrder = list(dict_.keys())

    heights = [dict_[key] for key in keysInOrder]
    kwdargs["names.arg"] = keysInOrder

    if ylim is None:
        # Bars are drawn from a baseline of zero, so the limits must include
        # zero; otherwise the shortest bar would have no visible height.
        lower = min(0, min(heights))
        upper = max(0, max(heights))
        if printCounts:
            # leave room for the labels above the tallest bar
            upper = upper * 1.1
        ylim = [lower, upper]

    x = r.barplot(heights, ylim=ylim, *args, **kwdargs)

    if printCounts:
        heightsStrings = ["{:.2g}".format(height) for height in heights]
        # pos=3 places each label above its bar's end point
        r.text(x, heights, heightsStrings, pos=3)

    return x
def scatterplotMatrix(dataFrame, main="", **kwdargs):
    """ Plots a scatterplot matrix using R's pairs().

    By default the panels above the diagonal show scatterplots with a smoothed
    trend line (panel.smooth), the panels below the diagonal show the Spearman
    correlation of the two columns (text size scaled by its absolute value),
    and the diagonal shows a histogram of each column.

    Args:
        dataFrame: a pandas DataFrame; every column is converted to an R
            numeric vector
        main: the plot title
        kwdargs: additional arguments passed to pairs(); these override the
            default ``upper.panel``, ``lower.panel`` and ``diag.panel``
    """
    robj.r.library("lattice")

    # Build a real list (not a lazy map object) so that the values can be
    # paired with their tags regardless of Python version.
    columnVectors = [robj.FloatVector(dataFrame[col]) for col in dataFrame.columns]
    taggedList = TaggedList(columnVectors, list(dataFrame.columns))
    df = robj.DataFrame(taggedList)

    # R panel function: prints the Spearman correlation in the middle of the panel
    robj.r("""panel.cor <- function(x, y, digits=2, prefix="", cex.cor)
{
usr <- par("usr"); on.exit(par(usr))
par(usr = c(0, 1, 0, 1))
r <- cor(x, y, method="spearman")
scale = abs(r)*0.8+0.2
txt <- format(c(r, 0.123456789), digits=digits)[1]
txt <- paste(prefix, txt, sep="")
if(missing(cex.cor)) cex.cor <- 0.8/strwidth(txt)
text(0.5, 0.5, txt, cex = cex.cor * scale+0.2)
}
""")

    # R panel function: draws a histogram of the column, scaled to unit height
    robj.r("""panel.hist <- function(x, ...)
{
usr <- par("usr"); on.exit(par(usr))
par(usr = c(usr[1:2], 0, 1.5) )
h <- hist(x, plot = FALSE)
breaks <- h$breaks; nB <- length(breaks)
y <- h$counts; y <- y/max(y)
rect(breaks[-nB], 0, breaks[-1], y, col="lightgrey", ...)
}""")

    additionalParams = {"upper.panel": robj.r["panel.smooth"],
                        "lower.panel": robj.r["panel.cor"],
                        "diag.panel": robj.r["panel.hist"]}
    additionalParams.update(kwdargs)

    robj.r["pairs"](df, main=main, **additionalParams)
def plotWithSolidErrbars(x, y, upper, lower, add=False, errbarcol="lightgray", plotargs=None, polygonargs=None):
    """ Plot a line with its error range drawn as a filled band

    The band is a polygon that runs along ``upper`` from left to right and
    back along ``lower`` from right to left; the line itself is drawn on top.

    Args:
        x: the x values
        y: the y values of the line
        upper: the upper edge of the band, one value per x
        lower: the lower edge of the band, one value per x
        add: if True, draw onto the existing plot instead of starting a new one
        errbarcol: fill color of the band
        plotargs: an optional dictionary of arguments passed to both r.plot()
            (which only sets up the axes) and r.lines()
        polygonargs: an optional dictionary of arguments passed to r.polygon();
            ``border`` defaults to "NA" unless given here

    Neither dictionary is modified.

    Returns:
        the tuple (x, y, upper, lower, errbarx, errbary), where x is the numpy
        array form of the input and errbarx/errbary are the polygon outline
    """
    plotargs = {} if plotargs is None else dict(plotargs)

    # defaults first, so that values supplied by the caller take precedence
    mergedPolygonArgs = {"border": "NA"}
    if polygonargs is not None:
        mergedPolygonArgs.update(polygonargs)

    x = numpy.asarray(x)
    # outline of the band: forwards along the top, backwards along the bottom
    errbarx = numpy.concatenate([x, x[::-1]])
    errbary = numpy.concatenate([numpy.asarray(upper), numpy.asarray(lower)[::-1]])

    if not add:
        # Only set up the axes here; force type="n" even if plotargs carries
        # its own "type", which would otherwise be a duplicate keyword.
        r.plot(x, y, **dict(plotargs, type="n"))

    r.polygon(errbarx, errbary, col=errbarcol, **mergedPolygonArgs)
    r.lines(x, y, **plotargs)

    return x, y, upper, lower, errbarx, errbary