# Source code for biorpy.conversion

from collections import OrderedDict
import pandas
import numpy
from rpy2.robjects import numpy2ri
from rpy2 import robjects, rinterface

## CONVERSION

# this might be best put in a separate module
def convertToR(obj):
    """
    Convert Pandas/Numpy objects to R objects.

    If the input object is a Pandas DataFrame, convert it to an R DataFrame.
    If it's a Series, treat it like a vector/numpy array.

    Dispatch, in order:

    * pandas DataFrame -> ``pandasDataFrameToRPy2DataFrame(obj)``
    * pandas Series    -> converted as a plain list of its values
    * numpy.ndarray    -> ``numpy2ri.numpy2ri(obj)``
    * list / tuple     -> ``robjects.FloatVector``; if that raises
      ValueError, ``robjects.StrVector``; if that raises ValueError as well,
      the object is returned unchanged.  An empty sequence becomes an empty
      FloatVector.
    * OrderedDict      -> every value is converted recursively; the result is
      a ``robjects.DataFrame`` when all converted values report the same
      length, otherwise a ``robjects.ListVector``
    * anything else    -> returned unchanged
    """
    if isinstance(obj, pandas.core.frame.DataFrame):
        return pandasDataFrameToRPy2DataFrame(obj)

    if isinstance(obj, pandas.Series):
        return convertToR(list(obj))

    if isinstance(obj, numpy.ndarray):
        return numpy2ri.numpy2ri(obj)

    if isinstance(obj, (list, tuple)):
        if not obj:
            return robjects.FloatVector([])
        # Prefer a numeric vector, then a character vector.
        for vector_type in (robjects.FloatVector, robjects.StrVector):
            try:
                return vector_type(obj)
            except ValueError:
                continue
        # Neither vector type accepted the contents: hand the object back.
        return obj

    if isinstance(obj, OrderedDict):
        lengths = set()
        asrpy2 = OrderedDict()
        for key in obj:
            asrpy2[key] = convertToR(obj[key])
            try:
                lengths.add(len(asrpy2[key]))
            except Exception:
                # Values without a usable length count as length 1.  Only
                # Exception is caught so KeyboardInterrupt / SystemExit are
                # not swallowed here.
                lengths.add(1)

        # Columns of one common length can form a data.frame; ragged
        # contents have to stay a list.
        if len(lengths) == 1:
            return robjects.DataFrame(asrpy2)
        return robjects.ListVector(asrpy2)

    return obj
class ResultWrapper(object):
    """
    Represents output from R as a dictionary-like object, with conversion of
    each value to a pandas DataFrame or a numpy.array if possible

    Attributes that contain a period in R can usually be accessed directly
    from python by omitting the period (eg, 'p.value' can be accessed from
    'pvalue')

    Lookup order for both ``wrapper.name`` and ``wrapper["name"]``:

    1. ordinary Python attributes of the wrapper itself,
    2. an R element with exactly that name,
    3. an R element whose name equals ``name`` once periods are removed.

    A name that matches none of these raises AttributeError for attribute
    access and KeyError for item access.
    """

    def __init__(self, result):
        # The wrapped R object; it must expose ``names`` and ``rx2(name)``.
        self._result = result

    def __repr__(self):
        return str(dict(self.iteritems()))

    def __str__(self):
        return str(self._result)

    def keys(self):
        """Return the R element names as a list."""
        return list(self._result.names)

    def iteritems(self):
        """Yield ``(name, converted value)`` for every R element name."""
        for key in self._result.names:
            yield (key, self[key])

    def __getitem__(self, attr):
        try:
            return self.__getattribute__(attr)
        except AttributeError:
            # Item access reports a missing name as KeyError.
            raise KeyError(attr)

    def __getattribute__(self, attr):
        base_lookup = super(ResultWrapper, self).__getattribute__
        try:
            return base_lookup(attr)
        except AttributeError as ae:
            # Keep the original error so it can be re-raised if the R-side
            # lookups below find nothing either.
            orig_ae = ae

        # Fetch ``_result`` through the base implementation: going through
        # ``self._result`` would re-enter this method and recurse without
        # bound on an instance that has no ``_result`` yet.
        try:
            result = base_lookup("_result")
        except AttributeError:
            raise orig_ae

        names = result.names
        if attr in names:
            return convertFromR(result.rx2(attr))

        # see if we can find the attribute if we remove periods
        dotted_by_undotted = dict((name.replace(".", ""), name)
                                  for name in names)
        if attr in dotted_by_undotted:
            return convertFromR(result.rx2(dotted_by_undotted[attr]))

        raise orig_ae
def addResultWrapper(result):
    """
    Attach a Python-side view of an R result to it as ``result.py``.

    R NULL values and numpy arrays are left untouched.  An R data.frame gets
    a pandas DataFrame; every other object gets a ResultWrapper.  Always
    returns None.
    """
    if isinstance(result, rinterface.RNULLType):
        # could convert this to numpy.nan
        return
    if isinstance(result, numpy.ndarray):
        return

    if isinstance(result, robjects.vectors.DataFrame):
        result.py = rpy2DataFrameToPandasDataFrame(result)
    else:
        result.py = ResultWrapper(result)


def convertFromR(obj):
    """
    Convert an R object to its Python counterpart.

    R data.frames become pandas DataFrames, other R vectors become numpy
    arrays, and anything else is returned unchanged.
    """
    # DataFrame is tested before the generic Vector branch so that it takes
    # the pandas route.
    if isinstance(obj, robjects.vectors.DataFrame):
        return rpy2DataFrameToPandasDataFrame(obj)
    if isinstance(obj, robjects.vectors.Vector):
        return numpy.array(obj)
    return obj


# Scalar type of a column -> rpy2 vector class used to hold that column.
#
# The builtin float/int/str/bool entries stand in for numpy.float, numpy.int,
# numpy.str and numpy.bool: those names were plain aliases of the builtins,
# deprecated in NumPy 1.20 and removed in 1.24, so referencing them makes
# this module fail at import time on current NumPy.  numpy.bool_ and
# numpy.str_ are the scalar types numpy reports as ``dtype.type`` for boolean
# and unicode data.
VECTOR_TYPES = {numpy.float64: robjects.FloatVector,
                numpy.float32: robjects.FloatVector,
                float: robjects.FloatVector,
                int: robjects.IntVector,
                numpy.int32: robjects.IntVector,
                numpy.int64: robjects.IntVector,
                numpy.object_: robjects.StrVector,
                str: robjects.StrVector,
                numpy.str_: robjects.StrVector,
                bool: robjects.BoolVector,
                numpy.bool_: robjects.BoolVector}

# Scalar type of a column -> R missing-value marker substituted for nulls.
# Kept key-for-key parallel with VECTOR_TYPES.
NA_TYPES = {numpy.float64: robjects.NA_Real,
            numpy.float32: robjects.NA_Real,
            float: robjects.NA_Real,
            int: robjects.NA_Integer,
            numpy.int32: robjects.NA_Integer,
            numpy.int64: robjects.NA_Integer,
            numpy.object_: robjects.NA_Character,
            str: robjects.NA_Character,
            numpy.str_: robjects.NA_Character,
            bool: robjects.NA_Logical,
            numpy.bool_: robjects.NA_Logical}

import rpy2.rlike.container as rlc


def rpy2DataFrameToPandasDataFrame(rdf):
    """
    Convert an R data.frame to a pandas DataFrame.

    The frame is first turned into a numpy record array with
    ``numpy2ri.ri2numpy`` and then loaded with ``DataFrame.from_records``,
    using the R row names as index and the R column names as columns.
    """
    recarray = numpy2ri.ri2numpy(rdf)
    df = pandas.DataFrame.from_records(recarray,
                                       index=list(rdf.rownames),
                                       columns=list(rdf.colnames))
    return df
def pandasDataFrameToRPy2DataFrame(df, strings_as_factors=False):
    """
    Convert a pandas DataFrame to a R data.frame.

    Args:
        df: The DataFrame being converted
        strings_as_factors: Whether to turn strings into R factors (default: False)

    Returns:
        An R data.frame

    Raises:
        KeyError: if a column's dtype has no entry in VECTOR_TYPES.
    """
    columns = rlc.OrdDict()

    # R's I() marks a vector "as is".  It is looked up once here instead of
    # once per column, and not at all when factors were requested.
    as_is = None if strings_as_factors else robjects.baseenv.get("I")

    # FIXME: This doesn't handle MultiIndex

    for column in df:
        series = df[column]
        value_type = series.dtype.type

        if value_type == numpy.datetime64:
            # NOTE(review): convert_to_r_posixct is not defined in the part
            # of this module visible here -- confirm it exists.
            value = convert_to_r_posixct(series)
        else:
            if value_type not in VECTOR_TYPES:
                # Still a KeyError, but one that names the offending column.
                raise KeyError("no R vector type registered for column %r "
                               "with dtype %s" % (column, series.dtype))

            # Swap pandas nulls for the matching R NA marker before building
            # the R vector.
            items = [item if pandas.notnull(item) else NA_TYPES[value_type]
                     for item in series]
            value = VECTOR_TYPES[value_type](items)

            if as_is is not None:
                value = as_is(value)

        columns[column] = value

    r_dataframe = robjects.DataFrame(columns)
    r_dataframe.rownames = robjects.StrVector(df.index)

    return r_dataframe