# Copyright (C) 2020 Krishnaswamy Lab, Yale University
import numpy as np
import pandas as pd
import graphtools
from . import utils
from . import filter
from graphtools.estimator import GraphEstimator, attribute
from functools import partial
[docs]class MELD(GraphEstimator):
"""MELD operator for filtering signals over a graph.
Parameters
----------
beta : int, optional, Default: 60
Amount of smoothing to apply. Default value of 60 determined through analysis
of simulated data using Splatter.
offset: float, optional, Default: 0
Amount to shift the MELD filter in the eigenvalue spectrum.
Recommend using an eigenvalue from the graph based on the
spectral distribution. Should be in interval [0,1]
order: int, optional, Default: 1
Falloff and smoothness of the filter. High order leads to square-like filters.
filter: str, optional, Default: 'heat'
Filter type to use. Should be in ['heat', 'laplacian']
solver : string, optional, Default: 'chebyshev'
Method to solve convex problem.
'chebyshev' uses a chebyshev polynomial approximation of the corresponding
filter. 'exact' uses the eigenvalue solution to the problem
chebyshev_order : int, optional, Default: 50
Order of chebyshev approximation to use.
lap_type : ('combinatorial', 'normalized'), Default: 'combinatorial'
The kind of Laplacian to calculate
sample_normalize : boolean, optional, Default: True
If True, the sample indicator vectors are column normalized to sum to 1
"""
# parameters
beta = attribute(
"beta",
doc="Amount of smoothing to apply. Default value of 60 determined through"
"analysis of simulated data using Splatter",
default=40,
on_set=graphtools.utils.check_positive,
)
offset = attribute(
"offset",
doc="Amount to shift the MELD filter in the eigenvalue spectrum."
"Recommend using an eigenvalue from the graph based on the"
"spectral distribution. Should be in interval [0,1]",
default=0,
)
order = attribute(
"order",
doc="Falloff and smoothness of the filter."
"High order leads to square-like filters.",
default=1,
)
filter = attribute(
"filter",
default="heat",
doc="Filter type to use. Should be in ['heat', 'laplacian']",
on_set=partial(graphtools.utils.check_in, ["heat", "laplacian"]),
)
solver = attribute(
"solver",
default="chebyshev",
doc="Method to solve convex problem."
"'chebyshev' uses a chebyshev polynomial approximation of the corresponding"
"filter. 'exact' uses the eigenvalue solution to the problem",
on_set=partial(graphtools.utils.check_in, ["chebyshev", "exact"]),
)
chebyshev_order = attribute(
"chebyshev_order",
default=30,
doc="Order of chebyshev approximation to use.",
on_set=[graphtools.utils.check_int, graphtools.utils.check_positive],
)
lap_type = attribute(
"lap_type",
default="combinatorial",
doc="The kind of Laplacian to calculate",
on_set=partial(graphtools.utils.check_in, ["combinatorial", "normalized"]),
)
# stored attributes
sample_densities = attribute(
"sample_densities", doc="Density associated with each sample"
)
def __init__(
self,
beta=60,
offset=0,
order=1,
filter="heat",
solver="chebyshev",
chebyshev_order=50,
lap_type="combinatorial",
sample_normalize=True,
anisotropy=1,
n_landmark=None,
**kwargs
):
self.beta = beta
self.offset = offset
self.order = order
self.solver = solver
self.chebyshev_order = chebyshev_order
self.lap_type = lap_type
self.filter = filter
self.sample_normalize = sample_normalize
kwargs["use_pygsp"] = True
super().__init__(anisotropy=anisotropy, n_landmark=n_landmark, **kwargs)
def _reset_graph(self):
self._reset_filter()
def _reset_filter(self):
self.filt = None
self.sample_densities = None
def set_params(self, **params):
for p in [
"beta",
"offset",
"order",
"solver",
"chebyshev_order",
"lap_type",
"filter",
]:
if p in params and params[p] != getattr(self, p):
self._reset_filter()
setattr(self, p, params[p])
del params[p]
super().set_params(**params)
def _create_sample_indicators(self, sample_labels):
"""
Helper function to take an array-like of non-numerics and produce a collection
of sample indicator vectors.
"""
self.sample_labels_ = sample_labels
self.samples = np.unique(sample_labels)
try:
labels = sample_labels.values
except AttributeError:
labels = self.sample_labels_
if len(labels.shape) > 1:
# If you have a 2D array
if labels.shape[1] == 1:
# If it's just a column-vector, reshape it
labels = labels.reshape(-1)
else:
# If its got multiple-columns, raise Error
raise ValueError(
"sample_labels must be a single column. Got"
"shape={}".format(labels.shape)
)
if self.samples.shape[0] == 2:
# When there's two samples (i.e. [A, A, B, B])
# LabelBinarizer doesn't work nicely with only two labels
# This creates a two-column dataframe using the sample labels
df = pd.DataFrame(
[labels == self.samples[0], labels == self.samples[1]],
columns=self._labels_index,
).astype(int)
df.index = self.samples
self.sample_indicators = df.T
else:
# We have more than two samples, use label binarizer.
import sklearn
self._LB = sklearn.preprocessing.LabelBinarizer()
sample_indicators = self._LB.fit_transform(self.sample_labels_)
self.sample_indicators = pd.DataFrame(
sample_indicators, columns=self._LB.classes_
)
return self.sample_indicators