Source code for flirt.eda.feature_calculation

import multiprocessing
import warnings
from datetime import timedelta

import cvxopt as cvx
import numpy as np
import pandas as pd
from joblib import Parallel, delayed
from tqdm.autonotebook import trange
from ..util import processing

from ..stats.common import get_stats


[docs]def get_eda_features(data: pd.Series, window_length: int = 60, window_step_size: int = 1, data_frequency: int = 4, num_cores: int = 0): """ Computes statistical EDA features based on the signal decompositon into phasic/tonic components of the skin \ conductivity. Parameters ---------- data : pd.Series input EDA time series window_length : int the window size in seconds to consider window_step_size : int the time step to shift each window data_frequency : int the frequency of the input signal num_cores : int, optional number of cores to use for parallel processing, by default use all available Returns ------- EDA Features: pd.DataFrame A DataFrame containing statistical aggregation features of the tonic/phasic EDA components. Examples -------- >>> import flirt.reader.empatica >>> eda = flirt.reader.empatica.read_eda_file_into_df("EDA.csv") >>> acc_features = flirt.get_eda_features(eda['eda'], 180, 1) """ input_data = data.copy() # ensure we have a DatetimeIndex, needed for calculation if not isinstance(input_data.index, pd.DatetimeIndex): input_data.index = pd.DatetimeIndex(input_data.index) # use only every nth value inputs = trange(0, len(input_data) - 1, window_step_size * data_frequency, desc="EDA features") if not num_cores >= 1: num_cores = multiprocessing.cpu_count() def process(memmap_data) -> dict: with Parallel(n_jobs=num_cores, max_nbytes=None) as parallel: return parallel(delayed(__get_scr_scl) (memmap_data, window_length=window_length, data_frequency=data_frequency, i=k) for k in inputs) results = processing.memmap_auto(input_data, process) results = pd.DataFrame(list(filter(None, results))) results.set_index('datetime', inplace=True) results.sort_index(inplace=True) for column in results.columns: nan_count = results[column].isin([np.nan, np.inf, -np.inf]).sum() proportion = nan_count / len(results) if proportion > 0.05: warnings.warn(str(column) + " contains more than 5% (actual: " + str((proportion * 100).round(2)) + "%) nan, inf, or -inf values. We recommend to delete this feature column.", stacklevel=3) return results
def __get_scr_scl(data: pd.Series, window_length: int, data_frequency: int, i: int): if pd.Timedelta(data.index[i + 1] - data.index[i]).total_seconds() <= window_length: min_timestamp = data.index[i] max_timestamp = min_timestamp + timedelta(seconds=window_length) results = { 'datetime': max_timestamp, } relevant_data = data.loc[(data.index >= min_timestamp) & (data.index < max_timestamp)] try: r, t = __cvx_eda(relevant_data.values, 1 / data_frequency) results.update(get_stats(np.ravel(t), 'tonic')) results.update(get_stats(np.ravel(r), 'phasic')) except Exception: pass return results else: return None def __cvx_eda(y, delta, tau0=2., tau1=0.7, delta_knot=10., alpha=8e-4, gamma=1e-2, solver=None, options={'reltol': 1e-9, 'show_progress': False}): """ CVXEDA Convex optimization approach to electrodermal activity processing This function implements the cvxEDA algorithm described in "cvxEDA: a Convex Optimization Approach to Electrodermal Activity Processing" (http://dx.doi.org/10.1109/TBME.2015.2474131, also available from the authors' homepages). Arguments: y: observed EDA signal (we recommend normalizing it: y = zscore(y)) delta: sampling interval (in seconds) of y tau0: slow time constant of the Bateman function tau1: fast time constant of the Bateman function delta_knot: time between knots of the tonic spline function alpha: penalization for the sparse SMNA driver gamma: penalization for the tonic spline coefficients solver: sparse QP solver to be used, see cvxopt.solvers.qp options: solver options, see: http://cvxopt.org/userguide/coneprog.html#algorithm-parameters Returns (see paper for details): r: phasic component p: sparse SMNA driver of phasic component t: tonic component l: coefficients of tonic spline d: offset and slope of the linear drift term e: model residuals obj: value of objective function being minimized (eq 15 of paper) """ n = len(y) y = cvx.matrix(y) # bateman ARMA model a1 = 1. / min(tau1, tau0) # a1 > a0 a0 = 1. / max(tau1, tau0) ar = np.array([(a1 * delta + 2.) * (a0 * delta + 2.), 2. * a1 * a0 * delta ** 2 - 8., (a1 * delta - 2.) * (a0 * delta - 2.)]) / ((a1 - a0) * delta ** 2) ma = np.array([1., 2., 1.]) # matrices for ARMA model i = np.arange(2, n) A = cvx.spmatrix(np.tile(ar, (n - 2, 1)), np.c_[i, i, i], np.c_[i, i - 1, i - 2], (n, n)) M = cvx.spmatrix(np.tile(ma, (n - 2, 1)), np.c_[i, i, i], np.c_[i, i - 1, i - 2], (n, n)) # spline delta_knot_s = int(round(delta_knot / delta)) spl = np.r_[np.arange(1., delta_knot_s), np.arange(delta_knot_s, 0., -1.)] # order 1 spl = np.convolve(spl, spl, 'full') spl /= max(spl) # matrix of spline regressors i = np.c_[np.arange(-(len(spl) // 2), (len(spl) + 1) // 2)] + np.r_[np.arange(0, n, delta_knot_s)] nB = i.shape[1] j = np.tile(np.arange(nB), (len(spl), 1)) p = np.tile(spl, (nB, 1)).T valid = (i >= 0) & (i < n) B = cvx.spmatrix(p[valid], i[valid], j[valid]) # trend C = cvx.matrix(np.c_[np.ones(n), np.arange(1., n + 1.) / n]) nC = C.size[1] # Solve the problem: # .5*(M*q + B*l + C*d - y)^2 + alpha*sum(A,1)*p + .5*gamma*l'*l # s.t. A*q >= 0 # old_options = cvx.solvers.options.copy() cvx.solvers.options.clear() cvx.solvers.options.update(options) if solver == 'conelp': # Use conelp z = lambda m, n: cvx.spmatrix([], [], [], (m, n)) G = cvx.sparse([[-A, z(2, n), M, z(nB + 2, n)], [z(n + 2, nC), C, z(nB + 2, nC)], [z(n, 1), -1, 1, z(n + nB + 2, 1)], [z(2 * n + 2, 1), -1, 1, z(nB, 1)], [z(n + 2, nB), B, z(2, nB), cvx.spmatrix(1.0, range(nB), range(nB))]]) h = cvx.matrix([z(n, 1), .5, .5, y, .5, .5, z(nB, 1)]) c = cvx.matrix([(cvx.matrix(alpha, (1, n)) * A).T, z(nC, 1), 1, gamma, z(nB, 1)]) res = cvx.solvers.conelp(c, G, h, dims={'l': n, 'q': [n + 2, nB + 2], 's': []}) obj = res['primal objective'] else: # Use qp Mt, Ct, Bt = M.T, C.T, B.T H = cvx.sparse([[Mt * M, Ct * M, Bt * M], [Mt * C, Ct * C, Bt * C], [Mt * B, Ct * B, Bt * B + gamma * cvx.spmatrix(1.0, range(nB), range(nB))]]) f = cvx.matrix([(cvx.matrix(alpha, (1, n)) * A).T - Mt * y, -(Ct * y), -(Bt * y)]) res = cvx.solvers.qp(H, f, cvx.spmatrix(-A.V, A.I, A.J, (n, len(f))), cvx.matrix(0., (n, 1)), solver=solver) obj = res['primal objective'] + .5 * (y.T * y) # cvx.solvers.options.clear() # cvx.solvers.options.update(old_options) l = res['x'][-nB:] d = res['x'][n:n + nC] t = B * l + C * d q = res['x'][:n] p = A * q r = M * q e = y - r - t return r, t # return r, p, t, l, d, e, obj