Source code for flirt.simple.empatica

import os
import zipfile
import multiprocessing

import pandas as pd

# import flirt
import flirt.reader.empatica


def get_features_for_empatica_archive(zip_file_path: str,
                                      window_length: int = 180,
                                      window_step_size: int = 1,
                                      hrv_features: bool = True,
                                      eda_features: bool = True,
                                      acc_features: bool = True,
                                      debug: bool = False,
                                      num_cores: int = 0) -> pd.DataFrame:
    """
    Calculate a standard set of HRV, EDA and ACC features for a given Empatica archive
    (e.g. downloaded from E4 connect).

    For every enabled modality the corresponding CSV member of the archive (``IBI.csv``,
    ``EDA.csv``, ``ACC.csv``) is read, features are calculated on sliding windows, and the
    per-modality results are merged into a single DataFrame. EDA and ACC feature columns
    are prefixed with ``eda_`` and ``acc_`` respectively.

    Parameters
    ----------
    zip_file_path : str
        path to the Empatica zip file
    window_length : int
        the epoch width (window size) in seconds, for which features should be calculated
    window_step_size : int
        the step size for the sliding window in seconds; also used as the frequency
        (in seconds) of the merged result
    hrv_features : bool
        whether HRV features should be calculated from the archive
    eda_features : bool
        whether EDA features should be calculated from the archive
    acc_features : bool
        whether ACC features should be calculated from the archive
    debug : bool
        whether debug output should be printed
    num_cores : int, optional
        number of cores to use for parallel processing; any value below 1 (the default)
        or None uses all available cores

    Returns
    -------
    pd.DataFrame
        a pandas DataFrame containing a standard set of features for Empatica

    Raises
    ------
    ValueError
        if ``zip_file_path`` is None, does not point to an existing file, or the file
        is not a zip archive

    Examples
    --------
    >>> import flirt.simple
    >>> features = flirt.simple.get_features_for_empatica_archive("1560460372_A12345.zip")
    """

    # Report the two failure modes separately so the message matches the actual problem.
    if zip_file_path is None or not os.path.isfile(zip_file_path):
        raise ValueError('zip file does not exist %s' % zip_file_path)
    if not zipfile.is_zipfile(zip_file_path):
        raise ValueError('file is not a valid zip archive %s' % zip_file_path)

    if debug:
        print("Reading files")

    if num_cores is None or num_cores < 1:
        num_cores = multiprocessing.cpu_count()

    # Disabled modalities contribute an empty placeholder frame to the merge.
    df_hrv_features = pd.DataFrame()
    df_eda_features = pd.DataFrame()
    df_acc_features = pd.DataFrame()

    # NOTE(review): flirt.hrv / flirt.eda / flirt.acc are not imported explicitly among the
    # imports visible in this module; this presumably relies on the flirt package exposing
    # them as attributes -- confirm.
    with zipfile.ZipFile(zip_file_path) as zip_file:
        if hrv_features:
            with zip_file.open("IBI.csv") as f:
                ibi_data = flirt.reader.empatica.read_ibi_file_into_df(f)

            if debug:
                print("Calculating HRV features")
            df_hrv_features = flirt.hrv.get_hrv_features(ibi_data.iloc[:, 0], window_length=window_length,
                                                         window_step_size=window_step_size,
                                                         num_cores=num_cores)

        if eda_features:
            # Close the archive member before the feature calculation, as for HRV.
            with zip_file.open("EDA.csv") as f:
                eda_data = flirt.reader.empatica.read_eda_file_into_df(f)

            if debug:
                print("Calculating EDA features")
            df_eda_features = flirt.eda.get_eda_features(eda_data.iloc[:, 0], window_length=window_length,
                                                         window_step_size=window_step_size,
                                                         num_cores=num_cores).add_prefix('eda_')

        if acc_features:
            with zip_file.open("ACC.csv") as f:
                acc_data = flirt.reader.empatica.read_acc_file_into_df(f)

            if debug:
                print("Calculating ACC features")
            df_acc_features = flirt.acc.get_acc_features(acc_data[:], window_length=window_length,
                                                         window_step_size=window_step_size,
                                                         num_cores=num_cores).add_prefix('acc_')

    # The archive is no longer needed once all requested members have been read.
    return __merge_features(df_hrv_features, df_eda_features, df_acc_features, freq='%ds' % window_step_size)


def __merge_features(hrv_features: pd.DataFrame, eda_features: pd.DataFrame, acc_features: pd.DataFrame,
                     freq: str = '1s') -> pd.DataFrame:
    """
    Join the per-modality feature frames column-wise and resample them onto a regular time grid.

    The frames are concatenated along the columns, the combined index is extended with a
    regular grid spanning from the first timestamp (rounded up to a full second) to the last
    timestamp (rounded down to a full second), values are interpolated time-weighted, and
    finally only the grid timestamps are kept.

    Parameters
    ----------
    hrv_features : pd.DataFrame
        HRV features indexed by timestamp (may be empty)
    eda_features : pd.DataFrame
        EDA features indexed by timestamp (may be empty)
    acc_features : pd.DataFrame
        ACC features indexed by timestamp (may be empty)
    freq : str
        pandas frequency string for the spacing of the resulting index

    Returns
    -------
    pd.DataFrame
        the merged features on the regular grid; the grid uses the same timezone (or lack
        thereof) as the input timestamps. An empty DataFrame is returned when all three
        inputs are empty.
    """
    if hrv_features.empty and eda_features.empty and acc_features.empty:
        print("Received empty input features, returning empty df")
        return pd.DataFrame()

    merged_features = pd.concat([hrv_features, eda_features, acc_features], axis=1, sort=True)

    # Depending on the pandas version, concatenating with an empty placeholder frame can leave
    # an object-dtype index holding timestamps; time-weighted interpolation needs a real
    # DatetimeIndex, so convert it back.
    if merged_features.index.dtype == object:
        merged_features.index = pd.to_datetime(merged_features.index)

    # No explicit tz: date_range takes the timezone from start/end, so UTC input still yields
    # a UTC grid while naive or differently-localised input no longer fails.
    target_index = pd.date_range(start=merged_features.index[0].ceil('s'),
                                 end=merged_features.index[-1].floor('s'), freq=freq)

    # Add the grid timestamps as (initially NaN) rows, fill them by interpolation, then drop
    # the original, irregular timestamps.
    merged_features = merged_features.reindex(index=merged_features.index.union(target_index))
    merged_features = merged_features.interpolate(method='time')
    merged_features = merged_features.reindex(target_index)

    return merged_features