TODO: decide whether hyperparameters are tuned on a single train/validation split or with nested (inner/outer fold) cross-validation.

import pandas as pd
import matplotlib.pyplot as plt
from latex import latexify, format_axes
import numpy as np
import tsfel
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import export_graphviz
from sklearn import tree
import graphviz
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import seaborn as sns
from MakeDataset import *
%matplotlib inline
# Retina
%config InlineBackend.figure_format = 'retina'

Training data shape:  (108, 500, 3)
Testing data shape:  (36, 500, 3)
Validation data shape:  (36, 500, 3)

X_train, y_train
X_test, y_test
X_val, y_val

(180, 500, 3)

Total acceleration used as the single input series: \(a_x^2 + a_y^2 + a_z^2\)

# Collapse the three axis channels of every sample into one series by summing
# the squared components over the last axis: (n, 500, 3) -> (n, 500).
X_train_TS = np.sum(np.square(X_train), axis = -1)
X_test_TS = np.sum(np.square(X_test), axis = -1)
X_val_TS = np.sum(np.square(X_val), axis = -1)
print(X_train_TS.shape, X_test_TS.shape, X_val_TS.shape)

(108, 500) (36, 500) (36, 500)

features_sel = ["0_Mean", "0_Variance", "0_Peak to peak distance", "0_Mean absolute deviation"]

# Lookup from integer activity id to activity name.
classesN = {1 : 'WALKING', 2 : 'WALKING_UPSTAIRS', 3 : 'WALKING_DOWNSTAIRS', 4 : 'SITTING', 5 : 'STANDING', 6 : 'LAYING'}
# Activity name for every entry of y_train, in the same order.
namedLabel = [classesN[i] for i in y_train]
classesN

{1: 'WALKING',
 2: 'WALKING_UPSTAIRS',
 3: 'WALKING_DOWNSTAIRS',
 4: 'SITTING',
 5: 'STANDING',
 6: 'LAYING'}

def Featuriser(XTimeSeries, features, fs = 50):
    """Extract the selected TSFEL features for every series in a batch.

    Parameters
    ----------
    XTimeSeries : array-like
        Two-dimensional data accepted by ``pd.DataFrame``; each row is
        featurised as one time series.
    features : list of str
        Names of the TSFEL output columns to keep (e.g. ``"0_Mean"``).
    fs : int or float, default 50
        Sampling frequency forwarded to the TSFEL extractor.

    Returns
    -------
    pd.DataFrame
        One row per input series, labelled 0..n-1, restricted to `features`.

    Raises
    ------
    ValueError
        If `XTimeSeries` has no rows (``pd.concat`` gets nothing to join).
    """
    cfg = tsfel.get_features_by_domain()
    series_frame = pd.DataFrame(XTimeSeries)
    # Walk the rows by position: `iloc` is positional, so using the frame's
    # index labels here would only be right for a default 0..n-1 index.
    per_series = [
        tsfel.time_series_features_extractor(cfg, series_frame.iloc[pos, :], fs = fs)
        for pos in range(len(series_frame))
    ]
    # Every extractor result is a one-row frame labelled 0; renumber on concat
    # so the rows of the combined frame carry unique labels.
    combined = pd.concat(per_series, axis = 0, ignore_index = True)
    return combined[features]

Featurising all the `X_train_TS`, `X_test_TS`, `X_val_TS`

dfTrain = Featuriser(X_train_TS, features_sel)
dfTest = Featuriser(X_test_TS, features_sel)
dfVal = Featuriser(X_val_TS, features_sel)

dfTrain.shape

(108, 4)

dfTest.shape

(36, 4)

dfVal.shape

(36, 4)

dfTrain

	0_Mean	0_Variance	0_Peak to peak distance	0_Mean absolute deviation
0	1.058182	0.000441	0.276308	0.010239
0	1.072680	0.000439	0.302652	0.011554
0	1.141142	0.281282	2.951101	0.382585
0	1.193139	0.442850	2.853736	0.537589
0	1.005901	0.000026	0.042222	0.004003
...	...	...	...	...
0	1.328807	1.370835	4.655614	0.995502
0	1.188371	0.436127	3.625210	0.506932
0	1.066069	0.000026	0.031092	0.004027
0	1.116685	0.245975	2.492894	0.382408
0	1.118337	0.250313	2.180257	0.381846

108 rows × 4 columns

hyperparams = {"max_depth" : [2, 3, 4, 5, 6, 7, 8, 9, 10], "criterion" : ["gini", "entropy"], "min_samples_leaf" : [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]}
hyperparams

{'max_depth': [2, 3, 4, 5, 6, 7, 8, 9, 10],
 'criterion': ['gini', 'entropy'],
 'min_samples_leaf': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]}

from itertools import product

# Exhaustive sweep over every (max_depth, criterion, min_samples_leaf)
# combination: fit on the training features, score on the validation features,
# and record one entry per combination keyed by its running number.
final = {}
search_grid = product(hyperparams["max_depth"], hyperparams["criterion"], hyperparams["min_samples_leaf"])
for counter, (max_depth, criteria, min_sample) in enumerate(search_grid):
    model = DecisionTreeClassifier(
        max_depth = max_depth,
        criterion = criteria,
        min_samples_leaf = min_sample,
        random_state = 42,
    )
    model.fit(dfTrain, y_train)
    val_score = model.score(dfVal, y_val)
    final[counter] = {
        "max_depth" : max_depth,
        "criterion" : criteria,
        "min_samples_leaf" : min_sample,
        "val_score" : val_score,
    }
# Leave `counter` holding the number of combinations tried.
counter = len(final)

hparam_df = pd.DataFrame(final).T
hparam_df

	max_depth	criterion	min_samples_leaf	val_score
0	2	gini	1	0.638889
1	2	gini	2	0.638889
2	2	gini	3	0.638889
3	2	gini	4	0.638889
4	2	gini	5	0.638889
...	...	...	...	...
265	10	entropy	11	0.694444
266	10	entropy	12	0.666667
267	10	entropy	13	0.666667
268	10	entropy	14	0.666667
269	10	entropy	15	0.666667

270 rows × 4 columns

hparam_df.sort_values(by = "val_score", ascending = False).head(10)

	max_depth	criterion	min_samples_leaf	val_score
136	6	entropy	2	0.777778
256	10	entropy	2	0.722222
255	10	entropy	1	0.722222
115	5	entropy	11	0.694444
130	6	gini	11	0.694444
220	9	gini	11	0.694444
241	10	gini	2	0.694444
100	5	gini	11	0.694444
250	10	gini	11	0.694444
190	8	gini	11	0.694444

# Pool the features and labels of all three splits, both stacked in the order
# train, validation, test (the label variable's name lists them differently).
# NOTE(review): the test split is included, so no held-out part of this
# dataset remains for evaluating a model fitted on the pooled data.
dfTrain_Val_Test = pd.concat([dfTrain, dfVal, dfTest], axis = 0)
y_train_test_val = np.hstack([y_train, y_val, y_test])
dfTrain_Val_Test

	0_Mean	0_Variance	0_Peak to peak distance	0_Mean absolute deviation
0	1.058182	0.000441	0.276308	0.010239
0	1.072680	0.000439	0.302652	0.011554
0	1.141142	0.281282	2.951101	0.382585
0	1.193139	0.442850	2.853736	0.537589
0	1.005901	0.000026	0.042222	0.004003
...	...	...	...	...
0	1.012756	0.000036	0.046293	0.004625
0	1.029612	0.000035	0.055289	0.004476
0	1.221348	0.614296	3.526818	0.662692
0	1.168859	0.455832	3.239159	0.501238
0	1.159747	0.317845	2.934196	0.436656

180 rows × 4 columns

# Final classifier: the top-ranked setting from the validation sweep
# (max_depth 6, entropy, min_samples_leaf 2), refitted on the pooled data.
model = DecisionTreeClassifier(max_depth = 6, min_samples_leaf = 2, criterion = "entropy", random_state = 42)
model.fit(dfTrain_Val_Test, y_train_test_val)

DecisionTreeClassifier(criterion='entropy', max_depth=6, min_samples_leaf=2,
                       random_state=42)

In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.

def getTimeSeries(filename):
    """Read one recording from the ``./Time Series Data`` folder.

    `filename` is the CSV file's name inside that folder; the folder is
    resolved relative to the current working directory. Returns the parsed
    CSV as a DataFrame.
    """
    return pd.read_csv(f"./Time Series Data/{filename}")

df = getTimeSeries('TS2Walking.csv')
df

	time	gFx	gFy	gFz	0
0	0.004371	-0.9965	0.1796	0.2842	1.068
1	0.005229	-1.0007	0.1845	0.2910	1.075
2	0.005670	-1.0034	0.1886	0.2964	1.080
3	0.006074	-1.0026	0.1903	0.2984	1.080
4	0.006489	-0.9985	0.1920	0.2954	1.076
...	...	...	...	...	...
19310	38.514996	-0.9303	-0.0344	0.4249	1.032
19311	38.516256	-0.9301	-0.0349	0.4234	1.031
19312	38.518234	-0.9315	-0.0347	0.4222	1.032
19313	38.520242	-0.9335	-0.0354	0.4229	1.034
19314	38.522770	-0.9354	-0.0364	0.4251	1.037

19315 rows × 5 columns

def fetchTotTS(dataFrame):
    """Return the element-wise square of the fifth column as a one-column frame.

    The column is selected by position (index 4); the result keeps the
    input's row labels and that column's name.
    """
    fifth_column = dataFrame.iloc[:, 4]
    squared = fifth_column ** 2
    return pd.DataFrame(squared)

def PlotTimeSeries(df, flag):
    """Plot a recording on a 9x3 figure and show it.

    With a truthy `flag`, columns 1-3 of `df` are drawn as red, green and
    blue lines. With a falsy `flag`, the square of column 4 is drawn as a
    single pink line. Titles, axis labels and legend differ per mode.
    """
    latexify()
    plt.figure(figsize = (9, 3))

    if flag:
        heading = r"Time Series of Acceleration $(acc_x, acc_y, acc_z)$"
        y_caption = r"Acceleration in $m/s^2$"
        legend_entries = [r"$a_x$", r"$a_y$", r"$a_z$"]
        # One line per axis column, coloured in column order.
        for column, shade in zip(range(1, 4), ("red", "green", "blue")):
            plt.plot(df.iloc[:, column], color = shade, linewidth = 0.8)
    else:
        heading = r"Time Series of Total Acceleration $(acc_x^2 + acc_y^2 + acc_z^2)$"
        y_caption = r"Total Acceleration in $m/s^2$"
        legend_entries = [r"$(acc_x^2 + acc_y^2 + acc_z^2)$"]
        plt.plot(df.iloc[:, 4]**2, color = "deeppink", linewidth = 0.8)

    # Decorations shared by both modes.
    plt.title(heading)
    plt.xlabel("Time Samples")
    plt.ylabel(y_caption)
    plt.legend(legend_entries)
    plt.grid()
    plt.show()

\(\text{Recording duration (s)} = \frac{\text{No. of samples}}{f_s}\)

\(f_s = 500\ \text{Hz}\)

df.shape[0] / 500.0

38.63

def FeaturiserN(XTimeSeries, features, fs = 50):
    """Extract the selected TSFEL features from a single time series.

    Parameters
    ----------
    XTimeSeries : array-like
        Data accepted by ``pd.DataFrame``; its first column is taken as the
        series to featurise.
    features : list of str
        Names of the TSFEL output columns to keep (e.g. ``"0_Mean"``).
    fs : int or float, default 50
        Sampling frequency forwarded to the TSFEL extractor.
        NOTE(review): the notebook states the collected recordings are sampled
        at 500 Hz; the default stays at 50 so existing calls are unchanged -
        pass ``fs = 500`` if that is the intended rate.

    Returns
    -------
    pd.DataFrame
        The extractor output restricted to `features`.
    """
    cfg = tsfel.get_features_by_domain()
    # First column of the input (the same values as row 0 of its transpose).
    samples = list(pd.DataFrame(XTimeSeries).iloc[:, 0])
    # Passed positionally, as Featuriser does, so the call does not depend on
    # the name of the extractor's second parameter.
    extracted = tsfel.time_series_features_extractor(cfg, samples, fs = fs)
    return extracted[features]

pd.DataFrame(fetchTotTS(df.iloc[2500:7500, :])).T

	2500	2501	2502	2503	2504	2505	2506	2507	2508	2509	...	7490	7491	7492	7493	7494	7495	7496	7497	7498	7499
0	0.751689	0.755161	0.758641	0.765625	0.776161	0.786769	0.799236	0.808201	0.817216	0.8281	...	1.646089	1.565001	1.517824	1.485961	1.452025	1.420864	1.3924	1.364224	1.331716	1.301881

1 rows × 5000 columns

dfN1 = FeaturiserN(fetchTotTS(df.iloc[2500:7500, :]), features_sel)
dfN1

*** Feature extraction started ***

*** Feature extraction finished ***

Progress: 100% Complete

	0_Mean	0_Variance	0_Peak to peak distance	0_Mean absolute deviation
0	1.18036	0.49156	3.675776	0.540058

PlotTimeSeries(df.iloc[2500:7500,:], 1)
PlotTimeSeries(df.iloc[2500:7500,:], 0)

y_pred = model.predict(dfN1)
y_pred

array([2])

classesN

{1: 'WALKING',
 2: 'WALKING_UPSTAIRS',
 3: 'WALKING_DOWNSTAIRS',
 4: 'SITTING',
 5: 'STANDING',
 6: 'LAYING'}

df1 = getTimeSeries('TS4Walking.csv')
DF = df1.iloc[2500:7500, :]
PlotTimeSeries(DF, 1)
PlotTimeSeries(DF, 0)
dfN2 = FeaturiserN(fetchTotTS(DF), features_sel)
y_pred = model.predict(dfN2)
classesN[y_pred[0]]

*** Feature extraction started ***

*** Feature extraction finished ***

Progress: 100% Complete

'WALKING'

df2 = getTimeSeries('TS10Sitting.csv')
DF = df2.iloc[2500:7500, :]
PlotTimeSeries(DF, 1)
PlotTimeSeries(DF, 0)
dfN3 = FeaturiserN(fetchTotTS(DF), features_sel)
y_pred = model.predict(dfN3)
classesN[y_pred[0]]

*** Feature extraction started ***

*** Feature extraction finished ***

Progress: 100% Complete

'SITTING'

def PredictPlot(filename,  flag = 1, start = None, end = None):
    """Plot a recording and, for a falsy `flag`, classify a slice of it.

    Truthy `flag` (the default): load `filename` and plot the whole,
    untrimmed recording in both plot modes; nothing is predicted.

    Falsy `flag`: take rows ``start:end`` of the recording, plot that slice
    in both modes, featurise it with the selected features, and print the
    activity name the fitted model predicts for it.
    """
    recording = getTimeSeries(filename)

    if flag:
        print("Original Time Series")
        for mode in (1, 0):
            PlotTimeSeries(recording, mode)
        return

    window = recording.iloc[start : end, :]
    print("Trimmed Time Series")
    for mode in (1, 0):
        PlotTimeSeries(window, mode)

    window_features = FeaturiserN(fetchTotTS(window), features_sel)
    predicted = model.predict(window_features)
    print(classesN[predicted[0]])

PredictPlot("TS5WalkingUpstairs.csv", 0, 6000, 11000)

Trimmed Time Series
*** Feature extraction started ***

*** Feature extraction finished ***
WALKING_UPSTAIRS

Progress: 100% Complete

PredictPlot("TS6WalkingDownstairs.csv", 0, 6000, 12000)

Trimmed Time Series
*** Feature extraction started ***

*** Feature extraction finished ***
WALKING_DOWNSTAIRS

Progress: 100% Complete

PredictPlot("TS7WalkingDownstairs.csv", 0, 4000, 12000)

Trimmed Time Series
*** Feature extraction started ***

*** Feature extraction finished ***
WALKING_DOWNSTAIRS

Progress: 100% Complete

PredictPlot("TS8WalkingUpstairs.csv", 0, 4000, 11000)

Trimmed Time Series
*** Feature extraction started ***

*** Feature extraction finished ***
STANDING

Progress: 100% Complete

PredictPlot("TS9Sitting.csv", 0, 4000, 12000)

Trimmed Time Series
*** Feature extraction started ***

*** Feature extraction finished ***
SITTING

Progress: 100% Complete

PredictPlot("TS11Standing.csv", 0, 4000, 12500)

Trimmed Time Series
*** Feature extraction started ***

*** Feature extraction finished ***
SITTING

Progress: 100% Complete

PredictPlot("TS12Standing.csv", 0, 4000, 12500)

Trimmed Time Series
*** Feature extraction started ***

*** Feature extraction finished ***
SITTING

Progress: 100% Complete

PredictPlot("TS13SittingStanding.csv", 1)

Original Time Series

PredictPlot("TS13SittingStanding.csv", 0, 6000, 12000)

Trimmed Time Series
*** Feature extraction started ***

*** Feature extraction finished ***
SITTING

Progress: 100% Complete

PredictPlot("TS13SittingStanding.csv", 0, 17000, 22000)

Trimmed Time Series
*** Feature extraction started ***

*** Feature extraction finished ***
LAYING

Progress: 100% Complete

PredictPlot("TS13SittingStanding.csv", 0, 1000, 7800)

Trimmed Time Series
*** Feature extraction started ***

*** Feature extraction finished ***
WALKING

Progress: 100% Complete

PredictPlot("TS15Walking.csv", 0, 2600, 10000)

Trimmed Time Series
*** Feature extraction started ***

*** Feature extraction finished ***
WALKING

Progress: 100% Complete

PredictPlot("TS16Laying.csv", 0, 5000, 17000)

Trimmed Time Series
*** Feature extraction started ***

*** Feature extraction finished ***
LAYING

Progress: 100% Complete

classesN

{1: 'WALKING',
 2: 'WALKING_UPSTAIRS',
 3: 'WALKING_DOWNSTAIRS',
 4: 'SITTING',
 5: 'STANDING',
 6: 'LAYING'}

def confMatrix(dataFrame, flag = 1, accuracies = None):
    """Draw confusion-matrix heatmaps.

    Parameters
    ----------
    dataFrame : pd.DataFrame or iterable of pd.DataFrame
        With a truthy `flag`, one confusion matrix. With a falsy `flag`, an
        iterable of confusion matrices, one per panel.
    flag : default 1
        Truthy draws a single 6x6-inch heatmap; falsy draws one heatmap per
        matrix on a 3x3 grid, panel ``i`` being titled ``Depth = i + 2``.
    accuracies : sequence of float, optional
        Required when `flag` is falsy: ``accuracies[i]`` is shown in the title
        of panel ``i``. Ignored when `flag` is truthy.

    Notes
    -----
    In single-matrix mode the accuracy in the title is computed from the
    matrix itself (diagonal sum over total count), so the figure always
    describes the matrix that was passed in rather than whatever label lists
    happen to exist at notebook level.
    The grid has nine slots; more than nine matrices raises ``IndexError``.
    """
    if flag:
        counts = np.asarray(dataFrame, dtype = float)
        total = counts.sum()
        # An all-zero matrix has no predictions; report 0 instead of dividing by 0.
        accuracy = np.trace(counts) / total if total else 0.0

        plt.figure(figsize = (6, 6))
        ax = sns.heatmap(dataFrame, annot = True, cmap = "PuBu")
        plt.setp(ax.get_xticklabels(), rotation = 45, fontsize = 8)
        plt.setp(ax.get_yticklabels(), fontsize = 8)
        plt.ylabel("True label", fontsize = 18)
        plt.xlabel("Predicted label", fontsize = 18)
        plt.title(f"Accuracy = {accuracy*100: .4f}%", fontweight = "bold", fontsize = 13)
        plt.show()
        return

    matrices = list(dataFrame)
    fig, axes = plt.subplots(3, 3, figsize = (25, 25))
    axes = axes.flatten()

    for i, matrix in enumerate(matrices):
        ax = sns.heatmap(matrix, annot = True, ax = axes[i], cbar = False, cmap = "PuBu")

        plt.setp(ax.get_xticklabels(), rotation = 45, fontsize = 6)
        plt.setp(ax.get_yticklabels(), fontsize = 8)
        ax.set_title(f"Depth = {i + 2}\nAccuracy = {accuracies[i] * 100: .4f}%", fontsize = 10)
        ax.set_ylabel("True label", fontsize = 12)
        ax.set_xlabel("Predicted label", fontsize = 12)

    # Remove exactly the grid slots that received no heatmap (for seven
    # matrices these are the last two), never one that holds a plot.
    for spare in axes[len(matrices):]:
        plt.delaxes(spare)
    plt.tight_layout()
    plt.subplots_adjust(wspace = 1.1, hspace = 1.1)
    plt.show()

PredictPlot("TS17Sitting.csv", 0, 5000, 10000)

Trimmed Time Series
*** Feature extraction started ***

*** Feature extraction finished ***
SITTING

Progress: 100% Complete

PredictPlot("TS18Sitting.csv", 0, 4000, 10000)

Trimmed Time Series
*** Feature extraction started ***

*** Feature extraction finished ***
SITTING

Progress: 100% Complete

PredictPlot("TS19Sitting.csv", 0, 4000, 10000)

Trimmed Time Series
*** Feature extraction started ***

*** Feature extraction finished ***
SITTING

Progress: 100% Complete

PredictPlot("TS21Sitting.csv", 0, 5000, 15000)

Trimmed Time Series
*** Feature extraction started ***

*** Feature extraction finished ***
SITTING

Progress: 100% Complete

PredictPlot("TS22Standing.csv", 0, 5000, 11000)

Trimmed Time Series
*** Feature extraction started ***

*** Feature extraction finished ***
SITTING

Progress: 100% Complete

PredictPlot("TS23Standing.csv", 0, 4000, 10000)

Trimmed Time Series
*** Feature extraction started ***

*** Feature extraction finished ***
SITTING

Progress: 100% Complete

PredictPlot("TS24Standing.csv", 0, 5000, 12500)

Trimmed Time Series
*** Feature extraction started ***

*** Feature extraction finished ***
SITTING

Progress: 100% Complete

PredictPlot("TS25Standing.csv", 0, 4000, 10000)

Trimmed Time Series
*** Feature extraction started ***

*** Feature extraction finished ***
SITTING

Progress: 100% Complete

PredictPlot("TS26Standing.csv", 0, 5000, 12500)

Trimmed Time Series
*** Feature extraction started ***

*** Feature extraction finished ***
SITTING

Progress: 100% Complete

PredictPlot("TS27Laying.csv", 0, 2500, 10000)

Trimmed Time Series
*** Feature extraction started ***

*** Feature extraction finished ***
LAYING

Progress: 100% Complete

PredictPlot("TS28Laying.csv", 0, 5000, 15000)

Trimmed Time Series
*** Feature extraction started ***

*** Feature extraction finished ***
LAYING

Progress: 100% Complete

PredictPlot("TS29Laying.csv", 0, 4000, 10000)

Trimmed Time Series
*** Feature extraction started ***

*** Feature extraction finished ***
LAYING

Progress: 100% Complete

PredictPlot("TS30Laying.csv", 0, 4000, 12000)

Trimmed Time Series
*** Feature extraction started ***

*** Feature extraction finished ***
LAYING

Progress: 100% Complete

PredictPlot("TS31Laying.csv", 0, 5000, 12500)

Trimmed Time Series
*** Feature extraction started ***

*** Feature extraction finished ***
LAYING

Progress: 100% Complete

PredictPlot("TS32Walking.csv", 0, 2500, 12500)

Trimmed Time Series
*** Feature extraction started ***

*** Feature extraction finished ***
WALKING

Progress: 100% Complete

PredictPlot("TS33Walking.csv", 0, 2500, 12500)

Trimmed Time Series
*** Feature extraction started ***

*** Feature extraction finished ***
WALKING

Progress: 100% Complete

PredictPlot("TS34Walking.csv", 0, 2500, 12500)

Trimmed Time Series
*** Feature extraction started ***

*** Feature extraction finished ***
WALKING

Progress: 100% Complete

PredictPlot("TS35Walking.csv", 0, 2500, 12500)

Trimmed Time Series
*** Feature extraction started ***

*** Feature extraction finished ***
WALKING_UPSTAIRS

Progress: 100% Complete

PredictPlot("TS36Walking.csv", 0, 2500, 12500)

Trimmed Time Series
*** Feature extraction started ***

*** Feature extraction finished ***
WALKING

Progress: 100% Complete

PredictPlot("TS37Upstairs.csv", 0, 2000, 10000)

Trimmed Time Series
*** Feature extraction started ***

*** Feature extraction finished ***
STANDING

Progress: 100% Complete

PredictPlot("TS38Downstairs.csv", 0, 2500, 10000)

Trimmed Time Series
*** Feature extraction started ***

*** Feature extraction finished ***
WALKING_DOWNSTAIRS

Progress: 100% Complete

PredictPlot("TS39Upstairs.csv", 0, 2000, 10000)

Trimmed Time Series
*** Feature extraction started ***

*** Feature extraction finished ***
WALKING_UPSTAIRS

Progress: 100% Complete

PredictPlot("TS40Downstairs.csv", 0, 2500, 10000)

Trimmed Time Series
*** Feature extraction started ***

*** Feature extraction finished ***
WALKING_UPSTAIRS

Progress: 100% Complete

PredictPlot("TS41Upstairs.csv", 0, 2500, 10000)

Trimmed Time Series
*** Feature extraction started ***

*** Feature extraction finished ***
WALKING_UPSTAIRS

Progress: 100% Complete

PredictPlot("TS42Downstairs.csv", 0, 2500, 10000)

Trimmed Time Series
*** Feature extraction started ***

*** Feature extraction finished ***
WALKING_DOWNSTAIRS

Progress: 100% Complete

PredictPlot("TS43Upstairs.csv", 0, 2500, 10000)

Trimmed Time Series
*** Feature extraction started ***

*** Feature extraction finished ***
WALKING_UPSTAIRS

Progress: 100% Complete

PredictPlot("TS44Downstairs.csv", 0, 2500, 10000)

Trimmed Time Series
*** Feature extraction started ***

*** Feature extraction finished ***
WALKING_DOWNSTAIRS

Progress: 100% Complete

PredictPlot("TS45Upstairs.csv", 0, 2000, 10000)

Trimmed Time Series
*** Feature extraction started ***

*** Feature extraction finished ***
WALKING_UPSTAIRS

Progress: 100% Complete

PredictPlot("TS46Downstairs.csv", 0, 2500, 10000)

Trimmed Time Series
*** Feature extraction started ***

*** Feature extraction finished ***
WALKING_DOWNSTAIRS

Progress: 100% Complete

classesN

{1: 'WALKING',
 2: 'WALKING_UPSTAIRS',
 3: 'WALKING_DOWNSTAIRS',
 4: 'SITTING',
 5: 'STANDING',
 6: 'LAYING'}

# True (y) and predicted (y_pred) activity ids for the collected recordings,
# written out as literals; the ids are the keys of classesN.
# NOTE(review): presumably transcribed by hand from the PredictPlot outputs -
# verify that both lists follow the same recording order.
y = [1, 4, 2, 3, 3, 2, 4, 5, 5, 4, 5, 1, 6, 4, 4, 4, 4, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 1, 1, 1, 1, 1, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3]
y_pred = [1, 4, 2, 3, 3, 5, 4, 4, 4, 4, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 6, 6, 6, 6, 6, 1, 1, 1, 2, 1, 5, 3, 2, 2, 2, 3, 2, 3, 2, 3]

42 42

# Confusion matrix of the hand-collected recordings: rows are true labels,
# columns are predicted labels.
cm = confusion_matrix(y, y_pred)
# NOTE(review): `classes` is not defined in the visible notebook; presumably it
# comes from `from MakeDataset import *` - confirm it iterates the activity
# names in ascending id order so the row/column names line up with `cm`.
df_cm = pd.DataFrame(cm, index = [classT for classT in classes], columns = [classT for classT in classes])
df_cm

	WALKING	WALKING_UPSTAIRS	WALKING_DOWNSTAIRS	SITTING	STANDING	LAYING
WALKING	5	1	0	0	0	1
WALKING_UPSTAIRS	0	5	0	0	2	0
WALKING_DOWNSTAIRS	0	1	6	0	0	0
SITTING	0	0	0	7	0	0
STANDING	0	0	0	7	0	1
LAYING	0	0	0	1	0	5

# Report over every known activity id rather than only the ids that occur in
# y_pred, so a class the model never predicts still gets a row and still
# counts toward the macro/weighted averages.
print(classification_report(y, y_pred, labels = sorted(classesN)))

              precision    recall  f1-score   support

           1       1.00      0.71      0.83         7
           2       0.71      0.71      0.71         7
           3       1.00      0.86      0.92         7
           4       0.47      1.00      0.64         7
           5       0.00      0.00      0.00         8
           6       0.71      0.83      0.77         6

    accuracy                           0.67        42
   macro avg       0.65      0.69      0.65        42
weighted avg       0.63      0.67      0.63        42

confMatrix(df_cm, 1)

TODO: decide whether hyperparameters are tuned on a single train/validation split or with nested (inner/outer fold) cross-validation.

\((a_x^2 + a_y^2 + a_z^2)\)

Featurising all the X_train_TS, X_test_TS, X_val_TS

\(\text{Recording duration (s)} = \frac{\text{No. of samples}}{f_s}\)

\(f_s = 500\ \text{Hz}\)

Featurising all the `X_train_TS`, `X_test_TS`, `X_val_TS`