Model Deployment on Raw Time Series Data

Classification
Authors

Guntas Singh Saran

Hrriday V. Ruparel

Published

January 24, 2024

Note: the model-selection protocol is not yet decided — either a single train/validation split or nested (inner/outer fold) cross-validation.

import pandas as pd
import matplotlib.pyplot as plt
from latex import latexify, format_axes
import numpy as np
import tsfel
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import export_graphviz
from sklearn import tree
import graphviz
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import seaborn as sns
from MakeDataset import *
%matplotlib inline
# Retina
%config InlineBackend.figure_format = 'retina'
Training data shape:  (108, 500, 3)
Testing data shape:  (36, 500, 3)
Validation data shape:  (36, 500, 3)
X_train, y_train
X_test, y_test
X_val, y_val
(180, 500, 3)

\((a_x^2 + a_y^2 + a_z^2)\)

# Collapse the last axis of every split into one value per time step:
# the sum of the squared components, a_x^2 + a_y^2 + a_z^2.
X_train_TS, X_test_TS, X_val_TS = (
    np.square(split).sum(axis = -1) for split in (X_train, X_test, X_val)
)
print(X_train_TS.shape, X_test_TS.shape, X_val_TS.shape)
(108, 500) (36, 500) (36, 500)
# Integer activity id (1..6) -> activity name.
classesN = dict(enumerate(
    ['WALKING', 'WALKING_UPSTAIRS', 'WALKING_DOWNSTAIRS', 'SITTING', 'STANDING', 'LAYING'],
    start = 1,
))
# Activity name for each entry of y_train.
namedLabel = [classesN[label] for label in y_train]
classesN
{1: 'WALKING',
 2: 'WALKING_UPSTAIRS',
 3: 'WALKING_DOWNSTAIRS',
 4: 'SITTING',
 5: 'STANDING',
 6: 'LAYING'}
# Search space explored by the decision-tree grid search below.
hyperparams = {
    "max_depth" : list(range(2, 11)),
    "criterion" : ["gini", "entropy"],
    "min_samples_leaf" : list(range(1, 16)),
}
hyperparams
{'max_depth': [2, 3, 4, 5, 6, 7, 8, 9, 10],
 'criterion': ['gini', 'entropy'],
 'min_samples_leaf': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]}
from itertools import product

# Exhaustive grid search: fit one tree per hyperparameter combination on the
# training split and record its accuracy on the validation split.
final, counter = {}, 0
for max_depth, criteria, min_sample in product(hyperparams["max_depth"], hyperparams["criterion"], hyperparams["min_samples_leaf"]):
    model = DecisionTreeClassifier(max_depth = max_depth, criterion = criteria, min_samples_leaf = min_sample, random_state = 42)
    model.fit(X_train_TS, y_train)
    val_score = model.score(X_val_TS, y_val)
    final[counter] = {"max_depth" : max_depth, "criterion" : criteria, "min_samples_leaf" : min_sample, "val_score" : val_score}
    counter += 1

# Build the table row-wise. Transposing a column-wise frame of mixed types
# (pd.DataFrame(final).T) would leave every column, including val_score,
# with object dtype; orient="index" keeps numeric columns numeric.
hparam_df = pd.DataFrame.from_dict(final, orient = "index")
hparam_df
max_depth criterion min_samples_leaf val_score
0 2 gini 1 0.472222
1 2 gini 2 0.472222
2 2 gini 3 0.472222
3 2 gini 4 0.472222
4 2 gini 5 0.472222
... ... ... ... ...
265 10 entropy 11 0.527778
266 10 entropy 12 0.527778
267 10 entropy 13 0.527778
268 10 entropy 14 0.444444
269 10 entropy 15 0.472222

270 rows × 4 columns

# Ten hyperparameter combinations with the highest validation score.
hparam_df.sort_values(by = "val_score", ascending = False).head(10)
max_depth criterion min_samples_leaf val_score
144 6 entropy 10 0.583333
53 3 entropy 9 0.583333
204 8 entropy 10 0.583333
174 7 entropy 10 0.583333
114 5 entropy 10 0.583333
50 3 entropy 6 0.583333
54 3 entropy 10 0.583333
84 4 entropy 10 0.583333
234 9 entropy 10 0.583333
52 3 entropy 8 0.583333
# Pool all three splits, in the order train, validation, test, so the final
# model can be fitted on every available sample. The label vector is stacked
# in the same order as the feature rows.
y_train_test_val = np.hstack([y_train, y_val, y_test])
dfTrain_Val_Test = pd.DataFrame(np.vstack([X_train_TS, X_val_TS, X_test_TS]))
dfTrain_Val_Test
0 1 2 3 4 5 6 7 8 9 ... 490 491 492 493 494 495 496 497 498 499
0 1.056837 1.055002 1.055806 1.056825 1.056743 1.058030 1.059746 1.056402 1.051561 1.051040 ... 1.059888 1.052544 1.056687 1.060374 1.060270 1.057576 1.050376 1.052854 1.056003 1.050580
1 1.083240 1.076504 1.071849 1.070542 1.073735 1.069331 1.065576 1.070615 1.073486 1.074425 ... 1.076160 1.072783 1.070026 1.066329 1.064303 1.069655 1.073976 1.075890 1.078382 1.072455
2 1.138189 1.118926 1.010193 0.908460 0.877500 0.799665 0.755336 0.604213 0.398809 0.387867 ... 1.131734 1.211883 1.395558 1.574451 1.786266 2.000218 2.163595 2.539505 2.744447 2.195609
3 1.181108 1.152283 1.143152 1.270364 1.238777 1.149924 1.015107 0.984543 1.273980 1.684522 ... 0.621903 1.029622 1.784374 2.366215 2.621218 2.250886 1.741832 1.685947 1.807674 1.804153
4 1.011227 1.017584 1.013233 1.011926 1.009752 1.005219 1.001461 1.005883 1.007562 1.007073 ... 1.009191 1.006528 1.004264 1.003962 1.007311 1.005560 0.999966 0.998143 1.002371 1.010588
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
175 1.012265 1.010108 1.011981 1.011857 1.013834 1.014795 1.014063 1.014580 1.009580 1.004343 ... 1.010006 1.004767 1.001302 1.003619 1.011636 1.014957 1.012647 1.015879 1.018396 1.015794
176 1.038929 1.034965 1.030862 1.029954 1.026855 1.026849 1.027156 1.026479 1.032641 1.032752 ... 1.028253 1.032168 1.035130 1.032640 1.030046 1.031069 1.033694 1.031294 1.022681 1.019043
177 1.269837 1.462317 1.900056 1.875284 1.269108 0.712168 0.530259 0.568774 0.467038 0.432422 ... 0.297068 0.393685 0.297214 0.255879 0.319893 0.316965 0.505662 0.799774 1.057363 1.227443
178 1.170950 1.442350 2.105890 2.921814 3.152692 3.007977 2.318748 1.582135 1.562915 1.680339 ... 0.783872 1.191452 1.071581 1.027061 1.272170 1.171053 0.952702 0.696993 0.730763 0.944393
179 2.437082 2.287970 1.472244 0.668358 0.561077 0.932713 1.170589 1.235397 1.133492 0.844032 ... 1.284310 1.342955 1.152644 1.066167 0.971577 0.797153 0.791073 0.748641 0.629820 0.552444

180 rows × 500 columns

# Final classifier, fitted on the pooled train + validation + test samples.
model = DecisionTreeClassifier(
    criterion = "entropy",
    max_depth = 6,
    min_samples_leaf = 10,
    random_state = 42,
)
model.fit(dfTrain_Val_Test, y_train_test_val)
DecisionTreeClassifier(criterion='entropy', max_depth=6, min_samples_leaf=10,
                       random_state=42)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
def getTimeSeries(filename):
    """Read one recording from the ``Time Series Data`` folder.

    Parameters
    ----------
    filename : str
        Name of the CSV file inside ``./Time Series Data/`` (relative to the
        current working directory).

    Returns
    -------
    pandas.DataFrame
        The file contents as parsed by ``pd.read_csv`` with default options.
    """
    return pd.read_csv(f"./Time Series Data/{filename}")
# Load one recording and display it to inspect its columns.
df = getTimeSeries('TS2Walking.csv')
df
time gFx gFy gFz 0
0 0.004371 -0.9965 0.1796 0.2842 1.068
1 0.005229 -1.0007 0.1845 0.2910 1.075
2 0.005670 -1.0034 0.1886 0.2964 1.080
3 0.006074 -1.0026 0.1903 0.2984 1.080
4 0.006489 -0.9985 0.1920 0.2954 1.076
... ... ... ... ... ...
19310 38.514996 -0.9303 -0.0344 0.4249 1.032
19311 38.516256 -0.9301 -0.0349 0.4234 1.031
19312 38.518234 -0.9315 -0.0347 0.4222 1.032
19313 38.520242 -0.9335 -0.0354 0.4229 1.034
19314 38.522770 -0.9354 -0.0364 0.4251 1.037

19315 rows × 5 columns

def fetchTotTS(dataFrame):
    """Return the squared values of the column at position 3 as a one-column frame.

    Parameters
    ----------
    dataFrame : pandas.DataFrame
        Frame with at least four columns.
        NOTE(review): callers appear to rely on position 3 holding the total
        acceleration magnitude (i.e. after the time column has been dropped);
        confirm the column layout before reusing this elsewhere.

    Returns
    -------
    pandas.DataFrame
        Single column (same label and index as the source column) holding the
        element-wise square.
    """
    # Selecting with a list keeps the result a DataFrame instead of a Series.
    return dataFrame.iloc[:, [3]] ** 2
def PlotTimeSeries(df, flag):
    """Plot a recording per axis, or as its total squared acceleration.

    Parameters
    ----------
    df : pandas.DataFrame
        Recording whose columns at positions 1, 2 and 3 hold the x, y and z
        acceleration components (the column at position 0 is not plotted).
    flag : bool-like
        Truthy  -> draw the three axis traces in red, green and blue.
        Falsy   -> draw a single trace of ``a_x^2 + a_y^2 + a_z^2``.

    Notes
    -----
    The falsy branch previously plotted only ``df.iloc[:, 3]**2`` (the z axis
    under the layout used by the truthy branch) while titling the figure as
    the sum of all three squared axes. It now plots the sum the title and
    legend describe.
    """
    latexify()
    plt.figure(figsize = (9, 3))

    if flag:
        plt.title(r"Time Series of Acceleration $(acc_x, acc_y, acc_z)$")
        # Columns 1..3 are the x, y, z components, one colour per axis.
        for k, color in enumerate(["red", "green", "blue"], start = 1):
            plt.plot(df.iloc[:, k], color = color, linewidth = 0.8)
        y_label = r"Acceleration in $m/s^2$"
        legend_entries = [r"$a_x$", r"$a_y$", r"$a_z$"]
    else:
        plt.title(r"Time Series of Total Acceleration $(acc_x^2 + acc_y^2 + acc_z^2)$")
        # Sum of squares over the same three axis columns used above.
        total = (df.iloc[:, 1:4] ** 2).sum(axis = 1)
        plt.plot(total, color = "deeppink", linewidth = 0.8)
        y_label = r"Total Acceleration in $m/s^2$"
        legend_entries = [r"$(acc_x^2 + acc_y^2 + acc_z^2)$"]

    plt.xlabel("Time Samples")
    plt.ylabel(y_label)
    plt.legend(legend_entries)
    plt.grid()
    plt.show()

\(\text{Recording Duration} = \frac{\text{No. of Samples}}{f_s}\)

\(f_s = 500\ \text{Hz}\)

# Number of rows divided by the 500 Hz sampling rate: duration in seconds.
len(df) / 500.0
38.63
# fetchTotTS already returns a DataFrame; transpose it into a single wide row.
fetchTotTS(df).T
0 1 2 3 4 5 6 7 8 9 ... 19305 19306 19307 19308 19309 19310 19311 19312 19313 19314
0 1.140624 1.155625 1.1664 1.1664 1.157776 1.147041 1.138489 1.125721 1.115136 1.096209 ... 1.083681 1.079521 1.077444 1.073296 1.069156 1.065024 1.062961 1.065024 1.069156 1.075369

1 rows × 19315 columns

PlotTimeSeries(df, 1)
PlotTimeSeries(df, 0)

from scipy.signal import resample
# Resample the recording to 500 rows (resample is applied to every column of
# df) and wrap the resulting array back into a DataFrame with default labels.
dfN = pd.DataFrame(resample(df, 500))
# Plot the resampled signal in both views: total first, then per axis.
PlotTimeSeries(dfN, 0)
PlotTimeSeries(dfN, 1)

# flag truthy -> only display the original recording; flag falsy -> predict its activity.
def PredictPlot(filename,  flag = 1):
    """Either plot a recording or print the model's predicted activity for it.

    Parameters
    ----------
    filename : str
        CSV name passed on to ``getTimeSeries``.
    flag : bool-like, default 1
        Truthy -> print a header and draw the per-axis and total plots of the
        recording as loaded.
        Falsy  -> drop the ``time`` column, resample the remaining columns to
        500 rows, take the squared column at position 3 as a single
        500-feature row, and print the activity name predicted by the global
        ``model``.

    NOTE(review): no trimming is performed; the whole recording, whatever its
    length, is resampled to 500 points.
    """
    recording = getTimeSeries(filename)

    if flag:
        print("Original Time Series")
        for view in (1, 0):
            PlotTimeSeries(recording, view)
        return

    signals = recording.drop(columns = ["time"])
    resampled = pd.DataFrame(resample(signals, 500))
    # One row of 500 features, matching the layout the model was fitted on.
    features = fetchTotTS(resampled).T
    predicted = model.predict(features)
    print(classesN[predicted[0]])
PredictPlot('TS2Walking.csv', 1)
Original Time Series

PredictPlot('TS2Walking.csv', 0)
WALKING_DOWNSTAIRS
classesN
{1: 'WALKING',
 2: 'WALKING_UPSTAIRS',
 3: 'WALKING_DOWNSTAIRS',
 4: 'SITTING',
 5: 'STANDING',
 6: 'LAYING'}
PredictPlot('TS4Walking.csv', 1)
PredictPlot('TS4Walking.csv', 0)
Original Time Series
WALKING_DOWNSTAIRS

PredictPlot('TS5WalkingUpstairs.csv', 1)
PredictPlot('TS5WalkingUpstairs.csv', 0)
Original Time Series
WALKING_UPSTAIRS

PredictPlot('TS6WalkingDownstairs.csv', 1)
PredictPlot('TS6WalkingDownstairs.csv', 0)
Original Time Series
WALKING

PredictPlot('TS7WalkingDownstairs.csv', 1)
PredictPlot('TS7WalkingDownstairs.csv', 0)
Original Time Series
WALKING_DOWNSTAIRS

PredictPlot('TS8WalkingUpstairs.csv', 1)
PredictPlot('TS8WalkingUpstairs.csv', 0)
Original Time Series
WALKING_DOWNSTAIRS

PredictPlot('TS9Sitting.csv', 1)
PredictPlot('TS9Sitting.csv', 0)
Original Time Series
SITTING

PredictPlot('TS10Sitting.csv', 1)
PredictPlot('TS10Sitting.csv', 0)
Original Time Series
SITTING

PredictPlot('TS11Standing.csv', 1)
PredictPlot('TS11Standing.csv', 0)
Original Time Series
SITTING

PredictPlot('TS12Standing.csv', 1)
PredictPlot('TS12Standing.csv', 0)
Original Time Series
SITTING

PredictPlot('TS14WalkingUpstairs.csv', 1)
PredictPlot('TS14WalkingUpstairs.csv', 0)
Original Time Series
WALKING_DOWNSTAIRS

PredictPlot('TS15Walking.csv', 1)
PredictPlot('TS15Walking.csv', 0)
Original Time Series
WALKING_DOWNSTAIRS

PredictPlot('TS16Laying.csv', 1)
PredictPlot('TS16Laying.csv', 0)
Original Time Series
WALKING_UPSTAIRS

PredictPlot('TS17Sitting.csv', 1)
PredictPlot('TS17Sitting.csv', 0)
Original Time Series
LAYING

PredictPlot('TS18Sitting.csv', 1)
PredictPlot('TS18Sitting.csv', 0)
Original Time Series
SITTING

PredictPlot('TS19Sitting.csv', 1)
PredictPlot('TS19Sitting.csv', 0)
Original Time Series
LAYING

PredictPlot('TS20Sitting.csv', 1)
PredictPlot('TS20Sitting.csv', 0)
Original Time Series
LAYING

PredictPlot('TS21Sitting.csv', 1)
PredictPlot('TS21Sitting.csv', 0)
Original Time Series
LAYING

PredictPlot('TS22Standing.csv', 1)
PredictPlot('TS22Standing.csv', 0)
Original Time Series
LAYING

PredictPlot('TS23Standing.csv', 1)
PredictPlot('TS23Standing.csv', 0)
Original Time Series
SITTING

PredictPlot('TS24Standing.csv', 1)
PredictPlot('TS24Standing.csv', 0)
Original Time Series
SITTING

PredictPlot('TS25Standing.csv', 1)
PredictPlot('TS25Standing.csv', 0)
Original Time Series
LAYING

PredictPlot('TS26Standing.csv', 1)
PredictPlot('TS26Standing.csv', 0)
Original Time Series
LAYING

PredictPlot('TS27Laying.csv', 1)
PredictPlot('TS27Laying.csv', 0)
Original Time Series
WALKING_UPSTAIRS

PredictPlot('TS28Laying.csv', 1)
PredictPlot('TS28Laying.csv', 0)
Original Time Series
WALKING_UPSTAIRS

PredictPlot('TS29Laying.csv', 1)
PredictPlot('TS29Laying.csv', 0)
Original Time Series
WALKING_UPSTAIRS

PredictPlot('TS30Laying.csv', 1)
PredictPlot('TS30Laying.csv', 0)
Original Time Series
WALKING_UPSTAIRS

PredictPlot('TS31Laying.csv', 1)
PredictPlot('TS31Laying.csv', 0)
Original Time Series
WALKING_UPSTAIRS

PredictPlot('TS32Walking.csv', 1)
PredictPlot('TS32Walking.csv', 0)
Original Time Series
WALKING_UPSTAIRS

PredictPlot('TS33Walking.csv', 1)
PredictPlot('TS33Walking.csv', 0)
Original Time Series
WALKING

PredictPlot('TS34Walking.csv', 1)
PredictPlot('TS34Walking.csv', 0)
Original Time Series
WALKING_UPSTAIRS

PredictPlot('TS35Walking.csv', 1)
PredictPlot('TS35Walking.csv', 0)
Original Time Series
WALKING

PredictPlot('TS36Walking.csv', 1)
PredictPlot('TS36Walking.csv', 0)
Original Time Series
WALKING

PredictPlot('TS37Upstairs.csv', 1)
PredictPlot('TS37Upstairs.csv', 0)
Original Time Series
WALKING

PredictPlot('TS38Downstairs.csv', 1)
PredictPlot('TS38Downstairs.csv', 0)
Original Time Series
WALKING_DOWNSTAIRS

PredictPlot('TS39Upstairs.csv', 1)
PredictPlot('TS39Upstairs.csv', 0)
Original Time Series
WALKING_UPSTAIRS

PredictPlot('TS40Downstairs.csv', 1)
PredictPlot('TS40Downstairs.csv', 0)
Original Time Series
WALKING

PredictPlot('TS41Upstairs.csv', 1)
PredictPlot('TS41Upstairs.csv', 0)
Original Time Series
WALKING

PredictPlot('TS42Downstairs.csv', 1)
PredictPlot('TS42Downstairs.csv', 0)
Original Time Series
WALKING_UPSTAIRS

PredictPlot('TS43Upstairs.csv', 1)
PredictPlot('TS43Upstairs.csv', 0)
Original Time Series
STANDING

PredictPlot('TS44Downstairs.csv', 1)
PredictPlot('TS44Downstairs.csv', 0)
Original Time Series
WALKING_UPSTAIRS

PredictPlot('TS45Upstairs.csv', 1)
PredictPlot('TS45Upstairs.csv', 0)
Original Time Series
WALKING

PredictPlot('TS46Downstairs.csv', 1)
PredictPlot('TS46Downstairs.csv', 0)
Original Time Series
WALKING_DOWNSTAIRS

# True activity ids of the recordings, in the order they were predicted above.
y = [1, 1, 2, 3, 3, 2, 4, 4, 5, 5, 2, 1, 6, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 1, 1, 1, 1, 1, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3]
# Activity ids printed by PredictPlot for the same recordings.
y_pred = [3, 3, 2, 1, 3, 3, 4, 4, 4, 4, 3, 3, 2, 6, 4, 6, 6, 6, 6, 4, 4, 6, 6, 2, 2, 2, 2, 2, 2, 1, 2, 1, 1, 1, 3, 2, 1, 1, 2, 5, 2, 1, 3]

# Pass the label ids explicitly so the matrix always has one row/column per
# known class (even if a class is absent from both lists), and take the axis
# names from the same mapping so names and rows cannot drift apart.
label_ids = sorted(classesN)
label_names = [classesN[label] for label in label_ids]
cm = confusion_matrix(y, y_pred, labels = label_ids)
df_cm = pd.DataFrame(cm, index = label_names, columns = label_names)
df_cm
WALKING WALKING_UPSTAIRS WALKING_DOWNSTAIRS SITTING STANDING LAYING
WALKING 3 2 3 0 0 0
WALKING_UPSTAIRS 3 2 2 0 1 0
WALKING_DOWNSTAIRS 2 2 3 0 0 0
SITTING 0 0 0 3 0 4
STANDING 0 0 0 4 0 3
LAYING 0 6 0 0 0 0
# Report every known class. Restricting labels to those present in y_pred
# would silently drop any class the model never predicted from the report
# and from its averages.
print(classification_report(y, y_pred, labels = sorted(classesN)))
              precision    recall  f1-score   support

           1       0.38      0.38      0.38         8
           2       0.17      0.25      0.20         8
           3       0.38      0.43      0.40         7
           4       0.43      0.43      0.43         7
           5       0.00      0.00      0.00         7
           6       0.00      0.00      0.00         6

    accuracy                           0.26        43
   macro avg       0.22      0.25      0.23        43
weighted avg       0.23      0.26      0.24        43
def _matrix_accuracy(matrix):
    """Return diagonal sum / total of a confusion matrix (NaN if it has no counts)."""
    counts = np.asarray(matrix, dtype = float)
    total = counts.sum()
    return float(np.trace(counts) / total) if total else float("nan")


def confMatrix(dataFrame, flag = 1, accuracies = None):
    """Draw one confusion-matrix heatmap, or a grid of them.

    Parameters
    ----------
    dataFrame : pandas.DataFrame or sequence of pandas.DataFrame
        With a truthy ``flag``: a single confusion matrix (rows = true label,
        columns = predicted label). With a falsy ``flag``: an iterable of such
        matrices, drawn one per subplot and titled ``Depth = i + 2``.
    flag : bool-like, default 1
        Selects single-matrix (truthy) or grid (falsy) mode.
    accuracies : float or sequence of float, optional
        Accuracy (as a fraction) to print in each title. When omitted, it is
        computed from the matrix itself as diagonal sum / total count.

    Notes
    -----
    The single-matrix title used to be computed from the module-level ``y``
    and ``y_pred`` rather than from the matrix passed in, the grid mode
    failed when ``accuracies`` was omitted, and two fixed subplot positions
    were removed regardless of how many matrices were drawn. The accuracy now
    comes from the arguments and only unused subplots are removed.
    """
    if flag:
        if accuracies is None:
            accuracy = _matrix_accuracy(dataFrame)
        else:
            # Accept either a bare number or a one-element sequence.
            accuracy = float(np.ravel(accuracies)[0])

        plt.figure(figsize = (6, 6))
        ax = sns.heatmap(dataFrame, annot = True, cmap = "PuBu")
        plt.setp(ax.get_xticklabels(), rotation = 45, fontsize = 8)
        plt.setp(ax.get_yticklabels(), fontsize = 8)
        plt.ylabel("True label", fontsize = 18)
        plt.xlabel("Predicted label", fontsize = 18)
        plt.title(f"Accuracy = {accuracy*100: .4f}%", fontweight = "bold", fontsize = 13)
        plt.show()
        return

    matrices = list(dataFrame)
    if accuracies is None:
        accuracies = [_matrix_accuracy(matrix) for matrix in matrices]

    # Keep the original 3 x 3 layout, adding rows only if more than nine
    # matrices are supplied.
    n_cols = 3
    n_rows = max(3, -(-len(matrices) // n_cols))
    fig, axes = plt.subplots(n_rows, n_cols, figsize = (25, 25))
    axes = axes.flatten()

    for i, df in enumerate(matrices):
        ax = sns.heatmap(df, annot = True, ax = axes[i], cbar = False, cmap = "PuBu")

        plt.setp(ax.get_xticklabels(), rotation = 45, fontsize = 6)
        plt.setp(ax.get_yticklabels(), fontsize = 8)
        ax.set_title(f"Depth = {i + 2}\nAccuracy = {accuracies[i] * 100: .4f}%", fontsize = 10)
        ax.set_ylabel("True label", fontsize = 12)
        ax.set_xlabel("Predicted label", fontsize = 12)

    # Remove only the subplots that received no heatmap.
    for unused_ax in axes[len(matrices):]:
        fig.delaxes(unused_ax)
    plt.tight_layout()
    plt.subplots_adjust(wspace = 1.1, hspace = 1.1)
    plt.show()
confMatrix(df_cm, 1)

classesN
{1: 'WALKING',
 2: 'WALKING_UPSTAIRS',
 3: 'WALKING_DOWNSTAIRS',
 4: 'SITTING',
 5: 'STANDING',
 6: 'LAYING'}