What does data look like

What does data look like#

What libraries should I import?#

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

How to read data?#

Dummy data for the following exercises is provided here.

file = '/Users/guillermo/Downloads/pose-3d.csv'

data = pd.read_csv(file, header=0)

How is my data structured?#

data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 450 entries, 0 to 449
Columns: 139 entries, lefteye1_x to fnum
dtypes: float64(138), int64(1)
memory usage: 488.8 KB

np.shape(data)

(450, 139)

data

	lefteye1_x	lefteye1_y	lefteye1_z	lefteye1_error	lefteye1_ncams	lefteye1_score	lefteye2_x	lefteye2_y	lefteye2_z	lefteye2_error	...	M_10	M_11	M_12	M_20	M_21	M_22	center_0	center_1	center_2	fnum
0	-11.488100	-0.391982	-6.391103	1.938121	2.0	0.999995	14.093313	-1.432060	-1.898372	2.190833	...	0.025309	0.004592	0.999669	-0.155911	-0.987735	0.008484	136.712949	21.796899	59.925219	0
1	-11.563727	-0.248620	-6.538817	2.030270	2.0	0.999996	14.219575	-1.263471	-2.031282	1.935011	...	0.025309	0.004592	0.999669	-0.155911	-0.987735	0.008484	136.712949	21.796899	59.925219	1
2	-11.630087	-0.129231	-6.667193	1.802676	2.0	0.999997	14.361094	-1.127249	-2.146595	1.606363	...	0.025309	0.004592	0.999669	-0.155911	-0.987735	0.008484	136.712949	21.796899	59.925219	2
3	-11.666934	-0.057356	-6.765140	2.019693	2.0	0.999998	14.528884	-1.018296	-2.235377	1.527383	...	0.025309	0.004592	0.999669	-0.155911	-0.987735	0.008484	136.712949	21.796899	59.925219	3
4	-11.657633	-0.022595	-6.825935	2.186289	2.0	0.999998	14.727834	-0.916615	-2.290928	1.662191	...	0.025309	0.004592	0.999669	-0.155911	-0.987735	0.008484	136.712949	21.796899	59.925219	4
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
445	19.858564	-0.186870	3.376668	1.879663	2.0	1.000000	43.138611	-0.935498	6.239373	2.397886	...	0.025309	0.004592	0.999669	-0.155911	-0.987735	0.008484	136.712949	21.796899	59.925219	445
446	19.828282	-0.197882	3.399467	1.926345	2.0	1.000000	43.099870	-0.936545	6.289830	2.507487	...	0.025309	0.004592	0.999669	-0.155911	-0.987735	0.008484	136.712949	21.796899	59.925219	446
447	19.792748	-0.197968	3.419085	1.987387	2.0	1.000000	43.070633	-0.924082	6.339649	2.495454	...	0.025309	0.004592	0.999669	-0.155911	-0.987735	0.008484	136.712949	21.796899	59.925219	447
448	19.753271	-0.185416	3.436065	1.666125	2.0	1.000000	43.049360	-0.899445	6.388871	2.244805	...	0.025309	0.004592	0.999669	-0.155911	-0.987735	0.008484	136.712949	21.796899	59.925219	448
449	19.712078	-0.173427	3.453072	1.993099	2.0	1.000000	43.030381	-0.879919	6.437327	2.190697	...	0.025309	0.004592	0.999669	-0.155911	-0.987735	0.008484	136.712949	21.796899	59.925219	449

450 rows × 139 columns

Cleaning data#

# Boolean masks over the column names of `data`.
# `is_bookkeeping` flags every column whose name contains one of
# score, error, ncams, fnum, center or M_.
is_bookkeeping = data.columns.str.contains(
    'score|error|ncams|fnum|center|M_')
is_score = data.columns.str.contains('score')

# Everything that is not flagged above is kept as coordinate data.
coords = data.loc[:, ~is_bookkeeping]

# The score columns are kept in their own frame.
scores = data.loc[:, is_score]

Changing the data structure#

# Express every landmark relative to the "nose1" landmark, so that the
# data is centered around a reference point.
centered_coords = coords.copy()

# One offset array per axis suffix. The offsets are read from the untouched
# `coords` frame, so the nose1 columns themselves end up at zero.
reference = {
    '_x': coords.loc[:, "nose1_x"].values,
    '_y': coords.loc[:, "nose1_y"].values,
    '_z': coords.loc[:, "nose1_z"].values,
}

for column in centered_coords.columns:
    # The first suffix found in the column name (checked in x, y, z order)
    # decides which offset is subtracted; columns that contain none of the
    # suffixes are left unchanged.
    for suffix, offset in reference.items():
        if suffix in column:
            centered_coords.loc[:, column] = centered_coords.loc[
                :, column].subtract(offset)
            break

centered_coords

	lefteye1_x	lefteye1_y	lefteye1_z	lefteye2_x	lefteye2_y	lefteye2_z	righteye1_x	righteye1_y	righteye1_z	righteye2_x	...	lowlip_z	llip_x	llip_y	llip_z	rlip_x	rlip_y	rlip_z	chin_x	chin_y	chin_z
0	21.138248	0.149274	1.339277	46.719661	-0.890804	5.832008	-22.571980	-0.471520	-9.779492	-37.820867	...	-80.979542	37.796704	-0.980286	-67.807799	-10.351668	1.195690	-76.963349	19.029902	-0.445000	-115.416169
1	21.201282	0.213704	1.322499	46.984585	-0.801147	5.830035	-22.538740	-0.402639	-9.909138	-37.677083	...	-80.990478	37.855529	-0.946660	-67.868614	-10.338881	1.200735	-76.917438	19.053691	-0.440997	-115.518292
2	21.256664	0.240579	1.302346	47.247845	-0.757440	5.822944	-22.508311	-0.368747	-10.036418	-37.536744	...	-81.001777	37.907243	-0.940885	-67.933516	-10.330400	1.193681	-76.872875	19.065735	-0.449761	-115.618253
3	21.299285	0.240397	1.274168	47.495103	-0.720543	5.803931	-22.480971	-0.356541	-10.160358	-37.410751	...	-81.009722	37.937340	-0.950328	-68.005056	-10.330788	1.182822	-76.827400	19.055308	-0.460424	-115.710979
4	21.325677	0.220319	1.235597	47.711144	-0.673701	5.770603	-22.456571	-0.350904	-10.279109	-37.310866	...	-81.007437	37.939126	-0.940040	-68.081264	-10.349203	1.157687	-76.779312	19.016472	-0.470678	-115.789328
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
445	23.060752	-0.199214	1.879664	46.340798	-0.947842	4.742368	-19.185499	1.345010	-6.506368	-41.953663	...	-71.425305	43.196517	-0.785810	-56.866667	-18.389502	1.428511	-66.358897	18.667268	0.086065	-118.640513
446	23.079261	-0.215957	1.892306	46.350848	-0.954620	4.782669	-19.195640	1.345283	-6.492639	-41.925121	...	-71.434948	43.233000	-0.798553	-56.874906	-18.308865	1.423512	-66.383337	18.711471	0.078809	-118.608907
447	23.092409	-0.221658	1.901226	46.370295	-0.947773	4.821791	-19.207953	1.350226	-6.479757	-41.894597	...	-71.446489	43.267756	-0.816412	-56.887243	-18.231429	1.421237	-66.409016	18.751617	0.062069	-118.578352
448	23.100727	-0.215898	1.907636	46.396815	-0.929927	4.860443	-19.222547	1.355703	-6.466862	-41.864912	...	-71.458910	43.300950	-0.836934	-56.901680	-18.156717	1.420305	-66.434610	18.788906	0.038433	-118.547809
449	23.107326	-0.207480	1.914174	46.425629	-0.913972	4.898429	-19.238019	1.361772	-6.453821	-41.835615	...	-71.471432	43.332800	-0.859951	-56.916573	-18.082740	1.422841	-66.459968	18.825424	0.014881	-118.516762

450 rows × 63 columns

# What is the difference between pandas Data Frame and numpy Array?
coords_egocentric = centered_coords.to_numpy()
coords_egocentric

array([[ 2.11382476e+01,  1.49273730e-01,  1.33927693e+00, ...,
         1.90299024e+01, -4.45000140e-01, -1.15416169e+02],
       [ 2.12012820e+01,  2.13704103e-01,  1.32249940e+00, ...,
         1.90536908e+01, -4.40996722e-01, -1.15518292e+02],
       [ 2.12566635e+01,  2.40579034e-01,  1.30234553e+00, ...,
         1.90657352e+01, -4.49760897e-01, -1.15618253e+02],
       ...,
       [ 2.30924089e+01, -2.21658316e-01,  1.90122647e+00, ...,
         1.87516173e+01,  6.20694224e-02, -1.18578352e+02],
       [ 2.31007267e+01, -2.15897713e-01,  1.90763642e+00, ...,
         1.87889064e+01,  3.84331742e-02, -1.18547809e+02],
       [ 2.31073256e+01, -2.07480062e-01,  1.91417434e+00, ...,
         1.88254245e+01,  1.48810149e-02, -1.18516762e+02]])

Reading DeepLabCut Data#

Note that DeepLabCut CSV files contain multiple header rows (scorer, bodyparts and coords).

# .h5 vs csv with multiple headings
file = '/Users/guillermo/Downloads/DLC_data.csv'
data = pd.read_csv(file, header=0)
data

/Users/guillermo/opt/anaconda3/lib/python3.8/site-packages/IPython/core/interactiveshell.py:3444: DtypeWarning: Columns (0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18) have mixed types.Specify dtype option on import or set low_memory=False.
  exec(code_obj, self.user_global_ns, self.user_ns)

	scorer	DLC_resnet50_Picky_Pigeon_ProjectDec17shuffle1_1030000	DLC_resnet50_Picky_Pigeon_ProjectDec17shuffle1_1030000.1	DLC_resnet50_Picky_Pigeon_ProjectDec17shuffle1_1030000.2	DLC_resnet50_Picky_Pigeon_ProjectDec17shuffle1_1030000.3	DLC_resnet50_Picky_Pigeon_ProjectDec17shuffle1_1030000.4	DLC_resnet50_Picky_Pigeon_ProjectDec17shuffle1_1030000.5	DLC_resnet50_Picky_Pigeon_ProjectDec17shuffle1_1030000.6	DLC_resnet50_Picky_Pigeon_ProjectDec17shuffle1_1030000.7	DLC_resnet50_Picky_Pigeon_ProjectDec17shuffle1_1030000.8	DLC_resnet50_Picky_Pigeon_ProjectDec17shuffle1_1030000.9	DLC_resnet50_Picky_Pigeon_ProjectDec17shuffle1_1030000.10	DLC_resnet50_Picky_Pigeon_ProjectDec17shuffle1_1030000.11	DLC_resnet50_Picky_Pigeon_ProjectDec17shuffle1_1030000.12	DLC_resnet50_Picky_Pigeon_ProjectDec17shuffle1_1030000.13	DLC_resnet50_Picky_Pigeon_ProjectDec17shuffle1_1030000.14	DLC_resnet50_Picky_Pigeon_ProjectDec17shuffle1_1030000.15	DLC_resnet50_Picky_Pigeon_ProjectDec17shuffle1_1030000.16	DLC_resnet50_Picky_Pigeon_ProjectDec17shuffle1_1030000.17
0	bodyparts	tophead	tophead	tophead	neckbase	neckbase	neckbase	tailend	tailend	tailend	wingbowleft	wingbowleft	wingbowleft	wingbowright	wingbowright	wingbowright	backcenter	backcenter	backcenter
1	coords	x	y	likelihood	x	y	likelihood	x	y	likelihood	x	y	likelihood	x	y	likelihood	x	y	likelihood
2	0	736.8365478515625	440.2051086425781	0.9999855756759644	736.7742309570312	521.3764038085938	0.9882965683937073	855.1517944335938	502.51654052734375	0.9929274916648865	722.4508666992188	638.5551147460938	0.9817160964012146	848.135498046875	725.8486328125	0.5518141388893127	779.173828125	637.4671020507812	0.9575235247612
3	1	736.6965732465003	439.96420420329684	0.9999710321426392	736.6901676790957	521.3737120583947	0.9984785914421082	854.1960618426	497.6001401711765	0.041485220193862915	717.9101631977412	636.903006227701	0.9891084432601929	843.8478180157791	722.9210467101142	0.8639070987701416	777.0301382784822	636.5131135081384	0.9471402168273926
4	2	735.7040493627546	436.7637389370659	0.999962568283081	736.6576001497499	520.317367317496	0.9994844198226929	855.3453658338591	630.6766258156872	0.0038163603749126196	718.9731625027971	637.8714981367024	0.9381800889968872	709.894219318126	604.0449981131718	0.14082202315330505	777.3939321502603	636.2873536219506	0.9269355535507202
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
71921	71919	816.685736	394.333402	0.996886	747.200637	430.753285	0.99992	575.128539	563.851408	0.999976	756.409661	354.888011	0.999997	816.141309	446.039971	0.999989	672.113405	479.530765	0.999961
71922	71920	816.844076	394.197315	0.993028	747.467628	430.599931	0.999918	575.047661	564.093645	0.999984	756.498965	354.752492	0.999998	816.132225	445.987682	0.999973	672.046608	479.611423	0.999972
71923	71921	816.824561	393.841141	0.991949	747.3541	430.614478	0.999922	575.211231	564.088919	0.999988	756.520165	354.817808	0.999998	815.87947	446.139886	0.99998	672.175552	479.673533	0.999959
71924	71922	816.406188	393.766398	0.992734	747.626257	430.654457	0.999948	575.462232	564.156158	0.999987	756.487524	354.854141	0.999997	815.797104	446.552003	0.999989	672.213608	479.578535	0.999954
71925	71923	816.119821	393.876178	0.993627	747.240035	430.915333	0.999949	575.500692	563.874192	0.999987	756.474672	354.848543	0.999998	815.629617	446.596012	0.999986	672.112326	479.64037	0.999971

71926 rows × 19 columns

You can read all header rows at once with pd.read_csv(file, header=[0, 1, 2]), but the resulting data frame is a little harder to subset because its columns become a MultiIndex.

data = pd.read_csv(file, header=[0, 1, 2])
data

	scorer	DLC_resnet50_Picky_Pigeon_ProjectDec17shuffle1_1030000
	bodyparts	tophead			neckbase			tailend			wingbowleft			wingbowright			backcenter
	coords	x	y	likelihood	x	y	likelihood	x	y	likelihood	x	y	likelihood	x	y	likelihood	x	y	likelihood
0	0	736.836548	440.205109	0.999986	736.774231	521.376404	0.988297	855.151794	502.516541	0.992927	722.450867	638.555115	0.981716	848.135498	725.848633	0.551814	779.173828	637.467102	0.957524
1	1	736.696573	439.964204	0.999971	736.690168	521.373712	0.998479	854.196062	497.600140	0.041485	717.910163	636.903006	0.989108	843.847818	722.921047	0.863907	777.030138	636.513114	0.947140
2	2	735.704049	436.763739	0.999963	736.657600	520.317367	0.999484	855.345366	630.676626	0.003816	718.973163	637.871498	0.938180	709.894219	604.044998	0.140822	777.393932	636.287354	0.926936
3	3	734.293249	431.819332	0.999991	735.908934	517.310811	0.998731	827.010097	675.563420	0.004858	721.308479	640.070535	0.857882	796.094113	682.185972	0.460554	777.985463	635.496588	0.972686
4	4	732.744517	426.053270	0.999990	735.196591	513.924565	0.999366	791.315112	546.063109	0.032099	723.677384	638.630865	0.999613	811.859656	696.591080	0.586390	779.552232	633.139890	0.991626
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
71919	71919	816.685736	394.333402	0.996886	747.200637	430.753285	0.999920	575.128539	563.851408	0.999976	756.409661	354.888011	0.999997	816.141309	446.039971	0.999989	672.113405	479.530765	0.999961
71920	71920	816.844076	394.197315	0.993028	747.467628	430.599931	0.999918	575.047661	564.093645	0.999984	756.498965	354.752492	0.999998	816.132225	445.987682	0.999973	672.046608	479.611423	0.999972
71921	71921	816.824561	393.841141	0.991949	747.354100	430.614478	0.999922	575.211231	564.088919	0.999988	756.520165	354.817808	0.999998	815.879470	446.139886	0.999980	672.175552	479.673533	0.999959
71922	71922	816.406188	393.766398	0.992734	747.626257	430.654457	0.999948	575.462232	564.156158	0.999987	756.487524	354.854141	0.999997	815.797104	446.552003	0.999989	672.213608	479.578535	0.999954
71923	71923	816.119821	393.876178	0.993627	747.240035	430.915333	0.999949	575.500692	563.874192	0.999987	756.474672	354.848543	0.999998	815.629617	446.596012	0.999986	672.112326	479.640370	0.999971

71924 rows × 19 columns

data.columns

MultiIndex([(                                                'scorer', ...),
            ('DLC_resnet50_Picky_Pigeon_ProjectDec17shuffle1_1030000', ...),
            ('DLC_resnet50_Picky_Pigeon_ProjectDec17shuffle1_1030000', ...),
            ('DLC_resnet50_Picky_Pigeon_ProjectDec17shuffle1_1030000', ...),
            ('DLC_resnet50_Picky_Pigeon_ProjectDec17shuffle1_1030000', ...),
            ('DLC_resnet50_Picky_Pigeon_ProjectDec17shuffle1_1030000', ...),
            ('DLC_resnet50_Picky_Pigeon_ProjectDec17shuffle1_1030000', ...),
            ('DLC_resnet50_Picky_Pigeon_ProjectDec17shuffle1_1030000', ...),
            ('DLC_resnet50_Picky_Pigeon_ProjectDec17shuffle1_1030000', ...),
            ('DLC_resnet50_Picky_Pigeon_ProjectDec17shuffle1_1030000', ...),
            ('DLC_resnet50_Picky_Pigeon_ProjectDec17shuffle1_1030000', ...),
            ('DLC_resnet50_Picky_Pigeon_ProjectDec17shuffle1_1030000', ...),
            ('DLC_resnet50_Picky_Pigeon_ProjectDec17shuffle1_1030000', ...),
            ('DLC_resnet50_Picky_Pigeon_ProjectDec17shuffle1_1030000', ...),
            ('DLC_resnet50_Picky_Pigeon_ProjectDec17shuffle1_1030000', ...),
            ('DLC_resnet50_Picky_Pigeon_ProjectDec17shuffle1_1030000', ...),
            ('DLC_resnet50_Picky_Pigeon_ProjectDec17shuffle1_1030000', ...),
            ('DLC_resnet50_Picky_Pigeon_ProjectDec17shuffle1_1030000', ...),
            ('DLC_resnet50_Picky_Pigeon_ProjectDec17shuffle1_1030000', ...)],
           )

data.columns.get_level_values(1)

Index(['bodyparts', 'tophead', 'tophead', 'tophead', 'neckbase', 'neckbase',
       'neckbase', 'tailend', 'tailend', 'tailend', 'wingbowleft',
       'wingbowleft', 'wingbowleft', 'wingbowright', 'wingbowright',
       'wingbowright', 'backcenter', 'backcenter', 'backcenter'],
      dtype='object')

data.columns.get_level_values(2)

Index(['coords', 'x', 'y', 'likelihood', 'x', 'y', 'likelihood', 'x', 'y',
       'likelihood', 'x', 'y', 'likelihood', 'x', 'y', 'likelihood', 'x', 'y',
       'likelihood'],
      dtype='object')

It is usually easier to rename the columns of your data frame so that you do not have to work with a MultiIndex.

data.columns.get_level_values(1) + '_' + data.columns.get_level_values(1)

Index(['bodyparts_bodyparts', 'tophead_tophead', 'tophead_tophead',
       'tophead_tophead', 'neckbase_neckbase', 'neckbase_neckbase',
       'neckbase_neckbase', 'tailend_tailend', 'tailend_tailend',
       'tailend_tailend', 'wingbowleft_wingbowleft', 'wingbowleft_wingbowleft',
       'wingbowleft_wingbowleft', 'wingbowright_wingbowright',
       'wingbowright_wingbowright', 'wingbowright_wingbowright',
       'backcenter_backcenter', 'backcenter_backcenter',
       'backcenter_backcenter'],
      dtype='object')

# Flatten the three-level column header: join the second and third header
# levels into a single "<level 1>_<level 2>" name for every column.
second_level = data.columns.get_level_values(1)
third_level = data.columns.get_level_values(2)
new_col_names = [outer + '_' + inner
                 for outer, inner in zip(second_level, third_level)]
data.columns = new_col_names
data

	bodyparts_coords	tophead_x	tophead_y	tophead_likelihood	neckbase_x	neckbase_y	neckbase_likelihood	tailend_x	tailend_y	tailend_likelihood	wingbowleft_x	wingbowleft_y	wingbowleft_likelihood	wingbowright_x	wingbowright_y	wingbowright_likelihood	backcenter_x	backcenter_y	backcenter_likelihood
0	0	736.836548	440.205109	0.999986	736.774231	521.376404	0.988297	855.151794	502.516541	0.992927	722.450867	638.555115	0.981716	848.135498	725.848633	0.551814	779.173828	637.467102	0.957524
1	1	736.696573	439.964204	0.999971	736.690168	521.373712	0.998479	854.196062	497.600140	0.041485	717.910163	636.903006	0.989108	843.847818	722.921047	0.863907	777.030138	636.513114	0.947140
2	2	735.704049	436.763739	0.999963	736.657600	520.317367	0.999484	855.345366	630.676626	0.003816	718.973163	637.871498	0.938180	709.894219	604.044998	0.140822	777.393932	636.287354	0.926936
3	3	734.293249	431.819332	0.999991	735.908934	517.310811	0.998731	827.010097	675.563420	0.004858	721.308479	640.070535	0.857882	796.094113	682.185972	0.460554	777.985463	635.496588	0.972686
4	4	732.744517	426.053270	0.999990	735.196591	513.924565	0.999366	791.315112	546.063109	0.032099	723.677384	638.630865	0.999613	811.859656	696.591080	0.586390	779.552232	633.139890	0.991626
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
71919	71919	816.685736	394.333402	0.996886	747.200637	430.753285	0.999920	575.128539	563.851408	0.999976	756.409661	354.888011	0.999997	816.141309	446.039971	0.999989	672.113405	479.530765	0.999961
71920	71920	816.844076	394.197315	0.993028	747.467628	430.599931	0.999918	575.047661	564.093645	0.999984	756.498965	354.752492	0.999998	816.132225	445.987682	0.999973	672.046608	479.611423	0.999972
71921	71921	816.824561	393.841141	0.991949	747.354100	430.614478	0.999922	575.211231	564.088919	0.999988	756.520165	354.817808	0.999998	815.879470	446.139886	0.999980	672.175552	479.673533	0.999959
71922	71922	816.406188	393.766398	0.992734	747.626257	430.654457	0.999948	575.462232	564.156158	0.999987	756.487524	354.854141	0.999997	815.797104	446.552003	0.999989	672.213608	479.578535	0.999954
71923	71923	816.119821	393.876178	0.993627	747.240035	430.915333	0.999949	575.500692	563.874192	0.999987	756.474672	354.848543	0.999998	815.629617	446.596012	0.999986	672.112326	479.640370	0.999971

71924 rows × 19 columns

What does my data tell me?#

# Does this make sense?
coords.mean(axis='columns')

   -19.892839
   -19.914494
   -19.930776
   -19.937752
   -19.923985
         ...    
  -6.321469
  -6.313084
  -6.304621
  -6.296241
  -6.288533
Length: 450, dtype: float64

# What about this?
coords.mean(axis='index')

lefteye1_x     18.106572
lefteye1_y     -0.019333
lefteye1_z      0.796235
lefteye2_x     43.672666
lefteye2_y     -0.645574
                 ...    
rlip_y          0.996420
rlip_z        -74.329763
chin_x         17.394544
chin_y         -0.286453
chin_z       -118.842049
Length: 63, dtype: float64

coords['lefteye1_x'].mean()

18.10657168922479

coords.describe()

	lefteye1_x	lefteye1_y	lefteye1_z	lefteye2_x	lefteye2_y	lefteye2_z	righteye1_x	righteye1_y	righteye1_z	righteye2_x	...	lowlip_z	llip_x	llip_y	llip_z	rlip_x	rlip_y	rlip_z	chin_x	chin_y	chin_z
count	450.000000	450.000000	450.000000	450.000000	450.000000	450.000000	450.000000	450.000000	450.000000	450.000000	...	450.000000	450.000000	450.000000	450.000000	450.000000	450.000000	450.000000	450.000000	450.000000	450.000000
mean	18.106572	-0.019333	0.796235	43.672666	-0.645574	5.586992	-21.632261	1.278443	-6.218734	-44.885085	...	-77.892073	36.387709	-0.794339	-65.950381	-14.813016	0.996420	-74.329763	17.394544	-0.286453	-118.842049
std	11.434284	0.599469	3.828957	11.505354	0.705344	4.144886	12.124253	0.803002	4.433668	11.226158	...	6.383489	12.088332	0.708887	5.909938	12.327903	0.705005	5.116139	11.669450	0.823316	3.496960
min	-18.098617	-1.145164	-6.844781	5.858257	-1.948370	-2.307151	-57.526330	-1.012775	-18.430021	-81.767449	...	-89.068969	0.151487	-2.103737	-76.193362	-50.004844	-0.675003	-84.866708	-17.538646	-1.985098	-125.301249
25%	18.225435	-0.612380	-2.954008	43.193416	-1.344781	2.123240	-22.683956	0.584199	-10.600334	-45.410591	...	-83.604016	33.761567	-1.246099	-70.754029	-16.660826	0.725735	-78.226643	15.961120	-1.119348	-121.885470
50%	19.861041	-0.047041	0.653475	46.836783	-0.575274	5.330278	-18.695114	1.352917	-6.413949	-42.622488	...	-77.400903	39.924777	-0.819950	-67.125851	-12.696178	1.048074	-74.785135	18.953581	-0.140253	-117.925277
75%	24.614006	0.572676	3.737492	48.943208	-0.011691	8.761392	-15.253385	1.890220	-3.227924	-39.535513	...	-72.539818	43.852962	-0.265885	-63.012012	-10.172126	1.509309	-71.510924	24.541857	0.266672	-115.933453
max	34.449984	0.968391	8.652052	60.024778	0.420347	14.328102	-5.669487	2.681084	2.781161	-28.287430	...	-64.906500	53.732858	0.461475	-54.243952	8.041773	2.103936	-63.094976	34.490673	0.977069	-111.792641

8 rows × 63 columns

What could my data look like?#

scores.hist(figsize=(20, 20))

array([[<AxesSubplot:title={'center':'lefteye1_score'}>,
        <AxesSubplot:title={'center':'lefteye2_score'}>,
        <AxesSubplot:title={'center':'righteye1_score'}>,
        <AxesSubplot:title={'center':'righteye2_score'}>,
        <AxesSubplot:title={'center':'leyebrow1_score'}>],
       [<AxesSubplot:title={'center':'leyebrow2_score'}>,
        <AxesSubplot:title={'center':'leyebrow3_score'}>,
        <AxesSubplot:title={'center':'reyebrow1_score'}>,
        <AxesSubplot:title={'center':'reyebrow2_score'}>,
        <AxesSubplot:title={'center':'reyebrow3_score'}>],
       [<AxesSubplot:title={'center':'lear_score'}>,
        <AxesSubplot:title={'center':'rear_score'}>,
        <AxesSubplot:title={'center':'nose1_score'}>,
        <AxesSubplot:title={'center':'nose2_score'}>,
        <AxesSubplot:title={'center':'nose3_score'}>],
       [<AxesSubplot:title={'center':'nose4_score'}>,
        <AxesSubplot:title={'center':'uplip_score'}>,
        <AxesSubplot:title={'center':'lowlip_score'}>,
        <AxesSubplot:title={'center':'llip_score'}>,
        <AxesSubplot:title={'center':'rlip_score'}>],
       [<AxesSubplot:title={'center':'chin_score'}>, <AxesSubplot:>,
        <AxesSubplot:>, <AxesSubplot:>, <AxesSubplot:>]], dtype=object)

_images/7119a05ca2c6d305f7285970f80005db46726c6b13fd987611d8fc1dcb207d89.png

scores.boxplot(column=['chin_score', 'lefteye1_score'], figsize=(10, 10))

<AxesSubplot:>

_images/53eafd8a966274ef455251910a347fdb7fd8b390a46bf0194538487c10d44ec8.png

# Split the coordinate table into one frame per axis, selected by column name.
column_names = coords.columns
x_coords = coords.loc[:, column_names.str.contains('_x')]
y_coords = coords.loc[:, column_names.str.contains('_y')]
z_coords = coords.loc[:, column_names.str.contains('_z')]

# Row (frame) of the table that is shown in the plot.
t = 0

fig = plt.figure(figsize=(6, 4), dpi=100)
ax = fig.add_subplot(projection='3d')

# One-row slices holding all landmark positions of frame t.
frame = slice(t, t + 1)
x_points = x_coords[frame]
y_points = y_coords[frame]
z_points = z_coords[frame]

ax.scatter3D(x_points, y_points, z_points)
ax.view_init(11, 280)
ax.set(xlabel='X axis', ylabel='Y axis', zlabel='Z axis')

plt.title("My First Plot")

Text(0.5, 0.92, 'My First Plot')

_images/21a363d9d44519b62cf5a9ad4cdc1929e2b963ee4b9093ca385c693eb781cd5f.png

In the following section we will learn to calculate some easy kinematic features to better understand our data.

Bonus#

def face_skeleton(pose):
    """
    Build the line segments of a face mesh for every pose in ``pose``.

    Each entry ``pose[n]`` is looked up by coordinate name (for example
    ``'nose1_x'``). The result is a list with one skeleton per pose. A
    skeleton is a tuple of seven parts (left eye, right eye, left eyebrow,
    right eyebrow, nose, lips, face outline), and every part is a tuple of
    three lists holding the x, y and z values of its landmarks in the order
    in which they are connected.
    This function is called by plot_3Dpose.
    """
    # Landmarks of each part, in connection order. The nose and the lips
    # repeat their first landmark at the end so that the outline is closed.
    parts = (
        ('lefteye1', 'lefteye2'),
        ('righteye1', 'righteye2'),
        ('leyebrow1', 'leyebrow2', 'leyebrow3'),
        ('reyebrow1', 'reyebrow2', 'reyebrow3'),
        ('nose1', 'nose3', 'nose2', 'nose4', 'nose1'),
        ('uplip', 'llip', 'lowlip', 'rlip', 'uplip'),
        ('rear', 'chin', 'lear'),
    )
    axes = ('_x', '_y', '_z')

    skeletons = []
    for index in range(len(pose)):
        landmarks = pose[index]
        # For every part: one list of values per axis, x first, then y, then z.
        skeleton = tuple(
            tuple([landmarks[name + axis] for name in chain]
                  for axis in axes)
            for chain in parts
        )
        skeletons.append(skeleton)

    return skeletons


def plot_3Dpose(pose, elevation, azimuth):
    """
    Plot every pose in ``pose`` as a 3D scatter of its landmarks and overlay
    the skeleton lines produced by ``face_skeleton``.

    The poses are laid out on a grid with three columns; panels of the grid
    that have no pose to show are hidden. The figure is displayed with
    ``plt.show()`` and nothing is returned.

    pose:      sequence with one entry per pose. Each entry is indexed by
               coordinate name (e.g. 'nose1_x') and exposes those names via
               ``.index`` (presumably the pandas Series returned by
               split_data -- confirm against the caller).
    elevation: elevation angle passed to ``ax.view_init`` for every panel.
    azimuth:   azimuth angle passed to ``ax.view_init`` for every panel.
    """
    # Imported locally so the function does not depend on `math` having been
    # imported elsewhere in the file before it is called.
    import math

    skeletons = face_skeleton(pose)

    ncols = 3
    nrows = math.ceil(len(pose) / ncols)

    # squeeze=False always yields a 2D array of axes, whatever the grid shape.
    fig, axes = plt.subplots(
        nrows, ncols,
        figsize=(ncols * 6, nrows * 5),
        subplot_kw=dict(projection='3d'),
        squeeze=False)
    panels = axes.ravel()

    for n, (ax, skeleton) in enumerate(zip(panels, skeletons)):
        landmarks = pose[n]
        # Select the values of each axis by the suffix in the coordinate name.
        x_points = landmarks[['_x' in s for s in landmarks.index]]
        y_points = landmarks[['_y' in s for s in landmarks.index]]
        z_points = landmarks[['_z' in s for s in landmarks.index]]
        ax.scatter3D(x_points, y_points, z_points)
        ax.view_init(elevation, azimuth)
        ax.set(xlabel='X axis', ylabel='Y axis', zlabel='Z axis')
        ax.set_title('Predicted Pose: %d' % (n + 1))
        # Draw the parts of this pose's own skeleton.
        for x, y, z in skeleton:
            ax.plot(x, y, z, color='g')

    # Hide the surplus panels when len(pose) is not a multiple of ncols.
    for ax in panels[len(pose):]:
        ax.set_visible(False)

    plt.suptitle(
        'Hidden Markov Model predictions with N = %d Components' % len(pose))
    plt.show()


def split_data(data, prediction):
    """
    Split time series data into chunks according to the prediction variable
    and compute the column-wise mean of every chunk.

    data:       pandas DataFrame with one row per time point. It is not
                modified; the labels are attached to a copy.
    prediction: one label per row of ``data``. The labels are assumed to be
                integers counting from 0, so that ``max(prediction) + 1`` is
                the number of components.

    Returns a tuple ``(predictions, pose)``:
    predictions: list where entry ``i`` is the DataFrame of all rows labelled
                 ``i`` (including the added 'pred' column). A label below the
                 maximum that never occurs yields an empty DataFrame.
    pose:        list where entry ``i`` is ``predictions[i].mean()``; for an
                 empty chunk all values are NaN.
    """
    # assign() returns a new frame, so the caller's DataFrame stays untouched.
    labelled = data.assign(pred=prediction)

    # Highest label + 1 = number of predicted components (0 when empty).
    n = int(max(prediction)) + 1 if len(prediction) else 0

    # Boolean masks (rather than groupby.get_group) keep list position i
    # aligned with label i even when a label has no rows.
    predictions = [labelled[labelled['pred'] == i] for i in range(n)]
    pose = [chunk.mean() for chunk in predictions]

    return predictions, pose

from hmmlearn import hmm
import math
# Fit a Gaussian hidden Markov model to the coordinate table and use it to
# label the rows.
# change the number of components you expect to find in your data
model1 = hmm.GaussianHMM(n_components=9, covariance_type="full")
# Note: the model is fitted on, and predicts from, the uncentered `coords`.
model1.fit(coords)
pred1 = model1.predict(coords)

# The averaging per predicted label is done on the nose-centered coordinates;
# only the mean poses are kept, the per-label chunks are discarded.
_, pose1 = split_data(centered_coords, pred1)

# 11 and 280 are the elevation and azimuth handed to plot_3Dpose.
plot_3Dpose(pose1, 11, 280)

_images/53472196f53b8cbb8b15e5130a787c08b3640adfe0dd598f1796c367b5528451.png