%load_ext autoreload
%autoreload 2
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from sklearn.decomposition import PCA
from sklearn.metrics import pairwise_distances
from htc.cameras.settings_cam import settings_cam # noqa: F401
from htc.cameras.visualization import cam_median_spectra_figure
from htc.utils.helper_functions import group_median_spectra, median_table
# Load the median table of the multi-organ masks dataset and keep only the rows
# whose label and camera both appear in the camera settings.
df = median_table(dataset_name="2021_02_05_Tivita_multiorgan_masks")
keep_label = df["label_name"].isin(settings_cam.labels)
keep_camera = df["camera_name"].isin(settings_cam.cameras)
df = df[keep_label & keep_camera]

# Group the spectra while carrying the camera name along as an extra column
# (presumably one row per subject, label and camera -- confirm against the helper).
df_agg = group_median_spectra(df, additional_columns=["camera_name"])
df_agg.head()
| | subject_name | label_name | median_spectrum | std_spectrum | median_normalized_spectrum | std_normalized_spectrum | camera_name |
---|---|---|---|---|---|---|---|
0 | P042 | stomach | [0.1297219, 0.12358159, 0.12232345, 0.11359459... | [0.04077928, 0.033759594, 0.029121568, 0.02562... | [0.004175563, 0.0039876043, 0.0039376547, 0.00... | [0.0012897034, 0.0010458297, 0.00088586286, 0.... | 0102-00085_correct-1 |
1 | P042 | colon | [0.09155203, 0.086514436, 0.08638916, 0.080954... | [0.04399869, 0.03606805, 0.030308893, 0.025851... | [0.003640162, 0.00343846, 0.003449043, 0.00321... | [0.0016656602, 0.0013140606, 0.0010563593, 0.0... | 0102-00085_correct-1 |
2 | P042 | liver | [0.017156733, 0.010483901, 0.011443508, 0.0096... | [0.027168514, 0.019810757, 0.016005786, 0.0119... | [0.0016503632, 0.00096326525, 0.0010857421, 0.... | [0.0025573766, 0.0018524164, 0.001492214, 0.00... | 0102-00085_correct-1 |
3 | P042 | gallbladder | [0.07577704, 0.0713027, 0.07237347, 0.06773271... | [0.036809415, 0.03220314, 0.02923993, 0.026654... | [0.00760469, 0.0070759268, 0.007265112, 0.0067... | [0.003397993, 0.0027094285, 0.0021823905, 0.00... | 0102-00085_correct-1 |
4 | P042 | pancreas | [0.1140691, 0.110380255, 0.11058148, 0.1036758... | [0.03779349, 0.03123703, 0.027001316, 0.022912... | [0.0034198128, 0.0032846706, 0.003316825, 0.00... | [0.0010397636, 0.0008067467, 0.0006472343, 0.0... | 0102-00085_correct-1 |
# Draw the figure for the aggregated table (presumably the median spectra per camera,
# going by the helper's name; its implementation is not part of this file).
cam_median_spectra_figure(df_agg)
# Collapse all pigs into a single row per (camera, organ) pair
rows = []
for cam_name in df_agg["camera_name"].unique():
    df_cam = df_agg[df_agg["camera_name"] == cam_name]

    for label in df_cam["label_name"].unique():
        df_label = df_cam[df_cam["label_name"] == label]

        # Average the normalized median spectra over every row of this pair
        spectra = np.stack(df_label["median_normalized_spectrum"])
        mean_spectrum = spectra.mean(axis=0)

        # Rows whose std spectrum contains a NaN are left out of the std average
        stds = np.stack(df_label["std_normalized_spectrum"])
        has_nan = np.isnan(stds).any(axis=1)
        mean_std = stds[~has_nan].mean(axis=0)

        rows.append([cam_name, label, mean_spectrum, mean_std])

df_agg_cam = pd.DataFrame(
    rows,
    columns=["camera_name", "label_name", "median_normalized_spectrum", "std_normalized_spectrum"],
)
df_agg_cam.head()
| | camera_name | label_name | median_normalized_spectrum | std_normalized_spectrum |
---|---|---|---|---|
0 | 0102-00085_correct-1 | stomach | [0.004837525, 0.0048194104, 0.0045828014, 0.00... | [0.001759109, 0.0013728896, 0.00112228, 0.0009... |
1 | 0102-00085_correct-1 | colon | [0.00507431, 0.0048801033, 0.0046139047, 0.004... | [0.0022836942, 0.0017547376, 0.0013935001, 0.0... |
2 | 0102-00085_correct-1 | liver | [0.0020213977, 0.0018241481, 0.0016860911, 0.0... | [0.0032810306, 0.0024661662, 0.0018520037, 0.0... |
3 | 0102-00085_correct-1 | gallbladder | [0.009449516, 0.009217508, 0.008873056, 0.0084... | [0.0037826505, 0.0029736974, 0.0024411448, 0.0... |
4 | 0102-00085_correct-1 | pancreas | [0.0046463674, 0.0045730653, 0.004401094, 0.00... | [0.0010133738, 0.00076813507, 0.00063034915, 0... |
# One camera-vs-camera distance heatmap per organ, arranged on a grid.
#
# The subplot titles and the traces are both driven by the same label sequence
# (taken from df_agg_cam, the table the distances are computed from), so every
# title is guaranteed to sit above the heatmap it describes.
labels = df_agg_cam["label_name"].unique()
n_cols = 4
# Ceil division: enough rows to hold every label; make_subplots needs at least one row
n_rows = max(1, -(-len(labels) // n_cols))

fig = make_subplots(
    rows=n_rows,
    cols=n_cols,
    shared_xaxes="all",
    shared_yaxes="all",
    subplot_titles=list(labels),
    horizontal_spacing=0.05,
    vertical_spacing=0.05,
)

# Per-organ distance matrices, kept so they can be averaged afterwards
dists = []
for i, label in enumerate(labels):
    df_label = df_agg_cam.query("label_name == @label")
    cams = df_label["camera_name"].unique().tolist()

    # One row per camera; distances use the default metric of pairwise_distances
    X = np.stack(df_label["median_normalized_spectrum"])
    dist = pairwise_distances(X)
    dists.append(dist)

    # Fill the grid row by row (plotly subplot indices are 1-based)
    row, col = divmod(i, n_cols)
    fig.add_trace(go.Heatmap(z=dist, x=cams, y=cams, coloraxis="coloraxis1"), row=row + 1, col=col + 1)

fig.layout.height = 900
fig.update_layout(title="Camera distance matrix per organ (L1 normalized median spectra)", title_x=0.5)
# Average the per-organ distance matrices into one camera-vs-camera matrix
mean_dist = np.stack(dists).mean(axis=0)

# `cams` still holds the camera names from the last iteration of the per-organ loop
fig = go.Figure(data=[go.Heatmap(z=mean_dist, x=cams, y=cams)])
fig.update_layout(height=400, width=600)
# Fit a PCA on the normalized median spectra of every row in `df` and show the
# share of variance explained by the first ten components.
X = np.stack(df["median_normalized_spectrum"].tolist())
pca = PCA().fit(X)
pca.explained_variance_ratio_[:10]
array([0.4556574 , 0.35357636, 0.12885566, 0.03687239, 0.01129766, 0.00430736, 0.00351748, 0.00222947, 0.00101338, 0.00064543], dtype=float32)