Last active
June 6, 2024 18:35
-
-
Save clane9/964c52650f41540c1ad21dad1f247e6f to your computer and use it in GitHub Desktop.
Compute batch intraclass correlation coefficients in python
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from typing import NamedTuple | |
import numpy as np | |
class ICCResult(NamedTuple): | |
icc1: np.ndarray | |
icc2: np.ndarray | |
icc3: np.ndarray | |
icc1k: np.ndarray | |
icc2k: np.ndarray | |
icc3k: np.ndarray | |
def batch_icc(data: np.ndarray) -> ICCResult: | |
""" | |
Compute batch intraclass correlation coefficients. | |
Args: | |
x: array of repeated measuresments tables, shape (..., n_subs, n_ratings) | |
Returns: | |
Named tuple of batch ICC results. | |
Example: | |
>>> import numpy as np | |
>>> # example wine data copied from pingouin | |
>>> data = np.array( | |
... [[1, 2, 0, 1], | |
... [1, 3, 3, 2], | |
... [3, 8, 1, 4], | |
... [6, 4, 3, 3], | |
... [6, 5, 5, 6], | |
... [7, 5, 6, 2], | |
... [8, 7, 7, 9], | |
... [9, 9, 9, 8]] | |
... ) | |
>>> icc = batch_icc(data) | |
>>> print(icc.icc2k) | |
0.9144500359453631 | |
References: | |
https://github.com/raphaelvallat/pingouin/blob/b563de7f4c7ea35467d40a675d02a0018c67dc24/src/pingouin/reliability.py#L159 | |
https://github.com/cran/psych/blob/724ef67cfd88aa5e8cf17f297cbf94e0c72f874d/R/ICC.R#L4 | |
https://github.com/TingsterX/ReX/blob/main/R/lme_ICC.R | |
Koo & Li, 2016 | |
McGraw & Wong, 1996 | |
""" | |
assert data.ndim >= 2, "data should be at least 2d" | |
# subject (row) and ratings (column) axes | |
I, J = -2, -1 | |
N, K = data.shape[I], data.shape[J] | |
# handle missing values with list-wise deletion | |
mask = ~np.isnan(data) | |
row_mask = np.all(mask, axis=J, keepdims=True) | |
mask = np.repeat(row_mask, axis=J, repeats=K) | |
col_mask = np.any(mask, axis=I, keepdims=True) | |
data = np.where(mask, data, 0.0) | |
# number of effective rows/cols per table might vary depending on missing values | |
n_row = np.sum(row_mask, axis=I, keepdims=True) | |
n_col = np.sum(col_mask, axis=J, keepdims=True) | |
n_total = np.sum(mask, axis=(I, J), keepdims=True) | |
assert np.min(n_row) >= 2, "at least 2 valid rows for each table are required" | |
assert np.min(n_col) >= 2, "at least 2 valid columns for each table are required" | |
grand_mean = np.sum(data, axis=(I, J), keepdims=True) / n_total | |
row_mean = np.sum(data, axis=J, keepdims=True) / n_col | |
col_mean = np.sum(data, axis=I, keepdims=True) / n_row | |
# simple sum and mean of squares, no anova required | |
ss_row = n_col * np.sum((row_mean - grand_mean) ** 2, axis=I, keepdims=True) | |
ss_col = n_row * np.sum((col_mean - grand_mean) ** 2, axis=J, keepdims=True) | |
ss_tot = np.sum((data - grand_mean) ** 2, axis=(I, J), keepdims=True) | |
ss_err = ss_tot - (ss_row + ss_col) | |
df_row = n_row - 1 | |
df_col = n_col - 1 | |
df_err = df_row * df_col | |
msr = ss_row / df_row | |
msc = ss_col / df_col | |
mse = ss_err / df_err | |
msw = (ss_col + ss_err) / (df_col + df_err) | |
# ICC formulas from pingouin intraclass_corr following notation of Koo & Li, 2016 | |
icc1 = squeeze((msr - msw) / (msr + df_col * msw)) | |
icc2 = squeeze((msr - mse) / (msr + df_col * mse + n_col * (msc - mse) / n_row)) | |
icc3 = squeeze((msr - mse) / (msr + df_col * mse)) | |
icc1k = squeeze((msr - msw) / msr) | |
icc2k = squeeze((msr - mse) / (msr + (msc - mse) / n_row)) | |
icc3k = squeeze((msr - mse) / msr) | |
return ICCResult(icc1, icc2, icc3, icc1k, icc2k, icc3k) | |
def squeeze(x: np.ndarray) -> float | np.ndarray: | |
x = np.squeeze(x) | |
if x.ndim == 0: | |
x = x.item() | |
return x |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment