Last active
June 11, 2019 13:18
-
-
Save peakBreaker/d5aeeb51b2d55d709147c7d380ffa17c to your computer and use it in GitHub Desktop.
ecdf, correlation, bootstrapping
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def ecdf(data):
    """Return the empirical CDF of one-dimensional measurements.

    Handy for graphical exploratory data analysis: plot the second return
    value against the first.

    Returns a tuple ``(x, y)`` where ``x`` is ``data`` sorted ascending and
    ``y[i] == (i + 1) / len(data)``.
    """
    sorted_values = np.sort(data)
    count = len(data)
    # Cumulative fractions 1/n, 2/n, ..., n/n, one per sorted value.
    cumulative = np.arange(1, count + 1) / count
    return sorted_values, cumulative
def pearson_r(x, y):
    """Return the Pearson correlation coefficient of two arrays.

    Useful for quantitative EDA and as a test statistic in hypothesis
    testing.
    """
    # np.corrcoef yields the 2x2 correlation matrix; the off-diagonal
    # entry [0, 1] is the correlation between x and y.
    return np.corrcoef(x, y)[0, 1]
### Bootstrapping and hacker statistics
def draw_bs_pairs_linreg(x, y, size=1):
    """Perform pairs bootstrap for linear regression.

    Each replicate resamples (x, y) pairs with replacement, keeping every
    x value together with its own y value, and fits a degree-1 polynomial
    to the resampled pairs.

    Parameters
    ----------
    x, y : array_like
        One-dimensional, equal-length sequences of paired observations.
    size : int, optional
        Number of bootstrap replicates to draw (default 1).

    Returns
    -------
    bs_slope_reps, bs_intercept_reps : ndarray
        Arrays of length ``size`` holding the fitted slopes and intercepts.

    Raises
    ------
    ValueError
        If ``x`` and ``y`` do not have the same length.
    """
    # Coerce to arrays so that index-array selection below also works for
    # plain Python sequences.
    x = np.asarray(x)
    y = np.asarray(y)
    if len(x) != len(y):
        raise ValueError("x and y must have the same length")

    # Indices to sample from; sampling indices keeps the pairs intact.
    inds = np.arange(len(x))

    # np.empty takes the shape positionally (it has no ``size`` keyword).
    bs_slope_reps = np.empty(size)
    bs_intercept_reps = np.empty(size)

    for i in range(size):
        bs_inds = np.random.choice(inds, size=len(inds))
        bs_x, bs_y = x[bs_inds], y[bs_inds]
        # polyfit returns coefficients highest degree first: slope, intercept.
        bs_slope_reps[i], bs_intercept_reps[i] = np.polyfit(bs_x, bs_y, deg=1)

    return bs_slope_reps, bs_intercept_reps
def bootstrap_replicate_1d(data, func):
    """Compute one bootstrap replicate of 1D data.

    Draws ``len(data)`` values from ``data`` with replacement and returns
    ``func`` applied to that resample.
    """
    n_points = len(data)
    resample = np.random.choice(data, size=n_points)
    return func(resample)
def draw_bs_reps(data, func, size=1):
    """Draw bootstrap replicates.

    Parameters
    ----------
    data : array_like
        One-dimensional data to resample.
    func : callable
        Statistic computed on each resample; its result is stored in a
        float array, so it should return a scalar.
    size : int, optional
        Number of bootstrap replicates to draw (default 1).

    Returns
    -------
    bs_replicates : ndarray
        Array of length ``size`` with one replicate per entry.
    """
    # np.empty takes the shape positionally (it has no ``size`` keyword).
    bs_replicates = np.empty(size)

    for i in range(size):
        bs_replicates[i] = bootstrap_replicate_1d(data, func)

    return bs_replicates
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment