Last active
March 15, 2021 09:38
-
-
Save tade0726/c1a263ae29fbae8f19d0dfa6e00d4e53 to your computer and use it in GitHub Desktop.
simple script for significant and power report
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import scipy.stats | |
from statsmodels.stats.proportion import proportions_ztest | |
def get_p1_p2_alternative(s, t): | |
""" | |
return alternative base on actual p1, p2 | |
""" | |
s = np.array(s) | |
t = np.array(t) | |
p = s / t | |
return "smaller" if p[0] < p[1] else "larger" | |
def z_test(s, t, alpha=0.05, alternative='two-sided'): | |
stat, p = proportions_ztest(s, t, alternative=alternative) | |
return stat, p, p <= alpha | |
def get_power(s, t, alpha, alternative="two-sided"): | |
s = np.array(s) | |
t = np.array(t) | |
p1, p2 = tuple(s / t) | |
n = t.sum() | |
alpha = alpha | |
believe = alpha / 2 if alternative == "two-sided" else alpha | |
qu = scipy.stats.norm.ppf(1 - believe) | |
diff = abs(p2 - p1) | |
bp = (p1 + p2) / 2 | |
v1 = p1 * (1 - p1) | |
v2 = p2 * (1 - p2) | |
bv = bp * (1 - bp) | |
power_part_one = scipy.stats.norm.cdf((n ** 0.5 * diff - qu * (2 * bv) ** 0.5) / (v1 + v2) ** 0.5) | |
power_part_two = 1 - scipy.stats.norm.cdf((n ** 0.5 * diff + qu * (2 * bv) ** 0.5) / (v1 + v2) ** 0.5) | |
power = power_part_one + power_part_two | |
if alternative == "two-sided": | |
return power | |
elif alternative in ("larger", "smaller"): | |
return max(power_part_one, power_part_two) | |
else: | |
raise ValueError() | |
def full_report(s, t, alpha, is_two_sided=True): | |
"""I assume the alternative is the same direction as the actual p1, p2 values. | |
""" | |
alternative = get_p1_p2_alternative(s, t) if not is_two_sided else "two-sided" | |
z_stats, p, sign = z_test(s, t, alpha=alpha, alternative=alternative) | |
power = get_power(s, t, alpha=alpha, alternative=alternative) | |
s = np.array(s) | |
t = np.array(t) | |
means = s / t | |
p1 = means[0] | |
p2 = means[1] | |
change = (p2 / p1) - 1 | |
return { | |
"alternative": alternative, | |
"change": change, | |
"p1": p1, | |
"p2": p2, | |
"p-value": p, | |
"significant": sign, | |
"z_stats": z_stats, | |
"power": power, | |
} | |
if __name__ == "__main__": | |
# first column should be control (version A) | |
# sec column should be treatment (version B) | |
test_data = [ | |
(6500, 6694), # total | |
(145, 120) # conversion | |
] | |
results = full_report(test_data[1], test_data[0], 0.05) | |
print(results) | |
""" | |
{'alternative': 'two-sided', 'change': -0.19639821559193515, 'p1': 0.022307692307692306, 'p2': 0.0179265013444876, 'p-value': 0.0729107128409782, 'significant': False, 'z_stats': 1.7933892027166773, 'power': 0.717231252963892} | |
""" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment