Created
June 23, 2015 12:18
-
-
Save pavelk2/3e06eb51ed4831f47297 to your computer and use it in GitHub Desktop.
estimate-normal-distribution-from-small-sample-with-rankings
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# | |
# Negative log likelihood for parameters `theta`, based on a subset `y` from | |
# a sample of `n` iid values, associated with ranks in `m`. | |
# | |
Lambda <- function(theta, y, m, n) { | |
mu <- theta[1]; sigma <- theta[2] | |
powers <- diff(c(0, m, n+1)) - 1 | |
gaps <- diff(c(0, pnorm(y, mu, sigma), 1)) | |
z <- dnorm(y, mu, sigma) | |
#a <- lgamma(n+1) - sum(lgamma(powers+1)) # Log multinomial coefficient | |
b <- sum(log(z)) + sum(powers*log(gaps)) | |
return (-b) | |
} | |
# | |
# Return the Weibull probability plotting points for `n` values. | |
# | |
pp <- function(n) { | |
mid <- c() | |
if (n >= 3) mid <- (2:(n-1) - 0.3175)/(n + 0.365) | |
u <- (1/2)^(1/n) | |
c(1 - u, mid, u) | |
} | |
# | |
# ROS fit. | |
# | |
ROS <- function(y, m, n) { | |
sigma <- qnorm(pp(n)[m]) | |
return (coef(lm(y ~ sigma))) | |
} | |
# | |
# MLE fit (estimates only). | |
# | |
MLE <- function(y, m, n) { | |
theta <- ROS(y, m, n) # Use ROS for an initial estimate | |
fit <- optim(theta, Lambda, y=y, m=m, n=n) | |
z <- c(fit$par, theta) | |
names(z) <- c("mu.MLE", "sigma.MLE", "mu.ROS", "sigma.ROS") | |
return (z) # Return MLE *and* ROS estimates | |
} | |
# | |
# Generate a dataset from a known Normal distribution. | |
# | |
n <- 100 | |
m <- sort(c(98, 59, 27)) # The ranks to select | |
set.seed(17) # Allow reproducible results | |
x <- sort(rnorm(n)) # Sort the data once and for all | |
y <- x[m] # All fitting is based only on `y`, `m`, and `n` | |
# | |
# Fit using both methods. | |
# | |
fit <- MLE(y, m, n) | |
mu.hat <- fit[1]; sigma.hat <- fit[2] | |
# | |
# Plot the MLE fit. | |
# | |
pp.x <- qnorm(pp(n), mu.hat, sigma.hat) | |
pp.y <- pp.x[m] | |
plot(pp.x, x, col="Gray", xlab="Fit", ylab="Values", | |
main="Simulated Data, n=100") | |
abline(c(0,1), col="Black", lty=3) | |
points(pp.y, y, pch=16, col="#d03020", cex=1.25) | |
# | |
# Peform a Monte-Carlo assessment of the bias and variance of both. | |
# | |
system.time({ # Takes about 4 seconds per 1000 iterations | |
sim <- replicate(1e3, { | |
x <- sort(rnorm(n)) | |
y <- x[m] | |
MLE(y, m, n) | |
}) | |
}) | |
rowMeans(sim) # Average estimates of the parameters | |
apply(sim, 1, sd) # Standard deviations of those estimates |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
taken from http://stats.stackexchange.com/questions/130156/estimate-normal-distribution-from-small-sample-with-rankings