derekmcloughlin · February 21, 2016 11:21
diff --git a/linear_regression.R b/linear_regression.R
 # scaled features.
 # x = square feet
 # y = sale price
 x <- c(1, 2, 4)
 y <- c(2, 2.5, 3)

 # function to calculate the predicted value
 h <- function(x, t0, t1)
 {
    result = t0 + t1 * x
    result
 }


 # given a theta_0 and theta_1, this function calculates
 # their cost. We don't need this function, strictly speaking...
 # but it is nice to print out the costs as gradient descent iterates.
 # We should see the cost go down every time the values of theta get updated.
 cost <- function (theta_0, theta_1, x, y) 
 {
    distance <- 0
    for (i in 1:length(x)) # arrays in R are indexed starting at 1
    {
        square_feet <- x[i]
        predicted_value <- theta_0 + theta_1 * square_feet
        actual_value <- y[i]
        # how far off was the predicted value (make sure you get the absolute value)?
        distance <- distance + abs(actual_value - predicted_value)
    }
    # get how far off we were on average
    distance <- distance / length(x)
    distance
 }

 alpha <- 0.1
 iters <- 1500
 m <- length(x)

 # initial values
 theta_0 <- 0
 theta_1 <- 0

 for (i in 1:iters)
 {
    # we store this calculation in temporary variables,
    # because theta_0 and theta_1 must be updated *together*.
    # i.e. we can't update theta_0 and use the new theta_0 value
    # while updating theta_1.
    cost_0 <- 0
    for (j in 1:m)
    {
        cost_0 <- cost_0 + (h(x[j], theta_0, theta_1) - y[j])
    }
    temp_0 <- theta_0 - alpha * 1/m * cost_0
    
    cost_1 <- 0
    for (j in 1:m)
    {
        cost_1 = cost_1 + ((h(x[j], theta_0, theta_1) - y[j]) * x[j])
    }
    
    temp_1 <- theta_1 - alpha * 1/m * cost_1
    
    theta_0 <- temp_0
    theta_1 <- temp_1
    
    # print out the new values of theta for each iteration,
    # as well as the new, lower cost
    print(theta_0, theta_1, cost(theta_0, theta_1, x, y))
    
 }

 # scale the values of theta, they
 # were too small because we did feature scaling
 theta_0 <- theta_0 * 100000
 theta_1 <- theta_1 * 100

 # un-scale the features. So now we should be
 # back to [1000, 2000, 4000] for square feet,
 # from [1, 2, 4].
 x <- x * 1000
 y <- y * 100000

 plot(x, y, col='red')
 lines(1000:4000, h(1000:4000, theta_0, theta_1), col='green')

 print(h(3000, theta_0, theta_1))
	# scaled features.
	# x = square feet
	# y = sale price
	x <- c(1, 2, 4)
	y <- c(2, 2.5, 3)

	# function to calculate the predicted value
	h <- function(x, t0, t1)
	{
	result = t0 + t1 * x
	result
	}


	# given a theta_0 and theta_1, this function calculates
	# their cost. We don't need this function, strictly speaking...
	# but it is nice to print out the costs as gradient descent iterates.
	# We should see the cost go down every time the values of theta get updated.
	cost <- function (theta_0, theta_1, x, y)
	{
	distance <- 0
	for (i in 1:length(x)) # arrays in R are indexed starting at 1
	{
	square_feet <- x[i]
	predicted_value <- theta_0 + theta_1 * square_feet
	actual_value <- y[i]
	# how far off was the predicted value (make sure you get the absolute value)?
	distance <- distance + abs(actual_value - predicted_value)
	}
	# get how far off we were on average
	distance <- distance / length(x)
	distance
	}

	alpha <- 0.1
	iters <- 1500
	m <- length(x)

	# initial values
	theta_0 <- 0
	theta_1 <- 0

	for (i in 1:iters)
	{
	# we store this calculation in temporary variables,
	# because theta_0 and theta_1 must be updated together.
	# i.e. we can't update theta_0 and use the new theta_0 value
	# while updating theta_1.
	cost_0 <- 0
	for (j in 1:m)
	{
	cost_0 <- cost_0 + (h(x[j], theta_0, theta_1) - y[j])
	}
	temp_0 <- theta_0 - alpha * 1/m * cost_0

	cost_1 <- 0
	for (j in 1:m)
	{
	cost_1 = cost_1 + ((h(x[j], theta_0, theta_1) - y[j]) * x[j])
	}

	temp_1 <- theta_1 - alpha * 1/m * cost_1

	theta_0 <- temp_0
	theta_1 <- temp_1

	# print out the new values of theta for each iteration,
	# as well as the new, lower cost
	print(theta_0, theta_1, cost(theta_0, theta_1, x, y))

	}

	# scale the values of theta, they
	# were too small because we did feature scaling
	theta_0 <- theta_0 * 100000
	theta_1 <- theta_1 * 100

	# un-scale the features. So now we should be
	# back to [1000, 2000, 4000] for square feet,
	# from [1, 2, 4].
	x <- x * 1000
	y <- y * 100000

	plot(x, y, col='red')
	lines(1000:4000, h(1000:4000, theta_0, theta_1), col='green')

	print(h(3000, theta_0, theta_1))