# Gist by @jaquesgrobler, created September 13, 2012 (gist id 3715166).
import numpy as np
# logsumexp now lives in scipy.special; the original gist imported it from
# sklearn.utils.extmath, which has since been removed.
from scipy.special import logsumexp
from sklearn.utils import check_random_state


def sigmoid(value):
    """Logistic sigmoid, exp(value) / (1 + exp(value)) == 1 / (1 + exp(-value))."""
    a = np.exp(value)
    return a / (1 + a)
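
# A note on numerical stability (not in the original gist): np.exp overflows
# for large positive inputs, turning the ratio above into nan.  If that ever
# matters, scipy.special.expit computes the same logistic function stably and
# could be swapped in directly, e.g. `from scipy.special import expit`.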


def bin_perm_rep(ndim, a=0, b=1):
    """bin_perm_rep(ndim) -> ndim permutations with repetitions of (a, b).

    Returns an array with all the possible permutations with repetitions of
    (a, b) in ndim dimensions.  The array is shaped as (2**ndim, ndim) and is
    ordered with the last index changing fastest.  For example, for ndim=3:

    >>> bin_perm_rep(3)
    array([[0, 0, 0],
           [0, 0, 1],
           [0, 1, 0],
           [0, 1, 1],
           [1, 0, 0],
           [1, 0, 1],
           [1, 1, 0],
           [1, 1, 1]])
    """
    # Create the leftmost column as a,a,...,b,b,...
    nperms = 2 ** ndim
    perms = np.empty((nperms, ndim), type(a))
    perms.fill(a)
    half_point = nperms // 2  # integer division: half_point is a row index
    perms[half_point:, 0] = b
    # Fill the rest of the table by sampling the previous column every 2 items.
    for j in range(1, ndim):
        half_col = perms[::2, j - 1]
        perms[:half_point, j] = half_col
        perms[half_point:, j] = half_col
    return perms


def sample_rbm(coef_, intercept_visible_, intercept_hidden_,
               state, direction):
    """Sample one layer of the RBM given the other.

    With direction='up', `state` is a visible configuration and a binary
    sample (plus the mean activation) of the hidden units is returned; with
    direction='down', `state` is a hidden configuration and the visible units
    are sampled.  `coef_` has shape (n_hidden, n_visible).
    """
    if direction == 'up':
        # visible -> hidden
        mean = sigmoid(np.dot(coef_, state) + intercept_hidden_)
    elif direction == 'down':
        # hidden -> visible
        mean = sigmoid(np.dot(coef_.T, state) + intercept_visible_)
    else:
        raise ValueError("direction must be 'up' or 'down'")
    sample = np.random.binomial(n=1, p=mean).astype(float)
    return sample, mean


def gibbs_sampling(coef_, intercept_visible_, intercept_hidden_,
                   v_state, n_steps):
    """Run n_steps of block Gibbs sampling, alternating hidden and visible
    updates, and return the final visible state."""
    for n in range(n_steps):
        h_state, _ = sample_rbm(coef_, intercept_visible_, intercept_hidden_,
                                v_state, 'up')
        v_state, _ = sample_rbm(coef_, intercept_visible_, intercept_hidden_,
                                h_state, 'down')
    return v_state


def compute_gradient(coef_, intercept_visible_, intercept_hidden_,
                     v_state, cd_steps, step_size):
    """One contrastive-divergence (CD-k) update of the weight matrix.

    `coef_` is updated in place; the visible and hidden biases are left
    untouched.
    """
    # Positive phase: hidden statistics driven by the data vector.
    h_state, h_mean = sample_rbm(coef_, intercept_visible_, intercept_hidden_,
                                 v_state, 'up')
    coef_ += step_size * np.outer(h_mean, v_state)
    # Negative phase: reconstruct, then run the remaining Gibbs steps.
    v_state_neg, _ = sample_rbm(coef_, intercept_visible_, intercept_hidden_,
                                h_state, 'down')
    v_state_neg = gibbs_sampling(coef_, intercept_visible_, intercept_hidden_,
                                 v_state_neg, cd_steps - 1)
    _, h_mean_neg = sample_rbm(coef_, intercept_visible_, intercept_hidden_,
                               v_state_neg, 'up')
    coef_ -= step_size * np.outer(h_mean_neg, v_state_neg)
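

# A hedged extension (not in the original gist): standard CD-k also updates
# the visible and hidden biases using the difference between positive- and
# negative-phase statistics.  `compute_gradient_with_biases` is a sketch of
# that fuller update, reusing sample_rbm/gibbs_sampling from above; unlike
# the original, it applies all updates at the end rather than updating the
# weights between the two phases.
def compute_gradient_with_biases(coef_, intercept_visible_, intercept_hidden_,
                                 v_state, cd_steps, step_size):
    # Positive phase: statistics driven by the data vector.
    h_state, h_mean = sample_rbm(coef_, intercept_visible_, intercept_hidden_,
                                 v_state, 'up')
    # Negative phase: statistics from the model after cd_steps of Gibbs sampling.
    v_neg, _ = sample_rbm(coef_, intercept_visible_, intercept_hidden_,
                          h_state, 'down')
    v_neg = gibbs_sampling(coef_, intercept_visible_, intercept_hidden_,
                           v_neg, cd_steps - 1)
    _, h_mean_neg = sample_rbm(coef_, intercept_visible_, intercept_hidden_,
                               v_neg, 'up')
    # In-place updates, mirroring the original function's convention.
    coef_ += step_size * (np.outer(h_mean, v_state) - np.outer(h_mean_neg, v_neg))
    intercept_visible_ += step_size * (v_state - v_neg)
    intercept_hidden_ += step_size * (h_mean - h_mean_neg)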


def compute_free_energy(coef_, intercept_visible_, intercept_hidden_, v_state):
    """Negative free energy of visible state(s), i.e. the log unnormalised
    probability: log p*(v) = v.b_v + sum_j log(1 + exp(W v + b_h)_j).

    Accepts a single state of shape (n_visible,) or a batch of shape
    (n_states, n_visible).
    """
    # np.logaddexp(0, x) == log(1 + exp(x)) without overflow for large x.
    if v_state.ndim == 1:
        return (np.dot(v_state, intercept_visible_)
                + np.sum(np.logaddexp(0, np.dot(coef_, v_state)
                                      + intercept_hidden_)))
    else:
        return (np.dot(v_state, intercept_visible_)
                + np.sum(np.logaddexp(0, np.dot(coef_, v_state.T)
                                      + intercept_hidden_[:, None]), axis=0))


def compute_log_partition_function(coef_, intercept_visible_, intercept_hidden_):
    """Exact log Z by exhaustive enumeration of all 2**n_visible visible
    states; only tractable for small models."""
    n_visible = len(intercept_visible_)
    all_v_states = bin_perm_rep(n_visible)
    return logsumexp(compute_free_energy(coef_, intercept_visible_,
                                         intercept_hidden_, all_v_states))


def compute_log_prob(coef_, intercept_visible_, intercept_hidden_, v_state):
    """Exact log-probability of v_state under the current parameters."""
    return (compute_free_energy(coef_, intercept_visible_, intercept_hidden_,
                                v_state)
            - compute_log_partition_function(coef_, intercept_visible_,
                                             intercept_hidden_))
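

# Hedged helper (not in the original gist): tabulate the exact probability of
# every visible configuration, which is handy for inspecting what a small
# model has learned.  It relies only on the functions defined above, and the
# probabilities it returns sum to 1.
def compute_prob_table(coef_, intercept_visible_, intercept_hidden_):
    all_v_states = bin_perm_rep(len(intercept_visible_))
    log_probs = compute_log_prob(coef_, intercept_visible_,
                                 intercept_hidden_, all_v_states)
    return all_v_states, np.exp(log_probs)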


if __name__ == '__main__':
    V = np.array([1., 1., 0., 0.])
    rng = check_random_state(0)
    W = rng.randn(6, 4)
    # W = np.zeros((6, 4))
    i_v = np.zeros(4)
    i_h = np.zeros(6)
    print("Iteration 0, log-prob=", compute_log_prob(W, i_v, i_h, V))
    for i in range(10000):
        compute_gradient(W, i_v, i_h, V, 1, 0.01)
        print("Iteration", i + 1, ", log-prob=", compute_log_prob(W, i_v, i_h, V))