Skip to content

Instantly share code, notes, and snippets.

@jad2192
Created February 6, 2018 16:36
Show Gist options
  • Save jad2192/46b04f3297801b82d0185eb0c0d9a327 to your computer and use it in GitHub Desktop.
N-Tuples
import numpy as np
from timeit import default_timer as d_timer
class N_Tuple_Classifier_fast(object):
    """N-tuple classifier for 28x28 (784-pixel) images.

    Each of ``num_tuples`` random pixel subsets ("tuples") masks a binarized
    image, producing a sparse binary projection.  Prediction scores a test
    image by its average projection overlap with each class's training
    samples and returns the arg-max class.
    """

    def __init__(self, pixel_percentage=0.1, num_tuples=100, pixel_tolerance=0.3, warm_start=None):
        """pixel_percentage: fraction of the 784 pixels each tuple samples
            (default 0.1 -> int(784 * 0.1) = 78 pixels per tuple).
        num_tuples: number of random tuples (projections) to use.
        pixel_tolerance: binarization threshold -- a pixel whose
            intensity / 256 is below this value is set to 0, else 1.
        warm_start: optional (tuple_matrix, frequency_table) pair to start
            from a pre-trained model instead of drawing random tuples.
        """
        self.M = num_tuples
        self.pt = pixel_tolerance
        self.class_ind = {}
        if warm_start is None:
            n_pix = int(784 * pixel_percentage)
            self.tuples = np.zeros((num_tuples, 784), dtype='i8')
            for m in range(self.M):
                # Mark n_pix distinct, randomly chosen pixel positions
                # as belonging to tuple m.
                sel = np.random.choice(np.arange(784), size=n_pix, replace=False)
                self.tuples[m][sel] = 1
            self.projs = None
        else:
            self.tuples = warm_start[0]
            self.freq_table = warm_start[1]

    def fit(self, data):
        """Fit on an (n_samples, 785) array: column 0 holds the integer
        label (0-9), columns 1..784 the pixel intensities in [0, 256)."""
        start_t = d_timer()
        labs = data[:, 0]
        for k in range(10):
            # Store the flat index array.  (np.where returns a 1-tuple;
            # storing the tuple itself made len(cur_ix) == 1 in predict(),
            # breaking the per-class normalization.)
            self.class_ind[k] = np.where(labs == k)[0]
        # Binarize: a pixel is 1 iff intensity / 256 exceeds pixel_tolerance.
        data_bin = np.asanyarray((data[:, 1:] / 256) > self.pt, dtype='i8')
        # Project every sample onto every tuple at once via broadcasting:
        # projs[m, n, :] is the n-th sample masked by the m-th tuple.
        # (Equivalent to the original einsum over an all-ones tensor,
        # without materializing that tensor.)
        self.projs = self.tuples[:, None, :] * data_bin[None, :, :]
        print('Model fit, time spent: ', d_timer() - start_t, ' s')

    def predict(self, X):
        """Predict the class (0-9) of a single 784-pixel image X."""
        s = d_timer()
        X_bin = np.asanyarray(X / 256 > self.pt, dtype='i8')
        # Projection of the test image onto each tuple: (M, 784) binary.
        X_proj = self.tuples * X_bin[None, :]
        # overlap[m, n] = number of pixels that are "on" in both the test
        # image's and the n-th training sample's m-th projection.  Because
        # all entries are 0/1, the elementwise product already encodes the
        # "both equal 1" test the original (proj + ext) == 2 version made.
        overlap = (X_proj[:, None, :] * self.projs).sum(axis=-1)
        prob = np.full(10, -np.inf)
        for k in range(10):
            cur_ix = self.class_ind[k]
            if len(cur_ix) == 0:
                # No training samples for this class: leave at -inf
                # instead of dividing by zero.
                continue
            prob[k] = overlap[:, cur_ix].sum() / len(cur_ix)
        print('Prediction took: ', d_timer() - s, ' s')
        return prob.argmax()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment