Last active
February 23, 2018 12:03
-
-
Save fede-vaccaro/71ae77420c27e2275638991ec14f8be2 to your computer and use it in GitHub Desktop.
A Python benchmark of several operations.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
import numpy as np | |
import numpy.linalg as LA | |
import scipy.linalg | |
from sklearn.mixture import GaussianMixture | |
from sklearn.preprocessing import normalize | |
from sklearn.decomposition import PCA | |
from multiprocessing.dummy import Pool as ThreadPool | |
import time | |
def fisher_vector(xx, gmm):
    """Computes the mean-gradient part of the Fisher vector of a set of descriptors.

    Only the derivatives with respect to the component means are computed;
    the mixing-weight and variance derivatives are not part of the output,
    and no covariance scaling is applied.

    Parameters
    ----------
    xx: array_like, shape (N, D) or (D, )
        The set of descriptors. A single 1-D descriptor is treated as N = 1.

    gmm: fitted mixture model
        Any object exposing ``predict_proba(X)`` (returning the (N, K)
        posteriors) and a ``means_`` array of shape (K, D), e.g. a fitted
        ``sklearn.mixture.GaussianMixture``.

    Returns
    -------
    fv: ndarray, shape (K * D, )
        Flattened (row-major) derivatives with respect to the K component
        means.

    Reference
    ---------
    J. Krapac, J. Verbeek, F. Jurie. Modeling Spatial Layout with Fisher
    Vectors for Image Categorization. In ICCV, 2011.
    http://hal.inria.fr/docs/00/61/94/03/PDF/final.r1.pdf
    """
    xx = np.atleast_2d(xx)
    N = xx.shape[0]

    # Compute posterior probabilities.
    Q = gmm.predict_proba(xx)  # NxK

    # Compute the sufficient statistics of descriptors, averaged over N.
    Q_sum = np.sum(Q, 0)[:, np.newaxis] / N  # Kx1, broadcasts against means_
    Q_xx = np.dot(Q.T, xx) / N  # KxD

    # Compute derivatives with respect to the mixture means.
    d_mu = Q_xx - Q_sum * gmm.means_

    # Merge derivatives into a vector.
    return d_mu.flatten()
def calculateFV(im_matrix_):
    """Return the Fisher vector (mean derivatives only) of one image.

    The descriptors in ``im_matrix_`` are converted to a NumPy array and
    encoded with the module-level ``gmm``, which must have been assigned
    before this function is called.
    """
    descriptors = np.array(im_matrix_)
    # Only the derivative w.r.t. mu is computed by fisher_vector.
    return fisher_vector(descriptors, gmm)
# function to be mapped over | |
def calculateParallel(tensor, threads=16):
    """Compute the Fisher vector of every item of ``tensor`` on a thread pool.

    Parameters
    ----------
    tensor: iterable
        One entry per image; each entry is passed to ``calculateFV``.
    threads: int, optional
        Number of worker threads in the pool (default 16).

    Returns
    -------
    ndarray
        The per-image results of ``calculateFV``, in input order, wrapped
        in a single array.
    """
    pool = ThreadPool(threads)
    try:
        results = pool.map(calculateFV, tensor)
    finally:
        # Always release the workers, even if calculateFV raised.
        pool.close()
        pool.join()
    return np.array(results)
def process():
    """Run the benchmark pipeline and print the wall-clock time of each stage.

    Stages: PCA reduction of the raw descriptors, GMM fitting, Fisher vector
    encoding (followed by a second PCA), power normalization and L2
    normalization. The fitted mixture is stored in the module-level ``gmm``
    because ``calculateFV`` reads it from there.

    Returns
    -------
    str
        A fixed completion message.
    """
    global gmm

    # make a big matrix with all image descriptors
    dim = 4096
    n_images = 256
    n_windows = 500
    np.random.seed(0)
    # NOTE(review): list multiplication repeats ONE random vector, so every
    # window of every image holds the same descriptor -- confirm that this
    # degenerate input is what the benchmark is meant to use.
    all_desc = [[np.random.random(dim)] * n_windows] * n_images

    t = time.time()

    # np.vstack needs a real sequence; passing a generator is deprecated.
    pca = myPCA(np.vstack(all_desc), dim=256)

    print("Transforming initial data into lower dimension")
    t_ = time.time()
    all_desc_transformed = [pca.transform(matrix) for matrix in all_desc]
    print("Data transformed in: {}".format(time.time() - t_))

    gmm = GaussianMixture(n_components=64, covariance_type='diag', verbose_interval=1)
    t_ = time.time()
    print("Start calculating GMM")
    gmm.fit(np.vstack(all_desc_transformed))
    print("GMM calculated in: {}s".format(time.time() - t_))

    print("Start calculating Fisher Vectors")
    # Start the timer BEFORE encoding so the reported time includes it.
    t_ = time.time()
    result = calculateParallel(all_desc_transformed)

    # make one matrix with all FVs
    image_fvs = np.vstack(result)
    pca_transform_fvs = myPCA(image_fvs, 256)
    image_fvs_ = pca_transform_fvs.transform(image_fvs)
    print("FVS calculated in: {}".format(time.time() - t_))

    # power-normalization
    t_ = time.time()
    image_fvs_ = np.sign(image_fvs_) * np.abs(image_fvs_) ** 0.5
    print("Power normalization computed in: {}s".format(time.time() - t_))

    # L2 normalize
    t_ = time.time()
    image_fvs_ = normalize(image_fvs_, norm='l2', axis=1)
    print("L2 normalization computed in: {}s".format(time.time() - t_))

    print("All computation executed in: {}s".format(time.time() - t))
    print("Mission accomplished!")
    return "YAEL SCRIPT: Mission accomplished!"
def myPCA(matrix, dim):
    """Fit a PCA with ``dim`` components on ``matrix`` and return it.

    Prints the requested dimension, the shape of ``matrix`` and the time
    spent fitting.
    """
    print("Start calculating PCA of dim {0} starting by {1}".format(dim, matrix.shape))
    started = time.time()
    model = PCA(n_components=dim)
    model.fit(matrix)
    elapsed = time.time() - started
    print("PCA calculated in {}".format(elapsed))
    return model
# Run the benchmark only when executed as a script, not when imported.
if __name__ == "__main__":
    process()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment