Skip to content

Instantly share code, notes, and snippets.

@ian-ross
Created September 18, 2014 11:43
Show Gist options
  • Save ian-ross/d39bf143ffc482ea3700 to your computer and use it in GitHub Desktop.
Save ian-ross/d39bf143ffc482ea3700 to your computer and use it in GitHub Desktop.
Two-dimensional PCA example
name: pca-2d
version: 0.1
synopsis: Two-dimensional PCA example
license: BSD3
author: Ian Ross
maintainer: [email protected]
copyright: Copyright (2014) Ian Ross
category: Data
build-type: Simple
cabal-version: >=1.8
Executable pca-2d
main-is: pca-2d.hs
build-depends: base >= 4.3 && < 5,
hmatrix == 0.16.0.5,
Chart == 1.3.1,
Chart-cairo == 1.3.1
module Main where
import Control.Monad
import Numeric.LinearAlgebra.HMatrix
import Graphics.Rendering.Chart.Easy hiding (Matrix, Vector, (|>), scale)
import Graphics.Rendering.Chart.Backend.Cairo
-- Number of test data points.
n :: Int
n = 500
-- Mean, standard deviation and correlation for two dimensions of test
-- data.
meanx, meany, sdx, sdy, rho :: Double
meanx = 5.0 ; meany = 3.0 ; sdx = 1.2 ; sdy = 0.6 ; rho = 0.75
-- Generate test data.
generateTestData :: Matrix Double
generateTestData =
let seed = 1023
mean = 2 |> [ meanx, meany ]
cov = matrix 2 [ sdx^2 , rho*sdx*sdy
, rho*sdx*sdy , sdy^2 ]
samples = gaussianSample seed n mean cov
in fromRows $ filter ((> 0) . minElement) $ toRows samples
-- Perform PCA decomposition and project data points onto PCA
-- eigenvectors: return value is (data mean; PCA eigenvalues in
-- descending order; explained variance per PCA eigenvector; PCA
-- eigenvectors as rows of a matrix; projections of data points onto
-- PCA eigenvectors, one per row of a matrix).
pca :: Matrix Double -> (Vector Double, Matrix Double, Matrix Double)
pca xs = (varexp, evecs, projs)
where (mean, cov) = meanCov xs
(_, evals, evecCols) = svd cov
evecs = fromRows $ map evecSigns $ toColumns evecCols
evecSigns ev = let maxelidx = maxIndex $ cmap abs ev
sign = signum (ev ! maxelidx)
in cmap (sign *) ev
varexp = scale (1.0 / sumElements evals) evals
projs = fromRows $ map project $ toRows xs
project x = evecs #> (x - mean)
-- Make plots.
doPlot :: FileFormat -> String ->
Matrix Double -> Matrix Double -> Matrix Double -> Int
-> IO ()
doPlot ptype suffix dat evecs projs idx = do
-- Set up plot characteristics depending on which one we're doing.
let (showEvecs, showProjs) = case idx of
2 -> (True, False)
3 -> (True, True)
_ -> (False, False)
-- Turn the eigenvectors into plotting data.
let toTuple v = (v ! 0, v ! 1)
makeEvec idx = ((0, 0), toTuple $ toRows evecs !! idx)
evs = [makeEvec 0, makeEvec 1]
-- Set up the plot range.
let vdat = flatten dat
rng = (minElement vdat, maxElement vdat)
-- Choose a "typical" point to project.
let typposs = map (\x -> x < 3 && x > 2) $
toList $ flatten $ takeColumns 1 projs
typys = fromList $ zipWith (\f y -> if f && y > 0.0 then y else 0.0)
typposs (toList $ flatten $ dropColumns 1 projs)
typidx = maxIndex typys
typical = dat ! typidx
-- Make square plots...
let fname = "pca-2d-" ++ show idx ++ "." ++ suffix
toFile (fo_format .~ ptype $ fo_size .~ (600, 600) $ def) fname $ do
-- Bigger axis labels and axis labelling.
layout_x_axis . laxis_style . axis_label_style . font_size .= 20.0
layout_y_axis . laxis_style . axis_label_style . font_size .= 20.0
when (idx == 0) $ do
layout_x_axis . laxis_title .= "Shell length (cm)"
layout_x_axis . laxis_title_style . font_size .= 20.0
layout_y_axis . laxis_title .= "Shell width (cm)"
layout_y_axis . laxis_title_style . font_size .= 20.0
-- Make both axes cover the same range to get orthogonal-looking
-- eigenvectors.
layout_x_axis . laxis_generate .= scaledAxis def rng
layout_y_axis . laxis_generate .= scaledAxis def rng
-- Plot data points.
plot $ liftEC $ do
plot_points_style . point_radius .= 4
plot_points_style . point_color .=
(if idx < 2 then opaque red else withOpacity red 0.5)
plot_points_values .= (map (\v -> (v ! 0, v ! 1)) $ toRows dat)
-- Plot "typical" point.
when showProjs $ plot $ liftEC $ do
plot_points_style . point_radius .= 6
plot_points_style . point_color .= opaque green
plot_points_values .= [(typical ! 0, typical ! 1)]
-- Plot "typical" point projections.
let t1 = dat ! typidx ! 0 ; t2 = dat ! typidx ! 1
c1 = projs ! typidx ! 0 ; c2 = projs ! typidx ! 1
e1 = evecs ! 0 ; e2 = evecs ! 1
when showProjs $ do
projection c1 e1 (dat ! typidx)
projection c2 e2 (dat ! typidx)
-- Plot eigenvectors.
when showEvecs $ plot (vectorField evs)
projection c e t = plot $ liftEC $ do
plot_lines_style . line_color .= opaque green
plot_lines_values .= [[(0, 0), (c * e ! 0, c * e ! 1), (t ! 0, t ! 1)]]
vectorField dat = fmap plotVectorField $ liftEC $ do
plot_vectors_values .= dat
plot_vectors_scale .= 0
plot_vectors_style . vector_line_style . line_color .= opaque black
plot_vectors_style . vector_line_style . line_width .= 3.5
plot_vectors_style . vector_head_style . point_color .= opaque black
plot_vectors_style . vector_head_style . point_radius .= 7.0
main :: IO ()
main = do
let dat = generateTestData
(varexp, evecs, projs) = pca dat
(mean, cov) = meanCov dat
cdat = fromRows $ map (subtract mean) $ toRows dat
forM_ [(PNG, "png"), (PDF, "pdf"), (SVG, "svg")] $ \(ptype, suffix) -> do
doPlot ptype suffix dat evecs projs 0
doPlot ptype suffix cdat evecs projs 1
doPlot ptype suffix cdat evecs projs 2
doPlot ptype suffix cdat evecs projs 3
putStrLn $ "FRACTIONAL VARIANCE EXPLAINED: " ++ show varexp
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment