Created
November 8, 2015 17:47
-
-
Save samikrc/a2738a13e12dfb1381c3 to your computer and use it in GitHub Desktop.
word2vec code using dl4j
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Train a word2vec model with dl4j over the in-memory sentences, persist it as text,
// reload it, and print the vector plus nearest neighbours for a few probe words.
// `smsPreprocessor`, `inputLines` and `printVector` are defined outside this fragment.

// Single source of truth for the embedding size: the lookup table and the model
// builder must agree on it, so it is declared once instead of repeating the literal.
final int dl4jVectorSize = 25;
// Same file is written after training and read back below.
final String dl4jModelPath = "data/word2vecModel-dl4j.txt";

CollectionSentenceIterator collectionSentenceIterator =
        new CollectionSentenceIterator(smsPreprocessor, inputLines);

// The same cache instance is shared by the lookup table and the model builder.
InMemoryLookupCache cache = new InMemoryLookupCache();
WeightLookupTable table = new InMemoryLookupTable.Builder()
        .vectorLength(dl4jVectorSize)
        .useAdaGrad(false)
        .cache(cache)
        .lr(0.025f)
        .build();

Word2Vec word2VecModel = new Word2Vec.Builder()
        .minWordFrequency(5)
        .iterations(3)
        .layerSize(dl4jVectorSize)
        .lookupTable(table)
        .vocabCache(cache)
        .seed(42)
        .windowSize(5)
        .iterate(collectionSentenceIterator)
        .build();
word2VecModel.fit();

// Save the model
WordVectorSerializer.writeWordVectors(word2VecModel, dl4jModelPath);

System.out.println("\n## Using library: dl4j");
// Reload from disk so the queries below exercise the serialized form of the model.
WordVectors wordVectors = WordVectorSerializer.loadTxtVectors(new File(dl4jModelPath));

List<String> words = Arrays.asList("debited", "hdfcbank", "icici");
words.forEach(word ->
{
    try
    {
        double[] vector = wordVectors.getWordVector(word);
        if (vector == null)
        {
            // NOTE(review): presumably happens when the word fell below minWordFrequency
            // and is absent from the vocabulary -- confirm against the dl4j version in use.
            System.err.println("\tNo vector available for word: " + word);
            return;
        }

        // Print the raw vector
        printVector(word, Doubles.asList(vector));

        // Print a few nearest words
        Collection<String> list = wordVectors.wordsNearest(word, 5);
        System.out.println("\tNearest 5 words: " + list);
    }
    catch (Exception ex)
    {
        // Keep the best-effort behaviour (one failing word must not abort the rest),
        // but report the failure instead of silently discarding it.
        System.err.println("\tFailed to query word '" + word + "': " + ex);
    }
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment