Skip to content

Instantly share code, notes, and snippets.

@samikrc
Created November 8, 2015 17:47
Show Gist options
  • Save samikrc/a2738a13e12dfb1381c3 to your computer and use it in GitHub Desktop.
Save samikrc/a2738a13e12dfb1381c3 to your computer and use it in GitHub Desktop.
word2vec code using dl4j
// Single source for the vector dimensionality: the lookup table's vectorLength and the
// model's layerSize below are both given this value.
final int vectorSize = 25;

// Sentence source over inputLines; smsPreprocessor is handed to the iterator.
CollectionSentenceIterator sentences = new CollectionSentenceIterator(smsPreprocessor, inputLines);

// The same cache instance is passed to both the lookup table and the model builder.
InMemoryLookupCache vocab = new InMemoryLookupCache();
WeightLookupTable lookupTable = new InMemoryLookupTable.Builder()
        .vectorLength(vectorSize)
        .useAdaGrad(false)
        .cache(vocab)
        .lr(0.025f)
        .build();

// Configure the model (fixed seed of 42) on top of the table and cache created above.
Word2Vec word2VecModel = new Word2Vec.Builder()
        .minWordFrequency(5)
        .iterations(3)
        .layerSize(vectorSize)
        .lookupTable(lookupTable)
        .vocabCache(vocab)
        .seed(42)
        .windowSize(5)
        .iterate(sentences)
        .build();

// Train the model.
word2VecModel.fit();

// Write the word vectors out as a text file.
WordVectorSerializer.writeWordVectors(word2VecModel, "data/word2vecModel-dl4j.txt");
System.out.println("\n## Using library: dl4j");
// Load the word vectors from the saved text file.
WordVectors wordVectors = WordVectorSerializer.loadTxtVectors(new File("data/word2vecModel-dl4j.txt"));
List<String> words = Arrays.asList("debited", "hdfcbank", "icici");
// For each probe word, print its raw vector and its 5 nearest words. Processing is
// best-effort per word: a failure on one word is reported and the loop moves on.
for (String word : words)
{
    try
    {
        double[] vector = wordVectors.getWordVector(word);
        if (vector == null)
        {
            // No vector came back for this word; say so explicitly instead of letting
            // the null reach Doubles.asList.
            System.err.println("No vector available for word: " + word);
            continue;
        }
        // Print the raw vector
        printVector(word, Doubles.asList(vector));
        // Print a few nearest words
        Collection<String> list = wordVectors.wordsNearest(word, 5);
        System.out.println("\tNearest 5 words: " + list);
    }
    catch (Exception ex)
    {
        // Previously swallowed silently; report which word failed and why, then continue.
        System.err.println("Failed to process word '" + word + "': " + ex);
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment