Created
May 5, 2020 18:59
-
-
Save a7v8x/8d2a9a819078ee2657a2be6910714349 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
tfds.core.DatasetInfo( | |
name='imdb_reviews', | |
version=1.0.0, | |
description='Large Movie Review Dataset. | |
This is a dataset for binary sentiment classification containing substantially more data than previous benchmark datasets. We provide a set of 25,000 highly polar movie reviews for training, and 25,000 for testing. There is additional unlabeled data for use as well.', | |
homepage='http://ai.stanford.edu/~amaas/data/sentiment/', | |
features=FeaturesDict({ | |
'label': ClassLabel(shape=(), dtype=tf.int64, num_classes=2), | |
'text': Text(shape=(), dtype=tf.string), | |
}), | |
total_num_examples=100000, | |
splits={ | |
'test': 25000, | |
'train': 25000, | |
'unsupervised': 50000, | |
}, | |
supervised_keys=('text', 'label'), | |
citation=InProceedings{maas-EtAl:2011:ACL-HLT2011, | |
author = {Maas, Andrew L. and Daly, Raymond E. and Pham, Peter T. and Huang, Dan and Ng, Andrew Y. and Potts, Christopher}, | |
title = {Learning Word Vectors for Sentiment Analysis}, | |
booktitle = {Proceedings of the 49th Annual Meeting of the Association for Computational Linguistics: Human Language Technologies}, | |
month = {June}, | |
year = {2011}, | |
address = {Portland, Oregon, USA}, | |
publisher = {Association for Computational Linguistics}, | |
pages = {142--150}, | |
url = {http:\/\/www.aclweb.org\/anthology\/P11-1015} | |
}, | |
redistribution_info=, | |
) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment