MInner · November 24, 2020 20:51
diff --git a/tfds_feature_dict.py b/tfds_feature_dict.py
 import tensorflow as tf
 import tensorflow_datasets as tfds

 # Feature schema: a 28x28x1 float32 image, a binary 'no'/'yes' class label,
 # and a nested metadata dict holding an int64 id and a string language.
 image_feature = tfds.features.Tensor(shape=(28, 28, 1), dtype=tf.float32)
 label_feature = tfds.features.ClassLabel(names=['no', 'yes'])
 metadata_features = {'id': tf.int64, 'language': tf.string}
 spec = tfds.features.FeaturesDict({
     'image': image_feature,
     'label': label_feature,
     'metadata': metadata_features,
 })

 # Two in-memory examples shaped like the feature spec; each one gets its own
 # freshly sampled random image.
 data = [
     {
         'image': tf.random.uniform((28, 28, 1)),
         'label': label,
         'metadata': {'id': example_id, 'language': language},
     }
     for label, example_id, language in (('yes', 10, 'en'), ('no', 15, 'ru'))
 ]


 # Parsing a single example: encode it with the spec, serialize it, then parse
 # it back with a spec that is reloaded from the config saved on disk.
 single_data = data[0]
 serializer = tfds.core.example_serializer.ExampleSerializer(spec.get_serialized_info())
 single_tf_example = serializer.serialize_example(spec.encode_example(single_data))

 # Make sure the target directory exists before the config is written into it,
 # so the example also runs on a machine where it was never created.
 # makedirs succeeds when the directory is already present.
 tf.io.gfile.makedirs('/tmp/dataset_config')
 spec.save_config('/tmp/dataset_config')

 # Rebuild the spec purely from the saved config and parse with it.
 loaded_spec = tfds.features.FeaturesDict.from_config('/tmp/dataset_config')
 parser = tfds.core.example_parser.ExampleParser(loaded_spec.get_serialized_info())
 single_sample = parser.parse_example(single_tf_example)

 print(single_sample['image'].shape, single_sample['label'],
       single_sample['metadata']['id'])

 # Parsing a tf.data.Dataset of serialized tf.train.Examples.
 # parse_example is applied through Dataset.map, which works well in graph mode.

 tf_examples_ds = tf.data.Dataset.from_generator(
     # Lazily encode and serialize each in-memory example, one at a time.
     lambda: (
         serializer.serialize_example(spec.encode_example(example))
         for example in data
     ),
     output_types=tf.string,
 )

 # Reload the spec from the saved config and build a parser from it.
 loaded_spec = tfds.features.FeaturesDict.from_config('/tmp/dataset_config')
 serialized_info = loaded_spec.get_serialized_info()
 parser = tfds.core.example_parser.ExampleParser(serialized_info)
 parsed_ds = tf_examples_ds.map(parser.parse_example)

 # Pull the first parsed example and show a few of its fields.
 data2 = next(iter(parsed_ds))
 print(
     data2['image'].shape,
     data2['label'],
     data2['metadata']['id'],
 )
	import tensorflow as tf
	import tensorflow_datasets as tfds

	# Feature schema: a 28x28x1 float32 image, a binary 'no'/'yes' class label,
	# and a nested metadata dict holding an int64 id and a string language.
	image_feature = tfds.features.Tensor(shape=(28, 28, 1), dtype=tf.float32)
	label_feature = tfds.features.ClassLabel(names=['no', 'yes'])
	metadata_features = {'id': tf.int64, 'language': tf.string}
	spec = tfds.features.FeaturesDict({
	    'image': image_feature,
	    'label': label_feature,
	    'metadata': metadata_features,
	})

	# Two in-memory examples shaped like the feature spec; each one gets its own
	# freshly sampled random image.
	data = [
	    {
	        'image': tf.random.uniform((28, 28, 1)),
	        'label': label,
	        'metadata': {'id': example_id, 'language': language},
	    }
	    for label, example_id, language in (('yes', 10, 'en'), ('no', 15, 'ru'))
	]


	# Parsing a single example: encode it with the spec, serialize it, then parse
	# it back with a spec that is reloaded from the config saved on disk.
	single_data = data[0]
	serializer = tfds.core.example_serializer.ExampleSerializer(spec.get_serialized_info())
	single_tf_example = serializer.serialize_example(spec.encode_example(single_data))

	# Make sure the target directory exists before the config is written into it,
	# so the example also runs on a machine where it was never created.
	# makedirs succeeds when the directory is already present.
	tf.io.gfile.makedirs('/tmp/dataset_config')
	spec.save_config('/tmp/dataset_config')

	# Rebuild the spec purely from the saved config and parse with it.
	loaded_spec = tfds.features.FeaturesDict.from_config('/tmp/dataset_config')
	parser = tfds.core.example_parser.ExampleParser(loaded_spec.get_serialized_info())
	single_sample = parser.parse_example(single_tf_example)

	print(single_sample['image'].shape, single_sample['label'],
	      single_sample['metadata']['id'])

	# Parsing a tf.data.Dataset of serialized tf.train.Examples.
	# parse_example is applied through Dataset.map, which works well in graph mode.

	tf_examples_ds = tf.data.Dataset.from_generator(
	    # Lazily encode and serialize each in-memory example, one at a time.
	    lambda: (
	        serializer.serialize_example(spec.encode_example(example))
	        for example in data
	    ),
	    output_types=tf.string,
	)

	# Reload the spec from the saved config and build a parser from it.
	loaded_spec = tfds.features.FeaturesDict.from_config('/tmp/dataset_config')
	serialized_info = loaded_spec.get_serialized_info()
	parser = tfds.core.example_parser.ExampleParser(serialized_info)
	parsed_ds = tf_examples_ds.map(parser.parse_example)

	# Pull the first parsed example and show a few of its fields.
	data2 = next(iter(parsed_ds))
	print(
	    data2['image'].shape,
	    data2['label'],
	    data2['metadata']['id'],
	)