stungeye · December 6, 2018 10:23 · stungeye · Jun 1, 2018
diff --git a/01_extract_meow_reader_images.rb b/01_extract_meow_reader_images.rb
 # Step 1 - Collect reference images of reading cats
 #
 # This script extracts the URLs of all the cats on meow-reader-blog.tumblr.com
 #
 # INPUT: Tumblr API
 # OUTPUT: JSON file of image URLS (meow-reader-images.json)

 require 'net/http'
 require 'json'
 
 # Upper bound of the offset range requested from the API.
 total_posts = 218
 images = []

 # Request the blog's photo posts page by page, advancing the offset by 20 each time.
 0.step(total_posts, 20) do |offset|
   url = "https://api.tumblr.com/v2/blog/meow-reader-blog.tumblr.com/posts/photo?offset=#{offset}&api_key=q7faGrbfcKJBFxikysPD4z9DX7gH6SuZchoSvZKdxXXHLbihFq"
   puts "Fetching #{url}..."
   body = Net::HTTP.get(URI(url))
   puts "Parsing JSON reponse..."
   payload = JSON.parse(body)

   # Skip pages the API did not report as OK, and pages without any posts.
   next unless payload['meta']['msg'] == 'OK'

   posts = payload['response']['posts']
   next if posts.empty?

   puts "Adding #{posts.size} images..."
   # Keep the first photo of each post, using the first entry of its alt_sizes list.
   images.concat(posts.map { |post| post['photos'].first['alt_sizes'].first['url'] })
 end

 puts "Writing JSON file..."
 File.write("./meow-reader-images.json", images.to_json)
diff --git a/02_find_meow_reader_concepts.rb b/02_find_meow_reader_concepts.rb
 # Step 2 - Discover all the concepts present in the meow reader images.
 #
 # This script uses the clarifai.com API to identify all concepts present in the meow reader images.
 #
 # INPUT: JSON file from previous step (meow-reader-images.json)
 # OUTPUT: JSON file of concepts and their counts / confidence ratings (meow-reader-concepts.json)

 require 'rubygems'
 require 'bundler/setup'
 require 'yaml'
 require 'clarification'

 # JSON.parse and #to_json are used below, but this script only requires 'yaml'.
 # Load json explicitly instead of relying on another library having loaded it.
 require 'json'

 Clarification.configure do |config|
   config.api_key = '<API KEY GOES HERE>'
   config.default_public_models = [:general]
 end

 # Load the list of image URLs from the JSON input file.
 file = File.read "./meow-reader-images.json"
 images = JSON.parse(file)
 puts "Loaded #{images.size} images."

 client = Clarification::Client.new

 # concept name => { count: number of occurrences, confidence: [value for each occurrence] }
 # The block form gives every new key its own fresh hash and array.
 concepts = Hash.new { |hash, key| hash[key] = { count: 0, confidence: [] } }

 images.each do |image|
   response = client.predict.by_url(image)
   print '.' # progress marker, one per image

   response[:general].concepts.each do |concept|
     entry = concepts[concept.name]
     entry[:count] += 1                 # Running count of how many images this concept appears in.
     entry[:confidence] << concept.value # Confidence rating for this occurrence of the concept.
   end
 end

 puts "Writing JSON file..."
 File.open("./meow-reader-concepts.json", "w") do |out|
   out.write(concepts.to_json)
 end
diff --git a/03_sort_found_concepts.rb b/03_sort_found_concepts.rb
 # Step 3 - Sort the concepts found in the meow reader images by count and confidence.
 #
 # This script sorts the concepts in two ways:
 #  1) By how often the concept appeared in the meow reader images (by count).
 #  2) By how confident the Clarifai API was in the concept, while ignoring low count concepts (5 or fewer appearances).
 # 
 # INPUT: JSON file from the previous step  (meow-reader-concepts.json)
 # OUTPUT: Prints sorted concepts to console.

 require 'json'

 # Reopens Array to add an arithmetic-mean helper.
 class Array
   # Arithmetic mean of the elements.
   #
   # Uses float division so arrays of integers are not truncated, and returns
   # 0.0 for an empty array instead of raising ZeroDivisionError.
   #
   # @return [Float] the mean of the elements, or 0.0 when the array is empty
   def avg
     return 0.0 if empty?

     sum.to_f / size
   end
 end

 file = File.read "./meow-reader-concepts.json"
 # JSON.parse yields string keys, so each entry is
 # name => { 'count' => Integer, 'confidence' => [values] }.
 concepts = JSON.parse(file)

 # Sorted By Count (highest first).
 # The lookup must use the string key 'count': the symbol :count is nil on parsed
 # JSON, which made every comparison return 0 and left the list unsorted.
 concepts_sorted_by_count = concepts.sort_by { |_name, stats| -stats['count'] }

 puts "Loaded #{concepts_sorted_by_count.size} concepts."
 puts "Concepts By Count Showing Average Confidence:"
 puts concepts_sorted_by_count.map { |name, stats| "#{name} (#{stats['count']}) [#{stats['confidence'].avg}]" }
                              .join(', ')

 # Sorted By Confidence (highest average first); sort_by computes each average once.
 puts "Concepts By Average Confidence (Ignoring Low Count Concepts):"
 concepts_sorted_by_confidence = concepts.sort_by { |_name, stats| -stats['confidence'].avg }
 # Only concepts that appeared more than 5 times are printed.
 puts concepts_sorted_by_confidence.select { |_name, stats| stats['count'] > 5 }
                                   .map { |name, stats| "#{name} (#{stats['count']}) [#{stats['confidence'].avg}]" }
                                   .join(', ')
diff --git a/04_load_new_animal_images.rb b/04_load_new_animal_images.rb
 # Step 4 - Upload all the images from animalsthatdopeoplethings.tumblr.com to our Clarifai account.
 #
 # This script uploads a collection of images from URLs into our Clarifai account. 
 #
 # INPUT: JSON file produced by a modified version of the script from step 1.
 # OUTPUT: None

 require 'rubygems'
 require 'bundler/setup'
 require 'json'
 require 'clarification'

 # Configure the Clarification client: API key placeholder and the :general public model.
 Clarification.configure do |settings|
   settings.api_key = '<API KEY GOES HERE>'
   settings.default_public_models = [:general]
 end

 # Read the list of images to upload from the JSON input file.
 images = JSON.parse(File.read("./animalsthatdopeoplethings.json"))

 puts "Loaded #{images.size} images."
 client = Clarification::Client.new

 # Hand the images to the search index in slices of at most 128, printing one dot per slice.
 images.each_slice(128).each do |batch|
   print "."
   client.search.index_images(batch)
 end
diff --git a/05_search_images_by_concept.rb b/05_search_images_by_concept.rb
 # Step 5 - Search the uploaded AnimalsThatDoPeopleThings images for reading concepts.
 #
 # This script searches the corpus of images we uploaded in step 4 for the concepts we identified in step 3.
 # It then generates an HTML document of the reading animals discovered within this corpus of images.
 #
 # INPUT: Array of concepts we selected from the output from step 3.
 # OUTPUT: An HTML document of the found images of reading animals. (reading-animals.html)

 require 'rubygems'
 require 'bundler/setup'
 require 'yaml'
 require 'set'
 require 'clarification'

 Clarification.configure do |config|
   config.api_key = '<API KEY GOES HERE>'
   config.default_public_models = [:general]
 end

 client = Clarification::Client.new

 # Hand selected collection of the most promising concepts discovered in step 3.
 concepts = ['book bindings', 'book series', 'book', 'education', 'literature', 'newspaper', 'research', 'technology']

 # A Set, so a URL returned for several concepts is only kept once.
 result_urls = Set.new

 concepts.each do |concept|
   # merge adds to the existing set in place instead of building a new Set per concept.
   result_urls.merge(client.search.by_concept(concept).hits.map(&:url))
 end

 # File.open, matching the other scripts, rather than Kernel#open, which can also
 # spawn a subprocess when handed a path that starts with "|".
 File.open("./reading-animals.html", "w") do |file|
   file << "<!DOCTYPE html>\n"
   file << "<html lang='en'>\n"
   file << "<head>\n"
   file << "<meta charset='utf-8'>\n"
   file << "<title>Animals Reading</title>\n"
   file << "<link rel='stylesheet' href='style.css'>\n"
   file << "</head><body><h1>Animals Reading</h1><div id='images'>\n"

   # One image container per unique result URL.
   result_urls.each do |url|
     file << "<div class='image'><img src='#{url}'></div>"
   end

   file << "</div></body></html>"
 end
	# Step 1 - Collect reference images of reading cats
	#
	# This script extracts the URLs of all the cats on meow-reader-blog.tumblr.com
	#
	# INPUT: Tumblr API
	# OUTPUT: JSON file of image URLS (meow-reader-images.json)

	require 'net/http'
	require 'json'

	# Upper bound of the offset range requested from the API.
	total_posts = 218
	images = []

	# Request the blog's photo posts page by page, advancing the offset by 20 each time.
	# (Block parameters use plain pipes; the escaped "\|" form is not valid Ruby.)
	(0..total_posts).step(20) do |offset|
	  url = "https://api.tumblr.com/v2/blog/meow-reader-blog.tumblr.com/posts/photo?offset=#{offset}&api_key=q7faGrbfcKJBFxikysPD4z9DX7gH6SuZchoSvZKdxXXHLbihFq"
	  puts "Fetching #{url}..."
	  uri = URI(url)
	  json_response = Net::HTTP.get(uri)
	  puts "Parsing JSON reponse..."
	  response = JSON.parse(json_response)

	  # Only collect from pages the API reports as OK and that contain posts.
	  if response['meta']['msg'] == 'OK' && !response['response']['posts'].empty?
	    puts "Adding #{response['response']['posts'].size} images..."
	    # Keep the first photo of each post, using the first entry of its alt_sizes list.
	    images += response['response']['posts'].map { |post| post['photos'][0]['alt_sizes'][0]['url'] }
	  end
	end

	puts "Writing JSON file..."
	File.open("./meow-reader-images.json", "w") do |file|
	  file.write(images.to_json)
	end
	# Step 2 - Discover all the concepts present in the meow reader images.
	#
	# This script uses the clarifai.com API to identify all concepts present in the meow reader images.
	#
	# INPUT: JSON file from previous step (meow-reader-images.json)
	# OUTPUT: JSON file of concepts and their counts / confidence ratings (meow-reader-concepts.json)

	require 'rubygems'
	require 'bundler/setup'
	require 'yaml'
	require 'clarification'

	# JSON.parse and #to_json are used below, but this script only requires 'yaml'.
	# Load json explicitly instead of relying on another library having loaded it.
	require 'json'

	# (Block parameters use plain pipes; the escaped "\|" form is not valid Ruby.)
	Clarification.configure do |config|
	  config.api_key = '<API KEY GOES HERE>'
	  config.default_public_models = [:general]
	end

	# Load the list of image URLs from the JSON input file.
	file = File.read "./meow-reader-images.json"
	images = JSON.parse(file)
	puts "Loaded #{images.size} images."

	client = Clarification::Client.new

	# concept name => { count: number of occurrences, confidence: [value for each occurrence] }
	# The block form gives every new key its own fresh hash and array.
	concepts = Hash.new { |hash, key| hash[key] = { count: 0, confidence: [] } }

	images.each do |image|
	  response = client.predict.by_url(image)
	  print '.' # progress marker, one per image

	  response[:general].concepts.each do |concept|
	    concepts[concept.name][:count] += 1 # Running count of how many times this concept appears in one of our images.
	    concepts[concept.name][:confidence] << concept.value # What was the confidence rating for this occurrence of the concept.
	  end
	end

	puts "Writing JSON file..."
	File.open("./meow-reader-concepts.json", "w") do |out|
	  out.write(concepts.to_json)
	end
	# Step 3 - Sort the concepts found in the meow reader images by count and confidence.
	#
	# This script sorts the concepts in two ways:
	# 1) By how often the concept appeared in the meow reader images (by count).
	# 2) By how confident the Clarifai API was in the concept, while ignoring low count concepts (5 or fewer appearances).
	#
	# INPUT: JSON file from the previous step (meow-reader-concepts.json)
	# OUTPUT: Prints sorted concepts to console.

	require 'json'

	# Reopens Array to add an arithmetic-mean helper.
	class Array
	  # Arithmetic mean of the elements.
	  #
	  # Uses float division so arrays of integers are not truncated, and returns
	  # 0.0 for an empty array instead of raising ZeroDivisionError.
	  #
	  # @return [Float] the mean of the elements, or 0.0 when the array is empty
	  def avg
	    return 0.0 if empty?

	    sum.to_f / size
	  end
	end

	file = File.read "./meow-reader-concepts.json"
	# JSON.parse yields string keys, so each entry is
	# name => { 'count' => Integer, 'confidence' => [values] }.
	concepts = JSON.parse(file)

	# Sorted By Count (highest first).
	# The lookup must use the string key 'count': the symbol :count is nil on parsed
	# JSON, which made every comparison return 0 and left the list unsorted.
	# (Block parameters use plain pipes; the escaped "\|" form is not valid Ruby.)
	concepts_sorted_by_count = concepts.sort { |a, b| b[1]['count'] <=> a[1]['count'] }

	puts "Loaded #{concepts_sorted_by_count.size} concepts."
	puts "Concepts By Count Showing Average Confidence:"
	puts concepts_sorted_by_count.map { |a| "#{a[0]} (#{a[1]['count']}) [#{a[1]['confidence'].avg}]" }
	                             .join(', ')

	# Sorted By Confidence (highest average first).
	puts "Concepts By Average Confidence (Ignoring Low Count Concepts):"
	concepts_sorted_by_confidence = concepts.sort { |a, b| b[1]['confidence'].avg <=> a[1]['confidence'].avg }
	# Only concepts that appeared more than 5 times are printed.
	puts concepts_sorted_by_confidence.select { |c| c[1]['count'] > 5 }
	                                  .map { |a| "#{a[0]} (#{a[1]['count']}) [#{a[1]['confidence'].avg}]" }
	                                  .join(', ')
	# Step 4 - Upload all the images from animalsthatdopeoplethings.tumblr.com to our Clarifai account.
	#
	# This script uploads a collection of images from URLs into our Clarifai account.
	#
	# INPUT: JSON file produced by a modified version of the script from step 1.
	# OUTPUT: None

	require 'rubygems'
	require 'bundler/setup'
	require 'json'
	require 'clarification'

	# Configure the Clarification client: API key placeholder and the :general public model.
	# (Block parameters use plain pipes; the escaped "\|" form is not valid Ruby.)
	Clarification.configure do |config|
	  config.api_key = '<API KEY GOES HERE>'
	  config.default_public_models = [:general]
	end

	# Read the list of images to upload from the JSON input file.
	file = File.read "./animalsthatdopeoplethings.json"
	images = JSON.parse(file)

	puts "Loaded #{images.size} images."
	client = Clarification::Client.new

	# Hand the images to the search index in slices of at most 128, printing one dot per slice.
	images.each_slice(128) do |images_chunk|
	  print "."
	  client.search.index_images(images_chunk)
	end
	# Step 5 - Search the uploaded AnimalsThatDoPeopleThings images for reading concepts.
	#
	# This script searches the corpus of images we uploaded in step 4 for the concepts we identified in step 3.
	# It then generates an HTML document of the reading animals discovered within this corpus of images.
	#
	# INPUT: Array of concepts we selected from the output from step 3.
	# OUTPUT: An HTML document of the found images of reading animals. (reading-animals.html)

	require 'rubygems'
	require 'bundler/setup'
	require 'yaml'
	require 'set'
	require 'clarification'

	# (Block parameters use plain pipes; the escaped "\|" form is not valid Ruby.)
	Clarification.configure do |config|
	  config.api_key = '<API KEY GOES HERE>'
	  config.default_public_models = [:general]
	end

	client = Clarification::Client.new

	# Hand selected collection of the most promising concepts discovered in step 3.
	concepts = ['book bindings', 'book series', 'book', 'education', 'literature', 'newspaper', 'research', 'technology']

	# A Set, so a URL returned for several concepts is only kept once.
	result_urls = Set.new

	concepts.each do |concept|
	  # merge adds to the existing set in place instead of building a new Set per concept.
	  result_urls.merge(client.search.by_concept(concept).hits.map(&:url))
	end

	# File.open, matching the other scripts, rather than Kernel#open, which can also
	# spawn a subprocess when handed a path that starts with "|".
	File.open("./reading-animals.html", "w") do |file|
	  file << "<!DOCTYPE html>\n"
	  file << "<html lang='en'>\n"
	  file << "<head>\n"
	  file << "<meta charset='utf-8'>\n"
	  file << "<title>Animals Reading</title>\n"
	  file << "<link rel='stylesheet' href='style.css'>\n"
	  file << "</head><body><h1>Animals Reading</h1><div id='images'>\n"

	  # One image container per unique result URL.
	  result_urls.each do |url|
	    file << "<div class='image'><img src='#{url}'></div>"
	  end

	  file << "</div></body></html>"
	end