-
-
Save ismailmechbal/2bcd579dbd9c05944fe556db084fe68b to your computer and use it in GitHub Desktop.
Script to import books from Instapaper to Airtable. Will not work out of the box.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Airtable-backed book record that can look itself up on Goodreads and copy
# the Goodreads metadata (title, ISBN, author, rating, categories, ...) into
# its own fields.
#
# Fields are always addressed by their Airtable column name as a String
# ("Title", "ISBN", "Author", ...).
class Book < Airrecord::Table
  # Table linked from the "Endorsements" column of Books.
  class Endorser < Airrecord::Table
    self.base_key = ""
    self.table_name = "Endorser"
  end

  self.base_key = ""
  self.table_name = "Books"

  has_many :endorsements, class: 'Book::Endorser', column: 'Endorsements'

  # Goodreads shelf names that describe reading status or ownership rather
  # than subject matter; they are dropped before categorising.
  GOODREADS_BLACKLIST = %w(
    to-read favorites currently-reading owned
    series favourites re-read owned-books
    books-i-own wish-list si audiobook
    book-club ebook kindle to-buy
  ).freeze

  # Capitalised Goodreads shelf name => canonical name from CATEGORIES.
  GOODREADS_MERGE = {
    "Non-fiction" => "Nonfiction",
    "Classic" => "Classics",
    "Cookbook" => "Cooking",
    "Cookbooks" => "Cooking",
    "Biography" => "Memoir",
    "Biographies" => "Memoir",
    "Autobiography" => "Memoir",
    "Auto-biography" => "Memoir",
    "Sci-fi" => "Science Fiction",
    "Scifi" => "Science Fiction",
    "Management" => "Leadership",
    "Self-help" => "Personal Development",
    "Selfhelp" => "Personal Development",
    "Personal-development" => "Personal Development",
    "Self-improvement" => "Personal Development",
    "Science-fiction" => "Science Fiction",
    # Must be the CATEGORIES spelling: the merge is applied only once, so a
    # value of "Young-adult" would never reach "Young Adult" and be dropped.
    "Ya" => "Young Adult",
    "Tech" => "Technology",
    "Young-adult" => "Young Adult",
    "Computer-science" => "Programming",
    "Investing" => "Economics",
    "Fitness" => "Health",
    "Food" => "Cooking",
    "Finance" => "Economics",
    "Software" => "Programming",
    "Literature" => "Classics",
  }.freeze

  # The only values ever written to the "Categories" field.
  CATEGORIES = [
    "Business", "Psychology", "Science", "Personal Development", "Philosophy",
    "History", "Fiction", "Memoir", "Leadership", "Classics", "Economics",
    "Cooking", "Programming", "Health", "Politics", "Technology", "Science Fiction",
    "Entrepreneurship", "Design", "Writing", "Fantasy", "Young Adult", "Nonfiction",
  ].freeze

  # Shared Goodreads API client, built once per class. The credentials are
  # blank placeholders and have to be filled in.
  def self.goodreads_client
    @client ||= Goodreads::Client.new(api_key: '', api_secret: '')
  end

  # Searches Goodreads by ISBN (or by the quoted title when there is no ISBN)
  # and returns the id of the best matching book.
  #
  # When this record has an author, the first result whose author name is
  # close to it (see #character_difference?) wins; otherwise, or when none is
  # close, the first result is used.
  #
  # Returns nil when the search yields no works.
  def goodreads_id
    query = self["ISBN"] || "\"#{self["Title"]}\""
    search = goodreads_client.search_books(query)
    return unless search.results.respond_to?(:work)

    # A single hit comes back as one object rather than a list.
    matches = [search.results.work].flatten
    author = self["Author"]
    best_match = author && matches.find { |match|
      character_difference?(match["best_book"]["author"]["name"], author)
    }
    best_match ||= matches.first
    return unless best_match

    best_match.best_book.id
  end

  # Fetches the Goodreads book for this record, memoised once found.
  # Returns nil (without memoising) when no Goodreads id could be determined.
  def goodreads_book
    return @book if @book

    id = goodreads_id
    return unless id

    @book = goodreads_client.book(id)
  end

  # Maps the book's most popular Goodreads shelves onto CATEGORIES.
  #
  # n - how many non-blacklisted shelves to consider (default 5). Shelves
  #     that do not map onto a known category are discarded afterwards, so
  #     fewer than n categories may be returned.
  #
  # Returns an Array of unique category names; empty when the book or its
  # shelves are unavailable.
  def goodreads_categories(n = 5)
    book = goodreads_book
    return [] unless book

    popular = book.popular_shelves
    return [] if popular.blank?

    # Same normalisation as for works/authors: tolerate a lone shelf that is
    # not wrapped in a list.
    shelves = [popular.shelf].flatten
    return [] unless shelves.first.respond_to?(:name)

    candidates = shelves.map(&:name).reject { |name| GOODREADS_BLACKLIST.include?(name) }
    categories = candidates.first(n).map { |name|
      name = name.capitalize
      name = GOODREADS_MERGE.fetch(name, name)
      name if CATEGORIES.include?(name)
    }
    categories.compact.uniq
  end

  # Overwrites this record's fields with Goodreads data, prints a coloured
  # diff of the changes to stderr, and then creates/saves the record unless
  # the change looks suspicious or the book already exists.
  #
  # prevent_duplicates_from - records to check against; a record with the
  #                           same ISBN (or, when this book has no ISBN, the
  #                           same title) makes this one a duplicate.
  #
  # The record is flagged (reported to Rollbar and not persisted) when the
  # author changed to someone not resembling the previous author, or when the
  # new title does not start with the old title and the author was not
  # confirmed.
  def populate_from_goodreads(prevent_duplicates_from: [])
    book = goodreads_book
    unless book
      $stderr.puts "Unable to find book #{self["Title"]}"
      return
    end

    before = self.serializable_fields

    self["Title"] = book.title
    self["ISBN"] = book.isbn13 || self["ISBN"]
    self["Publication Year"] = publication_year_of(book)
    self["Goodreads Rating"] = book.average_rating
    self["Pages"] = book.num_pages
    authors = [book.authors.author].flatten
    self["Author"] = authors.first.name
    self["Categories"] = goodreads_categories.sort
    self["Goodreads Ratings"] = book.work.ratings_count

    difference = HashDiff.diff(before, self.serializable_fields)
    flagged = false
    author_ok = true

    $stderr.puts "\x1b[35m#{before["Title"]}\x1b[0m"

    # Entries are expected as ["~", key, old, new] for changes and
    # ["+", key, value] for additions, so for "+" `prev` holds the new value.
    # NOTE(review): the title check relies on the "Author" entry having been
    # seen before the "Title" entry - confirm HashDiff's ordering.
    difference.each do |(type, key, prev, new)|
      if key == "Author" && type == "~"
        unless authors.any? { |author| character_difference?(author.name, prev) }
          $stderr.puts "Author changed too much"
          flagged = true
          author_ok = false
        end
      end

      if key == "Title" && type == "~"
        unless new.downcase.start_with?(prev.downcase) || author_ok
          $stderr.puts "New title '#{new}' didn't start with old title '#{prev}'"
          flagged = true
        end
      end

      if type == "~"
        $stderr.puts "\x1b[34m#{type} #{key}: \x1b[31m#{prev} => \x1b[32m#{new}\x1b[0m"
      elsif type == "+"
        $stderr.puts "\x1b[34m#{type} #{key}: \x1b[32m#{prev}\x1b[0m"
      end
    end

    if flagged
      Rollbar.warn("Skipping book", title: self["Title"])
    elsif prevent_duplicates_from.any? { |other| duplicate_of?(other) }
      $stderr.puts "Skipping #{self["Title"]} due to duplicate"
    elsif new_record?
      create
    else
      save
    end
  end

  private

  def goodreads_client
    self.class.goodreads_client
  end

  # Original publication year of the work, falling back to this edition's
  # publication year when the former is missing. Always returns a String.
  def publication_year_of(book)
    year = book.work.original_publication_year
    # nil.to_s is "", which is truthy, so the emptiness test has to happen
    # before the conversion for the fallback to ever apply.
    year = book.publication_year if year.to_s.empty?
    year.to_s
  end

  # True when `other` describes the same book as this record. A missing ISBN
  # must not match another missing ISBN, so the title decides in that case.
  def duplicate_of?(other)
    isbn = self["ISBN"]
    return other["ISBN"] == isbn unless isbn.to_s.empty?

    other["Title"] == self["Title"]
  end

  # Loose similarity test: true when each string contains at most n distinct
  # characters that do not occur anywhere in the other string. Despite the
  # name, true means the two strings are considered close.
  def character_difference?(a, b, n = 4)
    a_chars = a.chars
    b_chars = b.chars
    (a_chars - b_chars).size <= n && (b_chars - a_chars).size <= n
  end
end
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Creates Book records from two sources: Instapaper bookmarks pointing at
# Amazon or Goodreads book pages, and Kindle highlights annotated as books.
class BookImport
  # Amazon storefront hosts whose product pages are scraped for an ISBN.
  AMAZON_HOST = /\A(www\.)?amazon\.(com|ca)/
  # Product-details entry on an Amazon page; capture 3 is the ISBN/ASIN.
  AMAZON_ISBN = /(ISBN|ASIN)(-13|-10)?:\s*<\/b>\s*(\w{10,13})/

  # Walks up to 500 Instapaper bookmarks. For each Amazon or Goodreads
  # bookmark the page is downloaded, the ISBN extracted, a Book created from
  # it and the bookmark deleted. Other bookmarks are ignored.
  #
  # A book page on which no ISBN can be found is reported on stderr and left
  # in Instapaper instead of aborting the whole import.
  #
  # Returns the non-nil results of #create_record_from_isbn.
  def instapaper
    InstapaperClient.bookmarks(limit: 500).to_enum(:each).map { |bookmark|
      uri = URI(bookmark.url)

      if uri.host =~ AMAZON_HOST
        match = page_body(uri).match(AMAZON_ISBN)
        isbn = match && match[3]
      elsif bookmark.url =~ /goodreads\.com/
        meta = Nokogiri::HTML(page_body(uri)).at('meta[property="books:isbn"]')
        isbn = meta && meta["content"]
      else
        next
      end

      if isbn
        create_record_from_isbn(isbn, bookmark.bookmark_id)
      else
        $stderr.puts "Unable to find ISBN for #{bookmark.url}"
        nil
      end
    }.compact
  end

  # Imports books mentioned in Kindle highlights.
  def kindle
    books_from_highlights
  end

  private

  # TODO: Do like what we do with words, where it puts the source multiple times
  # TODO: Refactor to be consistent with Words?
  # It does work though :)
  #
  # Reads the highlight sources from Readwise, takes every highlight whose
  # note starts with "book" (optionally preceded by a dot, case-insensitive)
  # and treats the highlighted text as a book title to look up and store.
  def books_from_highlights
    sources = JSON.parse(Readwise.get("/munger").body)["data"]
    existing_books = Book.all

    sources.each do |source|
      tagged = source["highlights"].select { |h| h["note"] =~ /\A\.?book/i }

      tagged.map { |h| h["highlight"] }.each do |title|
        next if title == "Randomness)." # ugh can't get rid of it

        Book.new("Title" => title).populate_from_goodreads(prevent_duplicates_from: existing_books)
      end
    end
  end

  # Builds a Book from the ISBN, populates it from Goodreads, then removes
  # the originating bookmark from Instapaper.
  def create_record_from_isbn(isbn, bookmark_id)
    Book.new("ISBN" => isbn).populate_from_goodreads
    InstapaperClient.delete_bookmark(bookmark_id)
  end

  # Downloads the page behind `uri` (scheme, host and path only; the query
  # string is not sent) and returns the response body.
  def page_body(uri)
    client_for("#{uri.scheme}://#{uri.hostname}").get(uri.path).body
  end

  # Returns the HTTP client for `host`, creating and caching it on first use.
  # Clients retry with backoff, follow redirects, use persistent connections
  # and send a desktop browser User-Agent.
  def client_for(host)
    @clients ||= {}
    @clients[host] ||= Faraday.new(:url => host) do |b|
      b.request :retry, max: 10, interval: 1, interval_randomness: 2, backoff_factor: 2, exceptions: Semian::NetHTTP::DEFAULT_ERRORS
      b.use FaradayMiddleware::FollowRedirects
      b.adapter :net_http_persistent
      b.headers["User-Agent"] = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.117 Safari/537.36"
    end
  end
end
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Instapaper API client referenced by BookImport for listing and deleting
# bookmarks. The four credentials below are blank placeholders.
InstapaperClient = Instapaper::Client.new do |config|
  config.consumer_key = ""
  config.consumer_secret = ""
  config.oauth_token = ''
  config.oauth_token_secret = '' # check docs, need to email them for this
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment