Last active
November 21, 2015 11:47
-
-
Save randombrein/4c6de353330b7d8febfc to your computer and use it in GitHub Desktop.
sync Pocket (read it later) data with safari's reading list
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import shutil | |
from os.path import expanduser, dirname, abspath, join, isfile | |
from datetime import datetime as dt | |
import lxml.html | |
from biplist import readPlist, writePlist, InvalidPlistException, NotBinaryPlistException | |
from time_uuid import TimeUUID | |
""" | |
TODO:
- read/unread
USE:
- export your Pocket data as an HTML file, save it as ril_export.html in
  the same folder as this script, then run app.py
~~~
* Safari will save all items for offline reading;
  if you have a huge collection, Safari will be stuck for a while.
* After the saving process has completed, it is better to uncheck and
  re-check Safari in iCloud settings so that iCloud devices update.
~~~
""" | |
# Pocket export page -> 'https://getpocket.com/export'
# The exported HTML is looked up in the directory that holds this script.
_SCRIPT_DIR = dirname(abspath(__file__))
RIL_FILE = join(_SCRIPT_DIR, "ril_export.html")
# Safari's bookmarks plist; the Reading List lives inside it.
SRL_FILE = expanduser('~/Library/Safari/Bookmarks.plist')
def backup():
    """Copy the Safari bookmarks plist (``SRL_FILE``) to ``Bookmarks~.plist``.

    The copy is written to ``~/Library/Safari/Bookmarks~.plist`` and replaces
    any file already at that path.
    """
    shutil.copy(SRL_FILE, expanduser('~/Library/Safari/Bookmarks~.plist'))
def parse_ril():
    """Parse the Pocket export HTML (``RIL_FILE``) into a list of tuples.

    Every ``<li>`` matched by ``//body/ul/li`` is walked and one tuple is
    collected for each ``<a>`` element found inside it.

    Returns:
        A list of ``(title, time_added, href, tags)`` tuples in document
        order. ``title`` is the anchor's text, ``time_added`` and ``href``
        are the raw attribute strings, and ``tags`` is a list of strings
        (empty when the anchor has no tags).

    Raises:
        KeyError: if an anchor lacks the ``time_added`` or ``href`` attribute.
    """
    # ``with`` closes the export file even when reading or parsing raises.
    with open(RIL_FILE, "r") as ril_file:
        document = lxml.html.fromstring(ril_file.read())

    parsed = []
    for item in document.xpath("//body/ul/li"):
        for node in item.iter():
            if node.tag != 'a':
                continue
            # A missing and an empty ``tags`` attribute both mean "no tags".
            # SRL doesn't support tags; import_srl() ignores this field.
            raw_tags = node.attrib.get('tags', '')
            tags = raw_tags.split(',') if raw_tags else []
            parsed.append(
                (node.text, node.attrib['time_added'], node.attrib['href'], tags)
            )
    return parsed
def _make_srl_entry(title, href, bookmark_uuid, date_added, server_id, key):
    """Build the plist dict describing one Safari Reading List bookmark."""
    ###############################
    # +URIDictionary
    #   |--title :string
    # +Sync
    #   |--ServerID :string
    #   |--Key
    # +ReadingListNonSync
    #   |--AddedLocally :bool
    # |WebBookmarkType :string
    # |WebBookmarkUUID :string
    # |URLString :string
    # +ReadingList
    #   |--DateAdded :date
    return {
        'URIDictionary': {'title': title},
        'Sync': {'ServerID': server_id, 'Key': key},
        'ReadingListNonSync': {'AddedLocally': True},
        'WebBookmarkType': 'WebBookmarkTypeLeaf',
        'WebBookmarkUUID': bookmark_uuid,
        'URLString': href,
        'ReadingList': {'DateAdded': date_added},
    }


def import_srl(ril_data):
    """Append Pocket items to Safari's Reading List inside ``SRL_FILE``.

    The plist is read, the child of the root whose ``Title`` is
    ``com.apple.ReadingList`` is located, and one new entry is appended for
    every item of ``ril_data`` whose generated ``WebBookmarkUUID`` is not
    already present. The plist is rewritten only when something was added.

    Args:
        ril_data: sequence of ``(title, time_added, href, tags)`` tuples as
            returned by ``parse_ril()``. It is traversed in reverse order;
            ``time_added`` must be convertible with ``int()``; ``tags`` is
            ignored.

    Returns:
        None. Problems reading or writing the plist, a missing Reading List
        and missing Sync information are reported with ``print`` and end the
        function early.
    """
    try:
        plist = readPlist(SRL_FILE)
    except (InvalidPlistException, NotBinaryPlistException) as e:
        print("Not a plist: %s" % e)
        return

    # 'Root' -> 'Children' -> 'ItemX'(WebBookmarkTypeList) ->
    #   'Children' -> 'ItemY' -> 'ReadingListNonSync'
    parent = None
    for item in plist['Children']:
        if item.get('Title') == 'com.apple.ReadingList':
            parent = item
            break
    if parent is None:
        print("couldn't find parent")
        return

    # New entries reuse the Sync ServerID/Key of the first existing item.
    # An empty Reading List (or a first item without Sync data) has nothing
    # to copy from, so report it instead of failing with a traceback.
    children = parent.get('Children') or []
    try:
        sync_info = children[0]['Sync']
        server_id = sync_info['ServerID']
        key = sync_info['Key']
    except (IndexError, KeyError):
        print("couldn't find Sync info in reading list")
        return

    # UUIDs already in the list, for O(1) duplicate checks. As before, only
    # items carrying a truthy 'ReadingListNonSync' value are considered.
    existing_uuids = set(
        child.get('WebBookmarkUUID')
        for child in children
        if child.get('ReadingListNonSync')
    )

    entries = []
    for (title, time_added, href, _tags) in reversed(ril_data):
        # int() replaces the Python 2-only long(); it accepts the same
        # numeric strings on both Python 2 and 3.
        timestamp = int(time_added)
        # The UUID is derived from the timestamp with randomize=False,
        # presumably so that a re-run yields the same UUID and the item is
        # skipped below -- TODO confirm against time_uuid.
        # [9:] drops the first nine characters of the URN (presumably the
        # 'urn:uuid:' prefix).
        bookmark_uuid = TimeUUID.convert(timestamp, randomize=False).get_urn()[9:]
        if bookmark_uuid in existing_uuids:
            continue
        entries.append(
            _make_srl_entry(
                title,
                href,
                bookmark_uuid,
                dt.utcfromtimestamp(timestamp),
                server_id,
                key,
            )
        )

    children.extend(entries)

    try:
        if entries:
            writePlist(plist, SRL_FILE)
        print("~~~ have %d reading list items ~~~" % len(children))
    except (InvalidPlistException, NotBinaryPlistException) as e:
        # Two spaces on purpose: matches the output of the original
        # ``print "couldnt write plist: ", e`` statement.
        print("couldnt write plist:  %s" % e)
if __name__ == '__main__':
    # Both input files must exist before anything is touched; the Safari
    # plist is checked first, then the Pocket export.
    for required_path, complaint in (
        (SRL_FILE, "safari bookmarks file not found!"),
        (RIL_FILE, "pocket export file not found!"),
    ):
        if not isfile(required_path):
            raise Exception(complaint)

    # Back up the bookmarks first, then parse the export and import it.
    backup()
    pocket_items = parse_ril()
    import_srl(pocket_items)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment