Created
November 16, 2011 20:00
-
-
Save ramsey/1371162 to your computer and use it in GitHub Desktop.
Generate OPML file from Delicious blogroll tag
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/local/bin/php
<?php
/**
 * Generate an OPML blogroll from the del.icio.us bookmarks tagged "blogroll".
 *
 * The script asks del.icio.us when the account was last updated. When that is
 * newer than the local cache file (or no cache exists yet), it downloads the
 * tagged posts, fetches each bookmarked page to discover its feed with
 * getRSSLocation(), and writes the result to an OPML file. When nothing has
 * changed it exits without touching the OPML file.
 */
include_once('getRSSLocation.php');

// del.icio.us username and password
$username = 'your_username';
$password = 'your_password';
$cache_file = '/tmp/delicious-blogroll.xml';
$opml_file = '/path/to/blogroll.opml';
$blogs = array();

// Credentials are embedded in the URL, so characters such as '@', ':' or '/'
// must be percent-encoded or the URL is parsed incorrectly.
$credentials = rawurlencode($username) . ':' . rawurlencode($password);
$api_base = "https://{$credentials}@api.del.icio.us/v1/posts";

// STEP ONE: CACHE DEL.ICIO.US DATA
// determine whether an update has been made to del.icio.us since the
// last update; if so, then grab the results from del.icio.us and cache them
$update = simplexml_load_file("{$api_base}/update");
if ($update === FALSE)
{
    file_put_contents('php://stderr', "Unable to read the del.icio.us update timestamp\n");
    exit(1);
}

$last_update = strtotime((string) $update['time']);
// On the first run there is no cache file; treat that as "stale" instead of
// calling filemtime() on a missing file.
$cache_time = is_file($cache_file) ? filemtime($cache_file) : FALSE;

$needs_refresh = ($cache_time === FALSE)
    || ($last_update !== FALSE && $last_update > $cache_time);
if (!$needs_refresh)
{
    // if there have been no updates, then exit
    exit;
}

// del.icio.us has been updated since last cache; recache
$data = file_get_contents("{$api_base}/all?tag=blogroll");
if ($data === FALSE)
{
    file_put_contents('php://stderr', "Unable to download the blogroll posts from del.icio.us\n");
    exit(1);
}

// STEP TWO: READ THE DOWNLOADED DATA
// Parse before caching so that a broken response never replaces a good cache
// (a bad cache with a fresh mtime would suppress every later run).
$blogroll = simplexml_load_string($data);
if ($blogroll === FALSE)
{
    file_put_contents('php://stderr', "del.icio.us returned a response that is not valid XML\n");
    exit(1);
}
file_put_contents($cache_file, $data);

foreach ($blogroll->post as $post)
{
    // cast the SimpleXMLElement attributes so plain strings are stored and sorted
    $blogs[] = array(
        'name' => (string) $post['description'],
        'href' => (string) $post['href'],
    );
}

// sort by name
$names = array();
foreach ($blogs as $k => $v)
{
    $names[$k] = $v['name'];
}
array_multisort($names, SORT_ASC, $blogs);

// STEP THREE: GENERATE OPML
// The document is collected as a list of lines rather than through output
// buffering, so PHP warnings raised while fetching pages cannot end up inside
// the OPML file.
$lines = array(
    '<?xml version="1.0" encoding="UTF-8"?>',
    '<opml version="1.0">',
    '  <head>',
    '    <title>PHP Blogroll</title>',
    '    <expansionState/>',
    '  </head>',
    '  <body>',
    '    <outline text="php">',
);

foreach ($blogs as $blog)
{
    if ($blog['href'] === '')
    {
        continue;
    }

    $html = file_get_contents($blog['href']);
    if ($html === FALSE)
    {
        // pages that cannot be fetched are left out of the blogroll
        continue;
    }

    // discover the blog's RSS feed
    $xmlUrl = getRSSLocation($html, $blog['href']);

    // htmlspecialchars() emits only escapes that are valid in XML;
    // htmlentities() would emit HTML-only named entities such as &eacute;
    $text = htmlspecialchars($blog['name'], ENT_QUOTES, 'UTF-8');
    $outline = '      <outline '
        . 'text="' . $text . '" '
        . 'htmlUrl="' . htmlspecialchars($blog['href'], ENT_QUOTES, 'UTF-8') . '" '
        . 'title="' . $text . '"';
    if ($xmlUrl)
    {
        $outline .= ' type="rss"';
        $outline .= ' xmlUrl="' . htmlspecialchars($xmlUrl, ENT_QUOTES, 'UTF-8') . '"';
    }
    $lines[] = $outline . '/>';
}

$lines[] = '    </outline>';
$lines[] = '  </body>';
$lines[] = '</opml>';

// save the OPML to file
$opml = implode("\n", $lines) . "\n";
if (file_put_contents($opml_file, $opml) === FALSE)
{
    file_put_contents('php://stderr', "Unable to write {$opml_file}\n");
    exit(1);
}
?>
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/**
 * Discover the feed (RSS/Atom) advertised by an HTML page.
 *
 * This is a cleaned up and modified version of Keith Devens's getRSSLocation()
 * function, which can be found at:
 * http://keithdevens.com/weblog/archive/2002/Jun/03/RSSAuto-DiscoveryPHP
 *
 * Cleaned up by Ben Ramsey, http://benramsey.com
 *
 * The HTML is scanned for <link> tags; the first one whose rel contains
 * "alternate" and whose type is application/rss+xml, application/atom+xml or
 * text/xml wins. Its href is returned as an absolute URL, resolved against
 * $location when it is relative.
 *
 * @param string $html     HTML source of the page
 * @param string $location URL the page was fetched from (base for relative hrefs)
 * @return string|false    absolute feed URL, or FALSE when none is found
 */
function getRSSLocation($html, $location)
{
    if (!$html || !$location)
    {
        return FALSE;
    }

    $feed_types = array('application/rss+xml', 'application/atom+xml', 'text/xml');

    // search through the HTML and grab the attribute string of every <link> tag
    preg_match_all('/<link\s+(.*?)\s*\/?>/si', $html, $matches);

    foreach ($matches[1] as $link)
    {
        // Build a fresh attribute map for every tag so values never leak
        // from one <link> into the next. Values may be double-quoted,
        // single-quoted or bare, and may contain spaces when quoted.
        $attributes = array();
        preg_match_all(
            '/([^\s=\/"\'<>]+)\s*=\s*(?:"([^"]*)"|\'([^\']*)\'|([^\s"\'>]+))/s',
            $link,
            $pairs,
            PREG_SET_ORDER
        );
        foreach ($pairs as $pair)
        {
            // the value is whichever alternative matched, i.e. the last group
            $value = $pair[count($pair) - 1];
            // attribute values are entity-encoded in HTML (e.g. &amp; in URLs)
            $attributes[strtolower($pair[1])] = html_entity_decode($value, ENT_QUOTES, 'UTF-8');
        }

        $rel = isset($attributes['rel']) ? strtolower(trim($attributes['rel'])) : '';
        $type = isset($attributes['type']) ? strtolower(trim($attributes['type'])) : '';
        $href = isset($attributes['href']) ? trim($attributes['href']) : '';

        // rel is a space-separated token list; "alternate" must be one of them
        if ($href === ''
            || !in_array('alternate', preg_split('/\s+/', $rel), TRUE)
            || !in_array($type, $feed_types, TRUE))
        {
            continue;
        }

        if (preg_match('#^https?://#i', $href))
        {
            // absolute URL
            return $href;
        }

        // otherwise, 'absolutize' it against the page's own URL
        $url_parts = parse_url($location);
        if ($url_parts === FALSE || !isset($url_parts['host']))
        {
            // no usable base URL for this relative href; try the next tag
            continue;
        }
        $scheme = isset($url_parts['scheme']) ? $url_parts['scheme'] : 'http';

        if (strpos($href, '//') === 0)
        {
            // protocol-relative URL: only the scheme is missing
            return "{$scheme}:{$href}";
        }

        $full_url = "{$scheme}://{$url_parts['host']}";
        if (isset($url_parts['port']))
        {
            $full_url .= ":{$url_parts['port']}";
        }

        if (strpos($href, '/') !== 0)
        {
            // it's a relative link on the path: keep everything up to and
            // including the last '/' of the page path (dirname() would drop
            // the final directory of a path that ends in '/')
            $path = isset($url_parts['path']) ? $url_parts['path'] : '/';
            $last_slash = strrpos($path, '/');
            $full_url .= ($last_slash === FALSE) ? '/' : substr($path, 0, $last_slash + 1);
        }

        return $full_url . $href;
    }

    return FALSE;
}
?> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment