-
-
Save anelad/44bf3710566a6b26b1316d84d4d34e1c to your computer and use it in GitHub Desktop.
URL Slugs in Javascript (with UTF-8 and Transliteration Support)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
* Create a web friendly URL slug from a string. | |
* | |
* Requires XRegExp (http://xregexp.com) with unicode add-ons for UTF-8 support. | |
* | |
* Although supported, transliteration is discouraged because | |
* 1) most web browsers support UTF-8 characters in URLs | |
* 2) transliteration causes a loss of information | |
* | |
* @author Sean Murphy <[email protected]> | |
* @copyright Copyright 2012 Sean Murphy. All rights reserved. | |
* @license http://creativecommons.org/publicdomain/zero/1.0/ | |
* | |
* @param string s | |
* @param object opt | |
* @return string | |
*/ | |
function url_slug(s, opt) { | |
s = String(s); | |
opt = Object(opt); | |
var defaults = { | |
'delimiter': '-', | |
'limit': undefined, | |
'lowercase': true, | |
'replacements': {}, | |
'transliterate': (typeof(XRegExp) === 'undefined') ? true : false | |
}; | |
// Merge options | |
for (var k in defaults) { | |
if (!opt.hasOwnProperty(k)) { | |
opt[k] = defaults[k]; | |
} | |
} | |
var char_map = { | |
// Latin | |
'À': 'A', 'Á': 'A', 'Â': 'A', 'Ã': 'A', 'Ä': 'A', 'Å': 'A', 'Æ': 'AE', 'Ç': 'C', | |
'È': 'E', 'É': 'E', 'Ê': 'E', 'Ë': 'E', 'Ì': 'I', 'Í': 'I', 'Î': 'I', 'Ï': 'I', | |
'Ð': 'D', 'Ñ': 'N', 'Ò': 'O', 'Ó': 'O', 'Ô': 'O', 'Õ': 'O', 'Ö': 'O', 'Ő': 'O', | |
'Ø': 'O', 'Ù': 'U', 'Ú': 'U', 'Û': 'U', 'Ü': 'U', 'Ű': 'U', 'Ý': 'Y', 'Þ': 'TH', | |
'ß': 'ss', | |
'à': 'a', 'á': 'a', 'â': 'a', 'ã': 'a', 'ä': 'a', 'å': 'a', 'æ': 'ae', 'ç': 'c', | |
'è': 'e', 'é': 'e', 'ê': 'e', 'ë': 'e', 'ì': 'i', 'í': 'i', 'î': 'i', 'ï': 'i', | |
'ð': 'd', 'ñ': 'n', 'ò': 'o', 'ó': 'o', 'ô': 'o', 'õ': 'o', 'ö': 'o', 'ő': 'o', | |
'ø': 'o', 'ù': 'u', 'ú': 'u', 'û': 'u', 'ü': 'u', 'ű': 'u', 'ý': 'y', 'þ': 'th', | |
'ÿ': 'y', | |
// Latin symbols | |
'©': '(c)', | |
// Greek | |
'Α': 'A', 'Β': 'B', 'Γ': 'G', 'Δ': 'D', 'Ε': 'E', 'Ζ': 'Z', 'Η': 'H', 'Θ': '8', | |
'Ι': 'I', 'Κ': 'K', 'Λ': 'L', 'Μ': 'M', 'Ν': 'N', 'Ξ': '3', 'Ο': 'O', 'Π': 'P', | |
'Ρ': 'R', 'Σ': 'S', 'Τ': 'T', 'Υ': 'Y', 'Φ': 'F', 'Χ': 'X', 'Ψ': 'PS', 'Ω': 'W', | |
'Ά': 'A', 'Έ': 'E', 'Ί': 'I', 'Ό': 'O', 'Ύ': 'Y', 'Ή': 'H', 'Ώ': 'W', 'Ϊ': 'I', | |
'Ϋ': 'Y', | |
'α': 'a', 'β': 'b', 'γ': 'g', 'δ': 'd', 'ε': 'e', 'ζ': 'z', 'η': 'h', 'θ': '8', | |
'ι': 'i', 'κ': 'k', 'λ': 'l', 'μ': 'm', 'ν': 'n', 'ξ': '3', 'ο': 'o', 'π': 'p', | |
'ρ': 'r', 'σ': 's', 'τ': 't', 'υ': 'y', 'φ': 'f', 'χ': 'x', 'ψ': 'ps', 'ω': 'w', | |
'ά': 'a', 'έ': 'e', 'ί': 'i', 'ό': 'o', 'ύ': 'y', 'ή': 'h', 'ώ': 'w', 'ς': 's', | |
'ϊ': 'i', 'ΰ': 'y', 'ϋ': 'y', 'ΐ': 'i', | |
// Turkish | |
'Ş': 'S', 'İ': 'I', 'Ç': 'C', 'Ü': 'U', 'Ö': 'O', 'Ğ': 'G', | |
'ş': 's', 'ı': 'i', 'ç': 'c', 'ü': 'u', 'ö': 'o', 'ğ': 'g', | |
// Russian | |
'А': 'A', 'Б': 'B', 'В': 'V', 'Г': 'G', 'Д': 'D', 'Е': 'E', 'Ё': 'Yo', 'Ж': 'Zh', | |
'З': 'Z', 'И': 'I', 'Й': 'J', 'К': 'K', 'Л': 'L', 'М': 'M', 'Н': 'N', 'О': 'O', | |
'П': 'P', 'Р': 'R', 'С': 'S', 'Т': 'T', 'У': 'U', 'Ф': 'F', 'Х': 'H', 'Ц': 'C', | |
'Ч': 'Ch', 'Ш': 'Sh', 'Щ': 'Sh', 'Ъ': '', 'Ы': 'Y', 'Ь': '', 'Э': 'E', 'Ю': 'Yu', | |
'Я': 'Ya', | |
'а': 'a', 'б': 'b', 'в': 'v', 'г': 'g', 'д': 'd', 'е': 'e', 'ё': 'yo', 'ж': 'zh', | |
'з': 'z', 'и': 'i', 'й': 'j', 'к': 'k', 'л': 'l', 'м': 'm', 'н': 'n', 'о': 'o', | |
'п': 'p', 'р': 'r', 'с': 's', 'т': 't', 'у': 'u', 'ф': 'f', 'х': 'h', 'ц': 'c', | |
'ч': 'ch', 'ш': 'sh', 'щ': 'sh', 'ъ': '', 'ы': 'y', 'ь': '', 'э': 'e', 'ю': 'yu', | |
'я': 'ya', | |
// Ukrainian | |
'Є': 'Ye', 'І': 'I', 'Ї': 'Yi', 'Ґ': 'G', | |
'є': 'ye', 'і': 'i', 'ї': 'yi', 'ґ': 'g', | |
// Czech | |
'Č': 'C', 'Ď': 'D', 'Ě': 'E', 'Ň': 'N', 'Ř': 'R', 'Š': 'S', 'Ť': 'T', 'Ů': 'U', | |
'Ž': 'Z', | |
'č': 'c', 'ď': 'd', 'ě': 'e', 'ň': 'n', 'ř': 'r', 'š': 's', 'ť': 't', 'ů': 'u', | |
'ž': 'z', | |
// Polish | |
'Ą': 'A', 'Ć': 'C', 'Ę': 'e', 'Ł': 'L', 'Ń': 'N', 'Ó': 'o', 'Ś': 'S', 'Ź': 'Z', | |
'Ż': 'Z', | |
'ą': 'a', 'ć': 'c', 'ę': 'e', 'ł': 'l', 'ń': 'n', 'ó': 'o', 'ś': 's', 'ź': 'z', | |
'ż': 'z', | |
// Latvian | |
'Ā': 'A', 'Č': 'C', 'Ē': 'E', 'Ģ': 'G', 'Ī': 'i', 'Ķ': 'k', 'Ļ': 'L', 'Ņ': 'N', | |
'Š': 'S', 'Ū': 'u', 'Ž': 'Z', | |
'ā': 'a', 'č': 'c', 'ē': 'e', 'ģ': 'g', 'ī': 'i', 'ķ': 'k', 'ļ': 'l', 'ņ': 'n', | |
'š': 's', 'ū': 'u', 'ž': 'z' | |
}; | |
// Make custom replacements | |
for (var k in opt.replacements) { | |
s = s.replace(RegExp(k, 'g'), opt.replacements[k]); | |
} | |
// Transliterate characters to ASCII | |
if (opt.transliterate) { | |
for (var k in char_map) { | |
s = s.replace(RegExp(k, 'g'), char_map[k]); | |
} | |
} | |
// Replace non-alphanumeric characters with our delimiter | |
var alnum = (typeof(XRegExp) === 'undefined') ? RegExp('[^a-z0-9]+', 'ig') : XRegExp('[^\\p{L}\\p{N}]+', 'ig'); | |
s = s.replace(alnum, opt.delimiter); | |
// Remove duplicate delimiters | |
s = s.replace(RegExp('[' + opt.delimiter + ']{2,}', 'g'), opt.delimiter); | |
// Truncate slug to max. characters | |
s = s.substring(0, opt.limit); | |
// Remove delimiter from ends | |
s = s.replace(RegExp('(^' + opt.delimiter + '|' + opt.delimiter + '$)', 'g'), ''); | |
return opt.lowercase ? s.toLowerCase() : s; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment