Last active
August 29, 2015 14:16
-
-
Save brianmcallister/9ec1780838ac67fb8598 to your computer and use it in GitHub Desktop.
Highlight words in text. Still need to remove the underscore dependency.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Private: Wrap every occurrence of the given words inside `text` with
# highlight markup.
#
# text  - String of text to format. It is parsed as HTML, so markup inside it
#         is walked node by node rather than searched as a raw string.
# words - Array of words to highlight (default: []).
#
# Returns the formatted text, or `text` untouched when `words` is empty.
highlightWords = (text, words = []) ->
  # Nothing to highlight, hand the input straight back.
  if words.length is 0
    return text

  # Parse the text inside a detached element so that any HTML in `text`
  # becomes real nodes.
  container = document.createElement 'div'
  container.innerHTML = text

  # Highlight the text content of the container, recursing through its nodes.
  highlightNodeContent container, words

  # Serialize the container back to a string and run it through underscore's
  # `_.unescape` to produce the highlighted HTML.
  _.unescape container.innerHTML
# Private: Walk `node` and all of its descendants, highlighting every
# occurrence of `words` found in text nodes.
#
# node  - Node in which to highlight text content.
# words - List of words to highlight.
#
# Returns undefined.
highlightNodeContent = (node, words) ->
  # Value of `nodeType` for text nodes (Node.TEXT_NODE).
  TEXT_NODE = 3

  # Visit the children first.
  children = node.childNodes
  if children.length
    highlightNodeContent child, words for child in children

  # Only text nodes carry content to highlight.
  return undefined unless node.nodeType is TEXT_NODE

  escaped = _.escape node.textContent

  # Skip nodes whose content is blank.
  return undefined if escaped.trim() is ''

  # Swap the node's content for the highlighted version.
  node.textContent = highlightWordsInText words, escaped
  undefined
# Private: Highlight words in a string of text.
#
# words - Array of words to highlight in the text.
# text  - String of text in which to highlight words.
#
# Returns the text with highlight markup added around each word.
highlightWordsInText = (words, text) ->
  # Locate the words first, then mark up the located ranges.
  addHighlightWordsMarkup text, getHighlightWordBoundaries(words, text)
# Private: Given words and text, build an array of word boundaries within
# the text. Matching is case-insensitive. Overlapping (or touching) matches
# are merged into a single boundary. See the examples.
#
# words      - Array of words to highlight in the text. Empty strings are
#              ignored.
# textString - String of text in which to find the words.
#
# Examples
#
#   getHighlightWordBoundaries ['test'], 'aaatestaaa'
#   #=> [[3, 7]]
#
#   getHighlightWordBoundaries ['z'], 'aaazaaazaaa'
#   #=> [[3, 4], [7, 8]]
#
#   getHighlightWordBoundaries ['do', 'dollar'], 'aaa do dollar aaa'
#   #=> [[4, 6], [7, 13]]
#
#   getHighlightWordBoundaries ['ab', 'cd'], 'abcd'
#   #=> [[0, 4]]
#
# As you can see in the above examples, 'overlapping' words are handled
# correctly. If they weren't, you would get too many boundaries back, making
# highlighting the text very difficult.
#
# For example, the string 'do' is a substring of 'dollar'. If the overlapping
# boundaries weren't handled, this function would return an array with *3*
# boundaries, marking 'do' (after 'aaa'), 'do' (inside 'dollar'), and
# 'dollar'.
#
# Returns an array of [start, end] index pairs into `textString`, where
# `end` is exclusive. When more than one match is found the pairs are sorted
# by start index and do not overlap.
getHighlightWordBoundaries = (words, textString) ->
  boundaries = []

  for word in words
    # An empty pattern matches at every position without ever advancing
    # `lastIndex`, which would make the `exec` loop below spin forever.
    continue if word is ''

    # Escape RegExp metacharacters so the word is matched literally.
    escaped = word.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')

    # Match case-insensitively against the *original* text rather than a
    # lower-cased copy: lower-casing can change the length of a string for
    # some characters, which would shift every index after them.
    pattern = new RegExp escaped, 'gi'

    # With the `g` flag each `exec` call resumes from `lastIndex`, so this
    # visits every non-overlapping match of the word.
    while match = pattern.exec textString
      boundaries.push [match.index, match.index + match[0].length]

  # Zero or one boundary can't overlap with anything.
  return boundaries if boundaries.length <= 1

  # Sort by start index so that overlapping or touching ranges are neighbours.
  boundaries.sort (a, b) -> a[0] - b[0]

  # Sweep once over the sorted ranges, growing the current merged range while
  # the next one starts at or before its end, and opening a new one otherwise.
  merged = [boundaries[0].slice()]
  for [start, stop] in boundaries[1..]
    current = merged[merged.length - 1]
    if start <= current[1]
      current[1] = Math.max current[1], stop
    else
      merged.push [start, stop]

  merged
# Private: Insert highlight markup into some text at the given word
# boundaries.
#
# text       - Text to highlight.
# boundaries - Array of [start, end] word boundaries indicating where to
#              highlight. Each boundary is shifted by the markup length of
#              all boundaries before it in the array.
#
# Returns the text with a <mark> element wrapped around each boundary.
addHighlightWordsMarkup = (text, boundaries) ->
  openTag = '<mark>'
  closeTag = '</mark>'
  # Number of characters each highlighted range adds to the text.
  markupLength = openTag.length + closeTag.length

  for [lower, upper], position in boundaries
    # Every previously inserted pair of tags pushes later ranges to the
    # right, so shift all but the first range accordingly.
    unless position is 0
      shift = markupLength * position
      lower += shift
      upper += shift

    head = text.slice 0, lower
    marked = text.slice lower, upper
    tail = text.slice upper
    text = head + openTag + marked + closeTag + tail

  text
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment