Skip to content

Instantly share code, notes, and snippets.

@barseghyanartur
Last active July 16, 2025 10:23
Show Gist options
  • Save barseghyanartur/52d6a1c8ee140bfb75db8aff25a2cb3a to your computer and use it in GitHub Desktop.
Save barseghyanartur/52d6a1c8ee140bfb75db8aff25a2cb3a to your computer and use it in GitHub Desktop.
Get text around passage
# /// script
# requires-python = ">=3.12"
# dependencies = [
# "flair",
# "IPython",
# ]
# ///
from flair.splitter import SegtokSentenceSplitter
# Module-level Flair sentence splitter, created once when the module is loaded
# and reused by every call to get_sentence_context_flair.
splitter = SegtokSentenceSplitter()
def get_sentence_context_flair(
    text: str,
    passage: str,
    window: int = 4,
) -> tuple[list[str], list[str], list[str]]:
    """
    Return the sentences surrounding `passage` in `text`, using the
    module-level Flair SegtokSentenceSplitter to split `text`.

    Args:
        text: Full text that is split into sentences.
        passage: Substring to locate in `text`; only its first occurrence
            is considered.
        window: Maximum number of sentences returned on each side of the
            target. Negative values are treated as 0.

    Returns:
        A tuple ``(before, target, after)`` of sentence lists. ``target``
        holds every sentence the passage overlaps (always at least one),
        ``before`` the up-to-`window` sentences preceding it and ``after``
        the up-to-`window` sentences following it.

    Raises:
        ValueError: If `passage` does not occur in `text`, or if no sentence
            ends after the position where the passage starts.
    """
    # Validate the passage first: it is cheap, the split is not.
    start_char = text.find(passage)
    if start_char < 0:
        raise ValueError("Passage not found in text.")
    end_char = start_char + len(passage)

    # Split text into Sentence objects, then extract raw text
    sentences = [sent.text for sent in splitter.split(text)]

    # Locate every sentence in the original text instead of assuming that
    # sentences are separated by exactly one character; leading whitespace,
    # blank lines or double spaces would otherwise shift all later offsets.
    spans: list[tuple[int, int]] = []
    cursor = 0
    for sentence in sentences:
        begin = text.find(sentence, cursor)
        if begin < 0:
            # The sentence does not occur verbatim (the splitter changed it),
            # so estimate: it starts at the next non-whitespace character.
            begin = cursor
            while begin < len(text) and text[begin].isspace():
                begin += 1
        cursor = begin + len(sentence)
        spans.append((begin, cursor))

    # First sentence that ends after the passage starts
    first_idx = next(
        (i for i, (_, stop) in enumerate(spans) if start_char < stop), None
    )
    if first_idx is None:
        raise ValueError("Could not map passage to a sentence.")

    # Extend the target over every following sentence the passage reaches into
    last_idx = first_idx
    while last_idx + 1 < len(spans) and spans[last_idx + 1][0] < end_char:
        last_idx += 1

    # Slices clamp at the list boundaries, so no explicit bounds are needed
    window = max(window, 0)
    before = sentences[max(0, first_idx - window) : first_idx]
    target = sentences[first_idx : last_idx + 1]
    after = sentences[last_idx + 1 : last_idx + 1 + window]
    return before, target, after
# Example usage
if __name__ == "__main__":
    # Sample text: opening paragraphs of "Alice's Adventures in Wonderland"
    full_text = (
        "Alice was beginning to get very tired of sitting by her sister on the bank, and of having nothing to do. "
        "Once or twice she had peeped into the book her sister was reading, but it had no pictures or conversations in it, "
        "'and what is the use of a book,' thought Alice 'without pictures or conversation?' "
        "So she was considering in her own mind (as well as she could, for the hot day made her feel very sleepy and stupid), "
        "whether the pleasure of making a daisy-chain would be worth the trouble. "
        "Suddenly a White Rabbit with pink eyes ran close by her. "
        "There was nothing so very remarkable in that; nor did Alice think it so very much out of the way to hear the Rabbit say "
        "to itself 'Oh dear! Oh dear! I shall be late!' (when she thought it over afterwards, it occurred to her that she ought to "
        "have wondered at this, but at the time it all seemed quite natural); but when the Rabbit actually took a watch out of its "
        "waistcoat-pocket, and looked at it, and hurried on, Alice started to her feet, for it flashed across her mind that she had "
        "never before seen a rabbit with either a waistcoat-pocket, or a watch to take out of it. "
        "With nothing more than a curious glance back at her sister, who was still sitting on the bank, and was getting very "
        "tired of waiting by now, Alice ran across the field after the Rabbit, and fortunately was just in time to see it pop down "
        "a large rabbit-hole under the hedge. "
        "In another moment down went Alice after it, never once considering how in the world she was to get out again. "
        "The rabbit-hole went straight on like a tunnel for some way, and then dipped suddenly down, so suddenly that Alice had not "
        "a moment to think about stopping herself before she found herself falling down a very deep well. "
        "Either the well was very deep, or she fell very slowly, for she had plenty of time as she went down to look about her and to "
        "wonder what was going to happen next. First, she tried to look down and make out what she was coming to, but it was too dark to "
        "see anything; she felt that she was dozing off, and had just begun to dream that she was walking hand in hand with Dinah, and "
        "saying to her 'Now, Dinah, tell me the truth: did you ever eat a bat?' when suddenly, thump! Down she came upon a heap of sticks "
        "and dry leaves, and the fall was over."
    )
    passage = "Oh dear! Oh dear! I shall be late!"
    window = 4

    before, target, after = get_sentence_context_flair(
        text=full_text, passage=passage, window=window
    )

    # Print each group of sentences under its own heading
    sections = (
        (f"=== {window} sentences BEFORE ===", before),
        ("\n=== TARGET sentence ===", target),
        (f"\n=== {window} sentences AFTER ===", after),
    )
    for heading, group in sections:
        print(heading)
        for sentence in group:
            print(f" • {sentence}")
# /// script
# requires-python = ">=3.12"
# dependencies = [
# "sentence-splitter",
# "IPython",
# ]
# ///
from sentence_splitter import SentenceSplitter
# Module-level sentence splitter configured for English ("en"), created once
# when the module is loaded and reused by get_sentence_context_splitter.
splitter = SentenceSplitter(language="en")
def get_sentence_context_splitter(
    text: str,
    passage: str,
    window: int = 4,
) -> tuple[list[str], list[str], list[str]]:
    """
    Return the sentences surrounding `passage` in `text`, using the
    module-level sentence-splitter instance to split `text`.

    Args:
        text: Full text that is split into sentences.
        passage: Substring to locate in `text`; only its first occurrence
            is considered.
        window: Maximum number of sentences returned on each side of the
            target. Negative values are treated as 0.

    Returns:
        A tuple ``(before, target, after)`` of sentence lists. ``target``
        holds every sentence the passage overlaps (always at least one),
        ``before`` the up-to-`window` sentences preceding it and ``after``
        the up-to-`window` sentences following it.

    Raises:
        ValueError: If `passage` does not occur in `text`, or if no sentence
            ends after the position where the passage starts.
    """
    # Validate the passage first: it is cheap, the split is not.
    start_char = text.find(passage)
    if start_char < 0:
        raise ValueError("Passage not found in text.")
    end_char = start_char + len(passage)

    # Split the full text into sentences
    sentences = splitter.split(text)

    # Locate every sentence in the original text instead of assuming that
    # sentences are separated by exactly one space; leading whitespace,
    # blank lines or double spaces would otherwise shift all later offsets.
    spans: list[tuple[int, int]] = []
    cursor = 0
    for sentence in sentences:
        begin = text.find(sentence, cursor)
        if begin < 0:
            # The sentence does not occur verbatim (e.g. the splitter
            # normalised its whitespace), so estimate: it starts at the next
            # non-whitespace character.
            begin = cursor
            while begin < len(text) and text[begin].isspace():
                begin += 1
        cursor = begin + len(sentence)
        spans.append((begin, cursor))

    # First sentence that ends after the passage starts
    first_idx = next(
        (i for i, (_, stop) in enumerate(spans) if start_char < stop), None
    )
    if first_idx is None:
        raise ValueError("Could not map passage to a sentence.")

    # Extend the target over every following sentence the passage reaches into
    last_idx = first_idx
    while last_idx + 1 < len(spans) and spans[last_idx + 1][0] < end_char:
        last_idx += 1

    # Slices clamp at the list boundaries, so no explicit bounds are needed
    window = max(window, 0)
    before = sentences[max(0, first_idx - window) : first_idx]
    target = sentences[first_idx : last_idx + 1]
    after = sentences[last_idx + 1 : last_idx + 1 + window]
    return before, target, after
if __name__ == "__main__":
    # Sample text and the passage whose surrounding sentences we want
    full_text = (
        "Alice was beginning to get very tired of sitting by her sister on the bank, and of having nothing to do. "
        "Once or twice she had peeped into the book her sister was reading, but it had no pictures or conversations in it, "
        "'and what is the use of a book,' thought Alice 'without pictures or conversation?' "
        "So she was considering in her own mind (as well as she could, for the hot day made her feel very sleepy and stupid), "
        "whether the pleasure of making a daisy-chain would be worth the trouble. "
        "Suddenly a White Rabbit with pink eyes ran close by her. "
        "There was nothing so very remarkable in that; nor did Alice think it so very much out of the way to hear the Rabbit say "
        "to itself 'Oh dear! Oh dear! I shall be late!' (when she thought it over afterwards, it occurred to her that she ought to "
        "have wondered at this, but at the time it all seemed quite natural); but when the Rabbit actually took a watch out of its "
        "waistcoat-pocket, and looked at it, and hurried on, Alice started to her feet, for it flashed across her mind that she had "
        "never before seen a rabbit with either a waistcoat-pocket, or a watch to take out of it. "
        "With nothing more than a curious glance back at her sister, who was still sitting on the bank, and was getting very "
        "tired of waiting by now, Alice ran across the field after the Rabbit, and fortunately was just in time to see it pop down "
        "a large rabbit-hole under the hedge. "
        "In another moment down went Alice after it, never once considering how in the world she was to get out again. "
        "The rabbit-hole went straight on like a tunnel for some way, and then dipped suddenly down, so suddenly that Alice had not "
        "a moment to think about stopping herself before she found herself falling down a very deep well. "
        "Either the well was very deep, or she fell very slowly, for she had plenty of time as she went down to look about her and to "
        "wonder what was going to happen next. First, she tried to look down and make out what she was coming to, but it was too dark to "
        "see anything; she felt that she was dozing off, and had just begun to dream that she was walking hand in hand with Dinah, and "
        "saying to her 'Now, Dinah, tell me the truth: did you ever eat a bat?' when suddenly, thump! Down she came upon a heap of sticks "
        "and dry leaves, and the fall was over."
    )
    passage = "Oh dear! Oh dear! I shall be late!"
    window = 4

    before, target, after = get_sentence_context_splitter(
        text=full_text, passage=passage, window=window
    )

    # Print each group of sentences under its own heading
    sections = (
        (f"=== {window} sentences BEFORE ===", before),
        ("\n=== TARGET sentence ===", target),
        (f"\n=== {window} sentences AFTER ===", after),
    )
    for heading, group in sections:
        print(heading)
        for sentence in group:
            print(f" • {sentence}")
# /// script
# requires-python = ">=3.12"
# dependencies = [
# "stanza",
# "IPython",
# ]
# ///
import stanza
# One-time setup (run separately before first use): stanza.download('en')
# Module-level English pipeline requesting only the "tokenize" processor,
# created once when the module is loaded; get_sentence_context_stanza reads
# `doc.sentences` from its output.
nlp = stanza.Pipeline(lang="en", processors="tokenize")
def get_sentence_context_stanza(
    text: str,
    passage: str,
    window: int = 4,
) -> tuple[list[str], list[str], list[str]]:
    """
    Return the sentences surrounding `passage` in `text`, using the
    module-level Stanza pipeline to split `text`.

    Args:
        text: Full text that is split into sentences.
        passage: Substring to locate in `text`; only its first occurrence
            is considered.
        window: Maximum number of sentences returned on each side of the
            target. Negative values are treated as 0.

    Returns:
        A tuple ``(before, target, after)`` of sentence lists. ``target``
        holds every sentence the passage overlaps (always at least one),
        ``before`` the up-to-`window` sentences preceding it and ``after``
        the up-to-`window` sentences following it.

    Raises:
        ValueError: If `passage` does not occur in `text`, or if no sentence
            ends after the position where the passage starts.
    """
    # Validate the passage first: it is cheap, running the pipeline is not.
    start_char = text.find(passage)
    if start_char < 0:
        raise ValueError("Passage not found in text.")
    end_char = start_char + len(passage)

    # Process the full text and extract raw sentence texts
    doc = nlp(text)
    sentences = [sent.text for sent in doc.sentences]

    # Locate every sentence in the original text instead of assuming that
    # sentences are separated by exactly one character; leading whitespace,
    # blank lines or double spaces would otherwise shift all later offsets.
    spans: list[tuple[int, int]] = []
    cursor = 0
    for sentence in sentences:
        begin = text.find(sentence, cursor)
        if begin < 0:
            # The sentence does not occur verbatim, so estimate: it starts at
            # the next non-whitespace character.
            begin = cursor
            while begin < len(text) and text[begin].isspace():
                begin += 1
        cursor = begin + len(sentence)
        spans.append((begin, cursor))

    # First sentence that ends after the passage starts
    first_idx = next(
        (i for i, (_, stop) in enumerate(spans) if start_char < stop), None
    )
    if first_idx is None:
        raise ValueError("Could not map passage to a sentence.")

    # Extend the target over every following sentence the passage reaches into
    last_idx = first_idx
    while last_idx + 1 < len(spans) and spans[last_idx + 1][0] < end_char:
        last_idx += 1

    # Slices clamp at the list boundaries, so no explicit bounds are needed
    window = max(window, 0)
    before = sentences[max(0, first_idx - window) : first_idx]
    target = sentences[first_idx : last_idx + 1]
    after = sentences[last_idx + 1 : last_idx + 1 + window]
    return before, target, after
if __name__ == "__main__":
    # Sample text and the passage whose surrounding sentences we want
    full_text = (
        "Alice was beginning to get very tired of sitting by her sister on the bank, and of having nothing to do. "
        "Once or twice she had peeped into the book her sister was reading, but it had no pictures or conversations in it, "
        "'and what is the use of a book,' thought Alice 'without pictures or conversation?' "
        "So she was considering in her own mind (as well as she could, for the hot day made her feel very sleepy and stupid), "
        "whether the pleasure of making a daisy-chain would be worth the trouble. "
        "Suddenly a White Rabbit with pink eyes ran close by her. "
        "There was nothing so very remarkable in that; nor did Alice think it so very much out of the way to hear the Rabbit say "
        "to itself 'Oh dear! Oh dear! I shall be late!' (when she thought it over afterwards, it occurred to her that she ought to "
        "have wondered at this, but at the time it all seemed quite natural); but when the Rabbit actually took a watch out of its "
        "waistcoat-pocket, and looked at it, and hurried on, Alice started to her feet, for it flashed across her mind that she had "
        "never before seen a rabbit with either a waistcoat-pocket, or a watch to take out of it. "
        "With nothing more than a curious glance back at her sister, who was still sitting on the bank, and was getting very "
        "tired of waiting by now, Alice ran across the field after the Rabbit, and fortunately was just in time to see it pop down "
        "a large rabbit-hole under the hedge. "
        "In another moment down went Alice after it, never once considering how in the world she was to get out again. "
        "The rabbit-hole went straight on like a tunnel for some way, and then dipped suddenly down, so suddenly that Alice had not "
        "a moment to think about stopping herself before she found herself falling down a very deep well. "
        "Either the well was very deep, or she fell very slowly, for she had plenty of time as she went down to look about her and to "
        "wonder what was going to happen next. First, she tried to look down and make out what she was coming to, but it was too dark to "
        "see anything; she felt that she was dozing off, and had just begun to dream that she was walking hand in hand with Dinah, and "
        "saying to her 'Now, Dinah, tell me the truth: did you ever eat a bat?' when suddenly, thump! Down she came upon a heap of sticks "
        "and dry leaves, and the fall was over."
    )
    passage = "Oh dear! Oh dear! I shall be late!"
    window = 4

    before, target, after = get_sentence_context_stanza(
        text=full_text, passage=passage, window=window
    )

    # Print each group of sentences under its own heading
    sections = (
        (f"=== {window} sentences BEFORE ===", before),
        ("\n=== TARGET sentence ===", target),
        (f"\n=== {window} sentences AFTER ===", after),
    )
    for heading, group in sections:
        print(heading)
        for sentence in group:
            print(f" • {sentence}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment