Skip to content

Instantly share code, notes, and snippets.

@tikendraw
Created September 21, 2024 06:44
Show Gist options
  • Save tikendraw/fdffe9fa2bc33e32afe23a69224078d7 to your computer and use it in GitHub Desktop.
Save tikendraw/fdffe9fa2bc33e32afe23a69224078d7 to your computer and use it in GitHub Desktop.
Extract JSON code blocks for a Pydantic class, with a fallback
import re
import json
import ast
from pydantic import ValidationError
def extract_code_block(text):
    """Extract dict-like fenced code blocks from *text* and evaluate them.

    Searches for Markdown fences (```json or a bare ```) whose payload
    starts with ``{`` and ends with ``}``. Each payload is evaluated as a
    Python literal with ``ast.literal_eval``; when that fails it is retried
    as JSON with ``json.loads`` so that the JSON-only literals ``true``,
    ``false`` and ``null`` are accepted as well.

    NOTE: this file defines a second function with the same name further
    down; at import time that later definition replaces this one.

    Args:
        text: The string to scan for fenced blocks.

    Returns:
        A list with one parsed object per fenced block (in order of
        appearance), ``None`` when no fenced block is found, or the string
        ``"Error parsing code block: ..."`` as soon as one block cannot be
        parsed by either parser.
    """
    # ```json or ``` followed by { ... } (JSON or dict-like structure).
    payloads = re.findall(r'```(?:json)?\s*({.*?})\s*```', text, re.DOTALL)
    if not payloads:
        return None

    parsed = []
    for payload in payloads:
        try:
            # literal_eval only evaluates literals, never arbitrary code.
            parsed.append(ast.literal_eval(payload))
        except (SyntaxError, ValueError, TypeError) as literal_error:
            # TypeError covers e.g. an unhashable dict key such as {[1]: 2}.
            try:
                parsed.append(json.loads(payload))
            except json.JSONDecodeError:
                # Report the literal_eval failure, as the original code did.
                return f"Error parsing code block: {literal_error}"
    return parsed
# Function to extract potential JSON/dict blocks
def extract_code_block(text):
    """Extract and parse JSON objects from fenced code blocks in *text*.

    Searches for Markdown fences (```json or a bare ```) whose payload
    starts with ``{`` and ends with ``}`` and parses each payload with
    ``json.loads``.

    Args:
        text: The string to scan for fenced blocks.

    Returns:
        A list of the parsed objects, in order of appearance, or ``None``
        when no fenced block is found or none of them is valid JSON.
    """
    candidates = re.findall(r'```(?:json)?\s*({.*?})\s*```', text, re.DOTALL)

    parsed = []
    for candidate in candidates:
        try:
            parsed.append(json.loads(candidate))
        except json.JSONDecodeError:
            # Skip only the malformed block; one bad block must not throw
            # away the well-formed ones around it.
            continue

    # Callers test the result for truthiness, so "nothing usable" is None.
    return parsed or None
# Fallback function when parsing fails
def fallback_extract(text, expected_keys):
    """Best-effort extraction of ``"key": value`` pairs from loose text.

    For every key in *expected_keys* the first occurrence of ``"key":`` in
    *text* is located and the value that follows is read:

    * A value starting with ``"``, ``{`` or ``[`` is decoded as a complete
      JSON value, so quoted strings containing spaces or commas and nested
      objects/arrays are captured whole.
    * Anything else (or a value that is not valid JSON) falls back to the
      token up to the next whitespace or comma, with surrounding quotes and
      any trailing closing bracket removed.

    String results made up only of decimal digits become ``int``; string
    results shaped like ``{...}`` are parsed as JSON when possible.

    Args:
        text: The raw text to search.
        expected_keys: Iterable of key names to look for.

    Returns:
        A dict with one entry per expected key; keys that are not found map
        to ``None``.
    """
    decoder = json.JSONDecoder()
    fallback_dict = {}

    for key in expected_keys:
        # Escape the key so regex metacharacters in it match literally.
        match = re.search(rf'"{re.escape(str(key))}"\s*:\s*([^\s,]+)', text)
        if match is None:
            fallback_dict[key] = None  # If the key is not found, set it to None
            continue

        token = match.group(1)
        value = None
        if token[0] in '"{[':
            # Decode a whole JSON value starting at the capture position; the
            # whitespace/comma-delimited token alone would truncate it.
            try:
                value, _ = decoder.raw_decode(text, match.start(1))
            except json.JSONDecodeError:
                value = None

        if isinstance(value, (dict, list)):
            fallback_dict[key] = value
            continue

        if value is None:
            # Not decodable as JSON: use the raw token heuristic instead.
            value = token.strip('"').strip(',')
            if not value.startswith(('{', '[')):
                # A bare token at the end of a compact object drags the
                # closing bracket along (e.g. ``30}``); drop it.
                value = value.rstrip('}]')

        # Try to infer the type of the string value (str, int, or dict).
        if value.isdecimal():
            # isdecimal() only accepts characters that int() can convert.
            fallback_dict[key] = int(value)
        elif re.match(r'^\{.*\}$', value):  # Detect dictionary structure
            try:
                fallback_dict[key] = json.loads(value)
            except json.JSONDecodeError:
                fallback_dict[key] = value  # Leave it as a string if malformed
        else:
            fallback_dict[key] = value

    return fallback_dict
# Main function to handle parsing with fallback
def parse_with_fallback(text, pydantic_class):
    """Parse *text* into *pydantic_class*, falling back to key extraction.

    The text is first searched for fenced JSON blocks via
    ``extract_code_block``. If blocks are found and every one validates, a
    list of model instances is returned. Otherwise (no blocks, or a
    ``ValidationError``) the model's field names are looked up individually
    with ``fallback_extract`` and the resulting dict is validated.

    Args:
        text: Raw text that may contain JSON for the model.
        pydantic_class: The Pydantic model class to instantiate.

    Returns:
        A list of model instances (code-block path), a single model instance
        (fallback path), or the string ``"Error parsing with fallback: ..."``
        when the fallback data fails validation.
    """
    # Pydantic v2 exposes ``model_fields``; ``__fields__`` is the v1 name and
    # is deprecated in v2, so only use it when the newer one is absent.
    fields = getattr(pydantic_class, "model_fields", None)
    if fields is None:
        fields = pydantic_class.__fields__
    expected_keys = list(fields.keys())

    # First try to extract clean JSON blocks. The isinstance check guards
    # against a non-list result (e.g. an error string), which would
    # otherwise blow up in the ``**block`` expansion below.
    parsed_blocks = extract_code_block(text)
    if isinstance(parsed_blocks, list) and parsed_blocks:
        try:
            return [pydantic_class(**block) for block in parsed_blocks]
        except ValidationError as e:
            print("Validation error:", e)

    # Fallback to manually extracting key-value pairs.
    fallback_data = fallback_extract(text, expected_keys)
    try:
        return pydantic_class(**fallback_data)
    except ValidationError as e:
        return f"Error parsing with fallback: {e}"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment