Skip to content

Instantly share code, notes, and snippets.

@kidGodzilla
Created May 12, 2025 00:22
Show Gist options
  • Save kidGodzilla/b948e050d9555df8e81a8fde034a6ca8 to your computer and use it in GitHub Desktop.
"""Zero-shot image classification with MobileCLIP-B via open_clip.

Encodes a fixed set of text prompts and one image, then prints the
cosine similarity (and softmax probability) of the image against each
prompt. Requires `example.jpg` in the working directory and downloads
the DataCompDR-pretrained MobileCLIP-B checkpoint on first run.
"""
import torch
from PIL import Image
import open_clip

# Choose device — Apple Silicon uses "mps" (Metal Performance Shaders).
device = "mps" if torch.backends.mps.is_available() else "cpu"

# Load model. create_model_and_transforms returns
# (model, preprocess_train, preprocess_val); inference must use the
# *validation* transform (deterministic resize/center-crop, no augmentation).
model, _, preprocess = open_clip.create_model_and_transforms(
    "MobileCLIP-B", pretrained="datacompdr", device=device
)
model.eval()

# Load tokenizer matching the model's text tower.
tokenizer = open_clip.get_tokenizer("MobileCLIP-B")

# Example prompts to score the image against.
prompts = ["a cat", "a dog", "a person"]
tokenized = tokenizer(prompts).to(device)

# Encode text and L2-normalize so dot products are cosine similarities.
with torch.no_grad():
    text_features = model.encode_text(tokenized)
    text_features /= text_features.norm(dim=-1, keepdim=True)

# Load and preprocess image (convert to RGB in case of grayscale/RGBA input).
image = Image.open("example.jpg").convert("RGB")
image_input = preprocess(image).unsqueeze(0).to(device)

# Encode image, normalize, and compute similarity against every prompt.
with torch.no_grad():
    image_features = model.encode_image(image_input)
    image_features /= image_features.norm(dim=-1, keepdim=True)

# Cosine similarity per prompt, plus the conventional CLIP softmax
# readout (temperature 100) for an interpretable probability.
similarity = (image_features @ text_features.T).squeeze(0)
probs = (100.0 * similarity).softmax(dim=-1)
for prompt, score, prob in zip(prompts, similarity, probs):
    print(f"{prompt}: {score.item():.4f} (p={prob.item():.2%})")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment