Skip to content

Instantly share code, notes, and snippets.

View naufalso's full-sized avatar

Naufal Suryanto naufalso

View GitHub Profile
@naufalso
naufalso / extract_line_differences.py
Created March 18, 2025 02:02
Extract line differences in Python.
import difflib
def extract_line_differences(source: str, modified: str) -> dict:
"""
Extracts line-level differences between two multi-line strings and returns a dictionary where:
- For replacements: key is the differing block of lines in the source and
value is the corresponding block of lines in the modified string.
- For deletions: key is the block of lines deleted from the source, value is an empty string.
- For insertions: key is a string indicating the insertion location (e.g., "INSERT_AT_LINE_#")
and value is the inserted block of lines from the modified string.
@naufalso
naufalso / fetch_url_readme_from_git.py
Created March 13, 2025 04:02
A simple Python function to fetch the README of a Git repository
# Author: Naufal Suryanto
import requests
import re
from markdownify import markdownify as md
from urllib.parse import urlparse
def fetch_url_readme_from_git(url):
"""
Fetch the README file content from a Git repository given a URL.
@naufalso
naufalso / weave_fetch_eval_data.py
Last active February 27, 2025 03:00
Download evaluation data from weave
import weave
import pandas as pd
from tqdm import tqdm
def get_calls(project_id, op_name, parent_id = None):
client = weave.init(project_id)
query_data = {
"project_id": project_id,
"filter": {"op_names": [op_name]},
"sort_by": [{"field": "started_at", "direction": "desc"}],
@naufalso
naufalso / describe_md_image_vlm.py
Created February 13, 2025 07:07
Describe images inside markdown with VLM
#!/usr/bin/env python3
"""
Markdown Image Descriptor Script
This script processes a markdown file, extracts images, encodes them in base64,
and sends them with contextual text to a Visual Language Model (VLM) for description extraction.
The image markdown references are then replaced with the structured descriptions provided by the model.
If an image is deemed non-important (e.g., logos) it is replaced with a markdown comment.
Additionally, if an image contains text information (e.g., terminal screenshots, tables, code snippets),
the VLM is instructed to extract and return only the text content using markdown formatting.
@naufalso
naufalso / get_memory_size.py
Created January 19, 2025 03:39 — forked from philschmid/get_memory_size.py
Get the GPU memory needed per precision for a Hugging Face model ID
from typing import Dict, Union
from huggingface_hub import get_safetensors_metadata
import argparse
import sys
# Example:
# python get_gpu_memory.py Qwen/Qwen2.5-7B-Instruct
# Dictionary mapping dtype strings to their byte sizes
bytes_per_dtype: Dict[str, float] = {
@naufalso
naufalso / image_clarity_analysis.py
Created December 15, 2024 08:27
Image Clarity Analysis
import cv2
import numpy as np
import matplotlib.pyplot as plt
import argparse
# --------------------------
# Utility Functions
# --------------------------
@naufalso
naufalso / push_to_hub.py
Last active November 26, 2024 02:08
Push markdown to github
import os
import re
import base64
import argparse
from typing import List, Dict, Any
from datasets import Dataset
def embed_images_in_markdown(markdown_text: str, base_path: str = ".") -> str:
"""
Embed images in markdown text as base64 encoded strings.
@naufalso
naufalso / export_chrome_history.py
Created November 8, 2024 10:03
Export Chrome history
import sqlite3
import pandas as pd
from datetime import datetime, timedelta
# Path to your Chrome history database [Select one based on your os and chrome history path]
history_path = "C:\\Users\\[USER_NAME]\\AppData\\Local\\Google\\Chrome\\User Data\\Default\\History" # Windows
history_path = "~/Library/Application Support/Google/Chrome/Default/History" # MAC
history_path = "~/.config/google-chrome/Default/History" # Linux
# Connect to the database
@naufalso
naufalso / parallel_queue_tmux.sh
Created October 11, 2024 01:55
Parallel Queue with tmux
#!/bin/bash
# initialize a semaphore with a given number of tokens
open_sem(){
mkfifo pipe-$$
exec 3<>pipe-$$
rm pipe-$$
local i=$1
for((;i>0;i--)); do
printf %s 000 >&3
@naufalso
naufalso / compute_hash.sh
Created September 4, 2024 00:29
A script for computing a folder hash. This is useful for checking whether two folders are identical.
#!/bin/bash
# Check if the correct number of arguments is provided
if [ "$#" -ne 2 ]; then
echo "Usage: $0 <input_directory> <output_file>"
exit 1
fi
# Assign input arguments to variables
input_dir="$1"