Last active
August 14, 2025 13:19
-
-
Save joaodaher/97847c9987ccf7d6ee13ffe76066c5ca to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://warmly.ai/schemas/person-identification-record-4.3.json",
  "title": "person-identification-record",
  "description": "A complete record for person identity resolution. Each lineage item flows through vendor_ingest → normalization → enrichment. Identification selects the best enrichment. The root publish_target declares where to emit the final result.",
  "type": "object",
  "required": ["id", "lineage", "identification", "publish_target"],
  "properties": {
    "id": {
      "type": "string",
      "description": "Unique ID for this record (e.g., batch/window identifier)"
    },
    "publish_target": {
      "$ref": "#/$defs/publish-target",
      "description": "Destination where the final identity should be published (e.g., GCP Pub/Sub topic)."
    },
    "lineage": {
      "type": "array",
      "description": "List of lineage items, each containing vendor_ingest, normalization, and enrichment stages.",
      "items": { "$ref": "#/$defs/lineage-item" },
      "minItems": 1
    },
    "identification": { "$ref": "#/$defs/identification-stage" }
  },
  "additionalProperties": false,
  "$defs": {
    "publish-target": {
      "title": "publish-target",
      "type": "object",
      "required": ["system"],
      "properties": {
        "system": { "type": "string", "enum": ["gcp_pubsub"] },
        "gcp_pubsub": { "$ref": "#/$defs/gcp-pubsub-target" }
      },
      "allOf": [
        {
          "if": { "properties": { "system": { "const": "gcp_pubsub" } }, "required": ["system"] },
          "then": { "required": ["gcp_pubsub"] }
        }
      ],
      "additionalProperties": false
    },
    "gcp-pubsub-target": {
      "title": "gcp-pubsub-target",
      "type": "object",
      "required": ["topic"],
      "properties": {
        "project_id": {
          "type": "string",
          "description": "Optional override; if omitted, the service's default project is used."
        },
        "topic": {
          "type": "string",
          "description": "Pub/Sub topic name (e.g., warmly-best-contact)."
        }
      },
      "additionalProperties": false
    },
    "lineage-item": {
      "title": "lineage-item",
      "type": "object",
      "required": ["lineage_id", "vendor_ingest", "normalization", "enrichment"],
      "properties": {
        "lineage_id": {
          "type": "string",
          "description": "Unique ID for this lineage item (referenced by identification.metadata.lineage_id)"
        },
        "vendor_ingest": { "$ref": "#/$defs/vendor-ingest-stage" },
        "normalization": { "$ref": "#/$defs/normalization-stage" },
        "enrichment": { "$ref": "#/$defs/enrichment-stage" }
      },
      "additionalProperties": false
    },
    "vendor-ingest-stage": {
      "title": "vendor-ingest-stage",
      "type": "object",
      "required": ["raw", "metadata"],
      "properties": {
        "raw": {
          "type": "object",
          "description": "Opaque vendor payload as received.",
          "additionalProperties": true
        },
        "metadata": { "$ref": "#/$defs/vendor-ingest-metadata" }
      },
      "additionalProperties": false
    },
    "normalization-stage": {
      "title": "normalization-stage",
      "type": "object",
      "required": ["content", "metadata"],
      "properties": {
        "content": { "$ref": "#/$defs/entity-resolution-input" },
        "metadata": { "$ref": "#/$defs/normalization-decision" }
      },
      "additionalProperties": false
    },
    "enrichment-stage": {
      "title": "enrichment-stage",
      "type": "object",
      "required": ["content", "metadata"],
      "properties": {
        "content": { "$ref": "#/$defs/enriched-person-content" },
        "metadata": { "$ref": "#/$defs/enrichment-metadata" }
      },
      "additionalProperties": false
    },
    "vendor-ingest-metadata": {
      "title": "vendor-ingest-metadata",
      "type": "object",
      "required": ["received_at", "vendor"],
      "properties": {
        "vendor": { "type": "string" },
        "received_at": { "type": "string", "format": "date-time" },
        "correlation_id": { "type": "string" },
        "hash": { "type": "string", "description": "Stable hash of vendor_ingest.raw" },
        "session_id": { "type": "string", "description": "Optional; only for session-scoped pipelines" },
        "producer": {
          "type": "object",
          "properties": {
            "service": { "type": "string" },
            "version": { "type": "string" },
            "environment": { "type": "string" }
          },
          "additionalProperties": false
        }
      },
      "additionalProperties": false
    },
    "entity-resolution-input": {
      "title": "entity-resolution-input",
      "type": "object",
      "description": "Compact, strict person input (snake_case).",
      "properties": {
        "id": { "type": "string" },
        "organization_id": { "type": "string" },
        "email": { "type": "string", "format": "email" },
        "linkedin_handle": { "type": "string", "description": "LinkedIn handle only, not the full profile URL" },
        "first_name": { "type": "string" },
        "last_name": { "type": "string" },
        "full_name": { "type": "string" },
        "job_title": { "type": "string" },
        "company_name": { "type": "string" },
        "company_domain": { "type": "string", "description": "Bare domain without a scheme (e.g., acme.com)" }
      },
      "additionalProperties": false
    },
    "normalization-decision": {
      "title": "normalization-decision",
      "type": "object",
      "required": ["state", "decided_at"],
      "properties": {
        "state": { "type": "string", "enum": ["need_enrichment", "no_enrichment", "duplicate"] },
        "duplicate_of_id": { "type": ["string", "null"] },
        "reason": { "type": ["string", "null"], "maxLength": 300 },
        "decided_at": { "type": "string", "format": "date-time" }
      },
      "allOf": [
        {
          "if": { "properties": { "state": { "const": "duplicate" } }, "required": ["state"] },
          "then": { "required": ["duplicate_of_id"] }
        }
      ],
      "additionalProperties": false
    },
    "enriched-person-content": {
      "title": "enriched-person-content",
      "type": "object",
      "description": "Comprehensive person DTO produced by enrichment (person-focused).",
      "properties": {
        "id": { "type": "string" },
        "name": {
          "type": "object",
          "properties": {
            "full_name": { "type": "string" },
            "first_name": { "type": "string" },
            "last_name": { "type": "string" }
          },
          "additionalProperties": false
        },
        "email": { "type": "string", "format": "email" },
        "phone": { "type": "string", "description": "Phone number; E.164 format recommended" },
        "location": { "type": "string" },
        "time_zone": { "type": "string" },
        "utc_offset": { "type": "number" },
        "bio": { "type": "string" },
        "site": { "type": "string" },
        "avatar": { "type": "string" },
        "employment": {
          "type": "object",
          "properties": {
            "domain": { "type": "string" },
            "name": { "type": "string" },
            "title": { "type": "string" },
            "role": { "type": "string" },
            "sub_role": { "type": "string" },
            "seniority": { "type": "string" },
            "refreshed": { "type": "boolean" }
          },
          "additionalProperties": false
        },
        "facebook": {
          "type": "object",
          "properties": { "handle": { "type": "string" }, "likes": { "type": "number" } },
          "additionalProperties": false
        },
        "github": {
          "type": "object",
          "properties": {
            "handle": { "type": "string" },
            "id": { "type": "number" },
            "avatar": { "type": "string" },
            "company": { "type": "string" },
            "blog": { "type": "string" },
            "followers": { "type": "number" },
            "following": { "type": "number" }
          },
          "additionalProperties": false
        },
        "twitter": {
          "type": "object",
          "properties": {
            "handle": { "type": "string" },
            "id": { "type": "number" },
            "bio": { "type": "string" },
            "followers": { "type": "number" },
            "following": { "type": "number" },
            "statuses": { "type": "number" },
            "favorites": { "type": "number" },
            "location": { "type": "string" },
            "site": { "type": "string" },
            "avatar": { "type": "string" }
          },
          "additionalProperties": false
        },
        "linkedin": { "type": "object", "properties": { "handle": { "type": "string" } }, "additionalProperties": false },
        "email_provider": { "type": "boolean" },
        "indexed_at": { "type": "string", "format": "date-time" },
        "phone_last_seen_at": { "type": "string", "format": "date-time" },
        "active_at": { "type": "string", "format": "date-time" },
        "inactive_at": { "type": "string", "format": "date-time" }
      },
      "additionalProperties": false
    },
    "enrichment-metadata": {
      "title": "enrichment-metadata",
      "type": "object",
      "properties": {
        "provider": { "type": "string" },
        "status": { "type": "string", "enum": ["ok", "not_found", "rate_limited", "error"] },
        "confidence": { "type": "number", "minimum": 0, "maximum": 1 },
        "freshness_days": { "type": "integer", "minimum": 0 },
        "score_overall": { "type": "number", "minimum": 0, "maximum": 1 },
        "provider_request_id": { "type": "string" },
        "requested_at": { "type": "string", "format": "date-time" },
        "responded_at": { "type": "string", "format": "date-time" },
        "provider_payload_raw": {
          "type": "object",
          "description": "Opaque provider response as received.",
          "additionalProperties": true
        },
        "notes": { "type": "string" }
      },
      "additionalProperties": false
    },
    "identification-stage": {
      "title": "identification-stage",
      "type": "object",
      "required": ["best", "metadata"],
      "properties": {
        "best": {
          "type": "object",
          "required": ["content"],
          "properties": {
            "content": { "$ref": "#/$defs/enriched-person-content" }
          },
          "additionalProperties": false
        },
        "metadata": { "$ref": "#/$defs/identification-metadata" }
      },
      "additionalProperties": false
    },
    "identification-metadata": {
      "title": "identification-metadata",
      "type": "object",
      "required": ["engine", "lineage_id"],
      "properties": {
        "lineage_id": {
          "type": "string",
          "description": "The lineage_id of the winning lineage item (an entry of the root lineage array)."
        },
        "engine": { "type": "string", "enum": ["rule", "llm", "hybrid"] },
        "rationale": { "type": "string" },
        "reasons": {
          "type": "array",
          "items": {
            "type": "object",
            "properties": { "code": { "type": "string" }, "detail": { "type": "string" } },
            "additionalProperties": false
          }
        },
        "cost": {
          "type": "object",
          "properties": {
            "currency": { "type": "string" },
            "amount": { "type": "number" },
            "tokens": { "type": "integer", "minimum": 0 }
          },
          "additionalProperties": false
        },
        "timestamp": { "type": "string", "format": "date-time" }
      },
      "additionalProperties": false
    }
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment