Created
November 18, 2024 07:07
-
-
Save muminoff/5e1aeb5e01097aa421b1dbd8ef06ed03 to your computer and use it in GitHub Desktop.
Korean Bank SMS JSON Parser
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Structured representation of a single bank transaction SMS.
 */
interface TransactionData {
  /** Month/day of the transaction, followed by the time when the SMS carries one; empty string when no date is found. */
  when: string;
  /** First word of the SMS that looks like a merchant/place name; empty string when none is found. */
  place: string;
  /** Withdrawn amount with thousands separators and the trailing "원" removed; 0 when not found. */
  amount: number;
  /** Remaining balance with thousands separators and the trailing "원" removed; 0 when not found. */
  balance: number;
}
/**
 * Parses Korean bank withdrawal-notification SMS text into a TransactionData record.
 *
 * Every extractor is best-effort: a field that cannot be located is reported
 * as an empty string (text fields) or 0 (numeric fields) rather than throwing.
 * All members are static and reference the class by name (not `this`), so
 * `parse` can safely be passed around as a detached callback.
 */
class KoreanBankSMSParser {
  /** Withdrawal keyword followed by the amount, e.g. "출금 3,000원". Group 1 is the raw amount. */
  private static readonly AMOUNT_REGEX = /(?:출금|체크카드출금)\s*([\d,]+)원?/;
  /** Balance keyword followed by the amount, e.g. "잔액 214,164원". Group 1 is the raw amount. */
  private static readonly BALANCE_REGEX = /잔액\s*([\d,]+)원?/;
  /**
   * Date with an optional leading four-digit year and an optional trailing time.
   * Group 1 is month/day, group 2 (when present) is the time. The year is
   * consumed but not captured so that "2024/11/15" yields "11/15" whether or
   * not a time follows.
   */
  private static readonly DATE_REGEX = /(?:\d{4}\/)?(\d{1,2}\/\d{1,2})(?:\s+(\d{1,2}:\d{1,2}))?/;
  /** A clock time anywhere inside a word. */
  private static readonly TIME_REGEX = /\d{1,2}:\d{2}/;
  /** A word that is only a known bank tag, optionally wrapped in brackets. */
  private static readonly BANK_PATTERNS = /^\[?(KB|우리|농협|기업)\]?$/;
  /** A word made only of digits, asterisks and dashes (masked account/card numbers, plain numbers). */
  private static readonly CARD_NUMBER_PATTERN = /^[\d\*\-]+$/;
  /** A whole line that is just a withdrawal/balance keyword plus an amount. */
  private static readonly TRANSACTION_LINE_PATTERN = /^(?:출금|체크카드출금|잔액)\s*[\d,]+원?$/;
  /**
   * Patterns identifying words that can never be a merchant name.
   * Built once instead of per word; "출금" also covers "체크카드출금", and
   * CARD_NUMBER_PATTERN also covers purely numeric words.
   */
  private static readonly SKIP_PATTERNS: readonly RegExp[] = [
    KoreanBankSMSParser.DATE_REGEX,
    KoreanBankSMSParser.TIME_REGEX,
    KoreanBankSMSParser.BANK_PATTERNS,
    KoreanBankSMSParser.CARD_NUMBER_PATTERN,
    /^[\d,]+원?$/,
    /잔액/,
    /출금/,
    /^Web발신$/
  ];

  /**
   * Converts a raw amount such as "18,000" or "3,000원" to an integer.
   *
   * @param str Raw amount text captured from the SMS.
   * @returns The parsed integer, or 0 when the text contains no leading digits.
   */
  private static cleanNumber(str: string): number {
    const value = Number.parseInt(str.replace(/[,원]/g, ''), 10);
    // A capture such as "," leaves nothing to parse; report "not found" (0) instead of NaN.
    return Number.isNaN(value) ? 0 : value;
  }

  /**
   * Finds the first date in the text and normalizes it to "M/D" or "M/D H:m".
   *
   * @param text Full SMS text.
   * @returns The normalized date string, or '' when no date is present.
   */
  private static extractDateTime(text: string): string {
    const match = KoreanBankSMSParser.DATE_REGEX.exec(text);
    if (!match) return '';
    const monthDay = match[1] ?? '';
    const time = match[2];
    // Re-join with a single space so a line break between date and time does not leak into the result.
    return time ? `${monthDay} ${time}` : monthDay;
  }

  /**
   * Reports whether the word contains at least one Hangul syllable or word character.
   *
   * @param str Candidate word.
   */
  private static isKoreanOrAlphanumeric(str: string): boolean {
    return /[\uAC00-\uD7AF\w]/.test(str);
  }

  /**
   * Reports whether a word is SMS boilerplate (sender tag, date, time, bank
   * tag, account number, amount, keyword) rather than a merchant name.
   *
   * @param word A single whitespace-delimited token.
   */
  private static shouldSkipWord(word: string): boolean {
    if (!word || word === '[Web발신]') return true;
    return KoreanBankSMSParser.SKIP_PATTERNS.some(pattern => pattern.test(word));
  }

  /**
   * Returns the first word that survives boilerplate filtering.
   *
   * Only a single whitespace-delimited word is returned, so a merchant name
   * containing spaces is reported by its first word.
   *
   * @param text Full SMS text.
   * @returns The candidate merchant name, or '' when none is found.
   */
  private static extractPlace(text: string): string {
    for (const rawLine of text.split('\n')) {
      const line = rawLine.trim();
      // Blank lines and pure "keyword + amount" lines cannot hold a merchant name.
      if (!line || KoreanBankSMSParser.TRANSACTION_LINE_PATTERN.test(line)) continue;

      for (const word of line.split(/\s+/)) {
        if (KoreanBankSMSParser.shouldSkipWord(word)) continue;
        // Require two or more characters to avoid single-character noise.
        if (word.length >= 2 && KoreanBankSMSParser.isKoreanOrAlphanumeric(word)) {
          return word;
        }
      }
    }
    return '';
  }

  /**
   * Extracts the withdrawn amount.
   *
   * @param text Full SMS text.
   * @returns The amount as an integer, or 0 when no withdrawal keyword with an amount is found.
   */
  private static extractAmount(text: string): number {
    const raw = KoreanBankSMSParser.AMOUNT_REGEX.exec(text)?.[1];
    return raw === undefined ? 0 : KoreanBankSMSParser.cleanNumber(raw);
  }

  /**
   * Extracts the remaining balance.
   *
   * @param text Full SMS text.
   * @returns The balance as an integer, or 0 when no balance keyword with an amount is found.
   */
  private static extractBalance(text: string): number {
    const raw = KoreanBankSMSParser.BALANCE_REGEX.exec(text)?.[1];
    return raw === undefined ? 0 : KoreanBankSMSParser.cleanNumber(raw);
  }

  /**
   * Parses one SMS message.
   *
   * @param smsText Raw SMS body; lines are expected to be separated by "\n".
   * @returns The extracted fields; fields that cannot be found are '' or 0.
   */
  public static parse(smsText: string): TransactionData {
    return {
      when: KoreanBankSMSParser.extractDateTime(smsText),
      place: KoreanBankSMSParser.extractPlace(smsText),
      amount: KoreanBankSMSParser.extractAmount(smsText),
      balance: KoreanBankSMSParser.extractBalance(smsText)
    };
  }
}
// Example usage: sample notification messages in the formats the parser targets
// (bracketed bank tag, bare bank name, full date with year, everything on one line).
const testSMSMessages = [
  `[Web발신]
[KB]11/18 14:58
279801**027
과일놀이터
체크카드출금
18,000
잔액238,281`,
  `[Web발신]
우리 11/17 18:17
*250284
출금 3,000원
서울특별시-현
잔액 214,164원`,
  `[Web발신]
2024/11/15 12:57
출금 354,594원
잔액 50,622원
현대카드
469***03801011
기업`,
  `[Web발신]
농협 출금2,500원
11/03 17:43 301-****-2640-41 파우PC 잔액5,428원`
];

// Parse each sample message into a structured record.
const results = testSMSMessages.map(sms => KoreanBankSMSParser.parse(sms));

// Print the parsed records as pretty-printed JSON.
console.log(JSON.stringify(results, null, 2));
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
🏦 Korean Bank SMS Parser
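Transforms Korean bank withdrawal-notification SMS messages into structured JSON (date/time, merchant, amount, and remaining balance). Handles multiple banks (KB, Woori, NH, IBK), several date formats, and merchant names, which are identified by filtering out dates, account numbers, amounts, and bank keywords.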
Features