Skip to content

Instantly share code, notes, and snippets.

@EdamAme-x
Created February 10, 2025 00:33
Show Gist options
  • Save EdamAme-x/9b02873c0e0263ec8b15bb838c362807 to your computer and use it in GitHub Desktop.
Save EdamAme-x/9b02873c0e0263ec8b15bb838c362807 to your computer and use it in GitHub Desktop.
lexer.ts
/** Value an `enter` callback returns when a character yields no tokens. */
type Skip = null;

/**
 * A single lexical token: a discriminating `type` tag plus an arbitrary
 * `value` payload.
 */
interface Token<Type extends string, Value> {
  type: Type;
  value: Value;
}

/** Upper bound for the token list an `enter` callback may return. */
type Tokens = Token<string, unknown>[];

/**
 * Definition of a character-driven lexer.
 *
 * - `enter` is invoked once per character of the source and returns either
 *   the tokens produced for that character or `internal.skip`.
 * - `state` and `methods` are optional; when omitted, the factory hands the
 *   shared `empty` object to `enter` instead.
 */
interface Lexer<
  TTokens extends Tokens,
  State extends Record<string, unknown>,
  Methods extends Record<string, Function>,
> {
  enter: (
    char: string,
    internal: {
      /** Wraps one token in a single-element tuple so it can be returned as-is. */
      token: <const TToken extends Token<string, unknown>>(
        token: TToken,
      ) => [TToken];
      /** The skip sentinel (`null`). */
      skip: Skip;
      state: State;
      methods: Methods;
    },
  ) => TTokens | Skip;
  state?: State;
  methods?: Methods;
}

/**
 * Prototype-less fallback used for both `state` and `methods` when a lexer
 * definition omits them. It is a single shared object, so anything written to
 * it is visible to every lexer that relies on the fallback.
 */
const empty = Object.create(null);

/**
 * Builds a runnable lexer from a {@link Lexer} definition.
 *
 * @param lexer - The definition: an `enter` callback plus optional `state`
 *   and `methods` that are passed to every `enter` call.
 * @returns An object whose `lexer(source)` method feeds `source` to `enter`
 *   one character at a time, drops skipped (falsy) results, and flattens the
 *   remaining token lists into one array.
 *
 * The same `state` object is passed on every call, so mutations made by
 * `enter` persist across characters and across separate `lexer(source)` runs.
 */
const lexer = <
  const TTokens extends Tokens,
  State extends Record<string, unknown>,
  Methods extends Record<string, Function>,
>(
  lexer: Lexer<TTokens, State, Methods>,
) => {
  return {
    lexer: (source: string) =>
      // Array.from iterates by Unicode code point; split("") would cut
      // characters outside the BMP (e.g. emoji) into two lone surrogates.
      Array.from(source)
        .map((char) =>
          lexer.enter(char, {
            token: (token) => [token],
            skip: null,
            state: lexer.state ?? empty,
            methods: lexer.methods ?? empty,
          }),
        )
        .filter((t) => !!t)
        .flat(),
  };
};
/**
 * Example lexer: emits one `"char"` token for the first occurrence of every
 * non-digit character and skips digits and repeats.
 *
 * The `state` object is created once here and handed to every `enter` call,
 * so characters remembered during one `customLexer.lexer(...)` run are still
 * treated as "already read" in later runs.
 */
const customLexer = lexer({
  state: {
    // A Set gives constant-time membership checks for the seen characters.
    alreadyReadChars: new Set<string>(),
  },
  methods: {
    // Explicit ASCII digit test. `!isNaN(Number(char))` also accepted
    // whitespace, because Number(" ") evaluates to 0.
    isNumericChar: (char: string) =>
      char.length === 1 && char >= "0" && char <= "9",
  },
  enter: (char, internal) => {
    if (
      internal.state.alreadyReadChars.has(char) ||
      internal.methods.isNumericChar(char)
    ) {
      return internal.skip;
    }
    internal.state.alreadyReadChars.add(char);
    return internal.token({
      type: "char",
      value: {
        char,
      },
    });
  },
});

// Demo run: digits and the repeated "k"s are skipped, leaving a..k once each.
console.log(customLexer.lexer("abcdefg1234hijkkkk"));
@EdamAme-x
Copy link
Author

/** Value an `enter` callback returns when a character yields no tokens. */
type Skip = null;

/**
 * A single lexical token: a discriminating `type` tag plus an arbitrary
 * `value` payload.
 */
interface Token<Type extends string, Value> {
  type: Type;
  value: Value;
}

/** Upper bound for the token list an `enter` callback may return. */
type Tokens = Token<string, unknown>[];

/**
 * Definition of a character-driven lexer.
 *
 * - `enter` is invoked once per character of the source and returns either
 *   the tokens produced for that character or `internal.skip`.
 * - `state` and `methods` are optional; when omitted, the factory hands the
 *   shared `empty` object to `enter` instead.
 */
interface Lexer<
  TTokens extends Tokens,
  State extends Record<string, unknown>,
  Methods extends Record<string, Function>,
> {
  enter: (
    char: string,
    internal: {
      /** Wraps one token in a single-element tuple so it can be returned as-is. */
      token: <const TToken extends Token<string, unknown>>(
        token: TToken,
      ) => [TToken];
      /** The skip sentinel (`null`). */
      skip: Skip;
      state: State;
      methods: Methods;
    },
  ) => TTokens | Skip;
  state?: State;
  methods?: Methods;
}

/**
 * Prototype-less fallback used for both `state` and `methods` when a lexer
 * definition omits them. It is a single shared object, so anything written to
 * it is visible to every lexer that relies on the fallback.
 */
const empty = Object.create(null);

/**
 * Builds a runnable lexer from a {@link Lexer} definition.
 *
 * @param lexer - The definition: an `enter` callback plus optional `state`
 *   and `methods` that are passed to every `enter` call.
 * @returns An object with:
 *   - `lexer(source)`: feeds `source` to `enter` one character at a time,
 *     drops skipped (falsy) results, and flattens the remaining token lists
 *     into one array;
 *   - `"~types"`: a type-level carrier for `TTokens`. Its runtime value is
 *     always `null`; it exists only so the token type can be read via
 *     `typeof`.
 *
 * The same `state` object is passed on every call, so mutations made by
 * `enter` persist across characters and across separate `lexer(source)` runs.
 */
const lexer = <
  const TTokens extends Tokens,
  State extends Record<string, unknown>,
  Methods extends Record<string, Function>,
>(
  lexer: Lexer<TTokens, State, Methods>,
) => {
  return {
    lexer: (source: string) =>
      // Array.from iterates by Unicode code point; split("") would cut
      // characters outside the BMP (e.g. emoji) into two lone surrogates.
      Array.from(source)
        .map((char) =>
          lexer.enter(char, {
            token: (token) => [token],
            skip: null,
            state: lexer.state ?? empty,
            methods: lexer.methods ?? empty,
          }),
        )
        .filter((t) => !!t)
        .flat(),
    "~types": null as unknown as TTokens,
  };
};

/**
 * Example lexer: emits one `"char"` token for the first occurrence of every
 * non-digit character and skips digits and repeats.
 *
 * The `state` object is created once here and handed to every `enter` call,
 * so characters remembered during one `customLexer.lexer(...)` run are still
 * treated as "already read" in later runs.
 */
const customLexer = lexer({
  state: {
    // A Set gives constant-time membership checks for the seen characters.
    alreadyReadChars: new Set<string>(),
  },
  methods: {
    // Explicit ASCII digit test. `!isNaN(Number(char))` also accepted
    // whitespace, because Number(" ") evaluates to 0.
    isNumericChar: (char: string) =>
      char.length === 1 && char >= "0" && char <= "9",
  },
  enter: (char, internal) => {
    if (
      internal.state.alreadyReadChars.has(char) ||
      internal.methods.isNumericChar(char)
    ) {
      return internal.skip;
    }

    internal.state.alreadyReadChars.add(char);

    return internal.token({
      type: "char",
      value: {
        char,
      },
    });
  },
});

// Demo run: digits and the repeated "k"s are skipped, leaving a..k once each.
console.log(customLexer.lexer("abcdefg1234hijkkkk"));

@EdamAme-x
Copy link
Author

memo
https://www.typescriptlang.org/play/?target=99#code/C4TwDgpgBAyg1gSzFAvFAdgVwDbYNwBQBC6wEATgGYCGAxtACoD2cE6APA+NBAB5noAJgGcow4ORIBzADRQAatWyYe-NiKiZ0cdEwDu6AHxQA3gShRQkAFxQukQhYBuSlbcXKIhAL5ErjFjZRNGZWDnFJdFlNbV0DQwBtAF1CYlIKGnooABk+CnZzO1CgqD4BDWL0YRlCmGBqMlK1IVEAJQhaJnJBdgjpOS0dfSMaiwBZCGAACyYNMvU2jq6evqi5ADEtWmAEJhGCYzMLNjJyWwAKQotaKeozsQl+q6gSU-QlWyOLb+BA9Ft2J0qsAin9RPMWnYwYZLt84VAAHRI35hYS2BiVYTPCwASlQxgxYMc8OEiDAtngSGJcPEDQgFPqZGp3wAtpMZiJbBNprNhMyLJAIHALniUMZVlIoAAfDA4fDPbyjKCigmY6WwMnE2lkAD8DLpxLZPJEeqg3I5fIIvgIQPEpRZYFAqCgAHkAEYAKw6wARtHIEDp5ywuBxqVtIOweXIzoK1z2dsJqKa5VEmKVdTpyYWUHanW6vUeaxiQ3iSvNvKzkNzywLkWim3Q212+xhhUjvAotlyHfInDTsEZEDk5ZEhhqKtMhX9wEw5HQk7h7c7UHOwiYs-otglE6+cPDUAA+jc7sExOvyPQEcIwNgEMBzgAiB+h7FQffH8hoh515LO5KpeEoEoLoV33I9bmjJhKEPD9hDxXd4X3bBqHEABhCDnVghJYIRSMommKAAFooAARhSV8LAQaDzmQtCMJQBioAfAAdZjnwXQDviwnC8KkAjiLIqAAGo0HAu5+W+bxSmwYRoAQwCcLATBhCmc4xPIF9OKgXxAOtBT4xBD8AElTlPc5KC2HY9gAKhXeCKKgPQpgQSNQIg4RcLYPipnsrSLBABAIGwQQ33cq9nMoe9NM4vTOOnWd52DeVdJxc4X1feK51Cu4TIoLEtIRFlqDAc5zg-FUHOuCDKu+HUoCXcgEROCgyoguR5L8iwUTYC4kQRbqqgnAbqhqwDSSQWwkqVTqaUHWwGqvQdpRlCAHVAaaZosI0LXmqNCvZCspRWtaQA2zbBWFOz8Wy8hcs-BF0DKNKERcTxltlXAzr87wcVGqBJrlL7cQchFKBc05SuACcAEJoahkHfgAQXIchqBANKQcoZCouZRVCm8HwwwMywUJZbsKGdBrzi+bV6Q4yjhFQpgWTZUhshIOmaBkod8aVbbeU+Z4EGEAA5TAWTdCgEFodC7guD8GTrGHhZF6gRfOMWJZa8qcSVPHjnSe5WruORXgod5sB3IXqLNuclEWukEWFpmWZOdnHqgAAyT2btQRiWPQdiOtti2HbIJ3GeZ1ngHd6A0C52SJMy+cQ-t8awFxog4Soty7j9tAHwAYgfd7U+wMOIAjl3o9j3z4TLiuq6jt2OedCQVCTyYEpeQ3Q-TzPrdz6MGIL5jA7rwDk57t407JBUs++SMQSYZA0A-BeLGA6NzjwPAJ8Q4nHv4WXh+n837YujGHJz85oaP4AT-e+-H5Hpi36OjAyhf-2x6Dv63X9NQOADlYqARXsJNAz9qpwlAW+YmdwpBfglL+NA-5nhLzfLOZ0AAGDeQEQI7z3vTA+wJP7H3omfO25dL6-WvtRKBedX4lz-ptWgWCRKkT+kCHYWAIAgLwdnaid8v4YQ-gw4eP9x7EK0gAgMwCtKwLhAg4Q2FZxJGWgXB8DllGqPIOojh4j57PCng3Aa1MHL+FsA+FeD4gZQFem4KAK8FRyD6sowqxVzgIOuuYrSlimIINsQ5BxdMEEKhxBE3mVp0q2iYJGXCTApCwhJsIMme0qYAAMCAwAAKIMCgIjKAAAmbJeSoAACEoAAGYCCFzfEoWgBThIVIIIjAAIm0pp5SCAAAVWhGRFvkxGBAMm6wIC+IAA

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment