Last active
April 3, 2023 13:10
-
-
Save dotnetchris/8e99ef70a6fcb3bd445ef1f3505f7087 to your computer and use it in GitHub Desktop.
Base58 is case SENSITIVE alphanumeric ELIMINATING: 0 (zero), I (uppercase), O (uppercase), l (lowercase)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// Base58 is a case-SENSITIVE alphanumeric encoding that ELIMINATES four look-alike characters:
/// 0 (zero), I (uppercase i), O (uppercase o) and l (lowercase L).
/// This reduction of characters eliminates the majority of human transcription errors.
/// </summary>
public class Base58
{
    // Shared codec configured with the 58-character alphabet (digits, then upper case, then lower case).
    private static readonly BaseN Codec =
        new BaseN("123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz");

    /// <summary>Encodes a <see cref="long"/> as Base58 text by delegating to <see cref="BaseN.Encode(long)"/>.</summary>
    /// <param name="long">The value to encode.</param>
    /// <returns>The Base58 text produced by the shared codec.</returns>
    public static string Encode(long @long)
    {
        return Codec.Encode(@long);
    }

    /// <summary>Decodes Base58 text back into a <see cref="long"/> by delegating to <see cref="BaseN.DecodeLong"/>.</summary>
    /// <param name="encoded">The Base58 text to decode.</param>
    /// <param name="truncate">Forwarded unchanged to the codec; <c>true</c> permits input that is too long for a long.</param>
    /// <returns>The decoded value.</returns>
    public static long DecodeLong(string encoded, bool truncate = false)
    {
        return Codec.DecodeLong(encoded, truncate);
    }

    /// <summary>Encodes raw bytes as Base58 text by delegating to <see cref="BaseN.Encode(byte[])"/>.</summary>
    /// <param name="data">The bytes to encode.</param>
    /// <returns>The Base58 text produced by the shared codec.</returns>
    public static string Encode(byte[] data)
    {
        return Codec.Encode(data);
    }

    /// <summary>Decodes Base58 text into raw bytes by delegating to <see cref="BaseN.Decode"/>.</summary>
    /// <param name="encoded">The Base58 text to decode.</param>
    /// <returns>The decoded bytes.</returns>
    public static byte[] Decode(string encoded)
    {
        return Codec.Decode(encoded);
    }
}
/// <summary>
/// Encodes and decodes byte arrays and <see cref="long"/> values as text in an arbitrary base.
/// The digits are the characters of a caller-supplied alphabet; a character's position in the
/// alphabet is its digit value. Leading zero bytes are preserved as leading zero digits.
/// Inspired by https://gist.github.com/CodesInChaos/3175971
/// </summary>
public class BaseN
{
    /// <summary>
    /// Creates a codec for the given alphabet.
    /// </summary>
    /// <param name="characterSet">
    /// The digits of the base, in digit-value order. Must contain at least two characters and no duplicates.
    /// The characters do not have to be in sorted order.
    /// </param>
    /// <exception cref="ArgumentNullException"><paramref name="characterSet"/> is null.</exception>
    /// <exception cref="ArgumentException">The alphabet has fewer than two characters or contains duplicates.</exception>
    public BaseN(string characterSet)
    {
        if (characterSet == null)
        {
            throw new ArgumentNullException(nameof(characterSet));
        }

        // An empty alphabet would divide by zero and a one-character alphabet would never
        // terminate the division loop in Encode, so reject both up front.
        if (characterSet.Length < 2)
        {
            throw new ArgumentException("The character set must contain at least two characters.", nameof(characterSet));
        }

        // Duplicate digits make decoding ambiguous.
        if (characterSet.Distinct().Count() != characterSet.Length)
        {
            throw new ArgumentException("The character set must not contain duplicate characters.", nameof(characterSet));
        }

        CharacterSet = characterSet;
        EncodedMaxLong = Encode(long.MaxValue);
    }

    /// <summary>The alphabet; the character at index <c>i</c> is the digit with value <c>i</c>.</summary>
    public string CharacterSet { get; }

    /// <summary>The numeric base, i.e. the number of characters in <see cref="CharacterSet"/>.</summary>
    public int Base => CharacterSet.Length;

    /// <summary>A copy of the alphabet as a character array (a new array on every access).</summary>
    public char[] CharMap => CharacterSet.ToCharArray();

    /// <summary>The encoding of <see cref="long.MaxValue"/>, used by <see cref="DecodeLong"/> as a length limit.</summary>
    public string EncodedMaxLong { get; }

    /// <summary>
    /// Encodes the bytes of a <see cref="long"/> as returned by <see cref="BitConverter.GetBytes(long)"/>.
    /// </summary>
    /// <param name="long">The value to encode.</param>
    /// <returns>The encoded text.</returns>
    /// <remarks>
    /// NOTE(review): BitConverter uses the machine's byte order, so the text produced here is
    /// presumably only portable between machines of the same endianness - confirm before sharing ids across platforms.
    /// </remarks>
    public string Encode(long @long)
    {
        return Encode(BitConverter.GetBytes(@long));
    }

    /// <summary>
    /// Decodes text produced by <see cref="Encode(long)"/> back into a <see cref="long"/>.
    /// </summary>
    /// <param name="encoded">The text to decode.</param>
    /// <param name="truncate">
    /// When <c>false</c> (default), input that decodes to more than eight bytes is rejected.
    /// When <c>true</c>, only the first eight decoded bytes are used.
    /// </param>
    /// <returns>The decoded value.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="encoded"/> is null.</exception>
    /// <exception cref="ArgumentException">
    /// The input is too long and <paramref name="truncate"/> is <c>false</c>, or it decodes to fewer than eight bytes.
    /// </exception>
    /// <exception cref="FormatException">The input contains a character that is not in the alphabet.</exception>
    public long DecodeLong(string encoded, bool truncate = false)
    {
        if (encoded == null)
        {
            throw new ArgumentNullException(nameof(encoded));
        }

        // Cheap pre-check on the text length before doing any big-integer work.
        if (encoded.Length > EncodedMaxLong.Length && !truncate)
        {
            throw new ArgumentException($"{encoded} exceeds long.MaxValue : {EncodedMaxLong} and will truncate. " +
                                        "Set truncate parameter to true to allow");
        }

        var decoded = Decode(encoded);

        // BitConverter needs eight bytes; fail with a message that names the real problem.
        if (decoded.Length < sizeof(long))
        {
            throw new ArgumentException(
                $"{encoded} decodes to {decoded.Length} byte(s) but {sizeof(long)} are required for a long.",
                nameof(encoded));
        }

        // The text-length check alone is not enough: a string no longer than EncodedMaxLong
        // can still decode to more than eight bytes.
        if (decoded.Length > sizeof(long) && !truncate)
        {
            throw new ArgumentException($"{encoded} exceeds long.MaxValue : {EncodedMaxLong} and will truncate. " +
                                        "Set truncate parameter to true to allow");
        }

        return BitConverter.ToInt64(decoded, 0);
    }

    /// <summary>
    /// Encodes a byte array, read as one big-endian unsigned integer, into text.
    /// Each leading zero byte becomes one leading zero digit (the first alphabet character).
    /// </summary>
    /// <param name="data">The bytes to encode; an empty array yields an empty string.</param>
    /// <returns>The encoded text.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
    public string Encode(byte[] data)
    {
        if (data == null)
        {
            throw new ArgumentNullException(nameof(data));
        }

        // Interpret the bytes as a big-endian unsigned integer.
        var intData = data.Aggregate<byte, BigInteger>(0, (current, @byte) => current * 256 + @byte);

        // Collect digits least-significant first, then reverse once at the end;
        // this avoids re-copying the whole string for every digit.
        var digits = new System.Text.StringBuilder();
        while (intData > 0)
        {
            var remainder = (int)(intData % Base);
            intData /= Base;
            digits.Append(CharacterSet[remainder]);
        }

        // The integer value cannot represent leading zero bytes, so emit one zero digit per byte.
        var zeroDigit = CharacterSet[0];
        for (var i = 0; i < data.Length && data[i] == 0; i++)
        {
            digits.Append(zeroDigit);
        }

        var chars = new char[digits.Length];
        digits.CopyTo(0, chars, 0, digits.Length);
        Array.Reverse(chars);
        return new string(chars);
    }

    /// <summary>
    /// Decodes text produced by <see cref="Encode(byte[])"/> back into bytes.
    /// </summary>
    /// <param name="encoded">The text to decode; an empty string yields an empty array.</param>
    /// <returns>The decoded bytes, including one zero byte per leading zero digit.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="encoded"/> is null.</exception>
    /// <exception cref="FormatException">The input contains a character that is not in the alphabet.</exception>
    public byte[] Decode(string encoded)
    {
        if (encoded == null)
        {
            throw new ArgumentNullException(nameof(encoded));
        }

        // Accumulate the digits into one big integer.
        BigInteger intData = 0;
        for (var i = 0; i < encoded.Length; i++)
        {
            // Ordinal position lookup; works for any alphabet order, unlike a binary search.
            var digitValue = CharacterSet.IndexOf(encoded[i]);
            if (digitValue < 0)
            {
                throw new FormatException($"Invalid Base{Base} character `{encoded[i]}` at position {i}");
            }

            intData = intData * Base + digitValue;
        }

        // Leading zero digits stand for leading zero bytes.
        var zeroDigit = CharacterSet[0];
        var leadingZeroCount = encoded.TakeWhile(c => c == zeroDigit).Count();
        var leadingZeros = Enumerable.Repeat((byte)0, leadingZeroCount);

        // BigInteger yields little-endian bytes, possibly with an extra zero sign byte;
        // flip to big-endian and drop the zeros that end up in front.
        var bigEndian = intData.ToByteArray();
        Array.Reverse(bigEndian);
        var bytesWithoutLeadingZeros = bigEndian.SkipWhile(b => b == 0);

        return leadingZeros.Concat(bytesWithoutLeadingZeros).ToArray();
    }
}
Source is public domain, and thanks @CodesInChaos for the larger work
I previously mentioned using Flakey, at this point I recommend only using https://github.com/RobThree/IdGen
Flakey was a fork of IdGen and is less supported at this point.
Made it so that encoder can be configurable base
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
So I'm using this with https://github.com/joshclark/Flakey to generate (mostly) unique long identifiers. Instead of showing humans a giant sequence of digits, I want a friendly identifier that is also fairly easy to say aloud and avoids the pitfall of similar-looking letters.
So I end up using this like:
So this would take the numeric id 220242127914405888 and make "1eBQ-u72b-Le". Now if a human has to either copy it by hand or read it aloud, it has a natural grouping of 4 characters. Then you can take "1eBQu72bLe" back to 220242127914405888 for key lookups.
But I'd say it is a bad experience to require rigid case sensitivity for a human-centered process. Using the hyphen as a stop word, it's easy to tokenize this text, and then you can easily apply a simple in-memory ranking using the Levenshtein distance of each token. You could also push this down to the database and use %'s in SQL or whatever full-text comparison you want.