infogulch · December 6, 2018 03:49
diff --git a/utf8.cpp b/utf8.cpp
 // decode accepts a byte (%first) and a vector of the next 3 bytes (%cont, the possible
 // continuation bytes) and returns a decoded code point in an integer, and the number of bytes
 // consumed (including the first byte). The vector may contain NaR bytes if at the end of a buffer.
 // If no multi-byte prefix matches, or the continuation-byte check fails, it returns -1 as the
 // code point and consumes one byte.
 F('decode') %first, %cont;
 // fast path for 1 byte
 con(v(0xe0, 0xf0, 0xf8))   %prefmask, //R0    # masks of the three possible prefixes
 lssu(%first, 0x80)         %onebyte,  //  E0  # check if the first byte is < 0x80
 andlu(%first, %prefmask)   %masked,   //  E1  # bit-and with bitmasks of 3 possible prefixes
 retntr(%onebyte, %first, 1);          //    F0# if < 0x80 return the first byte & consume one byte

 // 2-4 byte decoding:
 con(v(0xc0, 0xe0, 0xf0))   %prefixes, //R0    # the first three prefixes themselves
 eqlu(%masked, %prefixes)   %picked,   //  E0  # compare the first byte with the 3 possible prefixes
 andlu(%cont, 0xc0)         %conhi,    //  E1  # high bits of continuation bytes (x in 0bxxyyyyyy)
 andlu(%cont, 0x3f)         %conlo;    //  E2  # low bits of continuation bytes (y in 0bxxyyyyyy)

 con(v(2, 3, 4, 5))         %shufidxa, //R0    # adjusted shuffle indexes
 smearx(%picked)            %excluded, //  E0  # mark all bytes excluded from decoding
 any(%picked)               %matched,  //  E1  # find if any of the prefixes match
 eqlu(%conhi, 0x80)         %contest,  //  E2  # check if high bits of continuation bytes match
 left(%picked)              %idx;      //  E3  # index of the last continuation byte
 retnfl(%matched, -1, 1);              //    F0# if no prefixes match, return error and consume one byte

 con(v(0x1f, 0xf, 0x7))     %valmask,  //R0    # bitmasks of codepoint bits for each prefix
 con(v(18, 12, 6, 0))       %shifts,   //R1    # a list of shift amounts
 extract(%valmask, %idx)    %mask,     //  E0  # extract out the correct bitmask for the first byte
 notl(%excluded)            %included, //  E1  # smearx+notl makes a logical bool vec of bytes to decode
 sublu(%shufidxa, %idx)     %shufidx,  //  E2  # shuffle indexes for shift amounts
 add1(%idx)                 %cnt;      //  E3  # count of continuation bytes to decode

 // NOTE(review): the op below is spelled andu while every other bit-and in this function is
 // andlu -- confirm which mnemonic is intended.
 shuffle(%shifts, %shufidx) %shift,    //  E0  # shuffle the shifts to match the codepoint layout
 andu(%first, %mask)        %fbits,    //  E1  # mask off the codepoint bits from the first byte
 imp(%included, %contest)   %conck;    //  E2  # the bytes to decode imply the matched continuation bytes
 shuffle(%conlo, v(3, 0, 1, 2)) %cp0;  //  E3  # shift continuation bytes to make room for the first byte

 // NOTE(review): the first inject() below is used as (vector, value, index). The second one passes
 // (%included, %cnt, 1), i.e. it writes %cnt into element 1. If the intent is to set element %cnt
 // of the mask, its last two operands look swapped -- confirm against the inject() operand order.
 inject(%cp0, %fbits, 0)    %cp1,      //  E0  # put the bits from the first byte into the codepoint vec
 all(%conck)                %conok,    //  E1  # implies should return all true if everything is ok
 inject(%included, %cnt, 1) %cpbytes,  //  E2  # selector used by pick() below (original note was truncated: "set the last")
 retnfl(%conok, -1, 1);                //    F0# else return an error and consume one byte

 con(v(0, 0, 0, 0))         %zero,     //R0    # zeros
 shiftluv(%cp1, %shift)     %cp2,      //  E0  # shift each set of codepoint bits to their final position
 pick(%cpbytes, %cp2, %zero)%cp3;      //   P0 # fill non-cp elements (that may be NaR or None) with zero

 // OR-reduce the vector and return the result
 alternate(%cp3, %cp3)      %cp4 %cp5; Nope;

 orl(%cp4, %cp5)            %cp6;

 alternate(%cp6, %cp6)      %cp7 %cp8; Nope;

 orl(%cp7, %cp8)            %cp9;

 // The separator after %consumed was missing; it is a comma to match the other return groups
 // in this function, which join the value-producing ops to the return op with commas.
 extract(%cp9, 0)           %cp,       //      # element 0 of the reduced vector is the code point
 add1(%cnt)                 %consumed, //      # continuation count plus one for the first byte
 retn(%cp, %consumed);                 //      # return the code point and the bytes consumed
 ;
	// decode accepts a byte (%first) and a vector of the next 3 bytes (%cont, the possible
	// continuation bytes) and returns a decoded code point in an integer, and the number of bytes
	// consumed (including the first byte). The vector may contain NaR bytes if at the end of a buffer.
	// If no multi-byte prefix matches, or the continuation-byte check fails, it returns -1 as the
	// code point and consumes one byte.
	F('decode') %first, %cont;
	// fast path for 1 byte
	con(v(0xe0, 0xf0, 0xf8)) %prefmask, //R0 # masks of the three possible prefixes
	lssu(%first, 0x80) %onebyte, // E0 # check if the first byte is < 0x80
	andlu(%first, %prefmask) %masked, // E1 # bit-and with bitmasks of 3 possible prefixes
	retntr(%onebyte, %first, 1); // F0# if < 0x80 return the first byte & consume one byte

	// 2-4 byte decoding:
	con(v(0xc0, 0xe0, 0xf0)) %prefixes, //R0 # the first three prefixes themselves
	eqlu(%masked, %prefixes) %picked, // E0 # compare the first byte with the 3 possible prefixes
	andlu(%cont, 0xc0) %conhi, // E1 # high bits of continuation bytes (x in 0bxxyyyyyy)
	andlu(%cont, 0x3f) %conlo; // E2 # low bits of continuation bytes (y in 0bxxyyyyyy)

	con(v(2, 3, 4, 5)) %shufidxa, //R0 # adjusted shuffle indexes
	smearx(%picked) %excluded, // E0 # mark all bytes excluded from decoding
	any(%picked) %matched, // E1 # find if any of the prefixes match
	eqlu(%conhi, 0x80) %contest, // E2 # check if high bits of continuation bytes match
	left(%picked) %idx; // E3 # index of the last continuation byte
	retnfl(%matched, -1, 1); // F0# if no prefixes match, return error and consume one byte

	con(v(0x1f, 0xf, 0x7)) %valmask, //R0 # bitmasks of codepoint bits for each prefix
	con(v(18, 12, 6, 0)) %shifts, //R1 # a list of shift amounts
	extract(%valmask, %idx) %mask, // E0 # extract out the correct bitmask for the first byte
	notl(%excluded) %included, // E1 # smearx+notl makes a logical bool vec of bytes to decode
	sublu(%shufidxa, %idx) %shufidx, // E2 # shuffle indexes for shift amounts
	add1(%idx) %cnt; // E3 # count of continuation bytes to decode

	// NOTE(review): the op below is spelled andu while every other bit-and in this function is
	// andlu -- confirm which mnemonic is intended.
	shuffle(%shifts, %shufidx) %shift, // E0 # shuffle the shifts to match the codepoint layout
	andu(%first, %mask) %fbits, // E1 # mask off the codepoint bits from the first byte
	imp(%included, %contest) %conck; // E2 # the bytes to decode imply the matched continuation bytes
	shuffle(%conlo, v(3, 0, 1, 2)) %cp0; // E3 # shift continuation bytes to make room for the first byte

	// NOTE(review): the first inject() below is used as (vector, value, index). The second one passes
	// (%included, %cnt, 1), i.e. it writes %cnt into element 1. If the intent is to set element %cnt
	// of the mask, its last two operands look swapped -- confirm against the inject() operand order.
	inject(%cp0, %fbits, 0) %cp1, // E0 # put the bits from the first byte into the codepoint vec
	all(%conck) %conok, // E1 # implies should return all true if everything is ok
	inject(%included, %cnt, 1) %cpbytes, // E2 # selector used by pick() below (original note was truncated: "set the last")
	retnfl(%conok, -1, 1); // F0# else return an error and consume one byte

	con(v(0, 0, 0, 0)) %zero, //R0 # zeros
	shiftluv(%cp1, %shift) %cp2, // E0 # shift each set of codepoint bits to their final position
	pick(%cpbytes, %cp2, %zero)%cp3; // P0 # fill non-cp elements (that may be NaR or None) with zero

	// OR-reduce the vector and return the result
	alternate(%cp3, %cp3) %cp4 %cp5; Nope;

	orl(%cp4, %cp5) %cp6;

	alternate(%cp6, %cp6) %cp7 %cp8; Nope;

	orl(%cp7, %cp8) %cp9;

	// The separator after %consumed was missing; it is a comma to match the other return groups
	// in this function, which join the value-producing ops to the return op with commas.
	extract(%cp9, 0) %cp, // # element 0 of the reduced vector is the code point
	add1(%cnt) %consumed, // # continuation count plus one for the first byte
	retn(%cp, %consumed); // # return the code point and the bytes consumed
	;