nandakoryaaa · June 10, 2025 15:01
diff --git a/dict.c b/dict.c
 #include <stdio.h>
 #include <stdlib.h>
 #include <stdint.h>
 #include <windows.h>

 #define LETTER_CNT 5

 #define DICT_NODE_CNT 16384
 #define DICT_NIL 16384

 typedef struct {
 	uint8_t letters[LETTER_CNT];
 } InputWord;

 typedef struct {
 	uint8_t code;
 	uint16_t next_index;
 	uint16_t sub_index;
 } DictNode;

 typedef struct {
 	uint16_t cnt;
 	DictNode* mem;
 } DictNodeManager;

 uint8_t dict_is_empty(DictNodeManager* mgr)
 {
 	return mgr->cnt == 0;
 }

 DictNode dict_node_init(uint8_t code)
 {
 	return (DictNode) {
 		.code = code,
 		.next_index = DICT_NIL,
 		.sub_index = DICT_NIL
 	};
 }

 DictNode dict_node_get(DictNodeManager* mgr, uint16_t index)
 {
 	if (index < mgr->cnt) {
 		return mgr->mem[index];
 	}
 	return dict_node_init(0);
 }

 uint8_t dict_node_update(
 	DictNodeManager* mgr, uint16_t index, DictNode node
 ) {
 	if (index < mgr->cnt) {
 		mgr->mem[index] = node;
 		return 1;
 	}
 	return 0;
 }

 uint16_t dict_node_allocate(DictNodeManager* mgr)
 {
 	uint16_t pos = mgr->cnt;
 	if (pos >= DICT_NODE_CNT) {
 		return DICT_NIL;
 	}

 	mgr->cnt++;
 	return pos;
 }

 uint16_t dict_add_subword(
 	DictNodeManager* mgr, const uint8_t* letters, uint16_t offset
 ) {
 	uint16_t base_index = dict_node_allocate(mgr);
 	if (base_index != DICT_NIL) {
 		uint16_t index = base_index;
 		DictNode node = dict_node_init(letters[offset++]);
 		dict_node_update(mgr, index, node);

 		while (offset < LETTER_CNT) {
 			node.sub_index = dict_node_allocate(mgr);
 			dict_node_update(mgr, index, node);

 			DictNode sub_node = dict_node_init(letters[offset++]);
 			dict_node_update(mgr, node.sub_index, sub_node);
 			index = node.sub_index;
 			node = sub_node;
 		}
 	}
 	return base_index;
 }

 uint8_t dict_add_word(DictNodeManager* mgr, const char* letters)
 {
 	if (dict_is_empty(mgr)) {
 		 return dict_add_subword(mgr, letters, 0) != DICT_NIL;
 	}
 	uint16_t index = 0;
 	size_t i = 0;
 	while (i < LETTER_CNT) {
 		DictNode node = dict_node_get(mgr, index);
 		if (node.code == letters[i]) {
 			i++;
 			if (node.sub_index == DICT_NIL) {
 				node.sub_index = dict_add_subword(mgr, letters, i);
 				return dict_node_update(mgr, index, node);
 			}
 			index = node.sub_index;
 		} else {
 			if (node.next_index == DICT_NIL) {
 				node.next_index = dict_add_subword(mgr, letters, i);
 				return dict_node_update(mgr, index, node);
 			}
 			index = node.next_index;
 		}
 	}
 	return 1;
 }

 uint8_t dict_find_word(DictNodeManager* mgr, const uint8_t* letters)
 {
 	if (dict_is_empty(mgr)) {
 		 return 0;
 	}

 	uint16_t index = 0;
 	size_t i = 0;
 	while (i < LETTER_CNT) {
 		DictNode node = dict_node_get(mgr, index);
 		if (node.code == letters[i]) {
 			i++;
 			if (node.sub_index == DICT_NIL) {
 				break;
 			}
 			index = node.sub_index;
 		} else {
 			if (node.next_index == DICT_NIL) {
 				break;
 			}
 			index = node.next_index;
 		}
 	}

 	return i == LETTER_CNT;
 }

 InputWord from_utf16(const wchar_t* wbuf)
 {
 	InputWord input;
 	for (size_t pos = 0; pos < LETTER_CNT; pos++) {
 		wchar_t wc = wbuf[pos];
 		if (wc < 1040 || wc > 1103) {
 			input.letters[0] = 0;
 			return input;
 		}
 		if (wc > 1071) {
 			wc -= 32;
 		}
 		input.letters[pos] = wc - 1039;
 	}

 	return input;	
 }

 InputWord from_utf8(const uint16_t* wbuf)
 {
 	InputWord input;
 	for (size_t pos = 0; pos < LETTER_CNT; pos++) {
 		wchar_t wc = wbuf[pos];
 		if ((wc & 0xE0) != 0xC0) {
 			input.letters[0] = 0;
 			return input;
 		}

 		wc = ((wc >> 8) & 63) | ((wc & 31) << 6);

 		if (wc < 1040 || wc > 1103) {
 			input.letters[0] = 0;
 			return input;
 		}
 		if (wc > 1071) {
 			wc -= 32;
 		}
 		input.letters[pos] = wc - 1039;
 	}

 	return input;
 }


 InputWord get_word()
 {
 	void* stdin_handle = GetStdHandle(STD_INPUT_HANDLE);
 	wchar_t wbuf[LETTER_CNT + 3];

 	unsigned long int char_cnt = 0;
 	uint32_t read_cnt = 0;

 	do {
 		ReadConsoleW(
 			stdin_handle, wbuf, LETTER_CNT + 2, &char_cnt, NULL
 		);
 		read_cnt++;
 	} while (wbuf[char_cnt - 1] != '\n');

 	if (read_cnt > 1) {
 		return (InputWord) { .letters = {0} };
 	}

 	return from_utf16(wbuf);
 }

 void main(int argc, char** argv)
 {
 	DictNodeManager mgr = (DictNodeManager) {
 		.cnt = 0,
 		.mem = malloc(DICT_NODE_CNT * sizeof(DictNode))
 	};

 	uint16_t buf[128];
 	uint8_t error = 0;
 	size_t cnt = 0;

 	FILE* fp = fopen(argv[1], "r");

 	while (!feof(fp)) {
 		fgets((char*)buf, LETTER_CNT * sizeof(wchar_t) + 6, fp);
 		InputWord word = from_utf8(buf);
 		if (word.letters[0] == 0) {
 			printf("error reading word #%u: %s\n", cnt, (char*)buf);
 			error = 1;
 			break;
 		}
 		if (!dict_add_word(&mgr, word.letters)) {
 			printf("error adding word #%u: %s\n", cnt, (char*)buf);
 			error = 1;
 			break;
 		}
 		cnt++;
 	}

 	fclose(fp);

 	if (!error) {

 		printf("added %lu words, dict size=%u\n", cnt, mgr.cnt);
 		while (1) {
 			InputWord w = get_word();
 			printf("%u\n", dict_find_word(&mgr, w.letters));
 		}
 	}

 	free(mgr.mem);
 }
	#include <stdio.h>
	#include <stdlib.h>
	#include <stdint.h>
	#include <windows.h>

	#define LETTER_CNT 5

	#define DICT_NODE_CNT 16384
	#define DICT_NIL 16384

	typedef struct {
	uint8_t letters[LETTER_CNT];
	} InputWord;

	typedef struct {
	uint8_t code;
	uint16_t next_index;
	uint16_t sub_index;
	} DictNode;

	typedef struct {
	uint16_t cnt;
	DictNode* mem;
	} DictNodeManager;

	uint8_t dict_is_empty(DictNodeManager* mgr)
	{
	return mgr->cnt == 0;
	}

	DictNode dict_node_init(uint8_t code)
	{
	return (DictNode) {
	.code = code,
	.next_index = DICT_NIL,
	.sub_index = DICT_NIL
	};
	}

	DictNode dict_node_get(DictNodeManager* mgr, uint16_t index)
	{
	if (index < mgr->cnt) {
	return mgr->mem[index];
	}
	return dict_node_init(0);
	}

	uint8_t dict_node_update(
	DictNodeManager* mgr, uint16_t index, DictNode node
	) {
	if (index < mgr->cnt) {
	mgr->mem[index] = node;
	return 1;
	}
	return 0;
	}

	uint16_t dict_node_allocate(DictNodeManager* mgr)
	{
	uint16_t pos = mgr->cnt;
	if (pos >= DICT_NODE_CNT) {
	return DICT_NIL;
	}

	mgr->cnt++;
	return pos;
	}

	uint16_t dict_add_subword(
	DictNodeManager* mgr, const uint8_t* letters, uint16_t offset
	) {
	uint16_t base_index = dict_node_allocate(mgr);
	if (base_index != DICT_NIL) {
	uint16_t index = base_index;
	DictNode node = dict_node_init(letters[offset++]);
	dict_node_update(mgr, index, node);

	while (offset < LETTER_CNT) {
	node.sub_index = dict_node_allocate(mgr);
	dict_node_update(mgr, index, node);

	DictNode sub_node = dict_node_init(letters[offset++]);
	dict_node_update(mgr, node.sub_index, sub_node);
	index = node.sub_index;
	node = sub_node;
	}
	}
	return base_index;
	}

	uint8_t dict_add_word(DictNodeManager* mgr, const char* letters)
	{
	if (dict_is_empty(mgr)) {
	return dict_add_subword(mgr, letters, 0) != DICT_NIL;
	}
	uint16_t index = 0;
	size_t i = 0;
	while (i < LETTER_CNT) {
	DictNode node = dict_node_get(mgr, index);
	if (node.code == letters[i]) {
	i++;
	if (node.sub_index == DICT_NIL) {
	node.sub_index = dict_add_subword(mgr, letters, i);
	return dict_node_update(mgr, index, node);
	}
	index = node.sub_index;
	} else {
	if (node.next_index == DICT_NIL) {
	node.next_index = dict_add_subword(mgr, letters, i);
	return dict_node_update(mgr, index, node);
	}
	index = node.next_index;
	}
	}
	return 1;
	}

	uint8_t dict_find_word(DictNodeManager* mgr, const uint8_t* letters)
	{
	if (dict_is_empty(mgr)) {
	return 0;
	}

	uint16_t index = 0;
	size_t i = 0;
	while (i < LETTER_CNT) {
	DictNode node = dict_node_get(mgr, index);
	if (node.code == letters[i]) {
	i++;
	if (node.sub_index == DICT_NIL) {
	break;
	}
	index = node.sub_index;
	} else {
	if (node.next_index == DICT_NIL) {
	break;
	}
	index = node.next_index;
	}
	}

	return i == LETTER_CNT;
	}

	InputWord from_utf16(const wchar_t* wbuf)
	{
	InputWord input;
	for (size_t pos = 0; pos < LETTER_CNT; pos++) {
	wchar_t wc = wbuf[pos];
	if (wc < 1040 \|\| wc > 1103) {
	input.letters[0] = 0;
	return input;
	}
	if (wc > 1071) {
	wc -= 32;
	}
	input.letters[pos] = wc - 1039;
	}

	return input;
	}

	InputWord from_utf8(const uint16_t* wbuf)
	{
	InputWord input;
	for (size_t pos = 0; pos < LETTER_CNT; pos++) {
	wchar_t wc = wbuf[pos];
	if ((wc & 0xE0) != 0xC0) {
	input.letters[0] = 0;
	return input;
	}

	wc = ((wc >> 8) & 63) \| ((wc & 31) << 6);

	if (wc < 1040 \|\| wc > 1103) {
	input.letters[0] = 0;
	return input;
	}
	if (wc > 1071) {
	wc -= 32;
	}
	input.letters[pos] = wc - 1039;
	}

	return input;
	}


	InputWord get_word()
	{
	void* stdin_handle = GetStdHandle(STD_INPUT_HANDLE);
	wchar_t wbuf[LETTER_CNT + 3];

	unsigned long int char_cnt = 0;
	uint32_t read_cnt = 0;

	do {
	ReadConsoleW(
	stdin_handle, wbuf, LETTER_CNT + 2, &char_cnt, NULL
	);
	read_cnt++;
	} while (wbuf[char_cnt - 1] != '\n');

	if (read_cnt > 1) {
	return (InputWord) { .letters = {0} };
	}

	return from_utf16(wbuf);
	}

	void main(int argc, char** argv)
	{
	DictNodeManager mgr = (DictNodeManager) {
	.cnt = 0,
	.mem = malloc(DICT_NODE_CNT * sizeof(DictNode))
	};

	uint16_t buf[128];
	uint8_t error = 0;
	size_t cnt = 0;

	FILE* fp = fopen(argv[1], "r");

	while (!feof(fp)) {
	fgets((char)buf, LETTER_CNT sizeof(wchar_t) + 6, fp);
	InputWord word = from_utf8(buf);
	if (word.letters[0] == 0) {
	printf("error reading word #%u: %s\n", cnt, (char*)buf);
	error = 1;
	break;
	}
	if (!dict_add_word(&mgr, word.letters)) {
	printf("error adding word #%u: %s\n", cnt, (char*)buf);
	error = 1;
	break;
	}
	cnt++;
	}

	fclose(fp);

	if (!error) {

	printf("added %lu words, dict size=%u\n", cnt, mgr.cnt);
	while (1) {
	InputWord w = get_word();
	printf("%u\n", dict_find_word(&mgr, w.letters));
	}
	}

	free(mgr.mem);
	}