test 16 bit trie

This commit is contained in:
Matt Skalecki 2024-02-22 12:26:45 -05:00
commit 0dd58e2d88
13 changed files with 1893 additions and 65 deletions

Binary file not shown.

File diff suppressed because it is too large Load diff

View file

@ -1,17 +0,0 @@
# Copyright 2021-2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
aa -> ab
baa -> bac
caa -> cad

View file

@ -1,7 +1,8 @@
{ {
"rules_file_name": "./magickey_dict.txt", "rules_file_name": "./magickey_dict.txt",
"magic_chars": "☆✵★✪", "magic_chars": "👆👍★✪",
"wordbreak_char": ":", "wordbreak_char": ":",
"output_func_chars": "↻⇑",
"comment_char": "#", "comment_char": "#",
"separator": "⇒" "separator": "⇒"
} }

View file

@ -0,0 +1,52 @@
// Copyright 2024 QMK
// SPDX-License-Identifier: GPL-2.0-or-later
/*******************************************************************************
88888888888 888 d8b .d888 d8b 888 d8b
888 888 Y8P d88P" Y8P 888 Y8P
888 888 888 888
888 88888b. 888 .d8888b 888888 888 888 .d88b. 888 .d8888b
888 888 "88b 888 88K 888 888 888 d8P Y8b 888 88K
888 888 888 888 "Y8888b. 888 888 888 88888888 888 "Y8888b.
888 888 888 888 X88 888 888 888 Y8b. 888 X88
888 888 888 888 88888P' 888 888 888 "Y8888 888 88888P'
888 888
888 888
888 888
.d88b. .d88b. 88888b. .d88b. 888d888 8888b. 888888 .d88b. .d88888
d88P"88b d8P Y8b 888 "88b d8P Y8b 888P" "88b 888 d8P Y8b d88" 888
888 888 88888888 888 888 88888888 888 .d888888 888 88888888 888 888
Y88b 888 Y8b. 888 888 Y8b. 888 888 888 Y88b. Y8b. Y88b 888
"Y88888 "Y8888 888 888 "Y8888 888 "Y888888 "Y888 "Y8888 "Y88888
888
Y8b d88P
"Y88P"
*******************************************************************************/
#pragma once
// Autocorrection dictionary with longest match semantics:
// Autocorrection dictionary (6 entries):
// j☆ -> just
// j☆☆ -> j☆ment
// ☆☆ -> ☆n
// j✵ -> join
// jud☆ -> judge
// jud☆☆ -> jud☆ment
// Length, in keys, of the shortest and longest pattern in the dictionary
// listed above (the example pattern is shown next to each value).
#define AUTOCORRECT_MIN_LENGTH 2 // "j☆"
#define AUTOCORRECT_MAX_LENGTH 5 // "jud☆☆"
// Element counts of the magickey_data and magickey_completions_data arrays below.
#define DICTIONARY_SIZE 37
#define COMPLETIONS_SIZE 18
// NOTE(review): presumably the number of distinct magic keys the generator was
// configured with - confirm against the generator's config file.
#define MAGICKEY_COUNT 4
// Generated trie for the dictionary listed above, stored as DICTIONARY_SIZE
// 16-bit words. Do not edit by hand.
// NOTE(review): the words appear to follow a layout in which 0x8000 marks a
// match word (followed by an offset into magickey_completions_data), 0x4000
// marks the first word of a branch list, and 0x0000 terminates a branch list
// or a chain - confirm against the generator and the trie reader.
static const uint16_t magickey_data[DICTIONARY_SIZE] PROGMEM = {
0x4100, 0x0005, 0x0101, 0x0021, 0x0000, 0x4007, 0x000C, 0x000D, 0x0011, 0x0100, 0x0013, 0x0000, 0x0018, 0x000D, 0x0000, 0x8000,
0x0000, 0x8000, 0x0003, 0xC000, 0x0007, 0x4007, 0x001A, 0x000D, 0x001F, 0x0000, 0x0018, 0x000D, 0x0000, 0x8000, 0x0009, 0x8000,
0x0009, 0x000D, 0x0000, 0x8000, 0x000E
};
// Generated pool of NUL-terminated ASCII completion strings, packed back to
// back. Decoded, the bytes read: "ge", "ust", "n", "ment", "oin"
// (starting at byte offsets 0, 3, 7, 9 and 14 respectively).
static const uint8_t magickey_completions_data[COMPLETIONS_SIZE] PROGMEM = {
0x67, 0x65, 0x00, 0x75, 0x73, 0x74, 0x00, 0x6E, 0x00, 0x6D, 0x65, 0x6E, 0x74, 0x00, 0x6F, 0x69,
0x6E, 0x00
};

View file

@ -26,27 +26,54 @@
#pragma once #pragma once
// Autocorrection dictionary with longest match semantics: // Autocorrection dictionary with longest match semantics:
// Autocorrection dictionary (6 entries): // Autocorrection dictionary (25 entries):
// j☆ -> just // c👆 -> cy
// j☆☆ -> justment // p👆 -> py
// ☆☆ -> ☆n // d👆 -> dy
// j✵ -> join // y👆 -> yp
// jud☆ -> judge // g👆 -> gy
// jud☆☆ -> judgment // 👍 -> ↻
// i👍 -> ing
// a👍 -> and
// :👆 -> the
// :👍 -> for
// .👍 -> .:⇑
// j👆 -> just
// j👆👆 -> justment
// 👆👆 -> 👆n
// j👍 -> join
// jud👆 -> judge
// jud👆👆 -> judgment
// d★ -> develop
// d★t -> development
// d★r -> developer
// d★d -> developed
// :i👍 -> I
// :i👍m -> I'm
// :i👍d -> I'd
// :i👍l -> I'll
#define AUTOCORRECT_MIN_LENGTH 2 // "j☆" #define MAGICKEY_MIN_LENGTH 1 // "👍"
#define AUTOCORRECT_MAX_LENGTH 5 // "jud☆☆" #define MAGICKEY_MAX_LENGTH 5 // "jud👆👆"
#define DICTIONARY_SIZE 37 #define DICTIONARY_SIZE 134
#define COMPLETIONS_SIZE 18 #define COMPLETIONS_SIZE 64
#define MAGICKEY_COUNT 4 #define MAGICKEY_COUNT 4
static const uint16_t magickey_data[DICTIONARY_SIZE] PROGMEM = { static const uint16_t magickey_data[DICTIONARY_SIZE] PROGMEM = {
0x4100, 0x0005, 0x0101, 0x0021, 0x0000, 0x4007, 0x000C, 0x000D, 0x0011, 0x0100, 0x0013, 0x0000, 0x0018, 0x000D, 0x0000, 0x8000, 0x4007, 0x0011, 0x000F, 0x001F, 0x0010, 0x0025, 0x0015, 0x002B, 0x0017, 0x0030, 0x0102, 0x0035, 0x0100, 0x0039, 0x0101, 0x006B,
0x0000, 0x8000, 0x0003, 0xC000, 0x0007, 0x4007, 0x001A, 0x000D, 0x001F, 0x0000, 0x0018, 0x000D, 0x0000, 0x8001, 0x0009, 0x8000, 0x0000, 0x4102, 0x0016, 0x0101, 0x001A, 0x0000, 0x0007, 0x0000, 0x8000, 0x0000, 0x000C, 0x002C, 0x0000, 0x8000, 0x0003, 0x0101,
0x0009, 0x000D, 0x0000, 0x8000, 0x000E 0x000C, 0x002C, 0x0000, 0x8000, 0x0006, 0x0101, 0x000C, 0x002C, 0x0000, 0x8000, 0x000A, 0x0102, 0x0007, 0x0000, 0x8000, 0x000D,
0x0102, 0x0007, 0x0000, 0x8000, 0x0010, 0x0007, 0x0000, 0x8000, 0x0015, 0x402C, 0x004A, 0x0006, 0x004C, 0x0007, 0x004E, 0x000A,
0x0055, 0x000D, 0x0057, 0x0013, 0x0059, 0x001C, 0x005B, 0x0100, 0x005D, 0x0000, 0x8000, 0x001C, 0x8000, 0x0020, 0xC000, 0x0020,
0x0018, 0x000D, 0x0000, 0x8000, 0x0022, 0x8000, 0x0020, 0x8000, 0x0025, 0x8000, 0x0020, 0x8000, 0x0029, 0xC000, 0x002B, 0x4007,
0x0064, 0x000D, 0x0069, 0x0000, 0x0018, 0x000D, 0x0000, 0x8001, 0x0010, 0x8000, 0x0010, 0xC800, 0x002D, 0x4037, 0x0078, 0x002C,
0x007A, 0x0004, 0x007C, 0x000C, 0x007E, 0x000D, 0x0084, 0x0000, 0x9000, 0x002E, 0x8000, 0x0030, 0x8000, 0x0034, 0xC000, 0x0037,
0x002C, 0x0000, 0x8001, 0x003A, 0x8000, 0x003C
}; };
static const uint8_t magickey_completions_data[COMPLETIONS_SIZE] PROGMEM = { static const uint8_t magickey_completions_data[COMPLETIONS_SIZE] PROGMEM = {
0x67, 0x65, 0x00, 0x75, 0x73, 0x74, 0x00, 0x6E, 0x00, 0x6D, 0x65, 0x6E, 0x74, 0x00, 0x6F, 0x69, 0x65, 0x64, 0x00, 0x27, 0x64, 0x00, 0x27, 0x6C, 0x6C, 0x00, 0x27, 0x6D, 0x00, 0x65, 0x72, 0x00,
0x6E, 0x00 0x6D, 0x65, 0x6E, 0x74, 0x00, 0x65, 0x76, 0x65, 0x6C, 0x6F, 0x70, 0x00, 0x74, 0x68, 0x65, 0x00,
0x79, 0x00, 0x67, 0x65, 0x00, 0x75, 0x73, 0x74, 0x00, 0x70, 0x00, 0x6E, 0x00, 0x00, 0x3A, 0x00,
0x66, 0x6F, 0x72, 0x00, 0x6E, 0x64, 0x00, 0x6E, 0x67, 0x00, 0x49, 0x00, 0x6F, 0x69, 0x6E, 0x00
}; };

View file

@ -1,33 +1,30 @@
# Copyright ✵0✵☆-✵0✵✵ Google LLC # 👆👍★✪
# # ↻⇑
# Licensed under the Apache License, Version ✵.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-✵.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#c☆ ⇒ cy c👆 ⇒ cy
#p☆ ⇒ py p👆 ⇒ py
#d☆ ⇒ dy d👆 ⇒ dy
#y☆ ⇒ yp y👆 ⇒ yp
#g☆ ⇒ gy g👆 ⇒ gy
#j☆ ⇒ just 👍 ⇒ ↻
#jud☆ ⇒ judge i👍 ⇒ ing
#✵ ⇒ @ a👍 ⇒ and
#i✵ ⇒ ing :👆 ⇒ the
#a✵ ⇒ and :👍 ⇒ for
#:☆ ⇒ the .👍 ⇒ .:⇑
#:✵ ⇒ for
j☆ ⇒ just j👆 ⇒ just
j☆☆ ⇒ justment j👆👆 ⇒ justment
☆☆ ⇒ ☆n 👆👆 ⇒ 👆n
j✵ ⇒ join j👍 ⇒ join
jud☆ ⇒ judge jud👆 ⇒ judge
jud☆☆ ⇒ judgment jud👆👆 ⇒ judgment
d★ ⇒ develop
d★t ⇒ development
d★r ⇒ developer
d★d ⇒ developed
:i👍 ⇒ I
:i👍m ⇒ I'm
:i👍d ⇒ I'd
:i👍l ⇒ I'll

View file

@ -0,0 +1,21 @@
#include "stack.h"
#include <stdio.h>
//////////////////////////////////////////////////////////////////////////////////
// Append c on top of the stack. A push onto a full stack (size already at
// STACK_SIZE) is silently dropped.
void stack_push(stack_t *s, char c)
{
    if (s->size >= STACK_SIZE) {
        return;
    }
    s->buffer[s->size] = c;
    s->size += 1;
}
//////////////////////////////////////////////////////////////////////////////////
// Discard the top element. Popping an empty stack is a no-op.
void stack_pop(stack_t *s)
{
    if (s->size <= 0) {
        return;
    }
    s->size -= 1;
}
//////////////////////////////////////////////////////////////////////////////////
// Write the stack contents to stdout from the top element down to the
// bottom one, with no separator and no trailing newline.
void stack_print(stack_t *s)
{
    int remaining = s->size;
    while (remaining > 0) {
        remaining -= 1;
        printf("%c", s->buffer[remaining]);
    }
}

View file

@ -0,0 +1,12 @@
#pragma once
//////////////////////////////////////////////////////////////////////////////////
// Maximum number of characters a stack_t can hold.
#define STACK_SIZE 256
// Fixed-capacity stack of characters. The struct has no init function;
// callers set size to 0 before first use.
typedef struct {
char buffer[STACK_SIZE]; // storage; elements [0, size) are in use
int size; // number of elements currently on the stack
} stack_t;
// Push c; ignored when the stack already holds STACK_SIZE elements.
void stack_push(stack_t *s, char c);
// Drop the top element; ignored when the stack is empty.
void stack_pop(stack_t *s);
// Print the elements to stdout, top element first.
void stack_print(stack_t *s);

View file

@ -0,0 +1,207 @@
#include <assert.h>
#include <stdio.h>
#include <string.h>
#include "trie2.h"
#include "stack.h"
#include "util.h"
// Capacity of the search window: no pattern is longer than MAGICKEY_MAX_LENGTH keys.
#define KEY_BUFFER_MAX_LENGTH MAGICKEY_MAX_LENGTH
// Shorthand accessors for the dictionary words / completion bytes.
// Both expect a variable named `trie` (pointer to trie2_t) in the calling scope.
#define TDATA(i) (trie->dict[(i)])
#define CDATA(i) (trie->completions[(i)])
//////////////////////////////////////////////////////////////////////////////////
// Search request handed to search_trie2 through trie2_visitor_t::cb_data.
typedef struct {
// Typed characters to match; search_trie2 reads them back to front,
// starting at data[size - 1].
char data[KEY_BUFFER_MAX_LENGTH];
// Number of valid characters in data.
int size;
// result data
// NOTE(review): none of the fields below is written by the code visible in
// this file; presumably a search callback is meant to fill them in - confirm.
const char *completion;
int complete_len;
int func_num;
int num_backspaces;
} search_buffer_t;
//////////////////////////////////////////////////////////////////////////////////
// State shared by the trie walkers (traverse_trie2 / search_trie2).
typedef struct trie2_visitor trie2_visitor_t;
// Match callback. The walkers call it as
// cb(visitor, backspace_count, function_number, completion_string).
typedef void (*trie2_visitor_cb_t)(trie2_visitor_t *, int, int, const char *);
struct trie2_visitor {
// Characters on the path from the trie root to the current node.
stack_t stack;
// Invoked for every match the walker reports.
trie2_visitor_cb_t cb_func;
// Caller-owned context. search_trie2 casts it to search_buffer_t *;
// print_cb2 treats a non-NULL value as int * (a match counter).
void *cb_data;
};
//////////////////////////////////////////////////////////////////////////////////
// Depth-first walk over the trie rooted at `offset`; v->cb_func is invoked
// once for every MATCH node reached.
//
// Node kinds, decided by the first word of the node:
//   0x8000 set        MATCH:  low 6 bits = backspace count, bits 11..13 =
//                             function number, next word = offset of the
//                             completion string. If 0x4000 is also set, a
//                             child node follows at offset + 2.
//   0x4000 set only   BRANCH: list of (keycode, child offset) word pairs,
//                             terminated by a zero keycode.
//   neither           CHAIN:  run of keycodes terminated by a zero word,
//                             followed directly by the next node.
//
// v->stack holds the characters on the current path and is restored to its
// entry state before returning.
void traverse_trie2(const trie2_t *trie, int offset, trie2_visitor_t *v)
{
    assert(offset < trie->dict_size);
    const uint16_t head = TDATA(offset);
    assert(head);

    if (head & 0x8000) {
        // MATCH: visit the longer patterns below this node first.
        if (head & 0x4000) {
            traverse_trie2(trie, offset + 2, v);
        }
        printf("offset %d: ", offset);
        const int complete_offset = TDATA(offset + 1);
        v->cb_func(v,
                   head & 0x3F,
                   (head >> 11) & 0x07,
                   (const char *)&CDATA(complete_offset));
        return;
    }

    if (head & 0x4000) {
        // BRANCH: only the first keycode carries the 0x4000 flag.
        uint16_t sibling = head & 0x3FFF;
        while (sibling) {
            const int child_offset = TDATA(offset + 1);
            stack_push(&v->stack, keycode_to_char(sibling));
            traverse_trie2(trie, child_offset, v);
            stack_pop(&v->stack);
            offset += 2;
            sibling = TDATA(offset);
        }
        return;
    }

    // CHAIN: push every keycode up to the zero terminator, then continue
    // with the node stored right after the terminator.
    const int depth_on_entry = v->stack.size;
    uint16_t link = head;
    while (link) {
        stack_push(&v->stack, keycode_to_char(link));
        link = TDATA(++offset);
    }
    traverse_trie2(trie, offset + 1, v);
    v->stack.size = depth_on_entry;
}
//////////////////////////////////////////////////////////////////////////////////
// Look up the longest dictionary pattern that matches the END of the search
// buffer, starting at the node stored at `offset`.
//
// v->cb_data must point to a search_buffer_t holding the typed characters.
// v->stack tracks the characters matched so far; its size is the current
// depth, and the buffer character compared at depth d is data[size - d].
//
// Returns true after calling v->cb_func for the match that was found, false
// when nothing under this node matches. v->stack is restored before returning.
bool search_trie2(const trie2_t *trie, int offset, trie2_visitor_t *v)
{
assert(offset < trie->dict_size);
search_buffer_t *search = (search_buffer_t*)v->cb_data;
uint16_t code = TDATA(offset);
assert(code);
// MATCH node if the top bit (0x8000) is set
if (code & 0x8000) {
// If 0x4000 is also set, a child node follows the completion-offset word.
// Try it first: a deeper hit is a longer match and takes precedence.
if ((code & 0x4000) && search_trie2(trie, offset+2, v))
return true;
// If no better match found deeper, this is the result!
// Low 6 bits: backspace count; bits 11..13: function number;
// next word: offset of the completion string in the completions array.
const int bspaces = (code & 0x3F);
const int func = (code >> 11 & 0x7);
const int complete_offset = TDATA(offset+1);
const char *completion = (const char *)&CDATA(complete_offset);
v->cb_func(v, bspaces, func, completion);
// Found a match so return true!
return true;
}
// BRANCH node if 0x4000 is set (and 0x8000 is not)
if (code & 0x4000) {
// The buffer has no character left to compare at the next depth.
if ((v->stack.size+1) > search->size)
return false;
// Only the first keycode of the branch list carries the 0x4000 flag.
code &= 0x3FFF;
const char cur_char = search->data[search->size - (v->stack.size+1)];
// find child that matches our current buffer location
// (entries are keycode/child-offset pairs, ended by a zero keycode)
for (; code; offset += 2, code = TDATA(offset)) {
const char c = keycode_to_char(code);
if (cur_char == c) {
// Get 16bit offset to child node
const int child_offset = TDATA(offset+1);
// Traverse down child node
stack_push(&v->stack, c);
const bool res = search_trie2(trie, child_offset, v);
stack_pop(&v->stack);
return res;
}
}
// Couldn't go deeper, so return false.
return false;
}
// No high bits set, so this is a chain node
// Travel down chain until we reach a zero code, or we no longer match our buffer
const int prev_stack_size = v->stack.size;
for (; code; code = TDATA(++offset)) {
const char c = keycode_to_char(code);
stack_push(&v->stack, c);
// Fail when the chain is longer than the buffer or the characters differ;
// undo the pushes made by this chain before reporting the miss.
if (v->stack.size > search->size ||
c != search->data[search->size - v->stack.size]) {
v->stack.size = prev_stack_size;
return false;
}
}
// After a chain, there should be a leaf or branch
const bool res = search_trie2(trie, offset+1, v);
v->stack.size = prev_stack_size;
return res;
}
//////////////////////////////////////////////////////////////////////////////////
// Match callback that prints one line per match: the current depth, the
// characters on the visitor stack, the completion string, the backspace
// count and the function number.
// When v->cb_data is non-NULL it is treated as a pointer to an int match
// counter, which is incremented.
void print_cb2(trie2_visitor_t *v, int bspaces, int func, const char *completion)
{
    printf(" depth %d: ", v->stack.size);
    stack_print(&v->stack);
    printf(" -> %s (bspc: %d, func: %d)\n", completion, bspaces, func);

    int *match_counter = (int *)v->cb_data;
    if (match_counter != NULL) {
        *match_counter += 1;
    }
}
//////////////////////////////////////////////////////////////////////////////////
// Walk the whole trie from its root, printing every entry through print_cb2,
// then print how many entries were visited.
void test_traverse2(trie2_t *trie)
{
    int entries = 0;
    trie2_visitor_t visitor = {
        .stack   = { .size = 0 },
        .cb_func = print_cb2,
        .cb_data = &entries, // print_cb2 bumps this once per match
    };

    printf("Traversing Trie2:\n");
    traverse_trie2(trie, 0, &visitor);
    printf("Found %d entries.\n\n", entries);
}
//////////////////////////////////////////////////////////////////////////////////
void test_search2(const trie2_t *trie, const char *buffer)
{
printf("Searching Trie2 for '%s':\n", buffer);
search_buffer_t search;
strcpy(search.data, buffer);
search.size = (int)strlen(buffer);
trie2_visitor_t visitor;
visitor.stack.size = 0;
visitor.cb_data = (void*)&search;
visitor.cb_func = print_cb2;
search_trie2(trie, 0, &visitor);
}
//////////////////////////////////////////////////////////////////////////////////
void test_trie2()
{
trie2_t trie2 = {
magickey_data,
magickey_completions_data,
DICTIONARY_SIZE,
COMPLETIONS_SIZE
};
test_traverse2(&trie2);
// Test search
test_search2(&trie2, "i*");
test_search2(&trie2, ":sti@");
test_search2(&trie2, "stati*");
test_search2(&trie2, "bo*");
test_search2(&trie2, "judge@");
test_search2(&trie2, "cop*");
test_search2(&trie2, "beh@");
test_search2(&trie2, ":beh@");
test_search2(&trie2, "cat*");
test_search2(&trie2, ":ex@");
test_search2(&trie2, "j*");
test_search2(&trie2, "i@");
test_search2(&trie2, "i@m");
test_search2(&trie2, "i@d");
test_search2(&trie2, "i@l");
test_search2(&trie2, "@");
test_search2(&trie2, "*");
test_search2(&trie2, ".@");
}
// Program entry point: runs the Trie2 self-test and exits successfully.
int main()
{
    test_trie2();
    return 0;
}

View file

@ -0,0 +1,14 @@
#pragma once
#include <stdint.h>
// PROGMEM is defined empty before including the data header, whose array
// definitions carry the PROGMEM qualifier.
#define PROGMEM
#include "magickey_data.h"
// Read-only view over a dictionary: the 16-bit trie words plus the byte pool
// of completion strings, each with its element count.
typedef struct {
const uint16_t *dict; // trie words
const uint8_t *completions; // completion string bytes
int dict_size; // number of elements in dict
int completions_size; // number of elements in completions
} trie2_t;
// Runs the trie traversal and search self-tests, printing results to stdout.
void test_trie2();

View file

@ -0,0 +1,63 @@
#include "keycodes.h"
#include "util.h"
// Bit that marks a keycode as shifted (tested by keycode_to_char).
#define QK_LSFT 0x0200
// Plain-memory replacement for pgm_read_byte: reads one byte through the
// given address. The expansion is fully parenthesised so it composes safely
// inside larger expressions, and it reads through a const pointer because
// the lookup tables it is used on are const.
#define pgm_read_byte(address_short) (*(const uint8_t *)(address_short))
// PROGMEM expands to nothing in this build.
#define PROGMEM
// First magic keycode; magic key N is encoded as KC_MAGIC_0 + N.
#define KC_MAGIC_0 0x0100
// Printable stand-ins for the magic keys, indexed by (keycode - KC_MAGIC_0).
// NOTE(review): only two entries are defined here - confirm whether every
// magic key used by the dictionary data should have one.
static const char magic_chars[] = { '*', '@' };
// Character produced by each keycode from KC_A through KC_SLASH when the
// shift bit is NOT set; indexed by (keycode - KC_A). Keys with no printable
// character (Enter, Escape, Backspace, Tab) and KC_NUHS map to ' '.
const char unshifted_keycode_to_ascii_lut[53] PROGMEM = {
// KC_A KC_B KC_C KC_D
'a', 'b', 'c', 'd',
// KC_E KC_F KC_G KC_H KC_I KC_J KC_K KC_L
'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l',
// KC_M KC_N KC_O KC_P KC_Q KC_R KC_S KC_T
'm', 'n', 'o', 'p', 'q', 'r', 's', 't',
// KC_U KC_V KC_W KC_X KC_Y KC_Z KC_1 KC_2
'u', 'v', 'w', 'x', 'y', 'z', '1', '2',
// KC_3 KC_4 KC_5 KC_6 KC_7 KC_8 KC_9 KC_0
'3', '4', '5', '6', '7', '8', '9', '0',
// KC_ENTR KC_ESC KC_BSPC KC_TAB KC_SPC KC_MINS KC_EQL KC_LBRC
' ', ' ', ' ', ' ', ' ', '-', '=', '[',
// KC_RBRC KC_BSLS KC_NUHS KC_SCLN KC_QUOT KC_GRV KC_COMM KC_DOT
']', '\\', ' ', ';', '\'', '`', ',', '.',
// KC_SLSH
'/'
};
// Character produced by each keycode from KC_A through KC_SLASH when the
// shift bit IS set; indexed by (keycode - KC_A). The column comments name the
// shifted keycode aliases. Keys with no printable character map to ' '.
const char shifted_keycode_to_ascii_lut[53] PROGMEM = {
// KC_A KC_B KC_C KC_D
'A', 'B', 'C', 'D',
// KC_E KC_F KC_G KC_H KC_I KC_J KC_K KC_L
'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L',
// KC_M KC_N KC_O KC_P KC_Q KC_R KC_S KC_T
'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T',
// KC_U KC_V KC_W KC_X KC_Y KC_Z KC_EXLM KC_AT
'U', 'V', 'W', 'X', 'Y', 'Z', '!', '@',
// KC_HASH KC_DLR KC_PERC KC_CIRC KC_AMPR KC_ASTR KC_LPRN KC_RPRN
'#', '$', '%', '^', '&', '*', '(', ')',
// KC_ENTR KC_ESC KC_BSPC KC_TAB KC_SPC KC_UNDS KC_PLUS KC_LCBR
' ', ' ', ' ', ' ', ' ', '_', '+', '{',
// KC_RCBR KC_PIPE KC_NUHS KC_COLN KC_DQUO KC_GRV KC_LABK KC_RABK
'}', '|', ' ', ':', '"', '~', '<', '>',
// KC_QUES
'?'
};
////////////////////////////////////////////////////////////////////////////////
// Translate a 16-bit keycode into the single character used to represent it
// in search buffers and test output.
//
//   KC_MAGIC_0 .. QK_LSFT-1  magic keys, mapped through magic_chars; a magic
//                            key without an entry in magic_chars yields ' '
//                            rather than reading past the end of the table.
//   KC_SPC                   ':' (the word-break character).
//   anything else            the low byte is looked up in the shifted table
//                            when the QK_LSFT bit is set, otherwise in the
//                            unshifted table; keycodes outside
//                            KC_A..KC_SLASH yield ' '.
//
// The magic range stops below QK_LSFT so that shifted keycodes (which are all
// numerically >= KC_MAGIC_0) still reach the shifted lookup.
char keycode_to_char(uint16_t keycode)
{
    if (keycode >= KC_MAGIC_0 && keycode < QK_LSFT) {
        const unsigned magic_index = (unsigned)(keycode - KC_MAGIC_0);
        const unsigned magic_count = (unsigned)(sizeof magic_chars / sizeof magic_chars[0]);
        return magic_index < magic_count ? magic_chars[magic_index] : ' ';
    }
    if (keycode == KC_SPC)
        return ':';
    const bool shifted = keycode & QK_LSFT;
    keycode &= 0xFF;
    if (keycode >= KC_A && keycode <= KC_SLASH) {
        keycode -= KC_A;
        return shifted ? pgm_read_byte(&shifted_keycode_to_ascii_lut[keycode]) :
                         pgm_read_byte(&unshifted_keycode_to_ascii_lut[keycode]);
    }
    return ' ';
}

View file

@ -0,0 +1,4 @@
#pragma once
#include <stdint.h>
// Returns the character used to represent the given keycode: magic keys map
// to their placeholder character, KC_SPC maps to ':', keycodes in
// KC_A..KC_SLASH map through the ASCII lookup tables, anything else is ' '.
char keycode_to_char(uint16_t code);