Implemented the task as described. However, a real T9 implementation would also rank the matching words by frequency of usage.

master
Tomasz Polgrabia 2025-01-03 23:38:11 +01:00
parent 39e5abc769
commit 0ebf8839e9
3 changed files with 109 additions and 28 deletions

View File

@ -17,10 +17,16 @@ public class Program {
var wordDictionaryPath = args[0];
var rstTree = new RstTreeLoader(Paths.get(wordDictionaryPath)).load();
// test for rst tree
// String w = "abadia"; // exists
String w = "abadia"; // exists
// String w = "asdadsadadlosdaladsad"; // doesn't exist
// String w = "abcdefg"; // doesn't exist
// logger.info("Looking for word: {} - {}", w, rstTree.contains(w));
logger.info("Looking for word: {} - {}", w, rstTree.contains(w));
String prefix = "ark";
// logger.info("Searching by prefix: {}", prefix);
rstTree.dfsByPrefix(prefix, (it) -> logger.info("Found word by prefix ({}) - {}", prefix, it));
var t9Lookup = new T9Lookup(rstTree);
int code = 8733;
t9Lookup.lookupByCode(code, (it) -> logger.info("Found word by code ({}) - {}", code, it));
}

View File

@ -1,25 +1,20 @@
package pl.polgrabia.demos.crackingcodeinterview.t16x20;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.function.Consumer;
public class RstTree {
private static final Logger logger = LoggerFactory.getLogger(RstTree.class);
private static final Logger log = LoggerFactory.getLogger(RstTree.class);
private RstTreeNode rstTreeNode;
private RstTreeNode rstTreeRoot;
public void add(String word) {
assert word != null : "word must not be null";
assert !word.isBlank() : "word must not be blank";
assert word.matches("^[a-z]+$") : "word '" + word + "' must contain only small letters";
if (rstTreeNode == null) {
rstTreeNode = new RstTreeNode();
rstTreeNode.setPrefix("");
rstTreeNode.setValue(word);
if (rstTreeRoot == null) {
rstTreeRoot = new RstTreeNode();
rstTreeRoot.setPrefix("");
rstTreeRoot.setValue(word);
} else {
addInternal(rstTreeNode, word);
addInternal(rstTreeRoot, word);
}
}
@ -63,28 +58,67 @@ public class RstTree {
assert !word.isBlank() : "word must not be blank";
assert word.matches("^[a-z]+$") : "word '" + word + "' must contain only small letters";
return containsInternal(rstTreeNode, word);
}
private boolean containsInternal(RstTreeNode node, String word) {
if (node == null) {
RstTreeNode searchedNode = lookupNodeByPrefixOrValueInternal(rstTreeRoot, word, SearchMode.VALUE);
if (searchedNode == null) {
return false;
}
logger.info("Node value: {}, prefix: {}, word: {}", node.getValue(), node.getPrefix(), word);
String value = searchedNode.getValue();
return word.equals(value);
}
assert word.startsWith(node.getPrefix()) : "word " + word + "must start with prefix: " + node.getPrefix();
if (word.equals(node.getValue())) {
return true;
/**
 * Searches the subtree rooted at {@code node} for the node that matches {@code searchValue}
 * under the given {@code searchMode}.
 *
 * <p>At each step the node itself is tested first; if it does not match, the search continues
 * in the child slot indexed by the character of {@code searchValue} found at position
 * {@code node.getPrefix().length()}.
 *
 * @param node        node to inspect; {@code null} ends the search with {@code null}
 * @param searchValue string being looked up; asserted to start with the node's prefix
 * @param searchMode  {@code VALUE} matches a node whose value equals {@code searchValue};
 *                    {@code PREFIX_OR_VALUE} matches a node whose prefix or value starts with it
 * @return the matching node, or {@code null} when none is found
 */
private RstTreeNode lookupNodeByPrefixOrValueInternal(RstTreeNode node, String searchValue, SearchMode searchMode) {
if (node == null) {
return null;
}
// NOTE(review): `word` is not a parameter of this method and `c` is declared again further
// down. This line, and the later `logger.info(...)` / `return containsInternal(...)` lines,
// look like leftovers of the replaced containsInternal body - confirm against the real file.
char c = word.charAt(node.getPrefix().length());
// logger.info("Node value: {}, prefix: {}, searchValue: {}", node.getValue(), node.getPrefix(), searchValue);
assert searchValue.startsWith(node.getPrefix()) : "searchValue " + searchValue + "must start with prefix: " + node.getPrefix();
if (SearchMode.VALUE.equals(searchMode) && searchValue.equals(node.getValue())) {
return node;
}
// NOTE(review): node.getValue() is dereferenced below without a null check - confirm that
// every node always carries a value.
if (SearchMode.PREFIX_OR_VALUE.equals(searchMode)
&& (node.getPrefix().startsWith(searchValue) || node.getValue().startsWith(searchValue))) {
return node;
}
// The node's prefix already covers the whole search string, so there is no further
// character left to select a child with: stop here without a match.
if (node.getPrefix().startsWith(searchValue)) {
return null;
}
// The next character after the node's prefix selects the child slot ('a' -> 0, 'b' -> 1, ...).
char c = searchValue.charAt(node.getPrefix().length());
int offset = c - 'a';
assert offset >= 0 && offset < RstTreeNode.EN_ALPHABET_LETTER_COUNT : "It's an offset for possible prefix child item - max alphabet";
// logger.info("Word: {}, prefix: {}, index: {}, c: {}, offset: {}", word, node.getPrefix(), node.getPrefix().length(), c, offset);
// logger.info("Word: {}, prefix: {}, index: {}, c: {}, offset: {}", searchValue, node.getPrefix(), node.getPrefix().length(), c, offset);
// A children array shorter than the offset is treated as "no such child".
var childItem = node.getRstChildren().length <= offset ? null : node.getRstChildren()[offset];
logger.info("Childitem: {}, c: {}, offset: {}", childItem, c, offset);
return containsInternal(childItem, word);
// logger.info("Childitem: {}, c: {}, offset: {}", childItem, c, offset);
// NOTE(review): the recursion always continues in SearchMode.VALUE, even when the caller
// asked for PREFIX_OR_VALUE, so the prefix test above is only applied to the node this
// method was first called with - confirm this is intended (passing `searchMode` through
// would keep the requested mode).
return lookupNodeByPrefixOrValueInternal(childItem, searchValue, SearchMode.VALUE);
}
/**
 * Reports the words found under the node selected by {@code prefix}.
 *
 * <p>The start node is resolved from the tree root with
 * {@code lookupNodeByPrefixOrValueInternal} in {@code PREFIX_OR_VALUE} mode; the value of that
 * node and of every node below it is then passed to {@code wordConsumer}. When no node is
 * resolved the consumer is never called.
 *
 * <p>The argument checks are {@code assert} statements, so they only run when assertions
 * are enabled.
 *
 * @param prefix       non-blank string of lowercase letters {@code a-z}
 * @param wordConsumer receives each reported value
 */
public void dfsByPrefix(String prefix, Consumer<String> wordConsumer) {
    assert prefix != null : "prefix must not be null";
    assert !prefix.isBlank() : "prefix must not be blank";
    assert prefix.matches("^[a-z]+$") : "prefix '" + prefix + "' must contain only small letters";

    // Resolve the subtree first, then let the traversal deal with a missing (null) start node.
    final RstTreeNode subtreeRoot =
            lookupNodeByPrefixOrValueInternal(rstTreeRoot, prefix, SearchMode.PREFIX_OR_VALUE);
    dfs(subtreeRoot, wordConsumer);
}
/**
 * Pre-order depth-first traversal: hands the value of {@code node} to {@code wordConsumer},
 * then visits each child slot in array order. A {@code null} node (including an empty child
 * slot) is ignored.
 *
 * @param node         subtree root, may be {@code null}
 * @param wordConsumer receives the value of every visited node
 */
private void dfs(RstTreeNode node, Consumer<String> wordConsumer) {
    if (node != null) {
        // NOTE(review): the value is forwarded as-is; whether a node can hold a null value
        // is not visible from here.
        wordConsumer.accept(node.getValue());
        for (RstTreeNode child : node.getRstChildren()) {
            dfs(child, wordConsumer);
        }
    }
}
/** Selects which test {@code lookupNodeByPrefixOrValueInternal} applies to a node. */
private enum SearchMode {
/** A node matches when its value equals the search string. */
VALUE,
/** A node matches when its prefix or its value starts with the search string. */
PREFIX_OR_VALUE
}
}

View File

@ -0,0 +1,41 @@
package pl.polgrabia.demos.crackingcodeinterview.t16x20;
import java.util.function.Consumer;
/**
 * Translates a numeric T9 key code into letter sequences and looks them up in an
 * {@link RstTree}.
 *
 * <p>Each decimal digit of the code is expanded into the letters assigned to that key
 * (2 = abc, 3 = def, ... 9 = wxyz). Every resulting letter sequence is passed to
 * {@link RstTree#dfsByPrefix} as the prefix to search for. Digits 0 and 1 have no letters
 * assigned, so a code containing either of them produces no lookups at all.
 */
public class T9Lookup {
    /** Letters per key, indexed by digit 0-9. */
    private static final char[][] T9_CODE_MAPPING = {
        {}, // 0
        {}, // 1
        {'a', 'b', 'c'}, // 2
        {'d', 'e', 'f'}, // 3
        {'g', 'h', 'i'}, // 4
        {'j', 'k', 'l'}, // 5
        {'m', 'n', 'o'}, // 6
        {'p', 'q', 'r', 's'}, // 7
        {'t', 'u', 'v'}, // 8
        {'w', 'x', 'y', 'z'}, // 9
    };

    private final RstTree rstTree;

    /**
     * @param rstTree dictionary tree that is queried for every expanded letter sequence
     */
    public T9Lookup(RstTree rstTree) {
        this.rstTree = rstTree;
    }

    /**
     * Reports the words the tree returns for every letter sequence the code can stand for.
     *
     * @param code         key presses as a positive decimal number, e.g. {@code 8733}
     * @param wordConsumer receives each word reported by the tree
     * @throws IllegalArgumentException if {@code code} is zero or negative. A negative code
     *         would produce negative digits (Java's {@code %} keeps the sign of the dividend)
     *         and index the key table out of bounds. Zero would hand an empty prefix to
     *         {@link RstTree#dfsByPrefix}, which requires a non-blank prefix.
     */
    public void lookupByCode(int code, Consumer<String> wordConsumer) {
        if (code <= 0) {
            throw new IllegalArgumentException("code must be a positive number, got: " + code);
        }
        lookupByCodeInternal(code, "", wordConsumer);
    }

    /**
     * Expands the remaining digits of {@code code} from the least significant one upwards.
     * Each candidate letter is prepended to {@code prefix}, so the finished sequence is in
     * the same order as the digits of the original code.
     *
     * @param code         digits still to expand; {@code 0} means all digits are consumed
     * @param prefix       letters chosen so far for the already consumed (lower) digits
     * @param wordConsumer receives each word reported by the tree
     */
    private void lookupByCodeInternal(int code, String prefix, Consumer<String> wordConsumer) {
        if (code == 0) {
            // Only reachable with a non-empty prefix: the entry point rejects code <= 0.
            rstTree.dfsByPrefix(prefix, wordConsumer);
            return;
        }
        int digit = code % 10;
        int remaining = code / 10;
        // An empty letter set (digits 0 and 1) ends this branch without any lookup.
        for (char letter : T9_CODE_MAPPING[digit]) {
            lookupByCodeInternal(remaining, letter + prefix, wordConsumer);
        }
    }
}