Valid inputs in multiple keyboard layouts

typing
python
Published

March 5, 2024

Most of the time, typing on the wrong keyboard layout produces gibberish — but not always. Today I thought I would try to find the longest key sequences that spell valid words in two different keyboard layouts.

The script is pretty simple: it brute-forces comparisons across a large word list.

Code
from numpy import loadtxt
from unidecode import unidecode


def main():
    """Load the word list, search all layouts for matches, and print them.

    Returns:
        The dictionary returned by ``find_matches`` for the loaded words.
    """
    wordlist_path = "./blog/posts/colemak-qwerty-words/wordlist.txt"
    entries = loadtxt(wordlist_path, dtype="str")

    # Normalise every entry: run it through unidecode, then lowercase it.
    words = []
    for entry in entries:
        words.append(unidecode(entry).lower())

    result = find_matches(words, get_layouts())
    print(result)
    return result


def get_layouts():
    """Return the supported keyboard layouts, keyed by layout name.

    Each layout is a list of four rows (top letter row, home row, bottom row,
    space bar), and each row is a list of single-character keys. A given
    physical key sits at the same ``[row][column]`` index in every layout.
    Fresh lists are built on every call.
    """
    # One string per row; each character is one key, left to right.
    # "\n" stands for the Enter key at the end of the home row.
    rows_by_layout = {
        "qwerty": ("qwertyuiop[]\\", "asdfghjkl;'\n", "zxcvbnm,./", " "),
        "colemak_dh": ("qwfpbjluy;[]\\", "arstgmneio'\n", "xcdvzkh,./", " "),
        "colemak": ("qwfpgjluy;[]\\", "arstdhneio'\n", "zxcvbkm,./", " "),
        "dvorak": ("',.pyfgcrl/=\\", "aoeuidhtns-\n", ";qjkxbmwvz", " "),
    }

    return {
        name: [list(row) for row in rows]
        for name, rows in rows_by_layout.items()
    }


def map_keys(keys):
    """Build a lookup from each key of a layout to its position.

    Args:
        keys: A layout given as a sequence of rows, each row a sequence of
            keys.

    Returns:
        A dict mapping every key to its ``[row, column]`` index pair. If a
        key appears more than once, the last occurrence wins.
    """
    return {
        symbol: [row_index, column_index]
        for row_index, row in enumerate(keys)
        for column_index, symbol in enumerate(row)
    }


def decipher(s, input_key, output_key):
    """Re-type ``s`` from one layout onto another, returned in upper case.

    Every character of ``s`` is located on ``input_key`` and replaced by the
    key at the same ``[row][column]`` position on ``output_key``.

    Args:
        s: The text as typed on the input layout.
        input_key: The layout (rows of keys) ``s`` was typed on.
        output_key: The layout (rows of keys) to read the same positions from.

    Returns:
        The translated text, upper-cased.

    Raises:
        KeyError: If ``s`` contains a character that is not on ``input_key``.
    """
    positions = map_keys(input_key)
    coordinates = (positions[char] for char in s)
    typed = "".join(output_key[row][column] for row, column in coordinates)
    return typed.upper()


def type_sequence(sequence, layout):
    """Return the text produced by pressing the given key positions.

    Args:
        sequence: Iterable of ``(row, column)`` pairs, one per key press.
        layout: The layout (rows of keys) the positions are pressed on.

    Returns:
        The keys found at those positions, concatenated in order.
    """
    typed = []
    for row, column in sequence:
        typed.append(layout[row][column])
    return "".join(typed)


def find_matches(words, layouts):
    """Find qwerty words whose key presses spell another word elsewhere.

    Each word is converted to the key positions that type it on the
    ``"qwerty"`` layout. Those positions are then pressed on every other
    layout in ``layouts``, and a pair is recorded (and printed) whenever the
    resulting text is itself one of ``words``.

    Args:
        words: A re-iterable collection of candidate words (for example a
            list). It is used both as the inputs to try and as the
            vocabulary the typed output is checked against.
        layouts: Dict of layout name to layout (rows of keys). It must
            contain ``"qwerty"``; every other entry is treated as a target
            layout.

    Returns:
        A dict mapping each target layout name to a list of
        ``[qwerty_word, typed_word]`` pairs, in the order the words were
        visited.
    """
    source_positions = map_keys(layouts["qwerty"])
    targets = {
        name: layout for name, layout in layouts.items() if name != "qwerty"
    }

    # Build the vocabulary once: a set makes each membership test O(1)
    # instead of a scan over the whole word list for every candidate.
    vocabulary = set(words)

    matches = {name: [] for name in targets}

    for word in words:
        try:
            sequence = [source_positions[char] for char in word]
        except KeyError:
            # The word contains a character with no key on the qwerty layout
            # (e.g. a digit or hyphen), so it cannot be typed; skip it rather
            # than abort the whole search.
            continue

        for name, layout in targets.items():
            typed = type_sequence(sequence, layout)
            if typed in vocabulary:
                matches[name].append([word, typed])
                print(f"{word} types {typed} in {name}")

    return matches


# Run the search only when this file is executed as a script, not on import.
if __name__ == "__main__":
    matches = main()

The prize for most similar goes to mamma, which is the same in qwerty, colemak, and dvorak (can you guess why?).

Here is the full list of pairs.

qwerty output layout
misfaith murtaugh colemak
maddled massifs colemak
macamba macamba colemak
skulked reliefs colemak
skulked reliefs colemak_dh
bahama bahama colemak
wahwah wahwah colemak
clarks diaper colemak_dh
wagwag wagwag colemak_dh
flossy unroof dvorak
based barfs colemak
banks baker colemak
bleed biffs colemak
basks barer colemak
chard chaps colemak
chasm charm colemak
drank spake colemak
disks surer colemak
dirks super colemak
forks typer colemak
ghana dhaka colemak
mamma mamma colemak
mamba mamba colemak
risks purer colemak
racks pacer colemak
slunk rilke colemak
sagas radar colemak
sacks racer colemak
simba rumba colemak
who'd why's colemak
wanks waker colemak
casks darer colemak_dh
drank spake colemak_dh
disks surer colemak_dh
dirks super colemak_dh
forks typer colemak_dh
gordo gypsy colemak_dh
hanks maker colemak_dh
jacks nader colemak_dh
rinks puker colemak_dh
risks purer colemak_dh
slunk rilke colemak_dh
sicks ruder colemak_dh
tasks barer colemak_dh
tanks baker colemak_dh
talak baiae colemak_dh
flush ungod dvorak
float unray dvorak
flusk ungot dvorak
hoard drape dvorak
kodak treat dvorak
mamma mamma dvorak
rossy proof dvorak
rasps paolo dvorak
udons gerbo dvorak
yangs fabio dvorak
also airy colemak
bask bare colemak
base barf colemak
bank bake colemak
card caps colemak
cask care colemak
char chap colemak
chit chug colemak
disk sure colemak
data saga colemak
dank sake colemak
fork type colemak
flee tiff colemak
fief tuft colemak
gaga dada colemak
glad dias colemak
lamb iamb colemak
miff mutt colemak
maul mali colemak
mask mare colemak
rink puke colemak
risk pure colemak
rara papa colemak
rack pace colemak
sank rake colemak
sack race colemak
yank jake colemak
also airy colemak_dh
cask dare colemak_dh
clad dias colemak_dh
conk dyke colemak_dh
disk sure colemak_dh
dank sake colemak_dh
fork type colemak_dh
flee tiff colemak_dh
fief tuft colemak_dh
faux talc colemak_dh
gaga gaga colemak_dh
hack made colemak_dh
hank make colemak_dh
hard maps colemak_dh
haul mali colemak_dh
mock hyde colemak_dh
mask hare colemak_dh
mash harm colemak_dh
riff putt colemak_dh
rink puke colemak_dh
risk pure colemak_dh
rara papa colemak_dh
sick rude colemak_dh
sank rake colemak_dh
tiff butt colemak_dh
task bare colemak_dh
tank bake colemak_dh
tack bade colemak_dh
wash warm colemak_dh
wale waif colemak_dh
wack wade colemak_dh
xmas char colemak_dh
anno abbr dvorak
goal iran dvorak
lard nape dvorak
mama mama dvorak
malt many dvorak
maid mace dvorak
noah brad dvorak
paid lace dvorak
roam pram dvorak
rosy prof dvorak
rara papa dvorak
raid pace dvorak
soap oral dvorak
slid once dvorak
ussr goop dvorak