Valid inputs in multiple keyboard layouts

Most of the time, inputting keys in the wrong keyboard layout will result in gibberish — but not always. I thought today I would try to find the longest input sequences that generate valid words in two different keyboard layouts.

The script is pretty simple: it brute-forces the comparison across a large word list.

Code

from numpy import loadtxt
from unidecode import unidecode


def main():
    """Load the word list, search it for cross-layout matches, and report them.

    Every entry read from ``wordlist.txt`` is transliterated with ``unidecode``
    and lowercased before being handed, together with the layouts from
    ``get_layouts``, to ``find_matches``.

    Returns:
        The value returned by ``find_matches``; it is also printed.
    """
    wordlist_path = "./blog/posts/colemak-qwerty-words/wordlist.txt"

    words = []
    for raw_word in loadtxt(wordlist_path, dtype="str"):
        words.append(unidecode(raw_word).lower())

    matches = find_matches(words, get_layouts())
    print(matches)
    return matches


def get_layouts():
    """Return the key grids of the supported keyboard layouts.

    Each layout is a list of four rows, and each row is a list of the single
    characters found on it from left to right. Rows have the same length in
    every layout, so a ``[row, column]`` position addresses the same slot in
    each grid.

    Returns:
        A new dict mapping "qwerty", "colemak_dh", "colemak" and "dvorak" (in
        that order) to their grids.
    """
    # One string per row; each character is one key.
    rows_by_layout = {
        "qwerty": ("qwertyuiop[]\\", "asdfghjkl;'\n", "zxcvbnm,./", " "),
        "colemak_dh": ("qwfpbjluy;[]\\", "arstgmneio'\n", "xcdvzkh,./", " "),
        "colemak": ("qwfpgjluy;[]\\", "arstdhneio'\n", "zxcvbkm,./", " "),
        "dvorak": ("',.pyfgcrl/=\\", "aoeuidhtns-\n", ";qjkxbmwvz", " "),
    }

    return {
        name: [list(row) for row in rows]
        for name, rows in rows_by_layout.items()
    }


def map_keys(keys):
    """Build a lookup from each key's character to its position in the grid.

    Args:
        keys: A layout grid, i.e. a sequence of rows of characters.

    Returns:
        A dict mapping every character to ``[row_index, column_index]``. When
        a character occurs more than once, its last occurrence wins.
    """
    return {
        char: [row_index, column_index]
        for row_index, row in enumerate(keys)
        for column_index, char in enumerate(row)
    }


def decipher(s, input_key, output_key):
    """Re-type ``s`` on another layout and return the result in upper case.

    Every character of ``s`` is located in the ``input_key`` grid and replaced
    by the character found at the same position in the ``output_key`` grid.

    Args:
        s: Text whose characters all appear in ``input_key``.
        input_key: Layout grid the text was written for.
        output_key: Layout grid to read the same positions from.

    Returns:
        The translated text, upper-cased.

    Raises:
        KeyError: If a character of ``s`` is missing from ``input_key``.
    """
    positions = map_keys(input_key)
    translated = "".join(
        output_key[row][column]
        for row, column in (positions[char] for char in s)
    )
    return translated.upper()


def type_sequence(sequence, layout):
    """Return the text produced by pressing ``sequence`` of keys on ``layout``.

    Args:
        sequence: Iterable of ``(row, column)`` pairs identifying key positions.
        layout: Layout grid indexed as ``layout[row][column]``.

    Returns:
        The characters at those positions, concatenated in order.
    """
    typed = []
    for row, column in sequence:
        typed.append(layout[row][column])
    return "".join(typed)


def find_matches(words, layouts):
    """Find words whose QWERTY keystrokes spell a word in another layout.

    Each word is converted to the key positions that type it on
    ``layouts["qwerty"]``; those positions are then read back on the
    "colemak_dh", "colemak" and "dvorak" grids. Whenever the result is itself
    one of ``words``, the pair is recorded and a line is printed.

    Args:
        words: List of candidate words (strings).
        layouts: Dict of layout grids containing at least "qwerty",
            "colemak_dh", "colemak" and "dvorak".

    Returns:
        A dict mapping each target layout name to a list of
        ``[qwerty_word, typed_word]`` pairs, in the order they were found.
    """
    target_names = ("colemak_dh", "colemak", "dvorak")
    matches = {name: [] for name in target_names}

    qwerty_positions = map_keys(layouts["qwerty"])

    # Build the membership set once: testing each candidate against the list
    # itself made the whole search quadratic in the size of the word list.
    known_words = set(words)

    for word in words:
        try:
            sequence = [qwerty_positions[char] for char in word]
        except KeyError:
            # The word uses a character that is not on the QWERTY grid, so it
            # cannot be typed here; skip it instead of aborting the whole run.
            continue

        for name in target_names:
            typed = type_sequence(sequence, layouts[name])
            if typed in known_words:
                matches[name].append([word, typed])
                print(f"{word} types {typed} in {name}")

    return matches


# Run the search only when this file is executed as a script; the result is
# kept in the module-level name ``matches``.
if __name__ == "__main__":
    matches = main()

The prize for most similar goes to mamma, which is the same in qwerty, colemak, and dvorak (can you guess why?).

Here is the full list of pairs.

qwerty	output	layout
misfaith	murtaugh	colemak
maddled	massifs	colemak
macamba	macamba	colemak
skulked	reliefs	colemak
skulked	reliefs	colemak_dh
bahama	bahama	colemak
wahwah	wahwah	colemak
clarks	diaper	colemak_dh
wagwag	wagwag	colemak_dh
flossy	unroof	dvorak
based	barfs	colemak
banks	baker	colemak
bleed	biffs	colemak
basks	barer	colemak
chard	chaps	colemak
chasm	charm	colemak
drank	spake	colemak
disks	surer	colemak
dirks	super	colemak
forks	typer	colemak
ghana	dhaka	colemak
mamma	mamma	colemak
mamba	mamba	colemak
risks	purer	colemak
racks	pacer	colemak
slunk	rilke	colemak
sagas	radar	colemak
sacks	racer	colemak
simba	rumba	colemak
who'd	why's	colemak
wanks	waker	colemak
casks	darer	colemak_dh
drank	spake	colemak_dh
disks	surer	colemak_dh
dirks	super	colemak_dh
forks	typer	colemak_dh
gordo	gypsy	colemak_dh
hanks	maker	colemak_dh
jacks	nader	colemak_dh
rinks	puker	colemak_dh
risks	purer	colemak_dh
slunk	rilke	colemak_dh
sicks	ruder	colemak_dh
tasks	barer	colemak_dh
tanks	baker	colemak_dh
talak	baiae	colemak_dh
flush	ungod	dvorak
float	unray	dvorak
flusk	ungot	dvorak
hoard	drape	dvorak
kodak	treat	dvorak
mamma	mamma	dvorak
rossy	proof	dvorak
rasps	paolo	dvorak
udons	gerbo	dvorak
yangs	fabio	dvorak
also	airy	colemak
bask	bare	colemak
base	barf	colemak
bank	bake	colemak
card	caps	colemak
cask	care	colemak
char	chap	colemak
chit	chug	colemak
disk	sure	colemak
data	saga	colemak
dank	sake	colemak
fork	type	colemak
flee	tiff	colemak
fief	tuft	colemak
gaga	dada	colemak
glad	dias	colemak
lamb	iamb	colemak
miff	mutt	colemak
maul	mali	colemak
mask	mare	colemak
rink	puke	colemak
risk	pure	colemak
rara	papa	colemak
rack	pace	colemak
sank	rake	colemak
sack	race	colemak
yank	jake	colemak
also	airy	colemak_dh
cask	dare	colemak_dh
clad	dias	colemak_dh
conk	dyke	colemak_dh
disk	sure	colemak_dh
dank	sake	colemak_dh
fork	type	colemak_dh
flee	tiff	colemak_dh
fief	tuft	colemak_dh
faux	talc	colemak_dh
gaga	gaga	colemak_dh
hack	made	colemak_dh
hank	make	colemak_dh
hard	maps	colemak_dh
haul	mali	colemak_dh
mock	hyde	colemak_dh
mask	hare	colemak_dh
mash	harm	colemak_dh
riff	putt	colemak_dh
rink	puke	colemak_dh
risk	pure	colemak_dh
rara	papa	colemak_dh
sick	rude	colemak_dh
sank	rake	colemak_dh
tiff	butt	colemak_dh
task	bare	colemak_dh
tank	bake	colemak_dh
tack	bade	colemak_dh
wash	warm	colemak_dh
wale	waif	colemak_dh
wack	wade	colemak_dh
xmas	char	colemak_dh
anno	abbr	dvorak
goal	iran	dvorak
lard	nape	dvorak
mama	mama	dvorak
malt	many	dvorak
maid	mace	dvorak
noah	brad	dvorak
paid	lace	dvorak
roam	pram	dvorak
rosy	prof	dvorak
rara	papa	dvorak
raid	pace	dvorak
soap	oral	dvorak
slid	once	dvorak
ussr	goop	dvorak