feat: voice to text

This commit is contained in:
Barrett Ruth 2026-03-11 18:50:11 -04:00
parent 6fae26733d
commit 8011242194
Signed by: barrett
GPG key ID: A6C96C9349D2FC81
6 changed files with 57 additions and 108 deletions

View file

@ -135,6 +135,7 @@ bind = , I, exec, hyprctl dispatch submap reset; ctl idle
bind = , K, exec, hyprctl dispatch submap reset; ctl keyboard next bind = , K, exec, hyprctl dispatch submap reset; ctl keyboard next
bind = , M, exec, hyprctl dispatch submap reset; ctl media bind = , M, exec, hyprctl dispatch submap reset; ctl media
bind = , P, exec, hyprctl dispatch submap reset; ctl power bind = , P, exec, hyprctl dispatch submap reset; ctl power
bind = , S, exec, hyprctl dispatch submap reset; ctl dictate
bind = , T, exec, hyprctl dispatch submap reset; theme bind = , T, exec, hyprctl dispatch submap reset; theme
bind = , catchall, submap, reset bind = , catchall, submap, reset

55
flake.lock generated
View file

@ -39,24 +39,6 @@
"type": "github" "type": "github"
} }
}, },
"flake-utils": {
"inputs": {
"systems": "systems"
},
"locked": {
"lastModified": 1731533236,
"narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=",
"owner": "numtide",
"repo": "flake-utils",
"rev": "11707dc2f618dd54ca8739b309ec4fc024de578b",
"type": "github"
},
"original": {
"owner": "numtide",
"repo": "flake-utils",
"type": "github"
}
},
"home-manager": { "home-manager": {
"inputs": { "inputs": {
"nixpkgs": [ "nixpkgs": [
@ -220,46 +202,9 @@
"neovim-nightly": "neovim-nightly", "neovim-nightly": "neovim-nightly",
"nixos-hardware": "nixos-hardware", "nixos-hardware": "nixos-hardware",
"nixpkgs": "nixpkgs_3", "nixpkgs": "nixpkgs_3",
"whisper-dictation": "whisper-dictation",
"zen-browser": "zen-browser" "zen-browser": "zen-browser"
} }
}, },
"systems": {
"locked": {
"lastModified": 1681028828,
"narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
"owner": "nix-systems",
"repo": "default",
"rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
"type": "github"
},
"original": {
"owner": "nix-systems",
"repo": "default",
"type": "github"
}
},
"whisper-dictation": {
"inputs": {
"flake-utils": "flake-utils",
"nixpkgs": [
"nixpkgs"
]
},
"locked": {
"lastModified": 1771198514,
"narHash": "sha256-5Dd0zVTh+nUf3lxrDpgQSHbr0pU40hGh/hUuRbwG790=",
"owner": "jacopone",
"repo": "whisper-dictation",
"rev": "20190a148d8bce6f0d0b78a56cc22afe02844e8f",
"type": "github"
},
"original": {
"owner": "jacopone",
"repo": "whisper-dictation",
"type": "github"
}
},
"zen-browser": { "zen-browser": {
"inputs": { "inputs": {
"home-manager": "home-manager_2", "home-manager": "home-manager_2",

View file

@ -11,11 +11,7 @@
zen-browser.url = "github:0xc000022070/zen-browser-flake"; zen-browser.url = "github:0xc000022070/zen-browser-flake";
claude-code.url = "github:ryoppippi/claude-code-overlay"; claude-code.url = "github:ryoppippi/claude-code-overlay";
neovim-nightly.url = "github:nix-community/neovim-nightly-overlay"; neovim-nightly.url = "github:nix-community/neovim-nightly-overlay";
whisper-dictation = { };
url = "github:jacopone/whisper-dictation";
inputs.nixpkgs.follows = "nixpkgs";
};
};
outputs = outputs =
{ {
@ -25,7 +21,6 @@
zen-browser, zen-browser,
claude-code, claude-code,
neovim-nightly, neovim-nightly,
whisper-dictation,
... ...
}: }:
let let
@ -92,7 +87,7 @@
home-manager.lib.homeManagerConfiguration { home-manager.lib.homeManagerConfiguration {
pkgs = mkPkgs hostConfig.platform [ ]; pkgs = mkPkgs hostConfig.platform [ ];
extraSpecialArgs = { extraSpecialArgs = {
inherit zen-browser whisper-dictation hostConfig; inherit zen-browser hostConfig;
}; };
modules = [ ./home/home.nix ]; modules = [ ./home/home.nix ];
}; };
@ -134,6 +129,10 @@
"tailscale" "tailscale"
"libfprint-2-tod1-goodix" "libfprint-2-tod1-goodix"
"brgenml1lpr" "brgenml1lpr"
"cuda_cccl"
"cuda_cudart"
"libcublas"
"cuda_nvcc"
] ]
); );
} }
@ -144,7 +143,7 @@
home-manager.backupFileExtension = "bak"; home-manager.backupFileExtension = "bak";
home-manager.users.barrett = import ./home/home.nix; home-manager.users.barrett = import ./home/home.nix;
home-manager.extraSpecialArgs = { home-manager.extraSpecialArgs = {
inherit zen-browser whisper-dictation; inherit zen-browser;
hostConfig = xps15Config; hostConfig = xps15Config;
}; };
} }

View file

@ -3,19 +3,17 @@
pkgs, pkgs,
config, config,
hostConfig, hostConfig,
whisper-dictation,
... ...
}: }:
let let
whisper = pkgs.whisper-cpp.override { cudaSupport = hostConfig.gpu == "nvidia"; };
modelDir = "${config.home.homeDirectory}/.local/share/whisper-models"; modelDir = "${config.home.homeDirectory}/.local/share/whisper-models";
model = "ggml-base.bin"; model = "ggml-medium.bin";
modelUrl = "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/${model}"; modelUrl = "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/${model}";
in in
{ {
home.packages = [ home.packages = [ whisper ];
whisper-dictation.packages.${hostConfig.platform}.default
];
home.activation.downloadWhisperModel = lib.hm.dag.entryAfter [ "writeBoundary" ] '' home.activation.downloadWhisperModel = lib.hm.dag.entryAfter [ "writeBoundary" ] ''
if [ ! -f "${modelDir}/${model}" ]; then if [ ! -f "${modelDir}/${model}" ]; then
@ -23,29 +21,4 @@ in
run ${pkgs.curl}/bin/curl -L -o "${modelDir}/${model}" "${modelUrl}" run ${pkgs.curl}/bin/curl -L -o "${modelDir}/${model}" "${modelUrl}"
fi fi
''; '';
xdg.configFile."whisper-dictation/config.yaml".text = builtins.toJSON {
whisper = {
model = "base";
language = "auto";
};
hotkey = {
key = "KEY_DOT";
modifiers = [ "KEY_LEFTMETA" ];
};
};
systemd.user.services.whisper-dictation = {
Unit = {
Description = "Whisper Dictation speech-to-text daemon";
After = [ "graphical-session.target" "ydotoold.service" ];
PartOf = [ "graphical-session.target" ];
};
Service = {
ExecStart = "${whisper-dictation.packages.${hostConfig.platform}.default}/bin/whisper-dictation";
Restart = "on-failure";
RestartSec = 5;
};
Install.WantedBy = [ "graphical-session.target" ];
};
} }

View file

@ -95,24 +95,10 @@ in
"libvirt" "libvirt"
"storage" "storage"
"power" "power"
"input"
]; ];
shell = pkgs.zsh; shell = pkgs.zsh;
}; };
hardware.uinput.enable = true;
systemd.user.services.ydotoold = {
description = "ydotool daemon";
wantedBy = [ "graphical-session.target" ];
partOf = [ "graphical-session.target" ];
serviceConfig = {
ExecStart = "${pkgs.ydotool}/bin/ydotoold";
Restart = "on-failure";
RestartSec = 3;
};
};
programs.chromium = { programs.chromium = {
enable = true; enable = true;
extraOpts = { extraOpts = {

View file

@ -533,6 +533,51 @@ power)
"$shutdown") systemctl poweroff ;; "$shutdown") systemctl poweroff ;;
esac esac
;; ;;
dictate)
    # Toggle voice dictation.
    #   First invocation:  starts pw-record in the background and stores its
    #                      PID in $dtmp/rec_pid.
    #   Second invocation: stops that recorder, transcribes the captured audio
    #                      with whisper-cli, copies the text to the Wayland
    #                      clipboard and shows it in a notification.
    # Environment:
    #   DICTATE_MODEL  whisper.cpp model name, used as ggml-<name>.bin
    #                  (default: medium)
    #   DICTATE_LANG   language passed to whisper-cli --language (default: en)
    # Exits 1 when there is no audio, the transcription fails or is empty, or
    # the model download fails.
    require pw-record whisper-cli wl-copy notify-send
    dtmp="${XDG_RUNTIME_DIR:-/tmp}/dictation"
    mkdir -p "$dtmp"
    dpid="$dtmp/rec_pid"
    daudio="$dtmp/recording.wav"
    dmodel="${DICTATE_MODEL:-medium}"
    dmodel_dir="${XDG_DATA_HOME:-$HOME/.local/share}/whisper-models"
    dmodel_file="$dmodel_dir/ggml-$dmodel.bin"

    # Read the recorder PID once; a missing or unreadable file means "not recording".
    drec=""
    if [ -f "$dpid" ]; then
        drec=$(cat "$dpid" 2>/dev/null) || drec=""
    fi

    if [ -n "$drec" ] && kill -0 "$drec" 2>/dev/null; then
        # --- stop + transcribe ---
        kill "$drec" 2>/dev/null
        rm -f "$dpid"

        # Wait (bounded, ~2s) for the recorder to actually exit instead of
        # sleeping a fixed interval, so the audio file is complete before it
        # is read. NOTE(review): presumably pw-record finalizes the WAV on
        # SIGTERM — confirm.
        dtries=0
        while kill -0 "$drec" 2>/dev/null && [ "$dtries" -lt 20 ]; do
            sleep 0.1
            dtries=$((dtries + 1))
        done

        if [ ! -s "$daudio" ]; then
            notify-send -a ctl "no audio"
            exit 1
        fi

        notify-send -a ctl "transcribing..."
        # Check whisper-cli's exit status explicitly so a broken model or a
        # decoder error is not reported as "no speech detected".
        if ! text=$(whisper-cli \
            --model "$dmodel_file" \
            --language "${DICTATE_LANG:-en}" \
            --no-prints --no-timestamps \
            "$daudio" 2>/dev/null); then
            rm -f "$daudio"
            notify-send -a ctl "transcription failed"
            exit 1
        fi
        rm -f "$daudio"

        # Trim leading/trailing whitespace on each line and squeeze runs of spaces.
        text=$(printf '%s' "$text" | sed 's/^[[:space:]]*//;s/[[:space:]]*$//' | tr -s ' ')
        if [ -z "$text" ]; then
            notify-send -a ctl "no speech detected"
            exit 1
        fi

        printf '%s' "$text" | wl-copy
        notify-send -a ctl "$text"
        exit 0
    fi

    # --- start recording ---
    if [ ! -f "$dmodel_file" ]; then
        # curl is only needed on this path, so it is checked here rather than
        # up front. NOTE(review): assumes `require` aborts when a command is
        # missing — confirm against its definition.
        require curl
        notify-send -a ctl "downloading whisper $dmodel model..."
        mkdir -p "$dmodel_dir"
        # Download to a temporary name and rename on success: --fail turns an
        # HTTP error into a non-zero exit, and the rename keeps a partial or
        # failed transfer from ever appearing at the final model path.
        if ! curl -fL -o "$dmodel_file.part" \
            "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-$dmodel.bin"; then
            rm -f "$dmodel_file.part"
            notify-send -a ctl "whisper $dmodel model download failed"
            exit 1
        fi
        mv "$dmodel_file.part" "$dmodel_file"
    fi

    notify-send -a ctl "recording..."
    pw-record "$daudio" &
    # Remember the recorder's PID so the next invocation can stop it.
    printf '%s' "$!" > "$dpid"
    ;;
idle) idle)
require notify-send require notify-send
if systemctl --user is-active --quiet hypridle.service; then if systemctl --user is-active --quiet hypridle.service; then
@ -546,7 +591,7 @@ idle)
fi fi
;; ;;
*) *)
echo "Usage: ctl {screenshot|keyboard|audio|wifi|brightness|volume|media|wallpaper|power|idle|clip}" >&2 echo "Usage: ctl {screenshot|keyboard|audio|wifi|brightness|volume|media|wallpaper|power|idle|clip|dictate}" >&2
exit 1 exit 1
;; ;;
esac esac