Implements speech-to-text (via whisper-cpp) and text-to-speech (via espeak) functionality with key bindings. Replaces coreutils with busybox for lighter dependencies and removes explicit buildInputs since all paths are hardcoded.
92 lines
2.8 KiB
Nix
92 lines
2.8 KiB
Nix
{
|
|
lib,
|
|
stdenv,
|
|
writeShellScriptBin,
|
|
whisper-cpp,
|
|
wtype,
|
|
libnotify,
|
|
pipewire,
|
|
busybox,
|
|
}: let
|
|
# Push-to-talk helper script.
#   stt-ptt start  - begin recording the microphone with pw-record
#   stt-ptt stop   - end the recording, transcribe it with whisper-cli and
#                    type the resulting text with wtype
# Every external tool is referenced through its absolute store path.
script = writeShellScriptBin "stt-ptt" ''
  # stt-ptt - Push to Talk Speech to Text
  # (writeShellScriptBin emits the shebang line, so none is repeated here.)

  # Abort on unset variables; let a failing whisper-cli fail its pipeline.
  set -uo pipefail

  CACHE_DIR="''${XDG_CACHE_HOME:-$HOME/.cache}/stt-ptt"
  MODEL_DIR="''${XDG_DATA_HOME:-$HOME/.local/share}/stt-ptt/models"
  AUDIO="$CACHE_DIR/stt.wav"
  PID_FILE="$CACHE_DIR/stt.pid"

  # Configurable via environment
  STT_MODEL="''${STT_MODEL:-$MODEL_DIR/ggml-large-v3-turbo.bin}"
  STT_NOTIFY_TIMEOUT="''${STT_NOTIFY_TIMEOUT:-3000}"

  NOTIFY="${libnotify}/bin/notify-send"
  PW_RECORD="${pipewire}/bin/pw-record"
  WHISPER="${whisper-cpp}/bin/whisper-cli"
  WTYPE="${wtype}/bin/wtype"
  MKDIR="${busybox}/bin/mkdir"
  RM="${busybox}/bin/rm"
  KILL="${busybox}/bin/kill"
  SLEEP="${busybox}/bin/sleep"
  TR="${busybox}/bin/tr"
  SED="${busybox}/bin/sed"

  # Show a desktop notification carrying the message given as $1.
  notify() {
    "$NOTIFY" -t "$STT_NOTIFY_TIMEOUT" -a "stt-ptt" "$1"
  }

  # Terminate the recorder named in PID_FILE (if any) and wait for it to exit.
  # The recorder was started by an earlier invocation of this script, so the
  # shell builtin `wait` cannot be used; the process is polled instead.
  stop_recorder() {
    local pid tries
    [[ -f "$PID_FILE" ]] || return 0
    pid=$(<"$PID_FILE")
    "$RM" -f "$PID_FILE"
    # Never hand an empty or malformed value to kill.
    [[ "$pid" =~ ^[0-9]+$ ]] || return 0
    "$KILL" "$pid" 2>/dev/null || return 0
    # SIGTERM is asynchronous: give the recorder up to ~5 s to close the
    # audio file before anything tries to read it.
    tries=0
    while "$KILL" -0 "$pid" 2>/dev/null && (( tries < 50 )); do
      "$SLEEP" 0.1
      tries=$((tries + 1))
    done
  }

  # Ensure cache directory exists
  "$MKDIR" -p "$CACHE_DIR" || exit 1

  case "''${1:-}" in
    start)
      # A recording that never received `stop` would otherwise keep running
      # with no PID file left to find it by.
      stop_recorder
      "$RM" -f "$AUDIO"
      notify "Recording..."
      "$PW_RECORD" --rate=16000 --channels=1 "$AUDIO" &
      echo "$!" > "$PID_FILE"
      ;;
    stop)
      stop_recorder

      # Nothing was recorded: nothing to transcribe.
      [[ -f "$AUDIO" ]] || exit 0

      if [[ ! -f "$STT_MODEL" ]]; then
        notify "Error: Model not found at $STT_MODEL"
        "$RM" -f "$AUDIO"
        exit 1
      fi

      # Join whisper's output into one line and trim surrounding whitespace.
      if ! text=$("$WHISPER" -m "$STT_MODEL" -f "$AUDIO" -np -nt 2>/dev/null | "$TR" -d '\n' | "$SED" 's/^[[:space:]]*//;s/[[:space:]]*$//'); then
        "$RM" -f "$AUDIO"
        notify "Error: Transcription failed"
        exit 1
      fi
      "$RM" -f "$AUDIO"

      # An empty transcription is not an error, so use `if` rather than
      # `[[ ... ]] && ...`, which would make the script exit with status 1.
      if [[ -n "$text" ]]; then
        "$WTYPE" -- "$text"
      fi
      ;;
    *)
      echo "Usage: stt-ptt {start|stop}"
      echo ""
      echo "Environment variables:"
      echo " STT_MODEL - Path to whisper model (default: \$XDG_DATA_HOME/stt-ptt/models/ggml-large-v3-turbo.bin)"
      echo " STT_NOTIFY_TIMEOUT - Notification timeout in ms (default: 3000)"
      exit 1
      ;;
  esac
'';
|
|
in
|
|
# Thin wrapper derivation: exposes the generated script as $out/bin/stt-ptt
# and attaches package metadata to it.
stdenv.mkDerivation {
  pname = "stt-ptt";
  version = "0.1.0";

  # There is no source archive; the only input is the script built above.
  dontUnpack = true;

  # No buildInputs needed - all runtime deps are hardcoded with full nix store paths in the script

  installPhase = ''
    runHook preInstall

    mkdir -p "$out/bin"
    ln -s ${script}/bin/stt-ptt "$out/bin/stt-ptt"

    runHook postInstall
  '';

  meta = {
    description = "Push to Talk Speech to Text using Whisper";
    homepage = "https://code.m3ta.dev/m3tam3re/nixpkgs";
    license = lib.licenses.mit;
    platforms = lib.platforms.linux;
    mainProgram = "stt-ptt";
  };
}
|