Implements speech-to-text (via whisper-cpp) and text-to-speech (via espeak) functionality with key bindings. Replaces coreutils with busybox for lighter dependencies and removes explicit buildInputs since all paths are hardcoded.
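For reference, a minimal usage sketch, assuming this file is imported as a Home Manager module and that stt-ptt and the whisper-cpp variants are available in `pkgs` (the Vulkan override and `ggml-base.en` model here are illustrative choices, not defaults):

{ pkgs, ... }: {
  cli.stt-ptt = {
    enable = true;
    # Optional overrides; the defaults are pkgs.whisper-cpp and ggml-large-v3-turbo.
    whisperPackage = pkgs.whisper-cpp.override { vulkanSupport = true; };
    model = "ggml-base.en";
    notifyTimeout = 5000;
  };
}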
{
  config,
  lib,
  pkgs,
  ...
}:
with lib; let
  cfg = config.cli.stt-ptt;

  # Build stt-ptt package with the selected whisper package
  sttPttPackage = pkgs.stt-ptt.override {
    whisper-cpp = cfg.whisperPackage;
  };

  modelDir = "${config.xdg.dataHome}/stt-ptt/models";
  modelPath = "${modelDir}/${cfg.model}.bin";

  # HuggingFace URL for whisper.cpp models
  modelUrl = "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/${cfg.model}.bin";
in {
  options.cli.stt-ptt = {
    enable = mkEnableOption "push-to-talk speech-to-text using Whisper";

    whisperPackage = mkOption {
      type = types.package;
      default = pkgs.whisper-cpp;
      description = ''
        The whisper-cpp package to use. Available options:

        Pre-built variants:
        - `pkgs.whisper-cpp` - CPU-based inference (default)
        - `pkgs.whisper-cpp-vulkan` - Vulkan GPU acceleration

        Override options (can be combined):
        - `cudaSupport` - NVIDIA CUDA support
        - `rocmSupport` - AMD ROCm support
        - `vulkanSupport` - Vulkan support
        - `coreMLSupport` - Apple CoreML (macOS only)
        - `metalSupport` - Apple Metal (macOS ARM only)

        Example overrides:
        - `pkgs.whisper-cpp.override { cudaSupport = true; }` - NVIDIA GPU
        - `pkgs.whisper-cpp.override { rocmSupport = true; }` - AMD GPU
        - `pkgs.whisper-cpp.override { vulkanSupport = true; }` - Vulkan
      '';
      example = literalExpression "pkgs.whisper-cpp.override { cudaSupport = true; }";
    };

    model = mkOption {
      type = types.str;
      default = "ggml-large-v3-turbo";
      description = ''
        The Whisper model to use. Models are downloaded from HuggingFace.

        Available models (sorted by size/quality):
        - `ggml-tiny` / `ggml-tiny.en` - 75 MB, fastest, lowest quality
        - `ggml-base` / `ggml-base.en` - 142 MB, fast, basic quality
        - `ggml-small` / `ggml-small.en` - 466 MB, balanced
        - `ggml-medium` / `ggml-medium.en` - 1.5 GB, good quality
        - `ggml-large-v1` - 2.9 GB, high quality (original)
        - `ggml-large-v2` - 2.9 GB, high quality (improved)
        - `ggml-large-v3` - 2.9 GB, highest quality
        - `ggml-large-v3-turbo` - 1.6 GB, high quality, optimized speed (recommended)

        Models ending in `.en` are English-only and slightly faster for English.
        Quantized versions (q5_0, q5_1, q8_0) are also available for reduced size.
      '';
      example = "ggml-base.en";
    };

    notifyTimeout = mkOption {
      type = types.int;
      default = 3000;
      description = ''
        Notification timeout in milliseconds for the recording indicator.
        Set to 0 for persistent notifications.
      '';
      example = 5000;
    };
  };

  config = mkIf cfg.enable {
    home.packages = [sttPttPackage];

    home.sessionVariables = {
      STT_MODEL = modelPath;
      STT_NOTIFY_TIMEOUT = toString cfg.notifyTimeout;
    };

    # Create the model directory and download the model if it is not present
    home.activation.downloadWhisperModel = lib.hm.dag.entryAfter ["writeBoundary"] ''
      MODEL_DIR="${modelDir}"
      MODEL_PATH="${modelPath}"
      MODEL_URL="${modelUrl}"

      $DRY_RUN_CMD mkdir -p "$MODEL_DIR"

      if [ ! -f "$MODEL_PATH" ]; then
        echo "Downloading Whisper model: ${cfg.model}..."
        # --fail keeps HTTP error pages from being saved as the model file;
        # on failure, remove any partial download so the next activation retries.
        $DRY_RUN_CMD ${pkgs.curl}/bin/curl --fail -L -o "$MODEL_PATH" "$MODEL_URL" || {
          echo "Failed to download model from $MODEL_URL"
          echo "Please download manually and place at: $MODEL_PATH"
          $DRY_RUN_CMD rm -f "$MODEL_PATH"
        }
      fi
    '';
  };
}
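The activation script fetches the model imperatively at switch time, which keeps multi-gigabyte weights out of the Nix store but leaves the download unpinned. A hedged alternative sketch, if reproducibility matters more than store size: fetch the model declaratively and point STT_MODEL at the resulting store path. This is not what the module above does; the model name is illustrative and the hash is a placeholder to be replaced with the real one.

{ pkgs, lib, ... }: {
  home.sessionVariables.STT_MODEL = "${pkgs.fetchurl {
    url = "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin";
    sha256 = lib.fakeSha256; # placeholder; the first build reports the real hash
  }}";
}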