{ config, lib, pkgs, ... }:

with lib;

let
  cfg = config.cli.stt-ptt;

  # Build stt-ptt package with the selected whisper package
  sttPttPackage = pkgs.stt-ptt.override { whisper-cpp = cfg.whisperPackage; };

  modelDir = "${config.xdg.dataHome}/stt-ptt/models";
  modelPath = "${modelDir}/${cfg.model}.bin";

  # HuggingFace URL for whisper.cpp models
  modelUrl = "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/${cfg.model}.bin";
in
{
  options.cli.stt-ptt = {
    enable = mkEnableOption "Push to Talk Speech to Text using Whisper";

    whisperPackage = mkOption {
      type = types.package;
      default = pkgs.whisper-cpp;
      description = ''
        The whisper-cpp package to use. Available options:

        Pre-built variants:
        - `pkgs.whisper-cpp` - CPU-based inference (default)
        - `pkgs.whisper-cpp-vulkan` - Vulkan GPU acceleration

        Override options (can be combined):
        - `cudaSupport` - NVIDIA CUDA support
        - `rocmSupport` - AMD ROCm support
        - `vulkanSupport` - Vulkan support
        - `coreMLSupport` - Apple CoreML (macOS only)
        - `metalSupport` - Apple Metal (macOS ARM only)

        Example overrides:
        - `pkgs.whisper-cpp.override { cudaSupport = true; }` - NVIDIA GPU
        - `pkgs.whisper-cpp.override { rocmSupport = true; }` - AMD GPU
        - `pkgs.whisper-cpp.override { vulkanSupport = true; }` - Vulkan
      '';
      example = literalExpression "pkgs.whisper-cpp.override { cudaSupport = true; }";
    };

    model = mkOption {
      type = types.str;
      default = "ggml-large-v3-turbo";
      description = ''
        The Whisper model to use. Models are downloaded from HuggingFace.

        Available models (sorted by size/quality):
        - `ggml-tiny` / `ggml-tiny.en` - 75MB, fastest, lowest quality
        - `ggml-base` / `ggml-base.en` - 142MB, fast, basic quality
        - `ggml-small` / `ggml-small.en` - 466MB, balanced
        - `ggml-medium` / `ggml-medium.en` - 1.5GB, good quality
        - `ggml-large-v1` - 2.9GB, high quality (original)
        - `ggml-large-v2` - 2.9GB, high quality (improved)
        - `ggml-large-v3` - 2.9GB, highest quality
        - `ggml-large-v3-turbo` - 1.6GB, high quality, optimized speed (recommended)

        Models ending in `.en` are English-only and slightly faster for English.
        Quantized versions (q5_0, q5_1, q8_0) are also available for reduced size.
      '';
      example = "ggml-base.en";
    };

    notifyTimeout = mkOption {
      type = types.int;
      default = 3000;
      description = ''
        Notification timeout in milliseconds for the recording indicator.
        Set to 0 for persistent notifications.
      '';
      example = 5000;
    };

    language = mkOption {
      type = types.enum [
        "auto" "en" "es" "fr" "de" "it" "pt" "ru" "zh" "ja" "ko" "ar" "hi"
        "tr" "pl" "nl" "sv" "da" "fi" "no" "vi" "th" "id" "uk" "cs"
      ];
      default = "auto";
      description = ''
        Language for speech recognition.

        Use "auto" for automatic language detection, or specify a language
        code (ISO 639-1 standard) for better accuracy.

        Auto-detection analyzes the audio to determine the spoken language
        automatically. Specifying a language can improve accuracy if you
        know the language in advance.

        Common language codes:
        - en: English
        - es: Spanish
        - fr: French
        - de: German
        - zh: Chinese
        - ja: Japanese
        - ko: Korean

        whisper.cpp supports 100+ languages. See whisper.cpp documentation
        for the full list.
      '';
      example = "en";
    };
  };

  config = mkIf cfg.enable {
    home.packages = [ sttPttPackage ];

    # Environment the stt-ptt binary reads at runtime.
    home.sessionVariables = {
      STT_MODEL = modelPath;
      STT_LANGUAGE = cfg.language;
      STT_NOTIFY_TIMEOUT = toString cfg.notifyTimeout;
    };

    # Create model directory and download model if not present.
    #
    # The download goes through a temporary file with `curl --fail` so that
    # an HTTP error page or a partially transferred file is never left at
    # $MODEL_PATH — otherwise the `[ ! -f ... ]` guard would skip the
    # download forever on later activations, leaving a corrupt model.
    home.activation.downloadWhisperModel = lib.hm.dag.entryAfter ["writeBoundary"] ''
      MODEL_DIR="${modelDir}"
      MODEL_PATH="${modelPath}"
      MODEL_URL="${modelUrl}"

      $DRY_RUN_CMD mkdir -p "$MODEL_DIR"

      if [ ! -f "$MODEL_PATH" ]; then
        echo "Downloading Whisper model: ${cfg.model}..."
        if $DRY_RUN_CMD ${pkgs.curl}/bin/curl --fail -L -o "$MODEL_PATH.tmp" "$MODEL_URL"; then
          # Atomically move the completed download into place.
          $DRY_RUN_CMD mv "$MODEL_PATH.tmp" "$MODEL_PATH"
        else
          # Best-effort: clean up any partial file and tell the user how to
          # recover manually, without failing the whole activation.
          $DRY_RUN_CMD rm -f "$MODEL_PATH.tmp"
          echo "Failed to download model from $MODEL_URL"
          echo "Please download manually and place at: $MODEL_PATH"
        fi
      fi
    '';
  };
}