nixpkgs/modules/home-manager/cli/stt-ptt.nix
m3tm3re 9f284a8c21 feat: add auto-language detection support to stt-ptt
- Add STT_LANGUAGE environment variable (default: auto)
- Modify whisper-cli call to use -l flag with language code
- Add language option to HM module (enum with 23 common languages + auto)
- Update help text to document STT_LANGUAGE variable
2026-01-10 09:36:21 +01:00

{
  config,
  lib,
  pkgs,
  ...
}:
with lib; let
  cfg = config.cli.stt-ptt;
  # Build stt-ptt package with the selected whisper package
  sttPttPackage = pkgs.stt-ptt.override {
    whisper-cpp = cfg.whisperPackage;
  };
  modelDir = "${config.xdg.dataHome}/stt-ptt/models";
  modelPath = "${modelDir}/${cfg.model}.bin";
  # HuggingFace URL for whisper.cpp models
  modelUrl = "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/${cfg.model}.bin";
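  # For the default model this interpolates to:
  # https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-large-v3-turbo.bin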
in {
  options.cli.stt-ptt = {
    enable = mkEnableOption "Push to Talk Speech to Text using Whisper";

    whisperPackage = mkOption {
      type = types.package;
      default = pkgs.whisper-cpp;
      description = ''
        The whisper-cpp package to use. Available options:

        Pre-built variants:
        - `pkgs.whisper-cpp` - CPU-based inference (default)
        - `pkgs.whisper-cpp-vulkan` - Vulkan GPU acceleration

        Override options (can be combined):
        - `cudaSupport` - NVIDIA CUDA support
        - `rocmSupport` - AMD ROCm support
        - `vulkanSupport` - Vulkan support
        - `coreMLSupport` - Apple CoreML (macOS only)
        - `metalSupport` - Apple Metal (macOS ARM only)

        Example overrides:
        - `pkgs.whisper-cpp.override { cudaSupport = true; }` - NVIDIA GPU
        - `pkgs.whisper-cpp.override { rocmSupport = true; }` - AMD GPU
        - `pkgs.whisper-cpp.override { vulkanSupport = true; }` - Vulkan
      '';
      example = literalExpression "pkgs.whisper-cpp.override { cudaSupport = true; }";
    };
    model = mkOption {
      type = types.str;
      default = "ggml-large-v3-turbo";
      description = ''
        The Whisper model to use. Models are downloaded from HuggingFace.

        Available models (sorted by size/quality):
        - `ggml-tiny` / `ggml-tiny.en` - 75MB, fastest, lowest quality
        - `ggml-base` / `ggml-base.en` - 142MB, fast, basic quality
        - `ggml-small` / `ggml-small.en` - 466MB, balanced
        - `ggml-medium` / `ggml-medium.en` - 1.5GB, good quality
        - `ggml-large-v1` - 2.9GB, high quality (original)
        - `ggml-large-v2` - 2.9GB, high quality (improved)
        - `ggml-large-v3` - 2.9GB, highest quality
        - `ggml-large-v3-turbo` - 1.6GB, high quality, optimized speed (recommended)

        Models ending in `.en` are English-only and slightly faster for English.
        Quantized versions (q5_0, q5_1, q8_0) are also available for reduced size.
      '';
      example = "ggml-base.en";
    };
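    # Quantized variants follow the same naming scheme; a likely example (assuming
    # the corresponding .bin is published upstream) is model = "ggml-large-v3-turbo-q5_0".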
    notifyTimeout = mkOption {
      type = types.int;
      default = 3000;
      description = ''
        Notification timeout in milliseconds for the recording indicator.
        Set to 0 for persistent notifications.
      '';
      example = 5000;
    };
    language = mkOption {
      type = types.enum [
        "auto"
        "en"
        "es"
        "fr"
        "de"
        "it"
        "pt"
        "ru"
        "zh"
        "ja"
        "ko"
        "ar"
        "hi"
        "tr"
        "pl"
        "nl"
        "sv"
        "da"
        "fi"
        "no"
        "vi"
        "th"
        "id"
        "uk"
        "cs"
      ];
      default = "auto";
      description = ''
        Language for speech recognition. Use "auto" to detect the spoken
        language from the audio itself, or set an ISO 639-1 language code;
        fixing the language in advance can improve accuracy when you know it.

        Common language codes:
        - en: English
        - es: Spanish
        - fr: French
        - de: German
        - zh: Chinese
        - ja: Japanese
        - ko: Korean

        whisper.cpp supports 100+ languages. See the whisper.cpp
        documentation for the full list.
      '';
      example = "en";
    };
  };
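
  # Example usage (a sketch of a consuming Home Manager configuration;
  # the values are illustrative, taken from the option docs above):
  #
  #   cli.stt-ptt = {
  #     enable = true;
  #     whisperPackage = pkgs.whisper-cpp-vulkan;
  #     model = "ggml-small";
  #     language = "de";
  #   };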
  config = mkIf cfg.enable {
    home.packages = [sttPttPackage];

    home.sessionVariables = {
      STT_MODEL = modelPath;
      STT_LANGUAGE = cfg.language;
      STT_NOTIFY_TIMEOUT = toString cfg.notifyTimeout;
    };
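    # The stt-ptt script reads these at runtime; per the commit message above,
    # STT_LANGUAGE is forwarded to whisper-cli's -l flag.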
    # Create model directory and download model if not present
    home.activation.downloadWhisperModel = lib.hm.dag.entryAfter ["writeBoundary"] ''
      MODEL_DIR="${modelDir}"
      MODEL_PATH="${modelPath}"
      MODEL_URL="${modelUrl}"

      $DRY_RUN_CMD mkdir -p "$MODEL_DIR"

      if [ ! -f "$MODEL_PATH" ]; then
        echo "Downloading Whisper model: ${cfg.model}..."
        # --fail makes curl exit non-zero on HTTP errors instead of saving an
        # error page as the model file, so the fallback message below fires.
        $DRY_RUN_CMD ${pkgs.curl}/bin/curl -L --fail -o "$MODEL_PATH" "$MODEL_URL" || {
          echo "Failed to download model from $MODEL_URL"
          echo "Please download manually and place at: $MODEL_PATH"
        }
      fi
    '';
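    # Manual fallback (a sketch; xdg.dataHome defaults to ~/.local/share):
    #   curl -L --fail -o ~/.local/share/stt-ptt/models/ggml-large-v3-turbo.bin \
    #     https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-large-v3-turbo.bin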
  };
}