- Add STT_LANGUAGE environment variable (default: auto)
- Modify whisper-cli call to use -l flag with language code (see the sketch below)
- Add language option to HM module (enum with 24 common languages + auto)
- Update help text to document STT_LANGUAGE variable
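The wrapper script itself ships with the stt-ptt package rather than with this module. The following is a minimal sketch, not the package's actual source, of how such a wrapper might hand the exported variables to whisper-cli via the new -l flag; the script name "stt-transcribe" and the "$1" audio-file argument are illustrative, and cfg refers to the module's config as in the file below.

pkgs.writeShellScriptBin "stt-transcribe" ''
  # Sketch only: "$1" is the recorded audio clip; STT_MODEL and STT_LANGUAGE
  # are the session variables exported by the module below.
  exec ${cfg.whisperPackage}/bin/whisper-cli \
    -m "$STT_MODEL" \
    -l "''${STT_LANGUAGE:-auto}" \
    -f "$1"
''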
159 lines | 4.6 KiB | Nix
{
  config,
  lib,
  pkgs,
  ...
}:
with lib; let
  cfg = config.cli.stt-ptt;

  # Build stt-ptt package with the selected whisper package
  sttPttPackage = pkgs.stt-ptt.override {
    whisper-cpp = cfg.whisperPackage;
  };

  modelDir = "${config.xdg.dataHome}/stt-ptt/models";
  modelPath = "${modelDir}/${cfg.model}.bin";

  # HuggingFace URL for whisper.cpp models
  modelUrl = "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/${cfg.model}.bin";
in {
  options.cli.stt-ptt = {
    enable = mkEnableOption "Push to Talk Speech to Text using Whisper";

    whisperPackage = mkOption {
      type = types.package;
      default = pkgs.whisper-cpp;
      description = ''
        The whisper-cpp package to use. Available options:

        Pre-built variants:
        - `pkgs.whisper-cpp` - CPU-based inference (default)
        - `pkgs.whisper-cpp-vulkan` - Vulkan GPU acceleration

        Override options (can be combined):
        - `cudaSupport` - NVIDIA CUDA support
        - `rocmSupport` - AMD ROCm support
        - `vulkanSupport` - Vulkan support
        - `coreMLSupport` - Apple CoreML (macOS only)
        - `metalSupport` - Apple Metal (macOS ARM only)

        Example overrides:
        - `pkgs.whisper-cpp.override { cudaSupport = true; }` - NVIDIA GPU
        - `pkgs.whisper-cpp.override { rocmSupport = true; }` - AMD GPU
        - `pkgs.whisper-cpp.override { vulkanSupport = true; }` - Vulkan
      '';
      example = literalExpression "pkgs.whisper-cpp.override { cudaSupport = true; }";
    };

    model = mkOption {
      type = types.str;
      default = "ggml-large-v3-turbo";
      description = ''
        The Whisper model to use. Models are downloaded from HuggingFace.

        Available models (sorted by size/quality):
        - `ggml-tiny` / `ggml-tiny.en` - 75MB, fastest, lowest quality
        - `ggml-base` / `ggml-base.en` - 142MB, fast, basic quality
        - `ggml-small` / `ggml-small.en` - 466MB, balanced
        - `ggml-medium` / `ggml-medium.en` - 1.5GB, good quality
        - `ggml-large-v1` - 2.9GB, high quality (original)
        - `ggml-large-v2` - 2.9GB, high quality (improved)
        - `ggml-large-v3` - 2.9GB, highest quality
        - `ggml-large-v3-turbo` - 1.6GB, high quality, optimized speed (recommended)

        Models ending in `.en` are English-only and slightly faster for English.
        Quantized versions (q5_0, q5_1, q8_0) are also available for reduced size.
      '';
      example = "ggml-base.en";
    };

    notifyTimeout = mkOption {
      type = types.int;
      default = 3000;
      description = ''
        Notification timeout in milliseconds for the recording indicator.
        Set to 0 for persistent notifications.
      '';
      example = 5000;
    };
    language = mkOption {
      type = types.enum [
        "auto"
        "en"
        "es"
        "fr"
        "de"
        "it"
        "pt"
        "ru"
        "zh"
        "ja"
        "ko"
        "ar"
        "hi"
        "tr"
        "pl"
        "nl"
        "sv"
        "da"
        "fi"
        "no"
        "vi"
        "th"
        "id"
        "uk"
        "cs"
      ];
      default = "auto";
      description = ''
        Language for speech recognition. Use "auto" for automatic language detection,
        or specify an ISO 639-1 language code for better accuracy.

        Auto-detection analyzes the audio to determine the spoken language.
        Specifying the language in advance can improve accuracy when it is known.

        Common language codes:
        - en: English
        - es: Spanish
        - fr: French
        - de: German
        - zh: Chinese
        - ja: Japanese
        - ko: Korean

        whisper.cpp supports 100+ languages; see its documentation for the full list.
      '';
      example = "en";
    };
  };
  config = mkIf cfg.enable {
    home.packages = [sttPttPackage];

    home.sessionVariables = {
      STT_MODEL = modelPath;
      STT_LANGUAGE = cfg.language;
      STT_NOTIFY_TIMEOUT = toString cfg.notifyTimeout;
    };

    # Create model directory and download model if not present
    home.activation.downloadWhisperModel = lib.hm.dag.entryAfter ["writeBoundary"] ''
      MODEL_DIR="${modelDir}"
      MODEL_PATH="${modelPath}"
      MODEL_URL="${modelUrl}"

      $DRY_RUN_CMD mkdir -p "$MODEL_DIR"

      if [ ! -f "$MODEL_PATH" ]; then
        echo "Downloading Whisper model: ${cfg.model}..."
        # --fail keeps an HTTP error page from being saved as the model file
        $DRY_RUN_CMD ${pkgs.curl}/bin/curl --fail -L -o "$MODEL_PATH" "$MODEL_URL" || {
          echo "Failed to download model from $MODEL_URL"
          echo "Please download manually and place at: $MODEL_PATH"
        }
      fi
    '';
  };
}
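A hedged usage sketch, for reference: enabling the module from a Home Manager configuration that imports it. The import path is illustrative, and the option values are examples rather than the module's defaults.

{pkgs, ...}: {
  imports = [./stt-ptt.nix]; # illustrative path to this module

  cli.stt-ptt = {
    enable = true;
    model = "ggml-base.en"; # smaller English-only model
    language = "en"; # skip auto-detection
    whisperPackage = pkgs.whisper-cpp.override { cudaSupport = true; }; # NVIDIA GPU
  };
}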