feat(stt): add local speech-to-text with whisper.cpp

- Add stt-dictate script for push-to-talk dictation - Add Mod+Space keybinding in i3 (hold to record, release to transcribe) - Add whisper-cpp, alsa-utils, xdotool, libnotify packages - Enable dunst notification daemon 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-27 17:01:42 +01:00
parent 265f0ecefe
commit 2dc7c688ed
4 changed files with 123 additions and 0 deletions
--- a/files/stt-dictate.sh
+++ b/files/stt-dictate.sh
@@ -0,0 +1,111 @@
 #!/usr/bin/env bash
 # stt-dictate.sh - Push-to-talk speech-to-text using whisper.cpp
 #
 # Usage:
 #   stt-dictate start   # demarre l'enregistrement
 #   stt-dictate stop    # arrete et transcrit
 #   stt-dictate toggle  # bascule entre start/stop
 #
 # Keybinding i3 (mode toggle):
 #   bindsym Mod4+space exec stt-dictate toggle
 #
 # Modeles disponibles (STT_MODEL):
 #   tiny    - 39 MB   - rapide, qualite basique
 #   base    - 74 MB   - rapide, bonne qualite
 #   small   - 244 MB  - equilibre (defaut)
 #   medium  - 769 MB  - lent, excellente qualite
 #   large   - 1.5 GB  - tres lent, meilleure qualite
 #
 # Exemple: STT_MODEL=tiny stt-dictate start
 set -euo pipefail
 RECORDING_PID="/tmp/stt-recording.pid"
 AUDIO_FILE="/tmp/stt-audio.wav"
 MODEL_DIR="${HOME}/.cache/whisper"
 MODEL="${STT_MODEL:-small}"
 # Notification helper (silently fails if no daemon)
 notify() {
    notify-send "STT" "$1" -t "${2:-2000}" 2>/dev/null || echo "[STT] $1"
 }
 # Telecharge le modele si absent
 download_model() {
    local model_file="${MODEL_DIR}/ggml-${MODEL}.bin"
    if [[ ! -f "$model_file" ]]; then
        mkdir -p "$MODEL_DIR"
        notify "Telechargement du modele ${MODEL}..." 5000
        local url="https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-${MODEL}.bin"
        curl -L "$url" -o "$model_file"
        notify "Modele ${MODEL} pret"
    fi
 }
 start_recording() {
    # Ne pas démarrer si déjà en cours
    if [[ -f "$RECORDING_PID" ]] && kill -0 "$(cat "$RECORDING_PID")" 2>/dev/null; then
        return 0
    fi
    download_model
    # Enregistre avec arecord (format compatible whisper.cpp)
    arecord -f S16_LE -r 16000 -c 1 -t wav "$AUDIO_FILE" &
    echo $! > "$RECORDING_PID"
    notify "Enregistrement..." 1000
 }
 stop_and_transcribe() {
    if [[ -f "$RECORDING_PID" ]]; then
        kill "$(cat "$RECORDING_PID")" 2>/dev/null || true
        rm -f "$RECORDING_PID"
        sleep 0.3  # laisse arecord finaliser le fichier
        if [[ ! -f "$AUDIO_FILE" ]] || [[ ! -s "$AUDIO_FILE" ]]; then
            notify "Pas d'audio enregistre"
            rm -f "$AUDIO_FILE"
            return 1
        fi
        notify "Transcription..." 1000
        local model_file="${MODEL_DIR}/ggml-${MODEL}.bin"
        # Transcription avec whisper.cpp
        TEXT=$(whisper-cli \
            -m "$model_file" \
            -l fr \
            -nt \
            -np \
            "$AUDIO_FILE" 2>&1 \
            | grep -v "^load_backend:" \
            | tr -d '\n' \
            | sed 's/^[[:space:]]*//;s/[[:space:]]*$//')
        rm -f "$AUDIO_FILE"
        # Tape le texte au curseur
        if [[ -n "$TEXT" ]]; then
            sleep 0.1  # petit delai pour focus
            xdotool type --delay 10 -- "$TEXT"
            notify "$TEXT"
        else
            notify "Aucun texte detecte"
        fi
    fi
 }
 case "${1:-toggle}" in
    start) start_recording ;;
    stop)  stop_and_transcribe ;;
    toggle)
        if [[ -f "$RECORDING_PID" ]]; then
            stop_and_transcribe
        else
            start_recording
        fi
        ;;
    *)
        echo "Usage: $0 {start|stop|toggle}"
        exit 1
        ;;
 esac
--- a/home.nix
+++ b/home.nix
@@ -8,6 +8,10 @@
  home.file.".config/traefik/traefik.toml".source = ./files/traefik.toml;
  home.file.".npmrc".source = ./files/.npmrc;
  home.file.".local/bin/stt-dictate" = {
    source = ./files/stt-dictate.sh;
    executable = true;
  };
  imports =
    [
@@ -49,6 +53,7 @@
  services.unclutter.enable = true;
  services.blueman-applet.enable = true;
  services.dunst.enable = true;  # notification daemon
  services.udiskie.enable = true; # require "services.udisks2.enable = true" in system configuration
 }
--- a/packages.nix
+++ b/packages.nix
@@ -150,6 +150,10 @@
    # AI
    ollama
    whisper-cpp # STT local
    alsa-utils # arecord pour l'enregistrement
    xdotool # pour taper le texte transcrit
    libnotify # notifications
    # Perso
    nextcloud-client
--- a/programs/i3.nix
+++ b/programs/i3.nix
@@ -76,6 +76,9 @@
          # Sreen brightness controls
          "XF86MonBrightnessUp" = "exec light -A 2"; # increase screen brightness
          "XF86MonBrightnessDown" = "exec light -U 2"; # decrease screen brightness
          # Speech-to-text (toggle: press to start/stop)
          "${modifier}+space" = "exec ~/.local/bin/stt-dictate toggle";
        };
        startup = [