feat(stt): add local speech-to-text with whisper.cpp

- Add stt-dictate script for push-to-talk dictation
- Add Mod+Space keybinding in i3 (hold to record, release to transcribe)
- Add whisper-cpp, alsa-utils, xdotool, libnotify packages
- Enable dunst notification daemon

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-27 17:01:42 +01:00
parent 265f0ecefe
commit 2dc7c688ed
4 changed files with 123 additions and 0 deletions
--- a/files/stt-dictate.sh
+++ b/files/stt-dictate.sh
@@ -0,0 +1,111 @@
+#!/usr/bin/env bash
+# stt-dictate.sh - Push-to-talk speech-to-text using whisper.cpp
+#
+# Usage:
+#   stt-dictate start   # start recording
+#   stt-dictate stop    # stop recording and transcribe
+#   stt-dictate toggle  # switch between start and stop (default action)
+#
+# i3 keybinding (toggle mode):
+#   bindsym Mod4+space exec stt-dictate toggle
+#
+# Available models (STT_MODEL):
+#   tiny    - 39 MB   - fast, basic quality
+#   base    - 74 MB   - fast, good quality
+#   small   - 244 MB  - balanced (default)
+#   medium  - 769 MB  - slow, excellent quality
+#   large   - 1.5 GB  - very slow, best quality
+#
+# Example: STT_MODEL=tiny stt-dictate start
+
+# Strict mode: abort on command failure, on use of an unset variable, and when
+# any stage of a pipeline fails.
+set -o errexit -o nounset -o pipefail
+
+# File holding the PID of the background arecord process between invocations.
+RECORDING_PID=/tmp/stt-recording.pid
+# WAV capture that is handed to whisper-cli.
+AUDIO_FILE=/tmp/stt-audio.wav
+# Directory where downloaded ggml model files are cached.
+MODEL_DIR="$HOME/.cache/whisper"
+# Model name; taken from STT_MODEL when set and non-empty, otherwise "small".
+MODEL=${STT_MODEL:-small}
+
+# Show a desktop notification titled "STT". If notify-send fails (for example
+# because no notification daemon is running) the message is printed to stdout
+# instead, so the helper itself never aborts the script.
+# Arguments:
+#   $1 - message text
+#   $2 - value passed to notify-send -t (optional, default 2000)
+notify() {
+    local message=$1
+    local timeout_ms=${2:-2000}
+    # Options come first and "--" ends option parsing, so a message that starts
+    # with "-" (e.g. a transcribed dialogue line) is not mistaken for a flag.
+    notify-send -t "$timeout_ms" -- "STT" "$message" 2>/dev/null \
+        || printf '%s\n' "[STT] $message"
+}
+
+# Telecharge le modele si absent
+download_model() {
+    local model_file="${MODEL_DIR}/ggml-${MODEL}.bin"
+    if [[ ! -f "$model_file" ]]; then
+        mkdir -p "$MODEL_DIR"
+        notify "Telechargement du modele ${MODEL}..." 5000
+        local url="https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-${MODEL}.bin"
+        curl -L "$url" -o "$model_file"
+        notify "Modele ${MODEL} pret"
+    fi
+}
+
+# Start a background arecord capture into ${AUDIO_FILE} and store its PID in
+# ${RECORDING_PID}. Does nothing when the PID file points at a live process.
+# The model is fetched first if it is not cached yet.
+start_recording() {
+    # Do not start a second recorder while one is still running.
+    if [[ -f "$RECORDING_PID" ]]; then
+        if kill -0 "$(cat "$RECORDING_PID")" 2>/dev/null; then
+            return 0
+        fi
+    fi
+
+    download_model
+
+    # Record with arecord in a format compatible with whisper.cpp:
+    # signed 16-bit little-endian samples, 16000 Hz, one channel, WAV container.
+    arecord -f S16_LE -r 16000 -c 1 -t wav "$AUDIO_FILE" &
+    local recorder_pid=$!
+    printf '%s\n' "$recorder_pid" > "$RECORDING_PID"
+
+    notify "Enregistrement..." 1000
+}
+
+stop_and_transcribe() {
+    if [[ -f "$RECORDING_PID" ]]; then
+        kill "$(cat "$RECORDING_PID")" 2>/dev/null || true
+        rm -f "$RECORDING_PID"
+        sleep 0.3  # laisse arecord finaliser le fichier
+
+        if [[ ! -f "$AUDIO_FILE" ]] || [[ ! -s "$AUDIO_FILE" ]]; then
+            notify "Pas d'audio enregistre"
+            rm -f "$AUDIO_FILE"
+            return 1
+        fi
+
+        notify "Transcription..." 1000
+
+        local model_file="${MODEL_DIR}/ggml-${MODEL}.bin"
+
+        # Transcription avec whisper.cpp
+        TEXT=$(whisper-cli \
+            -m "$model_file" \
+            -l fr \
+            -nt \
+            -np \
+            "$AUDIO_FILE" 2>&1 \
+            | grep -v "^load_backend:" \
+            | tr -d '\n' \
+            | sed 's/^[[:space:]]*//;s/[[:space:]]*$//')
+
+        rm -f "$AUDIO_FILE"
+
+        # Tape le texte au curseur
+        if [[ -n "$TEXT" ]]; then
+            sleep 0.1  # petit delai pour focus
+            xdotool type --delay 10 -- "$TEXT"
+            notify "$TEXT"
+        else
+            notify "Aucun texte detecte"
+        fi
+    fi
+}
+
+case "${1:-toggle}" in
+    start) start_recording ;;
+    stop)  stop_and_transcribe ;;
+    toggle)
+        if [[ -f "$RECORDING_PID" ]]; then
+            stop_and_transcribe
+        else
+            start_recording
+        fi
+        ;;
+    *)
+        echo "Usage: $0 {start|stop|toggle}"
+        exit 1
+        ;;
+esac