Files
dotfiles/files/stt-dictate.sh
Pierre Martin 2dc7c688ed feat(stt): add local speech-to-text with whisper.cpp
- Add stt-dictate script for push-to-talk dictation
- Add Mod+Space keybinding in i3 (hold to record, release to transcribe)
- Add whisper-cpp, alsa-utils, xdotool, libnotify packages
- Enable dunst notification daemon

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-27 17:04:07 +01:00

112 lines
3.1 KiB
Bash
Executable File

#!/usr/bin/env bash
# stt-dictate.sh - Push-to-talk speech-to-text using whisper.cpp
#
# Usage:
# stt-dictate start # demarre l'enregistrement
# stt-dictate stop # arrete et transcrit
# stt-dictate toggle # bascule entre start/stop
#
# Keybinding i3 (mode toggle):
# bindsym Mod4+space exec stt-dictate toggle
#
# Modeles disponibles (STT_MODEL), taille du fichier ggml sur disque:
# tiny - 75 MiB - rapide, qualite basique
# base - 142 MiB - rapide, bonne qualite
# small - 466 MiB - equilibre (defaut)
# medium - 1.5 GiB - lent, excellente qualite
# large - 2.9 GiB - tres lent, meilleure qualite
#
# Exemple: STT_MODEL=tiny stt-dictate start
set -euo pipefail

# Directory for transient state (PID file and captured audio). Fixed file
# names directly under the world-writable /tmp can be pre-created or
# symlinked by another local user, so XDG_RUNTIME_DIR is preferred whenever
# it is set and points at a writable directory; /tmp remains the fallback.
STT_RUNTIME_DIR="${XDG_RUNTIME_DIR:-}"
if [[ -z "$STT_RUNTIME_DIR" || ! -d "$STT_RUNTIME_DIR" || ! -w "$STT_RUNTIME_DIR" ]]; then
  STT_RUNTIME_DIR="/tmp"
fi

# File holding the PID of the background recorder process.
RECORDING_PID="${STT_RUNTIME_DIR}/stt-recording.pid"
# WAV file written by the recorder and read by the transcriber.
AUDIO_FILE="${STT_RUNTIME_DIR}/stt-audio.wav"
# Directory where downloaded ggml model files are cached.
MODEL_DIR="${HOME}/.cache/whisper"
# Model name; override with the STT_MODEL environment variable.
MODEL="${STT_MODEL:-small}"
# Show a desktop notification titled "STT". If notify-send fails (for example
# because no notification daemon is running), print the message to stdout
# instead.
# Arguments: $1 - message text
#            $2 - display time in milliseconds (optional, default 2000)
notify() {
  local message="$1"
  local timeout_ms="${2:-2000}"
  # Options come first and "--" ends option parsing, so a message that starts
  # with "-" (e.g. a transcript) is not mistaken for a notify-send option.
  notify-send -t "$timeout_ms" -- "STT" "$message" 2>/dev/null \
    || printf '[STT] %s\n' "$message"
}
# Download the ggml model selected by MODEL into MODEL_DIR unless the model
# file is already present.
# Globals:   MODEL_DIR (read), MODEL (read)
# Returns:   0 if the model file is present afterwards,
#            1 if the download failed
download_model() {
  local model_file="${MODEL_DIR}/ggml-${MODEL}.bin"
  if [[ -f "$model_file" ]]; then
    return 0
  fi

  mkdir -p "$MODEL_DIR"
  notify "Telechargement du modele ${MODEL}..." 5000

  local url="https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-${MODEL}.bin"
  # Download to a side file and rename only on success. Without -f, curl
  # stores an HTTP error page as if it were the model, and an interrupted
  # transfer would leave a truncated file that later runs accept as valid.
  local partial="${model_file}.part"
  if ! curl -fL "$url" -o "$partial"; then
    rm -f -- "$partial"
    notify "Echec du telechargement du modele ${MODEL}" 5000
    return 1
  fi
  mv -f -- "$partial" "$model_file"
  notify "Modele ${MODEL} pret"
}
# Start a background arecord capture unless one is already running.
# Globals:   RECORDING_PID (path; file read and written),
#            AUDIO_FILE (path; file written by arecord)
# Returns:   0 when recording was started or was already running,
#            1 when arecord is unavailable or the model download failed
start_recording() {
  # Do nothing if the PID file points at a live process.
  local running_pid=""
  if [[ -f "$RECORDING_PID" ]]; then
    running_pid="$(<"$RECORDING_PID")" || running_pid=""
    if [[ "$running_pid" =~ ^[0-9]+$ ]] && kill -0 "$running_pid" 2>/dev/null; then
      return 0
    fi
  fi

  # A background job that cannot start does not abort the script, so without
  # this check the PID file and the "recording" notification would be
  # produced even though nothing is recording.
  if ! command -v arecord >/dev/null 2>&1; then
    notify "arecord introuvable"
    return 1
  fi

  download_model || return 1

  # Capture 16 kHz, mono, signed 16-bit little-endian WAV (the format chosen
  # for whisper.cpp) in the background and remember the recorder's PID.
  arecord -f S16_LE -r 16000 -c 1 -t wav "$AUDIO_FILE" &
  echo "$!" > "$RECORDING_PID"
  notify "Enregistrement..." 1000
}
# Stop the recorder, transcribe the captured audio with whisper-cli and type
# the resulting text at the cursor with xdotool.
# Globals:   RECORDING_PID, AUDIO_FILE (paths; both files are removed),
#            MODEL_DIR, MODEL (read),
#            STT_LANG (read; language passed to whisper-cli, default "fr")
# Returns:   0 on success or when no PID file exists,
#            1 when no audio was captured or whisper-cli failed
stop_and_transcribe() {
  # No PID file: nothing to do.
  if [[ ! -f "$RECORDING_PID" ]]; then
    return 0
  fi

  local recorder_pid=""
  recorder_pid="$(<"$RECORDING_PID")" || recorder_pid=""
  rm -f -- "$RECORDING_PID"

  if [[ "$recorder_pid" =~ ^[0-9]+$ ]]; then
    kill "$recorder_pid" 2>/dev/null || true
    # Give the recorder up to ~1 s to exit so it can finalize the WAV file,
    # instead of sleeping for a fixed amount of time.
    local polls=0
    while (( polls < 20 )) && kill -0 "$recorder_pid" 2>/dev/null; do
      sleep 0.05
      polls=$(( polls + 1 ))
    done
  fi

  if [[ ! -f "$AUDIO_FILE" || ! -s "$AUDIO_FILE" ]]; then
    notify "Pas d'audio enregistre"
    rm -f -- "$AUDIO_FILE"
    return 1
  fi

  notify "Transcription..." 1000
  local model_file="${MODEL_DIR}/ggml-${MODEL}.bin"
  local language="${STT_LANG:-fr}"

  # Only stdout is captured: merging stderr into the transcript would make
  # xdotool type whisper-cli's diagnostics into the focused window.
  local raw_output=""
  if ! raw_output="$(whisper-cli -m "$model_file" -l "$language" -nt -np "$AUDIO_FILE")"; then
    rm -f -- "$AUDIO_FILE"
    notify "Echec de la transcription"
    return 1
  fi
  rm -f -- "$AUDIO_FILE"

  # Drop "load_backend:" lines, join the remaining lines and trim surrounding
  # whitespace. sed is used for the filter because it succeeds on empty
  # input, whereas "grep -v" exits 1 and would abort under set -e/pipefail.
  local text=""
  text="$(printf '%s\n' "$raw_output" \
    | sed '/^load_backend:/d' \
    | tr -d '\n' \
    | sed 's/^[[:space:]]*//;s/[[:space:]]*$//')"

  if [[ -z "$text" ]]; then
    notify "Aucun texte detecte"
    return 0
  fi

  sleep 0.1 # short delay before typing, for window focus
  xdotool type --delay 10 -- "$text"
  notify "$text"
}
# Command dispatch: the first argument selects the action; with no argument
# the script behaves as "toggle".
case "${1:-toggle}" in
  start)
    start_recording
    ;;
  stop)
    stop_and_transcribe
    ;;
  toggle)
    # The existence of the PID file decides between stopping and starting.
    if [[ -f "$RECORDING_PID" ]]; then
      stop_and_transcribe
    else
      start_recording
    fi
    ;;
  *)
    # A usage error is a diagnostic, so it goes to stderr rather than stdout.
    printf 'Usage: %s {start|stop|toggle}\n' "$0" >&2
    exit 1
    ;;
esac