Files
dotfiles/files/stt-dictate.sh

112 lines
2.8 KiB
Bash
Executable File

#!/usr/bin/env bash
# stt-dictate.sh - Push-to-talk speech-to-text using whisper.cpp
#
# Usage:
# stt-dictate start # demarre l'enregistrement
# stt-dictate stop # arrete et transcrit
# stt-dictate toggle # bascule entre start/stop
#
# Keybinding i3 (mode toggle):
# bindsym Mod4+space exec stt-dictate toggle
#
# Modeles disponibles (STT_MODEL):
# tiny - 39 MB - rapide, qualite basique
# base - 74 MB - rapide, bonne qualite
# small - 244 MB - equilibre (defaut)
# medium - 769 MB - lent, excellente qualite
# large - 1.5 GB - tres lent, meilleure qualite
#
# Exemple: STT_MODEL=tiny stt-dictate start
# Strict mode: abort on command errors, unset variables and failed pipeline
# stages.
set -euo pipefail

# Directory for transient state. XDG_RUNTIME_DIR is a per-user directory that
# only its owner may access, unlike the shared /tmp; /tmp remains the fallback
# so the paths are unchanged on systems where the variable is not set.
STATE_DIR="${XDG_RUNTIME_DIR:-/tmp}"
# File holding the PID of the running recorder.
RECORDING_PID="${STATE_DIR}/stt-recording.pid"
# WAV file written by the recorder and read by the transcriber.
AUDIO_FILE="${STATE_DIR}/stt-audio.wav"
# Directory where downloaded whisper.cpp models are cached.
MODEL_DIR="${HOME}/.cache/whisper"
# Model name; override with the STT_MODEL environment variable.
MODEL="${STT_MODEL:-small}"
readonly STATE_DIR RECORDING_PID AUDIO_FILE MODEL_DIR MODEL
# Show a desktop notification; if notify-send fails (for example because no
# notification daemon is running), print the message to stdout instead.
# Arguments:
#   $1 - message text
#   $2 - expiry time in milliseconds (optional, default 2000)
notify() {
  # Options come first and "--" ends option parsing, so a message starting
  # with "-" (such as transcribed text) is never taken for an option.
  notify-send -t "${2:-2000}" -- "STT" "$1" 2>/dev/null || echo "[STT] $1"
}
# Download the selected whisper.cpp model into MODEL_DIR unless it is already
# there.
# Globals:  MODEL_DIR, MODEL (read)
# Returns:  0 when the model file is available, non-zero when it could not be
#           fetched.
download_model() {
  local model_file="${MODEL_DIR}/ggml-${MODEL}.bin"
  if [[ -f "$model_file" ]]; then
    return 0
  fi

  mkdir -p "$MODEL_DIR" || return 1
  notify "Telechargement du modele ${MODEL}..." 5000
  local url="https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-${MODEL}.bin"

  # Download to a temporary file beside the final path and rename it only on
  # success, so an interrupted transfer never leaves a truncated file that a
  # later run would accept as a valid model.
  local partial
  partial=$(mktemp "${model_file}.XXXXXX") || return 1

  # -f makes curl fail on HTTP errors instead of saving the error page.
  if ! curl -fL "$url" -o "$partial"; then
    rm -f "$partial"
    notify "Echec du telechargement du modele ${MODEL}"
    return 1
  fi

  if ! mv -f "$partial" "$model_file"; then
    rm -f "$partial"
    return 1
  fi
  notify "Modele ${MODEL} pret"
}
# Start capturing audio in the background and remember the recorder's PID.
# Does nothing when the PID file already refers to a live process.
# Globals:  RECORDING_PID, AUDIO_FILE (read)
start_recording() {
  # A PID file naming a process that still answers signal 0 means a recording
  # is already in progress.
  if [[ -f "$RECORDING_PID" ]]; then
    if kill -0 "$(cat "$RECORDING_PID")" 2>/dev/null; then
      return 0
    fi
  fi

  download_model

  # Signed 16-bit little-endian, 16 kHz, mono WAV (a format compatible with
  # whisper.cpp).
  local -a capture_cmd=(arecord -f S16_LE -r 16000 -c 1 -t wav "$AUDIO_FILE")
  "${capture_cmd[@]}" &
  printf '%s\n' "$!" >"$RECORDING_PID"

  notify "Enregistrement..." 1000
}
# Stop the running recording, transcribe it with whisper-cli and type the
# resulting text at the cursor with xdotool.
# Globals:  RECORDING_PID, AUDIO_FILE, MODEL_DIR, MODEL (read)
# Returns:  0 when there was nothing to stop or the transcription ran (even
#           if no text was detected); 1 when no audio was captured or
#           whisper-cli failed.
stop_and_transcribe() {
  [[ -f "$RECORDING_PID" ]] || return 0

  # Only signal a plain numeric PID: an empty or malformed PID file must not
  # turn into an unintended kill target.
  local pid
  pid=$(cat "$RECORDING_PID" 2>/dev/null) || pid=""
  if [[ "$pid" =~ ^[0-9]+$ ]]; then
    kill "$pid" 2>/dev/null || true
  fi
  rm -f "$RECORDING_PID"
  sleep 0.3 # give arecord time to finalise the file

  # -s is false for a missing file as well as for an empty one.
  if [[ ! -s "$AUDIO_FILE" ]]; then
    notify "Pas d'audio enregistre"
    rm -f "$AUDIO_FILE"
    return 1
  fi

  notify "Transcription..." 1000
  local model_file="${MODEL_DIR}/ggml-${MODEL}.bin"

  # Capture stdout only, so diagnostics written to stderr can never end up
  # typed into the focused window, and record the exit status explicitly
  # rather than letting `set -e` abort before the cleanup below.
  local raw status=0
  raw=$(whisper-cli \
    -m "$model_file" \
    -l fr \
    -nt \
    -np \
    "$AUDIO_FILE" 2>/dev/null) || status=$?
  rm -f "$AUDIO_FILE"

  if ((status != 0)); then
    notify "Echec de la transcription"
    return 1
  fi

  # Drop backend-loading lines, join the remaining lines and trim surrounding
  # whitespace. grep exits non-zero when it selects no line; that is not an
  # error here and must not fail the pipeline under `pipefail`.
  local text
  text=$(
    printf '%s\n' "$raw" |
      { grep -v "^load_backend:" || true; } |
      tr -d '\n' |
      sed 's/^[[:space:]]*//;s/[[:space:]]*$//'
  )

  # Type the text at the cursor.
  if [[ -n "$text" ]]; then
    sleep 0.1 # short delay so the target window has focus
    xdotool type --delay 10 -- "$text"
    notify "$text"
  else
    notify "Aucun texte detecte"
  fi
}
# Entry point: dispatch on the first argument, defaulting to "toggle".
case "${1:-toggle}" in
  start) start_recording ;;
  stop) stop_and_transcribe ;;
  toggle)
    # The presence of the PID file is what marks a recording as in progress.
    if [[ -f "$RECORDING_PID" ]]; then
      stop_and_transcribe
    else
      start_recording
    fi
    ;;
  *)
    # A usage error is a diagnostic, so it goes to stderr.
    echo "Usage: $0 {start|stop|toggle}" >&2
    exit 1
    ;;
esac