#!/usr/bin/env bash
# stt-dictate.sh - Push-to-talk speech-to-text using whisper.cpp
#
# Usage:
#   stt-dictate start   # start recording
#   stt-dictate stop    # stop recording and transcribe
#   stt-dictate toggle  # switch between start and stop (default action)
#
# i3 keybinding (toggle mode):
#   bindsym Mod4+space exec stt-dictate toggle
#
# Available models (STT_MODEL):
#   tiny   -  39 MB - fast, basic quality
#   base   -  74 MB - fast, good quality
#   small  - 244 MB - balanced (default)
#   medium - 769 MB - slow, excellent quality
#   large  - 1.5 GB - very slow, best quality
#
# Example: STT_MODEL=tiny stt-dictate start

set -euo pipefail

# State shared between the "start" and "stop" invocations of this script.
# Prefer the per-user runtime directory (XDG_RUNTIME_DIR) over world-writable
# /tmp so that the file names are not predictable targets for other users;
# /tmp remains the fallback when no runtime directory is available.
RECORDING_PID="${XDG_RUNTIME_DIR:-/tmp}/stt-recording.pid" # PID of the recorder
AUDIO_FILE="${XDG_RUNTIME_DIR:-/tmp}/stt-audio.wav"        # captured audio

# Where ggml model files are cached, and which model to use
# (override with the STT_MODEL environment variable).
MODEL_DIR="${HOME}/.cache/whisper"
MODEL="${STT_MODEL:-small}"

#######################################
# Show a desktop notification titled "STT"; if notify-send fails (for
# example because it is not installed or no daemon is running), print the
# message to stdout instead.
# Arguments:
#   $1 - message text
#   $2 - display time in milliseconds (optional, default 2000)
# Outputs:
#   "[STT] <message>" on stdout only when notify-send fails
#######################################
notify() {
  local message="$1"
  local timeout_ms="${2:-2000}"

  # Options go first and "--" ends option parsing, so a message that starts
  # with "-" (e.g. a transcription such as "- Bonjour") is not mistaken for
  # a notify-send option.
  if ! notify-send -t "$timeout_ms" -- "STT" "$message" 2>/dev/null; then
    printf '[STT] %s\n' "$message"
  fi
}

#######################################
# Download the selected whisper.cpp model if it is not cached yet.
# Globals:
#   MODEL_DIR (read) - directory holding the ggml model files
#   MODEL     (read) - model name, used in the file name and the URL
# Returns:
#   0 if the model is already present or was downloaded,
#   1 if the download failed (no model file is left behind in that case)
#######################################
download_model() {
  local model_file="${MODEL_DIR}/ggml-${MODEL}.bin"
  if [[ -f "$model_file" ]]; then
    return 0
  fi

  mkdir -p "$MODEL_DIR"
  notify "Telechargement du modele ${MODEL}..." 5000

  local url="https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-${MODEL}.bin"
  local partial_file="${model_file}.part"

  # -f makes curl fail on HTTP errors instead of saving the error page, and
  # the download goes to a temporary name first: an unknown model name or an
  # interrupted transfer must never leave a bogus file at the final path,
  # because the existence check above would then skip the download forever.
  if ! curl -fL "$url" -o "$partial_file"; then
    rm -f "$partial_file"
    notify "Echec du telechargement du modele ${MODEL}" 5000
    return 1
  fi

  mv -f "$partial_file" "$model_file"
  notify "Modele ${MODEL} pret"
}

#######################################
# Start recording audio in the background unless a recording is already
# running, and remember the recorder's PID for a later "stop".
# Globals:
#   RECORDING_PID (read) - path of the file that stores the recorder PID
#   AUDIO_FILE    (read) - path of the WAV file to record into
# Returns:
#   0 if a recording is running or was started, 1 if arecord is unavailable
#######################################
start_recording() {
  # Do not start a second recorder while the recorded PID is still alive.
  if [[ -f "$RECORDING_PID" ]] && kill -0 "$(<"$RECORDING_PID")" 2>/dev/null; then
    return 0
  fi

  # A failing background job is not caught by "set -e", so check up front:
  # otherwise a missing arecord would still leave a PID file behind and
  # announce a recording that never started.
  if ! command -v arecord >/dev/null 2>&1; then
    notify "arecord introuvable"
    return 1
  fi

  download_model

  # Record 16-bit little-endian, 16 kHz, mono WAV (the format passed to
  # whisper.cpp by this script).
  arecord -f S16_LE -r 16000 -c 1 -t wav "$AUDIO_FILE" &
  printf '%s\n' "$!" >"$RECORDING_PID"
  notify "Enregistrement..." 1000
}

#######################################
# Stop the running recording, transcribe it with whisper-cli and type the
# resulting text at the cursor with xdotool.
# Globals:
#   RECORDING_PID (read) - path of the file that stores the recorder PID
#   AUDIO_FILE    (read) - path of the recorded WAV file (deleted afterwards)
#   MODEL_DIR, MODEL (read) - locate the ggml model file
# Returns:
#   0 when nothing was being recorded or the recording was processed,
#   1 when no audio was captured or whisper-cli failed
#######################################
stop_and_transcribe() {
  # Nothing to do when no recording was started.
  if [[ ! -f "$RECORDING_PID" ]]; then
    return 0
  fi

  # Best effort: the recorder may already have exited on its own.
  kill "$(<"$RECORDING_PID")" 2>/dev/null || true
  rm -f "$RECORDING_PID"
  sleep 0.3 # give arecord time to finalize the file

  if [[ ! -f "$AUDIO_FILE" || ! -s "$AUDIO_FILE" ]]; then
    notify "Pas d'audio enregistre"
    rm -f "$AUDIO_FILE"
    return 1
  fi

  notify "Transcription..." 1000

  local model_file="${MODEL_DIR}/ggml-${MODEL}.bin"
  local raw_output text

  # Capture stdout only: diagnostics written to stderr must never end up in
  # the text that is typed into the focused window. A failure is reported
  # explicitly so that "set -e" does not abort before the audio file is
  # cleaned up.
  if ! raw_output=$(whisper-cli -m "$model_file" -l fr -nt -np "$AUDIO_FILE" 2>/dev/null); then
    rm -f "$AUDIO_FILE"
    notify "Echec de la transcription"
    return 1
  fi
  rm -f "$AUDIO_FILE"

  # Drop backend-loading log lines, join the output into one line and trim
  # surrounding whitespace. grep exits 1 when it selects no line; with
  # pipefail that would abort the script instead of reaching the
  # "no text" notification below, hence the "|| true".
  text=$(printf '%s\n' "$raw_output" |
    { grep -v "^load_backend:" || true; } |
    tr -d '\n' |
    sed 's/^[[:space:]]*//;s/[[:space:]]*$//')

  # Type the text at the cursor.
  if [[ -n "$text" ]]; then
    sleep 0.1 # short delay so the target window has focus
    xdotool type --delay 10 -- "$text"
    notify "$text"
  else
    notify "Aucun texte detecte"
  fi
}

# Entry point: dispatch on the first argument; with no argument the script
# toggles, i.e. stops when a PID file exists and starts otherwise.
case "${1:-toggle}" in
  start)
    start_recording
    ;;
  stop)
    stop_and_transcribe
    ;;
  toggle)
    if [[ -f "$RECORDING_PID" ]]; then
      stop_and_transcribe
    else
      start_recording
    fi
    ;;
  *)
    # Usage errors are diagnostics, so they go to stderr.
    printf '%s\n' "Usage: $0 {start|stop|toggle}" >&2
    exit 1
    ;;
esac