feat(stt): add local speech-to-text with whisper.cpp
- Add stt-dictate script for push-to-talk dictation - Add Mod+Space keybinding in i3 (hold to record, release to transcribe) - Add whisper-cpp, alsa-utils, xdotool, libnotify packages - Enable dunst notification daemon 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
111
files/stt-dictate.sh
Executable file
111
files/stt-dictate.sh
Executable file
@@ -0,0 +1,111 @@
|
||||
#!/usr/bin/env bash
|
||||
# stt-dictate.sh - Push-to-talk speech-to-text using whisper.cpp
|
||||
#
|
||||
# Usage:
|
||||
# stt-dictate start # demarre l'enregistrement
|
||||
# stt-dictate stop # arrete et transcrit
|
||||
# stt-dictate toggle # bascule entre start/stop
|
||||
#
|
||||
# Keybinding i3 (mode toggle):
|
||||
# bindsym Mod4+space exec stt-dictate toggle
|
||||
#
|
||||
# Modeles disponibles (STT_MODEL):
|
||||
# tiny - 39 MB - rapide, qualite basique
|
||||
# base - 74 MB - rapide, bonne qualite
|
||||
# small - 244 MB - equilibre (defaut)
|
||||
# medium - 769 MB - lent, excellente qualite
|
||||
# large - 1.5 GB - tres lent, meilleure qualite
|
||||
#
|
||||
# Exemple: STT_MODEL=tiny stt-dictate start
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
RECORDING_PID="/tmp/stt-recording.pid"
|
||||
AUDIO_FILE="/tmp/stt-audio.wav"
|
||||
MODEL_DIR="${HOME}/.cache/whisper"
|
||||
MODEL="${STT_MODEL:-small}"
|
||||
|
||||
# Notification helper (silently fails if no daemon)
|
||||
notify() {
|
||||
notify-send "STT" "$1" -t "${2:-2000}" 2>/dev/null || echo "[STT] $1"
|
||||
}
|
||||
|
||||
# Telecharge le modele si absent
|
||||
download_model() {
|
||||
local model_file="${MODEL_DIR}/ggml-${MODEL}.bin"
|
||||
if [[ ! -f "$model_file" ]]; then
|
||||
mkdir -p "$MODEL_DIR"
|
||||
notify "Telechargement du modele ${MODEL}..." 5000
|
||||
local url="https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-${MODEL}.bin"
|
||||
curl -L "$url" -o "$model_file"
|
||||
notify "Modele ${MODEL} pret"
|
||||
fi
|
||||
}
|
||||
|
||||
start_recording() {
|
||||
# Ne pas démarrer si déjà en cours
|
||||
if [[ -f "$RECORDING_PID" ]] && kill -0 "$(cat "$RECORDING_PID")" 2>/dev/null; then
|
||||
return 0
|
||||
fi
|
||||
download_model
|
||||
# Enregistre avec arecord (format compatible whisper.cpp)
|
||||
arecord -f S16_LE -r 16000 -c 1 -t wav "$AUDIO_FILE" &
|
||||
echo $! > "$RECORDING_PID"
|
||||
notify "Enregistrement..." 1000
|
||||
}
|
||||
|
||||
stop_and_transcribe() {
|
||||
if [[ -f "$RECORDING_PID" ]]; then
|
||||
kill "$(cat "$RECORDING_PID")" 2>/dev/null || true
|
||||
rm -f "$RECORDING_PID"
|
||||
sleep 0.3 # laisse arecord finaliser le fichier
|
||||
|
||||
if [[ ! -f "$AUDIO_FILE" ]] || [[ ! -s "$AUDIO_FILE" ]]; then
|
||||
notify "Pas d'audio enregistre"
|
||||
rm -f "$AUDIO_FILE"
|
||||
return 1
|
||||
fi
|
||||
|
||||
notify "Transcription..." 1000
|
||||
|
||||
local model_file="${MODEL_DIR}/ggml-${MODEL}.bin"
|
||||
|
||||
# Transcription avec whisper.cpp
|
||||
TEXT=$(whisper-cli \
|
||||
-m "$model_file" \
|
||||
-l fr \
|
||||
-nt \
|
||||
-np \
|
||||
"$AUDIO_FILE" 2>&1 \
|
||||
| grep -v "^load_backend:" \
|
||||
| tr -d '\n' \
|
||||
| sed 's/^[[:space:]]*//;s/[[:space:]]*$//')
|
||||
|
||||
rm -f "$AUDIO_FILE"
|
||||
|
||||
# Tape le texte au curseur
|
||||
if [[ -n "$TEXT" ]]; then
|
||||
sleep 0.1 # petit delai pour focus
|
||||
xdotool type --delay 10 -- "$TEXT"
|
||||
notify "$TEXT"
|
||||
else
|
||||
notify "Aucun texte detecte"
|
||||
fi
|
||||
fi
|
||||
}
|
||||
|
||||
case "${1:-toggle}" in
|
||||
start) start_recording ;;
|
||||
stop) stop_and_transcribe ;;
|
||||
toggle)
|
||||
if [[ -f "$RECORDING_PID" ]]; then
|
||||
stop_and_transcribe
|
||||
else
|
||||
start_recording
|
||||
fi
|
||||
;;
|
||||
*)
|
||||
echo "Usage: $0 {start|stop|toggle}"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
Reference in New Issue
Block a user