diff --git a/files/stt-dictate.sh b/files/stt-dictate.sh
new file mode 100755
index 0000000..cb61019
--- /dev/null
+++ b/files/stt-dictate.sh
@@ -0,0 +1,161 @@
+#!/usr/bin/env bash
+# stt-dictate.sh - Push-to-talk speech-to-text using whisper.cpp
+#
+# Usage:
+#   stt-dictate start   # start recording
+#   stt-dictate stop    # stop recording and transcribe
+#   stt-dictate toggle  # switch between start and stop (default action)
+#
+# i3 keybinding (toggle mode):
+#   bindsym Mod4+space exec stt-dictate toggle
+#
+# Available models (STT_MODEL):
+#   tiny   -  39 MB - fast, basic quality
+#   base   -  74 MB - fast, good quality
+#   small  - 244 MB - balanced (default)
+#   medium - 769 MB - slow, excellent quality
+#   large  - 1.5 GB - very slow, best quality
+#
+# Example: STT_MODEL=tiny stt-dictate start
+
+set -euo pipefail
+
+# State files go to the per-user runtime directory when one is set, instead of
+# predictable names in world-writable /tmp; /tmp remains the fallback.
+STATE_DIR="${XDG_RUNTIME_DIR:-/tmp}"
+RECORDING_PID="${STATE_DIR}/stt-recording.pid"
+AUDIO_FILE="${STATE_DIR}/stt-audio.wav"
+MODEL_DIR="${HOME}/.cache/whisper"
+MODEL="${STT_MODEL:-small}"
+
+# Show a desktop notification; print to stdout when notify-send fails
+# (for example when no notification daemon is running).
+#   $1 - message text
+#   $2 - timeout in milliseconds (default: 2000)
+notify() {
+  # Options first and `--` before the texts, so that a message starting with
+  # "-" is not parsed as an option.
+  notify-send -t "${2:-2000}" -- "STT" "$1" 2>/dev/null \
+    || printf '[STT] %s\n' "$1"
+}
+
+# Download the selected model into MODEL_DIR unless it is already there.
+# Returns non-zero (and leaves no model file behind) when the download fails.
+download_model() {
+  local model_file="${MODEL_DIR}/ggml-${MODEL}.bin"
+  if [[ -f "$model_file" ]]; then
+    return 0
+  fi
+
+  mkdir -p -- "$MODEL_DIR"
+  notify "Telechargement du modele ${MODEL}..." 5000
+  local url="https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-${MODEL}.bin"
+  local partial="${model_file}.part"
+  # -f: treat HTTP errors as failures instead of saving the error page as the
+  # model. The download goes to a side file and is renamed only on success, so
+  # an interrupted transfer is never mistaken for a complete model.
+  if ! curl -fL "$url" -o "$partial"; then
+    rm -f -- "$partial"
+    notify "Echec du telechargement du modele ${MODEL}"
+    return 1
+  fi
+  mv -f -- "$partial" "$model_file"
+  notify "Modele ${MODEL} pret"
+}
+
+# Start a background recording and remember its PID in RECORDING_PID.
+start_recording() {
+  # Nothing to do when a recording is already running
+  if [[ -f "$RECORDING_PID" ]] \
+    && kill -0 "$(cat "$RECORDING_PID")" 2>/dev/null; then
+    return 0
+  fi
+  download_model
+  # Record with arecord: 16 kHz, mono, signed 16-bit little-endian WAV
+  # (the format whisper.cpp is fed with here)
+  arecord -f S16_LE -r 16000 -c 1 -t wav "$AUDIO_FILE" &
+  printf '%s\n' "$!" > "$RECORDING_PID"
+  notify "Enregistrement..." 1000
+}
+
+# Stop the running recording, transcribe it and type the text at the cursor.
+# Does nothing when RECORDING_PID does not exist; returns 1 when there is no
+# audio or when the transcription fails.
+stop_and_transcribe() {
+  if [[ ! -f "$RECORDING_PID" ]]; then
+    return 0
+  fi
+
+  local pid
+  pid=$(cat "$RECORDING_PID" 2>/dev/null) || pid=""
+  rm -f -- "$RECORDING_PID"
+  # Only signal a plain positive PID: an empty or "0" value must never reach
+  # kill (PID 0 would signal the whole process group).
+  if [[ "$pid" =~ ^[1-9][0-9]*$ ]]; then
+    kill "$pid" 2>/dev/null || true
+    # arecord was started by an earlier invocation, so it is not a child of
+    # this shell and cannot be waited for; poll (at most ~2 s) until it has
+    # exited and therefore finished writing the file.
+    local attempt
+    for ((attempt = 0; attempt < 40; attempt++)); do
+      kill -0 "$pid" 2>/dev/null || break
+      sleep 0.05
+    done
+  fi
+
+  if [[ ! -f "$AUDIO_FILE" || ! -s "$AUDIO_FILE" ]]; then
+    notify "Pas d'audio enregistre"
+    rm -f -- "$AUDIO_FILE"
+    return 1
+  fi
+
+  notify "Transcription..." 1000
+
+  local model_file="${MODEL_DIR}/ggml-${MODEL}.bin"
+  local raw text
+  local rc=0
+
+  # Transcribe with whisper.cpp. Only stdout is captured: merging stderr in
+  # would make xdotool type error messages into the focused window.
+  raw=$(whisper-cli -m "$model_file" -l fr -nt -np "$AUDIO_FILE") || rc=$?
+  rm -f -- "$AUDIO_FILE"
+  if ((rc != 0)); then
+    notify "Echec de la transcription"
+    return 1
+  fi
+
+  # Drop "load_backend:" lines, join the rest and trim surrounding blanks.
+  # sed does the filtering because `grep -v` exits 1 when nothing is left,
+  # which under `set -e -o pipefail` would abort before the "no text" notice.
+  text=$(
+    printf '%s\n' "$raw" \
+      | sed '/^load_backend:/d' \
+      | tr -d '\n' \
+      | sed 's/^[[:space:]]*//;s/[[:space:]]*$//'
+  )
+
+  # Type the text at the cursor
+  if [[ -n "$text" ]]; then
+    sleep 0.1 # short delay for focus
+    xdotool type --delay 10 -- "$text"
+    notify "$text"
+  else
+    notify "Aucun texte detecte"
+  fi
+}
+
+case "${1:-toggle}" in
+  start) start_recording ;;
+  stop) stop_and_transcribe ;;
+  toggle)
+    if [[ -f "$RECORDING_PID" ]]; then
+      stop_and_transcribe
+    else
+      start_recording
+    fi
+    ;;
+  *)
+    echo "Usage: $0 {start|stop|toggle}" >&2
+    exit 1
+    ;;
+esac
diff --git a/home.nix b/home.nix
index 15be889..6b1906b 100644
--- a/home.nix
+++ b/home.nix
@@ -8,6 +8,10 @@
 
   home.file.".config/traefik/traefik.toml".source = ./files/traefik.toml;
   home.file.".npmrc".source = ./files/.npmrc;
+  home.file.".local/bin/stt-dictate" = {
+    source = ./files/stt-dictate.sh;
+    executable = true;
+  };
 
   imports = [
 
@@ -49,6 +53,7 @@
 
   services.unclutter.enable = true;
   services.blueman-applet.enable = true;
+  services.dunst.enable = true; # notification daemon
   services.udiskie.enable = true; # require "services.udisks2.enable = true" in system configuration
 
 }
diff --git a/packages.nix b/packages.nix
index a5be01f..f8e5eef 100644
--- a/packages.nix
+++ b/packages.nix
@@ -150,6 +150,10 @@
 
     # AI
     ollama
+    whisper-cpp # local STT
+    alsa-utils # arecord, used for recording
+    xdotool # types the transcribed text
+    libnotify # notifications
 
     # Perso
     nextcloud-client
diff --git a/programs/i3.nix b/programs/i3.nix
index bb5dd08..0bb297c 100644
--- a/programs/i3.nix
+++ b/programs/i3.nix
@@ -76,6 +76,9 @@
         # Sreen brightness controls
         "XF86MonBrightnessUp" = "exec light -A 2"; # increase screen brightness
         "XF86MonBrightnessDown" = "exec light -U 2"; # decrease screen brightness
+
+        # Speech-to-text (toggle: press to start/stop)
+        "${modifier}+space" = "exec ~/.local/bin/stt-dictate toggle";
       };
 
       startup = [