Small Commit

This commit is contained in:
2026-01-18 02:55:04 +01:00
parent 9c5a87e61d
commit 06954aeef0
5 changed files with 787 additions and 0 deletions

BIN
de_DE-thorsten-medium.onnx Normal file

Binary file not shown.

View File

@@ -0,0 +1,487 @@
{
"audio": {
"sample_rate": 22050,
"quality": "medium"
},
"espeak": {
"voice": "de"
},
"inference": {
"noise_scale": 0.667,
"length_scale": 1,
"noise_w": 0.8
},
"phoneme_type": "espeak",
"phoneme_map": {},
"phoneme_id_map": {
"_": [
0
],
"^": [
1
],
"$": [
2
],
" ": [
3
],
"!": [
4
],
"'": [
5
],
"(": [
6
],
")": [
7
],
",": [
8
],
"-": [
9
],
".": [
10
],
":": [
11
],
";": [
12
],
"?": [
13
],
"a": [
14
],
"b": [
15
],
"c": [
16
],
"d": [
17
],
"e": [
18
],
"f": [
19
],
"h": [
20
],
"i": [
21
],
"j": [
22
],
"k": [
23
],
"l": [
24
],
"m": [
25
],
"n": [
26
],
"o": [
27
],
"p": [
28
],
"q": [
29
],
"r": [
30
],
"s": [
31
],
"t": [
32
],
"u": [
33
],
"v": [
34
],
"w": [
35
],
"x": [
36
],
"y": [
37
],
"z": [
38
],
"æ": [
39
],
"ç": [
40
],
"ð": [
41
],
"ø": [
42
],
"ħ": [
43
],
"ŋ": [
44
],
"œ": [
45
],
"ǀ": [
46
],
"ǁ": [
47
],
"ǂ": [
48
],
"ǃ": [
49
],
"ɐ": [
50
],
"ɑ": [
51
],
"ɒ": [
52
],
"ɓ": [
53
],
"ɔ": [
54
],
"ɕ": [
55
],
"ɖ": [
56
],
"ɗ": [
57
],
"ɘ": [
58
],
"ə": [
59
],
"ɚ": [
60
],
"ɛ": [
61
],
"ɜ": [
62
],
"ɞ": [
63
],
"ɟ": [
64
],
"ɠ": [
65
],
"ɡ": [
66
],
"ɢ": [
67
],
"ɣ": [
68
],
"ɤ": [
69
],
"ɥ": [
70
],
"ɦ": [
71
],
"ɧ": [
72
],
"ɨ": [
73
],
"ɪ": [
74
],
"ɫ": [
75
],
"ɬ": [
76
],
"ɭ": [
77
],
"ɮ": [
78
],
"ɯ": [
79
],
"ɰ": [
80
],
"ɱ": [
81
],
"ɲ": [
82
],
"ɳ": [
83
],
"ɴ": [
84
],
"ɵ": [
85
],
"ɶ": [
86
],
"ɸ": [
87
],
"ɹ": [
88
],
"ɺ": [
89
],
"ɻ": [
90
],
"ɽ": [
91
],
"ɾ": [
92
],
"ʀ": [
93
],
"ʁ": [
94
],
"ʂ": [
95
],
"ʃ": [
96
],
"ʄ": [
97
],
"ʈ": [
98
],
"ʉ": [
99
],
"ʊ": [
100
],
"ʋ": [
101
],
"ʌ": [
102
],
"ʍ": [
103
],
"ʎ": [
104
],
"ʏ": [
105
],
"ʐ": [
106
],
"ʑ": [
107
],
"ʒ": [
108
],
"ʔ": [
109
],
"ʕ": [
110
],
"ʘ": [
111
],
"ʙ": [
112
],
"ʛ": [
113
],
"ʜ": [
114
],
"ʝ": [
115
],
"ʟ": [
116
],
"ʡ": [
117
],
"ʢ": [
118
],
"ʲ": [
119
],
"ˈ": [
120
],
"ˌ": [
121
],
"ː": [
122
],
"ˑ": [
123
],
"˞": [
124
],
"β": [
125
],
"θ": [
126
],
"χ": [
127
],
"ᵻ": [
128
],
"ⱱ": [
129
],
"0": [
130
],
"1": [
131
],
"2": [
132
],
"3": [
133
],
"4": [
134
],
"5": [
135
],
"6": [
136
],
"7": [
137
],
"8": [
138
],
"9": [
139
],
"̧": [
140
],
"̃": [
141
],
"̪": [
142
],
"̯": [
143
],
"̩": [
144
],
"ʰ": [
145
],
"ˤ": [
146
],
"ε": [
147
],
"↓": [
148
],
"#": [
149
],
"\"": [
150
],
"↑": [
151
]
},
"num_symbols": 256,
"num_speakers": 1,
"speaker_id_map": {},
"piper_version": "1.0.0",
"language": {
"code": "de_DE",
"family": "de",
"region": "DE",
"name_native": "Deutsch",
"name_english": "German",
"country_english": "Germany"
},
"dataset": "thorsten"
}

227
main_bak.py Normal file
View File

@@ -0,0 +1,227 @@
import threading
import queue
import json
import time
import os
import subprocess
# =========================
# CONFIGURATION
# =========================
VOSK_MODEL_PATH = "vosk-model-de-0.21"  # directory of the German Vosk STT model (relative to CWD)
PIPER_BIN = "piper"  # Piper TTS executable; must be on PATH
PIPER_MODEL = "de_DE-thorsten-medium.onnx"  # German voice model shipped alongside this script
# Hz; matches the Piper model's output rate (see the model's JSON config).
# NOTE(review): this same rate is fed to the Vosk recognizer below, while
# test_vosk.py uses 16000 — confirm which rate the Vosk model expects.
SAMPLE_RATE = 22050
# =========================
# STATES
# =========================
STATE_IDLE = "IDLE"            # waiting for the (fake) wake word
STATE_LISTENING = "LISTENING"  # recognized utterances are processed
STATE_SPEAKING = "SPEAKING"    # TTS playback in progress; STT results are ignored
# =========================
# GLOBAL STATE
# =========================
state = STATE_IDLE
# Dialogue context: the active intent, slot values collected so far, the
# slots the intent still requires, and the slot a follow-up question is
# currently waiting to fill.
context = {
    "intent": None,
    "slots": {},
    "required_slots": [],
    "pending_slot": None
}
audio_queue = queue.Queue()  # recognized utterances, produced by the Vosk thread
# =========================
# TTS (PIPER)
# =========================
def speak(text):
    """Synthesize *text* with Piper and play the raw PCM via aplay.

    Sets the global state to SPEAKING for the duration of playback so the
    dialogue logic ignores STT results, then switches to LISTENING.

    Fix: the original buffered the entire synthesized utterance in memory
    (communicate()) before starting playback and never closed the
    intermediate pipe; piper's stdout is now streamed directly into aplay.
    """
    global state
    state = STATE_SPEAKING
    print(f"[TTS] {text}")
    synth = subprocess.Popen(
        [PIPER_BIN, "--model", PIPER_MODEL, "--output-raw"],
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE
    )
    player = subprocess.Popen(
        ["aplay", "-r", str(SAMPLE_RATE), "-f", "S16_LE"],
        stdin=synth.stdout
    )
    synth.stdin.write(text.encode("utf-8"))
    synth.stdin.close()   # EOF tells piper to finish synthesis
    synth.stdout.close()  # hand the read end to aplay (avoids an fd leak)
    player.wait()
    synth.wait()
    state = STATE_LISTENING
# =========================
# INTENTS & SLOTS
# =========================
# Intent definitions: trigger keywords and the slots each skill needs filled.
INTENTS = {
    "weather": {
        "keywords": ["wetter", "temperatur", "regen"],
        "required_slots": ["location"]
    },
    "timer": {
        "keywords": ["timer"],
        "required_slots": ["duration"]
    }
}


# TODO: possibly rework with regex for word-boundary matching
def detect_intent(text):
    """Return the name of the first intent whose keyword occurs in *text*, else None."""
    lowered = text.lower()
    for intent_name, spec in INTENTS.items():
        for keyword in spec["keywords"]:
            if keyword in lowered:
                return intent_name
    return None
# =========================
# SKILLS
# =========================
def weather_skill(slots):
    """Dummy weather skill: canned forecast for the filled 'location' slot."""
    return f"Das Wetter in {slots['location']} ist sonnig bei 20 Grad."


def timer_skill(slots):
    """Dummy timer skill: confirms a timer for the filled 'duration' slot."""
    return f"Der Timer für {slots['duration']} Minuten wurde gestartet."


# Maps intent names to their handler functions.
SKILLS = {
    "weather": weather_skill,
    "timer": timer_skill
}
# =========================
# DIALOGUE LOGIC
# =========================
def handle_text(text):
    """Drive the dialogue state machine with one recognized utterance.

    If a follow-up question is open, the whole utterance fills that slot.
    Otherwise the intent is detected, missing required slots are asked for
    (one per call; the function returns and waits for the next utterance),
    and once all slots are present the matching skill runs and is spoken.
    """
    global context, state
    # Ignore STT results unless actively listening (e.g. during TTS playback).
    if state != STATE_LISTENING:
        return
    print(f"[STT] {text}")
    # 1. Answer an open follow-up question
    if context["pending_slot"]:
        context["slots"][context["pending_slot"]] = text
        context["pending_slot"] = None
    # 2. Detect the intent (only if none is active yet)
    if not context["intent"]:
        intent = detect_intent(text)
        if not intent:
            speak("Das habe ich nicht verstanden.")
            reset_context()
            return
        context["intent"] = intent
        context["required_slots"] = INTENTS[intent]["required_slots"]
    # 3. Check for missing slots — ask for the first one and wait
    for slot in context["required_slots"]:
        if slot not in context["slots"]:
            context["pending_slot"] = slot
            ask_for_slot(slot)
            return
    # 4. Run the skill, speak the result, and return to idle
    result = SKILLS[context["intent"]](context["slots"])
    speak(result)
    reset_context()
def ask_for_slot(slot):
    """Voice the follow-up question that asks the user for *slot*."""
    prompts = {
        "location": "Für welchen Ort?",
        "duration": "Wie lange soll der Timer laufen?"
    }
    question = prompts.get(slot, "Bitte spezifizieren.")
    speak(question)
def reset_context():
    """Clear the dialogue context and drop back to the idle state."""
    global context, state
    context = {
        "intent": None,
        "slots": {},
        "required_slots": [],
        "pending_slot": None,
    }
    state = STATE_IDLE
# =========================
# VOSK LISTENER
# =========================
def vosk_listener():
    """Background thread: stream microphone audio into Vosk, queue final results.

    Runs forever. Every non-empty final recognition result is put on
    audio_queue for the main loop to consume.
    """
    # Imported inside the thread so the heavy model/audio dependencies only
    # load when the listener actually starts.
    import vosk
    import pyaudio
    model = vosk.Model(VOSK_MODEL_PATH)
    # NOTE(review): recognizer is fed at SAMPLE_RATE (22050 Hz) here, while
    # test_vosk.py uses 16000 — confirm the rate the Vosk model was trained on.
    rec = vosk.KaldiRecognizer(model, SAMPLE_RATE)
    p = pyaudio.PyAudio()
    stream = p.open(
        format=pyaudio.paInt16,
        channels=1,
        rate=SAMPLE_RATE,
        input=True,
        frames_per_buffer=4000
    )
    stream.start_stream()
    while True:
        # Blocking read; buffer overflows are ignored instead of raising.
        data = stream.read(4000, exception_on_overflow=False)
        if rec.AcceptWaveform(data):
            result = json.loads(rec.Result())
            text = result.get("text", "")
            if text:
                audio_queue.put(text)
# =========================
# WAKE WORD (SIMPLIFIED)
# =========================
def fake_wakeword_detector():
    """Background thread standing in for a real wake-word engine.

    Whenever the assistant is idle it waits briefly, switches to LISTENING
    and prompts the user. A real detector would trigger on a spoken keyword.

    Fix: the original only slept on the idle path, so while the state was
    LISTENING or SPEAKING this loop busy-waited at 100% CPU.
    """
    global state
    while True:
        if state == STATE_IDLE:
            time.sleep(0.1)
            state = STATE_LISTENING
            speak("Wie kann ich helfen?")
        else:
            # Yield instead of spinning while another state is active.
            time.sleep(0.1)
# =========================
# MAIN LOOP
# =========================
def main():
    """Start the background threads and dispatch recognized utterances forever."""
    listener = threading.Thread(target=vosk_listener, daemon=True)
    wakeword = threading.Thread(target=fake_wakeword_detector, daemon=True)
    listener.start()
    wakeword.start()
    while True:
        try:
            utterance = audio_queue.get(timeout=0.1)
        except queue.Empty:
            continue
        handle_text(utterance)


if __name__ == "__main__":
    main()

27
test_tts.py Normal file
View File

@@ -0,0 +1,27 @@
import subprocess
PIPER_BIN = "piper"  # Piper TTS executable; must be on PATH
PIPER_MODEL = "de_DE-thorsten-medium.onnx"  # German voice model
SAMPLE_RATE = 22050  # Hz; determined via: aplay -v assistant/audio-tts/predefined.wav
def speak(text):
    """Synthesize *text* with Piper and play the raw PCM via aplay.

    Fix: the original buffered the whole synthesized utterance in memory
    (communicate()) before playback and never closed the intermediate pipe;
    piper's stdout is now streamed directly into aplay.
    """
    print(f"[TTS] {text}")
    synth = subprocess.Popen(
        [PIPER_BIN, "--model", PIPER_MODEL, "--output-raw"],
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE
    )
    player = subprocess.Popen(
        ["aplay", "-r", str(SAMPLE_RATE), "-f", "S16_LE"],
        stdin=synth.stdout
    )
    synth.stdin.write(text.encode("utf-8"))
    synth.stdin.close()   # EOF tells piper to finish synthesis
    synth.stdout.close()  # hand the read end to aplay (avoids an fd leak)
    player.wait()
    synth.wait()
# Smoke test: speak one fixed German sentence.
sample_text = "Das Auto musste repariert werden, bevor wir weiterfahren konnten, neuer text"
speak(sample_text)

46
test_vosk.py Normal file
View File

@@ -0,0 +1,46 @@
import threading
import queue
import json
audio_queue = queue.Queue()  # recognized utterances, produced by the listener thread
SAMPLE_RATE = 16000  # Hz; microphone capture rate fed to the Vosk recognizer
def vosk_listener():
    """Background thread: stream microphone audio into Vosk, queue final results.

    Runs forever. Every non-empty final recognition result is put on
    audio_queue for main() to print.
    """
    # Imported inside the thread so the heavy model/audio dependencies only
    # load when the listener actually starts.
    import vosk
    import pyaudio
    # NOTE(review): hardcoded machine-specific absolute path — make this
    # configurable (env var or relative path) before sharing the script.
    VOSK_MODEL_PATH = "/home/tino/Documents/_Documents/Schule/4 Wilhelm Maybach Schule/2BKI Jahr 2/Abschlussprojekt/test assistant/assistant_all_in_one/vosk-model-de-0.21/"
    model = vosk.Model(VOSK_MODEL_PATH)
    rec = vosk.KaldiRecognizer(model, SAMPLE_RATE)
    p = pyaudio.PyAudio()
    stream = p.open(
        format=pyaudio.paInt16,
        channels=1,
        rate=SAMPLE_RATE,
        input=True,
        frames_per_buffer=4000
    )
    stream.start_stream()
    while True:
        # Blocking read; buffer overflows are ignored instead of raising.
        data = stream.read(4000, exception_on_overflow=False)
        if rec.AcceptWaveform(data):
            result = json.loads(rec.Result())
            text = result.get("text", "")
            if text:
                audio_queue.put(text)
def main():
    """Start the Vosk listener thread and print every recognized utterance.

    Fix: the log tag was "[SST]" — a typo; corrected to "[STT]"
    (speech-to-text), consistent with main_bak.py.
    """
    threading.Thread(target=vosk_listener, daemon=True).start()
    while True:
        try:
            text = audio_queue.get(timeout=0.1)
            print("[STT]", text)
        except queue.Empty:
            pass


if __name__ == "__main__":
    main()