| 
									
										
										
										
											2025-09-29 09:19:40 +08:00
										 |  |  |  | import signal | 
					
						
							|  |  |  |  | import sys | 
					
						
							|  |  |  |  | import time | 
					
						
							|  |  |  |  | import re | 
					
						
							|  |  |  |  | import subprocess | 
					
						
							|  |  |  |  | import queue  | 
					
						
							|  |  |  |  | # 导入所有模块 | 
					
						
							|  |  |  |  | from tts_module import BaiduOnlineTTS | 
					
						
							|  |  |  |  | from volume_module import VolumeController, detect_audio_control | 
					
						
							|  |  |  |  | from motion_module import RobotMotionController | 
					
						
							|  |  |  |  | from camera_module import CameraModule | 
					
						
							|  |  |  |  | from ark_api_module import ArkAPIController | 
					
						
							|  |  |  |  | from voice_recog_module import VoiceRecogController | 
					
						
							|  |  |  |  | 
 | 
					
						
# -------------------- 1. Basic configuration (kept unchanged from section 1 of the original code) --------------------
# 1.1 Project path and motion model
PROJECT_ROOT = "/home/duckpi/open_duck_mini_ws/OPEN_DUCK_MINI/Open_Duck_Mini_Runtime-2"
sys.path.append(PROJECT_ROOT)
ONNX_MODEL_PATH = "/home/duckpi/open_duck_mini_ws/OPEN_DUCK_MINI/Open_Duck_Mini-2/BEST_WALK_ONNX_2.onnx"

# NOTE(review): the API keys and secrets in sections 1.2-1.4 are hard-coded in
# source. Consider loading them from the environment or a secrets file and
# rotating any key that has been committed or published.

# 1.2 Volcano Engine Ark API configuration
ARK_API_KEY = "390d517c-129a-41c1-bf3d-458048007b69"
ARK_MODEL_ID = "doubao-seed-1-6-250615"

# 1.3 Speech recognition and wake-word configuration
APPID = "1ff50710"
ACCESS_KEY_ID = "a4f43e95ee0a9518d11befac8d31f1d4"
ACCESS_KEY_SECRET = "YzQ4NTRhZjc2ZTM4MDA1YjM2MmIyNDEy"
ACCESS_KEY = "e0EQQBoH0HIVU9KrXsmB7CMlVci+GAs2x0Ejtrdp8CTtZmf25rCLaQ=="
WAKEUP_WORD_PATH = "/home/duckpi/open_duck_mini_ws/OPEN_DUCK_MINI/resources/xiaohuangya_zh_raspberry-pi_v3_0_0.ppn"
MODEL_PATH = "/home/duckpi/open_duck_mini_ws/OPEN_DUCK_MINI/resources/porcupine_params_zh.pv"

# 1.4 Baidu online TTS configuration
BAIDU_TTS_API_KEY = "TnwYZPPvElNushOzfL6vBlUI"
BAIDU_TTS_SECRET_KEY = "55HeI8VNUMNlkW3t2QRwVtrjumpxjfxk"

# 1.5 Spoken feedback texts, keyed by event name.
# The same dict is handed to the motion, Ark API and voice-recognition
# controllers created in main().
FEEDBACK_TEXT = {
    "wakeup": "你好呀,有什么吩咐",
    "move_forward": "好的,我正在前进",
    "move_backward": "好的,我正在后退",
    "turn_left": "好的,我正在左转",
    "turn_right": "好的,我正在右转",
    "image_recog": "好的,我来识别一下",
    "chat": "好的,我来想想",
    "volume_increase": "音量已增大",
    "volume_decrease": "音量已减小",
    "volume_max": "已调至最大音量",
    "volume_min": "已调至最小音量",
    "unknown": "抱歉,没听懂,请再说一次",
    "api_error": "抱歉,处理请求时出错了"
}

# 1.6 Audio (volume) parameters
VOLUME_STEP = 10
MIN_VOLUME = 0
MAX_VOLUME = 100
CURRENT_VOLUME = 40
AUDIO_CONTROL_NAME = None  # filled in by main() via detect_audio_control()

# 1.7 Microphone and speaker parameters (already defined inside the modules; kept here for consistency)
SAMPLE_RATE = 16000
CHANNELS = 1
SAMPLE_FORMAT = "int16"
AUDIO_ENCODE = "pcm_s16le"
LANG = "autodialect"
INTERACTION_TIMEOUT = 30

# -------------------- 2. Global state variables (kept from section 2 of the original code; single-element lists so they are passed by reference) --------------------
audio_q = queue.Queue()
last_audio_time = [time.time()]  # one-element list passed by reference so modules can modify it
current_text = [""]               # one-element list passed by reference so modules can modify it
final_result = [""]              # one-element list passed by reference so modules can modify it
is_processing = [False]          # one-element list passed by reference so modules can modify it
last_command_time = [time.time()]  # one-element list passed by reference so modules can modify it
feedback_playing = False         # global variable used by the TTS module
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | # -------------------- 8. 指令解析与执行(完全保留原代码8. 指令解析与执行) -------------------- | 
					
						
							|  |  |  |  | def parse_voice_command(command_text: str): | 
					
						
							|  |  |  |  |     command_text = command_text.strip().lower() | 
					
						
							|  |  |  |  |     if not command_text: | 
					
						
							|  |  |  |  |         return ("unknown", {}) | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |     # 运动指令 | 
					
						
							|  |  |  |  |     motion_rules = [ | 
					
						
							|  |  |  |  |         {"keywords": ["前进", "往前走", "向前走"], "action": "move_forward"}, | 
					
						
							|  |  |  |  |         {"keywords": ["后退", "往后走", "向后退"], "action": "move_backward"}, | 
					
						
							|  |  |  |  |         {"keywords": ["左转", "向左转", "往左转"], "action": "turn_left"}, | 
					
						
							|  |  |  |  |         {"keywords": ["右转", "向右转", "往右转"], "action": "turn_right"}, | 
					
						
							|  |  |  |  |     ] | 
					
						
							|  |  |  |  |     for rule in motion_rules: | 
					
						
							|  |  |  |  |         if any(keyword in command_text for keyword in rule["keywords"]): | 
					
						
							|  |  |  |  |             number_match = re.search(r"(\d{1,2})", command_text) | 
					
						
							|  |  |  |  |             seconds = int(number_match.group(1)) if number_match else 2 | 
					
						
							|  |  |  |  |             return ("motion", {"action": rule["action"], "seconds": seconds}) | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |     # 图像识别指令 | 
					
						
							|  |  |  |  |     image_keywords = ["是什么", "这是什么", "识别", "看这个", "这东西", "这物体", "辨认"] | 
					
						
							|  |  |  |  |     if any(keyword in command_text for keyword in image_keywords): | 
					
						
							|  |  |  |  |         prompt = f"请简洁描述图片中的物体,1-2句话说明:{command_text}" | 
					
						
							|  |  |  |  |         return ("image_recog", {"prompt": prompt}) | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |     # 闲聊指令 | 
					
						
							|  |  |  |  |     chat_keywords = [ | 
					
						
							|  |  |  |  |         "什么", "怎么", "为什么", "哪里", "多少", "如何", "吗", "呢", "吧", | 
					
						
							|  |  |  |  |         "你好", "哈喽", "嗨", "今天", "天气", "时间", "故事", "笑话", "知识" | 
					
						
							|  |  |  |  |     ] | 
					
						
							|  |  |  |  |     exclude_keywords = ["前进", "后退", "左转", "右转", "识别", "音量", "增大", "减小"] | 
					
						
							|  |  |  |  |     if len(command_text) >= 2 and any(k in command_text for k in chat_keywords) and not any(k in command_text for k in exclude_keywords): | 
					
						
							|  |  |  |  |         return ("chat", {"prompt": command_text}) | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |     # 音量控制指令 | 
					
						
							|  |  |  |  |     if any(keyword in command_text for keyword in ["增大音量", "声音大一点", "调大音量"]): | 
					
						
							|  |  |  |  |         return ("volume", {"action": "increase"}) | 
					
						
							|  |  |  |  |     elif any(keyword in command_text for keyword in ["减小音量", "声音小一点", "调小音量"]): | 
					
						
							|  |  |  |  |         return ("volume", {"action": "decrease"}) | 
					
						
							|  |  |  |  |     elif any(keyword in command_text for keyword in ["最大音量", "声音最大"]): | 
					
						
							|  |  |  |  |         return ("volume", {"action": "max"}) | 
					
						
							|  |  |  |  |     elif any(keyword in command_text for keyword in ["最小音量", "声音最小", "静音"]): | 
					
						
							|  |  |  |  |         return ("volume", {"action": "min"}) | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |     # 未知指令 | 
					
						
							|  |  |  |  |     return ("unknown", {}) | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | def execute_command(command_type: str, params: dict, motion_controller, ark_api_controller, volume_controller): | 
					
						
							|  |  |  |  |     global is_processing, feedback_playing | 
					
						
							|  |  |  |  |     if is_processing[0]: | 
					
						
							|  |  |  |  |         tts_controller.speak(FEEDBACK_TEXT["unknown"]) | 
					
						
							|  |  |  |  |         print("⚠️  已有指令处理中,请稍后再说") | 
					
						
							|  |  |  |  |         return | 
					
						
							|  |  |  |  |     is_processing[0] = True | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |     try: | 
					
						
							|  |  |  |  |         if command_type == "motion": | 
					
						
							|  |  |  |  |             motion_controller.execute_motion(params["action"], params["seconds"]) | 
					
						
							|  |  |  |  |          | 
					
						
							|  |  |  |  |         elif command_type == "image_recog": | 
					
						
							|  |  |  |  |             print(f"\n🔍 触发图像识别,正在拍摄...") | 
					
						
							|  |  |  |  |             image_base64 = camera_module.capture_base64() | 
					
						
							|  |  |  |  |             if not image_base64: | 
					
						
							|  |  |  |  |                 tts_controller.speak(FEEDBACK_TEXT["unknown"]) | 
					
						
							|  |  |  |  |                 print("\n" + "="*50) | 
					
						
							|  |  |  |  |                 print("❌ 图像采集失败,无法识别") | 
					
						
							|  |  |  |  |                 print("="*50 + "\n") | 
					
						
							|  |  |  |  |                 return | 
					
						
							|  |  |  |  |             ark_api_controller.call_ark_api("image_recog", {"image_base64": image_base64, "prompt": params["prompt"]}) | 
					
						
							|  |  |  |  |          | 
					
						
							|  |  |  |  |         elif command_type == "chat": | 
					
						
							|  |  |  |  |             print(f"\n💬 触发闲聊,正在思考...") | 
					
						
							|  |  |  |  |             ark_api_controller.call_ark_api("chat", {"prompt": params["prompt"]}) | 
					
						
							|  |  |  |  |          | 
					
						
							|  |  |  |  |         elif command_type == "volume": | 
					
						
							|  |  |  |  |             volume_action = params["action"] | 
					
						
							|  |  |  |  |             if volume_action == "increase": | 
					
						
							|  |  |  |  |                 success = volume_controller.adjust_volume(is_increase=True) | 
					
						
							|  |  |  |  |                 if success: | 
					
						
							|  |  |  |  |                     tts_controller.speak(FEEDBACK_TEXT["volume_increase"]) | 
					
						
							|  |  |  |  |             elif volume_action == "decrease": | 
					
						
							|  |  |  |  |                 success = volume_controller.adjust_volume(is_increase=False) | 
					
						
							|  |  |  |  |                 if success: | 
					
						
							|  |  |  |  |                     tts_controller.speak(FEEDBACK_TEXT["volume_decrease"]) | 
					
						
							|  |  |  |  |             elif volume_action == "max": | 
					
						
							|  |  |  |  |                 success = volume_controller.set_system_volume(MAX_VOLUME) | 
					
						
							|  |  |  |  |                 if success: | 
					
						
							|  |  |  |  |                     tts_controller.speak(FEEDBACK_TEXT["volume_max"]) | 
					
						
							|  |  |  |  |             elif volume_action == "min": | 
					
						
							|  |  |  |  |                 success = volume_controller.set_system_volume(MIN_VOLUME) | 
					
						
							|  |  |  |  |                 if success: | 
					
						
							|  |  |  |  |                     tts_controller.speak(FEEDBACK_TEXT["volume_min"]) | 
					
						
							|  |  |  |  |          | 
					
						
							|  |  |  |  |         elif command_type == "unknown": | 
					
						
							|  |  |  |  |             tts_controller.speak(FEEDBACK_TEXT["unknown"]) | 
					
						
							|  |  |  |  |             print("\n" + "="*50) | 
					
						
							|  |  |  |  |             print(f"❌ 未识别到有效指令,支持:") | 
					
						
							|  |  |  |  |             print(f"  - 运动:前进3秒、左转2秒 |  - 图像识别:这是什么") | 
					
						
							|  |  |  |  |             print(f"  - 闲聊:今天天气怎么样 |  - 音量:增大音量、减小音量") | 
					
						
							|  |  |  |  |             print("="*50 + "\n") | 
					
						
							|  |  |  |  |      | 
					
						
							|  |  |  |  |     finally: | 
					
						
							|  |  |  |  |         is_processing[0] = False | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | # -------------------- 11. 主循环(完全保留原代码11. 主循环逻辑) -------------------- | 
					
						
							|  |  |  |  | def main(): | 
					
						
							|  |  |  |  |     global tts_controller, camera_module, AUDIO_CONTROL_NAME, feedback_playing | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |     # 初始化各模块(按原代码顺序) | 
					
						
							|  |  |  |  |     # 1. 初始化TTS | 
					
						
							|  |  |  |  |     try: | 
					
						
							|  |  |  |  |         tts_controller = BaiduOnlineTTS(BAIDU_TTS_API_KEY, BAIDU_TTS_SECRET_KEY) | 
					
						
							|  |  |  |  |     except Exception as e: | 
					
						
							|  |  |  |  |         print(f"❌ TTS初始化失败: {str(e)}") | 
					
						
							|  |  |  |  |         sys.exit(1) | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |     # 2. 初始化音量控制 | 
					
						
							|  |  |  |  |     AUDIO_CONTROL_NAME = detect_audio_control() | 
					
						
							|  |  |  |  |     volume_controller = VolumeController( | 
					
						
							|  |  |  |  |         audio_control_name=AUDIO_CONTROL_NAME, | 
					
						
							|  |  |  |  |         current_volume=CURRENT_VOLUME, | 
					
						
							|  |  |  |  |         volume_step=VOLUME_STEP, | 
					
						
							|  |  |  |  |         min_volume=MIN_VOLUME, | 
					
						
							|  |  |  |  |         max_volume=MAX_VOLUME | 
					
						
							|  |  |  |  |     ) | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |     # 3. 初始化运动控制 | 
					
						
							|  |  |  |  |     motion_controller = RobotMotionController( | 
					
						
							|  |  |  |  |         onnx_model_path=ONNX_MODEL_PATH, | 
					
						
							|  |  |  |  |         tts_controller=tts_controller, | 
					
						
							|  |  |  |  |         feedback_text=FEEDBACK_TEXT | 
					
						
							|  |  |  |  |     ) | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |     # 4. 初始化摄像头 | 
					
						
							|  |  |  |  |     camera_module = CameraModule() | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |     # 5. 初始化API控制器 | 
					
						
							|  |  |  |  |     ark_api_controller = ArkAPIController( | 
					
						
							|  |  |  |  |         ark_api_key=ARK_API_KEY, | 
					
						
							|  |  |  |  |         ark_model_id=ARK_MODEL_ID, | 
					
						
							|  |  |  |  |         tts_controller=tts_controller, | 
					
						
							|  |  |  |  |         feedback_text=FEEDBACK_TEXT | 
					
						
							|  |  |  |  |     ) | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |     # 6. 初始化语音识别 | 
					
						
							|  |  |  |  |     voice_recog_controller = VoiceRecogController( | 
					
						
							|  |  |  |  |         access_key=ACCESS_KEY, | 
					
						
							|  |  |  |  |         wakeup_word_path=WAKEUP_WORD_PATH, | 
					
						
							|  |  |  |  |         model_path=MODEL_PATH, | 
					
						
							|  |  |  |  |         appid=APPID, | 
					
						
							|  |  |  |  |         access_key_id=ACCESS_KEY_ID, | 
					
						
							|  |  |  |  |         access_key_secret=ACCESS_KEY_SECRET, | 
					
						
							|  |  |  |  |         tts_controller=tts_controller, | 
					
						
							|  |  |  |  |         feedback_text=FEEDBACK_TEXT | 
					
						
							|  |  |  |  |     ) | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |     # 中断处理(完全保留原逻辑) | 
					
						
							|  |  |  |  |     def handle_interrupt(signum, frame): | 
					
						
							|  |  |  |  |         print("\n🛑 收到退出信号,正在清理资源...") | 
					
						
							|  |  |  |  |         # 停止机器人运动 | 
					
						
							|  |  |  |  |         if 'motion_controller' in globals() and hasattr(motion_controller, 'rl_walk'): | 
					
						
							|  |  |  |  |             motion_controller.rl_walk.last_commands = [0.0, 0.0, 0.0] | 
					
						
							|  |  |  |  |         # 停止TTS播放 | 
					
						
							|  |  |  |  |         global feedback_playing | 
					
						
							|  |  |  |  |         feedback_playing = False | 
					
						
							|  |  |  |  |         # 停止摄像头与麦克风 | 
					
						
							|  |  |  |  |         if 'camera_module' in globals() and camera_module.camera: | 
					
						
							|  |  |  |  |             camera_module.camera.stop() | 
					
						
							|  |  |  |  |         if hasattr(voice_recog_controller, 'stream') and voice_recog_controller.stream and voice_recog_controller.stream.active: | 
					
						
							|  |  |  |  |             voice_recog_controller.stream.stop() | 
					
						
							|  |  |  |  |         # 关闭TTS资源 | 
					
						
							|  |  |  |  |         tts_controller.close() | 
					
						
							|  |  |  |  |         print("✅ 所有资源清理完成,程序退出") | 
					
						
							|  |  |  |  |         sys.exit(0) | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |     signal.signal(signal.SIGINT, handle_interrupt) | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |     # 强制测试一次语音输出(原逻辑) | 
					
						
							|  |  |  |  |     print("\n🔍 正在测试语音输出...") | 
					
						
							|  |  |  |  |     tts_controller.speak("系统初始化完成,等待语音唤醒") | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |     # 主循环(原逻辑) | 
					
						
							|  |  |  |  |     while True: | 
					
						
							|  |  |  |  |         if voice_recog_controller.wakeup_listener(): | 
					
						
							|  |  |  |  |             # 定义指令执行回调函数(关键修复) | 
					
						
							|  |  |  |  |             def execute_callback(command_text): | 
					
						
							|  |  |  |  |                 command_type, params = parse_voice_command(command_text) | 
					
						
							|  |  |  |  |                 execute_command(command_type, params, motion_controller, ark_api_controller, volume_controller) | 
					
						
							|  |  |  |  |              | 
					
						
							|  |  |  |  |             # 启动WebSocket时传入回调函数 | 
					
						
							|  |  |  |  |             voice_recog_controller.start_websocket( | 
					
						
							|  |  |  |  |                 current_text=current_text, | 
					
						
							|  |  |  |  |                 final_result=final_result, | 
					
						
							|  |  |  |  |                 last_audio_time=last_audio_time, | 
					
						
							|  |  |  |  |                 is_processing=is_processing, | 
					
						
							|  |  |  |  |                 last_command_time=last_command_time, | 
					
						
							|  |  |  |  |                 execute_callback=execute_callback  # 传入回调 | 
					
						
							|  |  |  |  |             ) | 
					
						
							|  |  |  |  |             # 重置状态 | 
					
						
							|  |  |  |  |             last_audio_time[0] = time.time() | 
					
						
							|  |  |  |  |             last_command_time[0] = time.time() | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | 
 | 
					
						
if __name__ == "__main__":
    # Script entry point: run the voice-control main loop.
    #
    # Disabled ffmpeg availability check (original logic), kept for reference.
    # NOTE(review): if this is re-enabled, ffmpeg's version flag is
    # "-version" (single dash), and the bare "except:" should be narrowed.
    # # Ensure ffmpeg is installed (original logic)
    # try:
    #     subprocess.run(["ffmpeg", "--version"], capture_output=True, check=True)
    # except:
    #     print("⚠️  未检测到ffmpeg,正在尝试安装...")
    #     subprocess.run(["sudo", "apt-get", "install", "-y", "ffmpeg"], check=True)

    main()