代码补充

2025-09-29 09:19:40 +08:00 · 2025-09-29 09:19:40 +08:00 · ab42cc6de5
commit ab42cc6de5
parent cb2eb13b6e
7 changed files with 973 additions and 0 deletions
--- a/ark_api_module.py
+++ b/ark_api_module.py
@ -0,0 +1,101 @@
 from openai import OpenAI
 import time
 import sys
 import queue  # 新增：用于缓存实时文本片段
 import threading  # 新增：用于并行处理语音播放
 # 原代码7. 火山方舟API调用完整逻辑
 class ArkAPIController:
    def __init__(self, ark_api_key, ark_model_id, tts_controller, feedback_text):
        # 接收调度脚本传入的TTS实例和反馈文本，保持原逻辑
        self.ARK_API_KEY = ark_api_key
        self.ARK_MODEL_ID = ark_model_id
        self.tts_controller = tts_controller
        self.FEEDBACK_TEXT = feedback_text
        self.chat_context = []  # 聊天上下文由模块内部维护（与原逻辑一致）
        self.MAX_CONTEXT_LEN = 10
         # 新增：实时语音播放队列与线程
        self.speech_queue = queue.Queue()  # 缓存待播放的文本片段
        self.speech_thread = threading.Thread(target=self._process_speech_queue, daemon=True)
        self.speech_thread.start()  # 启动语音播放线程
     # 新增：处理语音队列的函数（循环从队列取片段并播放）
    def _process_speech_queue(self):
        """持续从队列中获取文本片段并调用TTS播放"""
        while True:
            text = self.speech_queue.get()  # 阻塞等待队列消息
            if text is None:  # 退出信号
                break
            self.tts_controller.speak(text)  # 播放片段
            self.speech_queue.task_done()  # 标记任务完成
    def call_ark_api(self, content_type: str, content: dict):
        # 播放操作反馈（同步执行）
        self.tts_controller.speak(self.FEEDBACK_TEXT[content_type])
        client = OpenAI(
            base_url="https://ark.cn-beijing.volces.com/api/v3",
            api_key=self.ARK_API_KEY
        )
        try:
            messages = []
            if content_type == "chat":
                messages.extend(self.chat_context[-self.MAX_CONTEXT_LEN*2:])
                messages.append({"role": "user", "content": [{"type": "text", "text": content["prompt"]}]})
            elif content_type == "image_recog":
                messages.append({
                    "role": "user",
                    "content": [
                        {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{content['image_base64']}"}},
                        {"type": "text", "text": content["prompt"]}
                    ]
                })
            response = client.chat.completions.create(
                model=self.ARK_MODEL_ID,
                messages=messages,
                max_tokens=300,
                temperature=0.7 if content_type == "chat" else 0.3,
                stream=True
            )
            full_response = ""
            current_speech_chunk = ""  # 缓存当前待播放的片段
            print("\n" + "="*50)
            print("🤖 回应：", end="", flush=True)
            for chunk in response:
                if chunk.choices and chunk.choices[0].delta.content:
                    char = chunk.choices[0].delta.content
                    full_response += char
                    current_speech_chunk += char  # 累加片段
                    print(char, end="", flush=True)
                    time.sleep(0.05)
                    # 关键逻辑：当片段包含标点或达到一定长度时，推送到语音队列
                    if any(punct in current_speech_chunk for punct in [".", "。", "!", "！", "?", "？", ",", "，", ";", "；"]):
                        self.speech_queue.put(current_speech_chunk)  # 推送片段到队列
                        current_speech_chunk = ""  # 重置片段缓存
            # 处理最后剩余的片段（如果有）
            if current_speech_chunk:
                self.speech_queue.put(current_speech_chunk)
            print("\n" + "="*50 + "\n")
            # 等待所有语音片段播放完成
            self.speech_queue.join()
            # 维护聊天上下文（原有逻辑）
            if content_type == "chat" and full_response.strip():
                self.chat_context.append({"role": "user", "content": [{"type": "text", "text": content["prompt"]}]})
                self.chat_context.append({"role": "assistant", "content": [{"type": "text", "text": full_response}]})
            return full_response
        except Exception as e:
            error_msg = f"❌ API调用失败：{str(e)}"
            print(f"\n" + "="*50)
            print(error_msg)
            print("="*50 + "\n")
            self.tts_controller.speak(self.FEEDBACK_TEXT["api_error"])
            return error_msg
--- a/camera_module.py
+++ b/camera_module.py
@ -0,0 +1,30 @@
 import io
 import base64
 from PIL import Image
 from picamera2 import Picamera2
 import sys
 # 原代码6. 摄像头模块完整逻辑
 class CameraModule:
    def __init__(self):
        try:
            self.camera = Picamera2()
            cam_config = self.camera.create_still_configuration(main={"size": (320, 240)})
            self.camera.configure(cam_config)
            self.camera.start()
            print("📷 摄像头模块初始化成功")
        except Exception as e:
            print(f"❌ 摄像头失败：{str(e)}")
            self.camera = None
    def capture_base64(self):
        if not self.camera:
            return None
        try:
            img_array = self.camera.capture_array()
            img_byte = io.BytesIO()
            Image.fromarray(img_array).save(img_byte, format="JPEG", quality=80)
            return base64.b64encode(img_byte.getvalue()).decode("utf-8")
        except Exception as e:
            print(f"❌ 拍摄失败：{str(e)}")
            return None
--- a/main_scheduler.py
+++ b/main_scheduler.py
@ -0,0 +1,291 @@
 import signal
 import sys
 import time
 import re
 import subprocess
 import queue 
 # 导入所有模块
 from tts_module import BaiduOnlineTTS
 from volume_module import VolumeController, detect_audio_control
 from motion_module import RobotMotionController
 from camera_module import CameraModule
 from ark_api_module import ArkAPIController
 from voice_recog_module import VoiceRecogController
 # -------------------- 1. 基础配置（完全保留原代码1. 基础配置） --------------------
 # 1.1 项目路径与运动模型
 PROJECT_ROOT = "/home/duckpi/open_duck_mini_ws/OPEN_DUCK_MINI/Open_Duck_Mini_Runtime-2"
 sys.path.append(PROJECT_ROOT)
 ONNX_MODEL_PATH = "/home/duckpi/open_duck_mini_ws/OPEN_DUCK_MINI/Open_Duck_Mini-2/BEST_WALK_ONNX_2.onnx"
 # 1.2 火山方舟API配置
 ARK_API_KEY = "390d517c-129a-41c1-bf3d-458048007b69"
 ARK_MODEL_ID = "doubao-seed-1-6-250615"
 # 1.3 语音识别与唤醒词配置
 APPID = "1ff50710"
 ACCESS_KEY_ID = "a4f43e95ee0a9518d11befac8d31f1d4"
 ACCESS_KEY_SECRET = "YzQ4NTRhZjc2ZTM4MDA1YjM2MmIyNDEy"
 ACCESS_KEY = "e0EQQBoH0HIVU9KrXsmB7CMlVci+GAs2x0Ejtrdp8CTtZmf25rCLaQ=="
 WAKEUP_WORD_PATH = "/home/duckpi/open_duck_mini_ws/OPEN_DUCK_MINI/resources/xiaohuangya_zh_raspberry-pi_v3_0_0.ppn"
 MODEL_PATH = "/home/duckpi/open_duck_mini_ws/OPEN_DUCK_MINI/resources/porcupine_params_zh.pv"
 # 1.4 百度在线TTS配置
 BAIDU_TTS_API_KEY = "TnwYZPPvElNushOzfL6vBlUI"
 BAIDU_TTS_SECRET_KEY = "55HeI8VNUMNlkW3t2QRwVtrjumpxjfxk"
 # 1.5 语音反馈文本配置
 FEEDBACK_TEXT = {
    "wakeup": "你好呀，有什么吩咐",
    "move_forward": "好的，我正在前进",
    "move_backward": "好的，我正在后退",
    "turn_left": "好的，我正在左转",
    "turn_right": "好的，我正在右转",
    "image_recog": "好的，我来识别一下",
    "chat": "好的，我来想想",
    "volume_increase": "音量已增大",
    "volume_decrease": "音量已减小",
    "volume_max": "已调至最大音量",
    "volume_min": "已调至最小音量",
    "unknown": "抱歉，没听懂，请再说一次",
    "api_error": "抱歉，处理请求时出错了"
 }
 # 1.6 音频参数
 VOLUME_STEP = 10
 MIN_VOLUME = 0
 MAX_VOLUME = 100
 CURRENT_VOLUME = 40
 AUDIO_CONTROL_NAME = None
 # 1.7 麦克风与扬声器参数（模块内部已定义，此处保留用于一致性）
 SAMPLE_RATE = 16000
 CHANNELS = 1
 SAMPLE_FORMAT = "int16"
 AUDIO_ENCODE = "pcm_s16le"
 LANG = "autodialect"
 INTERACTION_TIMEOUT = 30
 # -------------------- 2. 全局状态变量（完全保留原代码2. 全局状态变量，用列表传引用） --------------------
 audio_q = queue.Queue()
 last_audio_time = [time.time()]  # 列表传引用，供模块修改
 current_text = [""]               # 列表传引用，供模块修改
 final_result = [""]              # 列表传引用，供模块修改
 is_processing = [False]          # 列表传引用，供模块修改
 last_command_time = [time.time()]# 列表传引用，供模块修改
 feedback_playing = False         # TTS模块使用的全局变量
 # -------------------- 8. 指令解析与执行（完全保留原代码8. 指令解析与执行） --------------------
 def parse_voice_command(command_text: str):
    command_text = command_text.strip().lower()
    if not command_text:
        return ("unknown", {})
    # 运动指令
    motion_rules = [
        {"keywords": ["前进", "往前走", "向前走"], "action": "move_forward"},
        {"keywords": ["后退", "往后走", "向后退"], "action": "move_backward"},
        {"keywords": ["左转", "向左转", "往左转"], "action": "turn_left"},
        {"keywords": ["右转", "向右转", "往右转"], "action": "turn_right"},
    ]
    for rule in motion_rules:
        if any(keyword in command_text for keyword in rule["keywords"]):
            number_match = re.search(r"(\d{1,2})", command_text)
            seconds = int(number_match.group(1)) if number_match else 2
            return ("motion", {"action": rule["action"], "seconds": seconds})
    # 图像识别指令
    image_keywords = ["是什么", "这是什么", "识别", "看这个", "这东西", "这物体", "辨认"]
    if any(keyword in command_text for keyword in image_keywords):
        prompt = f"请简洁描述图片中的物体，1-2句话说明：{command_text}"
        return ("image_recog", {"prompt": prompt})
    # 闲聊指令
    chat_keywords = [
        "什么", "怎么", "为什么", "哪里", "多少", "如何", "吗", "呢", "吧",
        "你好", "哈喽", "嗨", "今天", "天气", "时间", "故事", "笑话", "知识"
    ]
    exclude_keywords = ["前进", "后退", "左转", "右转", "识别", "音量", "增大", "减小"]
    if len(command_text) >= 2 and any(k in command_text for k in chat_keywords) and not any(k in command_text for k in exclude_keywords):
        return ("chat", {"prompt": command_text})
    # 音量控制指令
    if any(keyword in command_text for keyword in ["增大音量", "声音大一点", "调大音量"]):
        return ("volume", {"action": "increase"})
    elif any(keyword in command_text for keyword in ["减小音量", "声音小一点", "调小音量"]):
        return ("volume", {"action": "decrease"})
    elif any(keyword in command_text for keyword in ["最大音量", "声音最大"]):
        return ("volume", {"action": "max"})
    elif any(keyword in command_text for keyword in ["最小音量", "声音最小", "静音"]):
        return ("volume", {"action": "min"})
    # 未知指令
    return ("unknown", {})
 def execute_command(command_type: str, params: dict, motion_controller, ark_api_controller, volume_controller):
    global is_processing, feedback_playing
    if is_processing[0]:
        tts_controller.speak(FEEDBACK_TEXT["unknown"])
        print("⚠️  已有指令处理中，请稍后再说")
        return
    is_processing[0] = True
    try:
        if command_type == "motion":
            motion_controller.execute_motion(params["action"], params["seconds"])
        elif command_type == "image_recog":
            print(f"\n🔍 触发图像识别，正在拍摄...")
            image_base64 = camera_module.capture_base64()
            if not image_base64:
                tts_controller.speak(FEEDBACK_TEXT["unknown"])
                print("\n" + "="*50)
                print("❌ 图像采集失败，无法识别")
                print("="*50 + "\n")
                return
            ark_api_controller.call_ark_api("image_recog", {"image_base64": image_base64, "prompt": params["prompt"]})
        elif command_type == "chat":
            print(f"\n💬 触发闲聊，正在思考...")
            ark_api_controller.call_ark_api("chat", {"prompt": params["prompt"]})
        elif command_type == "volume":
            volume_action = params["action"]
            if volume_action == "increase":
                success = volume_controller.adjust_volume(is_increase=True)
                if success:
                    tts_controller.speak(FEEDBACK_TEXT["volume_increase"])
            elif volume_action == "decrease":
                success = volume_controller.adjust_volume(is_increase=False)
                if success:
                    tts_controller.speak(FEEDBACK_TEXT["volume_decrease"])
            elif volume_action == "max":
                success = volume_controller.set_system_volume(MAX_VOLUME)
                if success:
                    tts_controller.speak(FEEDBACK_TEXT["volume_max"])
            elif volume_action == "min":
                success = volume_controller.set_system_volume(MIN_VOLUME)
                if success:
                    tts_controller.speak(FEEDBACK_TEXT["volume_min"])
        elif command_type == "unknown":
            tts_controller.speak(FEEDBACK_TEXT["unknown"])
            print("\n" + "="*50)
            print(f"❌ 未识别到有效指令，支持：")
            print(f"  - 运动：前进3秒、左转2秒 |  - 图像识别：这是什么")
            print(f"  - 闲聊：今天天气怎么样 |  - 音量：增大音量、减小音量")
            print("="*50 + "\n")
    finally:
        is_processing[0] = False
 # -------------------- 11. 主循环（完全保留原代码11. 主循环逻辑） --------------------
 def main():
    global tts_controller, camera_module, AUDIO_CONTROL_NAME, feedback_playing
    # 初始化各模块（按原代码顺序）
    # 1. 初始化TTS
    try:
        tts_controller = BaiduOnlineTTS(BAIDU_TTS_API_KEY, BAIDU_TTS_SECRET_KEY)
    except Exception as e:
        print(f"❌ TTS初始化失败: {str(e)}")
        sys.exit(1)
    # 2. 初始化音量控制
    AUDIO_CONTROL_NAME = detect_audio_control()
    volume_controller = VolumeController(
        audio_control_name=AUDIO_CONTROL_NAME,
        current_volume=CURRENT_VOLUME,
        volume_step=VOLUME_STEP,
        min_volume=MIN_VOLUME,
        max_volume=MAX_VOLUME
    )
    # 3. 初始化运动控制
    motion_controller = RobotMotionController(
        onnx_model_path=ONNX_MODEL_PATH,
        tts_controller=tts_controller,
        feedback_text=FEEDBACK_TEXT
    )
    # 4. 初始化摄像头
    camera_module = CameraModule()
    # 5. 初始化API控制器
    ark_api_controller = ArkAPIController(
        ark_api_key=ARK_API_KEY,
        ark_model_id=ARK_MODEL_ID,
        tts_controller=tts_controller,
        feedback_text=FEEDBACK_TEXT
    )
    # 6. 初始化语音识别
    voice_recog_controller = VoiceRecogController(
        access_key=ACCESS_KEY,
        wakeup_word_path=WAKEUP_WORD_PATH,
        model_path=MODEL_PATH,
        appid=APPID,
        access_key_id=ACCESS_KEY_ID,
        access_key_secret=ACCESS_KEY_SECRET,
        tts_controller=tts_controller,
        feedback_text=FEEDBACK_TEXT
    )
    # 中断处理（完全保留原逻辑）
    def handle_interrupt(signum, frame):
        print("\n🛑 收到退出信号，正在清理资源...")
        # 停止机器人运动
        if 'motion_controller' in globals() and hasattr(motion_controller, 'rl_walk'):
            motion_controller.rl_walk.last_commands = [0.0, 0.0, 0.0]
        # 停止TTS播放
        global feedback_playing
        feedback_playing = False
        # 停止摄像头与麦克风
        if 'camera_module' in globals() and camera_module.camera:
            camera_module.camera.stop()
        if hasattr(voice_recog_controller, 'stream') and voice_recog_controller.stream and voice_recog_controller.stream.active:
            voice_recog_controller.stream.stop()
        # 关闭TTS资源
        tts_controller.close()
        print("✅ 所有资源清理完成，程序退出")
        sys.exit(0)
    signal.signal(signal.SIGINT, handle_interrupt)
    # 强制测试一次语音输出（原逻辑）
    print("\n🔍 正在测试语音输出...")
    tts_controller.speak("系统初始化完成,等待语音唤醒")
    # 主循环（原逻辑）
    while True:
        if voice_recog_controller.wakeup_listener():
            # 定义指令执行回调函数（关键修复）
            def execute_callback(command_text):
                command_type, params = parse_voice_command(command_text)
                execute_command(command_type, params, motion_controller, ark_api_controller, volume_controller)
            # 启动WebSocket时传入回调函数
            voice_recog_controller.start_websocket(
                current_text=current_text,
                final_result=final_result,
                last_audio_time=last_audio_time,
                is_processing=is_processing,
                last_command_time=last_command_time,
                execute_callback=execute_callback  # 传入回调
            )
            # 重置状态
            last_audio_time[0] = time.time()
            last_command_time[0] = time.time()
 if __name__ == "__main__":
    # # 确保ffmpeg已安装（原逻辑）
    # try:
    #     subprocess.run(["ffmpeg", "--version"], capture_output=True, check=True)
    # except:
    #     print("⚠️  未检测到ffmpeg，正在尝试安装...")
    #     subprocess.run(["sudo", "apt-get", "install", "-y", "ffmpeg"], check=True)
    main()
--- a/motion_module.py
+++ b/motion_module.py
@ -0,0 +1,59 @@
 import sys
 import time
 import threading
 # 原代码5. 运动控制模块完整逻辑（保留原路径导入）
 PROJECT_ROOT = "/home/duckpi/open_duck_mini_ws/OPEN_DUCK_MINI/Open_Duck_Mini_Runtime-2"
 sys.path.append(PROJECT_ROOT)
 from v2_rl_walk_mujoco import RLWalk
 class RobotMotionController:
    def __init__(self, onnx_model_path, tts_controller, feedback_text):
        # 接收调度脚本传入的TTS实例和反馈文本，保持原逻辑调用
        self.tts_controller = tts_controller
        self.FEEDBACK_TEXT = feedback_text
        try:
            self.rl_walk = RLWalk(
                onnx_model_path=onnx_model_path,
                cutoff_frequency=40,
                pid=[30, 0, 0]
            )
            self.walk_thread = threading.Thread(target=self.rl_walk.run, daemon=True)
            self.walk_thread.start()
            time.sleep(1)
            print("🤖 运动控制模块初始化成功")
        except Exception as e:
            print(f"❌ 运动控制失败：{str(e)}")
            sys.exit(1)
    def execute_motion(self, action_name: str, seconds: int):
        # 全局变量由调度脚本传入，保留原逻辑
        global is_processing
        is_processing = True
        try:
            # 播放反馈（同步执行确保声音输出）
            self.tts_controller.speak(self.FEEDBACK_TEXT[action_name])
            # 执行运动
            seconds = max(2, min(seconds, 5))
            if action_name == "move_forward":
                print(f"\n🚶 前进{seconds}秒...")
                self.rl_walk.last_commands[0] = 0.17
            elif action_name == "move_backward":
                print(f"\n🚶 后退{seconds}秒...")
                self.rl_walk.last_commands[0] = -0.17
            elif action_name == "turn_left":
                print(f"\n🔄 左转{seconds}秒...")
                self.rl_walk.last_commands[2] = 1.1
            elif action_name == "turn_right":
                print(f"\n🔄 右转{seconds}秒...")
                self.rl_walk.last_commands[2] = -1.1
            time.sleep(seconds)
            self.rl_walk.last_commands = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
            print(f"✅ 运动完成")
        except Exception as e:
            print(f"❌ 运动执行失败：{str(e)}")
            self.rl_walk.last_commands = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
        finally:
            is_processing = False
--- a/tts_module.py
+++ b/tts_module.py
@ -0,0 +1,173 @@
 import pyaudio
 import wave
 import tempfile
 import os
 import requests
 import time
 import sys
 # 原代码3. 百度在线TTS模块完整逻辑
 class BaiduOnlineTTS:
    def __init__(self, api_key, secret_key):
        """初始化百度在线TTS"""
        self.api_key = api_key
        self.secret_key = secret_key
        self.access_token = None
        self.token_expires = 0
        # 初始化音频播放器
        self.audio_player = pyaudio.PyAudio()
        # TTS配置参数
        self.default_options = {
            'vol': 5,    # 音量(0-15)
            'spd': 5,    # 语速(0-9)
            'pit': 5,    # 音调(0-9)
            'per': 0     # 发音人(0:女,1:男,3:情感女,4:情感男)
        }
        # 获取初始访问令牌
        if not self._get_access_token():
            raise Exception("无法获取百度API访问令牌，请检查密钥是否正确")
    def _get_access_token(self):
        """获取百度API访问令牌"""
        # 检查令牌是否仍然有效
        if self.access_token and time.time() < self.token_expires - 300:
            return True
        try:
            url = f"https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials&client_id={self.api_key}&client_secret={self.secret_key}"
            response = requests.get(url)
            result = response.json()
            if "access_token" in result:
                self.access_token = result["access_token"]
                self.token_expires = time.time() + result["expires_in"]
                print("✅ 成功获取百度API访问令牌")
                return True
            else:
                print(f"❌ 获取令牌失败: {result}")
                return False
        except Exception as e:
            print(f"❌ 获取令牌时发生错误: {str(e)}")
            return False
    def text_to_speech(self, text, options=None, save_path=None):
        """将文本转换为语音"""
        # 确保令牌有效
        if not self._get_access_token():
            return None
        # 合并配置参数
        params = self.default_options.copy()
        if options:
            params.update(options)
        try:
            # 对文本进行URL编码
            encoded_text = requests.utils.quote(text)
            url = f"https://tsn.baidu.com/text2audio?tex={encoded_text}&lan=zh&cuid=baidu-tts-python&ctp=1&tok={self.access_token}"
            # 添加合成参数
            for key, value in params.items():
                url += f"&{key}={value}"
            # 发送请求
            response = requests.get(url)
            # 检查响应是否为音频数据
            if response.headers.get("Content-Type", "").startswith("audio/"):
                # 保存文件(如果需要)
                if save_path:
                    with open(save_path, "wb") as f:
                        f.write(response.content)
                    print(f"✅ 音频已保存至: {save_path}")
                return response.content
            else:
                # 解析错误信息
                try:
                    error = response.json()
                    print(f"❌ 语音合成失败: {error.get('err_msg', '未知错误')}")
                except:
                    print(f"❌ 语音合成失败，响应内容: {response.text}")
                return None
        except Exception as e:
            print(f"❌ 语音合成时发生错误: {str(e)}")
            return None
    def speak(self, text, options=None):
        """直接播放文本转换的语音"""
        # 全局变量由调度脚本传入，此处保留原逻辑调用
        from main_scheduler import feedback_playing
        if feedback_playing:
            return False
        feedback_playing = True
        # 限制文本长度(百度API有长度限制)
        if len(text) > 1024:
            print("⚠️ 文本过长，将截断为1024字符")
            text = text[:1024]
        # 获取音频数据
        audio_data = self.text_to_speech(text, options)
        if not audio_data:
            feedback_playing = False
            return False
        try:
            # 创建临时MP3文件
            with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_file:
                temp_file.write(audio_data)
                temp_filename = temp_file.name
            # 转换为WAV格式(适配pyaudio)
            from pydub import AudioSegment
            audio = AudioSegment.from_mp3(temp_filename)
            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as wav_file:
                audio.export(wav_file.name, format="wav")
                wav_filename = wav_file.name
            # 播放WAV文件
            wf = wave.open(wav_filename, 'rb')
            stream = self.audio_player.open(
                format=self.audio_player.get_format_from_width(wf.getsampwidth()),
                channels=wf.getnchannels(),
                rate=wf.getframerate(),
                output=True
            )
            # 播放音频
            chunk = 1024
            data = wf.readframes(chunk)
            while data and feedback_playing:
                stream.write(data)
                data = wf.readframes(chunk)
            # 清理资源
            stream.stop_stream()
            stream.close()
            wf.close()
            print(f"✅ 语音播放完成: {text[:20]}...")
            return True
        except Exception as e:
            print(f"❌ 播放语音时发生错误: {str(e)}")
            return False
        finally:
            # 删除临时文件
            if 'temp_filename' in locals() and os.path.exists(temp_filename):
                os.remove(temp_filename)
            if 'wav_filename' in locals() and os.path.exists(wav_filename):
                os.remove(wav_filename)
            feedback_playing = False
    def close(self):
        """释放资源"""
        self.audio_player.terminate()
        print("✅ TTS资源已释放")
--- a/voice_recog_module.py
+++ b/voice_recog_module.py
@ -0,0 +1,226 @@
 import sounddevice as sd
 import pvporcupine
 import struct
 import websocket
 import threading
 import hmac
 import hashlib
 import base64
 import json
 import time
 import urllib.parse
 import uuid
 import queue
 import sys
 # 原代码9. 音频采集与WebSocket + 10. 唤醒词监听完整逻辑
 class VoiceRecogController:
    def __init__(self, access_key, wakeup_word_path, model_path, appid, access_key_id, access_key_secret, tts_controller, feedback_text):
        # 接收调度脚本传入的参数，保持原逻辑
        self.ACCESS_KEY = access_key
        self.WAKEUP_WORD_PATH = wakeup_word_path
        self.MODEL_PATH = model_path
        self.APPID = appid
        self.ACCESS_KEY_ID = access_key_id
        self.ACCESS_KEY_SECRET = access_key_secret
        self.tts_controller = tts_controller
        self.FEEDBACK_TEXT = feedback_text
        self.SAMPLE_RATE = 16000
        self.CHANNELS = 1
        self.SAMPLE_FORMAT = "int16"
        self.INTERACTION_TIMEOUT = 30
        self.audio_q = queue.Queue()
        self.stream = None  # 麦克风流后续初始化
    def wakeup_listener(self):
        """原代码10. 唤醒词监听"""
        try:
            porcupine = pvporcupine.create(
                access_key=self.ACCESS_KEY,
                keyword_paths=[self.WAKEUP_WORD_PATH],
                model_path=self.MODEL_PATH
            )
            print(f"\n🎯 唤醒词引擎就绪（采样率：{porcupine.sample_rate}）")
            wakeup_mic = sd.RawInputStream(
                samplerate=porcupine.sample_rate,
                blocksize=porcupine.frame_length,
                dtype="int16",
                channels=1
            )
            print("📢 等待唤醒词「小黄鸭」（按Ctrl+C退出）")
            with wakeup_mic:
                while True:
                    pcm_data, _ = wakeup_mic.read(porcupine.frame_length)
                    pcm_unpacked = struct.unpack_from("h" * porcupine.frame_length, pcm_data)
                    if porcupine.process(pcm_unpacked) >= 0:
                        print("🚀 检测到唤醒词「小黄鸭」！")
                        # 播放唤醒反馈（同步执行）
                        self.tts_controller.speak(self.FEEDBACK_TEXT["wakeup"])
                        porcupine.delete()
                        return True
        except Exception as e:
            print(f"\n❌ 唤醒词监听失败：{str(e)}")
            print("   排查：1. 唤醒词文件路径 2. 麦克风连接 3. PicoVoice Key有效性")
            sys.exit(1)
    def _audio_callback(self, indata, frames, t, status):
        """原代码9. 音频采集回调"""
        if status:
            print(f"⚠️  音频异常：{status}")
        self.audio_q.put(bytes(indata))
    def _create_ws_url(self):
        """原代码9. 创建WebSocket URL"""
        try:
            host = "office-api-ast-dx.iflyaisol.com"
            path = "/ast/communicate/v1"
            utc = time.strftime("%Y-%m-%dT%H:%M:%S", time.gmtime()) + "+0000"
            session_uuid = str(uuid.uuid4())
            params = {
                "accessKeyId": self.ACCESS_KEY_ID,
                "appId": self.APPID,
                "samplerate": self.SAMPLE_RATE,
                "audio_encode": "pcm_s16le",
                "lang": "autodialect",
                "uuid": session_uuid,
                "utc": utc,
            }
            sorted_params = sorted(params.items(), key=lambda x: x[0])
            base_string = "&".join(
                f"{urllib.parse.quote_plus(str(k))}={urllib.parse.quote_plus(str(v))}"
                for k, v in sorted_params
            )
            signature = hmac.new(
                self.ACCESS_KEY_SECRET.encode("utf-8"),
                base_string.encode("utf-8"),
                hashlib.sha1
            ).digest()
            signature = base64.b64encode(signature).decode("utf-8")
            query = base_string + "&signature=" + urllib.parse.quote_plus(signature)
            return f"wss://{host}{path}?{query}", session_uuid
        except Exception as e:
            print(f"❌ WebSocket URL生成失败：{str(e)}")
            return None, None
    def _on_message(self, ws, message, current_text, last_audio_time):
        """原代码9. WebSocket消息处理（接收全局变量引用）"""
        try:
            data = json.loads(message)
            if data.get("msg_type") == "result" and "cn" in data.get("data", {}):
                words = [
                    cw.get("w", "") 
                    for rt in data["data"]["cn"].get("st", {}).get("rt", [])
                    for ws_item in rt.get("ws", [])
                    for cw in ws_item.get("cw", [])
                ]
                if words:
                    current_text[0] = "".join(words)  # 用列表传引用，修改全局变量
                    last_audio_time[0] = time.time()
                    print(f"🎧 识别中：{current_text[0]}", end="\r")
        except Exception as e:
            print(f"\n❌ 语音识别消息处理错误：{str(e)}")
    def _on_error(self, ws, error, is_processing):
        """原代码9. WebSocket错误处理"""
        if not is_processing[0]:
            print(f"\n❌ WebSocket连接错误：{str(error)}")
    def _on_close(self, ws, close_status_code, close_msg, current_text, final_result, stream):
        """原代码9. WebSocket关闭处理"""
        print(f"\n🔌 WebSocket连接关闭 | 状态码：{close_status_code}")
        if stream and stream.active:
            stream.stop()
        current_text[0] = ""
        final_result[0] = ""
    def _on_open(self, ws, stream, current_text, final_result, last_audio_time, is_processing, last_command_time, execute_callback):
        """新增 execute_callback 参数，用于接收指令执行函数"""
        def send_audio_and_handle():
            print("\n🎤 指令已就绪！支持：")
            print("  - 运动：前进3秒、左转2秒 |  - 图像识别：这是什么")
            print("  - 闲聊：今天天气怎么样 |  - 音量：增大音量、减小音量\n")
            stream.start()
            current_text[0] = ""
            final_result[0] = ""
            last_command_time[0] = time.time()
            while True:
                try:
                    # 1. 处理音频队列（避免堆积）
                    while self.audio_q.qsize() > 5:
                        self.audio_q.get_nowait()
                    # 2. 发送音频数据（若队列有数据）
                    audio_data = self.audio_q.get(timeout=0.5)
                    ws.send(audio_data, websocket.ABNF.OPCODE_BINARY)
                    # 3. 指令识别与执行：有文本且2秒内无新音频时执行
                    if current_text[0] and (time.time() - last_audio_time[0]) > 2:
                        final_result[0] = current_text[0].strip()
                        if len(final_result[0]) > 0:  # 确保指令有效
                            print(f"\n⏹ 最终指令：{final_result[0]}")
                            # 调用回调函数执行指令（关键修复：直接在这里执行）
                            execute_callback(final_result[0])
                            last_command_time[0] = time.time()  # 更新最后操作时间
                        current_text[0] = ""  # 执行后清空，避免重复识别
                        final_result[0] = ""
                        time.sleep(1)  # 等待指令执行完成
                    # 4. 超时检测：30秒无操作则关闭连接
                    if time.time() - last_command_time[0] > self.INTERACTION_TIMEOUT:
                        print(f"\n⌛ {self.INTERACTION_TIMEOUT}秒无操作，关闭连接")
                        self.tts_controller.speak(self.FEEDBACK_TEXT.get("wakeup_timeout", "长时间没操作，我先休息啦"))
                        time.sleep(1)
                        ws.send("close", websocket.ABNF.OPCODE_TEXT)
                        break
                except queue.Empty:
                    # 队列为空时，检测超时
                    if time.time() - last_command_time[0] > self.INTERACTION_TIMEOUT:
                        print(f"\n⌛ {self.INTERACTION_TIMEOUT}秒无操作，关闭连接")
                        self.tts_controller.speak(self.FEEDBACK_TEXT.get("wakeup_timeout", "长时间没操作，我先休息啦"))
                        time.sleep(1)
                        ws.send("close", websocket.ABNF.OPCODE_TEXT)
                        break
                    continue  # 继续循环等待音频
                except Exception as e:
                    print(f"\n❌ 音频发送错误：{str(e)}")
                    break
        audio_thread = threading.Thread(target=send_audio_and_handle, daemon=True)
        audio_thread.start()
    def start_websocket(self, current_text, final_result, last_audio_time, is_processing, last_command_time, execute_callback):
        """新增 execute_callback 参数，用于传递指令执行函数"""
        self.stream = sd.RawInputStream(
            samplerate=self.SAMPLE_RATE,
            channels=self.CHANNELS,
            dtype=self.SAMPLE_FORMAT,
            callback=self._audio_callback,
        )
        ws_url, session_id = self._create_ws_url()
        if not ws_url:
            print("⚠️  无法生成语音识别连接，3秒后重新监听...")
            time.sleep(3)
            return
        try:
            print(f"🔄 连接语音识别服务（会话ID：{session_id[:8]}...）")
            # 绑定WebSocket回调时传入 execute_callback
            ws = websocket.WebSocketApp(
                ws_url,
                on_open=lambda ws: self._on_open(ws, self.stream, current_text, final_result, last_audio_time, is_processing, last_command_time, execute_callback),
                on_message=lambda ws, msg: self._on_message(ws, msg, current_text, last_audio_time),
                on_error=lambda ws, err: self._on_error(ws, err, is_processing),
                on_close=lambda ws, status, msg: self._on_close(ws, status, msg, current_text, final_result, self.stream)
            )
            ws.run_forever(ping_interval=10, ping_timeout=5)
        except Exception as e:
            print(f"❌ 语音识别连接失败：{str(e)}")
            print("⚠️  3秒后重新监听唤醒词...")
            time.sleep(3)
--- a/volume_module.py
+++ b/volume_module.py
@ -0,0 +1,93 @@
 import subprocess
 import re
 import sys
 # 原代码4. 音频控制项自动检测与音量控制完整逻辑
 def detect_audio_control():
    """自动检测可用的音频播放控制项"""
    try:
        result = subprocess.run(["amixer", "controls"], capture_output=True, text=True)
        playback_controls = []
        # 优先查找常见的音频控制项
        priority_names = ["Master", "Speaker", "Headphone", "Audio", "Sound"]
        for name in priority_names:
            if name in result.stdout:
                print(f"✅ 自动检测到音频控制项：{name}")
                return name
        # 如果没有找到优先项，从所有控制项中提取
        for line in result.stdout.splitlines():
            if "Playback" in line:
                match = re.search(r"'([^']+)'", line)
                if match:
                    playback_controls.append(match.group(1))
        if playback_controls:
            print(f"✅ 找到音频控制项：{playback_controls[0]}")
            return playback_controls[0]
        else:
            print("⚠️  未检测到音频控制项，将尝试不指定控制项调节音量")
            return None
    except Exception as e:
        print(f"❌ 音频控制检测失败：{str(e)}")
        return None
 class VolumeController:
    def __init__(self, audio_control_name, current_volume, volume_step, min_volume, max_volume):
        # 接收调度脚本传入的全局参数，保持原逻辑
        self.available = True
        self.AUDIO_CONTROL_NAME = audio_control_name
        self.CURRENT_VOLUME = current_volume
        self.VOLUME_STEP = volume_step
        self.MIN_VOLUME = min_volume
        self.MAX_VOLUME = max_volume
        try:
            # 强制取消静音并设置初始音量
            self.set_system_volume(self.CURRENT_VOLUME)
            current_volume = self.get_system_volume()
            if current_volume is not None:
                self.CURRENT_VOLUME = current_volume
                print(f"🔊 音量控制初始化成功（当前音量：{self.CURRENT_VOLUME}%）")
            else:
                print(f"⚠️  无法获取当前音量，但已尝试设置为{self.CURRENT_VOLUME}%")
        except Exception as e:
            self.available = False
            print(f"❌ 音量控制失败：{str(e)}")
    def get_system_volume(self):
        try:
            # 根据检测到的控制项获取音量
            cmd = ["amixer", "get"]
            if self.AUDIO_CONTROL_NAME:
                cmd.append(self.AUDIO_CONTROL_NAME)
            result = subprocess.run(cmd, capture_output=True, text=True)
            volume_match = re.search(r"(\d+)%", result.stdout)
            return int(volume_match.group(1)) if volume_match else None
        except Exception as e:
            print(f"❌ 获取音量失败：{str(e)}")
            return None
    def set_system_volume(self, target_volume: int):
        target_volume = max(self.MIN_VOLUME, min(self.MAX_VOLUME, target_volume))
        try:
            # 根据检测到的控制项设置音量
            cmd = ["amixer", "set"]
            if self.AUDIO_CONTROL_NAME:
                cmd.append(self.AUDIO_CONTROL_NAME)
            cmd.extend([f"{target_volume}%", "unmute"])  # 强制取消静音
            subprocess.run(cmd, capture_output=True)
            self.CURRENT_VOLUME = target_volume
            print(f"🔊 音量已调整至：{self.CURRENT_VOLUME}%")
            return True
        except Exception as e:
            print(f"❌ 调整音量失败：{str(e)}")
            return False
    def adjust_volume(self, is_increase: bool):
        target_volume = self.CURRENT_VOLUME + self.VOLUME_STEP if is_increase else self.CURRENT_VOLUME - self.VOLUME_STEP
        return self.set_system_volume(target_volume)