#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
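Weekly memory distillation script.
Reads the memory/YYYY-MM-DD.md files for the date range given on the command
line (typically the last 7 days) and extracts the information worth keeping
long-term into a weekly distillation report.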
"""

import sys
import json
from pathlib import Path
from datetime import datetime

# Directory two levels up from this script file (the parent of the folder
# that contains the script).
_PROJECT_ROOT = Path(__file__).parent.parent

# Folder the daily "<YYYY-MM-DD>.md" notes are read from and the weekly
# distillation report is written to.
MEMORY_DIR = _PROJECT_ROOT / "memory"
# Path of the long-term memory file; the functions visible in this file only
# mention it in messages and never open it.
MEMORY_MD = _PROJECT_ROOT / "MEMORY.md"


def read_daily_memory(date_str):
    """Return the text of one day's memory file, or None if it is absent.

    Args:
        date_str: File stem of the daily note; the file looked up is
            ``MEMORY_DIR / "<date_str>.md"``.

    Returns:
        The file content decoded as UTF-8, or ``None`` when the path does
        not exist.
    """
    daily_file = MEMORY_DIR / f"{date_str}.md"
    if daily_file.exists():
        return daily_file.read_text(encoding='utf-8')
    return None


# Substrings that mark a line as the start of a "lesson learned" section.
_LESSON_MARKERS = ("⚠️", "教训", "错误")
# Substrings that mark a line as describing a technical solution.
_SOLUTION_MARKERS = ("技术方案", "Skill")


def _contains_any(line, markers):
    """Return True when *line* contains at least one of *markers*."""
    return any(marker in line for marker in markers)


def _completed_tasks(lines, date_str):
    """Collect ✅ lines that mention completion/success and carry a bold title."""
    items = []
    for line in lines:
        if "✅" not in line or not ("完成" in line or "成功" in line):
            continue
        # Only lines with a **bold** span are kept; the text after the first
        # "**" (up to the next one, if any) becomes the title.
        if "**" not in line:
            continue
        items.append({
            "type": "completed_task",
            "date": date_str,
            "title": line.split('**')[1].strip(),
            "content": line.strip()
        })
    return items


def _lesson_item(section_lines, date_str):
    """Build one lesson item from the first 10 lines of a collected section."""
    return {
        "type": "lesson_learned",
        "date": date_str,
        "title": "经验教训",
        "content": '\n'.join(section_lines[:10])
    }


def _lessons(lines, date_str):
    """Collect sections that start at a line containing a lesson marker."""
    items = []
    section = []
    in_section = False

    for line in lines:
        if _contains_any(line, _LESSON_MARKERS):
            in_section = True
        if not in_section:
            continue

        section.append(line)
        # A section is closed by the first non-blank, non-indented line seen
        # once more than 3 lines have been collected; that closing line is
        # part of the section.
        if line.strip() and not line.startswith(' ') and len(section) > 3:
            items.append(_lesson_item(section, date_str))
            in_section = False
            section = []

    # A section still open when the text ends would otherwise be lost, so
    # emit it too (without the blank lines trailing it).
    if in_section:
        while section and not section[-1].strip():
            section.pop()
        if section:
            items.append(_lesson_item(section, date_str))
    return items


def _technical_solutions(lines, date_str):
    """Collect each marker line together with up to 4 lines that follow it."""
    return [
        {
            "type": "technical_solution",
            "date": date_str,
            "title": line.strip(),
            "content": '\n'.join(lines[index:index + 5])
        }
        for index, line in enumerate(lines)
        if _contains_any(line, _SOLUTION_MARKERS)
    ]


def extract_key_info(content, date_str):
    """Extract key items from the text of one daily memory file.

    Three kinds of items are collected, in this order:

    * ``completed_task`` - a line containing "✅" and "完成" or "成功" that
      also has a ``**bold**`` span; the bold text is the title.
    * ``lesson_learned`` - a run of lines starting at a line that contains
      "⚠️", "教训" or "错误"; at most the first 10 lines are kept. A section
      that is still open at the end of the text is emitted as well.
    * ``technical_solution`` - a line containing "技术方案" or "Skill",
      with up to 4 following lines as content.

    Args:
        content: Full text of the daily file; ``None`` or ``""`` yields ``[]``.
        date_str: Date label copied into every item.

    Returns:
        A list of dicts with the keys ``type``, ``date``, ``title`` and
        ``content``.
    """
    if not content:
        return []

    lines = content.split('\n')
    return (
        _completed_tasks(lines, date_str)
        + _lessons(lines, date_str)
        + _technical_solutions(lines, date_str)
    )


def generate_distill_report(start_date, end_date, week_num, all_items):
    """Render the weekly distillation report as a Markdown string.

    Args:
        start_date: First covered date, inserted verbatim into the header.
        end_date: Last covered date, inserted verbatim into the header.
        week_num: Week label used in the report title.
        all_items: Item dicts with the keys ``type``, ``date``, ``title``
            and ``content``.

    Returns:
        The report text. The summary lists at most 5 completed tasks,
        3 lessons and 3 technical solutions; the "keep long-term" section
        shows the first 10 items with the content cut to 200 characters,
        followed by "..." only when something was actually cut off. The
        statistics table counts all items, not just the listed ones.
    """
    # Take a single timestamp so the header and footer can never disagree.
    generated_at = datetime.now().strftime('%Y-%m-%d %H:%M')

    completed_tasks = [item for item in all_items if item['type'] == 'completed_task']
    lessons = [item for item in all_items if item['type'] == 'lesson_learned']
    tech_solutions = [item for item in all_items if item['type'] == 'technical_solution']

    parts = [f"""# {week_num} 记忆蒸馏报告

**蒸馏时间**：{generated_at}
**覆盖日期**：{start_date} 至 {end_date}
**蒸馏原则**：只保留 3 个月后仍有价值的内容

---

## 📊 本周主要任务

"""]

    # (heading, items of that type, maximum number of entries listed)
    summary_sections = (
        ("### 已完成任务\n\n", completed_tasks, 5),
        ("### 经验教训\n\n", lessons, 3),
        ("### 技术方案\n\n", tech_solutions, 3),
    )
    for heading, group, limit in summary_sections:
        if not group:
            continue
        parts.append(heading)
        parts.extend(
            f"- **{entry['date']}**: {entry['title']}\n" for entry in group[:limit]
        )
        parts.append("\n")

    parts.append("""---

## 📝 值得长期保留的信息

建议更新 MEMORY.md 的内容：
""")

    for item in all_items[:10]:
        excerpt = item['content'][:200]
        # Mark the excerpt as shortened only when text was really dropped.
        if len(item['content']) > 200:
            excerpt += "..."
        parts.append(f"\n### {item['title']} ({item['date']})\n")
        parts.append(f"{excerpt}\n")

    parts.append(f"""
---

## 🗑️ 可清理的临时信息

- 具体的 bug 修复细节（已固化到 Skill）
- 临时测试文件
- 单次任务的中间状态

---

## 📊 蒸馏统计

| 类别 | 数量 |
|------|------|
| 已完成任务 | {len(completed_tasks)} |
| 经验教训 | {len(lessons)} |
| 技术方案 | {len(tech_solutions)} |
| 总计 | {len(all_items)} |

---

*蒸馏完成时间：{generated_at}*
*下次蒸馏：下周同日*
""")

    return "".join(parts)


def update_memory_md(week_num, all_items):
    """Print a reminder to review the report; MEMORY.md itself is not touched.

    This is a placeholder for a real merge step: it only prints two
    informational lines, one of them naming the report path for *week_num*.

    Args:
        week_num: Week label used to build the report file name.
        all_items: Extracted items; currently unused.

    Returns:
        Always ``True``.
    """
    report_path = MEMORY_DIR / f'weekly_distill_{week_num}.md'
    messages = (
        "建议手动审查蒸馏报告并更新 MEMORY.md",
        f"蒸馏报告已保存到：{report_path}",
    )
    for message in messages:
        print(f"[INFO] {message}")
    return True


def main():
    """Command-line entry point: distill a date range of daily memory files.

    Reads ``<start_date> <end_date> [week_num]`` from ``sys.argv`` (dates in
    ``YYYY-MM-DD`` form), extracts the key items of every daily file in the
    inclusive range, writes ``weekly_distill_<week_num>.md`` into
    ``MEMORY_DIR`` and prints progress messages.

    When ``week_num`` is omitted it defaults to the current ISO week,
    formatted as ``<ISO year>-W<ISO week>``.

    Returns:
        0 on success.

    Raises:
        SystemExit: With status 1 when arguments are missing, a date cannot
            be parsed, or the start date is after the end date.
    """
    from datetime import timedelta

    if len(sys.argv) < 3:
        print("用法：python3 weekly_distill.py <start_date> <end_date> [week_num]")
        sys.exit(1)

    start_date = sys.argv[1]
    end_date = sys.argv[2]

    # Validate the range up front so bad input produces a readable message
    # instead of a traceback or a silently empty report.
    try:
        start = datetime.strptime(start_date, '%Y-%m-%d')
        end = datetime.strptime(end_date, '%Y-%m-%d')
    except ValueError as exc:
        print(f"[ERROR] 日期格式无效（应为 YYYY-MM-DD）：{exc}")
        sys.exit(1)
    if start > end:
        print(f"[ERROR] 起始日期 {start_date} 晚于结束日期 {end_date}")
        sys.exit(1)

    if len(sys.argv) > 3:
        week_num = sys.argv[3]
    else:
        # Pair the ISO week with the ISO year: around New Year the calendar
        # year and the ISO week's year differ, which would mislabel the week.
        iso_year, iso_week, _ = datetime.now().isocalendar()
        week_num = f"{iso_year}-W{iso_week:02d}"

    print(f"[INFO] 开始记忆蒸馏：{start_date} 至 {end_date}")
    print(f"[INFO] 周次：{week_num}")

    # Walk the range one day at a time; days without a file are skipped.
    all_items = []
    current = start
    while current <= end:
        date_str = current.strftime('%Y-%m-%d')
        content = read_daily_memory(date_str)
        if content:
            items = extract_key_info(content, date_str)
            all_items.extend(items)
            print(f"[INFO] 读取 {date_str}.md - 提取 {len(items)} 条关键信息")
        current += timedelta(days=1)

    print(f"[INFO] 共提取 {len(all_items)} 条关键信息")

    # The report is written even when nothing was extracted.
    report = generate_distill_report(start_date, end_date, week_num, all_items)
    report_file = MEMORY_DIR / f"weekly_distill_{week_num}.md"
    with open(report_file, 'w', encoding='utf-8') as f:
        f.write(report)

    print(f"[INFO] ✓ 蒸馏报告已保存：{report_file}")

    if all_items:
        update_memory_md(week_num, all_items)
        print(f"[INFO] ✓ 记忆蒸馏完成")
    else:
        print(f"[INFO] ⚠️ 未找到需要蒸馏的内容")

    return 0


# Run the CLI only when executed as a script; main()'s return value becomes
# the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
