#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
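Verify the opening status of Shandong national heritage sites (国保单位)
by searching Xiaohongshu notes through the `mcporter` MCP command-line client.

Note: the title mentions OCR, but this script contains no OCR step; the status
is inferred only from keyword matches in the raw search output.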
"""

import json
import time
import os
import subprocess
from pathlib import Path

# Checklist of sites whose opening status is to be verified, keyed by city name.
# Each value is the list of site names that are combined with the city name to
# build the search keywords. (The original note describes these as Shandong
# "国保单位", i.e. national-level protected heritage units, pending verification.)
SHANDONG_GUOBAO = {
    "济南": ["洪家楼天主教堂", "千佛山", "四门塔", "灵岩寺", "府学文庙", "万竹园", "城子崖遗址"],
    "青岛": ["栈桥", "天后宫", "青岛天主教堂", "琅琊台", "康有为故居", "老舍故居"],
    "烟台": ["蓬莱水城", "烟台山近代建筑", "牟氏庄园", "长岛庙岛", "莱州云峰山刻石"],
    "威海": ["刘公岛", "成山头", "圣经山摩崖", "威海英式建筑"],
    "潍坊": ["十笏园", "沂山", "云门山", "青州古城", "诸城恐龙化石"],
    "淄博": ["齐国故城", "临淄墓群", "周村古商城", "蒲松龄故居", "博山古窑址"],
    "泰安": ["岱庙", "经石峪", "泰山石刻", "蒿里山遗址"],
    "济宁": ["孔庙", "孔府", "孔林", "颜庙", "周公庙", "孟庙", "孟府", "铁山摩崖", "武氏墓群石刻"]
}


def search_xiaohongshu(keyword):
    """Search Xiaohongshu notes for *keyword* through the ``mcporter`` CLI.

    Runs ``mcporter call xiaohongshu.search_feeds --output raw`` as a
    subprocess and inspects its standard output. The function never raises
    for tool failures; every failure is reported through the returned dict.

    Args:
        keyword: Search phrase, interpolated into the ``keyword: "..."``
            call argument.

    Returns:
        dict: Always contains ``success`` (bool) and ``notes_count`` (int).
        On success it also contains ``data`` (the raw stdout text), and
        ``notes_count`` is the number of ``"noteCard"`` occurrences in that
        text. On failure it contains ``error`` (str) describing the cause.
    """
    import re  # local import: ``re`` is not imported at file level

    cmd = [
        'mcporter', 'call', 'xiaohongshu.search_feeds',
        '--output', 'raw',
        f'keyword: "{keyword}"',
    ]
    env = os.environ.copy()
    # Presumably milliseconds (mcporter's own call timeout) -- TODO confirm.
    # The subprocess timeout below (90 s) is the hard upper bound.
    env['MCPORTER_CALL_TIMEOUT'] = '60000'

    try:
        # Decode explicitly as UTF-8 so Chinese output does not depend on the
        # locale's default encoding; undecodable bytes are replaced, not fatal.
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            encoding='utf-8',
            errors='replace',
            timeout=90,
            env=env,
        )
    except (subprocess.SubprocessError, OSError, ValueError) as e:
        # Timeout, missing/non-executable binary, or invalid argument
        # (e.g. an embedded NUL byte in the keyword).
        return {"success": False, "error": str(e), "notes_count": 0}

    if result.returncode != 0:
        detail = (result.stderr or result.stdout or "").strip()
        return {
            "success": False,
            "error": f"mcporter exited with code {result.returncode}: {detail[:200]}",
            "notes_count": 0,
        }

    # The raw output is only accepted when it contains a ``content: [...]``
    # section; anything else is treated as a failed search.
    if re.search(r'content:\s*\[(.*?)\]', result.stdout, re.DOTALL) is None:
        return {
            "success": False,
            "error": "no content section found in mcporter output",
            "notes_count": 0,
        }

    return {
        "success": True,
        "data": result.stdout,
        "notes_count": result.stdout.count('"noteCard"'),
    }


def verify_site(city, site):
    """Estimate whether a single site is open, based on Xiaohongshu searches.

    Three keyword variants are searched via ``search_xiaohongshu``. For each
    successful search, the raw output is scanned for phrases suggesting the
    site is open or closed; each search contributes at most one "open" and
    one "closed" mention.

    Args:
        city: City name used as the first part of every search keyword.
        site: Site name used as the second part of every search keyword.

    Returns:
        dict: ``site`` (the site name), ``status`` (one of "✅ 开放",
        "❌ 关闭", "⏸️ 待核实"), ``confidence`` ("高", "中", "低" or "-")
        and ``notes_found`` (total note count over all successful searches).
    """
    keywords = [
        f"{city} {site} 开放 门票",
        f"{city} {site} 游玩 攻略",
        f"{city} {site} 参观"
    ]

    # Phrases that contain "开放"/"营业" but state the opposite. A plain
    # substring test would count e.g. "暂不开放" as evidence the site is open,
    # so these are counted as closure evidence and removed before the
    # affirmative check.
    negated_phrases = (
        "不对外开放", "未对外开放",
        "不开放", "未开放", "暂停开放", "停止开放",
        "不营业", "暂停营业", "停止营业",
    )
    closed_markers = ("关闭", "维修") + negated_phrases

    total_notes = 0
    open_mentions = 0
    close_mentions = 0

    for keyword in keywords:
        result = search_xiaohongshu(keyword)
        if not result["success"]:
            continue

        total_notes += result.get("notes_count", 0)
        data = result.get("data", "")

        if any(marker in data for marker in closed_markers):
            close_mentions += 1

        # NOTE(review): the first keyword itself contains "开放"; if the raw
        # output echoes the query, this check always matches -- confirm.
        affirmative = data
        for phrase in negated_phrases:
            affirmative = affirmative.replace(phrase, "")
        if "开放" in affirmative or "营业" in affirmative:
            open_mentions += 1

    # Combine the evidence. Without any notes nothing can be concluded.
    if total_notes > 0:
        if open_mentions > close_mentions:
            status = "✅ 开放"
            confidence = "高" if total_notes >= 5 else "中"
        elif close_mentions > open_mentions:
            status = "❌ 关闭"
            confidence = "高" if total_notes >= 5 else "低"
        else:
            # Tie (including no mentions either way): default to open, but
            # flag it with the lowest confidence.
            status = "✅ 开放"
            confidence = "低"
    else:
        status = "⏸️ 待核实"
        confidence = "-"

    return {
        "site": site,
        "status": status,
        "confidence": confidence,
        "notes_found": total_notes
    }


def main():
    """Run the whole verification and write a JSON report.

    Steps:
      1. Check the Xiaohongshu login state through ``mcporter``.
      2. Call ``verify_site`` for every site in ``SHANDONG_GUOBAO``, pausing
         two seconds after each site.
      3. Write the per-city results to ``data/shandong_guobao_mcp_ocr.json``
         next to this script.
      4. Print summary statistics.

    Returns:
        int: 0 when the run completes; 1 when ``mcporter`` is not installed
        or the login check output does not contain "已登录".
    """
    print("=" * 70)
    print("山东国保单位开放情况核实 - 小红书 MCP + OCR")
    print("=" * 70)

    # Check the login state first.
    print("\n验证登录状态...")
    login_env = os.environ.copy()
    login_env['MCPORTER_CALL_TIMEOUT'] = '30000'
    try:
        # Argument list + env instead of a shell string: no shell parsing and
        # no dependency on POSIX ``export``. stderr is merged into stdout,
        # which is what the former ``2>&1`` did.
        login_check = subprocess.run(
            ['mcporter', 'call', 'xiaohongshu.check_login_status'],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            encoding='utf-8',
            errors='replace',
            timeout=30,
            env=login_env,
        )
    except FileNotFoundError as e:
        # Without the CLI every later search would fail as well, so stop here
        # rather than produce a report full of "pending" entries.
        print(f"❌ 未找到 mcporter 命令：{e}")
        return 1
    except (subprocess.SubprocessError, OSError) as e:
        # Best effort: a failed check (e.g. timeout) does not abort the run.
        print(f"⚠ 登录检查失败：{e}")
    else:
        if "已登录" not in login_check.stdout:
            print("❌ 小红书未登录，请先扫码登录")
            print(f"   调试信息：{login_check.stdout[:200]}")
            return 1
        print("✅ 已登录")

    all_results = {}
    verified_count = 0

    for city, sites in SHANDONG_GUOBAO.items():
        print(f"\n【{city}】共 {len(sites)} 处")
        city_results = []

        for i, site in enumerate(sites):
            print(f"  [{i+1}/{len(sites)}] {site}...", end=" ", flush=True)

            result = verify_site(city, site)
            city_results.append(result)
            verified_count += 1

            print(f"{result['status']} ({result['confidence']}置信度，{result['notes_found']}篇笔记)")

            # Wait 2 seconds between sites to avoid triggering rate limiting.
            time.sleep(2)

        all_results[city] = city_results
        print(f"  ✓ {city} 完成")

    # Save the results.
    output_file = Path(__file__).parent / "data" / "shandong_guobao_mcp_ocr.json"
    output_file.parent.mkdir(parents=True, exist_ok=True)

    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(all_results, f, ensure_ascii=False, indent=2)

    print(f"\n✓ 结果已保存到：{output_file}")

    # Summary statistics.
    print("\n" + "=" * 70)
    print("📊 核实统计")
    print("=" * 70)

    flat_results = [r for city_results in all_results.values() for r in city_results]
    open_count = sum(1 for r in flat_results if "✅" in r['status'])
    close_count = sum(1 for r in flat_results if "❌" in r['status'])
    unknown_count = sum(1 for r in flat_results if "⏸️" in r['status'])
    total_notes = sum(r['notes_found'] for r in flat_results)

    def percent(count):
        # Guard against an empty site list (would otherwise divide by zero).
        return count / verified_count * 100 if verified_count else 0.0

    print(f"总计核实：{verified_count} 处")
    print(f"✅ 开放：{open_count} 处 ({percent(open_count):.1f}%)")
    print(f"❌ 关闭：{close_count} 处 ({percent(close_count):.1f}%)")
    print(f"⏸️ 待核实：{unknown_count} 处 ({percent(unknown_count):.1f}%)")
    print(f"📱 搜索笔记：{total_notes} 篇")

    return 0


if __name__ == "__main__":
    # ``exit`` is a helper injected by the ``site`` module for interactive
    # sessions and may be missing (e.g. ``python -S``, frozen builds).
    # Raising SystemExit has the same effect and needs no import.
    raise SystemExit(main())
