#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
批量核实山东国保单位开放情况（使用小红书）
"""

import json
import time
from pathlib import Path
from urllib.parse import quote

from playwright.sync_api import sync_playwright

# Load Xiaohongshu cookies from ../beijing-exhibitions/config relative to this
# script's directory. The file is expected to hold a JSON object mapping
# cookie name -> cookie value.
CONFIG_FILE = Path(__file__).parent.parent / "beijing-exhibitions" / "config" / "xiaohongshu_cookies.json"
try:
    with open(CONFIG_FILE, "r", encoding="utf-8") as f:
        XHS_COOKIES_DICT = json.load(f)
except (OSError, ValueError):
    # Missing/unreadable file, or malformed content (json.JSONDecodeError and
    # UnicodeDecodeError are both ValueError subclasses): run without cookies.
    # Deliberately narrower than a bare `except`, so Ctrl-C and SystemExit
    # are no longer swallowed here.
    XHS_COOKIES_DICT = {}

# The comprehension below needs a name -> value mapping; any other top-level
# JSON type (list, string, number, ...) is treated the same as "no cookies"
# instead of crashing with AttributeError at import time.
if not isinstance(XHS_COOKIES_DICT, dict):
    XHS_COOKIES_DICT = {}

# Cookie records in the shape passed to the browser context's add_cookies().
XHS_COOKIES = [
    {"name": name, "value": value, "domain": ".xiaohongshu.com", "path": "/"}
    for name, value in XHS_COOKIES_DICT.items()
]

# Shandong national-level protected heritage sites still to be verified,
# grouped by city (priority: well-known attractions first).
SHANDONG_GUOBAO = {
    "济南": [
        "洪家楼天主教堂",
        "千佛山",
        "四门塔",
        "灵岩寺",
        "府学文庙",
        "万竹园",
        "城子崖遗址",
    ],
    "青岛": [
        "栈桥",
        "天后宫",
        "青岛天主教堂",
        "琅琊台",
        "康有为故居",
        "老舍故居",
    ],
    "烟台": [
        "蓬莱水城",
        "烟台山近代建筑",
        "牟氏庄园",
        "长岛庙岛",
        "莱州云峰山刻石",
    ],
    "威海": [
        "刘公岛",
        "成山头",
        "圣经山摩崖",
        "威海英式建筑",
    ],
    "潍坊": [
        "十笏园",
        "沂山",
        "云门山",
        "青州古城",
        "诸城恐龙化石",
    ],
    "淄博": [
        "齐国故城",
        "临淄墓群",
        "周村古商城",
        "蒲松龄故居",
        "博山古窑址",
    ],
    "泰安": [
        "岱庙",
        "经石峪",
        "泰山石刻",
        "蒿里山遗址",
    ],
    "济宁": [
        "孔庙",
        "孔府",
        "孔林",
        "颜庙",
        "周公庙",
        "孟庙",
        "孟府",
        "铁山摩崖",
        "武氏墓群石刻",
    ],
}

# Substrings treated as evidence that a note describes the site as open.
_OPEN_KEYWORDS = ("开放", "开门", "营业", "游玩", "打卡", "参观", "门票")
# Substrings treated as evidence that a note describes the site as closed.
_CLOSE_KEYWORDS = ("关闭", "没开", "维修", "改造", "不开放")
# Substrings treated as evidence that a note is recent.
_RECENT_KEYWORDS = ("2026", "2025", "今年", "最近")


def _scan_note(text):
    """Return ``(is_recent, mentions_open, mentions_close)`` for one note's text."""
    is_recent = any(kw in text for kw in _RECENT_KEYWORDS)
    mentions_close = any(kw in text for kw in _CLOSE_KEYWORDS)

    # Blank out closure phrases before looking for "open" keywords; otherwise
    # "不开放" also matches "开放" and "没开门" also matches "开门", so a
    # closure note would be counted on both sides. A space (not "") is used
    # so the characters around a removed phrase cannot fuse into a keyword.
    remainder = text
    for kw in _CLOSE_KEYWORDS:
        remainder = remainder.replace(kw, " ")
    mentions_open = any(kw in remainder for kw in _OPEN_KEYWORDS)

    return is_recent, mentions_open, mentions_close


def _judge(open_count, close_count, recent_count):
    """Map mention counts to a ``(status, confidence)`` pair.

    Branch order matters: a tie with at least one open mention falls through
    to the low-confidence "open" result, and no mentions at all yields the
    "pending verification" status.
    """
    if open_count > close_count and recent_count > 0:
        return "✅ 开放", ("高" if recent_count >= 3 else "中")
    if close_count > open_count:
        return "❌ 关闭", ("高" if close_count >= 3 else "低")
    if open_count > 0:
        return "✅ 开放", "低"
    return "⏸️ 待核实", "-"


def verify_site(city, site):
    """Estimate whether one site is open by scanning Xiaohongshu search results.

    Launches a headless Chromium, searches for "<city> <site> 开放 门票" and
    applies keyword heuristics to the text of the first 10 result cards.

    Args:
        city: City name, used as part of the search keyword.
        site: Site name, used in the search keyword and echoed in the result.

    Returns:
        On success, a dict with keys ``site``, ``status``, ``confidence``,
        ``notes_found``, ``recent``, ``open_mentions`` and ``close_mentions``.
        If navigation or scraping raises, a dict with ``site``, the pending
        ``status``, ``confidence`` "-" and ``error`` (the exception text).
    """
    keyword = f"{city} {site} 开放 门票"

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True, args=[
            '--disable-blink-features=AutomationControlled',
            '--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        ])

        context = browser.new_context()
        context.add_cookies(XHS_COOKIES)
        page = context.new_page()

        try:
            # Percent-encode the keyword: it contains spaces and CJK text, and
            # a site name containing "&", "#" or "+" would otherwise corrupt
            # the query string.
            url = (
                "https://www.xiaohongshu.com/search_result"
                f"?keyword={quote(keyword, safe='')}&source=web_search_result_notes"
            )
            page.goto(url, wait_until="domcontentloaded", timeout=60000)
            # Fixed pause after DOMContentLoaded before querying result cards.
            page.wait_for_timeout(5000)

            notes = page.query_selector_all("section.note-item")

            open_count = 0
            close_count = 0
            recent_count = 0

            # Only the first 10 cards are inspected.
            for note in notes[:10]:
                try:
                    text = note.inner_text().strip()
                except Exception:
                    # One unreadable card must not abort the whole site.
                    continue

                is_recent, mentions_open, mentions_close = _scan_note(text)
                if is_recent:
                    recent_count += 1
                if mentions_open:
                    open_count += 1
                if mentions_close:
                    close_count += 1

            status, confidence = _judge(open_count, close_count, recent_count)

            return {
                "site": site,
                "status": status,
                "confidence": confidence,
                "notes_found": len(notes),
                "recent": recent_count,
                "open_mentions": open_count,
                "close_mentions": close_count
            }

        except Exception as e:
            # Best effort: report the failure in the result instead of
            # aborting the batch run.
            return {
                "site": site,
                "status": "⏸️ 待核实",
                "confidence": "-",
                "error": str(e)
            }
        finally:
            browser.close()


def main():
    """Verify every site in SHANDONG_GUOBAO, save the results, print a summary.

    Sites are checked one at a time with a 3-second pause after each call.
    Results are written as JSON to ``data/shandong_guobao_verified.json``
    next to this script, keyed by city, and a status breakdown is printed.
    """
    print("=" * 70)
    print("山东国保单位开放情况核实 - 小红书批量搜索")
    print("=" * 70)

    all_results = {}
    verified_count = 0

    for city, sites in SHANDONG_GUOBAO.items():
        print(f"\n【{city}】共 {len(sites)} 处")
        city_results = []

        for position, site in enumerate(sites, start=1):
            print(f"  [{position}/{len(sites)}] {site}...", end=" ", flush=True)

            result = verify_site(city, site)
            city_results.append(result)
            verified_count += 1

            print(f"{result['status']} ({result['confidence']}置信度)")

            # Pause 3 seconds between searches to avoid tripping rate limiting.
            time.sleep(3)

        all_results[city] = city_results
        print(f"  ✓ {city} 完成")

    # Persist the per-city results.
    output_file = Path(__file__).parent / "data" / "shandong_guobao_verified.json"
    output_file.parent.mkdir(exist_ok=True)

    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(all_results, f, ensure_ascii=False, indent=2)

    print(f"\n✓ 结果已保存到：{output_file}")

    # Summary statistics.
    print("\n" + "=" * 70)
    print("📊 核实统计")
    print("=" * 70)

    statuses = [r['status'] for results in all_results.values() for r in results]
    open_count = sum(1 for s in statuses if "✅" in s)
    close_count = sum(1 for s in statuses if "❌" in s)
    unknown_count = sum(1 for s in statuses if "⏸️" in s)

    def percent(count):
        # Guard against an empty site table, which would otherwise raise
        # ZeroDivisionError after the results file has been written.
        return count / verified_count * 100 if verified_count else 0.0

    print(f"总计核实：{verified_count} 处")
    print(f"✅ 开放：{open_count} 处 ({percent(open_count):.1f}%)")
    print(f"❌ 关闭：{close_count} 处 ({percent(close_count):.1f}%)")
    print(f"⏸️ 待核实：{unknown_count} 处 ({percent(unknown_count):.1f}%)")

# Run the batch verification only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
