#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Verify the opening status of national key heritage ("guobao") sites in
Shandong by searching Xiaohongshu (RED) notes and classifying the titles.
"""

import json
import time
from pathlib import Path
from urllib.parse import quote

from playwright.sync_api import sync_playwright

# Load Xiaohongshu cookies from the shared beijing-exhibitions config file.
CONFIG_FILE = Path(__file__).parent.parent / "beijing-exhibitions" / "config" / "xiaohongshu_cookies.json"
XHS_COOKIES_DICT = {}
try:
    XHS_COOKIES_DICT = json.loads(CONFIG_FILE.read_text(encoding="utf-8"))
    print(f"✓ 已加载小红书 cookie 配置：{len(XHS_COOKIES_DICT)} 个")
except Exception as e:
    # Best-effort: the crawler still runs (unauthenticated) without cookies.
    print(f"⚠ 加载 cookie 配置失败：{e}")

# Convert the name→value mapping into Playwright cookie records.
XHS_COOKIES = [
    {"name": cookie_name, "value": cookie_value, "domain": ".xiaohongshu.com", "path": "/"}
    for cookie_name, cookie_value in XHS_COOKIES_DICT.items()
]

# National key heritage ("guobao") sites in Shandong whose opening status
# needs verification, grouped by city. Keys are city names and values are
# site names; both stay in Chinese because they are used verbatim as
# Xiaohongshu search keywords.
SHANDONG_GUOBAO = {
    "济南": [
        "洪家楼天主教堂", "广智院", "千佛崖造像", "城子崖遗址", 
        "府学文庙", "万竹园", "西河遗址", "小荆山遗址",
        "济南老城区古建筑", "济南战役纪念馆"
    ],
    "青岛": [
        "栈桥", "天后宫", "青岛天主教堂", "琅琊台", 
        "齐长城遗址青岛段", "康有为故居", "老舍故居"
    ],
    "烟台": [
        "蓬莱水城", "烟台山近代建筑", "牟氏庄园", 
        "长岛庙岛显应宫", "莱州云峰山刻石"
    ],
    "威海": [
        "刘公岛甲午战争纪念地", "成山头", "圣经山摩崖",
        "威海英式建筑群"
    ],
    "潍坊": [
        "十笏园", "沂山", "云门山", "青州古城",
        "诸城恐龙化石遗址", "寿光纪国故城"
    ],
    "淄博": [
        "齐国故城", "临淄墓群", "周村古商城",
        "蒲松龄故居", "博山古窑址"
    ],
    "泰安": [
        "岱庙", "经石峪", "泰山石刻", "泰山古建筑群",
        "蒿里山遗址", "大汶口遗址泰安部分"
    ],
    "济宁": [
        "孔庙", "孔府", "孔林", "颜庙", "周公庙",
        "孟庙", "孟府", "孟林", "铁山摩崖石刻",
        "嘉祥武氏墓群石刻"
    ]
}


def _classify_title(title):
    """Heuristically map a note title to an open/closed status label.

    Check order matters: explicit "open" wording wins over "closed"
    wording, and generic travel-guide wording is the weakest signal.
    """
    if any(word in title for word in ("开放", "开门", "营业")):
        return "✅ 开放"
    if any(word in title for word in ("关闭", "没开", "维修")):
        return "❌ 关闭"
    if any(word in title for word in ("攻略", "游玩", "打卡")):
        # A visiting guide / check-in post usually implies the site is open.
        return "✅ 开放"
    return "未知"


def search_xiaohongshu(keyword, max_notes=5):
    """Search Xiaohongshu notes for *keyword* and classify opening status.

    Launches a headless Chromium via Playwright, opens the search-result
    page with the pre-loaded cookies, and scrapes up to *max_notes* note
    cards. Each title is heuristically labelled open / closed / unknown.

    Args:
        keyword: Search query; may contain CJK characters and spaces, so
            it is percent-encoded before being placed in the URL.
        max_notes: Maximum number of note cards to collect (default 5).

    Returns:
        list[dict]: One dict per note with keys ``title``, ``user``,
        ``status`` and ``keyword``. Empty on any scraping failure
        (the function is deliberately best-effort).
    """
    results = []

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True, args=[
            '--disable-blink-features=AutomationControlled',
            '--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        ])

        context = browser.new_context()
        context.add_cookies(XHS_COOKIES)
        page = context.new_page()

        try:
            # Percent-encode the keyword: raw CJK text and spaces are not
            # valid query-string characters.
            url = (
                "https://www.xiaohongshu.com/search_result"
                f"?keyword={quote(keyword)}&source=web_search_result_notes"
            )
            print(f"  搜索：{keyword}")

            page.goto(url, wait_until="domcontentloaded", timeout=60000)
            page.wait_for_timeout(5000)  # let client-side rendering settle

            # Primary note-card selector, with a fallback for the
            # alternate card layout.
            notes = page.query_selector_all("section.note-item")
            if not notes:
                notes = page.query_selector_all("div.note-card")

            print(f"  找到 {len(notes)} 篇笔记")

            for note in notes[:max_notes]:
                try:
                    title_el = note.query_selector("div.title, span.title, h3.title")
                    title = title_el.inner_text().strip() if title_el else ""

                    user_el = note.query_selector("div.username, span.username")
                    user = user_el.inner_text().strip() if user_el else ""

                    # Skip cards with no usable title (len > 2 filters out
                    # icon/ellipsis fragments).
                    if title and len(title) > 2:
                        results.append({
                            "title": title,
                            "user": user or "未知",
                            "status": _classify_title(title),
                            "keyword": keyword,
                        })
                except Exception:
                    # Best-effort: skip any card whose DOM doesn't match.
                    continue

        except Exception as e:
            print(f"  爬取失败：{e}")

        browser.close()

    return results


def main():
    """Run the verification crawl for every city, save and summarize.

    For each city, searches the first few sites on Xiaohongshu, collects
    the classified notes, writes everything to a JSON file under ``data/``
    next to this script, and prints a summary.
    """
    print("=" * 60)
    print("山东国保单位开放情况核实 - 小红书搜索")
    print("=" * 60)

    all_results = {}

    for city, sites in SHANDONG_GUOBAO.items():
        print(f"\n【{city}】共 {len(sites)} 处")
        city_results = []

        # Pilot run: only the first 3 sites per city to limit request volume.
        for site in sites[:3]:
            results = search_xiaohongshu(f"{city} {site} 开放")
            city_results.extend(results)

            # Pause between searches to avoid triggering anti-bot controls.
            time.sleep(2)

        all_results[city] = city_results
        print(f"  ✓ {city} 完成，共 {len(city_results)} 篇笔记")

    # Persist raw results as JSON; create the data dir (and any missing
    # parents) on first run.
    output_file = Path(__file__).parent / "data" / "shandong_guobao_xiaohongshu.json"
    output_file.parent.mkdir(parents=True, exist_ok=True)

    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(all_results, f, ensure_ascii=False, indent=2)

    print(f"\n✓ 结果已保存到：{output_file}")

    # Summary statistics.
    total_notes = sum(len(v) for v in all_results.values())
    print(f"\n📊 统计：共搜索 {len(all_results)} 个城市，获取 {total_notes} 篇笔记")


# Run the crawl only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
