#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
简化版：使用小红书核实山东国保单位开放情况
"""

import json
import time
from pathlib import Path
from urllib.parse import quote

from playwright.sync_api import sync_playwright

# Load the Xiaohongshu cookies from a JSON config kept in a sibling project
# directory. The file is expected to hold a JSON object mapping each cookie
# name to its value.
CONFIG_FILE = Path(__file__).parent.parent / "beijing-exhibitions" / "config" / "xiaohongshu_cookies.json"
try:
    with open(CONFIG_FILE, "r", encoding="utf-8") as f:
        _loaded_cookies = json.load(f)
    # Validate the shape here: the comprehension below calls .items(), which
    # would otherwise raise outside this handler for valid non-object JSON
    # (for example a list of cookie records).
    if not isinstance(_loaded_cookies, dict):
        raise TypeError(
            f"配置应为 JSON 对象，实际为 {type(_loaded_cookies).__name__}"
        )
    XHS_COOKIES_DICT = _loaded_cookies
    print(f"✓ 已加载小红书 cookie 配置：{len(XHS_COOKIES_DICT)} 个")
except Exception as e:
    # Best-effort: report the problem and continue with an empty cookie set.
    print(f"⚠ 加载 cookie 配置失败：{e}")
    XHS_COOKIES_DICT = {}

# Convert the name -> value mapping into the list of cookie records that is
# later passed to the browser context's add_cookies().
XHS_COOKIES = [
    {"name": name, "value": value, "domain": ".xiaohongshu.com", "path": "/"}
    for name, value in XHS_COOKIES_DICT.items()
]

# Search query used for this test run: Jinan, Hongjialou Catholic Church,
# plus the word for "open".
TEST_KEYWORD = "济南 洪家楼天主教堂 开放"

# Announce the keyword between two 60-character rules (one print call, with
# each piece on its own line).
print("=" * 60, f"测试搜索：{TEST_KEYWORD}", "=" * 60, sep="\n")

# Drive a headless Chromium session: open the Xiaohongshu search page for
# TEST_KEYWORD, save the rendered HTML under ./data, then probe a list of
# candidate CSS selectors and print a preview of the first matches.
with sync_playwright() as p:
    browser = p.chromium.launch(headless=True, args=[
        '--disable-blink-features=AutomationControlled',
        '--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
    ])

    try:
        context = browser.new_context()
        context.add_cookies(XHS_COOKIES)
        page = context.new_page()

        try:
            # Percent-encode the keyword so spaces, non-ASCII text and
            # reserved characters (&, #, +, %) cannot corrupt the query.
            encoded_keyword = quote(TEST_KEYWORD, safe="")
            url = f"https://www.xiaohongshu.com/search_result?keyword={encoded_keyword}&source=web_search_result_notes"
            print(f"访问：{url}")

            page.goto(url, wait_until="domcontentloaded", timeout=60000)
            page.wait_for_timeout(8000)  # fixed pause so page scripts can render

            # Save the full rendered page for offline inspection.
            content = page.content()
            page_html_file = Path(__file__).parent / "data" / "xiaohongshu_shandong_test.html"
            # Create the output directory on first run; without this the
            # write fails and the selector probing below never happens.
            page_html_file.parent.mkdir(parents=True, exist_ok=True)
            # Encode once and write the bytes so the size reported below is
            # a true byte count rather than a character count.
            html_bytes = content.encode("utf-8")
            page_html_file.write_bytes(html_bytes)
            print(f"✓ 页面已保存：{page_html_file} ({len(html_bytes)} 字节)")

            # Candidate selectors, tried in order; the first one that matches
            # anything wins.
            selectors = [
                "section.note-item",
                "div.note-card",
                "div.search-result-item",
                "article.note-item",
                "div[role='article']"
            ]

            for selector in selectors:
                notes = page.query_selector_all(selector)
                if notes:
                    print(f"✓ 选择器 '{selector}' 找到 {len(notes)} 篇笔记")

                    # Preview the first three matches: flattened text,
                    # truncated to 100 characters.
                    for i, note in enumerate(notes[:3]):
                        try:
                            text = note.inner_text().strip().replace('\n', ' ')[:100]
                            print(f"  [{i+1}] {text}")
                        except Exception as e:
                            print(f"  [{i+1}] 提取失败：{e}")
                    break
            else:
                # No selector matched: print the page title as a diagnostic.
                print("⚠ 未找到笔记，尝试获取页面标题...")
                title = page.title()
                print(f"页面标题：{title}")

        except Exception as e:
            # Top-level boundary of the scrape: report and fall through to
            # cleanup instead of crashing with a traceback.
            print(f"❌ 爬取失败：{e}")
    finally:
        # Close the browser on every exit path, including Ctrl-C and
        # failures while setting up the context or cookies.
        browser.close()

print("\n✓ 测试完成")
