#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
使用 Playwright + Cookies 爬取小红书展览信息
"""

import json
import os
import urllib.parse

from playwright.sync_api import sync_playwright

# Xiaohongshu session cookies (supplied by the user).
# NOTE(review): hard-coded credentials — `web_session` and `id_token` grant
# account access and will expire; they should be loaded from env/config
# rather than committed to source control.
XHS_COOKIES = [
    {"name": "acw_tc", "value": "0a0bb41a17726804792802066ef22266fb6b0216da2a3e9f089735f8353b77", "domain": ".xiaohongshu.com", "path": "/"},
    {"name": "abRequestId", "value": "b5346cbb-6db3-5645-accc-df5d27fd9362", "domain": ".xiaohongshu.com", "path": "/"},
    {"name": "webBuild", "value": "5.13.1", "domain": ".xiaohongshu.com", "path": "/"},
    {"name": "xsecappid", "value": "xhs-pc-web", "domain": ".xiaohongshu.com", "path": "/"},
    {"name": "a1", "value": "19cbbfd8f46tok3grdu3mmi72tpiihd7co02rf9oa30000122754", "domain": ".xiaohongshu.com", "path": "/"},
    {"name": "webId", "value": "68ac71cf3f14eb4a280b442b71aad7e5", "domain": ".xiaohongshu.com", "path": "/"},
    {"name": "gid", "value": "yjSDDifj0fSfyjSDDifYilij4K9lTqkFf7q68l063WJ9UJq833xfWF888yJJW248dDqfjJ0Y", "domain": ".xiaohongshu.com", "path": "/"},
    {"name": "web_session", "value": "040069b8dcb7aa9bcf6957bd9d3b4b7b5c866c", "domain": ".xiaohongshu.com", "path": "/"},
    {"name": "id_token", "value": "VjEAAPwuVKkxM5M3tgTrefheWsAsAIisJtFuRYQM3EFnhkneE3Zag62PzVeUMmRAgCOznnJXrYICwToncTBIL4u7bKSd7M8QMVA0TgyLKo+Oknjg00IY1MQziJXeiutd2NTcUd+B", "domain": ".xiaohongshu.com", "path": "/"},
    {"name": "loadts", "value": "1772680707856", "domain": ".xiaohongshu.com", "path": "/"},
]

def search_xiaohongshu(keyword):
    """Search Xiaohongshu notes for *keyword* and scrape the result cards.

    Launches a headless Chromium via Playwright, injects the session
    cookies, loads the web search-result page, and extracts up to 10
    note cards. Also dumps the rendered HTML to
    ``data/xiaohongshu_page.html`` for offline analysis.

    Args:
        keyword: Search phrase; may contain spaces / non-ASCII characters
            (it is percent-encoded before being placed in the URL).

    Returns:
        list[dict]: one dict per note with keys "title", "user", "source".
        Empty on failure (all scraping errors are caught and logged).
    """
    exhibitions = []

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True, args=[
            '--disable-blink-features=AutomationControlled',
        ])

        # Set the UA via the context: Playwright's supported way to spoof
        # the user agent (a bare --user-agent launch arg is not honored
        # for contexts it creates).
        context = browser.new_context(
            user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
        )
        context.add_cookies(XHS_COOKIES)
        page = context.new_page()

        try:
            # BUG FIX: percent-encode the keyword — raw spaces/CJK text
            # ("北京看展 2026") would otherwise yield a malformed URL.
            encoded_kw = urllib.parse.quote(keyword)
            url = f"https://www.xiaohongshu.com/search_result?keyword={encoded_kw}&source=web_search_result_notes"
            print(f"访问：{url}")

            page.goto(url, wait_until="domcontentloaded", timeout=60000)
            page.wait_for_timeout(5000)  # give client-side JS time to render results

            # Full rendered HTML, saved below for selector debugging.
            content = page.content()
            print(f"页面长度：{len(content)}")

            # Each search result is rendered as a <section class="note-item">.
            notes = page.query_selector_all("section.note-item")
            print(f"找到 {len(notes)} 篇笔记")

            for note in notes[:10]:
                try:
                    title_el = note.query_selector("div.title")
                    title = title_el.inner_text().strip() if title_el else ""

                    user_el = note.query_selector("div.username")
                    user = user_el.inner_text().strip() if user_el else ""

                    # Skip cards with no title (ads / placeholders).
                    if title:
                        exhibitions.append({
                            "title": title,
                            "user": user,
                            "source": "小红书"
                        })
                except Exception as e:
                    print(f"解析笔记失败：{e}")

            # BUG FIX: create the output directory first — writing into a
            # missing data/ dir raised FileNotFoundError, which the broad
            # except below silently swallowed.
            os.makedirs("data", exist_ok=True)
            with open("data/xiaohongshu_page.html", "w", encoding="utf-8") as f:
                f.write(content)
            print("已保存页面到 data/xiaohongshu_page.html")

        except Exception as e:
            print(f"爬取失败：{e}")
        finally:
            # BUG FIX: always release the browser, even on unexpected errors.
            browser.close()

    return exhibitions


if __name__ == "__main__":
    # Entry point: scrape "Beijing exhibitions 2026" notes and persist them.
    print("=" * 50)
    print("小红书爬虫 - 北京看展 2026")
    print("=" * 50)

    exhibitions = search_xiaohongshu("北京看展 2026")

    print(f"\n找到 {len(exhibitions)} 篇笔记：")
    for ex in exhibitions[:5]:
        print(f"  - {ex['title']} by @{ex['user']}")

    # BUG FIX: ensure data/ exists — on a fresh checkout json.dump would
    # crash with FileNotFoundError before saving anything.
    os.makedirs("data", exist_ok=True)
    with open("data/xiaohongshu_exhibitions.json", "w", encoding="utf-8") as f:
        json.dump(exhibitions, f, ensure_ascii=False, indent=2)

    print("\n结果已保存到 data/xiaohongshu_exhibitions.json")
