#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
使用 Playwright 爬取动态网站展览信息
"""

import json
from pathlib import Path
from urllib.parse import urljoin

from playwright.sync_api import sync_playwright

def crawl_chnmuseum():
    """Scrape the exhibition-preview list of the National Museum of China.

    Opens the listing page in headless Chromium, waits for the ``.list_con``
    container and reads one record from every ``.list_con ul li`` item.
    Items without a title are skipped. The crawl is best effort: an item that
    fails to parse is reported and skipped, and a page-level failure is
    reported and ends the crawl with whatever was collected so far.

    Returns:
        list[dict]: One dict per exhibition with the keys ``title``,
        ``venue``, ``area``, ``date``, ``url`` and ``source``. ``url`` is an
        absolute URL, or ``""`` when the item's link has no ``href``.
    """
    exhibitions = []

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)

        try:
            page = browser.new_page()
            page.goto(
                "https://www.chnmuseum.cn/zl/zhanlanyugao/",
                wait_until="networkidle",
                timeout=30000,
            )

            # Wait until the listing container has been rendered.
            page.wait_for_selector(".list_con", timeout=10000)

            # Resolve hrefs against the URL the browser actually ended up on,
            # so root-relative ("/a/b"), document-relative ("./a/b") and
            # already-absolute links all become valid absolute URLs.
            base_url = page.url

            for item in page.query_selector_all(".list_con ul li"):
                try:
                    title_el = item.query_selector("a")
                    title = title_el.inner_text().strip() if title_el else ""
                    if not title:
                        continue

                    date_el = item.query_selector(".date")
                    date = date_el.inner_text().strip() if date_el else ""

                    # get_attribute() returns None when the attribute is
                    # absent; normalise that to an empty string.
                    href = title_el.get_attribute("href") or ""

                    exhibitions.append({
                        "title": title,
                        "venue": "中国国家博物馆",
                        "area": "东城区",
                        "date": date,
                        "url": urljoin(base_url, href) if href else "",
                        "source": "国家博物馆官网",
                    })
                except Exception as e:
                    # Deliberate best effort: one bad item must not abort the
                    # rest of the list.
                    print(f"解析展览项失败：{e}")

            print(f"✓ 国家博物馆爬取成功：{len(exhibitions)} 个展览")

        except Exception as e:
            print(f"✗ 国家博物馆爬取失败：{e}")
        finally:
            # Close the browser on every exit path, including interrupts.
            browser.close()

    return exhibitions


def crawl_dpm():
    """Scrape the exhibition list of the Palace Museum website.

    Opens the shows page in headless Chromium, waits for the ``.shows``
    container and reads one record from every ``.shows .item`` element.
    Items without a title are skipped. The crawl is best effort: an item that
    fails to parse is reported and skipped, and a page-level failure is
    reported and ends the crawl with whatever was collected so far.

    Returns:
        list[dict]: One dict per exhibition with the keys ``title``,
        ``venue``, ``area``, ``date``, ``url`` and ``source``. ``url`` is an
        absolute URL, or ``""`` when the item has no link or no ``href``.
    """
    exhibitions = []

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)

        try:
            page = browser.new_page()
            page.goto(
                "https://www.dpm.org.cn/shows.html",
                wait_until="networkidle",
                timeout=30000,
            )

            # Wait until the listing container has been rendered.
            page.wait_for_selector(".shows", timeout=10000)

            # Resolve hrefs against the URL the browser actually ended up on,
            # so relative links are stored as usable absolute URLs.
            base_url = page.url

            for item in page.query_selector_all(".shows .item"):
                try:
                    title_el = item.query_selector(".title")
                    title = title_el.inner_text().strip() if title_el else ""
                    if not title:
                        continue

                    date_el = item.query_selector(".date")
                    date = date_el.inner_text().strip() if date_el else ""

                    link_el = item.query_selector("a")
                    # get_attribute() returns None when the attribute is
                    # absent; normalise that to an empty string.
                    href = (link_el.get_attribute("href") or "") if link_el else ""

                    exhibitions.append({
                        "title": title,
                        "venue": "故宫博物院",
                        "area": "东城区",
                        "date": date,
                        "url": urljoin(base_url, href) if href else "",
                        "source": "故宫博物院官网",
                    })
                except Exception as e:
                    # Deliberate best effort: one bad item must not abort the
                    # rest of the list.
                    print(f"解析展览项失败：{e}")

            print(f"✓ 故宫博物院爬取成功：{len(exhibitions)} 个展览")

        except Exception as e:
            print(f"✗ 故宫博物院爬取失败：{e}")
        finally:
            # Close the browser on every exit path, including interrupts.
            browser.close()

    return exhibitions


def crawl_capital_museum():
    """Load the Capital Museum home page and report the size of its HTML.

    No exhibition records are extracted here: the function only fetches the
    rendered page in headless Chromium, prints a success line plus the length
    of the page source, and prints a failure line if anything goes wrong.

    Returns:
        list: Always an empty list.
    """
    homepage = "http://www.capitalmuseum.org.cn/"

    with sync_playwright() as playwright:
        chromium = playwright.chromium.launch(headless=True)
        tab = chromium.new_page()

        try:
            tab.goto(homepage, wait_until="networkidle", timeout=30000)

            # Grab the rendered page source; only its length is reported.
            html = tab.content()

            print("✓ 首都博物馆页面加载成功")
            print(f"页面长度：{len(html)}")

        except Exception as err:
            print(f"✗ 首都博物馆爬取失败：{err}")

        chromium.close()

    return []


# Default location of the JSON file that collects every crawler's results.
OUTPUT_PATH = Path(
    "/root/.openclaw/workspace/travel/beijing-exhibitions/scripts/data/playwright_result.json"
)


def main(output_path=OUTPUT_PATH):
    """Run every museum crawler and save the combined results as JSON.

    Args:
        output_path: Destination file (``str`` or ``Path``). Its parent
            directory is created when missing, so results gathered by the
            crawlers are not lost to a ``FileNotFoundError`` at write time.
    """
    banner = "=" * 50

    print(banner)
    print("开始使用 Playwright 爬取博物馆展览")
    print(banner)

    # National Museum of China
    print("\n【国家博物馆】")
    chnmuseum_exhibitions = crawl_chnmuseum()

    # Palace Museum
    print("\n【故宫博物院】")
    dpm_exhibitions = crawl_dpm()

    # Capital Museum
    print("\n【首都博物馆】")
    capital_exhibitions = crawl_capital_museum()

    print("\n" + banner)
    print("爬取完成")
    print(banner)

    # Persist the results, keyed by source site.
    result = {
        "chnmuseum": chnmuseum_exhibitions,
        "dpm": dpm_exhibitions,
        "capital": capital_exhibitions,
    }

    output_path = Path(output_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)
    with output_path.open("w", encoding="utf-8") as f:
        json.dump(result, f, ensure_ascii=False, indent=2)

    # Only the file name is shown, matching the original message text.
    print(f"\n结果已保存到：{output_path.name}")
    print(f"国家博物馆：{len(chnmuseum_exhibitions)} 个")
    print(f"故宫博物院：{len(dpm_exhibitions)} 个")
    print(f"首都博物馆：{len(capital_exhibitions)} 个")


if __name__ == "__main__":
    main()
