#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
使用 Playwright 爬取并保存页面内容进行分析
"""

from pathlib import Path

from playwright.sync_api import sync_playwright

# Pages to capture: (console heading, label used in the "saved" message,
# output file stem, URL).
_TARGETS = (
    (
        "国家博物馆",
        "国家博物馆",
        "chnmuseum",
        "https://www.chnmuseum.cn/zl/zhanlanyugao/",
    ),
    (
        "故宫博物院",
        "故宫",
        "dpm",
        "https://www.dpm.org.cn/shows.html",
    ),
)


def _save_target(page, output_dir, label, file_stem, url):
    """Load *url* in *page* and write its HTML and body text under *output_dir*.

    Args:
        page: An open Playwright page that is reused for the navigation.
        output_dir: ``pathlib.Path`` of an existing directory to write into.
        label: Site name interpolated into the "page saved" console message.
        file_stem: Prefix for the ``<stem>_page.html`` / ``<stem>_text.txt``
            output files.
        url: Address of the page to capture.

    Raises:
        Exception: Whatever Playwright or the file system raises; the caller
            decides how to report it.
    """
    page.goto(url, wait_until="domcontentloaded", timeout=60000)
    # Fixed 5 s pause after DOMContentLoaded so late-loading content has a
    # chance to appear before the snapshot is taken.
    page.wait_for_timeout(5000)

    content = page.content()
    (output_dir / f"{file_stem}_page.html").write_text(content, encoding="utf-8")
    # NOTE: len() counts characters, although the message says bytes.
    print(f"✓ 保存{label}页面：{len(content)} 字节")

    # Visible text of the whole document body.
    text = page.inner_text("body")
    (output_dir / f"{file_stem}_text.txt").write_text(text, encoding="utf-8")
    print(f"✓ 保存文本：{len(text)} 字符")


def save_page_content(output_dir="data"):
    """Capture each museum page and save its HTML and text for analysis.

    For every entry in ``_TARGETS`` the page is opened in headless Chromium
    and two files are written: ``<stem>_page.html`` (full page markup) and
    ``<stem>_text.txt`` (inner text of ``<body>``). A failure on one site is
    printed and does not prevent the remaining sites from being processed.

    Args:
        output_dir: Directory that receives the output files. It is created
            (including parents) when missing. Defaults to ``"data"``.

    Raises:
        OSError: If the output directory cannot be created.
    """
    out_dir = Path(output_dir)
    # Without this, every write fails on a fresh checkout and the error is
    # only visible as a per-site failure message.
    out_dir.mkdir(parents=True, exist_ok=True)

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        try:
            page = browser.new_page()
            for index, (title, label, file_stem, url) in enumerate(_TARGETS):
                if index:
                    # Blank line between consecutive site sections.
                    print()
                print(f"【{title}】")
                try:
                    _save_target(page, out_dir, label, file_stem, url)
                except Exception as e:
                    # Best-effort: report and move on to the next site.
                    print(f"✗ 失败：{e}")
        finally:
            # Release the browser even if something unexpected escapes.
            browser.close()

def _main():
    """Run the page capture, then point the user at the output directory."""
    save_page_content()
    print("\n完成！请查看 data/ 目录下的文件")


# Only run the capture when executed as a script, not on import.
if __name__ == "__main__":
    _main()
