#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
从保存的文本中提取故宫展览信息
"""

import re
import json

def _is_exhibition_title(line):
    """Return True when a stripped line looks like an exhibition title.

    A title contains "展", is longer than 5 and shorter than 50 characters,
    and contains none of the words "展览", "近期" or "更多".
    """
    if "展" not in line or not 5 < len(line) < 50:
        return False
    # Detail lines such as "展览地点：..." and section headings also contain
    # "展"; these keywords filter them out.
    return not any(word in line for word in ("展览", "近期", "更多"))


def extract_dpm_exhibitions(text_path="data/dpm_text.txt"):
    """Extract the currently running Palace Museum exhibitions from saved text.

    The file is scanned line by line. Every line accepted by
    ``_is_exhibition_title`` starts a new record, and up to four following
    lines are searched for the "展览地点：" (hall), "展览时间：" (date and
    status) and "需预约" (reservation required) markers. The search stops
    early at the next title line so that details belonging to the following
    exhibition are never attributed to the current one.

    Args:
        text_path: Path of the UTF-8 text file to parse. Defaults to
            "data/dpm_text.txt".

    Returns:
        A list of dicts, one per exhibition whose time line carries the
        "【在展】" marker. Each dict holds "title", "venue", "area", "source",
        "date" and "status", plus "hall" and "note" when the corresponding
        marker was found.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(text_path, "r", encoding="utf-8") as f:
        lines = [raw.strip() for raw in f.read().split("\n")]

    exhibitions = []

    for i, line in enumerate(lines):
        if not _is_exhibition_title(line):
            continue

        exhibition = {
            "title": line,
            "venue": "故宫博物院",
            "area": "东城区",
            "source": "故宫博物院官网",
        }

        # Inspect at most the next four lines for this exhibition's details.
        for detail in lines[i + 1:i + 5]:
            # The next title marks the start of another exhibition; reading
            # past it would overwrite this record with the other one's data.
            if _is_exhibition_title(detail):
                break

            if "展览地点：" in detail:
                exhibition["hall"] = detail.replace("展览地点：", "")

            if "展览时间：" in detail:
                time_info = detail.replace("展览时间：", "")
                # The status marker is kept inside "date" as well.
                exhibition["date"] = time_info

                if "【在展】" in time_info:
                    exhibition["status"] = "在展"
                elif "【结束】" in time_info:
                    exhibition["status"] = "结束"
                else:
                    exhibition["status"] = "未知"

            if "需预约" in detail:
                exhibition["note"] = "需预约"

        # Only exhibitions explicitly marked as running are reported.
        if exhibition.get("status") == "在展":
            exhibitions.append(exhibition)

    return exhibitions


if __name__ == "__main__":
    exhibitions = extract_dpm_exhibitions()

    # Summary header, then one short entry per running exhibition.
    print(f"找到 {len(exhibitions)} 个在展展览：\n")

    for item in exhibitions:
        # Fall back to a placeholder when a detail was not found in the text.
        hall = item.get("hall", "待查询")
        period = item.get("date", "待查询")
        print(f"📍 {item['title']}")
        print(f"   地点：{hall}")
        print(f"   时间：{period}")
        print()

    # Persist the results as indented UTF-8 JSON with non-ASCII text unescaped.
    with open("data/dpm_exhibitions.json", "w", encoding="utf-8") as out:
        out.write(json.dumps(exhibitions, ensure_ascii=False, indent=2))

    print(f"已保存到 data/dpm_exhibitions.json")
