"""Generate concept-graph data (nodes/edges) from free text via an LLM.

Detects whether the input is Chinese or English, selects a matching system
prompt that instructs the model to emit a strict JSON graph, then parses and
validates the model output.

NOTE(review): this span of the source was a flattened repository dump; a file
tree and a truncated ``.devcontainer/devcontainer.json`` fragment preceded
this module and are not reconstructible from what is visible here.  The names
``st`` (streamlit) and ``call_llm`` are referenced but their imports sat in
the truncated portion of the original file — confirm against the repository.
"""

import json


def detect_language(text):
    """Return ``'chinese'`` or ``'english'`` for *text*.

    NOTE(review): the original definition was truncated in the dump; only its
    tail (``... english_chars else 'english'``) is visible.  Reconstructed as
    a CJK-vs-ASCII-letter count consistent with that tail — verify against
    the original ``utils.py``.
    """
    chinese_chars = sum(1 for ch in text if '\u4e00' <= ch <= '\u9fff')
    english_chars = sum(1 for ch in text if ch.isascii() and ch.isalpha())
    return 'chinese' if chinese_chars > english_chars else 'english'


def get_system_prompt(language):
    """Get system prompt based on language.

    Args:
        language: ``'chinese'`` selects the Chinese-output prompt; any other
            value falls through to the English-output prompt.

    Returns:
        The system prompt string instructing the model to emit JSON only.
    """
    if language == 'chinese':
        return """You are a professional text analysis assistant. Please analyze the input text and extract key concepts and their relationships.

You must output ONLY a JSON object in the following format, with NO additional text or explanation:

{
    "nodes": [
        {
            "id": "1",  // Must be a unique string
            "label": "概念1",  // Concept name in Chinese
            "group": "类别1"  // Category in Chinese
        }
    ],
    "edges": [
        {
            "from": "1",  // Must match an existing node id
            "to": "2",  // Must match an existing node id
            "label": "包含"  // Relationship description in Chinese
        }
    ]
}

Requirements:
1. Output ONLY the JSON object, no other text
2. All node IDs must be unique strings
3. All 'from' and 'to' in edges must reference existing node IDs
4. All labels and descriptions MUST be in Chinese
5. The output must be valid JSON format
6. Extract at least 3 key concepts and their relationships
7. Group similar concepts under the same category
8. Use natural and idiomatic Chinese expressions
9. Ensure relationship descriptions are clear and meaningful

DO NOT include any explanations or markdown formatting in the output."""
    else:
        return """You are a professional text analysis assistant. Please analyze the input text and extract key concepts and their relationships.

You must output ONLY a JSON object in the following format, with NO additional text or explanation:

{
    "nodes": [
        {
            "id": "1",  // Must be a unique string
            "label": "Concept1",  // Concept name in English
            "group": "Group1"  // Category in English
        }
    ],
    "edges": [
        {
            "from": "1",  // Must match an existing node id
            "to": "2",  // Must match an existing node id
            "label": "contains"  // Relationship description in English
        }
    ]
}

Requirements:
1. Output ONLY the JSON object, no other text
2. All node IDs must be unique strings
3. All 'from' and 'to' in edges must reference existing node IDs
4. All labels and descriptions MUST be in English
5. The output must be valid JSON format
6. Extract at least 3 key concepts and their relationships
7. Group similar concepts under the same category
8. Use natural and idiomatic English expressions
9. Ensure relationship descriptions are clear and meaningful

DO NOT include any explanations or markdown formatting in the output."""


def _strip_code_fences(output):
    """Remove a surrounding Markdown code fence from the model output.

    Handles both a ```json-tagged and a bare ``` opening fence (the original
    only stripped the ```json form, so a bare fence broke JSON parsing), plus
    a trailing ``` fence.  Returns the stripped payload.
    """
    output = output.strip()
    if output.startswith("```json"):
        output = output[7:]
    elif output.startswith("```"):
        output = output[3:]
    if output.endswith("```"):
        output = output[:-3]
    return output.strip()


def _validate_graph(result):
    """Validate the parsed graph payload; raise ValueError on any violation.

    Checks: top-level shape, minimum node count, node/edge field presence and
    string-ness, node-id uniqueness, >=2 distinct groups, and that every edge
    endpoint references an existing node id.
    """
    if not isinstance(result, dict):
        raise ValueError("Response is not a JSON object")
    if 'nodes' not in result or 'edges' not in result:
        raise ValueError("Missing required 'nodes' or 'edges' fields")
    if not isinstance(result['nodes'], list) or not isinstance(result['edges'], list):
        raise ValueError("'nodes' or 'edges' is not an array")
    if len(result['nodes']) < 3:
        raise ValueError("At least 3 nodes are required")

    node_ids = set()
    groups = set()
    for node in result['nodes']:
        if not all(k in node for k in ('id', 'label', 'group')):
            raise ValueError("Invalid node format - missing required fields")
        if not all(isinstance(node[k], str) for k in ('id', 'label', 'group')):
            raise ValueError("Node fields must be strings")
        # Fields are already guaranteed str above; no str() coercion needed.
        if node['id'] in node_ids:
            raise ValueError(f"Duplicate node ID found: {node['id']}")
        node_ids.add(node['id'])
        groups.add(node['group'])

    if len(groups) < 2:
        raise ValueError("Nodes should be categorized into at least 2 groups")

    for edge in result['edges']:
        if not all(k in edge for k in ('from', 'to', 'label')):
            raise ValueError("Invalid edge format - missing required fields")
        if not all(isinstance(edge[k], str) for k in ('from', 'to', 'label')):
            raise ValueError("Edge fields must be strings")
        if edge['from'] not in node_ids:
            raise ValueError(f"Edge references non-existent source node: {edge['from']}")
        if edge['to'] not in node_ids:
            raise ValueError(f"Edge references non-existent target node: {edge['to']}")


def generate_graph_data(text):
    """Call OpenAI API to generate graph nodes and edges data.

    Args:
        text: Free-form input text in Chinese or English.

    Returns:
        ``(nodes, edges)`` lists on success; ``([], [])`` after reporting the
        failure via ``st.error`` (Streamlit UI) on any error.
    """
    language = detect_language(text)
    system_msg = get_system_prompt(language)
    user_msg = "Please analyze the following text and generate relationship graph data:\n" + text

    # Pre-bind so the JSONDecodeError handler can always reference `output`.
    output = ""
    try:
        output = call_llm(system_msg, user_msg)
        if not output:
            raise ValueError("API returned empty response")

        output = _strip_code_fences(output)
        result = json.loads(output)
        _validate_graph(result)
        return result['nodes'], result['edges']

    except json.JSONDecodeError as je:
        st.error(f"JSON parsing error: {str(je)}\nActual output: {output}")
        return [], []
    except Exception as e:
        st.error(f"Error generating graph data: {str(e)}")
        return [], []