wr1240148048 commited on
Commit
7a358b7
·
verified ·
1 Parent(s): 88a75b9

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +106 -0
app.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import chromadb
3
+ from typing import List, Dict
4
+
5
+ # ChromaDB 配置
6
+ client = chromadb.PersistentClient(path="chroma_data")
7
+
8
+ # 加载集合
9
+ embed_collection = client.get_collection(name="embed_data")
10
+ answer_collection = client.get_collection(name="answer_data")
11
+
12
+
13
+ def search_embed_and_answer(label: str):
14
+ def search_top_answers(target_embedding: List[float], top_n: int = 10) -> List[Dict]:
15
+ """使用ChromaDB进行相似度搜索"""
16
+ print("正在执行相似度搜索...")
17
+ results = answer_collection.query(
18
+ query_embeddings=[target_embedding],
19
+ n_results=top_n,
20
+ include=["metadatas", "documents", "distances"]
21
+ )
22
+
23
+ # 处理结果
24
+ answers = []
25
+ for meta, doc, dist in zip(
26
+ results['metadatas'][0],
27
+ results['documents'][0],
28
+ results['distances'][0]
29
+ ):
30
+ answers.append({
31
+ "formal_name": meta['formal_name'],
32
+ "formal_statement": meta['formal_statement'],
33
+ "informal_name": meta['informal_name'],
34
+ "informal_statement": doc,
35
+ "similarity": 1 - dist
36
+ })
37
+ print("相似度搜索完成!")
38
+ return answers
39
+
40
+ """主搜索函数"""
41
+ print("开始处理搜索请求...")
42
+
43
+ # 在 embed 集合中查找目标条目
44
+ print("正在查询目标定理的元数据...")
45
+ results = embed_collection.query(
46
+ query_texts=[label],
47
+ n_results=1,
48
+ include=["metadatas", "embeddings"]
49
+ )
50
+
51
+ if not results['ids'][0]:
52
+ print(f"未找到 label 为 {label} 的定理。")
53
+ return f"未找到 label 为 {label} 的定理。", ""
54
+
55
+ # 提取目标条目
56
+ print("正在提取目标定理的特征...")
57
+ target_metadata = results['metadatas'][0][0]
58
+ target_embedding = results['embeddings'][0][0]
59
+
60
+ # 执行搜索
61
+ print("正在执行相似度搜索...")
62
+ top_answers = search_top_answers(target_embedding)
63
+
64
+ # 格式化结果
65
+ print("正在格式化搜索结果...")
66
+ latex_output = f"目标定理的 tag: {target_metadata['tag']}\nLaTeX 渲染结果: {target_metadata['latex']}"
67
+
68
+ answer_output = "最吻合的前十个定理:\n"
69
+ for i, ans in enumerate(top_answers, 1):
70
+ answer_output += (
71
+ f"{i}. Formal Name: {ans['formal_name']}\n"
72
+ f" Formal Statement: {ans['formal_statement']}\n"
73
+ f" Informal Name: {ans['informal_name']}\n"
74
+ f" Informal Statement: {ans['informal_statement']}\n"
75
+ f" 相似度: {ans['similarity']:.4f}\n\n"
76
+ )
77
+
78
+ print("搜索请求处理完成!")
79
+ return latex_output, answer_output
80
+
81
+ print("初始化 Gradio 界面...")
82
+ with gr.Blocks(theme=gr.themes.Soft(), title="Mathlib4 Search") as demo:
83
+ gr.Markdown("""<center><font size=8> ReasLab Smart Search</font></center>""")
84
+
85
+ with gr.Tab("Theorem Search"):
86
+ label_input = gr.Textbox(
87
+ placeholder="Example: quadratic_formula",
88
+ label="Label",
89
+ info="Enter a theorem label from embed.json"
90
+ )
91
+ search_button = gr.Button("Search")
92
+ latex_output = gr.Textbox(label="LaTeX and Tag", interactive=False)
93
+ answer_output = gr.Textbox(label="Top Matching Theorems", interactive=False)
94
+
95
+ search_button.click(
96
+ fn=search_embed_and_answer,
97
+ inputs=label_input,
98
+ outputs=[latex_output, answer_output]
99
+ )
100
+
101
+ print("启动 Gradio 服务...")
102
+ try:
103
+ demo.launch(server_name="0.0.0.0", server_port=9071, share=False,debug=True)
104
+ print("Gradio 服务已启动!")
105
+ except Exception as e:
106
+ print(f"启动 Gradio 服务失败: {e}")