Spaces:

shigureui
/

BookSearch

Sleeping

App Files Files Community

shigureui committed on May 6

Commit

da335e9

1 Parent(s): 57ecbdf

test

Browse files

Files changed (4) hide show

.gitignore +1 -0
app.py +77 -1
encrypted.txt +0 -0
requirements.txt +1 -0

.gitignore ADDED Viewed

	@@ -0,0 +1 @@


1	+ temp/*

app.py CHANGED Viewed

@@ -1,12 +1,88 @@
 import gradio as gr
 import llama_cpp
 llm = llama_cpp.Llama.from_pretrained(repo_id="KimChen/bge-m3-GGUF", filename="bge-m3-q4_k_m.gguf", embedding=True)
-embeddings = llm.create_embedding("Hello, world!")
 def greet(name):
     embeddings = llm.create_embedding(name)
     return "Hello " + name + "!!"
 demo = gr.Interface(fn=greet, inputs="text", outputs="text")

 import gradio as gr
 import llama_cpp
+import base64
+from Crypto.Cipher import AES
+from Crypto.Util.Padding import unpad
+def decrypt_file(input_path, key):
+    """Decrypt a Base64-encoded AES-CBC file and return its text.
+
+    The Base64-decoded payload is read as a 16-byte IV followed by the
+    ciphertext; padding is removed with ``unpad`` using its default style.
+
+    Args:
+        input_path: Path of the Base64-encoded encrypted file.
+        key: Passphrase string. Its UTF-8 bytes are right-padded with
+            ``b'0'`` and truncated to 32 bytes to form the AES key.
+
+    Returns:
+        The decrypted plaintext decoded as UTF-8.
+
+    Raises:
+        ValueError: If ``key`` is None, or if decryption/unpadding fails
+            (for example because the key is wrong).
+    """
+    if key is None:
+        # os.getenv() returns None for an unset variable; fail with a clear
+        # message instead of an AttributeError on None.ljust.
+        raise ValueError("decryption key is None")
+    # Read the encrypted file.
+    with open(input_path, 'rb') as f:
+        encrypted_data = base64.b64decode(f.read())
+    # Encode first, then pad/truncate the *bytes*: sizing the str before
+    # encoding yields more than 32 bytes for any non-ASCII passphrase, which
+    # AES.new rejects. ASCII passphrases produce the same key as before.
+    key_bytes = key.encode('utf-8').ljust(32, b'0')[:32]
+    iv = encrypted_data[:16]
+    ciphertext = encrypted_data[16:]
+    cipher = AES.new(key_bytes, AES.MODE_CBC, iv)
+    plaintext = unpad(cipher.decrypt(ciphertext), AES.block_size)
+    return plaintext.decode('utf-8')
 llm = llama_cpp.Llama.from_pretrained(repo_id="KimChen/bge-m3-GGUF", filename="bge-m3-q4_k_m.gguf", embedding=True)
+embedding_1 = llm.create_embedding("Hello, world!")
+embedding_2 = llm.create_embedding("你好, 世界!") # type(embedding_1['data'][0]['embedding']) list
+from pymilvus import MilvusClient
+# Open ./books.db and make sure the target collection exists.
+# NOTE(review): dimension=1024 presumably matches the embedding model's
+# output size -- confirm against the model card.
+client = MilvusClient("./books.db")
+client.create_collection(
+    collection_name="collection_1",
+    dimension=1024
+)
+import os
+import json
+aeskey = os.getenv('aeskey')
+if aeskey is None:
+    # Fail fast with an actionable message when the secret is not configured.
+    raise RuntimeError("environment variable 'aeskey' is not set; cannot decrypt encrypted.txt")
+decrypted_content = decrypt_file('encrypted.txt', aeskey)
+raw_jsons = json.loads(decrypted_content)
+docs = []
+metas = []
+# Each top-level entry is indexed as [chapter, passage, passage, ...]; every
+# passage is a dict with "原文" and "index", plus optional "注释" / "批判".
+for entry in raw_jsons:
+    chapter = entry[0]
+    for passage in entry[1:]:
+        content = passage["原文"]
+        docs.append(content)
+        metas.append({
+            "index": passage["index"],
+            "text": content,
+            "annotation": passage.get("注释", ""),
+            "critique": passage.get("批判", ""),
+            "chapter": chapter
+        })
+# 4. Generate the embeddings in one batch (each one is a list[float]).
+emb_result = llm.create_embedding(docs)
+embeddings = [item["embedding"] for item in emb_result["data"]]  # List[List[float]]
+# Prepare the rows: pair every metadata record with its embedding.
+milvus_data = [
+    {
+        "id": meta["index"],
+        "vector": vector,
+        "text": meta["text"],
+        "annotation": meta["annotation"],
+        "critique": meta["critique"],
+        "chapter": meta["chapter"],
+    }
+    for meta, vector in zip(metas, embeddings)
+]
+# Insert the rows.
+# NOTE(review): this runs on every start-up; confirm how re-inserting the
+# same ids into an existing books.db behaves.
+client.insert(collection_name="collection_1", data=milvus_data)
+print(f"✅ 插入完成：共 {len(milvus_data)} 条数据")
 def greet(name):
+    """Embed *name*, print the top search hits, and return a greeting.
+
+    The search result is only printed; the returned string is built from
+    *name* alone.
+    """
     embeddings = llm.create_embedding(name)
+    # Query collection_1 with the embedding of the input, asking for the
+    # "text" and "id" fields of at most 2 hits.
+    res = client.search(
+        collection_name="collection_1",
+        data=[embeddings['data'][0]['embedding']],
+        limit=2,
+        output_fields=["text", "id"],
+    )
+    print(res)
     return "Hello " + name + "!!"
 demo = gr.Interface(fn=greet, inputs="text", outputs="text")

encrypted.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

requirements.txt CHANGED Viewed

@@ -1,5 +1,6 @@
 whoosh-reloaded==2.7.5
 jieba==0.42.1
 huggingface-hub
 --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu
 llama-cpp-python

 whoosh-reloaded==2.7.5
 jieba==0.42.1
 huggingface-hub
+# app.py imports `pymilvus` (MilvusClient) and `Crypto` (shipped by pycryptodome).
+pymilvus
+pycryptodome
 --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu
 llama-cpp-python