shigureui commited on
Commit
da335e9
·
1 Parent(s): 57ecbdf
Files changed (4) hide show
  1. .gitignore +1 -0
  2. app.py +77 -1
  3. encrypted.txt +0 -0
  4. requirements.txt +1 -0
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ temp/*
app.py CHANGED
@@ -1,12 +1,88 @@
1
  import gradio as gr
2
  import llama_cpp
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
  llm = llama_cpp.Llama.from_pretrained(repo_id="KimChen/bge-m3-GGUF", filename="bge-m3-q4_k_m.gguf", embedding=True)
5
 
6
- embeddings = llm.create_embedding("Hello, world!")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
  def greet(name):
9
  embeddings = llm.create_embedding(name)
 
 
 
 
 
 
 
10
  return "Hello " + name + "!!"
11
 
12
  demo = gr.Interface(fn=greet, inputs="text", outputs="text")
 
1
  import gradio as gr
2
  import llama_cpp
3
+ import base64
4
+ from Crypto.Cipher import AES
5
+ from Crypto.Util.Padding import unpad
6
+
7
def decrypt_file(input_path, key):
    """Decrypt a base64-encoded AES-256-CBC file and return its UTF-8 text.

    Expected file layout: base64( IV[16 bytes] || ciphertext ).  The key
    string is right-padded with '0' (or truncated) to exactly 32 bytes.

    Args:
        input_path: path of the encrypted file on disk.
        key: passphrase used to derive the 32-byte AES key; must be non-empty.

    Returns:
        The decrypted plaintext decoded as UTF-8.

    Raises:
        ValueError: if ``key`` is None/empty, or if padding/base64 is invalid.
    """
    # Fail fast with a clear message instead of an AttributeError on .ljust
    # when the caller passes os.getenv(...) that returned None.
    if not key:
        raise ValueError("decryption key must be a non-empty string")

    # Read the encrypted payload and strip its base64 transport encoding.
    with open(input_path, 'rb') as f:
        encrypted_data = base64.b64decode(f.read())

    # NOTE(review): zero-padding a passphrase is a weak key derivation — a real
    # KDF (PBKDF2/scrypt) would be better, but changing it would break existing
    # encrypted files, so the scheme is kept as-is.
    key = key.ljust(32, '0')[:32].encode('utf-8')
    iv = encrypted_data[:16]
    ciphertext = encrypted_data[16:]

    cipher = AES.new(key, AES.MODE_CBC, iv)
    plaintext = unpad(cipher.decrypt(ciphertext), AES.block_size)

    return plaintext.decode('utf-8')
20
+
21
 
22
# Load the BGE-M3 embedding model (1024-dim vectors) once at startup.
llm = llama_cpp.Llama.from_pretrained(repo_id="KimChen/bge-m3-GGUF", filename="bge-m3-q4_k_m.gguf", embedding=True)

from pymilvus import MilvusClient

# Milvus Lite: the whole collection lives in a local file.
client = MilvusClient("./books.db")

client.create_collection(
    collection_name="collection_1",
    dimension=1024,  # must match the embedding size produced by bge-m3
)

import os, json

# The corpus ships encrypted; the AES passphrase comes from the environment.
aeskey = os.getenv('aeskey')
if not aeskey:
    raise RuntimeError("environment variable 'aeskey' is required to decrypt encrypted.txt")
decrypted_content = decrypt_file('encrypted.txt', aeskey)
raw_jsons = json.loads(decrypted_content)

# Flatten the nested JSON. Each top-level item appears to be
# [chapter_title, entry, entry, ...] — TODO confirm against encrypted.txt.
docs = []
metas = []

for chapter_block in raw_jsons:
    chapter = chapter_block[0]
    for entry in chapter_block[1:]:
        content = entry["原文"]
        docs.append(content)
        metas.append({
            "index": entry["index"],
            "text": content,
            "annotation": entry.get("注释", ""),
            "critique": entry.get("批判", ""),
            "chapter": chapter,
        })

# Batch-embed every passage in a single model call; each result item carries
# its vector under ["embedding"] as list[float].
emb_result = llm.create_embedding(docs)
embeddings = [item["embedding"] for item in emb_result["data"]]

# Pair each vector with its metadata in the record shape Milvus expects.
milvus_data = [
    {
        "id": meta["index"],
        "vector": vec,
        "text": meta["text"],
        "annotation": meta["annotation"],
        "critique": meta["critique"],
        "chapter": meta["chapter"],
    }
    for vec, meta in zip(embeddings, metas)
]

# Insert the whole corpus in one call.
client.insert(collection_name="collection_1", data=milvus_data)
print(f"✅ 插入完成:共 {len(milvus_data)} 条数据")
76
 
77
def greet(name):
    """Gradio handler: embed the input text, run a top-2 similarity search
    against collection_1, and return a plain greeting.

    The search hits are printed for debugging only — the returned string
    does not depend on them.
    """
    query_vec = llm.create_embedding(name)["data"][0]["embedding"]
    matches = client.search(
        collection_name="collection_1",
        data=[query_vec],
        limit=2,
        output_fields=["text", "id"],
    )
    print(matches)
    return "Hello " + name + "!!"


demo = gr.Interface(fn=greet, inputs="text", outputs="text")
encrypted.txt ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt CHANGED
@@ -1,5 +1,6 @@
1
  whoosh-reloaded==2.7.5
2
  jieba==0.42.1
3
  huggingface-hub
 
4
  --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu
5
  llama-cpp-python
 
1
  whoosh-reloaded==2.7.5
2
  jieba==0.42.1
3
  huggingface-hub
4
+ pymilvus
5
  --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu
6
  llama-cpp-python