Spaces:
Sleeping
Sleeping
use self hosted embedding
Browse files
- app.py +35 -8
- embeddings.json +2 -2
- requirements.txt +1 -2
app.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
-
import llama_cpp
|
| 3 |
import base64
|
| 4 |
from Crypto.Cipher import AES
|
| 5 |
from Crypto.Util.Padding import unpad
|
|
@@ -20,14 +20,21 @@ def decrypt_file(input_path, key):
|
|
| 20 |
return plaintext.decode("utf-8")
|
| 21 |
|
| 22 |
|
| 23 |
-
llm = llama_cpp.Llama.from_pretrained(
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
)
|
| 28 |
|
| 29 |
# embedding_1 = llm.create_embedding("Hello, world!")
|
| 30 |
# embedding_2 = llm.create_embedding("你好, 世界!") # type(embedding_1['data'][0]['embedding']) list
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
|
| 32 |
from pymilvus import MilvusClient
|
| 33 |
|
|
@@ -92,10 +99,30 @@ for vhjx_index, vhjx_item in enumerate(raw_jsons):
|
|
| 92 |
|
| 93 |
|
| 94 |
def greet(name):
|
| 95 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 96 |
res = client.search(
|
| 97 |
collection_name="collection_1",
|
| 98 |
-
data=[embeddings["data"][0]["embedding"]],
|
|
|
|
| 99 |
limit=5,
|
| 100 |
output_fields=["index", "text", "annotation", "critique"],
|
| 101 |
)
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
+
# import llama_cpp
|
| 3 |
import base64
|
| 4 |
from Crypto.Cipher import AES
|
| 5 |
from Crypto.Util.Padding import unpad
|
|
|
|
| 20 |
return plaintext.decode("utf-8")
|
| 21 |
|
| 22 |
|
| 23 |
+
# llm = llama_cpp.Llama.from_pretrained(
|
| 24 |
+
# repo_id="mradermacher/bge-large-zh-v1.5-GGUF",
|
| 25 |
+
# filename="bge-large-zh-v1.5.Q4_K_M.gguf",
|
| 26 |
+
# embedding=True,
|
| 27 |
+
# )
|
| 28 |
|
| 29 |
# embedding_1 = llm.create_embedding("Hello, world!")
|
| 30 |
# embedding_2 = llm.create_embedding("你好, 世界!") # type(embedding_1['data'][0]['embedding']) list
|
| 31 |
+
from openai import OpenAI
|
| 32 |
+
|
| 33 |
+
client = OpenAI(
|
| 34 |
+
api_key=os.getenv("DASHSCOPE_API_KEY"), # 如果您没有配置环境变量,请在此处用您的API Key进行替换
|
| 35 |
+
base_url="https://dashscope.aliyuncs.com/compatible-mode/v1" # 百炼服务的base_url
|
| 36 |
+
)
|
| 37 |
+
|
| 38 |
|
| 39 |
from pymilvus import MilvusClient
|
| 40 |
|
|
|
|
| 99 |
|
| 100 |
|
| 101 |
def greet(name):
|
| 102 |
+
"""
|
| 103 |
+
Search for relevant critical commentary entries based on an input query from the Analects.
|
| 104 |
+
|
| 105 |
+
This function parses the input query, performs a fuzzy search in the indexed original text field,
|
| 106 |
+
and extracts related critiques.
|
| 107 |
+
|
| 108 |
+
Args:
|
| 109 |
+
name (str): The input text (a line from the Analects, possibly fuzzy or partial) to search.
|
| 110 |
+
|
| 111 |
+
Returns:
|
| 112 |
+
List[dict]: A list of result entries. Each entry contains the original hit and a list of related entries
|
| 113 |
+
under the key "extra", retrieved via index references mentioned in the commentary.
|
| 114 |
+
"""
|
| 115 |
+
# embeddings = llm.create_embedding(name)
|
| 116 |
+
completion = client.embeddings.create(
|
| 117 |
+
model="text-embedding-v3",
|
| 118 |
+
input=name,
|
| 119 |
+
dimensions=1024, # 仅 text-embedding-v3 支持
|
| 120 |
+
encoding_format="float"
|
| 121 |
+
)
|
| 122 |
res = client.search(
|
| 123 |
collection_name="collection_1",
|
| 124 |
+
# data=[embeddings["data"][0]["embedding"]],
|
| 125 |
+
data=[completion.data[0].embedding],
|
| 126 |
limit=5,
|
| 127 |
output_fields=["index", "text", "annotation", "critique"],
|
| 128 |
)
|
embeddings.json
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e879dc69d30510924d7e26d5ad58ce193e9901003a0c634c94ac025b74655819
|
| 3 |
+
size 11621667
|
requirements.txt
CHANGED
|
@@ -2,6 +2,5 @@ whoosh-reloaded==2.7.5
|
|
| 2 |
jieba==0.42.1
|
| 3 |
huggingface-hub
|
| 4 |
pymilvus
|
| 5 |
-
|
| 6 |
-
llama-cpp-python
|
| 7 |
pycryptodome
|
|
|
|
| 2 |
jieba==0.42.1
|
| 3 |
huggingface-hub
|
| 4 |
pymilvus
|
| 5 |
+
openai
|
|
|
|
| 6 |
pycryptodome
|