Spaces:
Runtime error
Runtime error
LOUIS SANNA
committed on
Commit
·
d6936f0
1
Parent(s):
780c913
feat(data): add analec
Browse files
- README.md +1 -1
- anyqa/build_index.py +5 -1
- chroma_db/{13934663-2db5-404d-be0f-51734d442e08 → 1730b83a-f75a-41e2-aba7-637881bb5ea8}/data_level0.bin +0 -0
- chroma_db/{13934663-2db5-404d-be0f-51734d442e08 → 1730b83a-f75a-41e2-aba7-637881bb5ea8}/header.bin +0 -0
- chroma_db/{13934663-2db5-404d-be0f-51734d442e08 → 1730b83a-f75a-41e2-aba7-637881bb5ea8}/length.bin +0 -0
- chroma_db/{13934663-2db5-404d-be0f-51734d442e08 → 1730b83a-f75a-41e2-aba7-637881bb5ea8}/link_lists.bin +0 -0
- chroma_db/5fa47764-2449-49fb-ae2f-0fd1886dfa2d/data_level0.bin +0 -3
- chroma_db/5fa47764-2449-49fb-ae2f-0fd1886dfa2d/header.bin +0 -3
- chroma_db/5fa47764-2449-49fb-ae2f-0fd1886dfa2d/length.bin +0 -3
- chroma_db/5fa47764-2449-49fb-ae2f-0fd1886dfa2d/link_lists.bin +0 -0
- chroma_db/chroma.sqlite3 +2 -2
- constitution.pdf +0 -0
- data/Confucianism/Analects of Confucius.pdf +0 -0
- data/{daoism/tao-te-ching.pdf → Daoism/Tao_Te_Ching.pdf} +0 -0
- data/us-founding/constitution.pdf +0 -0
- data/us-founding/declaration-of-independance.pdf +0 -0
- declaration-of-independance.pdf +0 -0
README.md
CHANGED
|
@@ -18,5 +18,5 @@ We abstracted the code so it's easy to build another tool based on another domai
|
|
| 18 |
## Build vector index
|
| 19 |
|
| 20 |
```bash
|
| 21 |
-
python -m
|
| 22 |
```
|
|
|
|
| 18 |
## Build vector index
|
| 19 |
|
| 20 |
```bash
|
| 21 |
+
python -m anyqa.build_index
|
| 22 |
```
|
anyqa/build_index.py
CHANGED
|
@@ -10,14 +10,18 @@ from .vectorstore import PERSIST_DIRECTORY, get_vectorstore
|
|
| 10 |
|
| 11 |
|
| 12 |
def load_data():
|
|
|
|
| 13 |
docs = parse_data()
|
|
|
|
| 14 |
embedding_function = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)
|
|
|
|
| 15 |
vectorstore = get_vectorstore(embedding_function)
|
| 16 |
|
| 17 |
assert isinstance(vectorstore, Chroma)
|
| 18 |
vectorstore.from_documents(
|
| 19 |
docs, embedding_function, persist_directory=PERSIST_DIRECTORY
|
| 20 |
)
|
|
|
|
| 21 |
return vectorstore
|
| 22 |
|
| 23 |
|
|
@@ -47,7 +51,7 @@ def parse_data():
|
|
| 47 |
|
| 48 |
|
| 49 |
def parse_name(source: str) -> str:
|
| 50 |
-
return source.split("/")[-1].split(".")[0]
|
| 51 |
|
| 52 |
|
| 53 |
def parse_domain(source: str) -> str:
|
|
|
|
| 10 |
|
| 11 |
|
| 12 |
def load_data():
|
| 13 |
+
print("Loading data...")
|
| 14 |
docs = parse_data()
|
| 15 |
+
print("Loaded documents")
|
| 16 |
embedding_function = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)
|
| 17 |
+
print("Building index...")
|
| 18 |
vectorstore = get_vectorstore(embedding_function)
|
| 19 |
|
| 20 |
assert isinstance(vectorstore, Chroma)
|
| 21 |
vectorstore.from_documents(
|
| 22 |
docs, embedding_function, persist_directory=PERSIST_DIRECTORY
|
| 23 |
)
|
| 24 |
+
print("Index built")
|
| 25 |
return vectorstore
|
| 26 |
|
| 27 |
|
|
|
|
| 51 |
|
| 52 |
|
| 53 |
def parse_name(source: str) -> str:
|
| 54 |
+
return source.split("/")[-1].split(".")[0].replace("_", " ")
|
| 55 |
|
| 56 |
|
| 57 |
def parse_domain(source: str) -> str:
|
chroma_db/{13934663-2db5-404d-be0f-51734d442e08 → 1730b83a-f75a-41e2-aba7-637881bb5ea8}/data_level0.bin
RENAMED
|
File without changes
|
chroma_db/{13934663-2db5-404d-be0f-51734d442e08 → 1730b83a-f75a-41e2-aba7-637881bb5ea8}/header.bin
RENAMED
|
File without changes
|
chroma_db/{13934663-2db5-404d-be0f-51734d442e08 → 1730b83a-f75a-41e2-aba7-637881bb5ea8}/length.bin
RENAMED
|
File without changes
|
chroma_db/{13934663-2db5-404d-be0f-51734d442e08 → 1730b83a-f75a-41e2-aba7-637881bb5ea8}/link_lists.bin
RENAMED
|
File without changes
|
chroma_db/5fa47764-2449-49fb-ae2f-0fd1886dfa2d/data_level0.bin
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:a13e72541800c513c73dccea69f79e39cf4baef4fa23f7e117c0d6b0f5f99670
|
| 3 |
-
size 3212000
|
|
|
|
|
|
|
|
|
|
|
|
chroma_db/5fa47764-2449-49fb-ae2f-0fd1886dfa2d/header.bin
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:0ec6df10978b056a10062ed99efeef2702fa4a1301fad702b53dd2517103c746
|
| 3 |
-
size 100
|
|
|
|
|
|
|
|
|
|
|
|
chroma_db/5fa47764-2449-49fb-ae2f-0fd1886dfa2d/length.bin
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:fc19b1997119425765295aeab72d76faa6927d4f83985d328c26f20468d6cc76
|
| 3 |
-
size 4000
|
|
|
|
|
|
|
|
|
|
|
|
chroma_db/5fa47764-2449-49fb-ae2f-0fd1886dfa2d/link_lists.bin
DELETED
|
File without changes
|
chroma_db/chroma.sqlite3
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d627997dd35604ac27e67f35911999f234285c39362fffecddd50621d9f01d77
|
| 3 |
+
size 4067328
|
constitution.pdf
DELETED
|
Binary file (414 kB)
|
|
|
data/Confucianism/Analects of Confucius.pdf
ADDED
|
Binary file (711 kB). View file
|
|
|
data/{daoism/tao-te-ching.pdf → Daoism/Tao_Te_Ching.pdf}
RENAMED
|
File without changes
|
data/us-founding/constitution.pdf
DELETED
|
Binary file (414 kB)
|
|
|
data/us-founding/declaration-of-independance.pdf
DELETED
|
Binary file (742 kB)
|
|
|
declaration-of-independance.pdf
DELETED
|
Binary file (742 kB)
|
|
|