# Spaces:
# Runtime error
# Runtime error
# %%
# Create the FAISS-backed document store that will hold the articles and,
# once the retriever has run, their dense embeddings.
from haystack.document_stores import FAISSDocumentStore

# NOTE(review): no sql_url is passed, so the store uses the library default
# for its SQL backend -- confirm that re-running this cell against an
# existing database does not leave stale or duplicated documents.
document_store = FAISSDocumentStore(faiss_index_factory_str="Flat")

# %%
# Load the articles from CSV and index them, one document per CSV row.
import pandas as pd

df_document = pd.read_csv("data/articles.csv")

# Each row becomes a Haystack document dict: the article text is the
# content, the remaining columns are carried along as metadata.
articles = [
    {
        "content": row["article"],
        "meta": {
            # Haystack's DPR retriever reads the document title from
            # meta["name"] when embed_title=True (as configured for the
            # retriever in this script); without this key the title is
            # embedded as an empty string.
            "name": row["article_name"],
            "chapter_name": row["chapter_name"],
            "article_page": row["article_page"],
            "article_number": row["article_number"],
            "article_name": row["article_name"],
        },
    }
    for row in df_document.to_dict("records")
]

document_store.write_documents(articles, index="document")
print(f"Loaded {document_store.get_document_count()} documents")
# %%
# Build the dense passage retriever (CPU only) and use it to compute the
# embeddings for every document already written to the store.
from haystack.nodes import DensePassageRetriever

# NOTE(review): the same "passage_encoder" checkpoint is used for both the
# query side and the passage side -- confirm this is intentional and that
# no dedicated question-encoder checkpoint should be used for queries.
dpr_checkpoint = "sadakmed/dpr-passage_encoder-spanish"

retriever = DensePassageRetriever(
    document_store=document_store,
    query_embedding_model=dpr_checkpoint,
    passage_embedding_model=dpr_checkpoint,
    # Queries are truncated much more aggressively than passages.
    max_seq_len_query=64,
    max_seq_len_passage=384,
    batch_size=16,
    embed_title=True,
    use_fast_tokenizers=True,
    use_gpu=False,
)

document_store.update_embeddings(retriever)
# %%
# Build the extractive reader from a Spanish SQuAD2-style checkpoint.
from haystack.nodes import FARMReader

model_ckpt = "mrm8488/distill-bert-base-spanish-wwm-cased-finetuned-spa-squad2-es"

# Reader tuning knobs, kept together so they are easy to scan and adjust.
reader_options = {
    "max_seq_len": 384,
    "doc_stride": 128,
    "return_no_answer": True,
    "use_gpu": False,
    "progress_bar": False,
}

reader = FARMReader(model_name_or_path=model_ckpt, **reader_options)
# %%
# Chain retriever and reader into a single extractive QA pipeline.
from haystack.pipelines import ExtractiveQAPipeline

pipe = ExtractiveQAPipeline(reader=reader, retriever=retriever)

# %%
# Run one sample query through the pipeline.
question = "pueblos originarios justicia"

# Per-node settings, keyed by the node names the pipeline exposes.
run_params = {
    "Retriever": {"top_k": 10},
    "Reader": {"top_k": 5},
}
prediction = pipe.run(query=question, params=run_params)

# %%
# Dump the raw prediction structure for inspection.
from pprint import pprint

pprint(prediction)

# %%
# Print the answers again using Haystack's own formatter in its
# "minimum" detail mode.
from haystack.utils import print_answers

print_answers(prediction, details="minimum")

# %%