Spaces:
Running
Running
Commit
·
9abc2ca
1
Parent(s):
ae8b477
更新测试好的代码(openai接口更新)
Browse files- app.py +46 -34
- requirements.txt +1 -1
app.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
import pandas as pd
|
| 2 |
import numpy as np
|
| 3 |
import faiss
|
| 4 |
-
import
|
| 5 |
import tempfile
|
| 6 |
from PyPDF2 import PdfReader
|
| 7 |
import io
|
|
@@ -16,8 +16,6 @@ from collections import Counter
|
|
| 16 |
#import jieba.analyse
|
| 17 |
import nltk
|
| 18 |
|
| 19 |
-
|
| 20 |
-
|
| 21 |
@st.cache_data
|
| 22 |
def download_nltk():
|
| 23 |
nltk.download('punkt')
|
|
@@ -65,15 +63,16 @@ def get_keywords(file_paths): #这里的重点是,对每一个file做尽可能
|
|
| 65 |
return keywords_list
|
| 66 |
|
| 67 |
|
| 68 |
-
def get_completion_from_messages(messages, model="gpt-4", temperature=0):
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
|
|
|
| 75 |
|
| 76 |
-
def genarating_outline(keywords, num_lessons,language):
|
| 77 |
system_message = 'You are a great AI teacher and linguist, skilled at create course outline based on summarized knowledge materials.'
|
| 78 |
user_message = f"""You are a great AI teacher and linguist,
|
| 79 |
skilled at generating course outline based on keywords of the course.
|
|
@@ -96,7 +95,7 @@ def genarating_outline(keywords, num_lessons,language):
|
|
| 96 |
'content': user_message},
|
| 97 |
]
|
| 98 |
|
| 99 |
-
response = get_completion_from_messages(messages)
|
| 100 |
|
| 101 |
list_response = ['nothing in the answers..']
|
| 102 |
|
|
@@ -107,9 +106,9 @@ def genarating_outline(keywords, num_lessons,language):
|
|
| 107 |
|
| 108 |
return list_response
|
| 109 |
|
| 110 |
-
def courseOutlineGenerating(file_paths, num_lessons, language):
|
| 111 |
summarized_materials = get_keywords(file_paths)
|
| 112 |
-
course_outline = genarating_outline(summarized_materials, num_lessons, language)
|
| 113 |
return course_outline
|
| 114 |
|
| 115 |
def constructVDB(file_paths):
|
|
@@ -171,7 +170,7 @@ def searchVDB(search_sentence, paraphrase_embeddings_df, index):
|
|
| 171 |
|
| 172 |
return retrieved_chunks_list
|
| 173 |
|
| 174 |
-
def generateCourse(topic, materials, language, style_options):
|
| 175 |
system_message = 'You are a great AI teacher and linguist, skilled at writing informative and easy-to-understand course script based on given lesson topic and knowledge materials.'
|
| 176 |
|
| 177 |
user_message = f"""You are a great AI teacher and linguist,
|
|
@@ -199,7 +198,7 @@ def generateCourse(topic, materials, language, style_options):
|
|
| 199 |
'content': user_message},
|
| 200 |
]
|
| 201 |
|
| 202 |
-
response = get_completion_from_messages(messages)
|
| 203 |
return response
|
| 204 |
|
| 205 |
def decorate_user_question(user_question, retrieved_chunks_for_user):
|
|
@@ -236,9 +235,9 @@ def initialize_vdb(temp_file_paths):
|
|
| 236 |
st.success("Constructing vector database from provided materials...Done")
|
| 237 |
return embeddings_df, faiss_index
|
| 238 |
|
| 239 |
-
def initialize_outline(temp_file_paths, num_lessons, language):
|
| 240 |
with st.spinner('Generating Course Outline...'):
|
| 241 |
-
course_outline_list = courseOutlineGenerating(temp_file_paths, num_lessons, language)
|
| 242 |
st.success("Generating Course Outline...Done")
|
| 243 |
course_outline_string = ''
|
| 244 |
lessons_count = 0
|
|
@@ -251,14 +250,14 @@ def initialize_outline(temp_file_paths, num_lessons, language):
|
|
| 251 |
|
| 252 |
return course_outline_list
|
| 253 |
|
| 254 |
-
def initialize_content(course_outline_list, embeddings_df, faiss_index, language, style_options):
|
| 255 |
count_generating_content = 0
|
| 256 |
course_content_list = []
|
| 257 |
for lesson in course_outline_list:
|
| 258 |
count_generating_content += 1
|
| 259 |
with st.spinner(f"Writing content for lesson {count_generating_content}..."):
|
| 260 |
retrievedChunksList = searchVDB(lesson, embeddings_df, faiss_index)
|
| 261 |
-
courseContent = generateCourse(lesson, retrievedChunksList, language, style_options)
|
| 262 |
course_content_list.append(courseContent)
|
| 263 |
st.success(f"Writing content for lesson {count_generating_content}...Done")
|
| 264 |
with st.expander(f"Learn the lesson {count_generating_content} ", expanded=False):
|
|
@@ -364,9 +363,12 @@ def app():
|
|
| 364 |
|
| 365 |
if "OPENAI_API_KEY" not in st.session_state:
|
| 366 |
st.session_state["OPENAI_API_KEY"] = ''
|
|
|
|
|
|
|
| 367 |
if "openai_model" not in st.session_state:
|
| 368 |
-
st.session_state["openai_model"] = "gpt-
|
| 369 |
-
|
|
|
|
| 370 |
if "messages" not in st.session_state:
|
| 371 |
st.session_state.messages = []
|
| 372 |
|
|
@@ -425,7 +427,6 @@ def app():
|
|
| 425 |
|
| 426 |
''', unsafe_allow_html=True)
|
| 427 |
|
| 428 |
-
|
| 429 |
if btn:
|
| 430 |
if api_key != "sk-..." and api_key !="" and api_key.startswith("sk-"):
|
| 431 |
st.session_state.start_col1.empty()
|
|
@@ -438,10 +439,10 @@ def app():
|
|
| 438 |
#initialize app
|
| 439 |
temp_file_paths = initialize_file(added_files)
|
| 440 |
st.session_state["OPENAI_API_KEY"] = api_key
|
| 441 |
-
|
| 442 |
st.session_state.embeddings_df, st.session_state.faiss_index = initialize_vdb(temp_file_paths)
|
| 443 |
-
st.session_state.course_outline_list = initialize_outline(temp_file_paths, num_lessons, language)
|
| 444 |
-
st.session_state.course_content_list = initialize_content(st.session_state.course_outline_list, st.session_state.embeddings_df, st.session_state.faiss_index, language, style_options)
|
| 445 |
|
| 446 |
st.markdown('''
|
| 447 |
> 🤔 <font color = 'grey'> **Not satisfied with this course?** Simply click "Generate my course!" button to regenerate a new one! </font>
|
|
@@ -457,7 +458,7 @@ def app():
|
|
| 457 |
st.session_state.case_pay.empty()
|
| 458 |
announce.empty()
|
| 459 |
divider.empty()
|
| 460 |
-
warning = st.write("请输入正确的API Key令牌")
|
| 461 |
|
| 462 |
|
| 463 |
col1, col2 = st.columns([0.6,0.4])
|
|
@@ -485,10 +486,12 @@ def app():
|
|
| 485 |
st.write("Hello👋, how can I help you today? 😄")
|
| 486 |
|
| 487 |
# Display chat messages from history on app rerun
|
| 488 |
-
for message in st.session_state.
|
| 489 |
with st.chat_message(message["role"]):
|
| 490 |
-
st.markdown(message["content"]
|
| 491 |
|
|
|
|
|
|
|
| 492 |
# Display new user question.
|
| 493 |
with st.chat_message("user"):
|
| 494 |
st.markdown(user_question)
|
|
@@ -496,21 +499,30 @@ def app():
|
|
| 496 |
#这里的session.state就是保存了这个对话会话的一些基本信息和设置
|
| 497 |
retrieved_chunks_for_user = searchVDB(user_question, st.session_state.embeddings_df, st.session_state.faiss_index)
|
| 498 |
prompt = decorate_user_question(user_question, retrieved_chunks_for_user)
|
| 499 |
-
st.session_state.messages.append({"role": "user", "content":
|
| 500 |
|
| 501 |
# Display assistant response in chat message container
|
| 502 |
with st.chat_message("assistant"):
|
| 503 |
message_placeholder = st.empty()
|
| 504 |
full_response = ""
|
| 505 |
-
|
|
|
|
| 506 |
model=st.session_state["openai_model"],
|
| 507 |
-
messages=[
|
|
|
|
|
|
|
|
|
|
| 508 |
stream=True,
|
| 509 |
):
|
| 510 |
-
|
|
|
|
|
|
|
|
|
|
| 511 |
message_placeholder.markdown(full_response + "▌")
|
| 512 |
message_placeholder.markdown(full_response)
|
| 513 |
-
st.session_state.messages.append({"role": "assistant", "content":
|
|
|
|
|
|
|
| 514 |
|
| 515 |
|
| 516 |
if __name__ == "__main__":
|
|
|
|
| 1 |
import pandas as pd
|
| 2 |
import numpy as np
|
| 3 |
import faiss
|
| 4 |
+
from openai import OpenAI
|
| 5 |
import tempfile
|
| 6 |
from PyPDF2 import PdfReader
|
| 7 |
import io
|
|
|
|
| 16 |
#import jieba.analyse
|
| 17 |
import nltk
|
| 18 |
|
|
|
|
|
|
|
| 19 |
@st.cache_data
|
| 20 |
def download_nltk():
|
| 21 |
nltk.download('punkt')
|
|
|
|
| 63 |
return keywords_list
|
| 64 |
|
| 65 |
|
| 66 |
+
def get_completion_from_messages(client, messages, model="gpt-4-1106-preview", temperature=0):
    """Send a chat request through the OpenAI v1 client and return the reply text.

    Args:
        client: an ``openai.OpenAI`` client instance (or any object exposing
            ``chat.completions.create``).
        messages: list of ``{"role": ..., "content": ...}`` chat messages.
        model: chat model name passed straight through to the API.
        temperature: sampling temperature; 0 keeps answers deterministic.

    Returns:
        The assistant message content of the first completion choice.
    """
    # The original body contained a no-op ``client = client`` self-assignment;
    # the parameter is used directly instead.
    completion = client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=temperature,
    )
    return completion.choices[0].message.content
| 74 |
|
| 75 |
+
def genarating_outline(client, keywords, num_lessons,language):
|
| 76 |
system_message = 'You are a great AI teacher and linguist, skilled at create course outline based on summarized knowledge materials.'
|
| 77 |
user_message = f"""You are a great AI teacher and linguist,
|
| 78 |
skilled at generating course outline based on keywords of the course.
|
|
|
|
| 95 |
'content': user_message},
|
| 96 |
]
|
| 97 |
|
| 98 |
+
response = get_completion_from_messages(client, messages)
|
| 99 |
|
| 100 |
list_response = ['nothing in the answers..']
|
| 101 |
|
|
|
|
| 106 |
|
| 107 |
return list_response
|
| 108 |
|
| 109 |
+
def courseOutlineGenerating(client, file_paths, num_lessons, language):
    """Build a course outline for the given material files.

    Extracts keywords from every file in ``file_paths`` and feeds them to the
    outline generator, returning whatever ``genarating_outline`` produces.
    """
    keywords = get_keywords(file_paths)
    return genarating_outline(client, keywords, num_lessons, language)
| 113 |
|
| 114 |
def constructVDB(file_paths):
|
|
|
|
| 170 |
|
| 171 |
return retrieved_chunks_list
|
| 172 |
|
| 173 |
+
def generateCourse(client, topic, materials, language, style_options):
|
| 174 |
system_message = 'You are a great AI teacher and linguist, skilled at writing informative and easy-to-understand course script based on given lesson topic and knowledge materials.'
|
| 175 |
|
| 176 |
user_message = f"""You are a great AI teacher and linguist,
|
|
|
|
| 198 |
'content': user_message},
|
| 199 |
]
|
| 200 |
|
| 201 |
+
response = get_completion_from_messages(client, messages)
|
| 202 |
return response
|
| 203 |
|
| 204 |
def decorate_user_question(user_question, retrieved_chunks_for_user):
|
|
|
|
| 235 |
st.success("Constructing vector database from provided materials...Done")
|
| 236 |
return embeddings_df, faiss_index
|
| 237 |
|
| 238 |
+
def initialize_outline(client, temp_file_paths, num_lessons, language):
|
| 239 |
with st.spinner('Generating Course Outline...'):
|
| 240 |
+
course_outline_list = courseOutlineGenerating(client, temp_file_paths, num_lessons, language)
|
| 241 |
st.success("Generating Course Outline...Done")
|
| 242 |
course_outline_string = ''
|
| 243 |
lessons_count = 0
|
|
|
|
| 250 |
|
| 251 |
return course_outline_list
|
| 252 |
|
| 253 |
+
def initialize_content(client, course_outline_list, embeddings_df, faiss_index, language, style_options):
|
| 254 |
count_generating_content = 0
|
| 255 |
course_content_list = []
|
| 256 |
for lesson in course_outline_list:
|
| 257 |
count_generating_content += 1
|
| 258 |
with st.spinner(f"Writing content for lesson {count_generating_content}..."):
|
| 259 |
retrievedChunksList = searchVDB(lesson, embeddings_df, faiss_index)
|
| 260 |
+
courseContent = generateCourse(client, lesson, retrievedChunksList, language, style_options)
|
| 261 |
course_content_list.append(courseContent)
|
| 262 |
st.success(f"Writing content for lesson {count_generating_content}...Done")
|
| 263 |
with st.expander(f"Learn the lesson {count_generating_content} ", expanded=False):
|
|
|
|
| 363 |
|
| 364 |
if "OPENAI_API_KEY" not in st.session_state:
|
| 365 |
st.session_state["OPENAI_API_KEY"] = ''
|
| 366 |
+
#if "client" not in st.session_state:
|
| 367 |
+
# st.session_state["client"] = ''
|
| 368 |
if "openai_model" not in st.session_state:
|
| 369 |
+
st.session_state["openai_model"] = "gpt-4-1106-preview"
|
| 370 |
+
if "messages_ui" not in st.session_state:
|
| 371 |
+
st.session_state.messages_ui = []
|
| 372 |
if "messages" not in st.session_state:
|
| 373 |
st.session_state.messages = []
|
| 374 |
|
|
|
|
| 427 |
|
| 428 |
''', unsafe_allow_html=True)
|
| 429 |
|
|
|
|
| 430 |
if btn:
|
| 431 |
if api_key != "sk-..." and api_key !="" and api_key.startswith("sk-"):
|
| 432 |
st.session_state.start_col1.empty()
|
|
|
|
| 439 |
#initialize app
|
| 440 |
temp_file_paths = initialize_file(added_files)
|
| 441 |
st.session_state["OPENAI_API_KEY"] = api_key
|
| 442 |
+
client = OpenAI(api_key = st.session_state["OPENAI_API_KEY"])
|
| 443 |
st.session_state.embeddings_df, st.session_state.faiss_index = initialize_vdb(temp_file_paths)
|
| 444 |
+
st.session_state.course_outline_list = initialize_outline(client, temp_file_paths, num_lessons, language)
|
| 445 |
+
st.session_state.course_content_list = initialize_content(client, st.session_state.course_outline_list, st.session_state.embeddings_df, st.session_state.faiss_index, language, style_options)
|
| 446 |
|
| 447 |
st.markdown('''
|
| 448 |
> 🤔 <font color = 'grey'> **Not satisfied with this course?** Simply click "Generate my course!" button to regenerate a new one! </font>
|
|
|
|
| 458 |
st.session_state.case_pay.empty()
|
| 459 |
announce.empty()
|
| 460 |
divider.empty()
|
| 461 |
+
warning = st.write("请输入正确的OpenAI API Key令牌")
|
| 462 |
|
| 463 |
|
| 464 |
col1, col2 = st.columns([0.6,0.4])
|
|
|
|
| 486 |
st.write("Hello👋, how can I help you today? 😄")
|
| 487 |
|
| 488 |
# Display chat messages from history on app rerun
|
| 489 |
+
for message in st.session_state.messages_ui:
|
| 490 |
with st.chat_message(message["role"]):
|
| 491 |
+
st.markdown(message["content"])
|
| 492 |
|
| 493 |
+
#更新ui上显示的聊天记录
|
| 494 |
+
st.session_state.messages_ui.append({"role": "user", "content": user_question})
|
| 495 |
# Display new user question.
|
| 496 |
with st.chat_message("user"):
|
| 497 |
st.markdown(user_question)
|
|
|
|
| 499 |
#这里的session.state就是保存了这个对话会话的一些基本信息和设置
|
| 500 |
retrieved_chunks_for_user = searchVDB(user_question, st.session_state.embeddings_df, st.session_state.faiss_index)
|
| 501 |
prompt = decorate_user_question(user_question, retrieved_chunks_for_user)
|
| 502 |
+
st.session_state.messages.append({"role": "user", "content": prompt})
|
| 503 |
|
| 504 |
# Display assistant response in chat message container
|
| 505 |
with st.chat_message("assistant"):
|
| 506 |
message_placeholder = st.empty()
|
| 507 |
full_response = ""
|
| 508 |
+
client = OpenAI(api_key = st.session_state["OPENAI_API_KEY"])
|
| 509 |
+
for response in client.chat.completions.create(
|
| 510 |
model=st.session_state["openai_model"],
|
| 511 |
+
messages=[
|
| 512 |
+
{"role": m["role"], "content": m["content"]}
|
| 513 |
+
for m in st.session_state.messages #用chatbot那边的隐藏消息记录
|
| 514 |
+
],
|
| 515 |
stream=True,
|
| 516 |
):
|
| 517 |
+
try:
|
| 518 |
+
full_response += response.choices[0].delta.content
|
| 519 |
+
except:
|
| 520 |
+
full_response += ""
|
| 521 |
message_placeholder.markdown(full_response + "▌")
|
| 522 |
message_placeholder.markdown(full_response)
|
| 523 |
+
st.session_state.messages.append({"role": "assistant", "content": full_response})
|
| 524 |
+
st.session_state.messages_ui.append({"role": "assistant", "content": full_response})
|
| 525 |
+
|
| 526 |
|
| 527 |
|
| 528 |
if __name__ == "__main__":
|
requirements.txt
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
faiss_cpu==1.7.3
|
| 2 |
nltk==3.8.1
|
| 3 |
numpy==1.25.0
|
| 4 |
-
openai==
|
| 5 |
pandas==2.0.2
|
| 6 |
PyPDF2==3.0.1
|
| 7 |
sentence_transformers==2.2.2
|
|
|
|
| 1 |
faiss_cpu==1.7.3
|
| 2 |
nltk==3.8.1
|
| 3 |
numpy==1.25.0
|
| 4 |
+
openai==1.6.1
|
| 5 |
pandas==2.0.2
|
| 6 |
PyPDF2==3.0.1
|
| 7 |
sentence_transformers==2.2.2
|