Jonathan Bejarano committed on
Commit
276a68a
·
1 Parent(s): c0f9112

decreasing hallucination

Browse files
Files changed (2) hide show
  1. app.py +226 -47
  2. requirements.txt +3 -1
app.py CHANGED
@@ -3,6 +3,8 @@ from huggingface_hub import InferenceClient
3
  import re
4
  import random
5
  import os
 
 
6
  from dotenv import load_dotenv
7
 
8
  # Load environment variables from .env file if it exists
@@ -15,47 +17,206 @@ LOCAL_MODE = bool(BASE_URL and LOCAL_TOKEN)
15
  MODEL_NAME = os.getenv('MODEL_NAME', 'meta-llama/Llama-3.2-3B-Instruct')
16
 
17
 
18
- # List of countries for the game
19
  COUNTRIES = [
20
- "Afghanistan", "Albania", "Algeria", "Angola", "Argentina", "Armenia", "Australia", "Austria",
21
- "Bangladesh", "Belgium", "Belize", "Bolivia", "Bosnia and Herzegovina", "Botswana", "Brazil",
22
- "Bulgaria", "Burma", "Burundi", "Cambodia", "Canada", "Central African Republic", "Chad", "Chile",
23
- "China", "Colombia", "Costa Rica", "Croatia", "Cuba", "Czech Republic", "Democratic Republic of the Congo",
24
- "Denmark", "Dominican Republic", "Ecuador", "Egypt", "El Salvador", "Estonia", "Ethiopia", "Fiji",
25
- "Finland", "France", "Georgia", "Germany", "Ghana", "Greece", "Grenada", "Guatemala", "Guinea",
26
- "Guyana", "Haiti", "Honduras", "Hungary", "Iceland", "India", "Indonesia", "Iran", "Iraq", "Ireland",
27
- "Israel", "Italy", "Jamaica", "Japan", "Jordan", "Kenya", "Kuwait", "Kyrgyzstan", "Laos", "Latvia",
28
- "Lebanon", "Liberia", "Libya", "Liechtenstein", "Lithuania", "Luxembourg", "Macedonia", "Madagascar",
29
- "Malaysia", "Mali", "Malta", "Mexico", "Moldova", "Monaco", "Mongolia", "Montenegro", "Morocco",
30
- "Mozambique", "Nepal", "Netherlands", "New Zealand", "Nicaragua", "Niger", "Nigeria", "North Korea",
31
- "Norway", "Oman", "Pakistan", "Palestine", "Panama", "Papua New Guinea", "Paraguay", "Peru",
32
- "Philippines", "Poland", "Portugal", "Qatar", "Republic of the Congo", "Romania", "Russia", "Rwanda",
33
- "Samoa", "Saudi Arabia", "Serbia", "Singapore", "Slovakia", "South Korea", "Slovenia", "Somalia",
34
- "South Africa", "Spain", "Sri Lanka", "Sudan", "Suriname", "Swaziland", "Sweden", "Switzerland",
35
- "Syria", "Tajikistan", "Tanzania", "Thailand", "Tonga", "Trinidad and Tobago", "Tunisia", "Turkey",
36
- "Turkmenistan", "Uganda", "Ukraine", "United Arab Emirates", "United Kingdom", "United States",
37
- "Uruguay", "Uzbekistan", "Vanuatu", "Vatican City", "Venezuela", "Vietnam", "Yemen", "Zambia", "Zimbabwe"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  ]
39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  def get_system_message_with_country():
41
  """Generate a system message with a randomly selected country"""
42
- global selected_country
43
- selected_country = random.choice(COUNTRIES)
44
- COUNTRIES.remove(selected_country) # Ensure the same country isn't picked again
45
- print(selected_country)
 
 
 
 
 
 
46
  return f"""You are a friendly geography game host playing 20 questions with students. You are thinking of the country: {selected_country}
47
 
 
 
 
48
  RULES:
49
  1. NEVER reveal the country name ({selected_country}) in your responses
50
  2. Answer only 'Yes' or 'No' to their questions
51
  3. Keep track of how many questions they've asked
52
- 4. When they correctly guess {selected_country}, respond with: 'Congratulations! The country was <<{selected_country}>>'
53
  5. If they reach 20 questions without guessing correctly, respond with: 'Game over! The country was <<{selected_country}>>'
54
  6. Be encouraging and give helpful hints through your yes/no answers
55
  7. If they want to play again tell them they need to reload the page.
56
  8. IMPORTANT: Only accept the country name "{selected_country}" as correct, but Spelling is not important and they can ask a question like it is? Do NOT accept neighboring countries, similar countries, or regions that contain this country.
57
  9. If they guess a neighboring country or similar country, respond with "No" and continue the game.
58
- 10. Be very strict about the exact country match - only "{selected_country}" is the correct answer."""
 
59
 
60
  current_system = get_system_message_with_country()
61
 
@@ -67,11 +228,9 @@ def format_game_result(response):
67
  print("🔍 DEBUG - Regular response (no game end)")
68
 
69
  if "Congratulations" in response:
70
- return f"🎉 **Congratulations!** You correctly guessed **{selected_country}**! Well done! 🎉\n\nWould you like to play another round?"
71
  elif "Game over" in response:
72
- return f"😔 **Game Over!** You've used all 20 questions. The country I was thinking of was **{selected_country}**. 😔\n\nBetter luck next time! Would you like to try again?"
73
- global current_system # Ensure we're using the global variable
74
- current_system = get_system_message_with_country()
75
 
76
  return response
77
 
@@ -88,7 +247,12 @@ def respond(
88
  """
89
  For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
90
  """
91
-
 
 
 
 
 
92
 
93
  messages = [{"role": "system", "content": current_system}]
94
  messages.extend(history)
@@ -190,23 +354,38 @@ examples = [
190
  ["Is the currency the Euro?"],
191
  ]
192
 
193
- # Create wrapper function for local mode that doesn't expect OAuth token
194
- def custom_respond(message, history):
195
- # Hardcoded values - no additional inputs needed
196
- system_message = ""
197
- max_tokens = 2048
198
- temperature = 0.3 # Lower temperature for more consistent responses
199
- top_p = 0.7 # Lower top_p for more deterministic behavior
200
- return respond(message, history, system_message, max_tokens, temperature, top_p, None if LOCAL_MODE else hf_token)
201
-
202
- chatbot = gr.ChatInterface(
203
- custom_respond,
204
- type="messages",
205
- description=description,
206
- examples=examples,
207
- cache_examples=False,
208
- # No additional_inputs - values are hardcoded in wrapper functions
209
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
210
 
211
  with gr.Blocks() as demo:
212
  if not LOCAL_MODE:
 
3
  import re
4
  import random
5
  import os
6
+ import requests
7
+ from bs4 import BeautifulSoup
8
  from dotenv import load_dotenv
9
 
10
  # Load environment variables from .env file if it exists
 
17
  MODEL_NAME = os.getenv('MODEL_NAME', 'meta-llama/Llama-3.2-3B-Instruct')
18
 
19
 
20
+ # List of countries for the game with URLs
21
  COUNTRIES = [
22
+ {"name": "Algeria", "url": "/algeria-facts-for-kids.html"},
23
+ {"name": "Angola", "url": "/facts-about-angola.html"},
24
+ {"name": "Argentina", "url": "/argentina-facts.html"},
25
+ {"name": "Australia", "url": "/australia-facts.html"},
26
+ {"name": "Austria", "url": "/austria-facts.html"},
27
+ {"name": "Bahamas", "url": "/facts-about-the-bahamas.html"},
28
+ {"name": "Barbados", "url": "/barbados-facts.html"},
29
+ {"name": "Belgium", "url": "/belgium-facts.html"},
30
+ {"name": "Belize", "url": "/facts-about-belize.html"},
31
+ {"name": "Bhutan", "url": "/bhutan-facts.html"},
32
+ {"name": "Bolivia", "url": "/bolivia-facts.html"},
33
+ {"name": "Botswana", "url": "/facts-about-botswana.html"},
34
+ {"name": "Brazil", "url": "/brazil-facts.html"},
35
+ {"name": "Bulgaria", "url": "/facts-about-bulgaria.html"},
36
+ {"name": "Canada", "url": "/canada-facts-for-kids.html"},
37
+ {"name": "Chile", "url": "/chile-facts.html"},
38
+ {"name": "China", "url": "/china-facts.html"},
39
+ {"name": "Colombia", "url": "/colombia-facts.html"},
40
+ {"name": "Comoros", "url": "/comoros-facts.html"},
41
+ {"name": "Costa Rica", "url": "/costa-rica-facts.html"},
42
+ {"name": "Croatia", "url": "/croatia-facts.html"},
43
+ {"name": "Cuba", "url": "/cuba-facts.html"},
44
+ {"name": "Cyprus", "url": "/cyprus-facts-for-kids.html"},
45
+ {"name": "Denmark", "url": "/denmark-facts.html"},
46
+ {"name": "Dominican Republic", "url": "/dominican-republic-facts.html"},
47
+ {"name": "Ecuador", "url": "/ecuador-facts.html"},
48
+ {"name": "Egypt", "url": "/egypt-facts.html"},
49
+ {"name": "Estonia", "url": "/estonia-facts-for-kids.html"},
50
+ {"name": "Eswatini", "url": "/eswatini-facts.html"},
51
+ {"name": "Ethiopia", "url": "/ethiopia-facts.html"},
52
+ {"name": "Fiji", "url": "/facts-about-fiji.html"},
53
+ {"name": "Finland", "url": "/finland-facts.html"},
54
+ {"name": "France", "url": "/france-facts.html"},
55
+ {"name": "Georgia", "url": "/georgia-facts.html"},
56
+ {"name": "Germany", "url": "/germany-facts.html"},
57
+ {"name": "Ghana", "url": "/ghana-facts.html"},
58
+ {"name": "Greece", "url": "/greece-facts.html"},
59
+ {"name": "Greenland", "url": "/facts-about-greenland.html"},
60
+ {"name": "Guatemala", "url": "/guatemala-facts.html"},
61
+ {"name": "Guyana", "url": "/guyana-facts.html"},
62
+ {"name": "Honduras", "url": "/honduras-facts-for-kids.html"},
63
+ {"name": "Hong Kong", "url": "/facts-about-hong-kong.html"},
64
+ {"name": "Hungary", "url": "/hungary-facts.html"},
65
+ {"name": "Iceland", "url": "/iceland-facts.html"},
66
+ {"name": "India", "url": "/india-for-kids.html"},
67
+ {"name": "Indonesia", "url": "/indonesia-facts.html"},
68
+ {"name": "Iran", "url": "/iran-facts-for-kids.html"},
69
+ {"name": "Ireland", "url": "/ireland-for-kids.html"},
70
+ {"name": "Israel", "url": "/israel-facts.html"},
71
+ {"name": "Italy", "url": "/italy-facts.html"},
72
+ {"name": "Jamaica", "url": "/jamaica-facts.html"},
73
+ {"name": "Japan", "url": "/japan-facts.html"},
74
+ {"name": "Kenya", "url": "/facts-about-kenya.html"},
75
+ {"name": "Kiribati", "url": "/facts-about-kiribati.html"},
76
+ {"name": "Latvia", "url": "/latvia-facts-for-kids.html"},
77
+ {"name": "Lesotho", "url": "/lesotho-facts.html"},
78
+ {"name": "Liberia", "url": "/facts-about-liberia.html"},
79
+ {"name": "Lithuania", "url": "/lithuania-facts-for-kids.html"},
80
+ {"name": "Luxembourg", "url": "/luxembourg-facts.html"},
81
+ {"name": "Macao", "url": "/facts-about-macao.html"},
82
+ {"name": "Madagascar", "url": "/facts-about-madagascar.html"},
83
+ {"name": "Malaysia", "url": "/malaysia-facts.html"},
84
+ {"name": "Maldives", "url": "/maldives-facts.html"},
85
+ {"name": "Malta", "url": "/malta-for-kids.html"},
86
+ {"name": "Mauritius", "url": "/mauritius-facts.html"},
87
+ {"name": "Mexico", "url": "/mexico-facts.html"},
88
+ {"name": "Micronesia", "url": "/facts-about-micronesia.html"},
89
+ {"name": "Moldova", "url": "/moldova-facts-for-kids.html"},
90
+ {"name": "Monaco", "url": "/facts-about-monaco.html"},
91
+ {"name": "Morocco", "url": "/morocco-facts.html"},
92
+ {"name": "Mozambique", "url": "/mozambique-facts.html"},
93
+ {"name": "Myanmar", "url": "/myanmar-facts.html"},
94
+ {"name": "Namibia", "url": "/namibia-facts.html"},
95
+ {"name": "Nauru", "url": "/facts-about-nauru.html"},
96
+ {"name": "Nepal", "url": "/nepal-facts.html"},
97
+ {"name": "Netherlands", "url": "/facts-about-the-netherlands.html"},
98
+ {"name": "New Zealand", "url": "/new-zealand-facts.html"},
99
+ {"name": "Nicaragua", "url": "/nicaragua-facts.html"},
100
+ {"name": "Nigeria", "url": "/nigeria-facts.html"},
101
+ {"name": "Norway", "url": "/norway-facts.html"},
102
+ {"name": "Pakistan", "url": "/pakistan-facts.html"},
103
+ {"name": "Panama", "url": "/panama-facts.html"},
104
+ {"name": "Papua New Guinea", "url": "/papua-new-guinea.html"},
105
+ {"name": "Peru", "url": "/peru-facts.html"},
106
+ {"name": "Philippines", "url": "/philippines-facts.html"},
107
+ {"name": "Poland", "url": "/poland-facts.html"},
108
+ {"name": "Portugal", "url": "/portugal-facts.html"},
109
+ {"name": "Puerto Rico", "url": "/facts-about-puerto-rico.html"},
110
+ {"name": "Qatar", "url": "/qatar-facts.html"},
111
+ {"name": "Romania", "url": "/romania-facts-for-kids.html"},
112
+ {"name": "Russia", "url": "/russia-facts.html"},
113
+ {"name": "Samoa", "url": "/facts-about-samoa.html"},
114
+ {"name": "San Marino", "url": "/facts-about-san-marino.html"},
115
+ {"name": "Serbia", "url": "/facts-about-serbia.html"},
116
+ {"name": "Seychelles", "url": "/seychelles-facts.html"},
117
+ {"name": "Singapore", "url": "/singapore-facts.html"},
118
+ {"name": "Solomon Islands", "url": "/solomon-islands-facts.html"},
119
+ {"name": "South Africa", "url": "/south-africa-for-kids.html"},
120
+ {"name": "South Korea", "url": "/south-korea-facts.html"},
121
+ {"name": "Spain", "url": "/spain-facts.html"},
122
+ {"name": "Sri Lanka", "url": "/sri-lanka-facts.html"},
123
+ {"name": "Suriname", "url": "/suriname-facts.html"},
124
+ {"name": "Sweden", "url": "/Sweden-facts.html"},
125
+ {"name": "Switzerland", "url": "/switzerland-facts.html"},
126
+ {"name": "Taiwan", "url": "/taiwan-facts.html"},
127
+ {"name": "Tanzania", "url": "/tanzania-facts.html"},
128
+ {"name": "Thailand", "url": "/thailand-facts.html"},
129
+ {"name": "Togo", "url": "/togo-facts-for-kids.html"},
130
+ {"name": "Tonga", "url": "/facts-about-tonga.html"},
131
+ {"name": "Tunisia", "url": "/tunisia-facts.html"},
132
+ {"name": "Türkiye", "url": "/turkey-facts.html"},
133
+ {"name": "Tuvalu", "url": "/facts-about-tuvalu.html"},
134
+ {"name": "Uganda", "url": "/facts-about-uganda.html"},
135
+ {"name": "Ukraine", "url": "/ukraine-for-kids.html"},
136
+ {"name": "United Arab Emirates", "url": "/uae-facts.html"},
137
+ {"name": "United Kingdom", "url": "/uk-facts.html"},
138
+ {"name": "United States of America", "url": "/usa-facts.html"},
139
+ {"name": "Uruguay", "url": "/uruguay-facts.html"},
140
+ {"name": "Vanuatu", "url": "/facts-about-vanuatu.html"},
141
+ {"name": "Venezuela", "url": "/venezuela-for-kids.html"},
142
+ {"name": "Vietnam", "url": "/vietnam-facts.html"},
143
+ {"name": "Zambia", "url": "/zambia-facts.html"}
144
  ]
145
 
146
def fetch_country_facts(country_url):
    """Fetch facts about a country from the Kids World Travel Guide website.

    Args:
        country_url: Site-relative path of the country's page (for example
            "/france-facts.html"). It is appended to the site's base URL.

    Returns:
        Up to 10 extracted text snippets joined by newlines, cut to 2000
        characters plus "..." when longer. If the request or the parsing
        fails, or if no snippet passes the filters, a fixed fallback
        sentence is returned instead, so the caller never receives an
        empty facts section.
    """
    base_url = "https://www.kids-world-travel-guide.com"
    full_url = base_url + country_url
    fallback = "Unable to fetch additional facts about this country."

    try:
        response = requests.get(full_url, timeout=10)
        response.raise_for_status()

        # Parse the HTML content
        soup = BeautifulSoup(response.content, 'html.parser')

        facts = []

        # Inspect only the first 10 <p> tags; skip short snippets and
        # anything that starts with "Related" (navigation text).
        for p in soup.find_all('p')[:10]:
            text = p.get_text().strip()
            if len(text) > 50 and not text.startswith('Related'):
                facts.append(text)

        # Inspect only the first 15 <li> tags; keep mid-length entries.
        for li in soup.find_all('li')[:15]:
            text = li.get_text().strip()
            if 20 < len(text) < 200:
                facts.append(text)

        # Paragraphs were collected first, so list items only fill whatever
        # is left of the 10-snippet budget.
        facts_text = '\n'.join(facts[:10])

        # An empty result would leave the prompt's facts section blank;
        # report it the same way as a failed fetch.
        if not facts_text:
            print(f"No usable facts found for {country_url}")
            return fallback

        # Truncate if too long to avoid token limits
        if len(facts_text) > 2000:
            facts_text = facts_text[:2000] + "..."

        return facts_text

    except Exception as e:
        # Deliberately broad: fact fetching is best-effort and must not
        # prevent the game from starting.
        print(f"Error fetching facts for {country_url}: {e}")
        return fallback
190
+
191
  def get_system_message_with_country():
192
  """Pick a random country, fetch its facts, and build the game's system prompt.

  Side effects: rebinds the module-level globals `selected_country` (the
  chosen entry's "name") and `selected_country_dict` (the chosen COUNTRIES
  entry), and prints the selected name and its URL path to stdout.

  Returns:
      The system prompt string. It embeds the country name, the text
      returned by fetch_country_facts() for the entry's "url", and the
      numbered game rules.
  """
193
+ global selected_country, selected_country_dict
194
+ selected_country_dict = random.choice(COUNTRIES)
195
+ selected_country = selected_country_dict["name"]
196
+
197
+ # Log the choice, then fetch facts for the selected country's page
198
+ print(f"Selected country for this session: {selected_country}")
199
+ print(f"Fetching facts from: {selected_country_dict['url']}")
200
+
201
+ country_facts = fetch_country_facts(selected_country_dict["url"])
202
+
203
  return f"""You are a friendly geography game host playing 20 questions with students. You are thinking of the country: {selected_country}
204
 
205
+ COUNTRY FACTS (use these to answer questions accurately - DO NOT reveal the country name):
206
+ {country_facts}
207
+
208
  RULES:
209
  1. NEVER reveal the country name ({selected_country}) in your responses
210
  2. Answer only 'Yes' or 'No' to their questions
211
  3. Keep track of how many questions they've asked
212
+ 4. When they correctly guess or ask if it is {selected_country}, respond with: 'Congratulations! The country was <<{selected_country}>>'
213
  5. If they reach 20 questions without guessing correctly, respond with: 'Game over! The country was <<{selected_country}>>'
214
  6. Be encouraging and give helpful hints through your yes/no answers
215
  7. If they want to play again tell them they need to reload the page.
216
  8. IMPORTANT: Only accept the country name "{selected_country}" as correct, but Spelling is not important and they can ask a question like it is? Do NOT accept neighboring countries, similar countries, or regions that contain this country.
217
  9. If they guess a neighboring country or similar country, respond with "No" and continue the game.
218
+ 10. Be very strict about the exact country match - only "{selected_country}" is the correct answer.
219
+ 11. Use the COUNTRY FACTS above to provide accurate yes/no answers - do not make up information."""
220
 
221
  current_system = get_system_message_with_country()
222
 
 
228
  print("🔍 DEBUG - Regular response (no game end)")
229
 
230
  if "Congratulations" in response:
231
+ return f"🎉 **Congratulations!** You correctly guessed **{selected_country}**! Well done! 🎉\n\nTo play another round, please start a new conversation or reload the page."
232
  elif "Game over" in response:
233
+ return f"😔 **Game Over!** You've used all 20 questions. The country I was thinking of was **{selected_country}**. 😔\n\nTo try again, please start a new conversation or reload the page."
 
 
234
 
235
  return response
236
 
 
247
  """
248
  For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
249
  """
250
+ global current_system
251
+
252
+ # If this is the start of a new conversation (empty history), generate a new country
253
+ if not history:
254
+ current_system = get_system_message_with_country()
255
+ print(f"🔍 DEBUG - New session started, selected country: {selected_country}")
256
 
257
  messages = [{"role": "system", "content": current_system}]
258
  messages.extend(history)
 
354
  ["Is the currency the Euro?"],
355
  ]
356
 
357
# Build the chat interface; the callback and its extra inputs depend on the run mode.
if not LOCAL_MODE:
    # Cloud mode: respond() is registered directly. The four hidden
    # components supply fixed values for system_message, max_tokens,
    # temperature and top_p, in that order.
    # NOTE(review): presumably respond() also receives an OAuth token from
    # Gradio in this mode - confirm against respond()'s signature.
    chatbot = gr.ChatInterface(
        respond,
        type="messages",
        description=description,
        examples=examples,
        cache_examples=False,
        additional_inputs=[
            gr.Textbox(value="", visible=False),
            gr.Slider(minimum=1, maximum=4096, value=2048, visible=False),
            gr.Slider(minimum=0.1, maximum=2.0, value=0.3, visible=False),
            gr.Slider(minimum=0.1, maximum=1.0, value=0.6, visible=False),
        ],
    )
else:
    def custom_respond(message, history):
        """Forward a chat turn to respond() with fixed settings and no token.

        Passes an empty system message, max_tokens=2048, temperature=0.3,
        top_p=0.6 and None as the final (token) argument.
        """
        return respond(message, history, "", 2048, 0.3, 0.6, None)

    # Local mode: only message and history come from the UI.
    chatbot = gr.ChatInterface(
        custom_respond,
        type="messages",
        description=description,
        examples=examples,
        cache_examples=False,
    )
389
 
390
  with gr.Blocks() as demo:
391
  if not LOCAL_MODE:
requirements.txt CHANGED
@@ -1,3 +1,5 @@
1
  gradio
2
  gradio[oauth]
3
- python-dotenv
 
 
 
1
  gradio
2
  gradio[oauth]
3
+ python-dotenv
4
+ requests
5
+ beautifulsoup4