Spaces:
Sleeping
Sleeping
Commit
ยท
9b1e3db
0
Parent(s):
Initial project commit with LFS
Browse files- .devcontainer/devcontainer.json +33 -0
- .gitattributes +5 -0
- .gitignore +232 -0
- .python-version +1 -0
- LICENSE +201 -0
- README.md +429 -0
- assets/ShinhanCard_Logo.png +3 -0
- assets/Synapse.png +3 -0
- config.py +48 -0
- data/big_data_set1_f.csv +3 -0
- data/big_data_set2_f.csv +3 -0
- data/big_data_set3_f.csv +3 -0
- data/festival_df.csv +3 -0
- data/final_df.csv +3 -0
- dict +0 -0
- format +0 -0
- list +0 -0
- modules/filtering.py +397 -0
- modules/knowledge_base.py +103 -0
- modules/llm_provider.py +51 -0
- modules/profile_utils.py +46 -0
- modules/visualization.py +230 -0
- orchestrator.py +247 -0
- pyproject.toml +21 -0
- requirements.txt +25 -0
- streamlit_app.py +500 -0
- tools/festival_recommender.py +30 -0
- tools/marketing_strategy.py +230 -0
- tools/profile_analyzer.py +205 -0
- tools/tool_loader.py +24 -0
- utils/parser_utils.py +52 -0
- uv.lock +0 -0
- vectorstore/faiss_festival/index.faiss +3 -0
- vectorstore/faiss_festival/index.pkl +3 -0
- vectorstore/faiss_marketing/index.faiss +3 -0
- vectorstore/faiss_marketing/index.pkl +3 -0
- ๊ธฐํ/create_faiss_festival.py +106 -0
- ๊ธฐํ/create_faiss_marketing.py +99 -0
- ๊ธฐํ/create_final_df.py +530 -0
- ๊ธฐํ/feastival_df_add_keywords.py +118 -0
- ๊ธฐํ/festival_df_first.py +58 -0
- ๊ธฐํ/festival_df_processing.py +235 -0
- ๊ธฐํ/festival_df_second.py +78 -0
.devcontainer/devcontainer.json
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "Python 3",
|
| 3 |
+
// Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
|
| 4 |
+
"image": "mcr.microsoft.com/devcontainers/python:1-3.11-bullseye",
|
| 5 |
+
"customizations": {
|
| 6 |
+
"codespaces": {
|
| 7 |
+
"openFiles": [
|
| 8 |
+
"README.md",
|
| 9 |
+
"streamlit_app.py"
|
| 10 |
+
]
|
| 11 |
+
},
|
| 12 |
+
"vscode": {
|
| 13 |
+
"settings": {},
|
| 14 |
+
"extensions": [
|
| 15 |
+
"ms-python.python",
|
| 16 |
+
"ms-python.vscode-pylance"
|
| 17 |
+
]
|
| 18 |
+
}
|
| 19 |
+
},
|
| 20 |
+
"updateContentCommand": "[ -f packages.txt ] && sudo apt update && sudo apt upgrade -y && sudo xargs apt install -y <packages.txt; [ -f requirements.txt ] && pip3 install --user -r requirements.txt; pip3 install --user streamlit; echo 'โ
Packages installed and Requirements met'",
|
| 21 |
+
"postAttachCommand": {
|
| 22 |
+
"server": "streamlit run streamlit_app.py --server.enableCORS false --server.enableXsrfProtection false"
|
| 23 |
+
},
|
| 24 |
+
"portsAttributes": {
|
| 25 |
+
"8501": {
|
| 26 |
+
"label": "Application",
|
| 27 |
+
"onAutoForward": "openPreview"
|
| 28 |
+
}
|
| 29 |
+
},
|
| 30 |
+
"forwardPorts": [
|
| 31 |
+
8501
|
| 32 |
+
]
|
| 33 |
+
}
|
.gitattributes
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
data/*.csv filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
data/final_df.csv filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
vectorstore/faiss_festival/* filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
vectorstore/faiss_marketing/* filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
assets/*.png filter=lfs diff=lfs merge=lfs -text
|
.gitignore
ADDED
|
@@ -0,0 +1,232 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Byte-compiled / optimized / DLL files
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[cod]
|
| 4 |
+
*$py.class
|
| 5 |
+
|
| 6 |
+
# C extensions
|
| 7 |
+
*.so
|
| 8 |
+
|
| 9 |
+
# Distribution / packaging
|
| 10 |
+
.Python
|
| 11 |
+
build/
|
| 12 |
+
develop-eggs/
|
| 13 |
+
dist/
|
| 14 |
+
downloads/
|
| 15 |
+
eggs/
|
| 16 |
+
.eggs/
|
| 17 |
+
lib/
|
| 18 |
+
lib64/
|
| 19 |
+
parts/
|
| 20 |
+
sdist/
|
| 21 |
+
var/
|
| 22 |
+
wheels/
|
| 23 |
+
share/python-wheels/
|
| 24 |
+
*.egg-info/
|
| 25 |
+
.installed.cfg
|
| 26 |
+
*.egg
|
| 27 |
+
MANIFEST
|
| 28 |
+
|
| 29 |
+
# PyInstaller
|
| 30 |
+
# Usually these files are written by a python script from a template
|
| 31 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
| 32 |
+
*.manifest
|
| 33 |
+
*.spec
|
| 34 |
+
|
| 35 |
+
# Installer logs
|
| 36 |
+
pip-log.txt
|
| 37 |
+
pip-delete-this-directory.txt
|
| 38 |
+
|
| 39 |
+
# Unit test / coverage reports
|
| 40 |
+
htmlcov/
|
| 41 |
+
.tox/
|
| 42 |
+
.nox/
|
| 43 |
+
.coverage
|
| 44 |
+
.coverage.*
|
| 45 |
+
.cache
|
| 46 |
+
nosetests.xml
|
| 47 |
+
coverage.xml
|
| 48 |
+
*.cover
|
| 49 |
+
*.py,cover
|
| 50 |
+
.hypothesis/
|
| 51 |
+
.pytest_cache/
|
| 52 |
+
cover/
|
| 53 |
+
|
| 54 |
+
# Translations
|
| 55 |
+
*.mo
|
| 56 |
+
*.pot
|
| 57 |
+
|
| 58 |
+
# Django stuff:
|
| 59 |
+
*.log
|
| 60 |
+
local_settings.py
|
| 61 |
+
db.sqlite3
|
| 62 |
+
db.sqlite3-journal
|
| 63 |
+
|
| 64 |
+
# Flask stuff:
|
| 65 |
+
instance/
|
| 66 |
+
.webassets-cache
|
| 67 |
+
|
| 68 |
+
# Scrapy stuff:
|
| 69 |
+
.scrapy
|
| 70 |
+
|
| 71 |
+
# Sphinx documentation
|
| 72 |
+
docs/_build/
|
| 73 |
+
|
| 74 |
+
# PyBuilder
|
| 75 |
+
.pybuilder/
|
| 76 |
+
target/
|
| 77 |
+
|
| 78 |
+
# Jupyter Notebook
|
| 79 |
+
.ipynb_checkpoints
|
| 80 |
+
|
| 81 |
+
# IPython
|
| 82 |
+
profile_default/
|
| 83 |
+
ipython_config.py
|
| 84 |
+
|
| 85 |
+
# pyenv
|
| 86 |
+
# For a library or package, you might want to ignore these files since the code is
|
| 87 |
+
# intended to run in multiple environments; otherwise, check them in:
|
| 88 |
+
# .python-version
|
| 89 |
+
|
| 90 |
+
# pipenv
|
| 91 |
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
| 92 |
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
| 93 |
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
| 94 |
+
# install all needed dependencies.
|
| 95 |
+
#Pipfile.lock
|
| 96 |
+
|
| 97 |
+
# poetry
|
| 98 |
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
| 99 |
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
| 100 |
+
# commonly ignored for libraries.
|
| 101 |
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
| 102 |
+
#poetry.lock
|
| 103 |
+
|
| 104 |
+
# pdm
|
| 105 |
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
| 106 |
+
#pdm.lock
|
| 107 |
+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
| 108 |
+
# in version control.
|
| 109 |
+
# https://pdm.fming.dev/#use-with-ide
|
| 110 |
+
.pdm.toml
|
| 111 |
+
|
| 112 |
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
| 113 |
+
__pypackages__/
|
| 114 |
+
|
| 115 |
+
# Celery stuff
|
| 116 |
+
celerybeat-schedule
|
| 117 |
+
celerybeat.pid
|
| 118 |
+
|
| 119 |
+
# SageMath parsed files
|
| 120 |
+
*.sage.py
|
| 121 |
+
|
| 122 |
+
# Environments
|
| 123 |
+
.env
|
| 124 |
+
.venv
|
| 125 |
+
env/
|
| 126 |
+
venv/
|
| 127 |
+
ENV/
|
| 128 |
+
env.bak/
|
| 129 |
+
venv.bak/
|
| 130 |
+
|
| 131 |
+
# Spyder project settings
|
| 132 |
+
.spyderproject
|
| 133 |
+
.spyproject
|
| 134 |
+
|
| 135 |
+
# Rope project settings
|
| 136 |
+
.ropeproject
|
| 137 |
+
|
| 138 |
+
# mkdocs documentation
|
| 139 |
+
/site
|
| 140 |
+
|
| 141 |
+
# mypy
|
| 142 |
+
.mypy_cache/
|
| 143 |
+
.dmypy.json
|
| 144 |
+
dmypy.json
|
| 145 |
+
|
| 146 |
+
# Pyre type checker
|
| 147 |
+
.pyre/
|
| 148 |
+
|
| 149 |
+
# pytype static type analyzer
|
| 150 |
+
.pytype/
|
| 151 |
+
|
| 152 |
+
# Cython debug symbols
|
| 153 |
+
cython_debug/
|
| 154 |
+
|
| 155 |
+
# PyCharm
|
| 156 |
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
| 157 |
+
# be added to the global gitignore or merged into this project gitignore. For a PyCharm
|
| 158 |
+
# project, it is recommended to include the following files:
|
| 159 |
+
# .idea/
|
| 160 |
+
# *.iml
|
| 161 |
+
# *.ipr
|
| 162 |
+
# *.iws
|
| 163 |
+
|
| 164 |
+
# VS Code
|
| 165 |
+
.vscode/
|
| 166 |
+
|
| 167 |
+
# macOS
|
| 168 |
+
.DS_Store
|
| 169 |
+
.AppleDouble
|
| 170 |
+
.LSOverride
|
| 171 |
+
|
| 172 |
+
# Windows
|
| 173 |
+
Thumbs.db
|
| 174 |
+
Thumbs.db:encryptable
|
| 175 |
+
ehthumbs.db
|
| 176 |
+
ehthumbs_vista.db
|
| 177 |
+
*.tmp
|
| 178 |
+
*.temp
|
| 179 |
+
Desktop.ini
|
| 180 |
+
$RECYCLE.BIN/
|
| 181 |
+
*.cab
|
| 182 |
+
*.msi
|
| 183 |
+
*.msix
|
| 184 |
+
*.msm
|
| 185 |
+
*.msp
|
| 186 |
+
*.lnk
|
| 187 |
+
|
| 188 |
+
# Linux
|
| 189 |
+
*~
|
| 190 |
+
|
| 191 |
+
# temporary files which can be created if a process still has a handle open of a deleted file
|
| 192 |
+
.fuse_hidden*
|
| 193 |
+
|
| 194 |
+
# KDE directory preferences
|
| 195 |
+
.directory
|
| 196 |
+
|
| 197 |
+
# Linux trash folder which might appear on any partition or disk
|
| 198 |
+
.Trash-*
|
| 199 |
+
|
| 200 |
+
# .nfs files are created when an open file is removed but is still being accessed
|
| 201 |
+
.nfs*
|
| 202 |
+
|
| 203 |
+
# Streamlit
|
| 204 |
+
.streamlit/
|
| 205 |
+
|
| 206 |
+
# Project specific
|
| 207 |
+
*.log
|
| 208 |
+
*.tmp
|
| 209 |
+
temp/
|
| 210 |
+
tmp/
|
| 211 |
+
|
| 212 |
+
# Data files (if they contain sensitive information)
|
| 213 |
+
# data/
|
| 214 |
+
# *.csv
|
| 215 |
+
# *.json
|
| 216 |
+
# *.pkl
|
| 217 |
+
# *.npy
|
| 218 |
+
|
| 219 |
+
# Model files (if they are large)
|
| 220 |
+
# *.model
|
| 221 |
+
# *.pkl
|
| 222 |
+
# *.h5
|
| 223 |
+
# *.pt
|
| 224 |
+
# *.pth
|
| 225 |
+
|
| 226 |
+
# Jupyter notebook checkpoints
|
| 227 |
+
.ipynb_checkpoints/
|
| 228 |
+
|
| 229 |
+
# Virtual environment
|
| 230 |
+
.venv/
|
| 231 |
+
venv/
|
| 232 |
+
env/
|
.python-version
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
3.11
|
LICENSE
ADDED
|
@@ -0,0 +1,201 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Apache License
|
| 2 |
+
Version 2.0, January 2004
|
| 3 |
+
http://www.apache.org/licenses/
|
| 4 |
+
|
| 5 |
+
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
| 6 |
+
|
| 7 |
+
1. Definitions.
|
| 8 |
+
|
| 9 |
+
"License" shall mean the terms and conditions for use, reproduction,
|
| 10 |
+
and distribution as defined by Sections 1 through 9 of this document.
|
| 11 |
+
|
| 12 |
+
"Licensor" shall mean the copyright owner or entity authorized by
|
| 13 |
+
the copyright owner that is granting the License.
|
| 14 |
+
|
| 15 |
+
"Legal Entity" shall mean the union of the acting entity and all
|
| 16 |
+
other entities that control, are controlled by, or are under common
|
| 17 |
+
control with that entity. For the purposes of this definition,
|
| 18 |
+
"control" means (i) the power, direct or indirect, to cause the
|
| 19 |
+
direction or management of such entity, whether by contract or
|
| 20 |
+
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
| 21 |
+
outstanding shares, or (iii) beneficial ownership of such entity.
|
| 22 |
+
|
| 23 |
+
"You" (or "Your") shall mean an individual or Legal Entity
|
| 24 |
+
exercising permissions granted by this License.
|
| 25 |
+
|
| 26 |
+
"Source" form shall mean the preferred form for making modifications,
|
| 27 |
+
including but not limited to software source code, documentation
|
| 28 |
+
source, and configuration files.
|
| 29 |
+
|
| 30 |
+
"Object" form shall mean any form resulting from mechanical
|
| 31 |
+
transformation or translation of a Source form, including but
|
| 32 |
+
not limited to compiled object code, generated documentation,
|
| 33 |
+
and conversions to other media types.
|
| 34 |
+
|
| 35 |
+
"Work" shall mean the work of authorship, whether in Source or
|
| 36 |
+
Object form, made available under the License, as indicated by a
|
| 37 |
+
copyright notice that is included in or attached to the work
|
| 38 |
+
(an example is provided in the Appendix below).
|
| 39 |
+
|
| 40 |
+
"Derivative Works" shall mean any work, whether in Source or Object
|
| 41 |
+
form, that is based on (or derived from) the Work and for which the
|
| 42 |
+
editorial revisions, annotations, elaborations, or other modifications
|
| 43 |
+
represent, as a whole, an original work of authorship. For the purposes
|
| 44 |
+
of this License, Derivative Works shall not include works that remain
|
| 45 |
+
separable from, or merely link (or bind by name) to the interfaces of,
|
| 46 |
+
the Work and Derivative Works thereof.
|
| 47 |
+
|
| 48 |
+
"Contribution" shall mean any work of authorship, including
|
| 49 |
+
the original version of the Work and any modifications or additions
|
| 50 |
+
to that Work or Derivative Works thereof, that is intentionally
|
| 51 |
+
submitted to Licensor for inclusion in the Work by the copyright owner
|
| 52 |
+
or by an individual or Legal Entity authorized to submit on behalf of
|
| 53 |
+
the copyright owner. For the purposes of this definition, "submitted"
|
| 54 |
+
means any form of electronic, verbal, or written communication sent
|
| 55 |
+
to the Licensor or its representatives, including but not limited to
|
| 56 |
+
communication on electronic mailing lists, source code control systems,
|
| 57 |
+
and issue tracking systems that are managed by, or on behalf of, the
|
| 58 |
+
Licensor for the purpose of discussing and improving the Work, but
|
| 59 |
+
excluding communication that is conspicuously marked or otherwise
|
| 60 |
+
designated in writing by the copyright owner as "Not a Contribution."
|
| 61 |
+
|
| 62 |
+
"Contributor" shall mean Licensor and any individual or Legal Entity
|
| 63 |
+
on behalf of whom a Contribution has been received by Licensor and
|
| 64 |
+
subsequently incorporated within the Work.
|
| 65 |
+
|
| 66 |
+
2. Grant of Copyright License. Subject to the terms and conditions of
|
| 67 |
+
this License, each Contributor hereby grants to You a perpetual,
|
| 68 |
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
| 69 |
+
copyright license to reproduce, prepare Derivative Works of,
|
| 70 |
+
publicly display, publicly perform, sublicense, and distribute the
|
| 71 |
+
Work and such Derivative Works in Source or Object form.
|
| 72 |
+
|
| 73 |
+
3. Grant of Patent License. Subject to the terms and conditions of
|
| 74 |
+
this License, each Contributor hereby grants to You a perpetual,
|
| 75 |
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
| 76 |
+
(except as stated in this section) patent license to make, have made,
|
| 77 |
+
use, offer to sell, sell, import, and otherwise transfer the Work,
|
| 78 |
+
where such license applies only to those patent claims licensable
|
| 79 |
+
by such Contributor that are necessarily infringed by their
|
| 80 |
+
Contribution(s) alone or by combination of their Contribution(s)
|
| 81 |
+
with the Work to which such Contribution(s) was submitted. If You
|
| 82 |
+
institute patent litigation against any entity (including a
|
| 83 |
+
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
| 84 |
+
or a Contribution incorporated within the Work constitutes direct
|
| 85 |
+
or contributory patent infringement, then any patent licenses
|
| 86 |
+
granted to You under this License for that Work shall terminate
|
| 87 |
+
as of the date such litigation is filed.
|
| 88 |
+
|
| 89 |
+
4. Redistribution. You may reproduce and distribute copies of the
|
| 90 |
+
Work or Derivative Works thereof in any medium, with or without
|
| 91 |
+
modifications, and in Source or Object form, provided that You
|
| 92 |
+
meet the following conditions:
|
| 93 |
+
|
| 94 |
+
(a) You must give any other recipients of the Work or
|
| 95 |
+
Derivative Works a copy of this License; and
|
| 96 |
+
|
| 97 |
+
(b) You must cause any modified files to carry prominent notices
|
| 98 |
+
stating that You changed the files; and
|
| 99 |
+
|
| 100 |
+
(c) You must retain, in the Source form of any Derivative Works
|
| 101 |
+
that You distribute, all copyright, patent, trademark, and
|
| 102 |
+
attribution notices from the Source form of the Work,
|
| 103 |
+
excluding those notices that do not pertain to any part of
|
| 104 |
+
the Derivative Works; and
|
| 105 |
+
|
| 106 |
+
(d) If the Work includes a "NOTICE" text file as part of its
|
| 107 |
+
distribution, then any Derivative Works that You distribute must
|
| 108 |
+
include a readable copy of the attribution notices contained
|
| 109 |
+
within such NOTICE file, excluding those notices that do not
|
| 110 |
+
pertain to any part of the Derivative Works, in at least one
|
| 111 |
+
of the following places: within a NOTICE text file distributed
|
| 112 |
+
as part of the Derivative Works; within the Source form or
|
| 113 |
+
documentation, if provided along with the Derivative Works; or,
|
| 114 |
+
within a display generated by the Derivative Works, if and
|
| 115 |
+
wherever such third-party notices normally appear. The contents
|
| 116 |
+
of the NOTICE file are for informational purposes only and
|
| 117 |
+
do not modify the License. You may add Your own attribution
|
| 118 |
+
notices within Derivative Works that You distribute, alongside
|
| 119 |
+
or as an addendum to the NOTICE text from the Work, provided
|
| 120 |
+
that such additional attribution notices cannot be construed
|
| 121 |
+
as modifying the License.
|
| 122 |
+
|
| 123 |
+
You may add Your own copyright statement to Your modifications and
|
| 124 |
+
may provide additional or different license terms and conditions
|
| 125 |
+
for use, reproduction, or distribution of Your modifications, or
|
| 126 |
+
for any such Derivative Works as a whole, provided Your use,
|
| 127 |
+
reproduction, and distribution of the Work otherwise complies with
|
| 128 |
+
the conditions stated in this License.
|
| 129 |
+
|
| 130 |
+
5. Submission of Contributions. Unless You explicitly state otherwise,
|
| 131 |
+
any Contribution intentionally submitted for inclusion in the Work
|
| 132 |
+
by You to the Licensor shall be under the terms and conditions of
|
| 133 |
+
this License, without any additional terms or conditions.
|
| 134 |
+
Notwithstanding the above, nothing herein shall supersede or modify
|
| 135 |
+
the terms of any separate license agreement you may have executed
|
| 136 |
+
with Licensor regarding such Contributions.
|
| 137 |
+
|
| 138 |
+
6. Trademarks. This License does not grant permission to use the trade
|
| 139 |
+
names, trademarks, service marks, or product names of the Licensor,
|
| 140 |
+
except as required for reasonable and customary use in describing the
|
| 141 |
+
origin of the Work and reproducing the content of the NOTICE file.
|
| 142 |
+
|
| 143 |
+
7. Disclaimer of Warranty. Unless required by applicable law or
|
| 144 |
+
agreed to in writing, Licensor provides the Work (and each
|
| 145 |
+
Contributor provides its Contributions) on an "AS IS" BASIS,
|
| 146 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
| 147 |
+
implied, including, without limitation, any warranties or conditions
|
| 148 |
+
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
| 149 |
+
PARTICULAR PURPOSE. You are solely responsible for determining the
|
| 150 |
+
appropriateness of using or redistributing the Work and assume any
|
| 151 |
+
risks associated with Your exercise of permissions under this License.
|
| 152 |
+
|
| 153 |
+
8. Limitation of Liability. In no event and under no legal theory,
|
| 154 |
+
whether in tort (including negligence), contract, or otherwise,
|
| 155 |
+
unless required by applicable law (such as deliberate and grossly
|
| 156 |
+
negligent acts) or agreed to in writing, shall any Contributor be
|
| 157 |
+
liable to You for damages, including any direct, indirect, special,
|
| 158 |
+
incidental, or consequential damages of any character arising as a
|
| 159 |
+
result of this License or out of the use or inability to use the
|
| 160 |
+
Work (including but not limited to damages for loss of goodwill,
|
| 161 |
+
work stoppage, computer failure or malfunction, or any and all
|
| 162 |
+
other commercial damages or losses), even if such Contributor
|
| 163 |
+
has been advised of the possibility of such damages.
|
| 164 |
+
|
| 165 |
+
9. Accepting Warranty or Additional Liability. While redistributing
|
| 166 |
+
the Work or Derivative Works thereof, You may choose to offer,
|
| 167 |
+
and charge a fee for, acceptance of support, warranty, indemnity,
|
| 168 |
+
or other liability obligations and/or rights consistent with this
|
| 169 |
+
License. However, in accepting such obligations, You may act only
|
| 170 |
+
on Your own behalf and on Your sole responsibility, not on behalf
|
| 171 |
+
of any other Contributor, and only if You agree to indemnify,
|
| 172 |
+
defend, and hold each Contributor harmless for any liability
|
| 173 |
+
incurred by, or claims asserted against, such Contributor by reason
|
| 174 |
+
of your accepting any such warranty or additional liability.
|
| 175 |
+
|
| 176 |
+
END OF TERMS AND CONDITIONS
|
| 177 |
+
|
| 178 |
+
APPENDIX: How to apply the Apache License to your work.
|
| 179 |
+
|
| 180 |
+
To apply the Apache License to your work, attach the following
|
| 181 |
+
boilerplate notice, with the fields enclosed by brackets "[]"
|
| 182 |
+
replaced with your own identifying information. (Don't include
|
| 183 |
+
the brackets!) The text should be enclosed in the appropriate
|
| 184 |
+
comment syntax for the file format. We also recommend that a
|
| 185 |
+
file or class name and description of purpose be included on the
|
| 186 |
+
same "printed page" as the copyright notice for easier
|
| 187 |
+
identification within third-party archives.
|
| 188 |
+
|
| 189 |
+
Copyright [yyyy] [name of copyright owner]
|
| 190 |
+
|
| 191 |
+
Licensed under the Apache License, Version 2.0 (the "License");
|
| 192 |
+
you may not use this file except in compliance with the License.
|
| 193 |
+
You may obtain a copy of the License at
|
| 194 |
+
|
| 195 |
+
http://www.apache.org/licenses/LICENSE-2.0
|
| 196 |
+
|
| 197 |
+
Unless required by applicable law or agreed to in writing, software
|
| 198 |
+
distributed under the License is distributed on an "AS IS" BASIS,
|
| 199 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 200 |
+
See the License for the specific language governing permissions and
|
| 201 |
+
limitations under the License.
|
README.md
ADDED
|
@@ -0,0 +1,429 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ๐ MarketSync (๋ง์ผ์ฑํฌ)
|
| 2 |
+
|
| 3 |
+
### Agentic RAG ๊ธฐ๋ฐ ์์๊ณต์ธ ๋ง์ถคํ ์ง์ญ ์ถ์ ์ถ์ฒ & ๋ง์ผํ
AI ์ปจ์คํดํธ
|
| 4 |
+
|
| 5 |
+
์ ํ์นด๋ ๋น
๋ฐ์ดํฐ์ ์ ๊ตญ ์ถ์ ์ ๋ณด๋ฅผ ํตํฉ ๋ถ์ํ์ฌ, **AI ์์ด์ ํธ**๊ฐ ๊ฐ๊ฒ๋ณ๋ก ์ฐธ์ฌํ ๋งํ ์ง์ญ ์ถ์ ๋ฅผ ์ถ์ฒํ๊ณ ์ต์ ์ ๋ง์ผํ
์ ๋ต ๋ณด๊ณ ์๋ฅผ ์๋ ์์ฑํฉ๋๋ค. ๐ค
|
| 6 |
+
|
| 7 |
+
---
|
| 8 |
+
|
| 9 |
+
## ๐งญ ํ๋ก์ ํธ ๊ฐ์
|
| 10 |
+
|
| 11 |
+
MarketSync๋ **Streamlit ์น ์ธํฐํ์ด์ค, FastAPI ๋ฐ์ดํฐ ์๋ฒ, LangChain ์์ด์ ํธ**๋ฅผ ๊ฒฐํฉํ์ฌ ์์๊ณต์ธ์ ์ํ AI ์ปจ์คํ
์๋น์ค๋ฅผ ์ ๊ณตํฉ๋๋ค. ์ฌ์ฉ์๋ ์์ ์ ๊ฐ๊ฒ๋ฅผ ์ ํํ์ฌ ์์ธ ํ๋กํ๊ณผ ๋ถ์ ๊ทธ๋ํ๋ฅผ ํ์ธํ ๋ค, "10์์ ์ด๋ฆฌ๋ ์ถ์ ์ถ์ฒํด์ค", "์ถ์ฒ๋ ์ถ์ ๋ค์ ๋ง์ผํ
์ ๋ต ์๋ ค์ค" ์ ๊ฐ์ ์์ฐ์ด ์ง๋ฌธ์ ํตํด ๋ง์ถคํ ์ปจ์คํ
์ ๋ฐ์ ์ ์์ต๋๋ค.
|
| 12 |
+
|
| 13 |
+
**ํต์ฌ ์ํคํ
์ฒ๋ Agentic RAG**์
๋๋ค. AI ์์ด์ ํธ(`Orchestrator`)๋ ์ฌ์ฉ์์ ์ง๋ฌธ๊ณผ ๊ฐ๊ฒ์ ์์ธ ํ๋กํ(JSON)์ ๋ฐํ์ผ๋ก ์ํฉ์ ๋ง๋ **๋๊ตฌ(Tool)**๋ฅผ ์์จ์ ์ผ๋ก ์ ํํ๊ณ , ์ฌ์ฉ์์ ์ง๋ฌธ์ ๋ฐ๋ผ ํ์ํ๋ค๋ฉด ์ฌ๋ฌ ๋๊ตฌ๋ฅผ ์์ฐจ์ ์ผ๋ก ํธ์ถํ์ฌ ์ต์ข
์ปจ์คํ
๋ณด๊ณ ์๋ฅผ ์์ฑํฉ๋๋ค.
|
| 14 |
+
|
| 15 |
+
---
|
| 16 |
+
|
| 17 |
+
## ๐ ๏ธ ํต์ฌ ๋๊ตฌ ๋ฐ ์๋ ๋ฐฉ์
|
| 18 |
+
|
| 19 |
+
AI ์์ด์ ํธ๊ฐ ์ฌ์ฉํ๋ ์ฃผ์ ๋๊ตฌ์ ๋ด๋ถ ์ฒ๋ฆฌ ๊ณผ์ ์ ๋ค์๊ณผ ๊ฐ์ต๋๋ค.
|
| 20 |
+
|
| 21 |
+
| ๊ธฐ๋ฅ ๋ถ๋ฅ | ๋๊ตฌ ํจ์๋ช
(`tools/`) | ์ฃผ์ ์ฒ๋ฆฌ ๊ณผ์ (`modules/`) |
|
| 22 |
+
| :--------------- | :------------------------------------------------------- | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
| 23 |
+
| **์ถ์ ์ถ์ฒ** | `recommend_festivals` (festival\_recommender.py) | **ํ์ด๋ธ๋ฆฌ๋ 5๋จ๊ณ ํ์ดํ๋ผ์ธ (`filtering.py`)**: <br> 1. LLM ์ฟผ๋ฆฌ ์ฌ์์ฑ <br> 2. FAISS ๋ฒกํฐ ๊ฒ์ (์ ์ฌ ์ถ์ ํ๋ณด ์ ์ ) <br> 3. LLM ๋์ ์์ฑ ํ๊ฐ (๊ฐ๊ฒ ๋ง์ถค์ฑ) <br> 4. ํ์ด๋ธ๋ฆฌ๋ ์ ์ ๊ณ์ฐ <br> 5. ์ต์ข
Top3 ๊ฒฐ๊ณผ ํฌ๋งทํ
(2026 ์์ธก ํฌํจ) |
|
| 24 |
+
| **๋ง์ผํ
(RAG)** | `search_contextual_marketing_strategy` (marketing\_strategy.py) | **์ปจํ
์คํธ ๊ธฐ๋ฐ RAG (`knowledge_base.py`)**: <br> 1. ๊ฐ๊ฒ ํ๋กํ + ์ง๋ฌธ โ LLM ๊ฒ์ ์ฟผ๋ฆฌ ์์ฑ <br> 2. FAISS ๋ฒกํฐ ๊ฒ์ (๊ด๋ จ ์ ๋ต ๋ฌธ์ ์ถ์ถ) <br> 3. LLM ๋ต๋ณ ์์ฑ (์ถ์ถ๋ ๋ด์ฉ์ ๋ฐํ์ผ๋ก ์์ฐ์ค๋ฌ์ด ์ ๋ต ์ ์) |
|
| 25 |
+
| **๋ง์ผํ
(์์ฑ)** | `create_festival_specific_marketing_strategy` (marketing\_strategy.py) | **LLM ๊ธฐ๋ฐ ์ ๋ต ์์ฑ**: <br> 1. ์ถ์ ํ๋กํ ์กฐํ (`profile_analyzer.py`) <br> 2. ๊ฐ๊ฒ ํ๋กํ + ์ถ์ ํ๋กํ + RAG ๊ฒ์ โ LLM ํ๋กฌํํธ ๊ตฌ์ฑ <br> 3. LLM์ด ํน์ ์ถ์ ๋ง์ถค ์ ๋ต ์์ฑ |
|
| 26 |
+
| **๋ง์ผํ
(์์ฑ)** | `create_marketing_strategies_for_multiple_festivals` (marketing\_strategy.py) | **LLM ๊ธฐ๋ฐ ์ ๋ต ์์ฑ (๋ค์)**: <br> 1. ์ฌ๋ฌ ์ถ์ ์ด๋ฆ ์
๋ ฅ๋ฐ์ <br> 2. ๊ฐ ์ถ์ ๋ณ๋ก `create_festival_specific_marketing_strategy` ๋ฐ๋ณต ํธ์ถ <br> 3. ๋ชจ๋ ์ ๋ต์ ํ๋์ ๋ณด๊ณ ์๋ก ์ทจํฉ |
|
| 27 |
+
| **๊ฐ๊ฒ ๋ถ์** | `analyze_merchant_profile` (profile\_analyzer.py) | **LLM ๊ธฐ๋ฐ ๋ถ์**: <br> ๊ฐ๊ฒ ํ๋กํ(JSON) ์
๋ ฅ โ LLM์ด SWOT ๋ถ์ ๋ฐ ํต์ฌ ๊ณ ๊ฐ ํน์ฑ ์์ฝ ๋ณด๊ณ ์ ์์ฑ |
|
| 28 |
+
| **์ถ์ ๋ถ์** | `analyze_festival_profile` (profile\_analyzer.py) | **LLM ๊ธฐ๋ฐ ๋ถ์**: <br> ์ถ์ ํ๋กํ(JSON) ์
๋ ฅ โ LLM์ด ์ถ์ ์ ํต์ฌ ํน์ง ๋ฐ ์ฃผ์ ๋ฐฉ๋ฌธ๊ฐ ํน์ฑ ์์ฝ ๋ณด๊ณ ์ ์์ฑ |
|
| 29 |
+
| **์ถ์ ์กฐํ** | `get_festival_profile_by_name` (profile\_analyzer.py) | **๋จ์ ๋ฐ์ดํฐ ์กฐํ**: ์ถ์ ์ด๋ฆ ์
๋ ฅ โ `festival_df.csv`์์ ํด๋น ์ถ์ ์ ๋ณด(JSON) ๋ฐํ (์บ์ฑ ํ์ฉ) |
|
| 30 |
+
|
| 31 |
+
---
|
| 32 |
+
|
| 33 |
+
## ๐ ํ๋ก์ ํธ ๊ตฌ์กฐ ๋ฐ ์ฝ๋ ์ค๋ช
|
| 34 |
+
|
| 35 |
+
```plaintext
|
| 36 |
+
MarketSync/
|
| 37 |
+
โโโ streamlit_app.py # Streamlit ์น ์ธํฐํ์ด์ค (UI)
|
| 38 |
+
โโโ orchestrator.py # AI ์์ด์ ํธ: LangChain AgentExecutor, ๋๊ตฌ ๋ผ์ฐํ
, ์ต์ข
๋ต๋ณ ์์ฑ ๋ก์ง
|
| 39 |
+
โโโ config.py # ์ค์ ์ค์ํ: ๊ฒฝ๋ก, API ํค, ๋ชจ๋ธ๋ช
, ๋ก๊น
์ค์ ๋ฑ
|
| 40 |
+
โ
|
| 41 |
+
โโโ api/ # ๋ฐ์ดํฐ ์ ๊ณต ๋ฐ ์ ์ฒ๋ฆฌ ์๋ฒ
|
| 42 |
+
โ โโโ server.py # FastAPI ์๋ฒ: /profile, /merchants ์๋ํฌ์ธํธ ์ ๊ณต
|
| 43 |
+
โ โโโ data_loader.py # ๋ฐ์ดํฐ ๋ก๋ฉ ๋ฐ ์ ์ฒ๋ฆฌ (final_df.csv, festival_df.csv)
|
| 44 |
+
โ
|
| 45 |
+
โโโ tools/ # LangChain @tool ๋๊ตฌ ์ ์ ๋ ์ด์ด
|
| 46 |
+
โ โโโ festival_recommender.py # [Tool] recommend_festivals ๋๊ตฌ ์ ์ (filtering.py ํธ์ถ)
|
| 47 |
+
โ โโโ marketing_strategy.py # [Tool] ๋ง์ผํ
์ ๋ต ๊ด๋ จ ๋๊ตฌ 3๊ฐ ์ ์ (knowledge_base.py, profile_analyzer.py ๋ฑ ํธ์ถ)
|
| 48 |
+
โ โโโ profile_analyzer.py # [Tool] ๊ฐ๊ฒ/์ถ์ ๋ถ์ ๋ฐ ์ถ์ ํ๋กํ ์กฐํ ๋๊ตฌ 3๊ฐ ์ ์ (LLM ํธ์ถ, ๋ฐ์ดํฐ ์กฐํ)
|
| 49 |
+
โ โโโ tool_loader.py # ๋ชจ๋ ๋๊ตฌ(@tool)๋ฅผ ๋ฆฌ์คํธ๋ก ๋ฌถ์ด Orchestrator์ ์ ๊ณต
|
| 50 |
+
โ
|
| 51 |
+
โโโ modules/ # ํต์ฌ ๋ก์ง ๊ตฌํ ๋ชจ๋
|
| 52 |
+
โ โโโ filtering.py # [์ถ์ ์ถ์ฒ] FestivalRecommender ํด๋์ค (5๋จ๊ณ ํ์ดํ๋ผ์ธ ๊ตฌํ)
|
| 53 |
+
โ โโโ knowledge_base.py # [RAG] FAISS ๋ฒกํฐ ์คํ ์ด ๋ก๋ฉ (์ถ์ , ๋ง์ผํ
), ์๋ฒ ๋ฉ ๋ชจ๋ธ ๊ด๋ฆฌ
|
| 54 |
+
โ โโโ llm_provider.py # LLM ์ธ์คํด์ค ๊ด๋ฆฌ (์ ์ญ ๊ณต์ ๋ฐ Temperature ์กฐ์ )
|
| 55 |
+
โ โโโ profile_utils.py # ๊ฐ๊ฒ ํ๋กํ JSON ๊ฐ๊ณต ์ ํธ๋ฆฌํฐ (์ฑํ
์ฉ/๋ถ์์ฉ)
|
| 56 |
+
โ โโโ visualization.py # Streamlit ์๊ฐํ: Matplotlib ๊ทธ๋ํ ์์ฑ ํจ์
|
| 57 |
+
โ
|
| 58 |
+
โโโ utils/ # ๊ณตํต ์ ํธ๋ฆฌํฐ
|
| 59 |
+
โ โโโ parser_utils.py # LLM ์๋ต์์ JSON ์ถ์ถ ํ์
|
| 60 |
+
โ
|
| 61 |
+
โโโ data/ # ์๋ณธ ๋ฐ์ดํฐ
|
| 62 |
+
โ โโโ final_df.csv # ์ ํ์นด๋ ๊ฐ๋งน์ ๋ฐ์ดํฐ
|
| 63 |
+
โ โโโ festival_df.csv # ์ ๊ตญ ์ถ์ ์ ๋ณด ๋ฐ์ดํฐ
|
| 64 |
+
โ
|
| 65 |
+
โโโ vectorstore/ # FAISS ๋ฒกํฐ ๋ฐ์ดํฐ๋ฒ ์ด์ค ์ ์ฅ ํด๋
|
| 66 |
+
โโโ faiss_festival # ์ถ์ ์ ๋ณด ๋ฒกํฐ DB
|
| 67 |
+
โโโ faiss_marketing # ๋ง์ผํ
์ ๋ต ๋ฒกํฐ DB
|
| 68 |
+
```
|
| 69 |
+
|
| 70 |
+
------------------------------------------------------------------------
|
| 71 |
+
|
| 72 |
+
## ๐ ์ํคํ
์ฒ ๋ฐ ๋ฐ์ดํฐ ํ๋ฆ
|
| 73 |
+
|
| 74 |
+
์ด ์์คํ
์ **์์ด์ ํธ ์ค์ฌ์ ๋๊ตฌ ํธ์ถ (Tool-Calling)** ์ํคํ
์ฒ๋ฅผ ๊ธฐ๋ฐ์ผ๋ก ์๋ํฉ๋๋ค. ์ฌ์ฉ์์ ์์ฐ์ด ์ง๋ฌธ์ `Orchestrator`๋ผ๋ AI ์์ด์ ํธ์ ์ํด ํด์๋๋ฉฐ, ์์ด์ ํธ๋ ์ ๊ณต๋ `[๊ฐ๊ฒ ํ๋กํ]` ์ปจํ
์คํธ์ **์์คํ
ํ๋กฌํํธ**์ ์ง์นจ์ ๋ฐ๋ผ ๊ฐ์ฅ ์ ์ ํ ๋๊ตฌ๋ฅผ ์ ํํ๊ณ ์คํํฉ๋๋ค. ํ์ํ๋ค๋ฉด ์ฌ๋ฌ ๋๊ตฌ๋ฅผ ์์ฐจ์ ์ผ๋ก ํธ์ถํ์ฌ ์ป์ ์ ๋ณด๋ฅผ ์ข
ํฉํ ๋ค, ์ต์ข
์ปจ์คํ
๋ต๋ณ์ ์์ฑํฉ๋๋ค.
|
| 75 |
+
|
| 76 |
+
---
|
| 77 |
+
|
| 78 |
+
## ๐งฉ ์์คํ
๊ตฌ์ฑ๋
|
| 79 |
+
|
| 80 |
+
### (1) ์ ์ฒด ๊ฐ์: UI - ์์ด์ ํธ - API ์ํธ์์ฉ
|
| 81 |
+
|
| 82 |
+
์ฌ์ฉ์๊ฐ UI๋ฅผ ํตํด ์ง๋ฌธํ๋ฉด, **์์ด์ ํธ(Orchestrator)** ๊ฐ ์๋ํ๊ณ , ํ์ ์ FastAPI ์๋ฒ๋ **์ฌ๋ฌ ๋๊ตฌ(Tool)** ์ ์ํธ์์ฉํฉ๋๋ค.
|
| 83 |
+
|
| 84 |
+
```mermaid
|
| 85 |
+
graph TD
|
| 86 |
+
%% ========================
|
| 87 |
+
%% AI ์ปจ์คํ
์์ง (์ต์๋จ)
|
| 88 |
+
%% ========================
|
| 89 |
+
subgraph SG_Engine ["๐ง AI ์ปจ์คํ
์์ง"]
|
| 90 |
+
direction TB
|
| 91 |
+
C["๐ค Orchestrator (ํต์ฌ ์์ด์ ํธ)\n(orchestrator.py)\nAgentExecutor (LangChain)"]
|
| 92 |
+
D{"๐ฆ Tool Routing\nLLM ์๋ ๋ถ์ & ๋๊ตฌ ์ ํ"}
|
| 93 |
+
|
| 94 |
+
subgraph SG_Tools ["๐ง ๋ฑ๋ก๋ ๋๊ตฌ ๋ชฉ๋ก (tools/)"]
|
| 95 |
+
T1["recommend_festivals\n(์ถ์ ์ถ์ฒ)"]
|
| 96 |
+
T2["search_contextual_marketing_strategy\n(RAG ๋ง์ผํ
์ ๋ต)"]
|
| 97 |
+
T3["create_festival_specific_marketing_strategy\n(๋จ์ผ ์ถ์ ์ ๋ต)"]
|
| 98 |
+
T3_multi["create_marketing_strategies_for_multiple_festivals\n(๋ค์ ์ถ์ ์ ๋ต)"]
|
| 99 |
+
T4["analyze_merchant_profile\n(๊ฐ๊ฒ ๋ถ์)"]
|
| 100 |
+
T5["analyze_festival_profile\n(์ถ์ ๋ถ์)"]
|
| 101 |
+
T6["get_festival_profile_by_name\n(์ถ์ ํ๋กํ ์กฐํ)"]
|
| 102 |
+
end
|
| 103 |
+
|
| 104 |
+
LLM_Final["๐ช LLM (Final Report Generation)\n์ต์ข
๋ณด๊ณ ์ ์์ฑ"]
|
| 105 |
+
end
|
| 106 |
+
|
| 107 |
+
%% ========================
|
| 108 |
+
%% ์ฌ์ฉ์ ์ธํฐํ์ด์ค & ๋ฐ์ดํฐ ์๋ฒ (ํ๋จ)
|
| 109 |
+
%% ========================
|
| 110 |
+
subgraph SG_UserServer ["๐ป ์ฌ์ฉ์ ์ธํฐํ์ด์ค & ๋ฐ์ดํฐ ์๋ฒ"]
|
| 111 |
+
direction LR
|
| 112 |
+
A["๐ฅ๏ธ Streamlit UI\n(streamlit_app.py)\n์ฌ์ฉ์ ์ํธ์์ฉ"] <--> B["๐ FastAPI Server\n(api/server.py)\n๐ ๊ฐ๊ฒ ํ๋กํ / ๋ชฉ๋ก ์กฐํ"]
|
| 113 |
+
end
|
| 114 |
+
|
| 115 |
+
%% ========================
|
| 116 |
+
%% ์ฐ๊ฒฐ ๊ด๊ณ (์์ )
|
| 117 |
+
%% ========================
|
| 118 |
+
A -- "์์ฐ์ด ์ง๋ฌธ ์
๋ ฅ" --> C
|
| 119 |
+
C -- "์๋ ๋ถ์ ์์ฒญ" --> D
|
| 120 |
+
D -- "์ ํฉ ๋๊ตฌ ์ ํ/์คํ" --> SG_Tools
|
| 121 |
+
SG_Tools -- "๋๊ตฌ ์คํ ๊ฒฐ๊ณผ" --> C
|
| 122 |
+
C -- "์ต์ข
๋ณด๊ณ ์ ์์ฑ ์์ฒญ" --> LLM_Final
|
| 123 |
+
LLM_Final -- "์ต์ข
๊ฒฐ๊ณผ ์ ๋ฌ" --> A
|
| 124 |
+
|
| 125 |
+
%% ========================
|
| 126 |
+
%% ์คํ์ผ ์ง์ (GitHub ํธํ)
|
| 127 |
+
%% ========================
|
| 128 |
+
style A fill:#4CAF50,color:#fff,stroke:#388E3C,stroke-width:2px
|
| 129 |
+
style B fill:#FF9800,color:#fff,stroke:#EF6C00,stroke-width:2px
|
| 130 |
+
style C fill:#E91E63,color:#fff,stroke:#C2185B,stroke-width:2px
|
| 131 |
+
style D fill:#9C27B0,color:#fff,stroke:#7B1FA2,stroke-width:2px
|
| 132 |
+
style SG_Tools fill:#E1F5FE, stroke:#0277BD,color:#000
|
| 133 |
+
classDef toolNode fill:#03A9F4,color:#fff,stroke:#0288D1,stroke-width:2px
class T1,T2,T3,T3_multi,T4,T5,T6 toolNode
|
| 134 |
+
style LLM_Final fill:#BA68C8,color:#fff,stroke:#8E24AA,stroke-width:2px
|
| 135 |
+
```
|
| 136 |
+
|
| 137 |
+
---
|
| 138 |
+
|
| 139 |
+
### (2) ์ถ์ ์ถ์ฒ ๋๊ตฌ ์์ธ โ `recommend_festivals`
|
| 140 |
+
|
| 141 |
+
LLM ๊ธฐ๋ฐ **ํ์ด๋ธ๋ฆฌ๋ 5๋จ๊ณ ํ์ดํ๋ผ์ธ**์ ํตํด,
|
| 142 |
+
๊ฐ๊ฒ ๋ง์ถคํ ์ถ์ ๋ฅผ ์ถ์ฒํฉ๋๋ค.
|
| 143 |
+
```mermaid
|
| 144 |
+
graph TD
|
| 145 |
+
%% ========================
|
| 146 |
+
%% Orchestrator ์์ฒญ
|
| 147 |
+
%% ========================
|
| 148 |
+
subgraph SG_Orchestrator_Req ["๐ง Orchestrator ์์ฒญ"]
|
| 149 |
+
Agent["๐ค AgentExecutor"] -- "์ถ์ ์ถ์ฒ ์์ฒญ" --> Tool_Rec["๐งฉ Tool: recommend_festivals"]
|
| 150 |
+
end
|
| 151 |
+
|
| 152 |
+
%% ========================
|
| 153 |
+
%% ์ง์ ๋ฒ ์ด์ค (์์ )
|
| 154 |
+
%% ========================
|
| 155 |
+
subgraph SG_KnowledgeBase ["๐ ์ง์ ๋ฒ ์ด์ค (modules/knowledge_base.py)"]
|
| 156 |
+
direction LR
|
| 157 |
+
EM["๐งฌ Embedding Model\n(HuggingFace)"]
|
| 158 |
+
VSF["๐ FAISS (์ถ์ DB)"]
|
| 159 |
+
EM -- "์๋ฒ ๋ฉ ์์ฑ (Offline)" --> VSF
|
| 160 |
+
end
|
| 161 |
+
|
| 162 |
+
%% ========================
|
| 163 |
+
%% Filtering Pipeline
|
| 164 |
+
%% ========================
|
| 165 |
+
subgraph SG_Filtering_Pipeline ["๐ Filtering Pipeline (modules/filtering.py)"]
|
| 166 |
+
Tool_Rec --> Step1["1๏ธโฃ LLM ์ฟผ๋ฆฌ ์ฌ์์ฑ"]
|
| 167 |
+
Step1 --> Step2["2๏ธโฃ FAISS ๋ฒกํฐ ๊ฒ์\n(์ ์ฌ ์ถ์ ํ๋ณด ํ์)"]
|
| 168 |
+
|
| 169 |
+
%% RAG ํ๋ฆ ๋ช
ํํ (์์ )
|
| 170 |
+
Step2 -- "์ฟผ๋ฆฌ ์๋ฒ ๋ฉ" --> EM
|
| 171 |
+
Step2 -- "์ ์ฌ๋ ๊ฒ์" --> VSF
|
| 172 |
+
|
| 173 |
+
Step2 --> Step3["3๏ธโฃ LLM ๋์ ์์ฑ ํ๊ฐ\n(๊ฐ๊ฒ ๋ง์ถค์ฑ ํ๋จ)"]
|
| 174 |
+
Step3 --> LLM1["๐ค LLM (Dynamic Evaluation)"]
|
| 175 |
+
Step3 --> Step4["4๏ธโฃ ํ์ด๋ธ๋ฆฌ๋ ์ ์ ๊ณ์ฐ\n(์ ์ฌ๋ + ๋ง์ถค์ฑ)"]
|
| 176 |
+
Step4 --> Step5["5๏ธโฃ ์ต์ข
๊ฒฐ๊ณผ ํฌ๋งทํ
\n(Top3 + 2026 ์์ธก ํฌํจ)"]
|
| 177 |
+
end
|
| 178 |
+
|
| 179 |
+
%% ========================
|
| 180 |
+
%% ๊ฒฐ๊ณผ ๋ฐํ
|
| 181 |
+
%% ========================
|
| 182 |
+
subgraph SG_Result_Return ["๐ฆ ๊ฒฐ๊ณผ ๋ฐํ"]
|
| 183 |
+
Step5 -- "Top3 ์ถ์ ์ถ์ฒ ๊ฒฐ๊ณผ" --> Agent
|
| 184 |
+
end
|
| 185 |
+
|
| 186 |
+
%% ========================
|
| 187 |
+
%% ์คํ์ผ
|
| 188 |
+
%% ========================
|
| 189 |
+
style Agent fill:#E91E63,color:#fff
|
| 190 |
+
style Tool_Rec fill:#03A9F4,color:#fff
|
| 191 |
+
classDef stepNode fill:#81D4FA,color:#000
class Step1,Step2,Step3,Step4,Step5 stepNode
|
| 192 |
+
style VSF fill:#FFC107,color:#000
|
| 193 |
+
style EM fill:#4DD0E1,color:#000
|
| 194 |
+
style LLM1 fill:#BA68C8,color:#fff
|
| 195 |
+
style SG_KnowledgeBase fill:#F5F5F5,stroke:#9E9E9E
|
| 196 |
+
```
|
| 197 |
+
|
| 198 |
+
---
|
| 199 |
+
|
| 200 |
+
### (3) ๋ง์ผํ
์ ๋ต (RAG) ๋๊ตฌ ์์ธ โ `search_contextual_marketing_strategy`
|
| 201 |
+
|
| 202 |
+
**RAG** ๊ธฐ๋ฐ์ผ๋ก **๊ฐ๊ฒ ํ๋กํ + ์ง๋ฌธ ์ปจํ
์คํธ**๋ฅผ ์ด์ฉํด
|
| 203 |
+
๊ฐ์ฅ ๊ด๋ จ์ฑ ๋์ ๋ง์ผํ
์ ๋ต ๋ฌธ์๋ฅผ ๊ฒ์ํ๊ณ , LLM์ด ์์ฐ์ค๋ฝ๊ฒ ์์ฝ/์ ์ํฉ๋๋ค.
|
| 204 |
+
|
| 205 |
+
```mermaid
|
| 206 |
+
graph TD
|
| 207 |
+
%% ========================
|
| 208 |
+
%% Orchestrator ์์ฒญ
|
| 209 |
+
%% ========================
|
| 210 |
+
subgraph SG_Orchestrator_Req_RAG ["๐ง Orchestrator ์์ฒญ"]
|
| 211 |
+
Agent["๐ค AgentExecutor"] -- "๋ง์ผํ
์ ๋ต (RAG) ์์ฒญ" --> Tool_RAG["๐งฉ Tool: search_contextual_marketing_strategy"]
|
| 212 |
+
end
|
| 213 |
+
|
| 214 |
+
%% ========================
|
| 215 |
+
%% ์ง์ ๋ฒ ์ด์ค (์์ )
|
| 216 |
+
%% ========================
|
| 217 |
+
subgraph SG_KnowledgeBase_RAG ["๐ ์ง์ ๋ฒ ์ด์ค (modules/knowledge_base.py)"]
|
| 218 |
+
direction LR
|
| 219 |
+
EM["๐งฌ Embedding Model\n(HuggingFace)"]
|
| 220 |
+
VSM["๐ FAISS (๋ง์ผํ
DB)"]
|
| 221 |
+
EM -- "์๋ฒ ๋ฉ ์์ฑ (Offline)" --> VSM
|
| 222 |
+
end
|
| 223 |
+
|
| 224 |
+
%% ========================
|
| 225 |
+
%% RAG Logic
|
| 226 |
+
%% ========================
|
| 227 |
+
subgraph SG_RAG_Logic ["โ๏ธ RAG Logic (tools/marketing_strategy.py)"]
|
| 228 |
+
Tool_RAG --> Step1["1๏ธโฃ LLM ๊ฒ์ ์ฟผ๋ฆฌ ์์ฑ\n(๊ฐ๊ฒ ํ๋กํ + ์ง๋ฌธ ๊ธฐ๋ฐ)"]
|
| 229 |
+
Step1 --> Step2["2๏ธโฃ FAISS ๋ฒกํฐ ๊ฒ์\n(๋ง์ผํ
DB ํ์)"]
|
| 230 |
+
|
| 231 |
+
%% RAG ํ๋ฆ ๋ช
ํํ (์์ )
|
| 232 |
+
Step2 -- "์ฟผ๋ฆฌ ์๋ฒ ๋ฉ" --> EM
|
| 233 |
+
Step2 -- "์ ์ฌ๋ ๊ฒ์" --> VSM
|
| 234 |
+
|
| 235 |
+
Step2 --> Step3["3๏ธโฃ LLM ๋ต๋ณ ์์ฑ\n(๊ฒ์๋ ์ปจํ
์คํธ ๊ธฐ๋ฐ)"]
|
| 236 |
+
Step3 --> LLM2["๐ค LLM (Answer Synthesis)"]
|
| 237 |
+
end
|
| 238 |
+
|
| 239 |
+
%% ========================
|
| 240 |
+
%% ๊ฒฐ๊ณผ ๋ฐํ
|
| 241 |
+
%% ========================
|
| 242 |
+
subgraph SG_Result_Return_RAG ["๐ฆ ๊ฒฐ๊ณผ ๋ฐํ"]
|
| 243 |
+
Step3 -- "์์ฑ๋ ๋ง์ผํ
์ ๋ต ํ
์คํธ" --> Agent
|
| 244 |
+
end
|
| 245 |
+
|
| 246 |
+
%% ========================
|
| 247 |
+
%% ์คํ์ผ
|
| 248 |
+
%% ========================
|
| 249 |
+
style Agent fill:#E91E63,color:#fff
|
| 250 |
+
style Tool_RAG fill:#03A9F4,color:#fff
|
| 251 |
+
classDef stepNode fill:#81D4FA,color:#000
class Step1,Step2,Step3 stepNode
|
| 252 |
+
style VSM fill:#FFC107,color:#000
|
| 253 |
+
style EM fill:#4DD0E1,color:#000
|
| 254 |
+
style LLM2 fill:#BA68C8,color:#fff
|
| 255 |
+
style SG_KnowledgeBase_RAG fill:#F5F5F5,stroke:#9E9E9E
|
| 256 |
+
```
|
| 257 |
+
|
| 258 |
+
---
|
| 259 |
+
|
| 260 |
+
### (4) LLM ๊ธฐ๋ฐ ๋ถ์ ๋๊ตฌ ์์ธ โ `analyze_merchant_profile` / `analyze_festival_profile`
|
| 261 |
+
|
| 262 |
+
๊ฐ๊ฒ ๋๋ ์ถ์ ์ ํ๋กํ(JSON)์ ์
๋ ฅ๋ฐ์
|
| 263 |
+
LLM์ด **SWOT ๋ถ์ / ์ฃผ์ ํน์ง ์์ฝ**์ ์ํํฉ๋๋ค.
|
| 264 |
+
|
| 265 |
+
```mermaid
|
| 266 |
+
graph TD
|
| 267 |
+
%% ========================
|
| 268 |
+
%% Orchestrator ์์ฒญ
|
| 269 |
+
%% ========================
|
| 270 |
+
subgraph SG_Orchestrator_Req_Analyze ["๐ง Orchestrator ์์ฒญ"]
|
| 271 |
+
Agent["๐ค AgentExecutor"] -- "๊ฐ๊ฒ/์ถ์ ๋ถ์ ์์ฒญ" --> Tool_Analyze["๐งฉ Tool: analyze_merchant_profile / analyze_festival_profile"]
|
| 272 |
+
end
|
| 273 |
+
|
| 274 |
+
%% ========================
|
| 275 |
+
%% LLM ๋ถ์
|
| 276 |
+
%% ========================
|
| 277 |
+
subgraph SG_LLM_Analysis ["๐ LLM ๋ถ์ (tools/profile_analyzer.py)"]
|
| 278 |
+
Tool_Analyze -- "ํ๋กํ(JSON) ์ ๋ฌ" --> LLM_Analyze["๐ค LLM (SWOT / ์์ฝ ๋ถ์)"]
|
| 279 |
+
end
|
| 280 |
+
|
| 281 |
+
%% ========================
|
| 282 |
+
%% ๊ฒฐ๊ณผ ๋ฐํ
|
| 283 |
+
%% ========================
|
| 284 |
+
subgraph SG_Result_Return_Analyze ["๐ฆ ๊ฒฐ๊ณผ ๋ฐํ"]
|
| 285 |
+
LLM_Analyze -- "๋ถ์ ๋ณด๊ณ ์ ํ
์คํธ" --> Agent
|
| 286 |
+
end
|
| 287 |
+
|
| 288 |
+
%% ========================
|
| 289 |
+
%% ์คํ์ผ
|
| 290 |
+
%% ========================
|
| 291 |
+
style Agent fill:#E91E63,color:#fff
|
| 292 |
+
style Tool_Analyze fill:#03A9F4,color:#fff
|
| 293 |
+
style LLM_Analyze fill:#BA68C8,color:#fff
|
| 294 |
+
```
|
| 295 |
+
|
| 296 |
+
------------------------------------------------------------------------
|
| 297 |
+
|
| 298 |
+
## ๐ ๋ฐ์ดํฐ ํ๋ฆ ์์ธ
|
| 299 |
+
|
| 300 |
+
1. **์ด๊ธฐ ์ค์ (UI โ API โ UI)**
|
| 301 |
+
* `streamlit_app.py` ์คํ ์ `load_data()` ํจ์๊ฐ FastAPI ์๋ฒ(`api/server.py`)์ `/merchants` ์๋ํฌ์ธํธ๋ฅผ ํธ์ถํ์ฌ ์ ์ฒด ๊ฐ๋งน์ ๋ชฉ๋ก(ID, ์ด๋ฆ)์ ๋ฐ์์ต๋๋ค.
|
| 302 |
+
* ์ฌ์ฉ์๊ฐ Streamlit ๋๋กญ๋ค์ด ๋ฉ๋ด์์ ์์ ์ ๊ฐ๊ฒ๋ฅผ ์ ํํฉ๋๋ค.
|
| 303 |
+
* ์ ํ๋ ๊ฐ๊ฒ ID๋ก FastAPI ์๋ฒ์ `/profile` ์๋ํฌ์ธํธ๋ฅผ ํธ์ถํ์ฌ ํด๋น ๊ฐ๊ฒ์ ์์ธ ํ๋กํ(JSON)๊ณผ ์๊ถ/์
์ข
ํ๊ท ๋ฐ์ดํฐ๋ฅผ ๋ฐ์์ต๋๋ค.
|
| 304 |
+
* ๋ฐ์์จ ํ๋กํ ๋ฐ์ดํฐ๋ `modules/visualization.py`๋ฅผ ํตํด ๊ทธ๋ํ์ ํ๋ก ์๊ฐํ๋์ด ์ฌ์ฉ์์๊ฒ ๋ณด์ฌ์ง๊ณ , `st.session_state.profile_data`์ ์ ์ฅ๋ฉ๋๋ค.
|
| 305 |
+
|
| 306 |
+
2. **์ปจ์คํ
์์ฒญ (UI โ Orchestrator)**
|
| 307 |
+
* ์ฌ์ฉ์๊ฐ Streamlit ์ฑํ
์
๋ ฅ์ฐฝ์ ์ง๋ฌธ์ ์
๋ ฅํฉ๋๋ค.
|
| 308 |
+
* `streamlit_app.py`๋ `orchestrator.invoke_agent()` ํจ์๋ฅผ ํธ์ถํฉ๋๋ค.
|
| 309 |
+
* ์ด๋ **์ฌ์ฉ์ ์ง๋ฌธ(Query)**, **์ฑํ
์ฉ์ผ๋ก ๊ฐ๊ณต๋ ๊ฐ๊ฒ ํ๋กํ(JSON ๋ฌธ์์ด)**, **์ด์ ๋ํ ๊ธฐ๋ก(History)**, **๋ง์ง๋ง ์ถ์ฒ ์ถ์ ๋ชฉ๋ก(์ ํ์ )**์ด `Orchestrator`๋ก ์ ๋ฌ๋ฉ๋๋ค.
|
| 310 |
+
|
| 311 |
+
3. **์๋ ๋ถ์ ๋ฐ ๋๊ตฌ ๋ผ์ฐํ
(Orchestrator โ LLM โ Tool)**
|
| 312 |
+
* `orchestrator.py`์ `AgentExecutor`๋ ์์คํ
ํ๋กฌํํธ์ ์ ๋ฌ๋ ์ปจํ
์คํธ(๊ฐ๊ฒ ํ๋กํ, ์ง๋ฌธ ๋ฑ)๋ฅผ ์กฐํฉํ์ฌ **์ฒซ ๋ฒ์งธ LLM(๋๊ตฌ ์ ํ์ฉ)**์ ํธ์ถํฉ๋๋ค.
|
| 313 |
+
* LLM์ ์ง๋ฌธ์ ์๋๋ฅผ ๋ถ์ํ๊ณ , ์์คํ
ํ๋กฌํํธ์ ๊ฐ์ด๋๋ผ์ธ์ ๋ฐ๋ผ `tools/tool_loader.py`์ ์ ์๋ **๋๊ตฌ ๋ชฉ๋ก ์ค ๊ฐ์ฅ ์ ํฉํ ๋๊ตฌ๋ฅผ ์ ํ**ํ๊ณ ํ์ํ ์
๋ ฅ๊ฐ(Arguments)์ ๊ฒฐ์ ํฉ๋๋ค.
|
| 314 |
+
|
| 315 |
+
4. **๋๊ตฌ ์คํ (Tool โ Modules/API/VectorDB/LLM)**
|
| 316 |
+
* ์ ํ๋ ๋๊ตฌ ํจ์(`tools/*.py`)๊ฐ ์คํ๋ฉ๋๋ค.
|
| 317 |
+
* ๋๊ตฌ๋ ํ์์ ๋ฐ๋ผ `modules/*.py`์ ํต์ฌ ๋ก์ง(์: `FestivalRecommender`), ์ธ๋ถ API(๋ ์จ ๋ฑ), VectorDB(`modules/knowledge_base.py` ๊ฒฝ์ ), ๋๋ ๋ณ๋์ LLM(`modules/llm_provider.py` ๊ฒฝ์ )์ ํธ์ถํ์ฌ ์์
์ ์ํํฉ๋๋ค.
|
| 318 |
+
|
| 319 |
+
5. **๊ฒฐ๊ณผ ์ทจํฉ ๋ฐ ๋ฐ๋ณต (Tool โ Orchestrator โ LLM โ Tool ...)**
|
| 320 |
+
* ๋๊ตฌ ์คํ ๊ฒฐ๊ณผ(Observation)๋ ๋ค์ `AgentExecutor`๋ก ๋ฐํ๋ฉ๋๋ค.
|
| 321 |
+
* ์์ด์ ํธ๋ ์ด ๊ฒฐ๊ณผ๋ฅผ ๋ฐํ์ผ๋ก **๋ค์ ํ๋์ ๊ฒฐ์ **ํฉ๋๋ค. (์: ์ถ๊ฐ ์ ๋ณด๊ฐ ํ์ํ๋ฉด ๋ค๋ฅธ ๋๊ตฌ๋ฅผ ํธ์ถํ๊ฑฐ๋, ๋ชจ๋ ์ ๋ณด๊ฐ ๋ชจ์๋ค๊ณ ํ๋จ๋๋ฉด ์ต์ข
๋ต๋ณ ์์ฑ์ ์ค๋น)
|
| 322 |
+
* ์ด "LLM ํ๋จ โ ๋๊ตฌ ํธ์ถ โ ๊ฒฐ๊ณผ ํ์ธ" ๊ณผ์ ์ ์ฌ์ฉ์์ ์์ฒญ์ด ์์ ํ ํด๊ฒฐ๋ ๋๊น์ง **์ฌ๋ฌ ๋ฒ ๋ฐ๋ณต**๋ ์ ์์ต๋๋ค (Agentic ํน์ฑ).
|
| 323 |
+
|
| 324 |
+
6. **์ต์ข
๋ต๋ณ ์์ฑ ๋ฐ ์ถ๋ ฅ (Orchestrator โ LLM โ UI)**
|
| 325 |
+
* `AgentExecutor`๊ฐ ์ต์ข
์ ์ผ๋ก ๋์ถํ ๊ฒฐ๊ณผ(`response['output']`) ๋๋ ํ์ ์ `orchestrator.py`๊ฐ ์ง์ **๋ ๋ฒ์งธ LLM(๋ต๋ณ ์์ฑ์ฉ)**์ ํธ์ถํ์ฌ, ๋ชจ๋ ์ค๊ฐ ๊ฒฐ๊ณผ์ ์ปจํ
์คํธ๋ฅผ ์ข
ํฉํ **์ต์ข
์ปจ์คํ
๋ณด๊ณ ์(์์ฐ์ด)**๋ฅผ ์์ฑํฉ๋๋ค.
|
| 326 |
+
* ์์ฑ๋ ๋ณด๊ณ ์๋ `streamlit_app.py`๋ก ๋ฐํ๋์ด ์ฌ์ฉ์ ํ๋ฉด์ ์ถ๋ ฅ๋ฉ๋๋ค.
|
| 327 |
+
|
| 328 |
+
---
|
| 329 |
+
|
| 330 |
+
## โ๏ธ ์ฃผ์ ํน์ง ์์ฝ
|
| 331 |
+
|
| 332 |
+
| ๊ธฐ๋ฅ | ์ค๋ช
|
|
| 333 |
+
| :--------------------- | :------------------------------------------------------------------------------------------- |
|
| 334 |
+
| **Agentic RAG** | LLM ์์ด์ ํธ๊ฐ ๊ฐ๊ฒ ํ๋กํ ์ปจํ
์คํธ๋ฅผ ๋ฐํ์ผ๋ก ์ค์ค๋ก ๋๊ตฌ๋ฅผ ์ ํํ๊ณ , ๋์ ์ผ๋ก RAG ๊ฒ์ ์ฟผ๋ฆฌ๋ฅผ ์์ฑํ์ฌ ์คํ |
|
| 335 |
+
| **Tool Calling Agent** | LangChain์ `create_tool_calling_agent`๋ฅผ ์ฌ์ฉํ์ฌ ์ฌ๋ฌ ๋๊ตฌ๋ฅผ ์์จ์ ์ผ๋ก ํธ์ถ ๋ฐ ์ฐ๊ณ |
|
| 336 |
+
| **ํ์ด๋ธ๋ฆฌ๋ ์ถ์ฒ** | FAISS ๋ฒกํฐ ๊ฒ์(์ ์ฌ๋) + LLM ๋์ ํ๊ฐ(๋ง์ถค์ฑ) ์ ์๋ฅผ ๊ฒฐํฉํ์ฌ ์ถ์ ์ถ์ฒ ์ ํ๋ ํฅ์ |
|
| 337 |
+
| **์ปจํ
์คํธ ๊ธฐ๋ฐ ๋ถ์** | ๋ชจ๋ ๋๊ตฌ ํธ์ถ ๋ฐ ์ต์ข
๋ต๋ณ ์์ฑ ์, ํ์ฌ ๋ถ์ ์ค์ธ ๊ฐ๊ฒ์ ํ๋กํ(JSON)์ ํต์ฌ ์ปจํ
์คํธ๋ก ํ์ฉ |
|
| 338 |
+
| **๋ชจ๋ํ๋ ๊ตฌ์กฐ** | ๊ธฐ๋ฅ๋ณ(UI, API, Orchestrator, Modules, Tools)๋ก ์ฝ๋๋ฅผ ๋ถ๋ฆฌํ์ฌ ์ ์ง๋ณด์์ฑ ๋ฐ ํ์ฅ์ฑ ์ฆ๋ |
|
| 339 |
+
| **๋ฐ์ดํฐ ์บ์ฑ** | Streamlit์ `@st.cache_data` / `@st.cache_resource`๋ฅผ ํ์ฉํ์ฌ ๋ฐ์ดํฐ ๋ฐ ๋ชจ๋ธ ๋ก๋ฉ ์๋ ์ต์ ํ |
|
| 340 |
+
|
| 341 |
+
---
|
| 342 |
+
|
| 343 |
+
## ๐ก ๊ธฐ์ ์คํ
|
| 344 |
+
|
| 345 |
+
* **Frontend:** Streamlit
|
| 346 |
+
* **Backend (Data API):** FastAPI
|
| 347 |
+
* **LLM:** Google Gemini 2.5 Pro (`gemini-2.5-pro` — keep in sync with `LLM_MODEL_NAME` in `config.py`)
|
| 348 |
+
* **AI Framework:** LangChain (Agents, Tool Calling, Prompts)
|
| 349 |
+
* **VectorStore:** FAISS (Facebook AI Similarity Search)
|
| 350 |
+
* **Embedding model:** HuggingFace `dragonkue/BGE-m3-ko` (ํ๊ตญ์ด ํนํ ๋ชจ๋ธ)
|
| 351 |
+
* **Data Handling:** Pandas, NumPy
|
| 352 |
+
* **Visualization:** Matplotlib
|
| 353 |
+
|
| 354 |
+
---
|
| 355 |
+
|
| 356 |
+
## ๐ ์คํ ๋ฐฉ๋ฒ
|
| 357 |
+
|
| 358 |
+
### 1๏ธโฃ ์ฌ์ ์ค๋น
|
| 359 |
+
|
| 360 |
+
* Python 3.11 ์ด์ ์ค์น
|
| 361 |
+
* `uv` (Python ํจํค์ง ์ค์น ๋๊ตฌ) ์ค์น (`pip install uv`)
|
| 362 |
+
* Google API Key ๋ฐ๊ธ (Gemini ๋ชจ๋ธ ์ฌ์ฉ)
|
| 363 |
+
|
| 364 |
+
### 2๏ธโฃ FastAPI ์๋ฒ ์คํ
|
| 365 |
+
|
| 366 |
+
FastAPI ์๋ฒ๋ ๊ฐ๋งน์ ๋ฐ์ดํฐ(`final_df.csv`)๋ฅผ ๋ก๋ํ๊ณ , `/profile` (๊ฐ๊ฒ ์์ธ ์ ๋ณด), `/merchants` (๊ฐ๊ฒ ๋ชฉ๋ก) ์๋ํฌ์ธํธ๋ฅผ ์ ๊ณตํฉ๋๋ค.
|
| 367 |
+
|
| 368 |
+
```bash
|
| 369 |
+
# 1. ํ๋ก์ ํธ ๋ฃจํธ ํด๋๋ก ์ด๋
|
| 370 |
+
cd C:\(๋ค์ด๋ฐ์ ํด๋ ์์น)
|
| 371 |
+
|
| 372 |
+
# 2. ๊ฐ์ํ๊ฒฝ ์์ฑ ๋ฐ ํ์ฑํ (์ต์ด 1ํ)
|
| 373 |
+
uv venv
|
| 374 |
+
|
| 375 |
+
# 3. ๊ฐ์ํ๊ฒฝ ํ์ฑํ (Windows)
|
| 376 |
+
.\.venv\Scripts\activate.bat
|
| 377 |
+
# (macOS/Linux: source .venv/bin/activate)
|
| 378 |
+
|
| 379 |
+
# 4. ํ์ํ ๋ผ์ด๋ธ๋ฌ๋ฆฌ ์ค์น
|
| 380 |
+
uv pip install -r requirements.txt
|
| 381 |
+
|
| 382 |
+
# 5. FastAPI ์๋ฒ ์คํ (api ํด๋์ server.py๋ฅผ ๋ชจ๋๋ก ์คํ)
|
| 383 |
+
python -m api.server
```
|
| 384 |
+
|
| 385 |
+
### 3๏ธโฃ Streamlit ์ฑ ์คํ
|
| 386 |
+
|
| 387 |
+
Streamlit ์ฑ์ ์ฌ์ฉ์ ์ธํฐํ์ด์ค๋ฅผ ์ ๊ณตํ๊ณ , FastAPI ์๋ฒ์์ ๋ฐ์ดํฐ๋ฅผ ๊ฐ์ ธ์ค๋ฉฐ, `Orchestrator`๋ฅผ ํตํด AI ์ปจ์คํ
์ ์ํํฉ๋๋ค.
|
| 388 |
+
|
| 389 |
+
```bash
|
| 390 |
+
# 1. (FastAPI ์๋ฒ์ ๋ค๋ฅธ ํฐ๋ฏธ๋์์) ํ๋ก์ ํธ ๋ฃจํธ ํด๋๋ก ์ด๋
|
| 391 |
+
cd C:\(๋ค์ด๋ฐ์ ํด๋ ์์น)
|
| 392 |
+
|
| 393 |
+
# 2. ๊ฐ์ํ๊ฒฝ ํ์ฑํ (Windows)
|
| 394 |
+
.\.venv\Scripts\activate.bat
|
| 395 |
+
# (macOS/Linux: source .venv/bin/activate)
|
| 396 |
+
|
| 397 |
+
# 3. Streamlit secrets ํ์ผ ์์ฑ (์ต์ด 1ํ)
|
| 398 |
+
# - .streamlit ํด๋๋ฅผ ์์ฑํฉ๋๋ค.
|
| 399 |
+
mkdir .streamlit
|
| 400 |
+
# ์๋ ๋ช
๋ น์ด์ "(๋ฐ๊ธ๋ฐ์ gemini API key)" ๋ถ๋ถ์ ์ค์ ํค๋ก ๋์ฒดํ์ธ์.
|
| 401 |
+
echo GOOGLE_API_KEY="(๋ฐ๊ธ๋ฐ์ gemini API key)" > .streamlit\secrets.toml
|
| 402 |
+
|
| 403 |
+
# 4. Streamlit ์ฑ ์คํ
|
| 404 |
+
uv run streamlit run streamlit_app.py
|
| 405 |
+
```
|
| 406 |
+
์ด์ ์น ๋ธ๋ผ์ฐ์ ์์ Streamlit ์ฑ ์ฃผ์(๋ณดํต http://localhost:8501)๋ก ์ ์ํ์ฌ MarketSync๋ฅผ ์ฌ์ฉํ ์ ์์ต๋๋ค.
|
| 407 |
+
|
| 408 |
+
------------------------------------------------------------------------
|
| 409 |
+
|
| 410 |
+
## ๐ ์์ ์๋๋ฆฌ์ค
|
| 411 |
+
|
| 412 |
+
| ์ฌ์ฉ์ ์
๋ ฅ | ์ฃผ์ ์คํ ๋๊ตฌ | ์์ ๊ฒฐ๊ณผ |
|
| 413 |
+
| :---------------------------------- | :------------------------------------------------------ | :-------------------------------------- |
|
| 414 |
+
| "์ฐ๋ฆฌ ๊ฐ๊ฒ ๋ถ์ํด์ค" | `analyze_merchant_profile` | ๊ฐ๊ฒ SWOT ๋ถ์ ๋ฐ ํต์ฌ ๊ณ ๊ฐ ๋ฆฌํฌํธ |
|
| 415 |
+
| "์ฃผ๋ง ๋ฐฉ๋ฌธ๊ฐ ๋๋ฆด ๋งํ ์ถ์ ์ถ์ฒํด์ค" | `recommend_festivals` | Top 3 ๋ง์ถค ์ถ์ ์ถ์ฒ ๋ฆฌ์คํธ |
|
| 416 |
+
| "`์์ธ๋์ ํธํ์ด` ๋ง์ผํ
์ ๋ต ์๋ ค์ค" | `create_festival_specific_marketing_strategy` | ํด๋น ์ถ์ ๋ง์ถคํ ๋ง์ผํ
์ ๋ต ์ ์ |
|
| 417 |
+
| "์ถ์ฒ๋ ์ถ์ ๋ค ๋ง์ผํ
๋ฐฉ๋ฒ ์๋ ค์ค" | `create_marketing_strategies_for_multiple_festivals` | ์ฌ๋ฌ ์ถ์ ์ ๋ํ ํตํฉ ๋ง์ผํ
์ ๋ต ์ ์ |
|
| 418 |
+
| "์์ฆ ๋จ๋ ํ๋ณด ๋ฐฉ๋ฒ ์๋ ค์ค" | `search_contextual_marketing_strategy` (RAG) | ๊ฐ๊ฒ ํน์ฑ ๊ธฐ๋ฐ ์ต์ ๋ง์ผํ
ํธ๋ ๋/ํ |
|
| 419 |
+
|
| 420 |
+
---
|
| 421 |
+
|
| 422 |
+
## ๐ง ํต์ฌ ์์ด๋์ด
|
| 423 |
+
|
| 424 |
+
> "LLM์ด ์ค์ค๋ก ๋๊ตฌ๋ฅผ ์ ํํ๊ณ ์คํํ๋ **Agentic RAG**"
|
| 425 |
+
|
| 426 |
+
* **LangChain์ Tool-Calling Agent ๊ตฌ์กฐ**: LLM์ด ์ฌ์ฉ์์ ๋ณต์กํ ์์ฒญ์ ์ดํดํ๊ณ , ํ์ํ ๊ธฐ๋ฅ(๋๊ตฌ)์ ์์จ์ ์ผ๋ก ํธ์ถํ๋ฉฐ ์์
์ ์ํํฉ๋๋ค.
|
| 427 |
+
* **์ปจํ
์คํธ ๊ธฐ๋ฐ ์์ฌ๊ฒฐ์ **: ๊ฐ๊ฒ ํ๋กํ(JSON) ๋ฐ์ดํฐ๋ฅผ ํต์ฌ ์ปจํ
์คํธ๋ก ํ์ฉํ์ฌ, ๋ชจ๋ ๋ถ์๊ณผ ์ถ์ฒ์ด ํ์ฌ ๋ถ์ ์ค์ธ ๊ฐ๊ฒ์ ๋ง์ถฐ ์ด๋ฃจ์ด์ง๋๋ค.
|
| 428 |
+
* **ํ์ด๋ธ๋ฆฌ๋ ์ถ์ฒ ์์ง**: FAISS ๋ฒกํฐ ๊ฒ์(์ ์ฌ๋ ๊ธฐ๋ฐ)๊ณผ LLM ์ฌํ๊ฐ(๊ฐ๊ฒ ๋ง์ถค์ฑ ๊ธฐ๋ฐ)๋ฅผ ๊ฒฐํฉํ์ฌ ์ถ์ฒ์ ์ ํ์ฑ๊ณผ ๊ด๋ จ์ฑ์ ๊ทน๋ํํฉ๋๋ค.
|
| 429 |
+
"# MarketSync"
|
assets/ShinhanCard_Logo.png
ADDED
|
Git LFS Details
|
assets/Synapse.png
ADDED
|
Git LFS Details
|
config.py
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# config.py
"""Central project configuration: paths, API endpoints, model names, logging."""

import logging
from pathlib import Path

# --- Paths ---
PROJECT_ROOT = Path(__file__).resolve().parent
PATH_DATA_DIR = PROJECT_ROOT / 'data'
PATH_VECTORSTORE_DIR = PROJECT_ROOT / 'vectorstore'
ASSETS = PROJECT_ROOT / "assets"

# Data Files
PATH_FINAL_DF = PATH_DATA_DIR / 'final_df.csv'
PATH_FESTIVAL_DF = PATH_DATA_DIR / 'festival_df.csv'

# Vectorstore Paths
PATH_FAISS_MARKETING = PATH_VECTORSTORE_DIR / 'faiss_marketing'
PATH_FAISS_FESTIVAL = PATH_VECTORSTORE_DIR / 'faiss_festival'


# --- API ---
API_SERVER_URL = "http://127.0.0.1:8000"
API_PROFILE_ENDPOINT = f"{API_SERVER_URL}/profile"
API_MERCHANTS_ENDPOINT = f"{API_SERVER_URL}/merchants"


# --- Models ---
LLM_MODEL_NAME = "gemini-2.5-pro"
EMBEDDING_MODEL = "dragonkue/BGE-m3-ko"


# --- RAG Weights ---
# Hybrid festival score = embedding similarity * 0.4 + LLM dynamic score * 0.6
FESTIVAL_EMBEDDING_WEIGHT = 0.4
FESTIVAL_DYNAMIC_WEIGHT = 0.6


# --- Logging ---
LOGGING_LEVEL = logging.INFO
LOGGING_FORMAT = "%(asctime)s - [%(levelname)s] - %(name)s (%(funcName)s): %(message)s"

# Configure the root logger exactly once, at import time.  The previous
# implementation called logging.basicConfig() inside get_logger(): basicConfig
# is a silent no-op on every call after the first, so the per-call invocation
# only hid the fact that handler setup is a one-time, process-wide side effect.
logging.basicConfig(level=LOGGING_LEVEL, format=LOGGING_FORMAT)


def get_logger(name: str) -> logging.Logger:
    """Return a logger using the standardized project format.

    Args:
        name: Logger name; conventionally the caller's ``__name__``.

    Returns:
        A ``logging.Logger`` whose level is set to ``LOGGING_LEVEL``.
    """
    logger = logging.getLogger(name)
    logger.setLevel(LOGGING_LEVEL)
    return logger
|
data/big_data_set1_f.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:70c7eb4e4ab4351dfa309f26589738e7b1f804cabd8499bc37308be4cc45d510
|
| 3 |
+
size 377760
|
data/big_data_set2_f.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:caf8ba53f32c65d1666703b60c503d4e3f57eddfa322679ecf3a64498a9bb7b7
|
| 3 |
+
size 10299462
|
data/big_data_set3_f.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8ef850fcc6a15db9366603f077bd59214454401a4d57b93276e46452516bedfd
|
| 3 |
+
size 9622819
|
data/festival_df.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e6805b51be222cf84d60b01aeb7c4ec7f99d4cde1662a82fd9c9d145d3bad2eb
|
| 3 |
+
size 259519
|
data/final_df.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6ccc3ea7f1a9f714b6d9bd538faabfcf30d4995055e86ea027f386c5c4855999
|
| 3 |
+
size 34118076
|
dict
ADDED
|
File without changes
|
format
ADDED
|
File without changes
|
list
ADDED
|
File without changes
|
modules/filtering.py
ADDED
|
@@ -0,0 +1,397 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# modules/filtering.py
|
| 2 |
+
|
| 3 |
+
import json
|
| 4 |
+
import traceback
|
| 5 |
+
from typing import List, Dict, Any, Optional, Tuple
|
| 6 |
+
import pandas as pd
|
| 7 |
+
|
| 8 |
+
from langchain_core.messages import HumanMessage
|
| 9 |
+
from langchain_community.vectorstores import FAISS
|
| 10 |
+
from langchain_core.documents import Document
|
| 11 |
+
|
| 12 |
+
import config
|
| 13 |
+
from modules.knowledge_base import load_festival_vectorstore
|
| 14 |
+
from modules.llm_provider import get_llm
|
| 15 |
+
from utils.parser_utils import extract_json_from_llm_response
|
| 16 |
+
|
| 17 |
+
logger = config.get_logger(__name__)
|
| 18 |
+
|
| 19 |
+
# 4๋ฒ ์ ์: ํ์ดํ๋ผ์ธ ๋ก์ง์ ํด๋์ค๋ก ์บก์ํ
|
| 20 |
+
class FestivalRecommender:
|
| 21 |
+
"""
|
| 22 |
+
ํ์ด๋ธ๋ฆฌ๋ ์ถ์ ์ถ์ฒ ํ์ดํ๋ผ์ธ์ ์บก์ํํ ํด๋์ค.
|
| 23 |
+
"""
|
| 24 |
+
def __init__(self, store_profile: str, user_query: str, specific_intent: Optional[str] = None):
|
| 25 |
+
self.store_profile = store_profile
|
| 26 |
+
self.user_query = user_query
|
| 27 |
+
self.specific_intent = specific_intent
|
| 28 |
+
|
| 29 |
+
# LLM ์ธ์คํด์ค๋ฅผ ๋ฏธ๋ฆฌ ์์ฑ
|
| 30 |
+
self.llm_temp_01 = get_llm(0.1)
|
| 31 |
+
self.llm_temp_03 = get_llm(0.3)
|
| 32 |
+
|
| 33 |
+
# VectorStore ๋ก๋
|
| 34 |
+
self.vectorstore = load_festival_vectorstore()
|
| 35 |
+
|
| 36 |
+
# ๊ฐ์ค์น (config์์ ๋ก๋)
|
| 37 |
+
self.embedding_weight = config.FESTIVAL_EMBEDDING_WEIGHT
|
| 38 |
+
self.dynamic_weight = config.FESTIVAL_DYNAMIC_WEIGHT
|
| 39 |
+
|
| 40 |
+
    def _rewrite_query(self) -> str:
        """(Stage 1) Rewrite the search query for the vector store.

        Combines the store profile and the user question and asks the LLM to
        produce an optimized retrieval query for the festival FAISS index.
        Falls back to the raw ``self.user_query`` whenever rewriting fails or
        returns an empty string.

        Returns:
            The rewritten query string, or ``self.user_query`` on failure.
        """
        logger.info("--- [Filter 1/5] ์ฟผ๋ฆฌ ์ฌ์์ฑ ์์ ---")

        # Optional extra instruction when the caller supplied a specific intent.
        intent_prompt = f"์ฌ์ฉ์์ ๊ตฌ์ฒด์ ์ธ ์์ฒญ: {self.specific_intent}" if self.specific_intent else ""

        # --- (per author's request) prompt text kept verbatim ---
        prompt = f"""
        ๋น์ ์ ์์๊ณต์ธ ๋ง์ผํ์ ์ํ AI ์ปจ์คํดํธ์๋๋ค.
        ๋น์ ์ ์๋ฌด๋ [๊ฐ๊ฒ ํ๋กํ]๊ณผ [์ฌ์ฉ์ ์ง๋ฌธ]์ ์๋๋ฅผ ์๋ฒฝํ๊ฒ ์ดํดํ๊ณ ,
        ์ด ๊ฐ๊ฒ์ ๊ฐ์ฅ ์ ํฉํ ์ถ์ ๋ฅผ ์ฐพ๊ธฐ ์ํ '์ต์ ์ ๊ฒ์ ํค์๋'๋ฅผ ์์ฑํ๋ ๊ฒ์๋๋ค.
        ๊ฒ์ ์์ง์ '์ถ์ ์๊ฐ ๋ด์ฉ'์ ๊ธฐ๋ฐ์ผ๋ก ์ ์ฌ๋๋ฅผ ์ธก์ ํ์ฌ ์ถ์ ๋ฅผ ์ฐพ์๋๋ค.

        [๊ฐ๊ฒ ํ๋กํ]
        {self.store_profile}

        [์ฌ์ฉ์ ์ง๋ฌธ]
        {self.user_query}
        {intent_prompt}

        [๊ฒ์ ํค์๋ ์์ฑ ๊ฐ์ด๋]
        1. ๊ฐ๊ฒ์ '์์ข', '์๊ถ', '์ฃผ์ ๊ณ ๊ฐ์ธต(์ฑ๋ณ/์ฐ๋ น)'์ ํต์ฌ ํค์๋๋ก ์ฌ์ฉํ์ธ์.
        2. ๊ฐ๊ฒ์ '๊ฐ์ '์ด๋ '์ฝ์ '์ ๋ณด์ํ ์ ์๋ ๋ฐฉํฅ์ ๊ณ ๋ คํ์ธ์.
        (์: '์ ๊ท ๊ณ ๊ฐ ํ๋ณด'๊ฐ ํ์ํ๋ฉด '์ ๋ ์ธ๊ตฌ', '๊ด๊ด๊ฐ', '๋๊ท๋ชจ' ๋ฑ)
        (์: '๊ฐ๋จ๊ฐ'๊ฐ ๋ฎ์ผ๋ฉด '๊ตฌ๋งค๋ ฅ ๋์', '3040๋ ์ง์ฅ์ธ' ๋ฑ)
        3. ์ฌ์ฉ์ ์ง๋ฌธ์ ์๋๋ฅผ ๋ฐ์ํ์ธ์. (์: '์ฌ๋ฆ' ์ถ์ , 'ํน์ ์ง์ญ' ์ถ์ )
        4. 5~8๊ฐ์ ํต์ฌ ํค์๋๋ฅผ ์กฐํฉํ์ฌ ์์ฐ์ค๋ฌ์ด ๋ฌธ์ฅ์ด๋ ๊ตฌ๋ฌธ์ผ๋ก ๋ง๋์ธ์.

        [๊ฒ์ ํค์๋ ์์ฑ ๋จ๊ณ ๋ฐ ๊ฐ์ด๋]
        1. **๋ถ์:** ๊ฐ๊ฒ ํ๋กํ(์์ข, ์๊ถ, ์ฃผ์ ๊ณ ๊ฐ)์ ๋ฐํ์ผ๋ก ํ์ฌ ๊ฐ๊ฒ๊ฐ ๋ง์ผํ์ ์ผ๋ก ๊ฐ์ฅ ํ์๋ก ํ๋ ๊ฒ(์: ์ ๊ท ๊ณ ๊ฐ ์ ์, ๊ฐ๋จ๊ฐ ์์น, ํน์ ์ฐ๋ น๋ ํ๋ณด)์ด ๋ฌด์์ธ์ง ๋ด๋ถ์ ์ผ๋ก ๋ถ์ํฉ๋๋ค.
        2. **๋ชฉํ ์ค์ :** ๋ถ์ ๊ฒฐ๊ณผ์ ์ฌ์ฉ์ ์ง๋ฌธ์ ์๋๋ฅผ ๊ฒฐํฉํ์ฌ, ์ถ์ ์ ๊ธฐ๋ํ๋ ์ต์ข์ ์ธ ๋ชฉํ๋ฅผ ๋ชํํ ํฉ๋๋ค. (์: "20๋ ์ฌ์ฑ์ ์ ์์ ์ฆ๊ฐ์ํฌ ์ถ์ ", "๊ฐ์กฑ ๋จ์ ๊ด๊ด๊ฐ์ด ๋ง์ ์ถ์ ")
        3. **ํค์๋ ์ถ์ถ:** ์ค์ ๋ ๋ชฉํ์ ๋ถํฉํ๋ **ํต์ฌ ํค์๋ 7๊ฐ๋ฅผ ๋ช์ฌ ํํ๋ก ์ถ์ถ**ํฉ๋๋ค.
        - '์์ข', '์ฃผ์ ๊ณ ๊ฐ์ธต(์ฑ๋ณ/์ฐ๋ น)', 'ํ์ํ ๊ณ ๊ฐ ์ ์ ํํ(์: ๊ด๊ด๊ฐ, ๊ฐ์กฑ๋จ์, ์ง์ฅ์ธ)', '์์ฆ/ํ๋ง'๋ฅผ ํฌํจํ์ฌ ๊ตฌ์ฒด์ ์ผ๋ก ๋ง๋ญ๋๋ค.


        [์ถ๋ ฅ ํ์]
        (์ค์ง ์ฌ์์ฑ๋ ์ฟผ๋ฆฌ๋ง ์ถ๋ ฅ)
        """

        try:
            # Low-temperature LLM (0.1) for near-deterministic rewriting.
            response = self.llm_temp_01.invoke([HumanMessage(content=prompt)])
            # Strip surrounding quotes the model sometimes wraps the query in.
            rewritten_query = response.content.strip().replace('"', '').replace("'", "")

            if not rewritten_query:
                # Empty rewrite: keep the pipeline going with the original query.
                logger.warning("--- [Filter 1/5 ERROR] ์ฟผ๋ฆฌ ์ฌ์์ฑ ์คํจ, ์๋ณธ ์ฟผ๋ฆฌ ์ฌ์ฉ ---")
                return self.user_query

            return rewritten_query

        except Exception as e:
            logger.critical(f"--- [Filter 1/5 CRITICAL ERROR] {e} ---", exc_info=True)
            return self.user_query  # Fall back to the original query on failure.
|
| 94 |
+
|
| 95 |
+
def _search_candidates(self, query: str, k: int) -> List[Tuple[Document, float]]:
|
| 96 |
+
"""
|
| 97 |
+
(2๋จ๊ณ) ์ฌ์์ฑ๋ ์ฟผ๋ฆฌ๋ฅผ ์ฌ์ฉํ์ฌ Vector Store์์ K๊ฐ์ ํ๋ณด๋ฅผ ๊ฒ์ํฉ๋๋ค.
|
| 98 |
+
"""
|
| 99 |
+
logger.info(f"--- [Filter 2/5] ํ๋ณด ๊ฒ์ (์๋ฒ ๋ฉ ์ ์) ์์ (Query: {query}) ---")
|
| 100 |
+
try:
|
| 101 |
+
if self.vectorstore is None:
|
| 102 |
+
raise RuntimeError("์ถ์ ๋ฒกํฐ์คํ ์ด๊ฐ ๋ก๋๋์ง ์์์ต๋๋ค.")
|
| 103 |
+
|
| 104 |
+
candidates_with_scores = self.vectorstore.similarity_search_with_relevance_scores(query, k=k)
|
| 105 |
+
return candidates_with_scores
|
| 106 |
+
|
| 107 |
+
except Exception as e:
|
| 108 |
+
logger.critical(f"--- [Filter 2/5 CRITICAL ERROR] {e} ---", exc_info=True)
|
| 109 |
+
return []
|
| 110 |
+
|
| 111 |
+
def _evaluate_candidates_dynamically(self, candidates: List[Document]) -> Dict[str, Dict[str, Any]]:
|
| 112 |
+
"""
|
| 113 |
+
(3๋จ๊ณ) LLM์ ์ฌ์ฉํ์ฌ ํ๋ณด๋ค์ '๋์ ์์ฑ'์ ํ๊ฐํฉ๋๋ค.
|
| 114 |
+
"""
|
| 115 |
+
logger.info(f"--- [Filter 3/5] ๋์ ์์ฑ ํ๊ฐ (LLM) ์์ (ํ๋ณด {len(candidates)}๊ฐ) ---")
|
| 116 |
+
|
| 117 |
+
candidates_data = []
|
| 118 |
+
for doc in candidates:
|
| 119 |
+
meta = doc.metadata
|
| 120 |
+
candidates_data.append({
|
| 121 |
+
"์ถ์ ๋ช
": meta.get('์ถ์ ๋ช
'),
|
| 122 |
+
"์ฃผ์์ฑ๋ณ": meta.get('์ฃผ์์ฑ๋ณ'),
|
| 123 |
+
"์ฃผ์์ฐ๋ น๋": meta.get('์ฃผ์์ฐ๋ น๋'),
|
| 124 |
+
"์ฃผ์๊ณ ๊ฐ์ธต": meta.get('์ฃผ์๊ณ ๊ฐ์ธต'),
|
| 125 |
+
"์ฃผ์๋ฐฉ๋ฌธ์": meta.get('์ฃผ์๋ฐฉ๋ฌธ์'),
|
| 126 |
+
"์ถ์ ์ธ๊ธฐ": meta.get('์ถ์ ์ธ๊ธฐ'),
|
| 127 |
+
"์ถ์ ์ธ๊ธฐ๋": meta.get('์ถ์ ์ธ๊ธฐ๋'),
|
| 128 |
+
"์ธ๊ธฐ๋_์ ์": meta.get('์ธ๊ธฐ๋_์ ์')
|
| 129 |
+
})
|
| 130 |
+
|
| 131 |
+
candidates_json_str = json.dumps(candidates_data, ensure_ascii=False, indent=2)
|
| 132 |
+
|
| 133 |
+
# --- (์ฌ์ฉ์ ์์ฒญ) ํ๋กฌํํธ ์๋ณธ ์ ์ง ---
|
| 134 |
+
prompt = f"""
|
| 135 |
+
๋น์ ์ ๋์ฒ ํ ์ถ์ ๋ฐ์ดํฐ ๋ถ์๊ฐ์
๋๋ค. [๊ฐ๊ฒ ํ๋กํ]๊ณผ [์ฌ์ฉ์ ์์ฒญ]์ ๋ฐํ์ผ๋ก,
|
| 136 |
+
๊ฐ [์ถ์ ํ๋ณด]๊ฐ ์ด ๊ฐ๊ฒ์ 'ํ๊ฒ ๊ณ ๊ฐ' ๋ฐ '๋ง์ผํ
๋ชฉํ'์ ์ผ๋ง๋ ์ ๋ง๋์ง
|
| 137 |
+
**์ค์ง ์ ๊ณต๋ '๋์ ์์ฑ' (์ฃผ์์ฑ๋ณ, ์ฃผ์์ฐ๋ น๋, ์ฃผ์๊ณ ๊ฐ์ธต, ์ฃผ์๋ฐฉ๋ฌธ์, ์ธ๊ธฐ๋)๋ง์
|
| 138 |
+
๊ธฐ์ค์ผ๋ก** ํ๊ฐํ๊ณ '๋์ _์ ์' (0~100์ )๋ฅผ ๋งค๊ธฐ์ธ์.
|
| 139 |
+
|
| 140 |
+
[๊ฐ๊ฒ ํ๋กํ]
|
| 141 |
+
{self.store_profile}
|
| 142 |
+
|
| 143 |
+
[์ฌ์ฉ์ ์์ฒญ]
|
| 144 |
+
{self.user_query}
|
| 145 |
+
|
| 146 |
+
[ํ๊ฐ ๋์ ์ถ์ ํ๋ณด ๋ชฉ๋ก (JSON)]
|
| 147 |
+
{candidates_json_str}
|
| 148 |
+
|
| 149 |
+
[๋์ ์ ์ ํ๊ฐ ๊ฐ์ด๋]
|
| 150 |
+
1. **ํ๊ฒ ์ผ์น (์ฑ๋ณ/์ฐ๋ น)**: ๊ฐ๊ฒ์ 'ํต์ฌ๊ณ ๊ฐ'(์: 30๋ ์ฌ์ฑ)๊ณผ ์ถ์ ์ '์ฃผ์์ฑ๋ณ', '์ฃผ์์ฐ๋ น๋'๊ฐ ์ผ์นํ ์๋ก ๋์ ์ ์๋ฅผ ์ฃผ์ธ์.
|
| 151 |
+
2. **๊ณ ๊ฐ์ธต ์ผ์น**: ๊ฐ๊ฒ์ '์
์ข
'(์: ์นดํ)๊ณผ ์ถ์ ์ '์ฃผ์๊ณ ๊ฐ์ธต'(์: 2030 ์ฌ์ฑ, ์ฐ์ธ)์ด ์๋์ง๊ฐ ๋ ์๋ก ๋์ ์ ์๋ฅผ ์ฃผ์ธ์.
|
| 152 |
+
3. **๋ฐฉ๋ฌธ์ ํน์ฑ**: ๊ฐ๊ฒ๊ฐ '์ ๊ท ๊ณ ๊ฐ ํ๋ณด'๊ฐ ํ์ํ๊ณ ์ถ์ ์ '์ฃผ์๋ฐฉ๋ฌธ์'๊ฐ '์ธ์ง์ธ'์ด๋ผ๋ฉด ๋์ ์ ์๋ฅผ ์ฃผ์ธ์. ๋ฐ๋๋ก '๋จ๊ณจ ํ๋ณด'๊ฐ ๋ชฉํ์ธ๋ฐ 'ํ์ง์ธ' ๋ฐฉ๋ฌธ์๊ฐ ๋ง๋ค๋ฉด ๋์ ์ ์๋ฅผ ์ฃผ์ธ์.
|
| 153 |
+
4. **์ธ๊ธฐ๋**: '์ถ์ ์ธ๊ธฐ', '์ถ์ ์ธ๊ธฐ๋', '์ธ๊ธฐ๋_์ ์'๊ฐ ๋์์๋ก ๋ฐฉ๋ฌธ๊ฐ ์๊ฐ ๋ณด์ฅ๋๋ฏ๋ก ๋์ ์ ์๋ฅผ ์ฃผ์ธ์.
|
| 154 |
+
5. **๋ณตํฉ ํ๊ฐ**: ์ด ๋ชจ๋ ์์๋ฅผ ์ข
ํฉํ์ฌ 0์ ์์ 100์ ์ฌ์ด์ '๋์ _์ ์'๋ฅผ ๋ถ์ฌํ์ธ์.
|
| 155 |
+
6. **์ด์ ์์ฑ**: ์ ๊ทธ๋ฐ ์ ์๋ฅผ ์ฃผ์๋์ง 'ํ๊ฐ_์ด์ '์ ๊ฐ๋ตํ ์์ฝํ์ธ์.
|
| 156 |
+
|
| 157 |
+
[์ถ๋ ฅ ํ์ (JSON ๋ฆฌ์คํธ)]
|
| 158 |
+
[
|
| 159 |
+
{{
|
| 160 |
+
"์ถ์ ๋ช
": "[์ถ์ ์ด๋ฆ]",
|
| 161 |
+
"๋์ _์ ์": 85,
|
| 162 |
+
"ํ๊ฐ_์ด์ ": "๊ฐ๊ฒ์ ํต์ฌ ๊ณ ๊ฐ์ธ 30๋ ์ฌ์ฑ๊ณผ ์ถ์ ์ ์ฃผ์์ฐ๋ น๋/์ฃผ์์ฑ๋ณ์ด ์ผ์นํ๋ฉฐ, '์ธ์ง์ธ' ๋ฐฉ๋ฌธ์ ํน์ฑ์ด ์ ๊ท ๊ณ ๊ฐ ํ๋ณด ๋ชฉํ์ ๋ถํฉํจ."
|
| 163 |
+
}},
|
| 164 |
+
...
|
| 165 |
+
]
|
| 166 |
+
"""
|
| 167 |
+
|
| 168 |
+
try:
|
| 169 |
+
response = self.llm_temp_01.invoke([HumanMessage(content=prompt)])
|
| 170 |
+
response_text = response.content.strip()
|
| 171 |
+
|
| 172 |
+
# 5๋ฒ ์ ์: ๊ณตํต ํ์ ์ฌ์ฉ
|
| 173 |
+
scores_list = extract_json_from_llm_response(response_text)
|
| 174 |
+
|
| 175 |
+
scores_dict = {
|
| 176 |
+
item['์ถ์ ๋ช
']: {
|
| 177 |
+
"dynamic_score": item.get('๋์ _์ ์', 0),
|
| 178 |
+
"dynamic_reason": item.get('ํ๊ฐ_์ด์ ', 'N/A')
|
| 179 |
+
}
|
| 180 |
+
for item in scores_list if isinstance(item, dict) and '์ถ์ ๋ช
' in item
|
| 181 |
+
}
|
| 182 |
+
return scores_dict
|
| 183 |
+
|
| 184 |
+
except (ValueError, json.JSONDecodeError) as e:
|
| 185 |
+
logger.error(f"--- [Filter 3/5 CRITICAL ERROR] ๋์ ์ ์ JSON ํ์ฑ ์คํจ: {e} ---")
|
| 186 |
+
logger.debug(f"LLM ์๋ณธ ์๋ต (์ 500์): {response_text[:500]} ...")
|
| 187 |
+
return {} # ์ค๋ฅ ๋ฐ์ ์ ๋น ๋์
๋๋ฆฌ ๋ฐํ (Fallback)
|
| 188 |
+
except Exception as e:
|
| 189 |
+
logger.critical(f"--- [Filter 3/5 CRITICAL ERROR] (Outer Catch) {e} ---", exc_info=True)
|
| 190 |
+
return {}
|
| 191 |
+
|
| 192 |
+
def _calculate_hybrid_scores(
|
| 193 |
+
self,
|
| 194 |
+
embedding_candidates: List[Tuple[Document, float]],
|
| 195 |
+
dynamic_scores: Dict[str, Dict[str, Any]]
|
| 196 |
+
) -> List[Dict[str, Any]]:
|
| 197 |
+
"""
|
| 198 |
+
(4๋จ๊ณ) Score 1(์๋ฒ ๋ฉ)๊ณผ Score 2(๋์ )๋ฅผ ๊ฐ์ค ํฉ์ฐํ์ฌ ์ต์ข
'ํ์ด๋ธ๋ฆฌ๋ ์ ์'๋ฅผ ๊ณ์ฐํฉ๋๋ค.
|
| 199 |
+
"""
|
| 200 |
+
logger.info("--- [Filter 4/5] ํ์ด๋ธ๋ฆฌ๋ ์ ์ ๊ณ์ฐ ์์ ---")
|
| 201 |
+
hybrid_results = []
|
| 202 |
+
|
| 203 |
+
for doc, embedding_score in embedding_candidates:
|
| 204 |
+
festival_name = doc.metadata.get('์ถ์ ๋ช
')
|
| 205 |
+
if not festival_name:
|
| 206 |
+
continue
|
| 207 |
+
|
| 208 |
+
normalized_embedding_score = embedding_score * 100
|
| 209 |
+
dynamic_eval = dynamic_scores.get(festival_name, {"dynamic_score": 0, "dynamic_reason": "N/A"})
|
| 210 |
+
dynamic_score = dynamic_eval["dynamic_score"]
|
| 211 |
+
|
| 212 |
+
hybrid_score = (normalized_embedding_score * self.embedding_weight) + \
|
| 213 |
+
(dynamic_score * self.dynamic_weight)
|
| 214 |
+
|
| 215 |
+
hybrid_results.append({
|
| 216 |
+
"document": doc,
|
| 217 |
+
"metadata": doc.metadata,
|
| 218 |
+
"score_embedding": normalized_embedding_score,
|
| 219 |
+
"score_dynamic": dynamic_score,
|
| 220 |
+
"score_dynamic_reason": dynamic_eval["dynamic_reason"],
|
| 221 |
+
"score_hybrid": hybrid_score
|
| 222 |
+
})
|
| 223 |
+
|
| 224 |
+
hybrid_results.sort(key=lambda x: x.get("score_hybrid", 0), reverse=True)
|
| 225 |
+
return hybrid_results
|
| 226 |
+
|
| 227 |
+
|
| 228 |
+
# 2026๋
๋ ์ง ์์ธก ํฌํผ ํจ์
|
| 229 |
+
def _predict_next_year_date(self, date_str_2025: Optional[str]) -> str:
|
| 230 |
+
"""2025๋
๋ ์ง ๋ฌธ์์ด(YYYY.MM.DD~...)์ ๋ฐ์ 2026๋
์์ ์๊ธฐ๋ฅผ ํ
์คํธ๋ก ๋ฐํํฉ๋๋ค."""
|
| 231 |
+
if not date_str_2025 or not isinstance(date_str_2025, str):
|
| 232 |
+
return "2026๋
์ ๋ณด ์์" # ๋ ์ง ์ ๋ณด ์์ผ๋ฉด ๋ช
์์ ๋ฐํ
|
| 233 |
+
|
| 234 |
+
try:
|
| 235 |
+
# "~" ์๋ถ๋ถ๋ง ์ฌ์ฉํ์ฌ ์์ ๋ ์ง ํ์ฑ (YYYY.MM.DD ํ์ ๊ฐ์ )
|
| 236 |
+
start_date_str = date_str_2025.split('~')[0].strip()
|
| 237 |
+
date_2025 = pd.to_datetime(start_date_str, format='%Y.%m.%d', errors='coerce')
|
| 238 |
+
|
| 239 |
+
if pd.isna(date_2025): # YYYY.MM.DD ํ์ฑ ์คํจ ์ ๋ค๋ฅธ ํ์ ์๋ (์: YYYY-MM-DD)
|
| 240 |
+
date_2025 = pd.to_datetime(start_date_str, errors='coerce')
|
| 241 |
+
|
| 242 |
+
if pd.isna(date_2025): # ์ต์ข
ํ์ฑ ์คํจ ์
|
| 243 |
+
logger.warning(f"๋ ์ง ์์ธก ์คํจ: '{start_date_str}' (์๋ณธ: '{date_str_2025}') ํ์์ ์ธ์ํ ์ ์์ต๋๋ค.")
|
| 244 |
+
return f"2026๋
์ ๋ณด ์์ (2025๋
: {date_str_2025})"
|
| 245 |
+
|
| 246 |
+
month = date_2025.month
|
| 247 |
+
day = date_2025.day
|
| 248 |
+
|
| 249 |
+
if day <= 10:
|
| 250 |
+
timing = f"{month}์ ์ด"
|
| 251 |
+
elif day <= 20:
|
| 252 |
+
timing = f"{month}์ ์ค์"
|
| 253 |
+
else:
|
| 254 |
+
timing = f"{month}์ ๋ง"
|
| 255 |
+
|
| 256 |
+
return f"2026๋
{timing}๊ฒฝ ์์ (2025๋
: {date_str_2025})"
|
| 257 |
+
except Exception as e:
|
| 258 |
+
logger.error(f"๋ ์ง ์์ธก ์ค ์ค๋ฅ ({date_str_2025}): {e}")
|
| 259 |
+
return f"2026๋
์ ๋ณด ์์ (์ค๋ฅ: {e})"
|
| 260 |
+
|
| 261 |
+
def _format_recommendation_results(
|
| 262 |
+
self,
|
| 263 |
+
ranked_list: List[Dict[str, Any]],
|
| 264 |
+
top_k: int
|
| 265 |
+
) -> List[Dict[str, Any]]:
|
| 266 |
+
|
| 267 |
+
""" (5๋จ๊ณ) ์ต์ข
๋ต๋ณ ํฌ๋งทํ
(LLM) """
|
| 268 |
+
logger.info(f"--- [Filter 5/5] ์ต์ข
๋ต๋ณ ํฌ๋งทํ
(LLM) ์์ (Top {top_k}) ---")
|
| 269 |
+
top_candidates = ranked_list[:top_k]
|
| 270 |
+
candidates_data = []
|
| 271 |
+
for candidate in top_candidates:
|
| 272 |
+
meta = candidate["metadata"]
|
| 273 |
+
date_2025 = meta.get('2025_๊ธฐ๊ฐ')
|
| 274 |
+
predicted_2026_timing = self._predict_next_year_date(date_2025)
|
| 275 |
+
candidates_data.append({
|
| 276 |
+
"์ถ์ ๋ช
": meta.get('์ถ์ ๋ช
'),
|
| 277 |
+
"์๊ฐ": meta.get('์๊ฐ'),
|
| 278 |
+
"predicted_2026_timing": predicted_2026_timing,
|
| 279 |
+
"์ฃผ์๊ณ ๊ฐ์ธต": meta.get('์ฃผ์๊ณ ๊ฐ์ธต'),
|
| 280 |
+
"์ฃผ์๋ฐฉ๋ฌธ์": meta.get('์ฃผ์๋ฐฉ๋ฌธ์'),
|
| 281 |
+
"์ถ์ ์ธ๊ธฐ": meta.get('์ถ์ ์ธ๊ธฐ'),
|
| 282 |
+
"ํํ์ด์ง": meta.get('ํํ์ด์ง'),
|
| 283 |
+
"์ถ์ฒ_์ ์": round(candidate["score_hybrid"], 1),
|
| 284 |
+
"์ถ์ฒ_๊ทผ๊ฑฐ_ํค์๋": f"ํค์๋/์๊ฐ ์ผ์น๋ ({round(candidate['score_embedding'], 0)}์ )",
|
| 285 |
+
"์ถ์ฒ_๊ทผ๊ฑฐ_๋์ ": f"๊ฐ๊ฒ ๋ง์ถค์ฑ({round(candidate['score_dynamic'], 0)}์ ): {candidate['score_dynamic_reason']}"
|
| 286 |
+
})
|
| 287 |
+
candidates_json_str = json.dumps(candidates_data, ensure_ascii=False, indent=2)
|
| 288 |
+
|
| 289 |
+
prompt = f"""
|
| 290 |
+
๋น์ ์ ์์๊ณต์ธ ์ปจ์คํดํธ์
๋๋ค. [๊ฐ๊ฒ ํ๋กํ]๊ณผ AI๊ฐ ๋ถ์ํ [์ต์ข
์ถ์ฒ ์ถ์ ๋ชฉ๋ก]์ ๋ฐํ์ผ๋ก,
|
| 291 |
+
์ฌ์ฅ๋๊ป ์ ์ํ ์ต์ข
์ถ์ฒ ๋ต๋ณ์ ์์ฑํ์ธ์.
|
| 292 |
+
|
| 293 |
+
[๊ฐ๊ฒ ํ๋กํ]
|
| 294 |
+
{self.store_profile}
|
| 295 |
+
|
| 296 |
+
[์ต์ข
์ถ์ฒ ์ถ์ ๋ชฉ๋ก (JSON) - ์๊ฐ, 2026๋
์์ ์๊ธฐ ํฌํจ]
|
| 297 |
+
{candidates_json_str}
|
| 298 |
+
|
| 299 |
+
[์ต์ข
๋ต๋ณ ์์ฑ ๊ฐ์ด๋๋ผ์ธ]
|
| 300 |
+
1. **[์ต์ข
์ถ์ฒ ์ถ์ ๋ชฉ๋ก]์ ๋ชจ๋ ์ ๋ณด**๋ฅผ ์ฌ์ฉํ์ฌ ์ต์ข
๋ต๋ณ์ JSON ํ์์ผ๋ก ์์ฑํฉ๋๋ค.
|
| 301 |
+
2. '์ถ์ฒ_์ด์ '๋ '์ถ์ฒ_๊ทผ๊ฑฐ_ํค์๋'์ '์ถ์ฒ_๊ทผ๊ฑฐ_๋์ '์ ์กฐํฉํ์ฌ **์์ฐ์ค๋ฌ์ด ์์ ํ ๋ฌธ์ฅ**์ผ๋ก ์์ฑํ์ธ์.
|
| 302 |
+
3. **(์์ ) '์ถ์ _๊ธฐ๋ณธ์ ๋ณด'**: ์
๋ ฅ JSON์ **'์๊ฐ', '์ฃผ์๊ณ ๊ฐ์ธต', '์ฃผ์๋ฐฉ๋ฌธ์', '์ถ์ ์ธ๊ธฐ'** ์ ๋ณด๋ฅผ ์กฐํฉํ์ฌ ์ถ์ ๋ฅผ ์ค๋ช
ํ๋ ์์ฐ์ค๋ฌ์ด ๋ฌธ์ฅ์ผ๋ก ์์ฑํ์ธ์. '์๊ฐ' ๋ด์ฉ์ ๋ฐํ์ผ๋ก ์ถ์ ์ ํต์ฌ ๋ด์ฉ์ ์์ฝํ๊ณ , ๊ณ ๊ฐ์ธต/๋ฐฉ๋ฌธ์/์ธ๊ธฐ๋ ์ ๋ณด๋ฅผ ๋ง๋ถ์
๋๋ค. (์: "**'{{์๊ฐ ์์ฝ}}'**์(๋ฅผ) ์ฃผ์ ๋ก ํ๋ ์ถ์ ์ด๋ฉฐ, ์ฃผ๋ก **{{์ฃผ์๊ณ ๊ฐ์ธต}}**์ด ๋ฐฉ๋ฌธํ๊ณ **{{์ฃผ์๋ฐฉ๋ฌธ์}}** ํน์ฑ์ ๋ณด์
๋๋ค. (์ธ๊ธฐ๋: **{{์ถ์ ์ธ๊ธฐ}}**)")
|
| 303 |
+
4. **(์ค์) '2026๋
์์ ์๊ธฐ'**: ์
๋ ฅ JSON์ ์๋ **`predicted_2026_timing` ๊ฐ์ ๊ทธ๋๋ก** ๊ฐ์ ธ์์ ์ถ๋ ฅ JSON์ `'2026๋
์์ ์๊ธฐ'` ํ๋ ๊ฐ์ผ๋ก ์ฌ์ฉํ์ธ์. **์ ๋ ์ง์ ๊ณ์ฐํ๊ฑฐ๋ ์์ ํ์ง ๋ง์ธ์.**
|
| 304 |
+
5. **(์ค์) ๋จ์ ์ ์ธ**: '๋จ์ '์ด๋ '๋ถ์ ํฉํ ์ด์ '๋ ์ ๋ ์ถ๋ ฅํ์ง ๋ง์ธ์.
|
| 305 |
+
6. **(์ค์) ์ทจ์์ ๊ธ์ง**: ์ ๋๋ก `~~text~~`์ ๊ฐ์ ์ทจ์์ ๋งํฌ๋ค์ด์ ์ฌ์ฉํ์ง ๋ง์ธ์.
|
| 306 |
+
7. **์ถ๋ ฅ ํ์ (JSON)**: ๋ฐ๋์ ์๋์ JSON ๋ฆฌ์คํธ ํ์์ผ๋ก๋ง ์๋ตํ์ธ์. ๋ค๋ฅธ ์ค๋ช
์์ด JSON๋ง ์ถ๋ ฅํด์ผ ํฉ๋๋ค.
|
| 307 |
+
|
| 308 |
+
[์๋ต ํ์ (JSON ๋ฆฌ์คํธ)]
|
| 309 |
+
[
|
| 310 |
+
{{
|
| 311 |
+
"์ถ์ ๋ช
": "[์ถ์ ์ด๋ฆ]",
|
| 312 |
+
"์ถ์ฒ_์ ์": 95.2,
|
| 313 |
+
"์ถ์ _๊ธฐ๋ณธ์ ๋ณด": "[์ถ์ ์๊ฐ ์์ฝ, ์ฃผ์ ๊ณ ๊ฐ์ธต, ์ฃผ์ ๋ฐฉ๋ฌธ์, ์ธ๊ธฐ๋๋ฅผ ์กฐํฉํ ์์ ํ ๋ฌธ์ฅ]",
|
| 314 |
+
"์ถ์ฒ_์ด์ ": "[๊ฐ๊ฒ ํ๋กํ๊ณผ ์ถ์ฒ ๊ทผ๊ฑฐ๋ฅผ ๋ฐํ์ผ๋ก ์ด ์ถ์ ๋ฅผ ์ถ์ฒํ๋ ์ด์ ๋ฅผ ์์ ํ์ผ๋ก ์์ฑ.]",
|
| 315 |
+
"ํํ์ด์ง": "[์ถ์ ํํ์ด์ง URL]",
|
| 316 |
+
"2026๋
์์ ์๊ธฐ": "[์
๋ ฅ JSON์ predicted_2026_timing ๊ฐ์ ๊ทธ๋๋ก ์ฌ์ฉ]"
|
| 317 |
+
}},
|
| 318 |
+
...
|
| 319 |
+
]
|
| 320 |
+
"""
|
| 321 |
+
response_text = ""
|
| 322 |
+
try:
|
| 323 |
+
response = self.llm_temp_03.invoke([HumanMessage(content=prompt)])
|
| 324 |
+
response_text = response.content.strip()
|
| 325 |
+
final_list = extract_json_from_llm_response(response_text)
|
| 326 |
+
return final_list
|
| 327 |
+
except (ValueError, json.JSONDecodeError) as e:
|
| 328 |
+
logger.error(f"--- [Filter 5/5 CRITICAL ERROR] ์ต์ข
๋ต๋ณ JSON ํ์ฑ ์คํจ: {e} ---")
|
| 329 |
+
logger.debug(f"LLM ์๋ณธ ์๋ต (์ 500์): {response_text[:500]} ...")
|
| 330 |
+
return [{"error": f"์ต์ข
๋ต๋ณ ์์ฑ ์ค JSON ํ์ฑ ์ค๋ฅ ๋ฐ์: {e}", "details": response_text}]
|
| 331 |
+
except Exception as e:
|
| 332 |
+
logger.critical(f"--- [Filter 5/5 CRITICAL ERROR] (Outer Catch) {e} ---", exc_info=True)
|
| 333 |
+
return [{"error": f"์ต์ข
๋ต๋ณ ์์ฑ ์ค ์ ์ ์๋ ์ค๋ฅ ๋ฐ์: {e}"}]
|
| 334 |
+
|
| 335 |
+
|
| 336 |
+
def run(self, search_k: int = 10, top_k: int = 3) -> List[Dict[str, Any]]:
|
| 337 |
+
"""
|
| 338 |
+
ํ์ดํ๋ผ์ธ 1~5๋จ๊ณ๋ฅผ ์์ฐจ์ ์ผ๋ก ์คํํฉ๋๋ค.
|
| 339 |
+
"""
|
| 340 |
+
try:
|
| 341 |
+
# 1๋จ๊ณ: ์ฟผ๋ฆฌ ์ฌ์์ฑ
|
| 342 |
+
rewritten_query = self._rewrite_query()
|
| 343 |
+
logger.info(f"--- [Filter 1/5] ์ฟผ๋ฆฌ ์ฌ์์ฑ ์๋ฃ: {rewritten_query} ---")
|
| 344 |
+
|
| 345 |
+
# 2๋จ๊ณ: ํ๋ณด ๊ฒ์
|
| 346 |
+
embedding_candidates = self._search_candidates(query=rewritten_query, k=search_k)
|
| 347 |
+
if not embedding_candidates:
|
| 348 |
+
logger.warning("--- [Filter 2/5] ํ๋ณด ๊ฒ์ ๊ฒฐ๊ณผ ์์ ---")
|
| 349 |
+
return [{"error": "์ถ์ฒํ ๋งํ ์ถ์ ๋ฅผ ์ฐพ์ง ๋ชปํ์ต๋๋ค."}]
|
| 350 |
+
|
| 351 |
+
logger.info(f"--- [Filter 2/5] ํ๋ณด ๊ฒ์ ์๋ฃ (ํ๋ณด {len(embedding_candidates)}๊ฐ) ---")
|
| 352 |
+
|
| 353 |
+
# 3๋จ๊ณ: ๋์ ์์ฑ ํ๊ฐ
|
| 354 |
+
candidate_docs = [doc for doc, score in embedding_candidates]
|
| 355 |
+
dynamic_scores_dict = self._evaluate_candidates_dynamically(candidates=candidate_docs)
|
| 356 |
+
|
| 357 |
+
if not dynamic_scores_dict:
|
| 358 |
+
logger.warning("--- [Filter 3/5 WARNING] ๋์ ์์ฑ ํ๊ฐ ์คํจ. ์๋ฒ ๋ฉ ์ ์๋ง์ผ๋ก ์ถ์ฒ์ ์งํํฉ๋๋ค. ---")
|
| 359 |
+
# dynamic_scores_dict = {} (๋น ๋์
๋๋ฆฌ๋ก ๊ณ์ ์งํ)
|
| 360 |
+
|
| 361 |
+
logger.info(f"--- [Filter 3/5] ๋์ ์์ฑ ํ๊ฐ ์๋ฃ ({len(dynamic_scores_dict)}๊ฐ) ---")
|
| 362 |
+
|
| 363 |
+
# 4๋จ๊ณ: ํ์ด๋ธ๋ฆฌ๋ ์ ์ ๊ณ์ฐ
|
| 364 |
+
hybrid_results = self._calculate_hybrid_scores(
|
| 365 |
+
embedding_candidates=embedding_candidates,
|
| 366 |
+
dynamic_scores=dynamic_scores_dict
|
| 367 |
+
)
|
| 368 |
+
logger.info(f"--- [Filter 4/5] ํ์ด๋ธ๋ฆฌ๋ ์ ์ ๊ณ์ฐ ๋ฐ ์ ๋ ฌ ์๋ฃ ---")
|
| 369 |
+
|
| 370 |
+
# 5๋จ๊ณ: ์ต์ข
๋ต๋ณ ํฌ๋งทํ
|
| 371 |
+
final_recommendations = self._format_recommendation_results(
|
| 372 |
+
ranked_list=hybrid_results,
|
| 373 |
+
top_k=top_k
|
| 374 |
+
)
|
| 375 |
+
logger.info(f"--- [Filter 5/5] ์ต์ข
๋ต๋ณ ํฌ๋งทํ
์๋ฃ ---")
|
| 376 |
+
|
| 377 |
+
# 5๋จ๊ณ(LLM ํฌ๋งทํ
) ์คํจ ์ Fallback
|
| 378 |
+
if final_recommendations and isinstance(final_recommendations, list) and "error" in final_recommendations[0]:
|
| 379 |
+
logger.warning(f"--- [Tool WARNING] ์ต์ข
๋ต๋ณ ํฌ๋งทํ
์คํจ. 4๋จ๊ณ ์๋ณธ ๋ฐ์ดํฐ๋ก Fallback. ({final_recommendations[0]['error']}) ---")
|
| 380 |
+
|
| 381 |
+
fallback_results = []
|
| 382 |
+
for item in hybrid_results[:top_k]:
|
| 383 |
+
meta = item.get("metadata", {})
|
| 384 |
+
fallback_results.append({
|
| 385 |
+
"์ถ์ ๋ช
": meta.get("์ถ์ ๋ช
", "N/A"),
|
| 386 |
+
"์ถ์ฒ_์ ์": round(item.get("score_hybrid", 0), 1),
|
| 387 |
+
"์ถ์ฒ_์ด์ ": f"์๋ฒ ๋ฉ({round(item.get('score_embedding',0),0)}์ ), ๋ง์ถค์ฑ({round(item.get('score_dynamic',0),0)}์ ): {item.get('score_dynamic_reason', 'N/A')}",
|
| 388 |
+
"์ถ์ _๊ธฐ๋ณธ์ ๋ณด": meta.get("์๊ฐ", "N/A")[:100] + "...",
|
| 389 |
+
"ํํ์ด์ง": meta.get("ํํ์ด์ง", "N/A")
|
| 390 |
+
})
|
| 391 |
+
return fallback_results
|
| 392 |
+
|
| 393 |
+
return final_recommendations
|
| 394 |
+
|
| 395 |
+
except Exception as e:
|
| 396 |
+
logger.critical(f"--- [Tool CRITICAL] ์ถ์ ์ถ์ฒ ํ์ดํ๋ผ์ธ ์ ์ฒด ์ค๋ฅ: {e} ---", exc_info=True)
|
| 397 |
+
return [{"error": f"์ถ์ ๋ฅผ ์ถ์ฒํ๋ ๊ณผ์ ์์ ์๊ธฐ์น ๋ชปํ ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค: {e}"}]
|
modules/knowledge_base.py
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# modules/knowledge_base.py
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
import streamlit as st
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
from langchain_community.vectorstores import FAISS
|
| 7 |
+
from langchain_community.embeddings import HuggingFaceEmbeddings
|
| 8 |
+
import traceback
|
| 9 |
+
|
| 10 |
+
import config
|
| 11 |
+
|
| 12 |
+
logger = config.get_logger(__name__)
|
| 13 |
+
|
| 14 |
+
@st.cache_resource
def _load_embedding_model():
    """Load the HuggingFace embedding model once and cache it.

    Separated into its own cached function so both FAISS loaders reuse
    the same embeddings object. Returns the embeddings instance, or
    None when loading fails (an error is also shown in the Streamlit UI).
    """
    try:
        logger.info("--- [Cache] HuggingFace ์๋ฒ ๋ฉ ๋ชจ๋ธ ์ต์ด ๋ก๋ฉ ์์ ---")

        # CPU inference with normalized vectors (cosine-similarity ready).
        embeddings = HuggingFaceEmbeddings(
            model_name=config.EMBEDDING_MODEL,
            model_kwargs={'device': 'cpu'},
            encode_kwargs={'normalize_embeddings': True},
        )
        logger.info(f"--- [Cache] HuggingFace ์๋ฒ ๋ฉ ๋ชจ๋ธ ({config.EMBEDDING_MODEL}) ๋ก๋ฉ ์ฑ๊ณต ---")
        return embeddings
    except Exception as e:
        logger.critical(f"--- [CRITICAL ERROR] ์๋ฒ ๋ฉ ๋ชจ๋ธ ๋ก๋ฉ ์คํจ: {e} ---", exc_info=True)
        st.error(f"์๋ฒ ๋ฉ ๋ชจ๋ธ('{config.EMBEDDING_MODEL}') ๋ก๋ฉ ์ค ์ฌ๊ฐํ ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค: {e}")
        return None
|
| 37 |
+
|
| 38 |
+
@st.cache_resource
|
| 39 |
+
def load_marketing_vectorstore():
|
| 40 |
+
"""
|
| 41 |
+
'๋ง์ผํ
์ ๋ต' FAISS Vector Store๋ฅผ ๋ก๋ํ์ฌ Retriever๋ฅผ ์์ฑํฉ๋๋ค.
|
| 42 |
+
"""
|
| 43 |
+
try:
|
| 44 |
+
logger.info("--- [Cache] '๋ง์ผํ
' FAISS Vector Store ์ต์ด ๋ก๋ฉ ์์ ---")
|
| 45 |
+
embeddings = _load_embedding_model()
|
| 46 |
+
|
| 47 |
+
if embeddings is None:
|
| 48 |
+
raise RuntimeError("์๋ฒ ๋ฉ ๋ชจ๋ธ ๋ก๋ฉ์ ์คํจํ์ฌ Retriever๋ฅผ ์์ฑํ ์ ์์ต๋๋ค.")
|
| 49 |
+
|
| 50 |
+
vector_db_path = config.PATH_FAISS_MARKETING
|
| 51 |
+
|
| 52 |
+
if not vector_db_path.exists():
|
| 53 |
+
logger.critical(f"--- [CRITICAL ERROR] '๋ง์ผํ
' Vector DB ๊ฒฝ๋ก๋ฅผ ์ฐพ์ ์ ์์ต๋๋ค: {vector_db_path}")
|
| 54 |
+
st.error(f"'๋ง์ผํ
' Vector DB ํ์ผ์ ์ฐพ์ ์ ์์ต๋๋ค. (๊ฒฝ๋ก: {vector_db_path})")
|
| 55 |
+
return None
|
| 56 |
+
|
| 57 |
+
db = FAISS.load_local(
|
| 58 |
+
folder_path=str(vector_db_path),
|
| 59 |
+
embeddings=embeddings,
|
| 60 |
+
allow_dangerous_deserialization=True
|
| 61 |
+
)
|
| 62 |
+
|
| 63 |
+
retriever = db.as_retriever(search_kwargs={"k": 2})
|
| 64 |
+
|
| 65 |
+
logger.info("--- [Cache] '๋ง์ผํ
' FAISS Vector Store ๋ก๋ฉ ์ฑ๊ณต ---")
|
| 66 |
+
return retriever
|
| 67 |
+
|
| 68 |
+
except Exception as e:
|
| 69 |
+
logger.critical(f"--- [CRITICAL ERROR] '๋ง์ผํ
' FAISS ๋ก๋ฉ ์คํจ: {e} ---", exc_info=True)
|
| 70 |
+
st.error(f"'๋ง์ผํ
' Vector Store ๋ก๋ฉ ์ค ์ค๋ฅ ๋ฐ์: {e}")
|
| 71 |
+
return None
|
| 72 |
+
|
| 73 |
+
@st.cache_resource
def load_festival_vectorstore():
    """Load and cache the 'festival' FAISS vector store.

    Returns the FAISS database object, or None when the index path is
    missing or loading fails (the error is surfaced in the Streamlit UI).
    """
    try:
        logger.info("--- [Cache] '์ถ์ ' FAISS Vector Store ์ต์ด ๋ก๋ฉ ์์ ---")

        embeddings = _load_embedding_model()
        if embeddings is None:
            raise RuntimeError("์๋ฒ ๋ฉ ๋ชจ๋ธ ๋ก๋ฉ์ ์คํจํ์ฌ '์ถ์ ' Vector Store๋ฅผ ๋ก๋ํ ์ ์์ต๋๋ค.")

        index_path = config.PATH_FAISS_FESTIVAL
        if not index_path.exists():
            logger.critical(f"--- [CRITICAL ERROR] '์ถ์ ' Vector DB ๊ฒฝ๋ก๋ฅผ ์ฐพ์ ์ ์์ต๋๋ค: {index_path}")
            st.error(f"'์ถ์ ' Vector DB ํ์ผ์ ์ฐพ์ ์ ์์ต๋๋ค. (๊ฒฝ๋ก: {index_path})")
            return None

        # The index was produced by our own pipeline, hence the
        # allow_dangerous_deserialization opt-in (pickle under the hood).
        db = FAISS.load_local(
            folder_path=str(index_path),
            embeddings=embeddings,
            allow_dangerous_deserialization=True,
        )
        logger.info("--- [Cache] '์ถ์ ' FAISS Vector Store ๋ก๋ฉ ์ฑ๊ณต ---")
        return db

    except Exception as e:
        logger.critical(f"--- [CRITICAL ERROR] '์ถ์ ' FAISS ๋ก๋ฉ ์คํจ: {e} ---", exc_info=True)
        st.error(f"'์ถ์ ' Vector Store ๋ก๋ฉ ์ค ์ค๋ฅ ๋ฐ์: {e}")
        return None
|
modules/llm_provider.py
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# modules/llm_provider.py
|
| 2 |
+
|
| 3 |
+
from langchain_google_genai import ChatGoogleGenerativeAI
|
| 4 |
+
from typing import Optional
|
| 5 |
+
|
| 6 |
+
import config
|
| 7 |
+
|
| 8 |
+
logger = config.get_logger(__name__)
|
| 9 |
+
|
| 10 |
+
_llm_instance: Optional[ChatGoogleGenerativeAI] = None
|
| 11 |
+
|
| 12 |
+
def set_llm(llm: ChatGoogleGenerativeAI):
    """Store the Orchestrator-created default LLM in the module-global slot.

    Only the first call takes effect; subsequent calls are ignored so the
    process keeps a single shared default instance.
    """
    global _llm_instance
    if _llm_instance is not None:
        # Already initialized: keep the existing instance untouched.
        logger.info("--- [LLM Provider] Global LLM instance already set. ---")
        return
    logger.info(f"--- [LLM Provider] Global LLM instance set. (Model: {llm.model}, Temp: {llm.temperature}) ---")
    _llm_instance = llm
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
def get_llm(temperature: float = 0.1) -> ChatGoogleGenerativeAI:
    """Return the shared global LLM, adjusted to the requested temperature.

    The global instance is returned as-is when its temperature already
    matches. Otherwise a copy with only the temperature overridden is
    produced, so the API key and remaining settings are reused.

    Raises:
        RuntimeError: if set_llm() has not been called yet.
    """
    if _llm_instance is None:
        logger.error("--- [LLM Provider] LLM not initialized. ---")
        raise RuntimeError(
            "LLM not initialized. The Orchestrator must call set_llm() before any tools are used."
        )

    default_temp = _llm_instance.temperature
    if default_temp == temperature:
        logger.debug(f"--- [LLM Provider] Reusing global LLM instance (temp={temperature}) ---")
        return _llm_instance

    logger.info(f"--- [LLM Provider] Creating new LLM instance with temp={temperature} (default was {default_temp}) ---")

    try:
        # Pydantic v2+ (langchain-core 0.1.23+)
        return _llm_instance.model_copy(update={"temperature": temperature})
    except AttributeError:
        # Pydantic v1 (fallback)
        logger.warning("--- [LLM Provider] Using .copy() fallback (Pydantic v1) ---")
        return _llm_instance.copy(update={"temperature": temperature})
|
modules/profile_utils.py
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# modules/profile_utils.py
|
| 2 |
+
|
| 3 |
+
from typing import Dict, Any
|
| 4 |
+
import config
|
| 5 |
+
|
| 6 |
+
logger = config.get_logger(__name__)
|
| 7 |
+
|
| 8 |
+
def get_chat_profile_dict(store_profile_dict: Dict[str, Any]) -> Dict[str, Any]:
|
| 9 |
+
"""
|
| 10 |
+
FastAPI (server.py)์์ ๋ฐ์ 'store_profile' ๋์
๋๋ฆฌ๋ฅผ ๊ธฐ๋ฐ์ผ๋ก,
|
| 11 |
+
visualization.py์ orchestrator.py์์ ๊ณตํต์ผ๋ก ์ฌ์ฉํ
|
| 12 |
+
'์ฑํ
์ฉ ํ๋กํ ๋์
๋๋ฆฌ' (์ฌ์ฅ๋์ด ์์ฒญํ์ ํญ๋ชฉ)๋ฅผ ์์ฑํฉ๋๋ค.
|
| 13 |
+
|
| 14 |
+
์ด ํจ์๊ฐ '์ฑํ
์ฉ ํ๋กํ'์ ๋จ์ผ ์ ์(Source of Truth) ์ญํ ์ ํฉ๋๋ค.
|
| 15 |
+
"""
|
| 16 |
+
try:
|
| 17 |
+
# 1. ๊ธฐ๋ณธ ์ ๋ณด
|
| 18 |
+
chat_profile_data = {
|
| 19 |
+
"๊ฐ๋งน์ ๋ช
": store_profile_dict.get('๊ฐ๋งน์ ๋ช
', 'N/A'),
|
| 20 |
+
"๊ฐ๋งน์ ID": store_profile_dict.get('๊ฐ๋งน์ ID', 'N/A'),
|
| 21 |
+
"์๊ถ": store_profile_dict.get('์๊ถ', 'N/A'),
|
| 22 |
+
"์
์ข
": store_profile_dict.get('์
์ข
', 'N/A'),
|
| 23 |
+
"์ฃผ์": store_profile_dict.get('๊ฐ๋งน์ ์ฃผ์', 'N/A'),
|
| 24 |
+
"์ด์ ๊ธฐ๊ฐ ์์ค": store_profile_dict.get('์ด์๊ฐ์์_์์ค', 'N/A'),
|
| 25 |
+
"๋งค์ถ ์์ค": store_profile_dict.get('๋งค์ถ๊ตฌ๊ฐ_์์ค', 'N/A'),
|
| 26 |
+
"๋งค์ถ ๊ฑด์ ์์ค": store_profile_dict.get('์๋งค์ถ๊ฑด์_์์ค', 'N/A'),
|
| 27 |
+
"๋ฐฉ๋ฌธ ๊ณ ๊ฐ์ ์์ค": store_profile_dict.get('์์ ๋ํฌ๊ณ ๊ฐ์_์์ค', 'N/A'),
|
| 28 |
+
"๊ฐ๋จ๊ฐ ์์ค": store_profile_dict.get('์๊ฐ๋จ๊ฐ_์์ค', 'N/A'),
|
| 29 |
+
"์ ๊ท/์ฌ๋ฐฉ๋ฌธ์จ": f"์ ๊ท {(store_profile_dict.get('์ ๊ท๊ณ ๊ฐ๋น์จ') or 0):.1f}% / ์ฌ๋ฐฉ๋ฌธ {(store_profile_dict.get('์ฌ์ด์ฉ๊ณ ๊ฐ๋น์จ') or 0):.1f}%",
|
| 30 |
+
"๋์ผ ์๊ถ ๋๋น ๋งค์ถ ์์": f"์์ {(store_profile_dict.get('๋์ผ์๊ถ๋ด๋งค์ถ์์๋น์จ') or 0):.1f}%",
|
| 31 |
+
"๋์ผ ์
์ข
๋๋น ๋งค์ถ ์์": f"์์ {(store_profile_dict.get('๋์ผ์
์ข
๋ด๋งค์ถ์์๋น์จ') or 0):.1f}%"
|
| 32 |
+
}
|
| 33 |
+
|
| 34 |
+
# 2. '์๋์ถ์ถํน์ง' ์ถ๊ฐ
|
| 35 |
+
chat_profile_data["์๋์ถ์ถํน์ง"] = store_profile_dict.get('์๋์ถ์ถํน์ง', {})
|
| 36 |
+
|
| 37 |
+
return chat_profile_data
|
| 38 |
+
|
| 39 |
+
except Exception as e:
|
| 40 |
+
logger.critical(f"--- [Profile Utils CRITICAL] ์ฑํ
ํ๋กํ ๋์
๋๋ฆฌ ์์ฑ ์คํจ: {e} ---", exc_info=True)
|
| 41 |
+
return {
|
| 42 |
+
"์
์ข
": store_profile_dict.get('์
์ข
', '์ ์ ์์'),
|
| 43 |
+
"์๋์ถ์ถํน์ง": store_profile_dict.get('์๋์ถ์ถํน์ง', {}),
|
| 44 |
+
"์ฃผ์": store_profile_dict.get('๊ฐ๋งน์ ์ฃผ์', '์ ์ ์์'),
|
| 45 |
+
"error": "ํ๋กํ ์์ฝ ์ค ์ค๋ฅ ๋ฐ์"
|
| 46 |
+
}
|
modules/visualization.py
ADDED
|
@@ -0,0 +1,230 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# modules/visualization.py
|
| 2 |
+
|
| 3 |
+
import pandas as pd
|
| 4 |
+
import matplotlib.pyplot as plt
|
| 5 |
+
from matplotlib import font_manager
|
| 6 |
+
import numpy as np
|
| 7 |
+
import streamlit as st
|
| 8 |
+
|
| 9 |
+
import config
|
| 10 |
+
from modules.profile_utils import get_chat_profile_dict
|
| 11 |
+
|
| 12 |
+
logger = config.get_logger(__name__)
|
| 13 |
+
|
| 14 |
+
def set_korean_font():
|
| 15 |
+
"""
|
| 16 |
+
์์คํ
์ ์ค์น๋ ํ๊ธ ํฐํธ๋ฅผ ์ฐพ์ Matplotlib์ ์ค์ ํฉ๋๋ค.
|
| 17 |
+
"""
|
| 18 |
+
font_list = ['Malgun Gothic', 'AppleGothic', 'NanumGothic']
|
| 19 |
+
|
| 20 |
+
found_font = False
|
| 21 |
+
for font_name in font_list:
|
| 22 |
+
if any(font.name == font_name for font in font_manager.fontManager.ttflist):
|
| 23 |
+
plt.rc('font', family=font_name)
|
| 24 |
+
logger.info(f"โ
ํ๊ธ ํฐํธ '{font_name}'์(๋ฅผ) ์ฐพ์ ๊ทธ๋ํ์ ์ ์ฉํฉ๋๋ค.")
|
| 25 |
+
found_font = True
|
| 26 |
+
break
|
| 27 |
+
|
| 28 |
+
if not found_font:
|
| 29 |
+
logger.warning("โ ๏ธ ๊ฒฝ๊ณ : Malgun Gothic, AppleGothic, NanumGothic ํฐํธ๋ฅผ ์ฐพ์ ์ ์์ต๋๋ค.")
|
| 30 |
+
|
| 31 |
+
plt.rcParams['axes.unicode_minus'] = False
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def display_merchant_profile(profile_data: dict):
|
| 35 |
+
set_korean_font()
|
| 36 |
+
|
| 37 |
+
"""
|
| 38 |
+
๋ถ์๋ ๊ฐ๋งน์ ํ๋กํ ์ ์ฒด๋ฅผ Streamlit ํ๋ฉด์ ์๊ฐํํฉ๋๋ค.
|
| 39 |
+
"""
|
| 40 |
+
if not profile_data or "store_profile" not in profile_data:
|
| 41 |
+
st.error("๋ถ์ํ ๊ฐ๋งน์ ๋ฐ์ดํฐ๊ฐ ์์ต๋๋ค.")
|
| 42 |
+
return
|
| 43 |
+
|
| 44 |
+
store_data = profile_data["store_profile"]
|
| 45 |
+
store_name = store_data.get('๊ฐ๋งน์ ๋ช
', '์ ํ ๋งค์ฅ')
|
| 46 |
+
|
| 47 |
+
st.info(f"**'{store_name}'**์ ์์ธ ๋ถ์ ๊ฒฐ๊ณผ์
๋๋ค.")
|
| 48 |
+
|
| 49 |
+
tab1, tab2, tab3, tab4 = st.tabs([
|
| 50 |
+
"๐ ๊ธฐ๋ณธ ์ ๋ณด",
|
| 51 |
+
"๐งโ๐คโ๐ง ์ฃผ์ ๊ณ ๊ฐ์ธต (์ฑ๋ณ/์ฐ๋ น๋)",
|
| 52 |
+
"๐ถ ์ฃผ์ ๊ณ ๊ฐ ์ ํ (์๊ถ)",
|
| 53 |
+
"๐ ๊ณ ๊ฐ ์ถฉ์ฑ๋ (์ ๊ท/์ฌ๋ฐฉ๋ฌธ)"
|
| 54 |
+
])
|
| 55 |
+
|
| 56 |
+
with tab1:
|
| 57 |
+
render_basic_info_table(store_data)
|
| 58 |
+
|
| 59 |
+
with tab2:
|
| 60 |
+
st.subheader("๐งโ๐คโ๐ง ์ฃผ์ ๊ณ ๊ฐ์ธต ๋ถํฌ (์ฑ๋ณ/์ฐ๋ น๋)")
|
| 61 |
+
fig2 = plot_customer_distribution(store_data)
|
| 62 |
+
st.pyplot(fig2)
|
| 63 |
+
|
| 64 |
+
with tab3:
|
| 65 |
+
st.subheader("๐ถ ์ฃผ์ ๊ณ ๊ฐ ์ ํ (์๊ถ)")
|
| 66 |
+
fig3 = plot_customer_type_pie(store_data)
|
| 67 |
+
st.pyplot(fig3)
|
| 68 |
+
|
| 69 |
+
with tab4:
|
| 70 |
+
st.subheader("๐ ์ ๊ท vs ์ฌ๋ฐฉ๋ฌธ ๊ณ ๊ฐ ๋น์จ")
|
| 71 |
+
fig4 = plot_loyalty_donut(store_data)
|
| 72 |
+
st.pyplot(fig4)
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
def get_main_customer_segment(store_data):
    """Return a display label for the dominant customer segment (gender/age).

    Reads the gender/age ratio fields from *store_data* and returns a
    string like "'<segment>(<share>%)'" for the largest one, or None when
    every ratio is zero or missing.

    Fix: a key present with a None value used to raise TypeError in the
    `+` sums below; None is now coerced to 0, matching the `(x or 0)`
    convention used by the other profile helpers in this module.
    """
    def ratio(key):
        # .get(key, 0) alone is not enough: the key may exist with value None.
        return store_data.get(key) or 0

    segments = {
        '๋จ์ฑ 20๋ ์ดํ': ratio('๋จ์ฑ20๋์ดํ๋น์จ'),
        '๋จ์ฑ 30๋': ratio('๋จ์ฑ30๋๋น์จ'),
        '๋จ์ฑ 40๋': ratio('๋จ์ฑ40๋๋น์จ'),
        '๋จ์ฑ 50๋ ์ด์': ratio('๋จ์ฑ50๋๋น์จ') + ratio('๋จ์ฑ60๋์ด์๋น์จ'),
        '์ฌ์ฑ 20๋ ์ดํ': ratio('์ฌ์ฑ20๋์ดํ๋น์จ'),
        '์ฌ์ฑ 30๋': ratio('์ฌ์ฑ30๋๋น์จ'),
        '์ฌ์ฑ 40๋': ratio('์ฌ์ฑ40๋๋น์จ'),
        '์ฌ์ฑ 50๋ ์ด์': ratio('์ฌ์ฑ50๋๋น์จ') + ratio('์ฌ์ฑ60๋์ด์๋น์จ'),
    }

    # Ratios are assumed non-negative shares, so a zero maximum means
    # "no data at all" (this also covers the empty-input case).
    max_segment, max_value = max(segments.items(), key=lambda kv: kv[1])
    if max_value == 0:
        return None

    return f"'{max_segment}({max_value:.1f}%)'"
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
def render_basic_info_table(store_data):
|
| 101 |
+
"""(Tab 1) ๊ธฐ๋ณธ ์ ๋ณด ์์ฝ ํ์ ํ
์คํธ๋ฅผ ๋ ๋๋งํฉ๋๋ค."""
|
| 102 |
+
|
| 103 |
+
summary_data = get_chat_profile_dict(store_data)
|
| 104 |
+
|
| 105 |
+
st.subheader("๐ ๊ฐ๋งน์ ๊ธฐ๋ณธ ์ ๋ณด")
|
| 106 |
+
summary_df = pd.DataFrame(summary_data.items(), columns=["ํญ๋ชฉ", "๋ด์ฉ"])
|
| 107 |
+
summary_df = summary_df[summary_df['ํญ๋ชฉ'] != '์๋์ถ์ถํน์ง']
|
| 108 |
+
summary_df = summary_df.astype(str)
|
| 109 |
+
st.table(summary_df.set_index('ํญ๋ชฉ'))
|
| 110 |
+
|
| 111 |
+
st.subheader("๐ ๋ถ์ ์์ฝ")
|
| 112 |
+
st.write(f"โ
**{summary_data.get('๊ฐ๋งน์ ๋ช
', 'N/A')}**์(๋) '{summary_data.get('์๊ถ', 'N/A')}' ์๊ถ์ '{summary_data.get('์
์ข
', 'N/A')}' ์
์ข
๊ฐ๋งน์ ์
๋๋ค.")
|
| 113 |
+
st.write(f"๐ ๋งค์ถ ์์ค์ **{summary_data.get('๋งค์ถ ์์ค', 'N/A')}**์ด๋ฉฐ, ๋์ผ ์๊ถ ๋ด ๋งค์ถ ์์๋ **{summary_data.get('๋์ผ ์๊ถ ๋๋น ๋งค์ถ ์์', 'N/A')}**์
๋๋ค.")
|
| 114 |
+
st.write(f"๐ฐ ๋ฐฉ๋ฌธ ๊ณ ๊ฐ์๋ **{summary_data.get('๋ฐฉ๋ฌธ ๊ณ ๊ฐ์ ์์ค', 'N/A')}** ์์ค์ด๋ฉฐ, ๊ฐ๋จ๊ฐ๋ **{summary_data.get('๊ฐ๋จ๊ฐ ์์ค', 'N/A')}** ์์ค์
๋๋ค.")
|
| 115 |
+
|
| 116 |
+
main_customer = get_main_customer_segment(store_data)
|
| 117 |
+
if main_customer:
|
| 118 |
+
st.write(f"๐ฅ ์ฃผ์ ๊ณ ๊ฐ์ธต์ **{main_customer}**์ด(๊ฐ) ๊ฐ์ฅ ๋ง์ต๋๋ค.")
|
| 119 |
+
|
| 120 |
+
|
| 121 |
+
def plot_customer_distribution(store_data):
    """(Tab 2) Build a grouped bar chart of customer share by gender/age band."""
    labels = ['20๋ ์ดํ', '30๋', '40๋', '50๋ ์ด์']

    def pct(key):
        return store_data.get(key, 0)

    # The two oldest brackets are merged into a single "50+" band.
    male_percents = [
        pct('๋จ์ฑ20๋์ดํ๋น์จ'),
        pct('๋จ์ฑ30๋๋น์จ'),
        pct('๋จ์ฑ40๋๋น์จ'),
        pct('๋จ์ฑ50๋๋น์จ') + pct('๋จ์ฑ60๋์ด์๋น์จ'),
    ]
    female_percents = [
        pct('์ฌ์ฑ20๋์ดํ๋น์จ'),
        pct('์ฌ์ฑ30๋๋น์จ'),
        pct('์ฌ์ฑ40๋๋น์จ'),
        pct('์ฌ์ฑ50๋๋น์จ') + pct('์ฌ์ฑ60๋์ด์๋น์จ'),
    ]

    positions = np.arange(len(labels))
    bar_width = 0.35
    fig, ax = plt.subplots(figsize=(10, 6))
    male_bars = ax.bar(positions - bar_width / 2, male_percents, bar_width, label='๋จ์ฑ', color='cornflowerblue')
    female_bars = ax.bar(positions + bar_width / 2, female_percents, bar_width, label='์ฌ์ฑ', color='salmon')

    ax.set_ylabel('๊ณ ๊ฐ ๋น์จ (%)')
    ax.set_title('์ฃผ์ ๊ณ ๊ฐ์ธต ๋ถํฌ (์ฑ๋ณ/์ฐ๋ น๋)', fontsize=16)
    ax.set_xticks(positions)
    ax.set_xticklabels(labels, fontsize=12)
    ax.legend()
    ax.grid(axis='y', linestyle='--', alpha=0.7)

    # Print the percentage value on top of every bar.
    for bars in (male_bars, female_bars):
        ax.bar_label(bars, padding=3, fmt='%.1f')

    fig.tight_layout()
    return fig
|
| 153 |
+
|
| 154 |
+
|
| 155 |
+
def plot_customer_type_pie(store_data):
    """(Tab 3) Build a pie chart of customer types (floating / resident / worker)."""
    raw_shares = {
        '์ ๋์ธ๊ตฌ': store_data.get("์ ๋์ธ๊ตฌ์ด์ฉ๋น์จ", 0),
        '๊ฑฐ์ฃผ์': store_data.get("๊ฑฐ์ฃผ์์ด์ฉ๋น์จ", 0),
        '์ง์ฅ์ธ': store_data.get("์ง์ฅ์ธ์ด์ฉ๋น์จ", 0)
    }

    # Coerce None to 0 and keep only strictly positive shares
    # (dict insertion order preserves the original slice order).
    shares = {}
    for name, value in raw_shares.items():
        value = value or 0
        if value > 0:
            shares[name] = value

    if sum(shares.values()) == 0:
        # Nothing to plot: render a placeholder figure instead.
        fig, ax = plt.subplots(figsize=(6, 6))
        ax.text(0.5, 0.5, "๋ฐ์ดํฐ ์์", horizontalalignment='center', verticalalignment='center', transform=ax.transAxes)
        ax.set_title("์ฃผ์ ๊ณ ๊ฐ ์ ํ", fontsize=13)
        return fig

    slice_labels = [f"{name} ({value:.1f}%)" for name, value in shares.items()]

    fig, ax = plt.subplots(figsize=(6, 6))
    _, _, pct_texts = ax.pie(
        list(shares.values()),
        labels=slice_labels,
        autopct='%1.1f%%',
        startangle=90,
        pctdistance=0.8
    )

    plt.setp(pct_texts, size=9, weight="bold", color="white")
    ax.set_title("์ฃผ์ ๊ณ ๊ฐ ์ ํ", fontsize=13)
    ax.axis('equal')  # keep the pie circular

    return fig
|
| 193 |
+
|
| 194 |
+
|
| 195 |
+
def plot_loyalty_donut(store_data):
    """(Tab 4) Build a donut chart of new vs returning customer share."""
    # None values are coerced to 0 before plotting.
    loyalty_shares = {
        '์ ๊ท ๊ณ ๊ฐ': store_data.get('์ ๊ท๊ณ ๊ฐ๋น์จ') or 0,
        '์ฌ์ด์ฉ ๊ณ ๊ฐ': store_data.get('์ฌ์ด์ฉ๊ณ ๊ฐ๋น์จ') or 0
    }

    labels = list(loyalty_shares.keys())
    sizes = list(loyalty_shares.values())

    if sum(sizes) == 0:
        # No data available: show a placeholder instead of an empty pie.
        fig, ax = plt.subplots(figsize=(5, 5))
        ax.text(0.5, 0.5, "๋ฐ์ดํฐ ์์", horizontalalignment='center', verticalalignment='center', transform=ax.transAxes)
        ax.set_title("์ ๊ท vs ์ฌ๋ฐฉ๋ฌธ ๊ณ ๊ฐ ๋น์จ")
        return fig

    fig, ax = plt.subplots(figsize=(5, 5))
    _, _, pct_texts = ax.pie(
        sizes,
        labels=labels,
        autopct='%1.1f%%',
        startangle=90,
        pctdistance=0.85,
        colors=['lightcoral', 'skyblue']
    )

    # Punch a white circle through the middle to turn the pie into a donut.
    ax.add_artist(plt.Circle((0, 0), 0.70, fc='white'))

    plt.setp(pct_texts, size=10, weight="bold")
    ax.set_title("์ ๊ท vs ์ฌ๋ฐฉ๋ฌธ ๊ณ ๊ฐ ๋น์จ", fontsize=14)
    ax.axis('equal')  # keep the donut circular

    return fig
|
orchestrator.py
ADDED
|
@@ -0,0 +1,247 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# orchestrator.py
|
| 2 |
+
|
| 3 |
+
import json
|
| 4 |
+
import traceback
|
| 5 |
+
from typing import List, Optional, Dict, Any
|
| 6 |
+
from pydantic import ValidationError
|
| 7 |
+
|
| 8 |
+
from langchain.agents import AgentExecutor, create_tool_calling_agent
|
| 9 |
+
from langchain_core.prompts import ChatPromptTemplate
|
| 10 |
+
from langchain_google_genai import ChatGoogleGenerativeAI
|
| 11 |
+
from langchain.tools.render import render_text_description
|
| 12 |
+
|
| 13 |
+
import config
|
| 14 |
+
from modules.llm_provider import set_llm
|
| 15 |
+
from modules.profile_utils import get_chat_profile_dict
|
| 16 |
+
|
| 17 |
+
# tools/tool_loader.py ์์ ๋ชจ๋ ๋๊ตฌ๋ฅผ ๊ฐ์ ธ์ด
|
| 18 |
+
from tools.tool_loader import ALL_TOOLS
|
| 19 |
+
|
| 20 |
+
logger = config.get_logger(__name__)
|
| 21 |
+
|
| 22 |
+
# --- ํฌํผ ํจ์๋ฅผ ๊ณตํต ์ ํธ๋ฆฌํฐ ํธ์ถ๋ก ๋ณ๊ฒฝ ---
|
| 23 |
+
def _get_chat_profile_json_string(store_profile_dict: Dict[str, Any]) -> str:
    """Build the chat-oriented profile summary and serialize it to JSON.

    Delegates to the shared utility (profile_utils.get_chat_profile_dict).
    On any failure a minimal fallback payload is serialized instead, so the
    agent prompt always receives valid JSON.
    """
    try:
        summary = get_chat_profile_dict(store_profile_dict)
        return json.dumps(summary, ensure_ascii=False)
    except Exception as e:
        logger.critical(f"--- [Orchestrator CRITICAL] 채팅용 JSON 생성 실패: {e} ---", exc_info=True)
        # Degrade gracefully: ship the few raw fields we can read directly.
        fallback = {
            "업종": store_profile_dict.get('업종', '알 수 없음'),
            "자동추출특징": store_profile_dict.get('자동추출특징', {}),
            "주소": store_profile_dict.get('가맹점주소', '알 수 없음'),
            "error": "프로필 요약 중 오류 발생"
        }
        return json.dumps(fallback, ensure_ascii=False)
| 41 |
+
|
| 42 |
+
|
| 43 |
+
class AgentOrchestrator:
|
| 44 |
+
    def __init__(self, google_api_key):
        """Initialize the Gemini-Flash-based agent orchestrator.

        Args:
            google_api_key: API key forwarded to ChatGoogleGenerativeAI.
        """
        # Low temperature: tool-routing decisions should be near-deterministic.
        self.llm = ChatGoogleGenerativeAI(
            model=config.LLM_MODEL_NAME,
            google_api_key=google_api_key,
            temperature=0.1
        )
        # Publish the LLM so tool modules can share the same instance.
        set_llm(self.llm)

        # Full tool inventory comes from tools/tool_loader.py.
        self.tools = ALL_TOOLS

        # Text rendering of the tools; kept as an attribute (not referenced
        # in this file — presumably consumed elsewhere; verify before removing).
        self.rendered_tools = render_text_description(self.tools)

        # Per-invocation context (profile JSON, previous recommendations) is
        # substituted into these placeholders at invoke time.
        self.system_prompt_template = """
{base_system_prompt}

---
📦 [현재 가게 프로필 (JSON)]
{store_profile_context}

📌 [이전 추천 축제 리스트]
{last_recommended_festivals}

---
💡 반드시 위 정보를 기반으로 판단하되,
도구 라우팅 규칙(1~4순위)에 따라 *적절한 단 하나의 도구를 호출*해야 합니다.
"""

        self.prompt = ChatPromptTemplate.from_messages([
            ("system", self.system_prompt_template),
            ("placeholder", "{chat_history}"),
            ("human", "{input}"),
            ("placeholder", "{agent_scratchpad}"),
        ])

        self.agent = create_tool_calling_agent(self.llm, self.tools, self.prompt)

        self.agent_executor = AgentExecutor(
            agent=self.agent,
            tools=self.tools,
            verbose=True,
            handle_parsing_errors=True,
            return_intermediate_steps=True,
        )
        logger.info(f"--- [Streamlit] AgentOrchestrator 초기화 완료 (Model: {config.LLM_MODEL_NAME}) ---")
| 90 |
+
|
| 91 |
+
|
| 92 |
+
    def setup_system_prompt(self):
        """Return the base system prompt tuned for Gemini Flash.

        The prompt encodes the tool-routing rules (priority 1-4) and the
        final-answer formatting guidelines. The body below is kept verbatim
        per the original author's request — do not reword without re-testing
        the agent's routing behavior.
        """
        logger.info("--- [Orchestrator] 시스템 프롬프트 설정 완료 ---")

        # --- (user request) keep the prompt body unchanged ---
        # NOTE: the f-prefix is currently inert (no placeholders), but is kept
        # so interpolation can be added without touching call sites.
        return f"""
당신은 **신한카드 데이터 기반 지역축제 전문 AI 컨설턴트**입니다.
당신의 임무는 사장님의 가게 정보를 기반으로
**가게 분석 → 축제 추천 → 축제 분석 → 마케팅 전략 제안**을 수행하는 것입니다.

---
🔧 [사용 가능한 도구 목록]
(도구 목록은 에이전트에 내장되어 있으며, 아래 [도구 라우팅 규칙]에 따라 호출됩니다.)

---
🎯 **[핵심 임무 요약]**
1️⃣ 사용자의 요청을 완수하기 위해 **필요한 모든 도구를 자율적으로 호출**해야 합니다. 때로는 **여러 도구를 순차적으로 호출**해야 할 수도 있습니다. (예: 축제 추천 → 마케팅 전략 생성)
2️⃣ **도구 호출 없이** "죄송합니다" 또는 "잘 모르겠습니다" 같은 답변을 생성하는 것은 절대 금지입니다.
3️⃣ 모든 요청은 반드시 적합한 도구 호출로 이어져야 합니다.
4️⃣ 모든 도구 실행 결과를 바탕으로, 사장님에게 제공할 [최종 답변]은
**자연스러운 한국어(마크다운 형식)**로 작성합니다.

---
🧭 **[도구 라우팅 규칙 (우선순위 적용)]**

**[1순위] 축제 추천 요청**
- 키워드: "축제 추천", "참여할 만한 축제", "어떤 축제", "행사 찾아줘", "어디가 좋을"
- → `recommend_festivals`

**[2순위] 특정 축제 분석/전략 요청**
- **2-1. 마케팅 전략 요청 (축제 1개)**: 축제 이름이 1개 포함되어 있고 '마케팅', '전략' 등의 키워드가 있는 경우
    - → `create_festival_specific_marketing_strategy`
- **2-2. 마케팅 전략 요청 (축제 2개 이상)**: 축제 이름이 2개 이상 포함되어 있고 '마케팅', '전략' 등의 키워드가 있는 경우
    - → `create_marketing_strategies_for_multiple_festivals`
- **2-3. 축제 상세 분석 요청**: "~축제 어때?", "분석해줘"
    - → `analyze_festival_profile`

**[3순위] 가게 분석 요청**
- 키워드: “우리 가게”, “SWOT”, “고객 특성”, “분석해줘”
- → `analyze_merchant_profile`

**[4순위] 일반 마케팅/홍보 요청**
- 키워드: “마케팅”, “홍보”, “매출”, “전략”
- → `search_contextual_marketing_strategy`

**[기타]**
- 명확히 분류되지 않으면 4순위 도구 사용
- → `search_contextual_marketing_strategy`

---
✅ **[행동 체크리스트]**
- 1️⃣ 사용자의 요청이 **완전히 해결될 때까지** 필요한 모든 도구를 호출할 것
- 2️⃣ [1순위] 수행 시, 마케팅 전략 요청이 있었는지 **반드시 재확인**하고 2단계 도구 호출을 결정할 것
- 3️⃣ 도구 호출 없이 종료하지 말 것
- 4️⃣ 최종 답변은 자연스러운 한국어(마크다운)로 작성할 것

---
✍️ **[최종 답변 가이드라인] (매우 중요)**
1. **친절한 전문가 말투**: 항상 사장님을 대하듯, 전문적이면서도 친절하고 이해하기 쉬운 말투를 사용합니다.
2. **(요청 2) 추천 점수 표시**: `recommend_festivals` 도구의 결과를 포맷팅할 때, 각 축제 이름 옆이나 바로 아래에 **(추천 점수: XX.X점)**과 같이 '추천_점수'를 **반드시** 명시하세요.
3. **(요청 4) 취소선 금지**: 절대로 `~~text~~`와 같은 취소선 마크다운을 사용하지 마세요.
4. **(요청 3) 다음 질문 제안**: 사용자가 다음에 무엇을 할 수 있을지 알 수 있도록, 답변의 **가장 마지막**에 아래와 같은 [다음 질문 예시]를 2~3개 제안하세요.

[다음 질문 예시]
* "방금 추천해준 축제들의 마케팅 전략을 알려줘"
* "[축제 이름]에 대한 마케팅 전략을 짜줘"
* "내 가게의 강점을 활용한 다른 홍보 방법은?"
"""
| 161 |
+
|
| 162 |
+
def invoke_agent(
|
| 163 |
+
self,
|
| 164 |
+
user_query: str,
|
| 165 |
+
store_profile_dict: dict,
|
| 166 |
+
chat_history: list,
|
| 167 |
+
last_recommended_festivals: Optional[List[str]] = None,
|
| 168 |
+
):
|
| 169 |
+
|
| 170 |
+
"""์ฌ์ฉ์ ์
๋ ฅ์ ๋ฐ์ Agent๋ฅผ ์คํํ๊ณ ๊ฒฐ๊ณผ๋ฅผ ๋ฐํ"""
|
| 171 |
+
logger.info(f"--- [Orchestrator] Agent ์คํ ์์ (Query: {user_query[:30]}...) ---")
|
| 172 |
+
|
| 173 |
+
base_system_prompt = self.setup_system_prompt()
|
| 174 |
+
store_profile_chat_json_str = _get_chat_profile_json_string(store_profile_dict)
|
| 175 |
+
last_recommended_festivals_str = (
|
| 176 |
+
"์์" if not last_recommended_festivals else str(last_recommended_festivals)
|
| 177 |
+
)
|
| 178 |
+
|
| 179 |
+
try:
|
| 180 |
+
response = self.agent_executor.invoke({
|
| 181 |
+
"input": user_query,
|
| 182 |
+
"chat_history": chat_history,
|
| 183 |
+
"store_profile_context": store_profile_chat_json_str,
|
| 184 |
+
"store_profile": store_profile_chat_json_str,
|
| 185 |
+
"last_recommended_festivals": last_recommended_festivals_str,
|
| 186 |
+
"base_system_prompt": base_system_prompt,
|
| 187 |
+
})
|
| 188 |
+
|
| 189 |
+
output_text = response.get("output", "").strip()
|
| 190 |
+
|
| 191 |
+
is_garbage_response = (
|
| 192 |
+
len(output_text) < 10 and ("}" in output_text or "`" in output_text)
|
| 193 |
+
)
|
| 194 |
+
|
| 195 |
+
if not output_text or is_garbage_response:
|
| 196 |
+
|
| 197 |
+
if is_garbage_response:
|
| 198 |
+
logger.warning(f"--- [Orchestrator WARNING] ๋น์ ์ ์๋ต ๊ฐ์ง ('{output_text}') โ ์ฌ์๋ ์ํ ---")
|
| 199 |
+
else:
|
| 200 |
+
logger.warning("--- [Orchestrator WARNING] ์๋ต ๋น์ด์์ โ ์ฌ์๋ ์ํ ---")
|
| 201 |
+
|
| 202 |
+
retry_input = f"""
|
| 203 |
+
[์ฌ์๋ ์์ฒญ]
|
| 204 |
+
์ด์ ์๋ต์ด ๋น์ด์๊ฑฐ๋ ๋น์ ์์ ์ธ ๊ฐ('{output_text}')์ด์์ต๋๋ค.
|
| 205 |
+
์ฌ์ฉ์ ์ง๋ฌธ: "{user_query}"
|
| 206 |
+
|
| 207 |
+
๋น์ ์ ๋ฐ๋์ ํ๋์ ๋๊ตฌ๋ฅผ ํธ์ถํด์ผ ํฉ๋๋ค.
|
| 208 |
+
๋๊ตฌ ๋ผ์ฐํ
๊ท์น(1~4์์)์ ๋ฐ๋ผ ์ ์ ํ ๋๊ตฌ๋ฅผ ์ ํํ๊ณ ํธ์ถํ์ญ์์ค.
|
| 209 |
+
"""
|
| 210 |
+
|
| 211 |
+
response = self.agent_executor.invoke({
|
| 212 |
+
"input": retry_input,
|
| 213 |
+
"chat_history": chat_history,
|
| 214 |
+
"store_profile_context": store_profile_chat_json_str,
|
| 215 |
+
"store_profile": store_profile_chat_json_str,
|
| 216 |
+
"last_recommended_festivals": last_recommended_festivals_str,
|
| 217 |
+
"base_system_prompt": base_system_prompt,
|
| 218 |
+
})
|
| 219 |
+
|
| 220 |
+
final_response = response.get("output", "").strip()
|
| 221 |
+
|
| 222 |
+
else:
|
| 223 |
+
final_response = output_text
|
| 224 |
+
|
| 225 |
+
if not final_response:
|
| 226 |
+
final_response = "์ฃ์กํฉ๋๋ค. ์์ฒญ์ ์ฒ๋ฆฌํ๋ ์ค ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค. ์ง๋ฌธ์ ์กฐ๊ธ ๋ ๋ช
ํํ ๋ง์ํด์ฃผ์๊ฒ ์ด์?"
|
| 227 |
+
|
| 228 |
+
logger.info("--- [Orchestrator] Agent ์คํ ์๋ฃ ---\n")
|
| 229 |
+
|
| 230 |
+
return {
|
| 231 |
+
"final_response": final_response,
|
| 232 |
+
"intermediate_steps": response.get("intermediate_steps", [])
|
| 233 |
+
}
|
| 234 |
+
|
| 235 |
+
except ValidationError as e:
|
| 236 |
+
logger.error(f"--- [Orchestrator Pydantic ERROR] {e} ---\n", exc_info=True)
|
| 237 |
+
return {
|
| 238 |
+
"final_response": f"์ฃ์กํฉ๋๋ค. ๋๊ตฌ ์
๋ ฅ๊ฐ(Pydantic) ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค: {e}",
|
| 239 |
+
"intermediate_steps": []
|
| 240 |
+
}
|
| 241 |
+
|
| 242 |
+
except Exception as e:
|
| 243 |
+
logger.critical(f"--- [Orchestrator CRITICAL ERROR] {e} ---\n", exc_info=True)
|
| 244 |
+
return {
|
| 245 |
+
"final_response": f"์ฃ์กํฉ๋๋ค. ์ ์ ์๋ ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค: {e}",
|
| 246 |
+
"intermediate_steps": []
|
| 247 |
+
}
|
pyproject.toml
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[project]
|
| 2 |
+
name = "shcard_2025_bigcontest_chatbot"
|
| 3 |
+
version = "0.1.0"
|
| 4 |
+
description = "์ ํ์นด๋ ๊ฐ๋งน์ ์ถ์ฒ ์ฑ๋ด"
|
| 5 |
+
readme = "README.md"
|
| 6 |
+
requires-python = ">=3.11"
|
| 7 |
+
dependencies = [
|
| 8 |
+
"streamlit>=1.38.0",
|
| 9 |
+
"google-generativeai>=0.8.0",
|
| 10 |
+
"pandas>=2.2.0",
|
| 11 |
+
"mcp>=1.13.1",
|
| 12 |
+
"fastmcp>=2.11.0",
|
| 13 |
+
"langchain>=0.1.0",
|
| 14 |
+
"langchain-google-genai>=1.0.0",
|
| 15 |
+
"langchain-mcp-adapters>=0.1.0",
|
| 16 |
+
"langchain-core>=0.1.0",
|
| 17 |
+
"langgraph>=0.1.0",
|
| 18 |
+
"pillow>=10.0.0",
|
| 19 |
+
"asyncio>=4.0.0",
|
| 20 |
+
"matplotlib>=3.7.2",
|
| 21 |
+
]
|
requirements.txt
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
streamlit>=1.38.0
|
| 2 |
+
google-generativeai>=0.8.0
|
| 3 |
+
pandas>=2.2.0
|
| 4 |
+
numpy>=1.24.0
|
| 5 |
+
|
| 6 |
+
mcp>=1.13.1
|
| 7 |
+
fastmcp>=2.11.0
|
| 8 |
+
|
| 9 |
+
langchain>=0.2.0
|
| 10 |
+
langchain-core>=0.2.0
|
| 11 |
+
langchain-google-genai>=1.0.0
|
| 12 |
+
langchain-mcp-adapters>=0.1.0
|
| 13 |
+
langgraph>=0.1.0
|
| 14 |
+
langchain-community>=0.2.0
|
| 15 |
+
|
| 16 |
+
faiss-cpu>=1.8.0
|
| 17 |
+
pypdf>=4.2.0
|
| 18 |
+
sentence-transformers
|
| 19 |
+
torch
|
| 20 |
+
|
| 21 |
+
Pillow>=10.0.0
|
| 22 |
+
|
| 23 |
+
fastapi>=0.111.0
|
| 24 |
+
uvicorn>=0.26.0
|
| 25 |
+
matplotlib>=3.7.2
|
streamlit_app.py
ADDED
|
@@ -0,0 +1,500 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# streamlit_app.py (FastAPI ํตํฉ ๋ฒ์ )
|
| 2 |
+
|
| 3 |
+
import streamlit as st
|
| 4 |
+
import os
|
| 5 |
+
import pandas as pd
|
| 6 |
+
import numpy as np # api/server.py์์ ํ์
|
| 7 |
+
import math # api/server.py์์ ํ์
|
| 8 |
+
import json
|
| 9 |
+
import traceback
|
| 10 |
+
# import requests # ๋ ์ด์ API ํธ์ถ์ ํ์ํ์ง ์์
|
| 11 |
+
from PIL import Image
|
| 12 |
+
from pathlib import Path
|
| 13 |
+
|
| 14 |
+
from langchain_core.messages import HumanMessage, AIMessage
|
| 15 |
+
|
| 16 |
+
import config
|
| 17 |
+
from orchestrator import AgentOrchestrator
|
| 18 |
+
from modules.visualization import display_merchant_profile
|
| 19 |
+
from modules.knowledge_base import load_marketing_vectorstore, load_festival_vectorstore
|
| 20 |
+
|
| 21 |
+
logger = config.get_logger(__name__)
|
| 22 |
+
|
| 23 |
+
# --- Page configuration ---
# Must be the first Streamlit call in the script; configures the whole page.
st.set_page_config(
    page_title="MarketSync(마켓싱크)",
    page_icon="🚀",  # NOTE(review): emoji reconstructed from a mojibake dump — confirm
    layout="wide",
    initial_sidebar_state="expanded"
)
| 30 |
+
|
| 31 |
+
# --- (1) api/data_loader.py์์ ๊ฐ์ ธ์จ ํจ์ ---
|
| 32 |
+
# config.py๋ฅผ ์ง์ ์ํฌํธํ๋ฏ๋ก sys.path ์กฐ์ ํ์ ์์
|
| 33 |
+
def load_and_preprocess_data():
    """Safely load the pre-built final_df.csv and normalize it for Streamlit.

    Two cleaning passes are applied:
      1. object columns: strip '%', ',' and whitespace, converting values
         that become numeric (avoids Streamlit's Arrow conversion errors);
      2. bucket columns like "3_구간": keep the integer prefix before '_'.

    Returns:
        The cleaned DataFrame, or None when the file is missing or unreadable.
    """
    try:
        file_path = config.PATH_FINAL_DF
        if not file_path.exists():
            logger.critical(f"--- [CRITICAL DATA ERROR] 데이터 파일을 찾을 수 없습니다. 시도 경로: {file_path}")
            logger.critical(f"--- 현재 작업 경로: {Path.cwd()} ---")
            return None
        df = pd.read_csv(file_path)
    except Exception as e:
        logger.critical(f"--- [CRITICAL DATA ERROR] 데이터 파일 로딩 중 예측하지 못한 오류 발생: {e} ---", exc_info=True)
        return None

    logger.info("--- [Preprocess] Streamlit Arrow 변환 오류 방지용 데이터 클리닝 시작 ---")
    for col in df.select_dtypes(include='object').columns:
        cleaned = (
            df[col]
            .astype(str)
            .str.replace('%', '', regex=False)
            .str.replace(',', '', regex=False)
            .str.strip()
        )
        as_number = pd.to_numeric(cleaned, errors='coerce')
        # Keep the numeric value where parsing succeeded, the cleaned text otherwise.
        df[col] = as_number.fillna(cleaned)
    logger.info("--- [Preprocess] 데이터 클리닝 완료 ---")

    for col in ['월매출금액_구간', '월매출건수_구간', '월유니크고객수_구간', '월객단가_구간']:
        if col not in df.columns:
            continue
        try:
            # fillna('') is effectively a no-op after astype(str); kept for safety.
            prefix = df[col].astype(str).fillna('').str.split('_').str[0]
            df[col] = pd.to_numeric(prefix, errors='coerce').fillna(0).astype(int)
        except Exception as e:
            logger.warning(f"--- [DATA WARNING] '{col}' 컬럼 처리 중 오류 발생: {e}. 해당 컬럼은 건너뜁니다. ---", exc_info=True)
            continue

    logger.info(f"--- [Preprocess] 데이터 로드 및 전처리 최종 완료. (Shape: {df.shape}) ---")
    return df
| 82 |
+
|
| 83 |
+
# --- (2) api/server.py์์ ๊ฐ์ ธ์จ ํฌํผ ํจ์ ---
|
| 84 |
+
def replace_nan_with_none(data):
    """Recursively convert every NaN float inside dicts/lists to None.

    JSON has no NaN literal, so the profile payload must be sanitized before
    serialization. Non-container, non-NaN values pass through unchanged.
    """
    if isinstance(data, float) and math.isnan(data):
        return None
    if isinstance(data, list):
        return [replace_nan_with_none(item) for item in data]
    if isinstance(data, dict):
        return {key: replace_nan_with_none(value) for key, value in data.items()}
    return data
| 96 |
+
|
| 97 |
+
# --- (3) api/server.py์ POST /profile ๋ก์ง์ ๋ณํํ ํจ์ ---
|
| 98 |
+
def get_merchant_profile_logic(merchant_id: str, df_merchant: pd.DataFrame):
    """Profile one merchant plus its trade-area/category peer average.

    Ported from the FastAPI `POST /profile` endpoint logic.

    Args:
        merchant_id: value matched against the '가맹점ID' column.
        df_merchant: master DataFrame holding every merchant row.

    Returns:
        {'store_profile': ..., 'average_profile': ...} with NaN replaced by None.

    Raises:
        ValueError: when the merchant id does not exist.
        Exception: wrapping any unexpected failure.
    """
    logger.info(f"✅ [Local Logic] 가맹점 ID '{merchant_id}' 프로파일링 요청 수신")
    try:
        matches = df_merchant[df_merchant['가맹점ID'] == merchant_id]
        if matches.empty:
            logger.warning(f"⚠️ [Local Logic] 404 - '{merchant_id}' 가맹점 ID를 찾을 수 없습니다.")
            raise ValueError(f"'{merchant_id}' 가맹점 ID를 찾을 수 없습니다.")

        # Several monthly snapshots may exist for one merchant; keep the latest.
        if len(matches) > 1:
            logger.info(f" [INFO] '{merchant_id}'에 대해 {len(matches)}개의 데이터 발견. 최신 데이터로 필터링합니다.")
            dated = matches.copy()
            dated['기준년월_dt'] = pd.to_datetime(dated['기준년월'])
            latest = dated.sort_values(by='기준년월_dt', ascending=False).iloc[[0]]
        else:
            latest = matches

        store_data = latest.iloc[0].to_dict()

        # Aggregate per-gender ratios across all age bands.
        male_cols = ['남성20대이하비율', '남성30대비율', '남성40대비율', '남성50대비율', '남성60대이상비율']
        female_cols = ['여성20대이하비율', '여성30대비율', '여성40대비율', '여성50대비율', '여성60대이상비율']
        store_data['남성고객비율'] = sum(store_data.get(c, 0) for c in male_cols)
        store_data['여성고객비율'] = sum(store_data.get(c, 0) for c in female_cols)

        # Combined age-band ratios (the 50s bucket also absorbs 60+).
        store_data['연령대20대이하고객비율'] = store_data.get('남성20대이하비율', 0) + store_data.get('여성20대이하비율', 0)
        store_data['연령대30대고객비율'] = store_data.get('남성30대비율', 0) + store_data.get('여성30대비율', 0)
        store_data['연령대40대고객비율'] = store_data.get('남성40대비율', 0) + store_data.get('여성40대비율', 0)
        store_data['연령대50대고객비율'] = (
            store_data.get('남성50대비율', 0) + store_data.get('여성50대비율', 0) +
            store_data.get('남성60대이상비율', 0) + store_data.get('여성60대이상비율', 0)
        )

        # Note: a tie in gender ratios classifies as '여성 중심' (strict >).
        dominant_gender = '남성 중심' if store_data.get('남성고객비율', 0) > store_data.get('여성고객비율', 0) else '여성 중심'

        age_ratios = {
            '20대이하': store_data.get('연령대20대이하고객비율', 0),
            '30대': store_data.get('연령대30대고객비율', 0),
            '40대': store_data.get('연령대40대고객비율', 0),
            '50대이상': store_data.get('연령대50대고객비율', 0),
        }
        dominant_age = max(age_ratios, key=age_ratios.get)

        store_data['자동추출특징'] = {
            "핵심고객": dominant_gender,
            "핵심연령대": dominant_age,
            "매출순위": f"상권 내 순위 {store_data.get('동일상권내매출순위비율', 0):.1f}%, 업종 내 순위 {store_data.get('동일업종내매출순위비율', 0):.1f}%"
        }

        # Peer average: rows in the same trade area with the same category.
        area = store_data.get('상권')
        category = store_data.get('업종')
        peers = df_merchant[(df_merchant['상권'] == area) & (df_merchant['업종'] == category)]
        if peers.empty:
            average_data = {}
        else:
            numeric_cols = peers.select_dtypes(include=np.number).columns
            average_data = peers[numeric_cols].mean().to_dict()
        average_data['가맹점명'] = f"{area} {category} 업종 평균"

        clean_result = replace_nan_with_none({
            "store_profile": store_data,
            "average_profile": average_data
        })

        logger.info(f"✅ [Local Logic] '{store_data.get('가맹점명')}({merchant_id})' 프로파일링 성공 (기준년월: {store_data.get('기준년월')})")
        return clean_result

    except ValueError as e:  # was HTTPException in the FastAPI version
        logger.error(f"❌ [Local Logic ERROR] 처리 중 오류: {e}", exc_info=True)
        raise e
    except Exception as e:
        logger.critical(f"❌ [Local Logic CRITICAL] 예측하지 못한 오류: {e}\n{traceback.format_exc()}", exc_info=True)
        raise Exception(f"서버 내부 오류 발생: {e}")
| 190 |
+
|
| 191 |
+
# --- (๋) API ๋ก์ง ํตํฉ ---
|
| 192 |
+
|
| 193 |
+
|
| 194 |
+
# --- ์ด๋ฏธ์ง ๋ก๋ ํจ์ ---
|
| 195 |
+
@st.cache_data
def load_image(image_name: str) -> Image.Image | None:
    """Load (and cache) an image from the assets folder.

    Returns None on a missing file or any loading error, so callers can
    simply skip rendering.
    """
    try:
        image_path = config.ASSETS / image_name
        if image_path.is_file():
            return Image.open(image_path)
        logger.error(f"이미지 파일을 찾을 수 없습니다: {image_path}")
        return None
    except Exception as e:
        logger.error(f"이미지 로딩 중 오류 발생 ({image_name}): {e}", exc_info=True)
        return None
| 208 |
+
|
| 209 |
+
# --- (4) ๋ฐ์ดํฐ ๋ก๋ ํจ์ ์์ ---
|
| 210 |
+
|
| 211 |
+
@st.cache_data
def load_master_dataframe():
    """
    (modified) Load the whole 'final_df.csv' master data at app start,
    taking over the role of the former FastAPI server.

    Cached per session via st.cache_data; returns None on failure so the
    caller can show an error and stop the app.
    """
    logger.info("마스터 데이터프레임 로드 시도...")
    df = load_and_preprocess_data()  # helper copied from api/data_loader.py
    if df is None:
        logger.critical("--- [Streamlit Error] 마스터 데이터 로딩 실패! ---")
        return None
    logger.info("--- [Streamlit] 마스터 데이터프레임 로드 및 캐시 완료 ---")
    return df
| 224 |
+
|
| 225 |
+
@st.cache_data
def load_merchant_list_for_ui(_df_master: pd.DataFrame):
    """Extract the unique (ID, name) merchant list for the search UI.

    Ported from the FastAPI `GET /merchants` endpoint. The leading
    underscore in `_df_master` tells st.cache_data not to hash the frame.
    Returns None when the master frame is missing or extraction fails.
    """
    if _df_master is None:
        return None
    try:
        logger.info(f"✅ [Local Logic] '/merchants' 가맹점 목록 요청 수신")
        records = _df_master[['가맹점ID', '가맹점명']].drop_duplicates().to_dict('records')
        logger.info(f"✅ [Local Logic] 가맹점 목록 {len(records)}개 반환 완료")
        return pd.DataFrame(records)
    except Exception as e:
        st.error(f"가게 목록을 불러오는 데 실패했습니다: {e}")
        logger.critical(f"가게 목록 로딩 실패: {e}", exc_info=True)
        return None
| 242 |
+
|
| 243 |
+
# --- Data loading at import time (replaces the FastAPI server) ---
# Load the master DataFrame first; everything below depends on it.
MASTER_DF = load_master_dataframe()
if MASTER_DF is None:
    st.error("🚨 데이터 로딩 실패! data/final_df.csv 파일을 확인해주세요.")
    st.stop()  # halt rendering: nothing below works without the data

# Extract the UI-facing merchant (ID, name) list from the master frame.
merchant_df = load_merchant_list_for_ui(MASTER_DF)
if merchant_df is None:
    st.error("🚨 가맹점 목록 추출 실패!")
    st.stop()
| 256 |
+
# --- ์ธ์
์ด๊ธฐํ ํจ์ ---
|
| 257 |
+
def initialize_session():
    """Create the orchestrator (once per session) and seed session-state keys."""
    if "orchestrator" not in st.session_state:
        google_api_key = os.environ.get("GOOGLE_API_KEY")
        if not google_api_key:
            st.error("🔑 GOOGLE_API_KEY 환경변수가 설정되지 않았습니다!")
            st.stop()
        with st.spinner("🧠 AI 모델과 빅데이터를 로딩하고 있어요... 잠시만 기다려주세요!"):
            try:
                # Best-effort LLM response cache; availability depends on the
                # installed langchain version, hence the nested try.
                try:
                    from langchain.cache import InMemoryCache
                    from langchain.globals import set_llm_cache
                    set_llm_cache(InMemoryCache())
                    logger.info("--- [Streamlit] 전역 LLM 캐시(InMemoryCache) 활성화 ---")
                except ImportError:
                    logger.warning("--- [Streamlit] langchain.cache 임포트 실패. LLM 캐시 비활성화 ---")

                # Warm up both vector stores; the festival store is mandatory.
                load_marketing_vectorstore()
                if load_festival_vectorstore() is None:
                    st.error("💾 축제 벡터 DB 로딩 실패! 'build_vector_store.py' 실행 여부를 확인하세요.")
                    st.stop()
                logger.info("--- [Streamlit] 모든 AI 모듈 로딩 완료 ---")
            except Exception as e:
                st.error(f"🤯 AI 모듈 초기화 중 오류 발생: {e}")
                logger.critical(f"AI 모듈 초기화 실패: {e}", exc_info=True)
                st.stop()
        st.session_state.orchestrator = AgentOrchestrator(google_api_key)

    if "step" not in st.session_state:
        # First visit in this session: start at the merchant-search step.
        st.session_state.step = "get_merchant_name"
        st.session_state.messages = []
        st.session_state.merchant_id = None
        st.session_state.merchant_name = None
        st.session_state.profile_data = None
        st.session_state.consultation_result = None
    if "last_recommended_festivals" not in st.session_state:
        st.session_state.last_recommended_festivals = []
| 298 |
+
|
| 299 |
+
# --- ์ฒ์์ผ๋ก ๋์๊ฐ๊ธฐ ํจ์ ---
|
| 300 |
+
def restart_consultation():
    """Reset the consultation flow by dropping every related session key."""
    for key in ("step", "merchant_name", "merchant_id", "profile_data",
                "messages", "consultation_result", "last_recommended_festivals"):
        # pop with a default is the del-if-present idiom; missing keys are fine.
        st.session_state.pop(key, None)
| 306 |
+
|
| 307 |
+
# --- ์ฌ์ด๋๋ฐ ๋ ๋๋ง ํจ์ ---
|
| 308 |
+
def render_sidebar():
    """Render the sidebar: centered logos, contest captions, restart button."""
    def _centered_image(img):
        # 1:5:1 columns center the image at ~70% of the sidebar width.
        left, middle, right = st.columns([1, 5, 1])
        with middle:
            if img:
                st.image(img, use_container_width=True)

    with st.sidebar:
        synapse_logo = load_image("Synapse.png")
        shinhancard_logo = load_image("ShinhanCard_Logo.png")

        _centered_image(synapse_logo)

        # Vertical spacing between the two logos.
        st.write("")
        st.markdown(" ")

        _centered_image(shinhancard_logo)

        st.markdown("<p style='text-align: center; color: grey; margin-top: 20px;'>2025 Big Contest</p>", unsafe_allow_html=True)
        st.markdown("<p style='text-align: center; color: grey;'>AI DATA 활용분야</p>", unsafe_allow_html=True)
        st.markdown("---")

        if st.button('처음으로 돌아가기', key='restart_button_styled', use_container_width=True):
            restart_consultation()
            st.rerun()
| 334 |
+
|
| 335 |
+
# --- ๊ฐ๊ฒ ๊ฒ์ UI ํจ์ (์์ ) ---
|
| 336 |
+
def render_get_merchant_name_step():
|
| 337 |
+
""" UI 1๋จ๊ณ: ๊ฐ๋งน์ ๊ฒ์ ๋ฐ ์ ํ (API ํธ์ถ ๋ก์ง ์์ ) """
|
| 338 |
+
st.subheader("๐ ์ปจ์คํ
๋ฐ์ ๊ฐ๊ฒ๋ฅผ ๊ฒ์ํด์ฃผ์ธ์")
|
| 339 |
+
st.caption("๊ฐ๊ฒ ์ด๋ฆ ๋๋ ๊ฐ๋งน์ ID์ ์ผ๋ถ๋ฅผ ์
๋ ฅํ์ฌ ๊ฒ์ํ ์ ์์ต๋๋ค.")
|
| 340 |
+
|
| 341 |
+
search_query = st.text_input(
|
| 342 |
+
"๊ฐ๊ฒ ์ด๋ฆ ๋๋ ๊ฐ๋งน์ ID ๊ฒ์",
|
| 343 |
+
placeholder="์: ๋ฉ๊ฐ์ปคํผ, ์คํ๋ฒ
์ค, 003AC99735 ๋ฑ",
|
| 344 |
+
label_visibility="collapsed"
|
| 345 |
+
)
|
| 346 |
+
|
| 347 |
+
if search_query:
|
| 348 |
+
mask = (
|
| 349 |
+
merchant_df['๊ฐ๋งน์ ๋ช
'].str.contains(search_query, case=False, na=False, regex=False) |
|
| 350 |
+
merchant_df['๊ฐ๋งน์ ID'].str.contains(search_query, case=False, na=False, regex=False)
|
| 351 |
+
)
|
| 352 |
+
search_results = merchant_df[mask].copy()
|
| 353 |
+
|
| 354 |
+
if not search_results.empty:
|
| 355 |
+
search_results['display'] = search_results['๊ฐ๋งน์ ๋ช
'] + " (" + search_results['๊ฐ๋งน์ ID'] + ")"
|
| 356 |
+
options = ["โฌ ์๋ ๋ชฉ๋ก์์ ๊ฐ๊ฒ๋ฅผ ์ ํํด์ฃผ์ธ์..."] + search_results['display'].tolist()
|
| 357 |
+
selected_display_name = st.selectbox(
|
| 358 |
+
"๊ฐ๊ฒ ์ ํ:",
|
| 359 |
+
options,
|
| 360 |
+
label_visibility="collapsed"
|
| 361 |
+
)
|
| 362 |
+
|
| 363 |
+
if selected_display_name != "โฌ๏ธ ์๋ ๋ชฉ๋ก์์ ๊ฐ๊ฒ๋ฅผ ์ ํํด์ฃผ์ธ์...":
|
| 364 |
+
try:
|
| 365 |
+
selected_row = search_results[search_results['display'] == selected_display_name].iloc[0]
|
| 366 |
+
selected_merchant_id = selected_row['๊ฐ๋งน์ ID']
|
| 367 |
+
selected_merchant_name = selected_row['๊ฐ๋งน์ ๋ช
']
|
| 368 |
+
button_label = f"๐ '{selected_merchant_name}' ๋ถ์ ์์ํ๊ธฐ"
|
| 369 |
+
is_selection_valid = True
|
| 370 |
+
except (IndexError, KeyError):
|
| 371 |
+
button_label = "๋ถ์ ์์ํ๊ธฐ"
|
| 372 |
+
is_selection_valid = False
|
| 373 |
+
|
| 374 |
+
if st.button(button_label, disabled=not is_selection_valid, type="primary", use_container_width=True):
|
| 375 |
+
with st.spinner(f"๐ '{selected_merchant_name}' ๊ฐ๊ฒ ์ ๋ณด๋ฅผ ๋ถ์ ์ค์
๋๋ค... ์ ์๋ง ๊ธฐ๋ค๋ ค์ฃผ์ธ์!"):
|
| 376 |
+
profile_data = None
|
| 377 |
+
try:
|
| 378 |
+
# --- (์์ ) API POST ์์ฒญ ๋์ (3)์์ ๋ง๋ ๋ก์ปฌ ํจ์ ํธ์ถ ---
|
| 379 |
+
profile_data = get_merchant_profile_logic(selected_merchant_id, MASTER_DF)
|
| 380 |
+
# --------------------------------------------------------
|
| 381 |
+
|
| 382 |
+
if "store_profile" not in profile_data or "average_profile" not in profile_data:
|
| 383 |
+
st.error("ํ๋กํ ์์ฑ ํ์์ด ์ฌ๋ฐ๋ฅด์ง ์์ต๋๋ค.")
|
| 384 |
+
profile_data = None
|
| 385 |
+
except ValueError as e: # 404 ์ค๋ฅ
|
| 386 |
+
st.error(f"๊ฐ๊ฒ ํ๋กํ ๋ก๋ฉ ์คํจ: {e}")
|
| 387 |
+
except Exception as e:
|
| 388 |
+
st.error(f"๊ฐ๊ฒ ํ๋กํ ๋ก๋ฉ ์ค ์์์น ๋ชปํ ์ค๋ฅ ๋ฐ์: {e}")
|
| 389 |
+
logger.critical(f"๊ฐ๊ฒ ํ๋กํ ๋ก์ปฌ ๋ก์ง ์คํจ: {e}", exc_info=True)
|
| 390 |
+
|
| 391 |
+
if profile_data:
|
| 392 |
+
st.session_state.merchant_name = selected_merchant_name
|
| 393 |
+
st.session_state.merchant_id = selected_merchant_id
|
| 394 |
+
st.session_state.profile_data = profile_data
|
| 395 |
+
st.session_state.step = "show_profile_and_chat"
|
| 396 |
+
st.success(f"โ
'{selected_merchant_name}' ๋ถ์ ์๋ฃ!")
|
| 397 |
+
st.rerun()
|
| 398 |
+
else:
|
| 399 |
+
st.info("๐ก ๊ฒ์ ๊ฒฐ๊ณผ๊ฐ ์์ต๋๋ค. ๋ค๋ฅธ ๊ฒ์์ด๋ฅผ ์๋ํด๋ณด์ธ์.")
|
| 400 |
+
|
| 401 |
+
# --- ํ๋กํ ๋ฐ ์ฑํ
UI ํจ์ ---
|
| 402 |
+
def render_show_profile_and_chat_step():
|
| 403 |
+
"""UI 2๋จ๊ณ: ํ๋กํ ํ์ธ ๋ฐ AI ์ฑํ
"""
|
| 404 |
+
st.subheader(f"โจ '{st.session_state.merchant_name}' ๊ฐ๊ฒ ๋ถ์ ์๋ฃ")
|
| 405 |
+
with st.expander("๐ ์์ธ ๋ฐ์ดํฐ ๋ถ์ ๋ฆฌํฌํธ ๋ณด๊ธฐ", expanded=True):
|
| 406 |
+
try:
|
| 407 |
+
display_merchant_profile(st.session_state.profile_data)
|
| 408 |
+
except Exception as e:
|
| 409 |
+
st.error(f"ํ๋กํ ์๊ฐํ ์ค ์ค๋ฅ ๋ฐ์: {e}")
|
| 410 |
+
logger.error(f"--- [Visualize ERROR]: {e}\n{traceback.format_exc()}", exc_info=True)
|
| 411 |
+
|
| 412 |
+
st.divider()
|
| 413 |
+
st.subheader("๐ฌ AI ์ปจ์คํดํธ์ ์๋ด์ ์์ํ์ธ์.")
|
| 414 |
+
st.info("๊ฐ๊ฒ ๋ถ์ ์ ๋ณด๋ฅผ ๋ฐํ์ผ๋ก ๊ถ๊ธํ ์ ์ ์ง๋ฌธํด๋ณด์ธ์. (์: '20๋ ์ฌ์ฑ ๊ณ ๊ฐ์ ๋๋ฆฌ๊ณ ์ถ์ด์')")
|
| 415 |
+
|
| 416 |
+
for message in st.session_state.messages:
|
| 417 |
+
with st.chat_message(message["role"]):
|
| 418 |
+
st.markdown(message["content"])
|
| 419 |
+
|
| 420 |
+
if prompt := st.chat_input("์์ฒญ์ฌํญ์ ์
๋ ฅํ์ธ์..."):
|
| 421 |
+
st.session_state.messages.append({"role": "user", "content": prompt})
|
| 422 |
+
with st.chat_message("user"):
|
| 423 |
+
st.markdown(prompt)
|
| 424 |
+
|
| 425 |
+
with st.chat_message("assistant"):
|
| 426 |
+
with st.spinner("AI ์ปจ์คํดํธ๊ฐ ๋ต๋ณ์ ์์ฑ ์ค์
๋๋ค...(์ต๋ 1~2๋ถ)"):
|
| 427 |
+
orchestrator = st.session_state.orchestrator
|
| 428 |
+
|
| 429 |
+
if "store_profile" not in st.session_state.profile_data:
|
| 430 |
+
st.error("์ธ์
์ 'store_profile' ๋ฐ์ดํฐ๊ฐ ์์ต๋๋ค. ๋ค์ ์์ํด์ฃผ์ธ์.")
|
| 431 |
+
st.stop()
|
| 432 |
+
|
| 433 |
+
agent_history = []
|
| 434 |
+
history_to_convert = st.session_state.messages[:-1][-10:]
|
| 435 |
+
|
| 436 |
+
for msg in history_to_convert:
|
| 437 |
+
if msg["role"] == "user":
|
| 438 |
+
agent_history.append(HumanMessage(content=msg["content"]))
|
| 439 |
+
elif msg["role"] == "assistant":
|
| 440 |
+
agent_history.append(AIMessage(content=msg["content"]))
|
| 441 |
+
|
| 442 |
+
result = orchestrator.invoke_agent(
|
| 443 |
+
user_query=prompt,
|
| 444 |
+
store_profile_dict=st.session_state.profile_data["store_profile"],
|
| 445 |
+
chat_history=agent_history,
|
| 446 |
+
last_recommended_festivals=st.session_state.last_recommended_festivals,
|
| 447 |
+
)
|
| 448 |
+
|
| 449 |
+
response_text = ""
|
| 450 |
+
st.session_state.last_recommended_festivals = []
|
| 451 |
+
|
| 452 |
+
if "error" in result:
|
| 453 |
+
response_text = f"์ค๋ฅ ๋ฐ์: {result['error']}"
|
| 454 |
+
|
| 455 |
+
elif "final_response" in result:
|
| 456 |
+
response_text = result.get("final_response", "์๋ต์ ์์ฑํ์ง ๋ชปํ์ต๋๋ค.")
|
| 457 |
+
intermediate_steps = result.get("intermediate_steps", [])
|
| 458 |
+
|
| 459 |
+
try:
|
| 460 |
+
for step in intermediate_steps:
|
| 461 |
+
action = step[0]
|
| 462 |
+
tool_output = step[1]
|
| 463 |
+
|
| 464 |
+
if hasattr(action, 'tool') and action.tool == "recommend_festivals":
|
| 465 |
+
if tool_output and isinstance(tool_output, list) and isinstance(tool_output[0], dict):
|
| 466 |
+
recommended_list = [
|
| 467 |
+
f.get("์ถ์ ๋ช
") for f in tool_output if f.get("์ถ์ ๋ช
")
|
| 468 |
+
]
|
| 469 |
+
|
| 470 |
+
st.session_state.last_recommended_festivals = recommended_list
|
| 471 |
+
logger.info(f"--- [Streamlit] ์ถ์ฒ ์ถ์ ์ ์ฅ๋จ (Intermediate Steps): {recommended_list} ---")
|
| 472 |
+
break
|
| 473 |
+
|
| 474 |
+
except Exception as e:
|
| 475 |
+
logger.critical(f"--- [Streamlit CRITICAL] Intermediate steps ์ฒ๋ฆฌ ์ค ์์ธ ๋ฐ์: {e} ---", exc_info=True)
|
| 476 |
+
|
| 477 |
+
else:
|
| 478 |
+
response_text = "์ ์ ์๋ ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค."
|
| 479 |
+
|
| 480 |
+
st.markdown(response_text)
|
| 481 |
+
st.session_state.messages.append({"role": "assistant", "content": response_text})
|
| 482 |
+
|
| 483 |
+
# --- ๋ฉ์ธ ์คํ ํจ์ ---
|
| 484 |
+
def main():
|
| 485 |
+
st.title("๐ MarketSync (๋ง์ผ์ฑํฌ)")
|
| 486 |
+
st.subheader("์์๊ณต์ธ ๋ง์ถคํ ์ถ์ ์ถ์ฒ & ๋ง์ผํ
AI ์ปจ์คํดํธ")
|
| 487 |
+
st.caption("์ ํ์นด๋ ๋น
๋ฐ์ดํฐ์ AI ์์ด์ ํธ๋ฅผ ํ์ฉํ์ฌ, ์ฌ์ฅ๋ ๊ฐ๊ฒ์ ๊ผญ ๋ง๋ ์ง์ญ ์ถ์ ์ ๋ง์ผํ
์ ๋ต์ ์ฐพ์๋๋ฆฝ๋๋ค.")
|
| 488 |
+
st.divider()
|
| 489 |
+
|
| 490 |
+
initialize_session()
|
| 491 |
+
render_sidebar()
|
| 492 |
+
|
| 493 |
+
if st.session_state.step == "get_merchant_name":
|
| 494 |
+
render_get_merchant_name_step()
|
| 495 |
+
elif st.session_state.step == "show_profile_and_chat":
|
| 496 |
+
render_show_profile_and_chat_step()
|
| 497 |
+
|
| 498 |
+
# --- ์ฑ ์คํ ---
|
| 499 |
+
if __name__ == "__main__":
|
| 500 |
+
main()
|
tools/festival_recommender.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# tools/festival_recommender.py
|
| 2 |
+
|
| 3 |
+
from langchain_core.tools import tool
|
| 4 |
+
from typing import List, Dict, Any
|
| 5 |
+
|
| 6 |
+
import config
|
| 7 |
+
from modules.filtering import FestivalRecommender
|
| 8 |
+
|
| 9 |
+
logger = config.get_logger(__name__)
|
| 10 |
+
|
| 11 |
+
@tool
|
| 12 |
+
def recommend_festivals(user_query: str, store_profile: str) -> List[Dict[str, Any]]:
|
| 13 |
+
"""
|
| 14 |
+
(๋๊ตฌ) ์ฌ์ฉ์์ ์ง๋ฌธ๊ณผ ๊ฐ๊ฒ ํ๋กํ์ ๋ฐํ์ผ๋ก ๋ง์ถคํ ์ถ์ ๋ฅผ ์ถ์ฒํ๋
|
| 15 |
+
[ํ์ด๋ธ๋ฆฌ๋ 5๋จ๊ณ ํ์ดํ๋ผ์ธ]์ ์คํํฉ๋๋ค.
|
| 16 |
+
1. ์ฟผ๋ฆฌ ์ฌ์์ฑ (ํ๋กํ ๊ธฐ๋ฐ)
|
| 17 |
+
2. ํ๋ณด ๊ฒ์ (์๋ฒ ๋ฉ ์ ์ - Score 1)
|
| 18 |
+
3. ๋์ ์์ฑ ํ๊ฐ (LLM ๊ธฐ๋ฐ - Score 2)
|
| 19 |
+
4. ํ์ด๋ธ๋ฆฌ๋ ์ ์ ๊ณ์ฐ (Score 1 + Score 2)
|
| 20 |
+
5. ์ต์ข
๋ต๋ณ ํฌ๋งทํ
(LLM ๊ธฐ๋ฐ)
|
| 21 |
+
|
| 22 |
+
์ด ๋๊ตฌ๋ '์ถ์ ์ถ์ฒํด์ค'์ ๊ฐ์ ์์ฒญ ์ ๋จ๋
์ผ๋ก ์ฌ์ฉ๋์ด์ผ ํฉ๋๋ค.
|
| 23 |
+
"""
|
| 24 |
+
logger.info(f"--- [Tool] (์ ๊ท) ํ์ด๋ธ๋ฆฌ๋ ์ถ์ ์ถ์ฒ ํ์ดํ๋ผ์ธ ์์ (Query: {user_query[:30]}...) ---")
|
| 25 |
+
|
| 26 |
+
# 4๋ฒ ์ ์: ํ์ดํ๋ผ์ธ ํด๋์ค๋ฅผ ์ธ์คํด์คํํ๊ณ ์คํ
|
| 27 |
+
pipeline = FestivalRecommender(store_profile, user_query)
|
| 28 |
+
|
| 29 |
+
# .run() ๋ฉ์๋๊ฐ ๋ชจ๋ ์์ธ์ฒ๋ฆฌ๋ฅผ ํฌํจ
|
| 30 |
+
return pipeline.run()
|
tools/marketing_strategy.py
ADDED
|
@@ -0,0 +1,230 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# tools/marketing_strategy.py
|
| 2 |
+
|
| 3 |
+
import traceback
|
| 4 |
+
import json
|
| 5 |
+
from typing import List
|
| 6 |
+
|
| 7 |
+
from langchain_core.tools import tool
|
| 8 |
+
|
| 9 |
+
import config
|
| 10 |
+
from modules.llm_provider import get_llm
|
| 11 |
+
from modules.knowledge_base import load_marketing_vectorstore
|
| 12 |
+
|
| 13 |
+
from tools.profile_analyzer import get_festival_profile_by_name
|
| 14 |
+
|
| 15 |
+
logger = config.get_logger(__name__)
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
@tool
|
| 19 |
+
def search_contextual_marketing_strategy(user_query: str, store_profile: str) -> str:
|
| 20 |
+
"""
|
| 21 |
+
(RAG Tool) ์ฌ์ฉ์์ ์ง๋ฌธ๊ณผ ๊ฐ๊ฒ ํ๋กํ(JSON ๋ฌธ์์ด)์ ๋ฐํ์ผ๋ก '๋ง์ผํ
์ ๋ต' Vector DB์์
|
| 22 |
+
๊ด๋ จ์ฑ์ด ๋์ ์ปจํ
์คํธ(์ ๋ต)๋ฅผ ๊ฒ์ํ๊ณ , LLM์ ํตํด ์ต์ข
๋ต๋ณ์ ์์ฑํ์ฌ ๋ฐํํฉ๋๋ค.
|
| 23 |
+
"""
|
| 24 |
+
logger.info("--- [Tool] RAG ๋ง์ผํ
์ ๋ต ๊ฒ์ ํธ์ถ๋จ ---")
|
| 25 |
+
|
| 26 |
+
try:
|
| 27 |
+
retriever = load_marketing_vectorstore()
|
| 28 |
+
if retriever is None:
|
| 29 |
+
raise RuntimeError("๋ง์ผํ
Retriever๊ฐ ๋ก๋๋์ง ์์์ต๋๋ค.")
|
| 30 |
+
|
| 31 |
+
# 1. ์ปจํ
์คํธ๋ฅผ ๊ณ ๋ คํ ๊ฒ์ ์ฟผ๋ฆฌ ์์ฑ
|
| 32 |
+
try:
|
| 33 |
+
profile_dict = json.loads(store_profile)
|
| 34 |
+
profile_for_query = (
|
| 35 |
+
f"๊ฐ๊ฒ ์์น: {profile_dict.get('์ฃผ์', '์ ์ ์์')}\n"
|
| 36 |
+
f"๊ฐ๊ฒ ์
์ข
: {profile_dict.get('์
์ข
', '์ ์ ์์')}\n"
|
| 37 |
+
f"ํต์ฌ ๊ณ ๊ฐ: {profile_dict.get('์๋์ถ์ถํน์ง', {}).get('ํต์ฌ๊ณ ๊ฐ', '์ ์ ์์')}"
|
| 38 |
+
)
|
| 39 |
+
except Exception:
|
| 40 |
+
profile_for_query = store_profile
|
| 41 |
+
|
| 42 |
+
contextual_query = f"[๊ฐ๊ฒ ์ ๋ณด:\n{profile_for_query}\n]์ ๋ํ [์ง๋ฌธ: {user_query}]"
|
| 43 |
+
logger.info(f"--- [Tool] RAG ๊ฒ์ ์ฟผ๋ฆฌ: {contextual_query} ---")
|
| 44 |
+
|
| 45 |
+
# 2. Vector DB ๊ฒ์
|
| 46 |
+
docs = retriever.invoke(contextual_query)
|
| 47 |
+
|
| 48 |
+
if not docs:
|
| 49 |
+
logger.warning("--- [Tool] RAG ๊ฒ์ ๊ฒฐ๊ณผ ์์ ---")
|
| 50 |
+
return "์ฃ์กํฉ๋๋ค. ์ฌ์ฅ๋์ ๊ฐ๊ฒ ํ๋กํ๊ณผ ์ง๋ฌธ์ ๋ง๋ ๋ง์ผํ
์ ๋ต์ ์ฐพ์ง ๋ชปํ์ต๋๋ค. ๊ฐ๊ฒ์ ํน์ง์ ์กฐ๊ธ ๋ ์๋ ค์ฃผ์๊ฑฐ๋, ๋ค๋ฅธ ์ง๋ฌธ์ ์๋ํด๋ณด์๊ฒ ์ด์?"
|
| 51 |
+
|
| 52 |
+
# 3. LLM์ ์ ๋ฌํ ์ปจํ
์คํธ ํฌ๋งทํ
|
| 53 |
+
context = "\n\n---\n\n".join([doc.page_content for doc in docs])
|
| 54 |
+
logger.info("--- [Tool] RAG ์ปจํ
์คํธ ์์ฑ ์๋ฃ ---")
|
| 55 |
+
|
| 56 |
+
# 4. LLM์ ํตํ ๋ต๋ณ ์ฌ๊ตฌ์ฑ
|
| 57 |
+
llm = get_llm(temperature=0.3)
|
| 58 |
+
|
| 59 |
+
# --- (์ฌ์ฉ์ ์์ฒญ) ํ๋กฌํํธ ์๋ณธ ์ ์ง ---
|
| 60 |
+
prompt = f"""
|
| 61 |
+
๋น์ ์ ์์๊ณต์ธ ์ ๋ฌธ ๋ง์ผํ
์ปจ์คํดํธ์
๋๋ค.
|
| 62 |
+
์๋ [๊ฐ๊ฒ ํ๋กํ]๊ณผ [์ฐธ๊ณ ๋ง์ผํ
์ ๋ต]์ ๋ฐํ์ผ๋ก, ์ฌ์ฉ์์ [์ง๋ฌธ]์ ๋ํ ๋ง์ถคํ ๋ง์ผํ
์ ๋ต 3๊ฐ์ง๋ฅผ ์ ์ํด์ฃผ์ธ์.
|
| 63 |
+
|
| 64 |
+
[๊ฐ๊ฒ ํ๋กํ]
|
| 65 |
+
{store_profile}
|
| 66 |
+
|
| 67 |
+
[์ง๋ฌธ]
|
| 68 |
+
{user_query}
|
| 69 |
+
|
| 70 |
+
[์ฐธ๊ณ ๋ง์ผํ
์ ๋ต]
|
| 71 |
+
{context}
|
| 72 |
+
|
| 73 |
+
[์์ฑ ๊ฐ์ด๋๋ผ์ธ]
|
| 74 |
+
1. [์ฐธ๊ณ ๋ง์ผํ
์ ๋ต]์ ๊ทธ๋๋ก ๋ณต์ฌํ์ง ๋ง๊ณ , [๊ฐ๊ฒ ํ๋กํ]์ ํน์ง(์: ์
์ข
, ํต์ฌ ๊ณ ๊ฐ, ์๊ถ)๊ณผ [์ง๋ฌธ]์ ์๋๋ฅผ ์กฐํฉํ์ฌ **๊ฐ๊ฒ์ ํนํ๋ ์๋ก์ด ์์ด๋์ด**๋ก ์ฌ๊ตฌ์ฑํด์ฃผ์ธ์.
|
| 75 |
+
2. ๊ฐ ์ ๋ต์ ๊ตฌ์ฒด์ ์ธ ์คํ ๋ฐฉ์์ ํฌํจํด์ผ ํฉ๋๋ค.
|
| 76 |
+
3. ์น์ ํ๊ณ ์ ๋ฌธ์ ์ธ ๋งํฌ๋ฅผ ์ฌ์ฉํ์ธ์.
|
| 77 |
+
4. ์๋ [์ถ๋ ฅ ํ์]์ ์ ํํ ์ง์ผ์ฃผ์ธ์.
|
| 78 |
+
5. **์ทจ์์ ๊ธ์ง**: ์ ๋๋ก `~~text~~`์ ๊ฐ์ ์ทจ์์ ๋งํฌ๋ค์ด์ ์ฌ์ฉํ์ง ๋ง์ธ์.
|
| 79 |
+
|
| 80 |
+
[์ถ๋ ฅ ํ์]
|
| 81 |
+
์ฌ์ฅ๋ ๊ฐ๊ฒ์ ํน์ฑ์ ๊ณ ๋ คํ 3๊ฐ์ง ๋ง์ผํ
์์ด๋์ด๋ฅผ ์ ์ํด ๋๋ฆฝ๋๋ค.
|
| 82 |
+
|
| 83 |
+
**1. [์ ๋ต ์ ๋ชฉ 1]**
|
| 84 |
+
* **์ ๋ต ๋ด์ฉ:** (๊ฐ๊ฒ์ ์ด๋ค ํน์ง์ ํ์ฉํ์ฌ ์ด๋ป๊ฒ ์คํํ๋์ง ๊ตฌ์ฒด์ ์ผ๋ก ์์ )
|
| 85 |
+
* **๊ธฐ๋ ํจ๊ณผ:** (์ด ์ ๋ต์ ํตํด ์ป์ ์ ์๋ ๊ตฌ์ฒด์ ์ธ ํจ๊ณผ)
|
| 86 |
+
|
| 87 |
+
**2. [์ ๋ต ์ ๋ชฉ 2]**
|
| 88 |
+
* **์ ๋ต ๋ด์ฉ:** (๊ฐ๊ฒ์ ์ด๋ค ํน์ง์ ํ์ฉํ์ฌ ์ด๋ป๊ฒ ์คํํ๋์ง ๊ตฌ์ฒด์ ์ผ๋ก ์์ )
|
| 89 |
+
* **๊ธฐ๋ ํจ๊ณผ:** (์ด ์ ๋ต์ ํตํด ์ป์ ์ ์๋ ๊ตฌ์ฒด์ ์ธ ํจ๊ณผ)
|
| 90 |
+
|
| 91 |
+
**3. [์ ๋ต ์ ๋ชฉ 3]**
|
| 92 |
+
* **์ ๋ต ๋ด์ฉ:** (๊ฐ๊ฒ์ ์ด๋ค ํน์ง์ ํ์ฉํ์ฌ ์ด๋ป๊ฒ ์คํํ๋์ง ๊ตฌ์ฒด์ ์ผ๋ก ์์ )
|
| 93 |
+
* **๊ธฐ๋ ํจ๊ณผ:** (์ด ์ ๋ต์ ํตํด ์ป์ ์ ์๋ ๊ตฌ์ฒด์ ์ธ ํจ๊ณผ)
|
| 94 |
+
"""
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
try:
|
| 98 |
+
response = llm.invoke(prompt)
|
| 99 |
+
logger.info("--- [Tool] RAG + LLM ๋ต๋ณ ์์ฑ ์๋ฃ ---")
|
| 100 |
+
return response.content
|
| 101 |
+
except Exception as llm_e:
|
| 102 |
+
logger.critical(f"--- [Tool CRITICAL] RAG LLM ํธ์ถ ์ค ์ค๋ฅ: {llm_e} ---", exc_info=True)
|
| 103 |
+
return f"์ค๋ฅ: ๊ฒ์๋ ์ ๋ต์ ์ฒ๋ฆฌํ๋ ์ค ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค. (LLM ์ค๋ฅ: {llm_e})"
|
| 104 |
+
|
| 105 |
+
except Exception as e:
|
| 106 |
+
logger.critical(f"--- [Tool CRITICAL] RAG ๋ง์ผํ
์ ๋ต ๊ฒ์ ์ค ์ค๋ฅ: {e} ---", exc_info=True)
|
| 107 |
+
return f"์ฃ์กํฉ๋๋ค. ๋ง์ผํ
์ ๋ต์ ์์ฑํ๋ ์ค ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค: {e}"
|
| 108 |
+
|
| 109 |
+
|
| 110 |
+
@tool
|
| 111 |
+
def create_festival_specific_marketing_strategy(festival_name: str, store_profile: str) -> str:
|
| 112 |
+
"""
|
| 113 |
+
(RAG x2 Tool) ํน์ ์ถ์ ์ด๋ฆ(์: '๊ด์
๊ฐ๊ฐ์ฐฌ์ถ์ ')๊ณผ ๊ฐ๊ฒ ํ๋กํ(JSON ๋ฌธ์์ด)์ ์
๋ ฅ๋ฐ์,
|
| 114 |
+
'์ถ์ DB'์ '๋ง์ผํ
DB'๋ฅผ *๋์์* RAG๋ก ์ฐธ์กฐํ์ฌ,
|
| 115 |
+
ํด๋น ์ถ์ ๊ธฐ๊ฐ ๋์ ์คํํ ์ ์๋ ๋ง์ถคํ ๋ง์ผํ
์ ๋ต *1๊ฐ*๋ฅผ ์์ฑํฉ๋๋ค.
|
| 116 |
+
"""
|
| 117 |
+
logger.info(f"--- [Tool] '*๋จ์ผ* ์ถ์ ๋ง์ถคํ ์ ๋ต ์์ฑ (RAGx2)' ๋๊ตฌ ํธ์ถ (๋์: {festival_name}) ---")
|
| 118 |
+
|
| 119 |
+
try:
|
| 120 |
+
# 1. (RAG 1) ์ถ์ ์ ๋ณด ๊ฐ์ ธ์ค๊ธฐ (๊ธฐ์กด ๋๊ตฌ ์ฌ์ฌ์ฉ)
|
| 121 |
+
festival_profile_str = get_festival_profile_by_name.invoke({"festival_name": festival_name})
|
| 122 |
+
|
| 123 |
+
if "์ค๋ฅ" in festival_profile_str or "์ฐพ์ ์ ์์" in festival_profile_str:
|
| 124 |
+
logger.warning(f"--- [Tool WARNING] ์ถ์ ํ๋กํ์ ์ฐพ์ง ๋ชปํจ: {festival_name} ---")
|
| 125 |
+
festival_profile_str = f"{{\"์ถ์ ๋ช
\": \"{festival_name}\", \"์ ๋ณด\": \"์์ธ ์ ๋ณด๋ฅผ ์ฐพ์ ์ ์์ต๋๋ค.\"}}"
|
| 126 |
+
else:
|
| 127 |
+
logger.info(f"--- [Tool] (RAG 1) ์ถ์ ํ๋กํ ๋ก๋ ์ฑ๊ณต: {festival_name} ---")
|
| 128 |
+
|
| 129 |
+
# 2. (RAG 2) ๊ด๋ จ ๋ง์ผํ
์ ๋ต ๊ฒ์
|
| 130 |
+
marketing_retriever = load_marketing_vectorstore()
|
| 131 |
+
if marketing_retriever is None:
|
| 132 |
+
raise RuntimeError("๋ง์ผํ
Retriever๊ฐ ๋ก๋๋์ง ์์์ต๋๋ค.")
|
| 133 |
+
|
| 134 |
+
combined_query = f"""
|
| 135 |
+
์ถ์ ์ ๋ณด: {festival_profile_str}
|
| 136 |
+
๊ฐ๊ฒ ํ๋กํ: {store_profile}
|
| 137 |
+
์ง๋ฌธ: ์ ๊ฐ๊ฒ๊ฐ ์ ์ถ์ ๊ธฐ๊ฐ ๋์ ํ ์ ์๋ ์ต๊ณ ์ ๋ง์ผํ
์ ๋ต์?
|
| 138 |
+
"""
|
| 139 |
+
marketing_docs = marketing_retriever.invoke(combined_query)
|
| 140 |
+
|
| 141 |
+
if not marketing_docs:
|
| 142 |
+
marketing_context = "์ฐธ๊ณ ํ ๋งํ ๋ง์ผํ
์ ๋ต์ ์ฐพ์ง ๋ชปํ์ต๋๋ค."
|
| 143 |
+
logger.warning("--- [Tool] (RAG 2) ๋ง์ผํ
์ ๋ต ๊ฒ์ ๊ฒฐ๊ณผ ์์ ---")
|
| 144 |
+
else:
|
| 145 |
+
marketing_context = "\n\n---\n\n".join([doc.page_content for doc in marketing_docs])
|
| 146 |
+
logger.info(f"--- [Tool] (RAG 2) ๋ง์ผํ
์ ๋ต ์ปจํ
์คํธ {len(marketing_docs)}๊ฐ ํ๋ณด ---")
|
| 147 |
+
|
| 148 |
+
# 3. LLM์ ํตํ ์ต์ข
์ ๋ต ์์ฑ
|
| 149 |
+
llm = get_llm(temperature=0.5)
|
| 150 |
+
|
| 151 |
+
# --- (์ฌ์ฉ์ ์์ฒญ) ํ๋กฌํํธ ์๋ณธ ์ ์ง ---
|
| 152 |
+
prompt = f"""
|
| 153 |
+
๋น์ ์ ์ถ์ ์ฐ๊ณ ๋ง์ผํ
์ ๋ฌธ ์ปจ์คํดํธ์
๋๋ค.
|
| 154 |
+
์๋ [๊ฐ๊ฒ ํ๋กํ], [์ถ์ ํ๋กํ], [์ฐธ๊ณ ๋ง์ผํ
์ ๋ต]์ ๋ชจ๋ ๊ณ ๋ คํ์ฌ,
|
| 155 |
+
[๊ฐ๊ฒ ํ๋กํ]์ ์ฌ์ฅ๋์ด [์ถ์ ํ๋กํ] ๊ธฐ๊ฐ ๋์ ์คํํ ์ ์๋
|
| 156 |
+
**์ฐฝ์์ ์ด๊ณ ๊ตฌ์ฒด์ ์ธ ๋ง์ถคํ ๋ง์ผํ
์ ๋ต 1๊ฐ์ง**๋ฅผ ์ ์ํด์ฃผ์ธ์.
|
| 157 |
+
|
| 158 |
+
[๊ฐ๊ฒ ํ๋กํ]
|
| 159 |
+
{store_profile}
|
| 160 |
+
|
| 161 |
+
[์ถ์ ํ๋กํ]
|
| 162 |
+
{festival_profile_str}
|
| 163 |
+
|
| 164 |
+
[์ฐธ๊ณ ๋ง์ผํ
์ ๋ต]
|
| 165 |
+
{marketing_context}
|
| 166 |
+
|
| 167 |
+
[์์ฑ ๊ฐ์ด๋๋ผ์ธ]
|
| 168 |
+
1. **๋งค์ฐ ์ค์:** [๊ฐ๊ฒ ํ๋กํ]์ ํน์ง(์
์ข
, ์์น, ํต์ฌ ๊ณ ๊ฐ)๊ณผ [์ถ์ ํ๋กํ]์ ํน์ง(์ฃผ์ , ์ฃผ์ ๋ฐฉ๋ฌธ๊ฐ)์
|
| 169 |
+
**๋ฐ๋์ ์ฐ๊ด์ง์ด** ๊ตฌ์ฒด์ ์ธ ์ ๋ต์ ๋ง๋์ธ์.
|
| 170 |
+
2. [์ฐธ๊ณ ๋ง์ผํ
์ ๋ต]์ ์์ด๋์ด ๋ฐ์์๋ง ํ์ฉํ๊ณ , ๋ณต์ฌํ์ง ๋ง์ธ์.
|
| 171 |
+
3. ์ ๋ต์ 1๊ฐ์ง๋ง ๊น์ด ์๊ฒ ์ ์ํฉ๋๋ค.
|
| 172 |
+
4. ์น์ ํ๊ณ ์ ๋ฌธ์ ์ธ ๋งํฌ๋ฅผ ์ฌ์ฉํ์ธ์.
|
| 173 |
+
5. ์๋ [์ถ๋ ฅ ํ์]์ ์ ํํ ์ง์ผ์ฃผ์ธ์.
|
| 174 |
+
6. **์ทจ์์ ๊ธ์ง**: ์ ๋๋ก `~~text~~`์ ๊ฐ์ ์ทจ์์ ๋งํฌ๋ค์ด์ ์ฌ์ฉํ์ง ๋ง์ธ์.
|
| 175 |
+
|
| 176 |
+
[์ถ๋ ฅ ํ์]
|
| 177 |
+
### ๐ {json.loads(festival_profile_str).get('์ถ์ ๋ช
', festival_name)} ๋ง์ถคํ ๋ง์ผํ
์ ๋ต
|
| 178 |
+
|
| 179 |
+
**1. (์ ๋ต ์์ด๋์ด ์ ๋ชฉ)**
|
| 180 |
+
* **์ ๋ต ๊ฐ์:** (๊ฐ๊ฒ์ ์ด๋ค ํน์ง๊ณผ ์ถ์ ์ ์ด๋ค ํน์ง์ ์ฐ๊ด์ง์๋์ง ์ค๋ช
)
|
| 181 |
+
* **๊ตฌ์ฒด์ ์คํ ๋ฐฉ์:** (์ฌ์ฅ๋์ด '๋ฌด์์', '์ด๋ป๊ฒ' ํด์ผ ํ๋์ง ๋จ๊ณ๋ณ๋ก ์ค๋ช
. ์: ๋ฉ๋ด ๊ฐ๋ฐ, ํ๋ณด ๋ฌธ๊ตฌ, SNS ์ด๋ฒคํธ ๋ฑ)
|
| 182 |
+
* **ํ๊ฒ ๊ณ ๊ฐ:** (์ด ์ ๋ต์ด ์ถ์ ๋ฐฉ๋ฌธ๊ฐ ์ค ๋๊ตฌ์๊ฒ ๋งค๋ ฅ์ ์ผ์ง)
|
| 183 |
+
* **๊ธฐ๋ ํจ๊ณผ:** (์์๋๋ ๊ฒฐ๊ณผ, ์: ์ ๊ท ๊ณ ๊ฐ ์ ์
, ๊ฐ๋จ๊ฐ ์์น ๋ฑ)
|
| 184 |
+
"""
|
| 185 |
+
|
| 186 |
+
try:
|
| 187 |
+
response = llm.invoke(prompt)
|
| 188 |
+
logger.info("--- [Tool] (RAGx2) ์ต์ข
์ ๋ต ์์ฑ ์๋ฃ ---")
|
| 189 |
+
return response.content
|
| 190 |
+
except Exception as llm_e:
|
| 191 |
+
logger.critical(f"--- [Tool CRITICAL] '์ถ์ ๋ง์ถคํ ์ ๋ต ์์ฑ (RAGx2)' LLM ํธ์ถ ์ค ์ค๋ฅ: {llm_e} ---", exc_info=True)
|
| 192 |
+
return f"์ค๋ฅ: ๊ฒ์๋ ์ ๋ต์ ์ฒ๋ฆฌํ๋ ์ค ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค. (LLM ์ค๋ฅ: {llm_e})"
|
| 193 |
+
|
| 194 |
+
except Exception as e:
|
| 195 |
+
logger.critical(f"--- [Tool CRITICAL] '์ถ์ ๋ง์ถคํ ์ ๋ต ์์ฑ (RAG)' ์ค ์ค๋ฅ: {e} ---", exc_info=True)
|
| 196 |
+
return f"์ฃ์กํฉ๋๋ค. '{festival_name}' ์ถ์ ์ ๋ต์ ์์ฑํ๋ ์ค ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค: {e}"
|
| 197 |
+
|
| 198 |
+
|
| 199 |
+
@tool
|
| 200 |
+
def create_marketing_strategies_for_multiple_festivals(festival_names: List[str], store_profile: str) -> str:
|
| 201 |
+
"""
|
| 202 |
+
์ฌ๋ฌ ๊ฐ์ ์ถ์ ์ด๋ฆ ๋ฆฌ์คํธ์ ๊ฐ๊ฒ ํ๋กํ(JSON ๋ฌธ์์ด)์ ์
๋ ฅ๋ฐ์,
|
| 203 |
+
๊ฐ ์ถ์ ์ ํนํ๋ ๋ง์ถคํ ๋ง์ผํ
์ ๋ต์ *๋ชจ๋* ์์ฑํ๊ณ ํ๋์ ๋ฌธ์์ด๋ก ์ทจํฉํ์ฌ ๋ฐํํฉ๋๋ค.
|
| 204 |
+
(์: ["์ฒญ์ก์ฌ๊ณผ์ถ์ ", "๋ถ์ฒ๊ตญ์ ๋งํ์ถ์ "])
|
| 205 |
+
"""
|
| 206 |
+
logger.info(f"--- [Tool] '*๋ค์* ์ถ์ ๋ง์ถคํ ์ ๋ต ์์ฑ' ๋๊ตฌ ํธ์ถ (๋์: {festival_names}) ---")
|
| 207 |
+
|
| 208 |
+
final_report = []
|
| 209 |
+
|
| 210 |
+
if not festival_names:
|
| 211 |
+
logger.warning("--- [Tool] ์ถ์ ์ด๋ฆ ๋ชฉ๋ก์ด ๋น์ด์์ ---")
|
| 212 |
+
return "์ค๋ฅ: ์ถ์ ์ด๋ฆ ๋ชฉ๋ก์ด ๋น์ด์์ต๋๋ค. ์ ๋ต์ ์์ฑํ ์ ์์ต๋๋ค."
|
| 213 |
+
|
| 214 |
+
# ๊ฐ๋ณ ์ ๋ต ์์ฑ ๋๊ตฌ๋ฅผ ์ฌ์ฌ์ฉ
|
| 215 |
+
for festival_name in festival_names:
|
| 216 |
+
try:
|
| 217 |
+
strategy = create_festival_specific_marketing_strategy.invoke({
|
| 218 |
+
"festival_name": festival_name,
|
| 219 |
+
"store_profile": store_profile
|
| 220 |
+
})
|
| 221 |
+
|
| 222 |
+
final_report.append(strategy)
|
| 223 |
+
|
| 224 |
+
except Exception as e:
|
| 225 |
+
error_message = f"--- [์ค๋ฅ] '{festival_name}'์ ์ ๋ต ์์ฑ ์ค ๋ฌธ์ ๊ฐ ๋ฐ์ํ์ต๋๋ค: {e} ---"
|
| 226 |
+
logger.critical(f"--- [Tool CRITICAL] '{festival_name}' ์ ๋ต ์์ฑ ์ค ์ค๋ฅ: {e} ---", exc_info=True)
|
| 227 |
+
final_report.append(error_message)
|
| 228 |
+
|
| 229 |
+
logger.info("--- [Tool] '๋ค์ ์ถ์ ๋ง์ถคํ ์ ๋ต ์์ฑ' ์๋ฃ ---")
|
| 230 |
+
return "\n\n---\n\n".join(final_report)
|
tools/profile_analyzer.py
ADDED
|
@@ -0,0 +1,205 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# tools/profile_analyzer.py
|
| 2 |
+
|
| 3 |
+
import json
|
| 4 |
+
import traceback
|
| 5 |
+
import pandas as pd
|
| 6 |
+
import math
|
| 7 |
+
import streamlit as st
|
| 8 |
+
from langchain_core.tools import tool
|
| 9 |
+
from langchain_core.messages import HumanMessage
|
| 10 |
+
|
| 11 |
+
import config
|
| 12 |
+
from modules.llm_provider import get_llm
|
| 13 |
+
# filtering ๋ชจ๋์์ ๋ ์ง ์์ธก ํจ์ ๊ฐ์ ธ์ค๊ธฐ
|
| 14 |
+
from modules.filtering import FestivalRecommender
|
| 15 |
+
|
| 16 |
+
logger = config.get_logger(__name__)
|
| 17 |
+
|
| 18 |
+
# nan ๊ฐ ์ฒ๋ฆฌ๊ธฐ
|
| 19 |
+
def replace_nan_with_none(data):
|
| 20 |
+
if isinstance(data, dict):
|
| 21 |
+
return {k: replace_nan_with_none(v) for k, v in data.items()}
|
| 22 |
+
elif isinstance(data, list):
|
| 23 |
+
return [replace_nan_with_none(i) for i in data]
|
| 24 |
+
elif isinstance(data, float) and math.isnan(data):
|
| 25 |
+
return None
|
| 26 |
+
return data
|
| 27 |
+
|
| 28 |
+
# ์ถ์ ๋ฐ์ดํฐ ๋ก๋
|
| 29 |
+
@st.cache_data
|
| 30 |
+
def _load_festival_data():
|
| 31 |
+
try:
|
| 32 |
+
file_path = config.PATH_FESTIVAL_DF
|
| 33 |
+
if not file_path.exists():
|
| 34 |
+
logger.error(f"--- [Tool Definition ERROR] '{config.PATH_FESTIVAL_DF}' ํ์ผ์ ์ฐพ์ ์ ์์ต๋๋ค.")
|
| 35 |
+
return None
|
| 36 |
+
df = pd.read_csv(file_path)
|
| 37 |
+
if '์ถ์ ๋ช
' not in df.columns:
|
| 38 |
+
logger.error("--- [Tool Definition ERROR] '์ถ์ ๋ช
' ์ปฌ๋ผ์ด df์ ์์ต๋๋ค.")
|
| 39 |
+
return None
|
| 40 |
+
df_dict = df.set_index('์ถ์ ๋ช
').to_dict(orient='index')
|
| 41 |
+
logger.info(f"--- [Cache] ์ถ์ ์๋ณธ CSV ๋ก๋ ๋ฐ ๋์
๋๋ฆฌ ๋ณํ ์๋ฃ (์ด {len(df_dict)}๊ฐ) ---")
|
| 42 |
+
return df_dict
|
| 43 |
+
except Exception as e:
|
| 44 |
+
logger.critical(f"--- [Tool Definition CRITICAL ERROR] ์ถ์ ๋ฐ์ดํฐ ๋ก๋ ์คํจ: {e} ---", exc_info=True)
|
| 45 |
+
return None
|
| 46 |
+
|
| 47 |
+
# ----------------------------
|
| 48 |
+
# Tool 1: ํน์ ์ถ์ ์ ๋ณด ์กฐํ
|
| 49 |
+
@tool
|
| 50 |
+
def get_festival_profile_by_name(festival_name: str) -> str:
|
| 51 |
+
"""
|
| 52 |
+
์ถ์ ์ด๋ฆ์ ์
๋ ฅ๋ฐ์, ํด๋น ์ถ์ ์ ์์ธ ํ๋กํ(์๊ฐ, ์ง์ญ, ํค์๋, ๊ธฐ๊ฐ, ๊ณ ๊ฐ์ธต ๋ฑ)์
|
| 53 |
+
JSON ๋ฌธ์์ด๋ก ๋ฐํํฉ๋๋ค. ๋ฐ์ดํฐ๋ฒ ์ด์ค์์ ์ ํํ ์ด๋ฆ์ ์ฐพ์์ผ ํฉ๋๋ค.
|
| 54 |
+
(์: "๋ณด๋ น๋จธ๋์ถ์ ์์ธ ์ ๋ณด ์๋ ค์ค")
|
| 55 |
+
"""
|
| 56 |
+
logger.info(f"--- [Tool] 'ํน์ ์ถ์ ์ ๋ณด ์กฐํ' ๋๊ตฌ ํธ์ถ (๋์: {festival_name}) ---")
|
| 57 |
+
try:
|
| 58 |
+
festival_db = _load_festival_data()
|
| 59 |
+
if festival_db is None:
|
| 60 |
+
return json.dumps({"error": "์ถ์ ๋ฐ์ดํฐ๋ฒ ์ด์ค๋ฅผ ๋ก๋ํ์ง ๋ชปํ์ต๋๋ค."})
|
| 61 |
+
profile_dict = festival_db.get(festival_name)
|
| 62 |
+
if profile_dict:
|
| 63 |
+
profile_dict = replace_nan_with_none(profile_dict)
|
| 64 |
+
profile_dict['์ถ์ ๋ช
'] = festival_name
|
| 65 |
+
return json.dumps(profile_dict, ensure_ascii=False)
|
| 66 |
+
else:
|
| 67 |
+
return json.dumps({"error": f"'{festival_name}' ์ถ์ ๋ฅผ ์ฐพ์ ์ ์์ต๋๋ค. ์ฒ ์๋ฅผ ํ์ธํด์ฃผ์ธ์."})
|
| 68 |
+
except Exception as e:
|
| 69 |
+
logger.critical(f"--- [Tool CRITICAL] 'ํน์ ์ถ์ ์ ๋ณด ์กฐํ' ์ค ์ค๋ฅ: {e} ---", exc_info=True)
|
| 70 |
+
return json.dumps({"error": f"'{festival_name}' ์ถ์ ๊ฒ์ ์ค ์ค๋ฅ ๋ฐ์: {e}"})
|
| 71 |
+
|
| 72 |
+
# ----------------------------
|
| 73 |
+
# Tool 2: ๊ฐ๋งน์ ํ๋กํ ๋ถ์ (LLM)
|
| 74 |
+
@tool
|
| 75 |
+
def analyze_merchant_profile(store_profile: str) -> str:
|
| 76 |
+
"""
|
| 77 |
+
๊ฐ๋งน์ (๊ฐ๊ฒ)์ ํ๋กํ ๋ฐ์ดํฐ(JSON ๋ฌธ์์ด)๋ฅผ ์
๋ ฅ๋ฐ์, LLM์ ์ฌ์ฉํ์ฌ
|
| 78 |
+
[๊ฐ์ , ์ฝ์ , ๊ธฐํ ์์ธ]์ ๋ถ์ํ๋ ์ปจ์คํ
๋ฆฌํฌํธ๋ฅผ ์์ฑํฉ๋๋ค.
|
| 79 |
+
์ด ๋๊ตฌ๋ ๊ฐ๊ฒ์ ํ์ฌ ์ํ๋ฅผ ์ง๋จํ๊ณ ๋ง์ผํ
์ ๋ต์ ์ ์ํ๋ ๋ฐ ์ฌ์ฉ๋ฉ๋๋ค.
|
| 80 |
+
"""
|
| 81 |
+
logger.info("--- [Tool] '๊ฐ๋งน์ ํ๋กํ ๋ถ์' ๋๊ตฌ ํธ์ถ ---")
|
| 82 |
+
try:
|
| 83 |
+
llm = get_llm(temperature=0.3)
|
| 84 |
+
prompt = f"""
|
| 85 |
+
๋น์ ์ ์ต๊ณ ์ ์๊ถ ๋ถ์ ์ ๋ฌธ๊ฐ์
๋๋ค.
|
| 86 |
+
์๋ [๊ฐ๊ฒ ํ๋กํ] ๋ฐ์ดํฐ๋ฅผ ๋ฐํ์ผ๋ก, ์ด ๊ฐ๊ฒ์ [๊ฐ์ ], [์ฝ์ ], [๊ธฐํ ์์ธ]์
|
| 87 |
+
์ฌ์ฅ๋์ด ์ดํดํ๊ธฐ ์ฝ๊ฒ ์ปจ์คํ
๋ฆฌํฌํธ ํ์์ผ๋ก ์์ฝํด์ฃผ์ธ์.
|
| 88 |
+
|
| 89 |
+
[๊ฐ๊ฒ ํ๋กํ]
|
| 90 |
+
{store_profile}
|
| 91 |
+
|
| 92 |
+
[๋ถ์ ๊ฐ์ด๋๋ผ์ธ]
|
| 93 |
+
1. **๊ฐ์ (Strengths)**: '๋์ผ ์๊ถ/์
์ข
๋๋น' ๋์ ์์น(๋งค์ถ, ๋ฐฉ๋ฌธ๊ฐ, ๊ฐ๋จ๊ฐ ๋ฑ)๋ '์ฌ๋ฐฉ๋ฌธ์จ' ๋ฑ์ ์ฐพ์ **๊ฒฝ์ ์ฐ์**๊ฐ ๋๋ ํต์ฌ ์์ ๊ฐ์กฐํ์ธ์.
|
| 94 |
+
2. **์ฝ์ (Weaknesses)**: '๋์ผ ์๊ถ/์
์ข
๋๋น' ๋ฎ์ ์์น๋ '์ ๊ท ๊ณ ๊ฐ ๋น์จ' ๋ฑ์ ์ฐพ์ **๊ฐ์ ์ด ์๊ธํ ์์ญ**์ ์ธ๊ธํ์ธ์.
|
| 95 |
+
3. **๊ธฐํ (Opportunities)**: ๊ฐ๊ฒ์ ํ์ฌ ๊ฐ์ ๊ณผ '์ฃผ์ ๊ณ ๊ฐ์ธต'์ด๋ '์๊ถ' ํน์ฑ์ ๋ฐํ์ผ๋ก, **๊ฐ๊ฒ๊ฐ ํ์ฉํ ์ ์๋ ๋ง์ผํ
(์: ํน์ ์ฐ๋ น๋ ํ๊ฒ, ์ ๊ท ๊ณ ๊ฐ ์ ์น)์ด ํจ๊ณผ์ ์ผ์ง ์ ์ํ๊ณ ์ด๋ฅผ ๋ฌ์ฑํ๊ธฐ ์ํ ๋ฐฉํฅ์ฑ์ ์ ์ํ์ธ์.
|
| 96 |
+
4. **ํ์**: ๋งํฌ๋ค์ด์ ์ฌ์ฉํ์ฌ ๋ช
ํํ๊ณ ๊ฐ๋
์ฑ ์ข๊ฒ ์์ฑํ์ธ์.
|
| 97 |
+
5. **์ ๋ฌธ์ฑ/์น์ ํจ**: ์ ๋ฌธ์ ์ธ ๋ถ์ ์ฉ์ด๋ฅผ ์ฌ์ฉํ๋, ์ฌ์ฅ๋์ด ์ฝ๊ฒ ์ดํดํ ์ ์๋๋ก ์น์ ํ๊ณ ๋ช
ํํ๊ฒ ์ค๋ช
ํ์ธ์.
|
| 98 |
+
6. **(์์ฒญ 4) ์ทจ์์ ๊ธ์ง**: ์ ๋๋ก `~~text~~`์ ๊ฐ์ ์ทจ์์ ๋งํฌ๋ค์ด์ ์ฌ์ฉํ์ง ๋ง์ธ์.
|
| 99 |
+
|
| 100 |
+
[๋ต๋ณ ํ์]
|
| 101 |
+
### ๐ช ์ฌ์ฅ๋ ๊ฐ๊ฒ ํ๋กํ ๋ถ์ ๋ฆฌํฌํธ
|
| 102 |
+
|
| 103 |
+
**1. ๊ฐ์ (Strengths)**
|
| 104 |
+
* [๋ถ์๋ ๊ฐ์ 1] (๋ถ์ ๊ทผ๊ฑฐ ๋ช
์)
|
| 105 |
+
* [๋ถ์๋ ๊ฐ์ 2] (๋ถ์ ๊ทผ๊ฑฐ ๋ช
์)
|
| 106 |
+
* [ํ์์ ์ถ๊ฐ ๊ฐ์ ]
|
| 107 |
+
|
| 108 |
+
**2. ์ฝ์ (Weaknesses)**
|
| 109 |
+
* [๋ถ์๋ ์ฝ์ 1] (๊ฐ์ ํ์์ฑ ๋ช
์)
|
| 110 |
+
* [๋ถ์๋ ์ฝ์ 2] (๊ฐ์ ํ์์ฑ ๋ช
์)
|
| 111 |
+
* [ํ์์ ์ถ๊ฐ ์ฝ์ ]
|
| 112 |
+
|
| 113 |
+
**3. ๊ธฐํ (Opportunities)**
|
| 114 |
+
* [๋ถ์๋ ๊ธฐํ ์์ธ 1] (ํ์ฉ ๋ฐฉ์ ์ ์)
|
| 115 |
+
* [๋ถ์๋ ๊ธฐํ ์์ธ 2] (ํ์ฉ ๋ฐฉ์ ์ ์)
|
| 116 |
+
* [ํ์์ ์ถ๊ฐ ๊ธฐํ ์์ธ]
|
| 117 |
+
"""
|
| 118 |
+
response = llm.invoke([HumanMessage(content=prompt)])
|
| 119 |
+
analysis_report = response.content.strip()
|
| 120 |
+
return analysis_report
|
| 121 |
+
except Exception as e:
|
| 122 |
+
logger.critical(f"--- [Tool CRITICAL] '๊ฐ๋งน์ ํ๋กํ ๋ถ์' ์ค ์ค๋ฅ: {e} ---", exc_info=True)
|
| 123 |
+
return f"๊ฐ๊ฒ ํ๋กํ์ ๋ถ์ํ๋ ์ค ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค: {e}"
|
| 124 |
+
|
| 125 |
+
# ----------------------------
|
| 126 |
+
# Tool 3: ์ถ์ ํ๋กํ ๋ถ์ (LLM)
|
| 127 |
+
@tool
|
| 128 |
+
def analyze_festival_profile(festival_name: str) -> str:
|
| 129 |
+
"""
|
| 130 |
+
์ถ์ ์ด๋ฆ์ ์
๋ ฅ๋ฐ์, ํด๋น ์ถ์ ์ ์์ธ ํ๋กํ์ ์กฐํํ๊ณ ,
|
| 131 |
+
LLM์ ์ฌ์ฉํ์ฌ [ํต์ฌ ํน์ง]๊ณผ [์ฃผ์ ๋ฐฉ๋ฌธ๊ฐ ํน์ฑ]์ ์์ฝ ๋ฆฌํฌํธ๋ก ๋ฐํํฉ๋๋ค.
|
| 132 |
+
(์: "๋ณด๋ น๋จธ๋์ถ์ ๋ ์ด๋ค ์ถ์ ์ผ?")
|
| 133 |
+
"""
|
| 134 |
+
logger.info(f"--- [Tool] '์ถ์ ํ๋กํ ๋ถ์' ๋๊ตฌ ํธ์ถ (๋์: {festival_name}) ---")
|
| 135 |
+
try:
|
| 136 |
+
# 1. Tool 1 ํธ์ถ
|
| 137 |
+
profile_json = get_festival_profile_by_name.invoke(festival_name)
|
| 138 |
+
|
| 139 |
+
profile_dict = json.loads(profile_json)
|
| 140 |
+
|
| 141 |
+
if "error" in profile_dict:
|
| 142 |
+
return profile_json
|
| 143 |
+
|
| 144 |
+
# 2. LLM ์์ฝ์ ์ํ ์ ๋ณด ์ถ์ถ
|
| 145 |
+
summary = {
|
| 146 |
+
"์ถ์ ๋ช
": profile_dict.get('์ถ์ ๋ช
'),
|
| 147 |
+
"์๊ฐ": profile_dict.get('์๊ฐ'),
|
| 148 |
+
"์ง์ญ": profile_dict.get('์ง์ญ'),
|
| 149 |
+
"ํค์๋": profile_dict.get('ํค์๋'),
|
| 150 |
+
"2025_๊ธฐ๊ฐ": profile_dict.get('2025_๊ธฐ๊ฐ'),
|
| 151 |
+
"์ฃผ์_๊ณ ๊ฐ์ธต": profile_dict.get('์ฃผ์๊ณ ๊ฐ์ธต', 'N/A'),
|
| 152 |
+
"์ฃผ์_๋ฐฉ๋ฌธ์": profile_dict.get('์ฃผ์๋ฐฉ๋ฌธ์', 'N/A'),
|
| 153 |
+
"์ถ์ _์ธ๊ธฐ๋": profile_dict.get('์ถ์ ์ธ๊ธฐ', 'N/A'),
|
| 154 |
+
"์ธ๊ธฐ๋_์ ์": profile_dict.get('์ธ๊ธฐ๋_์ ์', 'N/A'),
|
| 155 |
+
"ํํ์ด์ง": profile_dict.get('ํํ์ด์ง')
|
| 156 |
+
}
|
| 157 |
+
|
| 158 |
+
# 2026๋
๋ ์ง ์์ธก ์ถ๊ฐ
|
| 159 |
+
temp_recommender = FestivalRecommender("", "")
|
| 160 |
+
predicted_2026_timing = temp_recommender._predict_next_year_date(summary["2025_๊ธฐ๊ฐ"])
|
| 161 |
+
|
| 162 |
+
summary_str = json.dumps(summary, ensure_ascii=False, indent=2)
|
| 163 |
+
|
| 164 |
+
llm = get_llm(temperature=0.1)
|
| 165 |
+
|
| 166 |
+
# --- ํ๋กฌํํธ ์์ ---
|
| 167 |
+
prompt = f"""
|
| 168 |
+
๋น์ ์ ์ถ์ ์ ๋ฌธ ๋ถ์๊ฐ์
๋๋ค. ์๋ [์ถ์ ํ๋กํ ์์ฝ]์ ๋ฐํ์ผ๋ก,
|
| 169 |
+
์ด ์ถ์ ์ **ํต์ฌ ํน์ง**๊ณผ **์ฃผ์ ๋ฐฉ๋ฌธ๊ฐ(ํ๊ฒ ๊ณ ๊ฐ) ํน์ฑ**์
|
| 170 |
+
์ดํดํ๊ธฐ ์ฝ๊ฒ ์์ฝํด์ฃผ์ธ์.
|
| 171 |
+
|
| 172 |
+
[์ถ์ ํ๋กํ ์์ฝ]
|
| 173 |
+
{summary_str}
|
| 174 |
+
|
| 175 |
+
[๋ถ์ ๊ฐ์ด๋๋ผ์ธ]
|
| 176 |
+
1. **ํต์ฌ ํน์ง**: ์
๋ ฅ๋ **'์๊ฐ'** ๋ด์ฉ์ ๋ฐํ์ผ๋ก ์ถ์ ์ ์ฃผ์ ์ ์ฃผ์ ๋ด์ฉ์ **2~3๋ฌธ์ฅ์ผ๋ก ์์ธํ ์์ฝ**ํ๊ณ , 'ํค์๋'์ '์ถ์ _์ธ๊ธฐ๋', '์ธ๊ธฐ๋_์ ์'๋ฅผ ์ธ๊ธํ์ฌ ๋ถ์ฐ ์ค๋ช
ํฉ๋๋ค. (์: "'{summary.get("์๊ฐ", "์๊ฐ ์ ๋ณด ์์")[:50]}...'์(๋ฅผ) ์ฃผ์ ๋ก ํ๋ ์ถ์ ์
๋๋ค. ์ฃผ์ ํค์๋๋ '{summary.get("ํค์๋", "N/A")}'์ด๋ฉฐ, ์ธ๊ธฐ๋๋ '{summary.get("์ถ์ _์ธ๊ธฐ๋", "N/A")}' ์์ค์
๋๋ค.")
|
| 177 |
+
2. **์ฃผ์ ๋ฐฉ๋ฌธ๊ฐ**: '์ฃผ์_๊ณ ๊ฐ์ธต'๊ณผ '์ฃผ์_๋ฐฉ๋ฌธ์' ์ปฌ๋ผ์ ์ง์ ์ธ์ฉํ์ฌ ์ค๋ช
ํฉ๋๋ค.
|
| 178 |
+
(์: {summary.get("์ฃผ์_๊ณ ๊ฐ์ธต", "N/A")}์ด ์ฃผ๋ก ๋ฐฉ๋ฌธํ๋ฉฐ, {summary.get("์ฃผ์_๋ฐฉ๋ฌธ์", "N/A")} ๋น์จ์ด ๋์ต๋๋ค.)
|
| 179 |
+
3. **ํ์**: ์๋์ ๊ฐ์ ๋งํฌ๋ค์ด ํ์์ผ๋ก ๋ต๋ณ์ ์์ฑํ์ธ์.
|
| 180 |
+
4. **์ทจ์์ ๊ธ์ง**: ์ ๋๋ก `~~text~~`์ ๊ฐ์ ์ทจ์์ ๋งํฌ๋ค์ด์ ์ฌ์ฉํ์ง ๋ง์ธ์.
|
| 181 |
+
|
| 182 |
+
[๋ต๋ณ ํ์]
|
| 183 |
+
### ๐ ์ถ์ ํ๋กํ ๋ถ์ ๋ฆฌํฌํธ: {summary.get("์ถ์ ๋ช
")}
|
| 184 |
+
|
| 185 |
+
**1. ์ถ์ ํต์ฌ ํน์ง**
|
| 186 |
+
* [์ถ์ ์๊ฐ ๋ด์ฉ์ ๋ฐํ์ผ๋ก 2~3๋ฌธ์ฅ ์์ฝ. ํค์๋์ ์ธ๊ธฐ๋ ํฌํจ]
|
| 187 |
+
|
| 188 |
+
**2. ์ฃผ์ ๋ฐฉ๋ฌธ๊ฐ ํน์ฑ**
|
| 189 |
+
* **์ฃผ์ ๊ณ ๊ฐ์ธต:** {summary.get("์ฃผ์_๊ณ ๊ฐ์ธต")}
|
| 190 |
+
* **์ฃผ์ ๋ฐฉ๋ฌธ์:** {summary.get("์ฃผ์_๋ฐฉ๋ฌธ์")}
|
| 191 |
+
|
| 192 |
+
**3. 2026๋
๊ฐ์ต ๊ธฐ๊ฐ (์์)**
|
| 193 |
+
* {predicted_2026_timing}
|
| 194 |
+
|
| 195 |
+
**4. ํํ์ด์ง**
|
| 196 |
+
* {summary.get("ํํ์ด์ง", "์ ๋ณด ์์")}
|
| 197 |
+
"""
|
| 198 |
+
|
| 199 |
+
response = llm.invoke([HumanMessage(content=prompt)])
|
| 200 |
+
analysis_report = response.content.strip()
|
| 201 |
+
return analysis_report
|
| 202 |
+
|
| 203 |
+
except Exception as e:
|
| 204 |
+
logger.critical(f"--- [Tool CRITICAL] '์ถ์ ํ๋กํ ๋ถ๏ฟฝ๏ฟฝ๏ฟฝ' ์ค ์ค๋ฅ: {e} ---", exc_info=True)
|
| 205 |
+
return f"'{festival_name}' ์ถ์ ํ๋กํ์ ๋ถ์ํ๋ ์ค ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค: {e}"
|
tools/tool_loader.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# tools/tool_loader.py
# Aggregates every tool exposed to the orchestrator into a single list.

from .festival_recommender import recommend_festivals
from .marketing_strategy import (
    search_contextual_marketing_strategy,
    create_festival_specific_marketing_strategy,
    create_marketing_strategies_for_multiple_festivals
)
from .profile_analyzer import (
    get_festival_profile_by_name,
    analyze_merchant_profile,
    analyze_festival_profile,
)

# Final tool list consumed by the orchestrator.
ALL_TOOLS = [
    recommend_festivals,  # (composite) store-tailored festival recommendation (query rewrite ~ final ranking)
    get_festival_profile_by_name,  # (DB lookup) fetch a festival's detailed profile (JSON) by name
    search_contextual_marketing_strategy,  # (RAG) search general marketing/promotion strategies in the vector DB
    create_festival_specific_marketing_strategy,  # (LLM) generate a tailored marketing strategy for a *single* festival
    create_marketing_strategies_for_multiple_festivals,  # (LLM) batch-generate tailored strategies for *multiple* festivals
    analyze_merchant_profile,  # (LLM) SWOT / customer-trait analysis from a merchant profile (JSON)
    analyze_festival_profile,  # (LLM) key-feature / visitor analysis from a festival profile (JSON)
]
|
utils/parser_utils.py
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# utils/parser_utils.py
|
| 2 |
+
|
| 3 |
+
import re
|
| 4 |
+
import json
|
| 5 |
+
from typing import List, Dict, Any, Union
|
| 6 |
+
|
| 7 |
+
def extract_json_from_llm_response(response_text: str) -> Union[List[Dict[str, Any]], Dict[str, Any]]:
    """
    Safely extract and parse a JSON payload from an LLM response.

    Search order:
      1. A fenced ```json ... ``` markdown block (highest priority).
      2. The first bare JSON array ([...]) or object ({...}) in the text.

    Args:
        response_text: Raw text returned by the LLM.

    Returns:
        The parsed JSON value (a list or a dict).

    Raises:
        ValueError: if no JSON block is found, or the extracted block
            fails to parse as JSON.
    """
    json_str = None

    # 1. Fenced ```json ... ``` markdown block.
    #    [\s\S]*? matches across newlines non-greedily, so only the first
    #    fenced block is captured even if several are present.
    json_match = re.search(
        r'```json\s*([\s\S]*?)\s*```',
        response_text,
        re.DOTALL | re.IGNORECASE
    )

    if json_match:
        json_str = json_match.group(1).strip()
    else:
        # 2. No fenced block: locate the first '{' or '['.
        #    BUGFIX: the original pattern r'[{|\[]' also matched a literal
        #    '|' inside the character class; a stray pipe before the JSON
        #    would anchor here, neither branch below would fire, and a
        #    spurious ValueError was raised. Match only real JSON openers.
        first_bracket_match = re.search(r'[{\[]', response_text)
        if first_bracket_match:
            start_index = first_bracket_match.start()

            # Response starts with a JSON array.
            if response_text[start_index] == '[':
                list_match = re.search(r'(\[[\s\S]*\])', response_text[start_index:], re.DOTALL)
                if list_match:
                    json_str = list_match.group(0)

            # Response starts with a JSON object.
            elif response_text[start_index] == '{':
                dict_match = re.search(r'(\{[\s\S]*\})', response_text[start_index:], re.DOTALL)
                if dict_match:
                    json_str = dict_match.group(0)

    if json_str is None:
        raise ValueError(f"์๋ต์์ JSON ๋ธ๋ก์ ์ฐพ์ง ๋ชปํ์ต๋๋ค. (์๋ต ์์: {response_text[:150]}...)")

    try:
        # (Debugging) log the extracted string if needed:
        # print(f"--- [Parser DEBUG] Extracted JSON String: {json_str[:200]}... ---")
        return json.loads(json_str)
    except json.JSONDecodeError as e:
        raise ValueError(f"JSON ํ์ฑ์ ์คํจํ์ต๋๋ค: {e}. (์ถ์ถ๋ ๋ฌธ์์ด: {json_str[:150]}...)")
|
uv.lock
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
vectorstore/faiss_festival/index.faiss
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:78a06f6935bd51302619b61b155efa818a2ce0aaa5b3b8c2f7b498151b8a2619
|
| 3 |
+
size 364589
|
vectorstore/faiss_festival/index.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2bc78f137c400ceb373cc99cae9b9a016bd74ecce05cf9b40460b1847af6b19d
|
| 3 |
+
size 503563
|
vectorstore/faiss_marketing/index.faiss
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:997ab4e4061ea89a33e067f80e45b1ee05865a7f8839ee9914c0e43fee705df3
|
| 3 |
+
size 4288557
|
vectorstore/faiss_marketing/index.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6349770bbc7f676dee108982fe181e1af15ee1485eb539ad9eb8226f799e9fbd
|
| 3 |
+
size 1494859
|
๊ธฐํ/create_faiss_festival.py
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import os
|
| 3 |
+
import traceback
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
import time
|
| 6 |
+
|
| 7 |
+
from langchain_community.embeddings import HuggingFaceEmbeddings
|
| 8 |
+
from langchain_community.vectorstores import FAISS
|
| 9 |
+
from langchain.docstore.document import Document
|
| 10 |
+
|
| 11 |
+
# --- 1. ์ถ์ ๋ฐ์ดํฐ ๋ก๋ ---
|
| 12 |
+
def _load_and_process_festivals_for_indexing():
    """
    Load 'festival_df.csv' and return it as a list of per-row dicts.

    1. All date handling, dropna, and column-renaming logic was removed.
    2. The raw CSV is read as-is and NaN values are replaced with "" so no
       data is lost (every original column is later used as Document
       metadata during the filtering stage).

    Returns:
        list[dict]: one dict per festival row, or None on any failure.
    """
    print("--- [Indexer] 'festival_df.csv' ๋ก๋ฉ ๋ฐ ์ ์ฒ๋ฆฌ ์์... ---")
    try:
        # NOTE(review): 'data' is resolved relative to this script's own
        # directory — confirm data/festival_df.csv actually lives next to it.
        project_root = Path(__file__).resolve().parent
        file_path = project_root / 'data' / 'festival_df.csv'
        if not file_path.exists():
            raise FileNotFoundError(f"๋ฐ์ดํฐ ํ์ผ์ ์ฐพ์ ์ ์์ต๋๋ค: {file_path}")

        df = pd.read_csv(file_path)
        if df.empty:
            raise ValueError("'festival_df.csv' ํ์ผ์ ๋ฐ์ดํฐ๊ฐ ์์ต๋๋ค.")

        # Replace every NaN with an empty string (prevents data loss downstream).
        df = df.fillna("")

        print(f"--- [Indexer] 'festival_df.csv' ๋ก๋ฉ ์ฑ๊ณต. {len(df)}๊ฐ ์ถ์ ๋ฐ๊ฒฌ ---")
        return df.to_dict('records')

    except Exception as e:
        # Any failure (missing file, empty file, parse error) is logged with
        # a full traceback and signalled to the caller with None.
        print(f"--- [Indexer CRITICAL] 'festival_df.csv' ๋ก๋ฉ ์คํจ: {e}\n{traceback.format_exc()} ---")
        return None
|
| 38 |
+
|
| 39 |
+
# --- 2. ์๋ฒ ๋ฉ ๋ชจ๋ธ ์ค๋น (์ ์ง) ---
|
| 40 |
+
def get_embeddings_model():
    """Load and return the local HuggingFace embedding model used for indexing."""
    print("--- [Indexer] HuggingFace ์๋ฒ ๋ฉ ๋ชจ๋ธ ๋ก๋ฉ ์์... ---")
    embeddings = HuggingFaceEmbeddings(
        model_name="dragonkue/BGE-m3-ko",
        # CPU inference; switch the device key if a GPU is available.
        model_kwargs={'device': 'cpu'},
        # Embedding normalization — presumably required for cosine-style
        # retrieval with this BGE model (TODO confirm against model card).
        encode_kwargs={'normalize_embeddings': True},
    )
    print("--- [Indexer] HuggingFace ์๋ฒ ๋ฉ ๋ชจ๋ธ ๋ก๋ฉ ์๋ฃ ---")
    return embeddings
|
| 53 |
+
|
| 54 |
+
# --- 3. ๋ฒกํฐ ์คํ ์ด ๊ตฌ์ถ ๋ฐ ์ ์ฅ ---
|
| 55 |
+
def build_and_save_vector_store():
    """
    Build the festival FAISS vector store and save it to disk.

    Pipeline: load festival rows -> load embedding model -> wrap each row
    as a LangChain Document (name/keywords/intro embedded, the full row as
    metadata) -> build the FAISS index -> save it under 'faiss_festival'.
    """
    start_time = time.time()

    # 1. Load festival data (unchanged).
    festivals = _load_and_process_festivals_for_indexing()
    if not festivals:
        print("--- [Indexer ERROR] ์ถ์ ๋ฐ์ดํฐ๊ฐ ์์ด ์ธ๋ฑ์ฑ์ ์ค๋จํฉ๋๋ค.")
        return

    # 2. Load the embedding model (unchanged).
    embeddings = get_embeddings_model()

    # 3. Convert festival rows into LangChain Document objects.
    documents = []
    print("--- [Indexer] ์ถ์ ์ ๋ณด -> ๋ฌธ์(Document) ๋ณํ ์์ ---")
    for festival in festivals:

        # The festival name is included in the embedded text (per review request).
        content = (
            f"์ถ์ ๋ช: {festival.get('์ถ์ ๋ช', '')}\n"
            f"์ถ์ ํค์๋: {festival.get('ํค์๋', '')}\n"
            f"์ถ์ ์๊ฐ: {festival.get('์๊ฐ', '')}"
        )

        # Metadata used by the later filtering stage (most important part):
        # the entire original row (name, audience, popularity score, ...).
        metadata = festival

        documents.append(Document(page_content=content, metadata=metadata))

    print(f"--- [Indexer] ๋ฌธ์ ๋ณํ ์๋ฃ. ์ด {len(documents)}๊ฐ ๋ฌธ์ ์์ฑ ---")

    # 4. Build the FAISS vector store (unchanged).
    print("--- [Indexer] FAISS ๋ฒกํฐ ์คํ ์ด ์์ฑ ์์ (์๊ฐ์ด ๊ฑธ๋ฆด ์ ์์ต๋๋ค)... ---")
    vector_store = FAISS.from_documents(documents, embeddings)
    print("--- [Indexer] FAISS ๋ฒกํฐ ์คํ ์ด ์์ฑ ์๋ฃ ---")

    # 5. Save locally (unchanged).
    # NOTE(review): saved next to this script as 'faiss_festival' — confirm
    # this matches the path the loader expects (vectorstore/faiss_festival).
    project_root = Path(__file__).resolve().parent
    save_path = project_root / 'faiss_festival'

    os.makedirs(save_path.parent, exist_ok=True)
    vector_store.save_local(str(save_path))

    end_time = time.time()
    print("=" * 50)
    print(f"๐ ์ฑ๊ณต! FAISS ๋ฒกํฐ ์คํ ์ด๋ฅผ ์์ฑํ์ฌ '{save_path}'์ ์ ์ฅํ์ต๋๋ค.")
    print(f"์ด ์์ ์๊ฐ: {end_time - start_time:.2f}์ด")
    print("=" * 50)
|
| 104 |
+
|
| 105 |
+
# Script entry point: build and persist the FAISS festival index when run directly.
if __name__ == "__main__":
    build_and_save_vector_store()
|
๊ธฐํ/create_faiss_marketing.py
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# create_marketing_retriever.py
|
| 2 |
+
# -*- coding: utf-8 -*-
|
| 3 |
+
|
| 4 |
+
import os
|
| 5 |
+
import time
|
| 6 |
+
from langchain_community.document_loaders import DirectoryLoader, PyPDFLoader
|
| 7 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
| 8 |
+
from langchain_community.vectorstores import FAISS
|
| 9 |
+
# [๋ณ๊ฒฝ] Google ๋์ HuggingFace ์๋ฒ ๋ฉ์ ๊ฐ์ ธ์ต๋๋ค.
|
| 10 |
+
from langchain_community.embeddings import HuggingFaceEmbeddings
|
| 11 |
+
# [์ญ์ ] from dotenv import load_dotenv (๋ ์ด์ ํ์ ์์)
|
| 12 |
+
|
| 13 |
+
def create_and_save_retriever():
    """
    (Revised) Build a marketing retriever from the PDF documents under
    './marketing' using the local Hugging Face embedding model
    ('dragonkue/BGE-m3-ko'), then save the resulting FAISS store to disk.
    """
    try:
        # 0. API-key loading removed: a local embedding model needs no key.
        print("โ ๋ก์ปฌ ์๋ฒ ๋ฉ ๋ชจ๋ธ์ ์ฌ์ฉํฉ๋๋ค. (API ํค ํ์ ์์)")

        # 1. Load every PDF under ./marketing (recursive glob).
        loader = DirectoryLoader(
            './marketing',
            glob="**/*.pdf",
            loader_cls=PyPDFLoader,
            show_progress=True,
            use_multithreading=True
        )
        documents = loader.load()
        print(f"โ ์ด {len(documents)}๊ฐ์ PDF ๋ฌธ์๋ฅผ ๋ถ๋ฌ์์ต๋๋ค.")

        if not documents:
            raise ValueError("๐จ 'marketing' ํด๋์ PDF ํ์ผ์ด ์์ต๋๋ค. ๋ฌธ์๋ฅผ ์ถ๊ฐํด์ฃผ์ธ์.")

        # 2. Split documents into overlapping chunks for retrieval.
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=100
        )
        docs = text_splitter.split_documents(documents)
        print(f"โ ๋ฌธ์๋ฅผ ์ด {len(docs)}๊ฐ์ ์ฒญํฌ(chunk)๋ก ๋ถํ ํ์ต๋๋ค.")

        if not docs:
            raise ValueError("๐จ ๋ฌธ์๋ฅผ ์ฒญํฌ๋ก ๋ถํ ํ๋ ๋ฐ ์คํจํ์ต๋๋ค.")

        # 3. Configure the local embedding model.
        print(f"โ ์๋ฒ ๋ฉ ๋ชจ๋ธ 'dragonkue/BGE-m3-ko' ๋ก๋๋ฅผ ์์ํฉ๋๋ค...")

        model_name = "dragonkue/BGE-m3-ko"
        # Switch to {'device': 'cuda'} when a GPU is available.
        model_kwargs = {'device': 'cpu'}
        # Embedding normalization strongly recommended for BGE retrieval quality.
        encode_kwargs = {'normalize_embeddings': True}

        embeddings = HuggingFaceEmbeddings(
            model_name=model_name,
            model_kwargs=model_kwargs,
            encode_kwargs=encode_kwargs
        )
        print(f"โ ์๋ฒ ๋ฉ ๋ชจ๋ธ์ ์ฑ๊ณต์ ์ผ๋ก ๋ก๋ํ์ต๋๋ค.")

        # 4. Build the FAISS vector store. A local model batches internally,
        #    so the API rate-limit sleeps of the old version are unnecessary.
        vectorstore = None
        print(f"๐ ์ด {len(docs)}๊ฐ์ ์ฒญํฌ์ ๋ํ ์๋ฒ ๋ฉ์ ์์ํฉ๋๋ค. (์๊ฐ์ด ๊ฑธ๋ฆด ์ ์์)")

        vectorstore = FAISS.from_documents(docs, embeddings)

        # 5. Save locally (path kept in sync with what knowledge_base.py expects).
        save_dir = './retriever/marketing_retriever'
        os.makedirs(save_dir, exist_ok=True)

        vectorstore.save_local(save_dir)

        print(f"๐ Retriever๊ฐ ์ฑ๊ณต์ ์ผ๋ก ์์ฑ๋์ด '{save_dir}' ํด๋์ ์ ์ฅ๋์์ต๋๋ค!")

    except Exception as e:
        # Log the full traceback for any failure; the error is not re-raised.
        print(f"๐จ๐จ ์น๋ช์ ์ธ ์ค๋ฅ ๋ฐ์ ๐จ๐จ: {e}")
        import traceback
        traceback.print_exc()
|
| 84 |
+
|
| 85 |
+
if __name__ == '__main__':
    # 1. Verify the required libraries are installed before doing any work.
    try:
        import langchain_community
        import sentence_transformers
        import faiss
        import torch
    except ImportError as e:
        # Tell the user which package is missing and how to install it.
        print(f"๐จ [์ค๋ฅ] {e.name} ๋ผ์ด๋ธ๋ฌ๋ฆฌ๊ฐ ์ค์น๋์ง ์์์ต๋๋ค.")
        print("๐ ๋ค์ ๋ช๋ น์ด๋ฅผ ์คํํ์ฌ ํ์ํ ๋ผ์ด๋ธ๋ฌ๋ฆฌ๋ฅผ ์ค์นํด์ฃผ์ธ์:")
        print("pip install langchain-community sentence-transformers faiss-cpu torch")
        print("(GPU ์ฌ์ฉ ์: pip install langchain-community sentence-transformers faiss-gpu torch)")
        exit(1)

    create_and_save_retriever()
|
๊ธฐํ/create_final_df.py
ADDED
|
@@ -0,0 +1,530 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<<<<<<< HEAD
|
| 2 |
+
# final_df.py
|
| 3 |
+
|
| 4 |
+
import pandas as pd
|
| 5 |
+
import numpy as np
|
| 6 |
+
import os
|
| 7 |
+
import sys
|
| 8 |
+
|
| 9 |
+
# ์คํฌ๋ฆฝํธ ํ์ผ์ด ์์นํ ๋๋ ํ ๋ฆฌ๋ฅผ ๊ธฐ์ค์ผ๋ก ๊ฒฝ๋ก ์ค์
|
| 10 |
+
script_path = os.path.abspath(sys.argv[0])
|
| 11 |
+
script_dir = os.path.dirname(script_path)
|
| 12 |
+
|
| 13 |
+
# ๋ฐ์ดํฐ ํด๋ ๊ฒฝ๋ก
|
| 14 |
+
data_dir = os.path.join(script_dir, 'data')
|
| 15 |
+
|
| 16 |
+
# 'data' ํด๋๊ฐ ์ค์ ๋ก ์กด์ฌํ๋์ง ํ์ธ (์์ ์ฅ์น)
|
| 17 |
+
if not os.path.exists(data_dir):
|
| 18 |
+
print(f"Error: Data directory not found at {data_dir}. Please check your folder structure.")
|
| 19 |
+
sys.exit(1)
|
| 20 |
+
|
| 21 |
+
# ํ์ผ ๊ฒฝ๋ก ํจ์
|
| 22 |
+
def get_file_path(filename):
|
| 23 |
+
"""data ํด๋ ๋ด์ ํ์ผ ๊ฒฝ๋ก๋ฅผ ๋ฐํํฉ๋๋ค."""
|
| 24 |
+
return os.path.join(data_dir, filename)
|
| 25 |
+
|
| 26 |
+
# --------------------------------------------------------------------------
|
| 27 |
+
#### 1) ๋ฐ์ดํฐ 1 - **๊ฐ๋งน์ ๊ฐ์์ ๋ณด**
|
| 28 |
+
# --------------------------------------------------------------------------
|
| 29 |
+
|
| 30 |
+
file_path1 = get_file_path('big_data_set1_f.csv')
|
| 31 |
+
|
| 32 |
+
try:
|
| 33 |
+
df1 = pd.read_csv(file_path1, encoding="cp949")
|
| 34 |
+
except FileNotFoundError:
|
| 35 |
+
print(f"Error: File not found at {file_path1}. Please ensure big_data_set1_f.csv is in the 'data' folder.")
|
| 36 |
+
sys.exit(1)
|
| 37 |
+
|
| 38 |
+
col_map1 = {
|
| 39 |
+
"ENCODED_MCT": "๊ฐ๋งน์ ID",
|
| 40 |
+
"MCT_BSE_AR": "๊ฐ๋งน์ ์ฃผ์",
|
| 41 |
+
"MCT_NM": "๊ฐ๋งน์ ๋ช
",
|
| 42 |
+
"MCT_BRD_NUM": "๋ธ๋๋๊ตฌ๋ถ์ฝ๋",
|
| 43 |
+
"MCT_SIGUNGU_NM": "์ง์ญ๋ช
",
|
| 44 |
+
"HPSN_MCT_ZCD_NM": "์
์ข
",
|
| 45 |
+
"HPSN_MCT_BZN_CD_NM": "์๊ถ",
|
| 46 |
+
"ARE_D": "๊ฐ์ค์ผ",
|
| 47 |
+
"MCT_ME_D": "ํ์
์ฌ๋ถ"
|
| 48 |
+
}
|
| 49 |
+
|
| 50 |
+
df1 = df1.rename(columns=col_map1)
|
| 51 |
+
|
| 52 |
+
# - ์ ์ฒ๋ฆฌ
|
| 53 |
+
df1['๋ธ๋๋๊ตฌ๋ถ์ฝ๋'] = df1['๋ธ๋๋๊ตฌ๋ถ์ฝ๋'].fillna('๋ฏธํ์ธ')
|
| 54 |
+
df1['์๊ถ'] = df1['์๊ถ'].fillna('๋ฏธํ์ธ')
|
| 55 |
+
|
| 56 |
+
df1['๊ฐ์ค์ผ'] = df1['๊ฐ์ค์ผ'].astype(str)
|
| 57 |
+
# errors='coerce' ์ถ๊ฐ: ipynb ์ฝ๋์๋ ์์ง๋ง, ์์ ํ datetime ๋ณํ์ ์ํด ์ ์ง (์๋ณธ py ์ฝ๋ ์ ์ง)
|
| 58 |
+
df1['๊ฐ์ค์ผ'] = pd.to_datetime(df1['๊ฐ์ค์ผ'], format='%Y%m%d', errors='coerce')
|
| 59 |
+
|
| 60 |
+
# ipynb ํ์ผ์์๋ errors='coerce'๊ฐ ์์์ง๋ง, int ๋ณํ ์ ์ค๋ฅ ๋ฐฉ์ง๋ฅผ ์ํด ์๋ณธ py ์ฝ๋์ ์์ ๋ก์ง์ ๋ฐ๋ฆ.
|
| 61 |
+
df1['ํ์
์ฌ๋ถ'] = df1['ํ์
์ฌ๋ถ'].apply(lambda x: pd.to_datetime(int(x), format='%Y%m%d', errors='coerce') if pd.notna(x) and str(x).isdigit() else pd.NaT)
|
| 62 |
+
df1['์ด์์ํ'] = df1['ํ์
์ฌ๋ถ'].apply(lambda x: '์ด์์ค' if pd.isna(x) else 'ํ์
')
|
| 63 |
+
|
| 64 |
+
# --------------------------------------------------------------------------
|
| 65 |
+
#### 2) ๋ฐ์ดํฐ 2 - **๊ฐ๋งน์ ์๋ณ ์ด์ฉ์ ๋ณด**
|
| 66 |
+
# --------------------------------------------------------------------------
|
| 67 |
+
|
| 68 |
+
file_path2 = get_file_path('big_data_set2_f.csv')
|
| 69 |
+
|
| 70 |
+
try:
|
| 71 |
+
df2 = pd.read_csv(file_path2, encoding="cp949")
|
| 72 |
+
except FileNotFoundError:
|
| 73 |
+
print(f"Error: File not found at {file_path2}. Please ensure big_data_set2_f.csv is in the 'data' folder.")
|
| 74 |
+
sys.exit(1)
|
| 75 |
+
|
| 76 |
+
col_map2 = {
|
| 77 |
+
"ENCODED_MCT": "๊ฐ๋งน์ ID",
|
| 78 |
+
"TA_YM": "๊ธฐ์ค๋
์",
|
| 79 |
+
"MCT_OPE_MS_CN": "์ด์๊ฐ์์_๊ตฌ๊ฐ",
|
| 80 |
+
"RC_M1_SAA": "์๋งค์ถ๊ธ์ก_๊ตฌ๊ฐ",
|
| 81 |
+
"RC_M1_TO_UE_CT": "์๋งค์ถ๊ฑด์_๊ตฌ๊ฐ",
|
| 82 |
+
"RC_M1_UE_CUS_CN": "์์ ๋ํฌ๊ณ ๊ฐ์_๊ตฌ๊ฐ",
|
| 83 |
+
"RC_M1_AV_NP_AT": "์๊ฐ๋จ๊ฐ_๊ตฌ๊ฐ",
|
| 84 |
+
"APV_CE_RAT": "์ทจ์์จ_๊ตฌ๊ฐ",
|
| 85 |
+
"DLV_SAA_RAT": "๋ฐฐ๋ฌ๋งค์ถ๋น์จ",
|
| 86 |
+
"M1_SME_RY_SAA_RAT": "๋์ผ์
์ข
๋งค์ถ๋๋น๋น์จ",
|
| 87 |
+
"M1_SME_RY_CNT_RAT": "๋์ผ์
์ข
๊ฑด์๋๋น๋น์จ",
|
| 88 |
+
"M12_SME_RY_SAA_PCE_RT": "๋์ผ์
์ข
๋ด๋งค์ถ์์๋น์จ",
|
| 89 |
+
"M12_SME_BZN_SAA_PCE_RT": "๋์ผ์๊ถ๋ด๋งค์ถ์์๋น์จ",
|
| 90 |
+
"M12_SME_RY_ME_MCT_RAT": "๋์ผ์
์ข
ํด์ง๊ฐ๋งน์ ๋น์ค",
|
| 91 |
+
"M12_SME_BZN_ME_MCT_RAT": "๋์ผ์๊ถํด์ง๊ฐ๋งน์ ๋น์ค"
|
| 92 |
+
}
|
| 93 |
+
|
| 94 |
+
df2 = df2.rename(columns=col_map2)
|
| 95 |
+
|
| 96 |
+
# - ์ ์ฒ๋ฆฌ
|
| 97 |
+
df2['๊ธฐ์ค๋
์'] = pd.to_datetime(df2['๊ธฐ์ค๋
์'].astype(str), format='%Y%m')
|
| 98 |
+
|
| 99 |
+
df2.replace(-999999.9, np.nan, inplace=True)
|
| 100 |
+
|
| 101 |
+
# --------------------------------------------------------------------------
|
| 102 |
+
#### 3) ๋ฐ์ดํฐ 3 - **๊ฐ๋งน์ ์๋ณ ์ด์ฉ ๊ณ ๊ฐ์ ๋ณด**
|
| 103 |
+
# --------------------------------------------------------------------------
|
| 104 |
+
|
| 105 |
+
file_path3 = get_file_path('big_data_set3_f.csv')
|
| 106 |
+
|
| 107 |
+
try:
|
| 108 |
+
df3 = pd.read_csv(file_path3, encoding="cp949")
|
| 109 |
+
except FileNotFoundError:
|
| 110 |
+
print(f"Error: File not found at {file_path3}. Please ensure big_data_set3_f.csv is in the 'data' folder.")
|
| 111 |
+
sys.exit(1)
|
| 112 |
+
|
| 113 |
+
col_map3 = {
|
| 114 |
+
"ENCODED_MCT": "๊ฐ๋งน์ ID",
|
| 115 |
+
"TA_YM": "๊ธฐ์ค๋
์",
|
| 116 |
+
"M12_MAL_1020_RAT": "๋จ์ฑ20๋์ดํ๋น์จ",
|
| 117 |
+
"M12_MAL_30_RAT": "๋จ์ฑ30๋๋น์จ",
|
| 118 |
+
"M12_MAL_40_RAT": "๋จ์ฑ40๋๋น์จ",
|
| 119 |
+
"M12_MAL_50_RAT": "๋จ์ฑ50๋๋น์จ",
|
| 120 |
+
"M12_MAL_60_RAT": "๋จ์ฑ60๋์ด์๋น์จ",
|
| 121 |
+
"M12_FME_1020_RAT": "์ฌ์ฑ20๋์ดํ๋น์จ",
|
| 122 |
+
"M12_FME_30_RAT": "์ฌ์ฑ30๋๋น์จ",
|
| 123 |
+
"M12_FME_40_RAT": "์ฌ์ฑ40๋๋น์จ",
|
| 124 |
+
"M12_FME_50_RAT": "์ฌ์ฑ50๋๋น์จ",
|
| 125 |
+
"M12_FME_60_RAT": "์ฌ์ฑ60๋์ด์๋น์จ",
|
| 126 |
+
"MCT_UE_CLN_REU_RAT": "์ฌ์ด์ฉ๊ณ ๊ฐ๋น์จ",
|
| 127 |
+
"MCT_UE_CLN_NEW_RAT": "์ ๊ท๊ณ ๊ฐ๋น์จ",
|
| 128 |
+
"RC_M1_SHC_RSD_UE_CLN_RAT": "๊ฑฐ์ฃผ์์ด์ฉ๋น์จ",
|
| 129 |
+
"RC_M1_SHC_WP_UE_CLN_RAT": "์ง์ฅ์ธ์ด์ฉ๋น์จ",
|
| 130 |
+
"RC_M1_SHC_FLP_UE_CLN_RAT": "์ ๋์ธ๊ตฌ์ด์ฉ๋น์จ"
|
| 131 |
+
}
|
| 132 |
+
|
| 133 |
+
df3 = df3.rename(columns=col_map3)
|
| 134 |
+
|
| 135 |
+
# - ์ ์ฒ๋ฆฌ
|
| 136 |
+
df3['๊ธฐ์ค๋
์'] = pd.to_datetime(df3['๊ธฐ์ค๋
์'].astype(str), format='%Y%m')
|
| 137 |
+
|
| 138 |
+
df3.replace(-999999.9, np.nan, inplace=True)
|
| 139 |
+
|
| 140 |
+
# --------------------------------------------------------------------------
|
| 141 |
+
#### ๋ฐ์ดํฐ ํตํฉ
|
| 142 |
+
# --------------------------------------------------------------------------
|
| 143 |
+
|
| 144 |
+
df23 = pd.merge(df2, df3, on=["๊ฐ๋งน์ ID", "๊ธฐ์ค๋
์"], how="inner")
|
| 145 |
+
|
| 146 |
+
final_df = pd.merge(df23, df1, on="๊ฐ๋งน์ ID", how="left")
|
| 147 |
+
|
| 148 |
+
# --------------------------------------------------------------------------
|
| 149 |
+
#### ์ด์๊ฐ ์ฒ๋ฆฌ
|
| 150 |
+
# --------------------------------------------------------------------------
|
| 151 |
+
|
| 152 |
+
non_seongdong_areas = [
|
| 153 |
+
'์๊ตฌ์ ๋ก๋ฐ์ค', 'ํ์ฐ์ง๊ตฌ', '๋ฏธ์์ฌ๊ฑฐ๋ฆฌ', '๋ฐฉ๋ฐฐ์ญ',
|
| 154 |
+
'์์', '๋๋๋ฌธ์ญ์ฌ๋ฌธํ๊ณต์์ญ', '๊ฑด๋์
๊ตฌ',
|
| 155 |
+
'์๋ฉด์ญ', '์ค๋จ'
|
| 156 |
+
]
|
| 157 |
+
|
| 158 |
+
# Step 1๏ธโฃ ์ฃผ์๊ฐ '์ฑ๋๊ตฌ'์ ํฌํจ๋ ๋ฐ์ดํฐ๋ง ๋จ๊ธฐ๊ธฐ
|
| 159 |
+
mask_seongdong_addr = final_df['๊ฐ๋งน์ ์ฃผ์'].str.contains('์ฑ๋๊ตฌ', na=False)
|
| 160 |
+
seongdong_df = final_df[mask_seongdong_addr].copy()
|
| 161 |
+
|
| 162 |
+
# Step 2๏ธโฃ ์๊ถ๋ช
์ด ์ฑ๋๊ตฌ ์ธ์ธ๋ฐ ์ฃผ์๋ ์ฑ๋๊ตฌ์ธ ๊ฒฝ์ฐ โ ๋ผ๋ฒจ ๊ต์
|
| 163 |
+
mask_mislabel = seongdong_df['์๊ถ'].isin(non_seongdong_areas)
|
| 164 |
+
seongdong_df.loc[mask_mislabel, '์๊ถ'] = '๋ฏธํ์ธ(์ฑ๋๊ตฌ)'
|
| 165 |
+
|
| 166 |
+
# Step 3๏ธโฃ (ipynb ์ฝ๋ ๋ก์ง ์ ์ฉ) ์๊ถ๋ช
์ด '๋ฏธํ์ธ'์ธ๋ฐ ์ฃผ์๊ฐ ์ฑ๋๊ตฌ๊ฐ ์๋ ๊ฒฝ์ฐ ์ ๊ฑฐ
|
| 167 |
+
# ์ฃผํผํฐ ๋
ธํธ๋ถ์ ๋ก์ง์ ๊ทธ๋๋ก ๋ฐ์ํฉ๋๋ค. (์ค์ ํํฐ๋ง ํจ๊ณผ๋ ์์ง๋ง, ์ฝ๋ ์ผ์น์ฑ ํ๋ณด)
|
| 168 |
+
final_clean_df = seongdong_df[
|
| 169 |
+
~(
|
| 170 |
+
(seongdong_df['์๊ถ'].str.contains('๋ฏธํ์ธ')) &
|
| 171 |
+
(~seongdong_df['๊ฐ๋งน์ ์ฃผ์'].str.contains('์ฑ๋๊ตฌ', na=False))
|
| 172 |
+
)
|
| 173 |
+
].copy()
|
| 174 |
+
|
| 175 |
+
# ์
์ข
- ํ ์
์ข
์ด 100ํผ์ธ ๊ฒฝ์ฐ ์ ์ธ(์ด์์น ์ทจ๊ธ)
|
| 176 |
+
final_clean_df = final_clean_df[final_clean_df['์
์ข
'] != '์ ์ ํ'].copy()
|
| 177 |
+
|
| 178 |
+
# ์๋งค์ถ๊ธ์ก_๊ตฌ๊ฐ ์ปฌ๋ผ์ ๊ณ ์ ๊ฐ
|
| 179 |
+
unique_sales_bins = final_clean_df['์๋งค์ถ๊ธ์ก_๊ตฌ๊ฐ'].dropna().unique()
|
| 180 |
+
|
| 181 |
+
# ๋งค์ถ๊ตฌ๊ฐ์์ค ๋งคํ ๋์
๋๋ฆฌ ์ ์
|
| 182 |
+
# --------------------------------------------------------------------------
|
| 183 |
+
# โ๏ธ [์์ ] ๊ตฌ๊ฐ -> ์์ค ๋ณํ (์ปฌ๋ผ๋ณ ๋ค๋ฅธ ๋ช
์นญ ์ ์ฉ)
|
| 184 |
+
# --------------------------------------------------------------------------
|
| 185 |
+
|
| 186 |
+
# --------------------------------------------------------------------------
|
| 187 |
+
# โ๏ธ [์์ ] ๊ตฌ๊ฐ -> ์์ค ๋ณํ (๋ชจ๋ ์ปฌ๋ผ ์ ์ฉ)
|
| 188 |
+
# --------------------------------------------------------------------------
|
| 189 |
+
|
| 190 |
+
# 1. '์๋งค์ถ๊ธ์ก_๊ตฌ๊ฐ' (๊ท๋ชจ/์์ ๊ธฐ์ค)
|
| 191 |
+
sales_volume_map = {
|
| 192 |
+
'1_10%์ดํ': '์ต์์',
|
| 193 |
+
'2_10-25%': '์์',
|
| 194 |
+
'3_25-50%': '์ค์์',
|
| 195 |
+
'4_50-75%': '์คํ์',
|
| 196 |
+
'5_75-90%': 'ํ์',
|
| 197 |
+
'6_90%์ด๊ณผ(ํ์ 10% ์ดํ)': '์ตํ์'
|
| 198 |
+
}
|
| 199 |
+
|
| 200 |
+
# 2. '์๊ฐ๋จ๊ฐ_๊ตฌ๊ฐ' (๊ฐ๊ฒฉ๋ ๊ธฐ์ค)
|
| 201 |
+
price_level_map = {
|
| 202 |
+
'1_10%์ดํ': '์ต๊ณ ๊ฐ',
|
| 203 |
+
'2_10-25%': '๊ณ ๊ฐ',
|
| 204 |
+
'3_25-50%': '์ค๊ฐ',
|
| 205 |
+
'4_50-75%': '์ค์ ๊ฐ',
|
| 206 |
+
'5_75-90%': '์ ๊ฐ',
|
| 207 |
+
'6_90%์ด๊ณผ(ํ์ 10% ์ดํ)': '์ต์ ๊ฐ'
|
| 208 |
+
}
|
| 209 |
+
|
| 210 |
+
# 3. '์ด์๊ฐ์์_๊ตฌ๊ฐ' (๊ฒฝํ/์ฐ์ฐจ ๊ธฐ์ค)
|
| 211 |
+
operation_period_map = {
|
| 212 |
+
'1_10%์ดํ': '์ต์ฅ๊ธฐ', # ๊ฐ์ฅ ์ค๋ ์ด์
|
| 213 |
+
'2_10-25%': '์ฅ๊ธฐ',
|
| 214 |
+
'3_25-50%': '์ค๊ธฐ',
|
| 215 |
+
'4_50-75%': '๋จ๊ธฐ',
|
| 216 |
+
'5_75-90%': '์ ๊ท',
|
| 217 |
+
'6_90%์ด๊ณผ(ํ์ 10% ์ดํ)': '์ต์ ๊ท' # ๊ฐ์ฅ ์ต๊ทผ ๊ฐ์
|
| 218 |
+
}
|
| 219 |
+
|
| 220 |
+
# 4. '์๋งค์ถ๊ฑด์_๊ตฌ๊ฐ' (๊ฑฐ๋๋/๋น๋ ๊ธฐ์ค)
|
| 221 |
+
transaction_count_map = {
|
| 222 |
+
'1_10%์ดํ': '๊ฑฐ๋ ์ต๋ค', # ๊ฑฐ๋๊ฐ ๊ฐ์ฅ ๋ง์
|
| 223 |
+
'2_10-25%': '๊ฑฐ๋ ๋ง์',
|
| 224 |
+
'3_25-50%': '๊ฑฐ๋ ๋ณดํต',
|
| 225 |
+
'4_50-75%': '๊ฑฐ๋ ์ ์',
|
| 226 |
+
'5_75-90%': '๊ฑฐ๋ ํฌ์',
|
| 227 |
+
'6_90%์ด๊ณผ(ํ์ 10% ์ดํ)': '๊ฑฐ๋ ์ต์ '
|
| 228 |
+
}
|
| 229 |
+
|
| 230 |
+
# 5. '์์ ๋ํฌ๊ณ ๊ฐ์_๊ตฌ๊ฐ' (๊ณ ๊ฐ ๊ท๋ชจ ๊ธฐ์ค)
|
| 231 |
+
customer_count_map = {
|
| 232 |
+
'1_10%์ดํ': '๊ณ ๊ฐ ์ต๋ค', # ๊ณ ๊ฐ ์๊ฐ ๊ฐ์ฅ ๋ง์
|
| 233 |
+
'2_10-25%': '๊ณ ๊ฐ ๋ง์',
|
| 234 |
+
'3_25-50%': '๊ณ ๊ฐ ๋ณดํต',
|
| 235 |
+
'4_50-75%': '๊ณ ๊ฐ ์ ์',
|
| 236 |
+
'5_75-90%': '๊ณ ๊ฐ ํฌ์',
|
| 237 |
+
'6_90%์ด๊ณผ(ํ์ 10% ์ดํ)': '๊ณ ๊ฐ ์ต์ '
|
| 238 |
+
}
|
| 239 |
+
|
| 240 |
+
|
| 241 |
+
# --- ์ ์ปฌ๋ผ ์์ฑ ---
|
| 242 |
+
|
| 243 |
+
final_clean_df['๋งค์ถ๊ตฌ๊ฐ_์์ค'] = final_clean_df['์๋งค์ถ๊ธ์ก_๊ตฌ๊ฐ'].map(sales_volume_map)
|
| 244 |
+
final_clean_df['์๊ฐ๋จ๊ฐ_์์ค'] = final_clean_df['์๊ฐ๋จ๊ฐ_๊ตฌ๊ฐ'].map(price_level_map)
|
| 245 |
+
final_clean_df['์ด์๊ฐ์์_์์ค'] = final_clean_df['์ด์๊ฐ์์_๊ตฌ๊ฐ'].map(operation_period_map)
|
| 246 |
+
final_clean_df['์๋งค์ถ๊ฑด์_์์ค'] = final_clean_df['์๋งค์ถ๊ฑด์_๊ตฌ๊ฐ'].map(transaction_count_map)
|
| 247 |
+
final_clean_df['์์ ๋ํฌ๊ณ ๊ฐ์_์์ค'] = final_clean_df['์์ ๋ํฌ๊ณ ๊ฐ์_๊ตฌ๊ฐ'].map(customer_count_map)
|
| 248 |
+
|
| 249 |
+
# --- ๋ฏธํ์ธ ๊ฐ ์ฒ๋ฆฌ ---
|
| 250 |
+
final_clean_df['๋งค์ถ๊ตฌ๊ฐ_์์ค'] = final_clean_df['๋งค์ถ๊ตฌ๊ฐ_์์ค'].fillna('๋ฏธํ์ธ')
|
| 251 |
+
final_clean_df['์๊ฐ๋จ๊ฐ_์์ค'] = final_clean_df['์๊ฐ๋จ๊ฐ_์์ค'].fillna('๋ฏธํ์ธ')
|
| 252 |
+
final_clean_df['์ด์๊ฐ์์_์์ค'] = final_clean_df['์ด์๊ฐ์์_์์ค'].fillna('๋ฏธํ์ธ')
|
| 253 |
+
final_clean_df['์๋งค์ถ๊ฑด์_์์ค'] = final_clean_df['์๋งค์ถ๊ฑด์_์์ค'].fillna('๋ฏธํ์ธ')
|
| 254 |
+
final_clean_df['์์ ๋ํฌ๊ณ ๊ฐ์_์์ค'] = final_clean_df['์์ ๋ํฌ๊ณ ๊ฐ์_์์ค'].fillna('๋ฏธํ์ธ')
|
| 255 |
+
# --------------------------------------------------------------------------
|
| 256 |
+
# final_df ์ ์ฅ
|
| 257 |
+
# --------------------------------------------------------------------------
|
| 258 |
+
|
| 259 |
+
# 'data' ํด๋ ๋ด์ ์ ์ฅ
|
| 260 |
+
save_path = get_file_path("final_df.csv")
|
| 261 |
+
|
| 262 |
+
# CSV ํ์ผ ์ ์ฅ (์ธ๋ฑ์ค ์ ์ธ)
|
| 263 |
+
final_clean_df.to_csv(save_path, index=False, encoding="utf-8-sig")
|
| 264 |
+
|
| 265 |
+
=======
|
| 266 |
+
# final_df.py
|
| 267 |
+
|
| 268 |
+
import pandas as pd
|
| 269 |
+
import numpy as np
|
| 270 |
+
import os
|
| 271 |
+
import sys
|
| 272 |
+
|
| 273 |
+
# ์คํฌ๋ฆฝํธ ํ์ผ์ด ์์นํ ๋๋ ํ ๋ฆฌ๋ฅผ ๊ธฐ์ค์ผ๋ก ๊ฒฝ๋ก ์ค์
|
| 274 |
+
script_path = os.path.abspath(sys.argv[0])
|
| 275 |
+
script_dir = os.path.dirname(script_path)
|
| 276 |
+
|
| 277 |
+
# ๋ฐ์ดํฐ ํด๋ ๊ฒฝ๋ก
|
| 278 |
+
data_dir = os.path.join(script_dir, 'data')
|
| 279 |
+
|
| 280 |
+
# 'data' ํด๋๊ฐ ์ค์ ๋ก ์กด์ฌํ๋์ง ํ์ธ (์์ ์ฅ์น)
|
| 281 |
+
if not os.path.exists(data_dir):
|
| 282 |
+
print(f"Error: Data directory not found at {data_dir}. Please check your folder structure.")
|
| 283 |
+
sys.exit(1)
|
| 284 |
+
|
| 285 |
+
# ํ์ผ ๊ฒฝ๋ก ํจ์
|
| 286 |
+
def get_file_path(filename):
|
| 287 |
+
"""data ํด๋ ๋ด์ ํ์ผ ๊ฒฝ๋ก๋ฅผ ๋ฐํํฉ๋๋ค."""
|
| 288 |
+
return os.path.join(data_dir, filename)
|
| 289 |
+
|
| 290 |
+
# --------------------------------------------------------------------------
|
| 291 |
+
#### 1) ๋ฐ์ดํฐ 1 - **๊ฐ๋งน์ ๊ฐ์์ ๋ณด**
|
| 292 |
+
# --------------------------------------------------------------------------
|
| 293 |
+
|
| 294 |
+
file_path1 = get_file_path('big_data_set1_f.csv')
|
| 295 |
+
|
| 296 |
+
try:
|
| 297 |
+
df1 = pd.read_csv(file_path1, encoding="cp949")
|
| 298 |
+
except FileNotFoundError:
|
| 299 |
+
print(f"Error: File not found at {file_path1}. Please ensure big_data_set1_f.csv is in the 'data' folder.")
|
| 300 |
+
sys.exit(1)
|
| 301 |
+
|
| 302 |
+
col_map1 = {
|
| 303 |
+
"ENCODED_MCT": "๊ฐ๋งน์ ID",
|
| 304 |
+
"MCT_BSE_AR": "๊ฐ๋งน์ ์ฃผ์",
|
| 305 |
+
"MCT_NM": "๊ฐ๋งน์ ๋ช
",
|
| 306 |
+
"MCT_BRD_NUM": "๋ธ๋๋๊ตฌ๋ถ์ฝ๋",
|
| 307 |
+
"MCT_SIGUNGU_NM": "์ง์ญ๋ช
",
|
| 308 |
+
"HPSN_MCT_ZCD_NM": "์
์ข
",
|
| 309 |
+
"HPSN_MCT_BZN_CD_NM": "์๊ถ",
|
| 310 |
+
"ARE_D": "๊ฐ์ค์ผ",
|
| 311 |
+
"MCT_ME_D": "ํ์
์ฌ๋ถ"
|
| 312 |
+
}
|
| 313 |
+
|
| 314 |
+
df1 = df1.rename(columns=col_map1)
|
| 315 |
+
|
| 316 |
+
# - ์ ์ฒ๋ฆฌ
|
| 317 |
+
df1['๋ธ๋๋๊ตฌ๋ถ์ฝ๋'] = df1['๋ธ๋๋๊ตฌ๋ถ์ฝ๋'].fillna('๋ฏธํ์ธ')
|
| 318 |
+
df1['์๊ถ'] = df1['์๊ถ'].fillna('๋ฏธํ์ธ')
|
| 319 |
+
|
| 320 |
+
df1['๊ฐ์ค์ผ'] = df1['๊ฐ์ค์ผ'].astype(str)
|
| 321 |
+
# errors='coerce' ์ถ๊ฐ: ipynb ์ฝ๋์๋ ์์ง๋ง, ์์ ํ datetime ๋ณํ์ ์ํด ์ ์ง (์๋ณธ py ์ฝ๋ ์ ์ง)
|
| 322 |
+
df1['๊ฐ์ค์ผ'] = pd.to_datetime(df1['๊ฐ์ค์ผ'], format='%Y%m%d', errors='coerce')
|
| 323 |
+
|
| 324 |
+
# ipynb ํ์ผ์์๋ errors='coerce'๊ฐ ์์์ง๋ง, int ๋ณํ ์ ์ค๋ฅ ๋ฐฉ์ง๋ฅผ ์ํด ์๋ณธ py ์ฝ๋์ ์์ ๋ก์ง์ ๋ฐ๋ฆ.
|
| 325 |
+
df1['ํ์
์ฌ๋ถ'] = df1['ํ์
์ฌ๋ถ'].apply(lambda x: pd.to_datetime(int(x), format='%Y%m%d', errors='coerce') if pd.notna(x) and str(x).isdigit() else pd.NaT)
|
| 326 |
+
df1['์ด์์ํ'] = df1['ํ์
์ฌ๋ถ'].apply(lambda x: '์ด์์ค' if pd.isna(x) else 'ํ์
')
|
| 327 |
+
|
| 328 |
+
# --------------------------------------------------------------------------
|
| 329 |
+
#### 2) ๋ฐ์ดํฐ 2 - **๊ฐ๋งน์ ์๋ณ ์ด์ฉ์ ๋ณด**
|
| 330 |
+
# --------------------------------------------------------------------------
|
| 331 |
+
|
| 332 |
+
file_path2 = get_file_path('big_data_set2_f.csv')
|
| 333 |
+
|
| 334 |
+
try:
|
| 335 |
+
df2 = pd.read_csv(file_path2, encoding="cp949")
|
| 336 |
+
except FileNotFoundError:
|
| 337 |
+
print(f"Error: File not found at {file_path2}. Please ensure big_data_set2_f.csv is in the 'data' folder.")
|
| 338 |
+
sys.exit(1)
|
| 339 |
+
|
| 340 |
+
col_map2 = {
|
| 341 |
+
"ENCODED_MCT": "๊ฐ๋งน์ ID",
|
| 342 |
+
"TA_YM": "๊ธฐ์ค๋
์",
|
| 343 |
+
"MCT_OPE_MS_CN": "์ด์๊ฐ์์_๊ตฌ๊ฐ",
|
| 344 |
+
"RC_M1_SAA": "์๋งค์ถ๊ธ์ก_๊ตฌ๊ฐ",
|
| 345 |
+
"RC_M1_TO_UE_CT": "์๋งค์ถ๊ฑด์_๊ตฌ๊ฐ",
|
| 346 |
+
"RC_M1_UE_CUS_CN": "์์ ๋ํฌ๊ณ ๊ฐ์_๊ตฌ๊ฐ",
|
| 347 |
+
"RC_M1_AV_NP_AT": "์๊ฐ๋จ๊ฐ_๊ตฌ๊ฐ",
|
| 348 |
+
"APV_CE_RAT": "์ทจ์์จ_๊ตฌ๊ฐ",
|
| 349 |
+
"DLV_SAA_RAT": "๋ฐฐ๋ฌ๋งค์ถ๋น์จ",
|
| 350 |
+
"M1_SME_RY_SAA_RAT": "๋์ผ์
์ข
๋งค์ถ๋๋น๋น์จ",
|
| 351 |
+
"M1_SME_RY_CNT_RAT": "๋์ผ์
์ข
๊ฑด์๋๋น๋น์จ",
|
| 352 |
+
"M12_SME_RY_SAA_PCE_RT": "๋์ผ์
์ข
๋ด๋งค์ถ์์๋น์จ",
|
| 353 |
+
"M12_SME_BZN_SAA_PCE_RT": "๋์ผ์๊ถ๋ด๋งค์ถ์์๋น์จ",
|
| 354 |
+
"M12_SME_RY_ME_MCT_RAT": "๋์ผ์
์ข
ํด์ง๊ฐ๋งน์ ๋น์ค",
|
| 355 |
+
"M12_SME_BZN_ME_MCT_RAT": "๋์ผ์๊ถํด์ง๊ฐ๋งน์ ๋น์ค"
|
| 356 |
+
}
|
| 357 |
+
|
| 358 |
+
df2 = df2.rename(columns=col_map2)
|
| 359 |
+
|
| 360 |
+
# - ์ ์ฒ๋ฆฌ
|
| 361 |
+
df2['๊ธฐ์ค๋
์'] = pd.to_datetime(df2['๊ธฐ์ค๋
์'].astype(str), format='%Y%m')
|
| 362 |
+
|
| 363 |
+
df2.replace(-999999.9, np.nan, inplace=True)
|
| 364 |
+
|
| 365 |
+
# --------------------------------------------------------------------------
|
| 366 |
+
#### 3) ๋ฐ์ดํฐ 3 - **๊ฐ๋งน์ ์๋ณ ์ด์ฉ ๊ณ ๊ฐ์ ๋ณด**
|
| 367 |
+
# --------------------------------------------------------------------------
|
| 368 |
+
|
| 369 |
+
file_path3 = get_file_path('big_data_set3_f.csv')
|
| 370 |
+
|
| 371 |
+
try:
|
| 372 |
+
df3 = pd.read_csv(file_path3, encoding="cp949")
|
| 373 |
+
except FileNotFoundError:
|
| 374 |
+
print(f"Error: File not found at {file_path3}. Please ensure big_data_set3_f.csv is in the 'data' folder.")
|
| 375 |
+
sys.exit(1)
|
| 376 |
+
|
| 377 |
+
col_map3 = {
|
| 378 |
+
"ENCODED_MCT": "๊ฐ๋งน์ ID",
|
| 379 |
+
"TA_YM": "๊ธฐ์ค๋
์",
|
| 380 |
+
"M12_MAL_1020_RAT": "๋จ์ฑ20๋์ด๏ฟฝ๏ฟฝ๏ฟฝ๋น์จ",
|
| 381 |
+
"M12_MAL_30_RAT": "๋จ์ฑ30๋๋น์จ",
|
| 382 |
+
"M12_MAL_40_RAT": "๋จ์ฑ40๋๋น์จ",
|
| 383 |
+
"M12_MAL_50_RAT": "๋จ์ฑ50๋๋น์จ",
|
| 384 |
+
"M12_MAL_60_RAT": "๋จ์ฑ60๋์ด์๋น์จ",
|
| 385 |
+
"M12_FME_1020_RAT": "์ฌ์ฑ20๋์ดํ๋น์จ",
|
| 386 |
+
"M12_FME_30_RAT": "์ฌ์ฑ30๋๋น์จ",
|
| 387 |
+
"M12_FME_40_RAT": "์ฌ์ฑ40๋๋น์จ",
|
| 388 |
+
"M12_FME_50_RAT": "์ฌ์ฑ50๋๋น์จ",
|
| 389 |
+
"M12_FME_60_RAT": "์ฌ์ฑ60๋์ด์๋น์จ",
|
| 390 |
+
"MCT_UE_CLN_REU_RAT": "์ฌ์ด์ฉ๊ณ ๊ฐ๋น์จ",
|
| 391 |
+
"MCT_UE_CLN_NEW_RAT": "์ ๊ท๊ณ ๊ฐ๋น์จ",
|
| 392 |
+
"RC_M1_SHC_RSD_UE_CLN_RAT": "๊ฑฐ์ฃผ์์ด์ฉ๋น์จ",
|
| 393 |
+
"RC_M1_SHC_WP_UE_CLN_RAT": "์ง์ฅ์ธ์ด์ฉ๋น์จ",
|
| 394 |
+
"RC_M1_SHC_FLP_UE_CLN_RAT": "์ ๋์ธ๊ตฌ์ด์ฉ๋น์จ"
|
| 395 |
+
}
|
| 396 |
+
|
| 397 |
+
df3 = df3.rename(columns=col_map3)
|
| 398 |
+
|
| 399 |
+
# - ์ ์ฒ๋ฆฌ
|
| 400 |
+
df3['๊ธฐ์ค๋
์'] = pd.to_datetime(df3['๊ธฐ์ค๋
์'].astype(str), format='%Y%m')
|
| 401 |
+
|
| 402 |
+
df3.replace(-999999.9, np.nan, inplace=True)
|
| 403 |
+
|
| 404 |
+
# --------------------------------------------------------------------------
|
| 405 |
+
#### ๋ฐ์ดํฐ ํตํฉ
|
| 406 |
+
# --------------------------------------------------------------------------
|
| 407 |
+
|
| 408 |
+
df23 = pd.merge(df2, df3, on=["๊ฐ๋งน์ ID", "๊ธฐ์ค๋
์"], how="inner")
|
| 409 |
+
|
| 410 |
+
final_df = pd.merge(df23, df1, on="๊ฐ๋งน์ ID", how="left")
|
| 411 |
+
|
| 412 |
+
# --------------------------------------------------------------------------
|
| 413 |
+
#### ์ด์๊ฐ ์ฒ๋ฆฌ
|
| 414 |
+
# --------------------------------------------------------------------------
|
| 415 |
+
|
| 416 |
+
non_seongdong_areas = [
|
| 417 |
+
'์๊ตฌ์ ๋ก๋ฐ์ค', 'ํ์ฐ์ง๊ตฌ', '๋ฏธ์์ฌ๊ฑฐ๋ฆฌ', '๋ฐฉ๋ฐฐ์ญ',
|
| 418 |
+
'์์', '๋๋๋ฌธ์ญ์ฌ๋ฌธํ๊ณต์์ญ', '๊ฑด๋์
๊ตฌ',
|
| 419 |
+
'์๋ฉด์ญ', '์ค๋จ'
|
| 420 |
+
]
|
| 421 |
+
|
| 422 |
+
# Step 1๏ธโฃ ์ฃผ์๊ฐ '์ฑ๋๊ตฌ'์ ํฌํจ๋ ๋ฐ์ดํฐ๋ง ๋จ๊ธฐ๊ธฐ
|
| 423 |
+
mask_seongdong_addr = final_df['๊ฐ๋งน์ ์ฃผ์'].str.contains('์ฑ๋๊ตฌ', na=False)
|
| 424 |
+
seongdong_df = final_df[mask_seongdong_addr].copy()
|
| 425 |
+
|
| 426 |
+
# Step 2๏ธโฃ ์๊ถ๋ช
์ด ์ฑ๋๊ตฌ ์ธ์ธ๋ฐ ์ฃผ์๋ ์ฑ๋๊ตฌ์ธ ๊ฒฝ์ฐ โ ๋ผ๋ฒจ ๊ต์
|
| 427 |
+
mask_mislabel = seongdong_df['์๊ถ'].isin(non_seongdong_areas)
|
| 428 |
+
seongdong_df.loc[mask_mislabel, '์๊ถ'] = '๋ฏธํ์ธ(์ฑ๋๊ตฌ)'
|
| 429 |
+
|
| 430 |
+
# Step 3๏ธโฃ (ipynb ์ฝ๋ ๋ก์ง ์ ์ฉ) ์๊ถ๋ช
์ด '๋ฏธํ์ธ'์ธ๋ฐ ์ฃผ์๊ฐ ์ฑ๋๊ตฌ๊ฐ ์๋ ๊ฒฝ์ฐ ์ ๊ฑฐ
|
| 431 |
+
# ์ฃผํผํฐ ๋
ธํธ๋ถ์ ๋ก์ง์ ๊ทธ๋๋ก ๋ฐ์ํฉ๋๋ค. (์ค์ ํํฐ๋ง ํจ๊ณผ๋ ์์ง๋ง, ์ฝ๋ ์ผ์น์ฑ ํ๋ณด)
|
| 432 |
+
final_clean_df = seongdong_df[
|
| 433 |
+
~(
|
| 434 |
+
(seongdong_df['์๊ถ'].str.contains('๋ฏธํ์ธ')) &
|
| 435 |
+
(~seongdong_df['๊ฐ๋งน์ ์ฃผ์'].str.contains('์ฑ๋๊ตฌ', na=False))
|
| 436 |
+
)
|
| 437 |
+
].copy()
|
| 438 |
+
|
| 439 |
+
# ์
์ข
- ํ ์
์ข
์ด 100ํผ์ธ ๊ฒฝ์ฐ ์ ์ธ(์ด์์น ์ทจ๊ธ)
|
| 440 |
+
final_clean_df = final_clean_df[final_clean_df['์
์ข
'] != '์ ์ ํ'].copy()
|
| 441 |
+
|
| 442 |
+
# ์๋งค์ถ๊ธ์ก_๊ตฌ๊ฐ ์ปฌ๋ผ์ ๊ณ ์ ๊ฐ
|
| 443 |
+
unique_sales_bins = final_clean_df['์๋งค์ถ๊ธ์ก_๊ตฌ๊ฐ'].dropna().unique()
|
| 444 |
+
|
| 445 |
+
# ๋งค์ถ๊ตฌ๊ฐ์์ค ๋งคํ ๋์
๋๋ฆฌ ์ ์
|
| 446 |
+
# --------------------------------------------------------------------------
|
| 447 |
+
# โ๏ธ [์์ ] ๊ตฌ๊ฐ -> ์์ค ๋ณํ (์ปฌ๋ผ๋ณ ๋ค๋ฅธ ๋ช
์นญ ์ ์ฉ)
|
| 448 |
+
# --------------------------------------------------------------------------
|
| 449 |
+
|
| 450 |
+
# --------------------------------------------------------------------------
|
| 451 |
+
# โ๏ธ [์์ ] ๊ตฌ๊ฐ -> ์์ค ๋ณํ (๋ชจ๋ ์ปฌ๋ผ ์ ์ฉ)
|
| 452 |
+
# --------------------------------------------------------------------------
|
| 453 |
+
|
| 454 |
+
# 1. '์๋งค์ถ๊ธ์ก_๊ตฌ๊ฐ' (๊ท๋ชจ/์์ ๊ธฐ์ค)
|
| 455 |
+
sales_volume_map = {
|
| 456 |
+
'1_10%์ดํ': '์ต์์',
|
| 457 |
+
'2_10-25%': '์์',
|
| 458 |
+
'3_25-50%': '์ค์์',
|
| 459 |
+
'4_50-75%': '์คํ์',
|
| 460 |
+
'5_75-90%': 'ํ์',
|
| 461 |
+
'6_90%์ด๊ณผ(ํ์ 10% ์ดํ)': '์ตํ์'
|
| 462 |
+
}
|
| 463 |
+
|
| 464 |
+
# 2. '์๊ฐ๋จ๊ฐ_๊ตฌ๊ฐ' (๊ฐ๊ฒฉ๋ ๊ธฐ์ค)
|
| 465 |
+
price_level_map = {
|
| 466 |
+
'1_10%์ดํ': '์ต๊ณ ๊ฐ',
|
| 467 |
+
'2_10-25%': '๊ณ ๊ฐ',
|
| 468 |
+
'3_25-50%': '์ค๊ฐ',
|
| 469 |
+
'4_50-75%': '์ค์ ๊ฐ',
|
| 470 |
+
'5_75-90%': '์ ๊ฐ',
|
| 471 |
+
'6_90%์ด๊ณผ(ํ์ 10% ์ดํ)': '์ต์ ๊ฐ'
|
| 472 |
+
}
|
| 473 |
+
|
| 474 |
+
# 3. '์ด์๊ฐ์์_๊ตฌ๊ฐ' (๊ฒฝํ/์ฐ์ฐจ ๊ธฐ์ค)
|
| 475 |
+
operation_period_map = {
|
| 476 |
+
'1_10%์ดํ': '์ต์ฅ๊ธฐ', # ๊ฐ์ฅ ์ค๋ ์ด์
|
| 477 |
+
'2_10-25%': '์ฅ๊ธฐ',
|
| 478 |
+
'3_25-50%': '์ค๊ธฐ',
|
| 479 |
+
'4_50-75%': '๋จ๊ธฐ',
|
| 480 |
+
'5_75-90%': '์ ๊ท',
|
| 481 |
+
'6_90%์ด๊ณผ(ํ์ 10% ์ดํ)': '์ต์ ๊ท' # ๊ฐ์ฅ ์ต๊ทผ ๊ฐ์
|
| 482 |
+
}
|
| 483 |
+
|
| 484 |
+
# 4. '์๋งค์ถ๊ฑด์_๊ตฌ๊ฐ' (๊ฑฐ๋๋/๋น๋ ๊ธฐ์ค)
|
| 485 |
+
transaction_count_map = {
|
| 486 |
+
'1_10%์ดํ': '๊ฑฐ๋ ์ต๋ค', # ๊ฑฐ๋๊ฐ ๊ฐ์ฅ ๋ง์
|
| 487 |
+
'2_10-25%': '๊ฑฐ๋ ๋ง์',
|
| 488 |
+
'3_25-50%': '๊ฑฐ๋ ๋ณดํต',
|
| 489 |
+
'4_50-75%': '๊ฑฐ๋ ์ ์',
|
| 490 |
+
'5_75-90%': '๊ฑฐ๋ ํฌ์',
|
| 491 |
+
'6_90%์ด๊ณผ(ํ์ 10% ์ดํ)': '๊ฑฐ๋ ์ต์ '
|
| 492 |
+
}
|
| 493 |
+
|
| 494 |
+
# 5. '์์ ๋ํฌ๊ณ ๊ฐ์_๊ตฌ๊ฐ' (๊ณ ๊ฐ ๊ท๋ชจ ๊ธฐ์ค)
|
| 495 |
+
customer_count_map = {
|
| 496 |
+
'1_10%์ดํ': '๊ณ ๊ฐ ์ต๋ค', # ๊ณ ๊ฐ ์๊ฐ ๊ฐ์ฅ ๋ง์
|
| 497 |
+
'2_10-25%': '๊ณ ๊ฐ ๋ง์',
|
| 498 |
+
'3_25-50%': '๊ณ ๊ฐ ๋ณดํต',
|
| 499 |
+
'4_50-75%': '๊ณ ๊ฐ ์ ์',
|
| 500 |
+
'5_75-90%': '๊ณ ๊ฐ ํฌ์',
|
| 501 |
+
'6_90%์ด๊ณผ(ํ์ 10% ์ดํ)': '๊ณ ๊ฐ ์ต์ '
|
| 502 |
+
}
|
| 503 |
+
|
| 504 |
+
|
| 505 |
+
# --- ์ ์ปฌ๋ผ ์์ฑ ---
|
| 506 |
+
|
| 507 |
+
final_clean_df['๋งค์ถ๊ตฌ๊ฐ_์์ค'] = final_clean_df['์๋งค์ถ๊ธ์ก_๊ตฌ๊ฐ'].map(sales_volume_map)
|
| 508 |
+
final_clean_df['์๊ฐ๋จ๊ฐ_์์ค'] = final_clean_df['์๊ฐ๋จ๊ฐ_๊ตฌ๊ฐ'].map(price_level_map)
|
| 509 |
+
final_clean_df['์ด์๊ฐ์์_์์ค'] = final_clean_df['์ด์๊ฐ์์_๊ตฌ๊ฐ'].map(operation_period_map)
|
| 510 |
+
final_clean_df['์๋งค์ถ๊ฑด์_์์ค'] = final_clean_df['์๋งค์ถ๊ฑด์_๊ตฌ๊ฐ'].map(transaction_count_map)
|
| 511 |
+
final_clean_df['์์ ๋ํฌ๊ณ ๊ฐ์_์์ค'] = final_clean_df['์์ ๋ํฌ๊ณ ๊ฐ์_๊ตฌ๊ฐ'].map(customer_count_map)
|
| 512 |
+
|
| 513 |
+
# --- ๋ฏธํ์ธ ๊ฐ ์ฒ๋ฆฌ ---
|
| 514 |
+
final_clean_df['๋งค์ถ๊ตฌ๊ฐ_์์ค'] = final_clean_df['๋งค์ถ๊ตฌ๊ฐ_์์ค'].fillna('๋ฏธํ์ธ')
|
| 515 |
+
final_clean_df['์๊ฐ๋จ๊ฐ_์์ค'] = final_clean_df['์๊ฐ๋จ๊ฐ_์์ค'].fillna('๋ฏธํ์ธ')
|
| 516 |
+
final_clean_df['์ด์๊ฐ์์_์์ค'] = final_clean_df['์ด์๊ฐ์์_์์ค'].fillna('๋ฏธํ์ธ')
|
| 517 |
+
final_clean_df['์๋งค์ถ๊ฑด์_์์ค'] = final_clean_df['์๋งค์ถ๊ฑด์_์์ค'].fillna('๋ฏธํ์ธ')
|
| 518 |
+
final_clean_df['์์ ๋ํฌ๊ณ ๊ฐ์_์์ค'] = final_clean_df['์์ ๋ํฌ๊ณ ๊ฐ์_์์ค'].fillna('๋ฏธํ์ธ')
|
| 519 |
+
# --------------------------------------------------------------------------
|
| 520 |
+
# final_df ์ ์ฅ
|
| 521 |
+
# --------------------------------------------------------------------------
|
| 522 |
+
|
| 523 |
+
# 'data' ํด๋ ๋ด์ ์ ์ฅ
|
| 524 |
+
save_path = get_file_path("final_df.csv")
|
| 525 |
+
|
| 526 |
+
# CSV ํ์ผ ์ ์ฅ (์ธ๋ฑ์ค ์ ์ธ)
|
| 527 |
+
final_clean_df.to_csv(save_path, index=False, encoding="utf-8-sig")
|
| 528 |
+
|
| 529 |
+
>>>>>>> 4025576cc0b52c8393af0ca720a1f6fabeb5e43a
|
| 530 |
+
print(f"CSV ํ์ผ ์ ์ฅ ์๋ฃ: {save_path}")
|
๊ธฐํ/feastival_df_add_keywords.py
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import pandas as pd
|
| 3 |
+
from langchain_google_genai import ChatGoogleGenerativeAI
|
| 4 |
+
from langchain_core.messages import HumanMessage
|
| 5 |
+
import time
|
| 6 |
+
|
| 7 |
+
# --- ์ค์ ---
|
| 8 |
+
# โ ๏ธ ์๋ณธ ํ์ผ ๊ฒฝ๋ก์ ์ ์ฅ๋ ํ์ผ ์ด๋ฆ์ ํ์ธํ์ธ์.
|
| 9 |
+
INPUT_CSV_PATH = 'festival_df.csv'
|
| 10 |
+
OUTPUT_CSV_PATH = 'festival_df_updated.csv'
|
| 11 |
+
# ----------------
|
| 12 |
+
|
| 13 |
+
def generate_keywords_from_description(llm, description: str) -> str:
|
| 14 |
+
"""
|
| 15 |
+
์ถ์ ์๊ฐ๊ธ์ ๋ฐํ์ผ๋ก Gemini AI๋ฅผ ์ฌ์ฉํ์ฌ ํค์๋๋ฅผ ์์ฑํฉ๋๋ค.
|
| 16 |
+
"""
|
| 17 |
+
if not isinstance(description, str) or not description.strip():
|
| 18 |
+
return ""
|
| 19 |
+
|
| 20 |
+
# AI์๊ฒ ์ญํ ์ ๋ถ์ฌํ๊ณ , ์ํ๋ ๊ฒฐ๊ณผ๋ฌผ์ ํ์๊ณผ ๋ด์ฉ์ ๊ตฌ์ฒด์ ์ผ๋ก ์ง์ํ๋ ํ๋กฌํํธ
|
| 21 |
+
prompt = f"""
|
| 22 |
+
๋น์ ์ ์ง์ญ ์ถ์ ์ ๋ฌธ ๋ง์ผํ
๋ถ์๊ฐ์
๋๋ค.
|
| 23 |
+
์๋ ์ ๊ณต๋ ์ถ์ ์๊ฐ๊ธ์ ์ฝ๊ณ , ๋ถ์ค ์ฐธ๊ฐ๋ฅผ ๊ณ ๋ คํ๋ ๊ฐ๊ฒ ์ฌ์ฅ๋์๊ฒ ๋์์ด ๋ ๋งํ ํต์ฌ ํค์๋๋ฅผ ์ถ์ถํด์ฃผ์ธ์.
|
| 24 |
+
|
| 25 |
+
[์ถ์ถ ๊ฐ์ด๋๋ผ์ธ]
|
| 26 |
+
1. ๋ค์ 5๊ฐ์ง ์นดํ
๊ณ ๋ฆฌ๋ก ํค์๋๋ฅผ ๋ถ๋ฅํด์ฃผ์ธ์:
|
| 27 |
+
- **ํ๊ฒ ๊ณ ๊ฐ**: (์: 20๋, ๊ฐ์กฑ ๋จ์, ์น๊ตฌ, ์ฐ์ธ, ์ธ๊ตญ์ธ ๊ด๊ด๊ฐ)
|
| 28 |
+
- **๊ณ์ **: (์: ๋ด, ์ฌ๋ฆ, ๊ฐ์, ๊ฒจ์ธ)
|
| 29 |
+
- **์ถ์ ๋ถ์๊ธฐ**: (์: ํ๊ธฐ์ฐฌ, ์ ํต์ ์ธ, ํํ, ์์ฐ ์นํ์ )
|
| 30 |
+
- **์ฃผ์ ์ฝํ
์ธ **: (์: ๋จน๊ฑฐ๋ฆฌ, ํธ๋ํธ๋ญ, ์ฒดํ ํ๋, ๊ณต์ฐ, ์ ํต๋ฌธํ, ๋ถ๊ฝ๋์ด, ํน์ฐ๋ฌผ)
|
| 31 |
+
- **ํต์ฌ ํ
๋ง**: (์: ์ญ์ฌ, ๋ฌธํ, ์์
, ์์ , ๊ณ์ )
|
| 32 |
+
2. ๋ชจ๋ ํค์๋๋ฅผ ์ผํ(,)๋ก ๊ตฌ๋ถ๋ ํ๋์ ๋ฌธ์์ด๋ก ๋ง๋ค์ด ๋ฐํํด์ฃผ์ธ์.
|
| 33 |
+
(์์: ๊ฐ์กฑ ๋จ์, ์ฐ์ธ, ํ๊ธฐ์ฐฌ, ์ ํต์ ์ธ, ๋จน๊ฑฐ๋ฆฌ, ์ฒดํ, ์ญ์ฌ, ๋ฌธํ)
|
| 34 |
+
3. ์๊ฐ๊ธ์์ ๊ทผ๊ฑฐ๋ฅผ ์ฐพ์ ์ ์๋ ๋ด์ฉ์ ์ถ์ธกํ์ฌ ๋ง๋ค์ง ๋ง์ธ์.
|
| 35 |
+
|
| 36 |
+
[์ถ์ ์๊ฐ๊ธ]
|
| 37 |
+
{description}
|
| 38 |
+
|
| 39 |
+
[์ถ์ถ๋ ํค์๋ (์ผํ๋ก ๊ตฌ๋ถ)]
|
| 40 |
+
"""
|
| 41 |
+
|
| 42 |
+
try:
|
| 43 |
+
message = HumanMessage(content=prompt)
|
| 44 |
+
response = llm.invoke([message])
|
| 45 |
+
return response.content.strip()
|
| 46 |
+
except Exception as e:
|
| 47 |
+
print(f" [์ค๋ฅ] API ํธ์ถ ์ค ๋ฌธ์ ๋ฐ์: {e}")
|
| 48 |
+
return ""
|
| 49 |
+
|
| 50 |
+
def main():
|
| 51 |
+
"""
|
| 52 |
+
๋ฉ์ธ ์คํ ํจ์
|
| 53 |
+
"""
|
| 54 |
+
print("--- ๐ค '์๊ฐ' ๊ธฐ๋ฐ AI ํค์๋ ์๋ ์์ฑ ์์
์ ์์ํฉ๋๋ค. ---")
|
| 55 |
+
|
| 56 |
+
# 1. Google API ํค ๋ฐ LLM ์ด๊ธฐํ
|
| 57 |
+
try:
|
| 58 |
+
# 'GOOGLE_API_KEY' ๋ผ๋ ์ด๋ฆ์ ํ๊ฒฝ ๋ณ์๋ฅผ ์ฐพ์ต๋๋ค.
|
| 59 |
+
google_api_key = os.getenv("GOOGLE_API_KEY")
|
| 60 |
+
if not google_api_key:
|
| 61 |
+
raise ValueError("GOOGLE_API_KEY ํ๊ฒฝ ๋ณ์๊ฐ ์ค์ ๋์ง ์์์ต๋๋ค. API ํค๋ฅผ ์ค์ ํด์ฃผ์ธ์.")
|
| 62 |
+
|
| 63 |
+
# ์ ํํ ํ๋จ์ ์ํด temperature๋ฅผ ๋ฎ๊ฒ ์ค์
|
| 64 |
+
llm = ChatGoogleGenerativeAI(
|
| 65 |
+
model="gemini-2.5-flash",
|
| 66 |
+
# ํ๊ฒฝ ๋ณ์์์ ๋ถ๋ฌ์จ ํค๋ฅผ ์ฌ์ฉํฉ๋๋ค.
|
| 67 |
+
google_api_key=google_api_key,
|
| 68 |
+
temperature=0.1
|
| 69 |
+
)
|
| 70 |
+
print("โ
Gemini ๋ชจ๋ธ ์ด๊ธฐํ ์๋ฃ.")
|
| 71 |
+
except Exception as e:
|
| 72 |
+
print(f"โ [์น๋ช
์ ์ค๋ฅ] Gemini ๋ชจ๋ธ ์ด๊ธฐํ ์คํจ: {e}")
|
| 73 |
+
return
|
| 74 |
+
|
| 75 |
+
# 2. CSV ํ์ผ ๋ก๋
|
| 76 |
+
try:
|
| 77 |
+
df = pd.read_csv(INPUT_CSV_PATH)
|
| 78 |
+
print(f"โ
'{INPUT_CSV_PATH}' ํ์ผ ๋ก๋ฉ ์๋ฃ. (์ด {len(df)}๊ฐ ์ถ์ )")
|
| 79 |
+
except FileNotFoundError:
|
| 80 |
+
print(f"โ [์น๋ช
์ ์ค๋ฅ] ํ์ผ์ ์ฐพ์ ์ ์์ต๋๋ค: '{INPUT_CSV_PATH}'")
|
| 81 |
+
print(" ํ๋ก์ ํธ ํด๋ ๋ด์ 'festival_df.csv' ํ์ผ์ด ์๋์ง ํ์ธํด์ฃผ์ธ์.")
|
| 82 |
+
return
|
| 83 |
+
|
| 84 |
+
# 3. ๊ฐ ์ถ์ ๋ณ๋ก ํค์๋ ์์ฑ ๋ฐ ์ถ๊ฐ
|
| 85 |
+
new_keywords_list = []
|
| 86 |
+
total_rows = len(df)
|
| 87 |
+
|
| 88 |
+
for index, row in df.iterrows():
|
| 89 |
+
print(f"\n--- ({index + 1}/{total_rows}) '{row['์ถ์ ๋ช
']}' ์์
์ค ---")
|
| 90 |
+
|
| 91 |
+
description = row['์๊ฐ']
|
| 92 |
+
|
| 93 |
+
print(" - AI๋ฅผ ํธ์ถํ์ฌ ํค์๋๋ฅผ ์์ฑํฉ๋๋ค...")
|
| 94 |
+
new_keywords = generate_keywords_from_description(llm, description)
|
| 95 |
+
|
| 96 |
+
original_keywords = str(row.get('ํค์๋', ''))
|
| 97 |
+
|
| 98 |
+
all_keywords = original_keywords.split(',') + new_keywords.split(',')
|
| 99 |
+
unique_keywords = sorted(list(set([k.strip() for k in all_keywords if k.strip()])))
|
| 100 |
+
|
| 101 |
+
final_keywords_str = ', '.join(unique_keywords)
|
| 102 |
+
new_keywords_list.append(final_keywords_str)
|
| 103 |
+
|
| 104 |
+
print(f" - [๊ธฐ์กด ํค์๋]: {original_keywords if original_keywords else '์์'}")
|
| 105 |
+
print(f" - [AI ์์ฑ ํค์๋]: {new_keywords}")
|
| 106 |
+
print(f" - [์ต์ข
ํค์๋]: {final_keywords_str}")
|
| 107 |
+
|
| 108 |
+
time.sleep(0.5)
|
| 109 |
+
|
| 110 |
+
# 4. DataFrame์ ์๋ก์ด ํค์๋ ์ด ์ถ๊ฐ ๋ฐ ์ ์ฅ
|
| 111 |
+
df['ํค์๋'] = new_keywords_list
|
| 112 |
+
|
| 113 |
+
df.to_csv(OUTPUT_CSV_PATH, index=False, encoding='utf-8-sig')
|
| 114 |
+
print(f"\n--- ๐ ์์
์๋ฃ! ---")
|
| 115 |
+
print(f"โ
์๋ก์ด ํค์๋๊ฐ ์ถ๊ฐ๋ ํ์ผ์ด '{OUTPUT_CSV_PATH}' ๊ฒฝ๋ก์ ์ ์ฅ๋์์ต๋๋ค.")
|
| 116 |
+
|
| 117 |
+
if __name__ == "__main__":
|
| 118 |
+
main()
|
๊ธฐํ/festival_df_first.py
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import glob
|
| 3 |
+
import os
|
| 4 |
+
|
| 5 |
+
# --- ์ค์ ๋ถ๋ถ ---
|
| 6 |
+
# 1. ๋ฐ์ดํฐ ํ์ผ๋ค์ด ์ ์ฅ๋ ํด๋ ๊ฒฝ๋ก๋ฅผ ์ง์ ํฉ๋๋ค.
|
| 7 |
+
# Windows ๊ฒฝ๋ก์ ๊ฒฝ์ฐ, ์ญ์ฌ๋์(\)๋ฅผ ๋ ๋ฒ ์ฐ๊ฑฐ๋(C:\\...) ์ฌ๋์(/)๋ก ๋ณ๊ฒฝํด์ผ ํฉ๋๋ค.
|
| 8 |
+
folder_path = 'C:/projects/shcard_2025_bigcontest/data/festival'
|
| 9 |
+
|
| 10 |
+
# 2. ํตํฉ๋ ํ์ผ์ ์ ์ฅํ ๊ฒฝ๋ก๋ฅผ ์ง์ ํฉ๋๋ค. (๊ฒฐ๊ณผ๋ฅผ ๊ฐ์ ํด๋์ ์ ์ฅ)
|
| 11 |
+
output_path = 'C:/projects/shcard_2025_bigcontest/data'
|
| 12 |
+
|
| 13 |
+
# --- ๋ฐ์ดํฐ ํตํฉ ํจ์ ์ ์ ---
|
| 14 |
+
def combine_festival_data(path, pattern, output_filename):
|
| 15 |
+
"""
|
| 16 |
+
์ง์ ๋ ๊ฒฝ๋ก์์ ํน์ ํจํด์ CSV ํ์ผ๋ค์ ์ฐพ์ ํ๋๋ก ํตํฉํ๊ณ ์ ์ฅํ๋ ํจ์.
|
| 17 |
+
|
| 18 |
+
:param path: CSV ํ์ผ๋ค์ด ์๋ ํด๋ ๊ฒฝ๋ก
|
| 19 |
+
:param pattern: ์ฐพ์ ํ์ผ ์ด๋ฆ์ ํจํด (์: '*_๋ฌธํ๊ด๊ด์ถ์ ์ฃผ์ ์งํ.csv')
|
| 20 |
+
:param output_filename: ์ ์ฅํ ์ต์ข
CSV ํ์ผ ์ด๋ฆ
|
| 21 |
+
"""
|
| 22 |
+
# ์ง์ ๋ ๊ฒฝ๋ก์ ํจํด์ ๊ฒฐํฉํ์ฌ ํ์ผ ๋ชฉ๋ก์ ๊ฐ์ ธ์ต๋๋ค.
|
| 23 |
+
file_list = glob.glob(os.path.join(path, pattern))
|
| 24 |
+
|
| 25 |
+
if not file_list:
|
| 26 |
+
print(f"โ ๏ธ ๊ฒฝ๊ณ : '{pattern}' ํจํด์ ํด๋นํ๋ ํ์ผ์ ์ฐพ์ ์ ์์ต๋๋ค.")
|
| 27 |
+
print(f"๊ฒฝ๋ก๋ฅผ ํ์ธํด์ฃผ์ธ์: {path}\n")
|
| 28 |
+
return
|
| 29 |
+
|
| 30 |
+
# ๊ฐ ํ์ผ์ DataFrame์ผ๋ก ์ฝ์ด ๋ฆฌ์คํธ์ ์ถ๊ฐํฉ๋๋ค.
|
| 31 |
+
df_list = [pd.read_csv(file) for file in file_list]
|
| 32 |
+
|
| 33 |
+
# ๋ชจ๋ DataFrame์ ํ๋๋ก ํฉ์นฉ๋๋ค.
|
| 34 |
+
combined_df = pd.concat(df_list, ignore_index=True)
|
| 35 |
+
|
| 36 |
+
# ํตํฉ๋ ๋ฐ์ดํฐ๋ฅผ CSV ํ์ผ๋ก ์ ์ฅํฉ๋๋ค.
|
| 37 |
+
# encoding='utf-8-sig'๋ Excel์์ ํ๊ธ์ด ๊นจ์ง์ง ์๋๋ก ํด์ค๋๋ค.
|
| 38 |
+
output_filepath = os.path.join(output_path, output_filename)
|
| 39 |
+
combined_df.to_csv(output_filepath, index=False, encoding='utf-8-sig')
|
| 40 |
+
|
| 41 |
+
print(f"โ
์ฑ๊ณต: {len(file_list)}๊ฐ์ ํ์ผ์ ํตํฉํ์ฌ '{output_filename}'์ผ๋ก ์ ์ฅํ์ต๋๋ค.")
|
| 42 |
+
print(f" - ์ด {len(combined_df)}๊ฐ์ ํ์ด ์์ฑ๋์์ต๋๋ค.")
|
| 43 |
+
print(f" - ์ ์ฅ ๊ฒฝ๋ก: {output_filepath}\n")
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
# --- ๋ฉ์ธ ์ฝ๋ ์คํ ---
|
| 47 |
+
print("===== ์ถ์ ๋ฐ์ดํฐ ํตํฉ์ ์์ํฉ๋๋ค. =====\n")
|
| 48 |
+
|
| 49 |
+
# 1. ๋ฌธํ๊ด๊ด์ถ์ ์ฃผ์ ์งํ ํตํฉ
|
| 50 |
+
combine_festival_data(folder_path, '*_๋ฌธํ๊ด๊ด์ถ์ ์ฃผ์ ์งํ.csv', 'ํตํฉ_๋ฌธํ๊ด๊ด์ถ์ _์ฃผ์_์งํ.csv')
|
| 51 |
+
|
| 52 |
+
# 2. ์ฑ_์ฐ๋ น๋ณ ๋ด๊ตญ์ธ ๋ฐฉ๋ฌธ์ ํตํฉ
|
| 53 |
+
combine_festival_data(folder_path, '*_์ฑ_์ฐ๋ น๋ณ ๋ด๊ตญ์ธ ๋ฐฉ๋ฌธ์.csv', 'ํตํฉ_์ฑ_์ฐ๋ น๋ณ_๋ด๊ตญ์ธ_๋ฐฉ๋ฌธ์.csv')
|
| 54 |
+
|
| 55 |
+
# 3. ์ฐ๋๋ณ ๋ฐฉ๋ฌธ์ ์ถ์ด ํตํฉ
|
| 56 |
+
combine_festival_data(folder_path, '*_์ฐ๋๋ณ ๋ฐฉ๋ฌธ์ ์ถ์ด.csv', 'ํตํฉ_์ฐ๋๋ณ_๋ฐฉ๋ฌธ์_์ถ์ด.csv')
|
| 57 |
+
|
| 58 |
+
print("===== ๋ชจ๋ ๋ฐ์ดํฐ ํตํฉ ์์
์ด ์๋ฃ๋์์ต๋๋ค. =====")
|
๊ธฐํ/festival_df_processing.py
ADDED
|
@@ -0,0 +1,235 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- coding: utf-8 -*-
|
| 2 |
+
"""festival_processing.ipynb
|
| 3 |
+
|
| 4 |
+
Automatically generated by Colab.
|
| 5 |
+
|
| 6 |
+
Original file is located at
|
| 7 |
+
https://colab.research.google.com/drive/1NnUdWSIUNLRY4O9PmcX5GFgeaTekcO7c
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
# 1. ๊ตฌ๊ธ ๋๋ผ์ด๋ธ ๋ง์ดํธ
|
| 11 |
+
from google.colab import drive
|
| 12 |
+
drive.mount('/content/drive')
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
# 2. CSV ํ์ผ ๊ฒฝ๋ก ์ง์
|
| 16 |
+
file_path = '/content/drive/MyDrive/Colab Notebooks/BigContest_2025/festival_df.csv'
|
| 17 |
+
|
| 18 |
+
# 3. ๋ฐ์ดํฐ ๋ถ๋ฌ์ค๊ธฐ
|
| 19 |
+
import pandas as pd
|
| 20 |
+
festival_df = pd.read_csv(file_path, encoding='utf-8')
|
| 21 |
+
|
| 22 |
+
# 4. ๋ฐ์ดํฐ ํ์ธ
|
| 23 |
+
print("===== ๋ฐ์ดํฐ ์ ๋ณด =====")
|
| 24 |
+
print(festival_df.info())
|
| 25 |
+
print("\n===== ๋ฐ์ดํฐ ์ํ =====")
|
| 26 |
+
print(festival_df.head())
|
| 27 |
+
print("\n===== ๊ธฐ๋ณธ ํต๊ณ =====")
|
| 28 |
+
print(festival_df.describe())
|
| 29 |
+
|
| 30 |
+
"""- ์ฃผ์ ์ฑ๋ณ
|
| 31 |
+
|
| 32 |
+
- ์ฃผ์ ๋์ด
|
| 33 |
+
"""
|
| 34 |
+
|
| 35 |
+
# ์ฃผ์ ์ฑ๋ณ
|
| 36 |
+
# ๋จ/์ฌ ์ปฌ๋ผ ๋ฆฌ์คํธ
|
| 37 |
+
male_cols = [c for c in festival_df.columns if '๋จ์ฑ๋น์จ' in c]
|
| 38 |
+
female_cols = [c for c in festival_df.columns if '์ฌ์ฑ๋น์จ' in c]
|
| 39 |
+
|
| 40 |
+
# ๋จ์ฑ/์ฌ์ฑ ๋น์จ ํฉ ๊ณ์ฐ - ๋จ์ฑ+์ฌ์ฑ์ด 100%์ธ์ง ์ฒดํฌ
|
| 41 |
+
festival_df['๋จ์ฑํฉ'] = festival_df[male_cols].sum(axis=1)
|
| 42 |
+
festival_df['์ฌ์ฑํฉ'] = festival_df[female_cols].sum(axis=1)
|
| 43 |
+
festival_df['๋จ๋
ํฉ๊ณ'] = festival_df['๋จ์ฑํฉ'] + festival_df['์ฌ์ฑํฉ']
|
| 44 |
+
|
| 45 |
+
# ํฉ๊ณ๊ฐ 100 ๊ทผ์ฒ์ธ์ง ํ์ธ
|
| 46 |
+
print(festival_df[['์ถ์ ๋ช
', '๋จ์ฑํฉ', '์ฌ์ฑํฉ', '๋จ๋
ํฉ๊ณ']].head())
|
| 47 |
+
|
| 48 |
+
import numpy as np
|
| 49 |
+
|
| 50 |
+
# 1๏ธโฃ ์ฃผ์ ์ฑ๋ณ ์ปฌ๋ผ ์ถ๊ฐ
|
| 51 |
+
festival_df['์ฃผ์์ฑ๋ณ'] = np.where(
|
| 52 |
+
festival_df['๋จ์ฑํฉ'] > festival_df['์ฌ์ฑํฉ'], '๋จ์ฑ', '์ฌ์ฑ'
|
| 53 |
+
)
|
| 54 |
+
|
| 55 |
+
# 2๏ธโฃ ์ฃผ์ ์ฐ๋ น๋ ์ปฌ๋ผ ์ถ๊ฐ
|
| 56 |
+
age_groups = ['09์ธ', '1019์ธ', '2029์ธ', '3039์ธ', '4049์ธ', '5059์ธ', '6069์ธ', '70์ธ์ด์']
|
| 57 |
+
|
| 58 |
+
# ๊ฐ ์ฐ๋ น๋๋ณ ๋จ+์ฌ ํฉ ๊ณ์ฐ
|
| 59 |
+
for age in age_groups:
|
| 60 |
+
festival_df[f'์ฐ๋ น๋_{age}_ํฉ'] = (
|
| 61 |
+
festival_df[f'๋จ์ฑ๋น์จ_{age}'] + festival_df[f'์ฌ์ฑ๋น์จ_{age}']
|
| 62 |
+
)
|
| 63 |
+
|
| 64 |
+
# ๊ฐ ํ๋ณ๋ก ๊ฐ์ฅ ํฐ ์ฐ๋ น๋ ์ฐพ๊ธฐ (NaN ๋ฐฉ์ด ํฌํจ)
|
| 65 |
+
festival_df['์ฃผ์์ฐ๋ น๋'] = (
|
| 66 |
+
festival_df[[f'์ฐ๋ น๋_{age}_ํฉ' for age in age_groups]]
|
| 67 |
+
.fillna(0)
|
| 68 |
+
.idxmax(axis=1)
|
| 69 |
+
.str.replace('์ฐ๋ น๋_', '', regex=False)
|
| 70 |
+
.str.replace('_ํฉ', '', regex=False)
|
| 71 |
+
)
|
| 72 |
+
|
| 73 |
+
# 3๏ธโฃ ์ค๊ฐ ๊ณ์ฐ ์ปฌ๋ผ(์ฐ๋ น๋_*_ํฉ)์ ์ ๋ฆฌํด์ ์ ๊ฑฐ ๊ฐ๋ฅ
|
| 74 |
+
festival_df.drop(columns=[f'์ฐ๋ น๋_{age}_ํฉ' for age in age_groups], inplace=True)
|
| 75 |
+
|
| 76 |
+
# โ
๊ฒฐ๊ณผ ๋ฏธ๋ฆฌ๋ณด๊ธฐ
|
| 77 |
+
print(festival_df[['์ถ์ ๋ช
', '์ฃผ์์ฑ๋ณ', '์ฃผ์์ฐ๋ น๋']].head())
|
| 78 |
+
print('--------------------------------------------')
|
| 79 |
+
print(festival_df[['์ถ์ ๋ช
', '์ฃผ์์ฑ๋ณ', '์ฃผ์์ฐ๋ น๋']].tail())
|
| 80 |
+
|
| 81 |
+
festival_df
|
| 82 |
+
|
| 83 |
+
"""- ์ฃผ์ ๊ณ ๊ฐ์ธต(์ฑ๋ณ+์ฐ๋ น)"""
|
| 84 |
+
|
| 85 |
+
# ๋จ/์ฌ ๊ฐ ์ฐ๋ น๋ ์ปฌ๋ผ ๋ฆฌ์คํธ
|
| 86 |
+
gender_age_cols = [f'๋จ์ฑ๋น์จ_{age}' for age in age_groups] + [f'์ฌ์ฑ๋น์จ_{age}' for age in age_groups]
|
| 87 |
+
|
| 88 |
+
def find_key_customer(row):
|
| 89 |
+
# ํด๋น ํ์์ ์ต๋๊ฐ์ ๊ฐ์ง๋ ์ปฌ๋ผ ์ฐพ๊ธฐ
|
| 90 |
+
max_col = row[gender_age_cols].idxmax()
|
| 91 |
+
|
| 92 |
+
# ์ปฌ๋ผ๋ช
์์ ์ฑ๋ณ๊ณผ ๋์ด ์ถ์ถ
|
| 93 |
+
gender, age = max_col.split('_')
|
| 94 |
+
|
| 95 |
+
# ๋์ด ํํ
|
| 96 |
+
if age == '70์ธ์ด์':
|
| 97 |
+
age_str = '70์ธ ์ด์'
|
| 98 |
+
else:
|
| 99 |
+
age_str = age[:2] + '~' + age[2:]
|
| 100 |
+
|
| 101 |
+
return f"{gender} {age_str}"
|
| 102 |
+
|
| 103 |
+
festival_df['์ฃผ์๊ณ ๊ฐ์ธต'] = festival_df.apply(find_key_customer, axis=1)
|
| 104 |
+
|
| 105 |
+
# ๊ฒฐ๊ณผ ํ์ธ
|
| 106 |
+
print(festival_df[['์ถ์ ๋ช
', '์ฃผ์๊ณ ๊ฐ์ธต']].head())
|
| 107 |
+
|
| 108 |
+
"""- ์ฃผ์ ๋ฐฉ๋ฌธ์(ํ์ง์ธ/์ธ์ง์ธ)"""
|
| 109 |
+
|
| 110 |
+
# ์ฐ๋๋ณ ํ์ง์ธ ๋ฐฉ๋ฌธ์ ์ ํฉ๊ณ
|
| 111 |
+
local_cols = ['2018_(ํ์ง์ธ)๋ฐฉ๋ฌธ์์', '2019_(ํ์ง์ธ)๋ฐฉ๋ฌธ์์',
|
| 112 |
+
'2022_(ํ์ง์ธ)๋ฐฉ๋ฌธ์์', '2023_(ํ์ง์ธ)๋ฐฉ๋ฌธ์์',
|
| 113 |
+
'2024_(ํ์ง์ธ)๋ฐฉ๋ฌธ์์']
|
| 114 |
+
|
| 115 |
+
# ์ฐ๋๋ณ ์ธ๋ถ ๋ฐฉ๋ฌธ์ ์ ํฉ๊ณ (์ธ์ง์ธ + ์ธ๊ตญ์ธ)
|
| 116 |
+
outside_cols = ['2018_(์ธ์ง์ธ)๋ฐฉ๋ฌธ์์', '2019_(์ธ์ง์ธ)๋ฐฉ๋ฌธ์์',
|
| 117 |
+
'2022_(์ธ์ง์ธ)๋ฐฉ๋ฌธ์์', '2023_(์ธ์ง์ธ)๋ฐฉ๋ฌธ์์',
|
| 118 |
+
'2024_(์ธ์ง์ธ)๋ฐฉ๋ฌธ์์']
|
| 119 |
+
|
| 120 |
+
foreign_cols = ['2018_(์ธ๊ตญ์ธ)๋ฐฉ๋ฌธ์์', '2019_(์ธ๊ตญ์ธ)๋ฐฉ๋ฌธ์์',
|
| 121 |
+
'2022_(์ธ๊ตญ์ธ)๋ฐฉ๋ฌธ์์', '2023_(์ธ๊ตญ์ธ)๋ฐฉ๋ฌธ์์',
|
| 122 |
+
'2024_(์ธ๊ตญ์ธ)๋ฐฉ๋ฌธ์์']
|
| 123 |
+
|
| 124 |
+
festival_df['์ดํ์ง์ธ'] = festival_df[local_cols].sum(axis=1)
|
| 125 |
+
festival_df['์ด์ธ๋ถ๋ฐฉ๋ฌธ์'] = festival_df[outside_cols + foreign_cols].sum(axis=1)
|
| 126 |
+
|
| 127 |
+
# ์ฃผ์ ๋ฐฉ๋ฌธ์ ํ๋จ
|
| 128 |
+
festival_df['์ฃผ์๋ฐฉ๋ฌธ์'] = np.where(
|
| 129 |
+
festival_df['์ดํ์ง์ธ'] >= festival_df['์ด์ธ๋ถ๋ฐฉ๋ฌธ์'],
|
| 130 |
+
'ํ์ง์ธ',
|
| 131 |
+
'์ธ๋ถ๋ฐฉ๋ฌธ์'
|
| 132 |
+
)
|
| 133 |
+
|
| 134 |
+
# ์ค๊ฐ ํฉ๊ณ ์ปฌ๋ผ ์ญ์
|
| 135 |
+
festival_df.drop(columns=['์ดํ์ง์ธ', '์ด์ธ๋ถ๋ฐฉ๋ฌธ์'], inplace=True)
|
| 136 |
+
|
| 137 |
+
# ๊ฒฐ๊ณผ ํ์ธ
|
| 138 |
+
print(festival_df[['์ถ์ ๋ช
', '์ฃผ์๋ฐฉ๋ฌธ์']])
|
| 139 |
+
|
| 140 |
+
festival_df.columns
|
| 141 |
+
|
| 142 |
+
"""- ์ถ์ ์ธ๊ธฐ
|
| 143 |
+
|
| 144 |
+
- ์ ์ฒด ๋ฐฉ๋ฌธ์์(์ ์ฒด๋ฐฉ๋ฌธ์์)
|
| 145 |
+
- ์ผํ๊ท ๋ฐฉ๋ฌธ์์
|
| 146 |
+
- ์ถ์ ๊ธฐ๊ฐ ๋ด๋น๊ฒ์ด์
๊ฒ์๋
|
| 147 |
+
- ์ถ์ ๊ธฐ๊ฐ ๊ด๊ด ์๋น
|
| 148 |
+
|
| 149 |
+
์ ์ปฌ๋ผ ์ด์ฉํด์ ์ธ๊ธฐ ๊ณ์ฐ
|
| 150 |
+
|
| 151 |
+
1) ์ฐ๋๋ณ ๋ฐ์ดํฐ๋ฅผ ๋ฐํ์ผ๋ก ๋ชจ๋ ์งํ๋ฅผ ๊ณ์ฐ
|
| 152 |
+
2) ๊ฐ ์ฐ๋๋ณ ์งํ๋ฅผ ํ์คํ -> ์ ์๏ฟฝ๏ฟฝ๏ฟฝํ๊ณ ํ๊ท ๋ด์ ํ๋์ ์ข
ํฉ '์ธ๊ธฐ ์ ์'๋ฅผ ์์ฑ
|
| 153 |
+
3) ์ข
ํฉ ์ธ๊ธฐ ์ ์๋ฅผ ๊ธฐ์ค์ผ๋ก '์/์ค/ํ' ๋ฑ๊ธ ๋๋
|
| 154 |
+
"""
|
| 155 |
+
|
| 156 |
+
import numpy as np
|
| 157 |
+
from sklearn.preprocessing import MinMaxScaler
|
| 158 |
+
|
| 159 |
+
# 1๏ธโฃ ์ธ๊ธฐ ์งํ ์ปฌ๋ผ ์ ์ (์ฐ๋๋ณ ๋ฐฉ๋ฌธ์์, ์ผํ๊ท ๋ฐฉ๋ฌธ์์, ๊ฒ์๋ ๋ฑ)
|
| 160 |
+
years = ['2018', '2019', '2022', '2023', '2024']
|
| 161 |
+
visitor_cols = [f'{year}_(์ ์ฒด)๋ฐฉ๋ฌธ์์' for year in years]
|
| 162 |
+
daily_avg_cols = [f'{year}_์ผํ๊ท ๋ฐฉ๋ฌธ์์' for year in years]
|
| 163 |
+
nav_cols = [f'{year}_์ถ์ ๊ธฐ๊ฐ_๋ด๋น๊ฒ์ด์
๊ฒ์๋' for year in years]
|
| 164 |
+
|
| 165 |
+
# 2๏ธโฃ ๋ชจ๋ ์งํ๋ฅผ ํฉ์ณ์ ์๋ก์ด ์ ์ ๋ฐ์ดํฐํ๋ ์ ์์ฑ
|
| 166 |
+
score_df = festival_df[visitor_cols + daily_avg_cols + nav_cols].fillna(0)
|
| 167 |
+
|
| 168 |
+
# 3๏ธโฃ MinMaxScaler๋ก 0~1๋ก ์ ๊ทํ
|
| 169 |
+
scaler = MinMaxScaler()
|
| 170 |
+
score_scaled = scaler.fit_transform(score_df)
|
| 171 |
+
|
| 172 |
+
# 4๏ธโฃ ์ฐ๋/์งํ๋ณ ์ ์ ํ๊ท ๋ด๊ธฐ
|
| 173 |
+
festival_df['์ธ๊ธฐ๋_์ ์'] = score_scaled.mean(axis=1)
|
| 174 |
+
|
| 175 |
+
# 5๏ธโฃ ์ ์๋ฅผ ๊ธฐ์ค์ผ๋ก ๋ฑ๊ธ ๋๋๊ธฐ (์/์ค/ํ)
|
| 176 |
+
festival_df['์ถ์ ์ธ๊ธฐ'] = pd.cut(
|
| 177 |
+
festival_df['์ธ๊ธฐ๋_์ ์'],
|
| 178 |
+
bins=[-0.01, 0.33, 0.66, 1.01],
|
| 179 |
+
labels=['ํ', '์ค', '์']
|
| 180 |
+
)
|
| 181 |
+
|
| 182 |
+
# โ
๊ฒฐ๊ณผ ๋ฏธ๋ฆฌ๋ณด๊ธฐ
|
| 183 |
+
print(festival_df[['์ถ์ ๋ช
', '์ธ๊ธฐ๋_์ ์', '์ถ์ ์ธ๊ธฐ']])
|
| 184 |
+
|
| 185 |
+
"""- ์ถ์ ์ธ๊ธฐ๋(์์น/ํ๋ฝ/๋ฏธ๋ฏธ)
|
| 186 |
+
|
| 187 |
+
- ์ฐ๋๋ณ ๋ณํ์จ์ ํ๊ท ์ผ๋ก ์ธ๊ธฐ๋ ์์น/ํ๋ฝ/๋ฏธ๋ฏธ ๊ฒฐ์
|
| 188 |
+
|
| 189 |
+
- ํ๊ท ์ด ์์ โ ์ ์ฒด์ ์ผ๋ก ์์น ์ถ์ธ
|
| 190 |
+
|
| 191 |
+
- ํ๊ท ์ด ์์ โ ์ ์ฒด์ ์ผ๋ก ํ๋ฝ ์ถ์ธ
|
| 192 |
+
|
| 193 |
+
- ํ๊ท ์ด ๊ฑฐ์ 0 โ ๋ฏธ๋ฏธํ ์ถ์ธ
|
| 194 |
+
"""
|
| 195 |
+
|
| 196 |
+
# 1๏ธโฃ ์ฐ๋๋ณ ์ธ๊ธฐ ์งํ ์ปฌ๋ผ๋ค
|
| 197 |
+
pop_cols = ['์ ์ฒด๋ฐฉ๋ฌธ์์', '์ผํ๊ท ๋ฐฉ๋ฌธ์์', '์ถ์ ๊ธฐ๊ฐ_๋ด๋น๊ฒ์ด์
๊ฒ์๋', '์ถ์ ๊ธฐ๊ฐ_๊ด๊ด์๋น']
|
| 198 |
+
|
| 199 |
+
# 2๏ธโฃ ์ฐ๋ ๋ฆฌ์คํธ (๋ฐ์ดํฐ์ ๋ง์ถฐ ์กฐ์ )
|
| 200 |
+
years = ['2018', '2019', '2022', '2023', '2024']
|
| 201 |
+
|
| 202 |
+
# 3๏ธโฃ ์ฐ๋๋ณ ๋ณํ์จ ๊ณ์ฐ
|
| 203 |
+
trend_list = []
|
| 204 |
+
for idx, row in festival_df.iterrows():
|
| 205 |
+
change_rates = []
|
| 206 |
+
for col in pop_cols:
|
| 207 |
+
year_values = [row[f'{year}_{col}'] for year in years if f'{year}_{col}' in festival_df.columns]
|
| 208 |
+
# ์ฐ์๋ ์ฐ๋ ๋ณํ์จ ๊ณ์ฐ ((์ด๋ฒ์ฐ๋-์ด์ ์ฐ๋)/์ด์ ์ฐ๋)
|
| 209 |
+
for i in range(1, len(year_values)):
|
| 210 |
+
prev = year_values[i-1]
|
| 211 |
+
curr = year_values[i]
|
| 212 |
+
if prev and not np.isnan(prev) and curr and not np.isnan(curr) and prev != 0:
|
| 213 |
+
rate = (curr - prev) / prev
|
| 214 |
+
change_rates.append(rate)
|
| 215 |
+
# ํ๊ท ๋ณํ์จ ๊ณ์ฐ
|
| 216 |
+
avg_rate = np.mean(change_rates) if change_rates else 0
|
| 217 |
+
# ์์น/ํ๋ฝ/๋ฏธ๋ฏธ ํ๋จ (์๊ณ๊ฐ 1% ์ฌ์ฉ)
|
| 218 |
+
if avg_rate > 0.01:
|
| 219 |
+
trend_list.append('์์น')
|
| 220 |
+
elif avg_rate < -0.01:
|
| 221 |
+
trend_list.append('ํ๋ฝ')
|
| 222 |
+
else:
|
| 223 |
+
trend_list.append('๋ฏธ๋ฏธ')
|
| 224 |
+
|
| 225 |
+
# 4๏ธโฃ ์ปฌ๋ผ ์ถ๊ฐ
|
| 226 |
+
festival_df['์ถ์ ์ธ๊ธฐ๋'] = trend_list
|
| 227 |
+
|
| 228 |
+
# โ
๊ฒฐ๊ณผ ํ์ธ
|
| 229 |
+
print(festival_df[['์ถ์ ๋ช
', '์ถ์ ์ธ๊ธฐ๋']])
|
| 230 |
+
|
| 231 |
+
festival_df
|
| 232 |
+
|
| 233 |
+
# CSV๋ก ์ ์ฅ
|
| 234 |
+
festival_df.to_csv('festival_df_edit.csv', index=False, encoding='utf-8-sig')
|
| 235 |
+
|
๊ธฐํ/festival_df_second.py
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import re
|
| 3 |
+
|
| 4 |
+
# --- 1. ๋ฐ์ดํฐ ๋ถ๋ฌ์ค๊ธฐ ---
|
| 5 |
+
# 3๊ฐ์ ํตํฉ CSV ํ์ผ ๊ฒฝ๋ก๋ฅผ ์ง์ ํฉ๋๋ค.
|
| 6 |
+
path_indicators = 'C:/projects/shcard_2025_bigcontest/data/ํตํฉ_๋ฌธํ๊ด๊ด์ถ์ _์ฃผ์_์งํ.csv'
|
| 7 |
+
path_demographics = 'C:/projects/shcard_2025_bigcontest/data/ํตํฉ_์ฑ_์ฐ๋ น๋ณ_๋ด๊ตญ์ธ_๋ฐฉ๋ฌธ์.csv'
|
| 8 |
+
path_trend = 'C:/projects/shcard_2025_bigcontest/data/ํตํฉ_์ฐ๋๋ณ_๋ฐฉ๋ฌธ์_์ถ์ด.csv'
|
| 9 |
+
|
| 10 |
+
df_indicators = pd.read_csv(path_indicators)
|
| 11 |
+
df_demographics = pd.read_csv(path_demographics)
|
| 12 |
+
df_trend = pd.read_csv(path_trend)
|
| 13 |
+
print("โ
3๊ฐ์ ํตํฉ ํ์ผ์ ์ฑ๊ณต์ ์ผ๋ก ๋ถ๋ฌ์์ต๋๋ค.\n")
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
# --- 2. ์ฐ๋๋ณ ๋ฐ์ดํฐ๋ฅผ Wide ํ์์ผ๋ก ๋ณํํ๋ ํจ์ ---
|
| 17 |
+
def pivot_by_year(df, index_col, year_col, drop_cols=None):
|
| 18 |
+
"""์ฐ๋๋ณ ๋ฐ์ดํฐ๋ฅผ (๋
๋)_(์ปฌ๋ผ๋ช
) ํํ๋ก ๋ณํํ๋ ํจ์"""
|
| 19 |
+
if drop_cols:
|
| 20 |
+
df = df.drop(columns=drop_cols)
|
| 21 |
+
|
| 22 |
+
df_wide = df.pivot_table(index=index_col, columns=year_col)
|
| 23 |
+
|
| 24 |
+
# ๋ฉํฐ๋ ๋ฒจ ์ปฌ๋ผ์ (๋
๋)_(์ปฌ๋ผ๋ช
) ํ์์ผ๋ก ํฉ์น๊ธฐ
|
| 25 |
+
df_wide.columns = [f"{int(col[1])}_{col[0]}" for col in df_wide.columns]
|
| 26 |
+
return df_wide.reset_index()
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
# --- 3. ๊ฐ ๋ฐ์ดํฐ ์ ์ ๋ฐ ๋ณํ ---
|
| 30 |
+
|
| 31 |
+
# 3-1. '์ฐ๋๋ณ ๋ฐฉ๋ฌธ์ ์ถ์ด' ๋ฐ์ดํฐ ๋ณํ
|
| 32 |
+
# ๋ถํ์ํ๊ฑฐ๋ ์ค๋ณต๋ ์ ์๋ ์ปฌ๋ผ์ ๋ฏธ๋ฆฌ ์ ๊ฑฐ
|
| 33 |
+
trend_drop_cols = ['์ผํ๊ท ๋ฐฉ๋ฌธ์์ ์ฆ๊ฐ๋ฅ ', '(์ด์ )์ ์ฒด๋ฐฉ๋ฌธ์', '(์ ์ฒด)๋ฐฉ๋ฌธ์์ฆ๊ฐ', '์ ๋
๋๋น๋ฐฉ๋ฌธ์์ฆ๊ฐ๋น์จ']
|
| 34 |
+
df_trend_wide = pivot_by_year(df_trend, '์ถ์ ๋ช
', '๊ฐ์ต๋
๋', drop_cols=trend_drop_cols)
|
| 35 |
+
print("โ
'์ฐ๋๋ณ ๋ฐฉ๋ฌธ์ ์ถ์ด' ๋ฐ์ดํฐ๋ฅผ Wide ํํ๋ก ๋ณํํ์ต๋๋ค.")
|
| 36 |
+
|
| 37 |
+
# 3-2. '์ฃผ์ ์งํ' ๋ฐ์ดํฐ ๋ณํ
|
| 38 |
+
# '๊ทธ๋ฃน๋ช
'๊ณผ '๊ตฌ๋ถ๋ช
'์ ํฉ์ณ ์๋ก์ด ์ปฌ๋ผ ์์ฑ
|
| 39 |
+
df_indicators['์งํ๊ตฌ๋ถ'] = df_indicators['๊ทธ๋ฃน๋ช
'] + '_' + df_indicators['๊ตฌ๋ถ๋ช
']
|
| 40 |
+
df_indicators_intermediate = df_indicators.pivot_table(
|
| 41 |
+
index=['์ถ์ ๋ช
', '๊ฐ์ต๋
๋'],
|
| 42 |
+
columns='์งํ๊ตฌ๋ถ',
|
| 43 |
+
values='์งํ๊ฐ'
|
| 44 |
+
).reset_index()
|
| 45 |
+
df_indicators_wide = pivot_by_year(df_indicators_intermediate, '์ถ์ ๋ช
', '๊ฐ์ต๋
๋')
|
| 46 |
+
print("โ
'์ฃผ์ ์งํ' ๋ฐ์ดํฐ๋ฅผ Wide ํํ๋ก ๋ณํํ์ต๋๋ค.")
|
| 47 |
+
|
| 48 |
+
# 3-3. '์ฑ_์ฐ๋ น๋ณ ๋ฐฉ๋ฌธ์' ๋ฐ์ดํฐ ๋ณํ (์ด ๋ฐ์ดํฐ๋ ์ฐ๋ ์ ๋ณด๊ฐ ์์ผ๋ฏ๋ก ์ด์ ๊ณผ ๋์ผ)
|
| 49 |
+
df_demographics_wide = df_demographics.pivot_table(
|
| 50 |
+
index='์ถ์ ๋ช
',
|
| 51 |
+
columns='์ฐ๋ น๋',
|
| 52 |
+
values=['๋จ์ฑ๋น์จ', '์ฌ์ฑ๋น์จ']
|
| 53 |
+
).reset_index()
|
| 54 |
+
# ์ปฌ๋ผ๋ช
์ ๋ฆฌ
|
| 55 |
+
df_demographics_wide.columns = [f'{col[0]}_{col[1]}' if col[1] else col[0] for col in df_demographics_wide.columns]
|
| 56 |
+
df_demographics_wide.columns = [re.sub(r'[^A-Za-z0-9_๊ฐ-ํฃ]', '', col) for col in df_demographics_wide.columns]
|
| 57 |
+
print("โ
'์ฑ_์ฐ๋ น๋ณ ๋ฐฉ๋ฌธ์' ๋ฐ์ดํฐ๋ฅผ Wide ํํ๋ก ๋ณํํ์ต๋๋ค.\n")
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
# --- 4. ๋ชจ๋ Wide ๋ฐ์ดํฐ ๋ณํฉ (Merging) ---
|
| 61 |
+
# '์ฑ_์ฐ๋ น๋ณ' ๋ฐ์ดํฐ๋ฅผ ๊ธฐ์ค์ผ๋ก '์ฐ๋๋ณ ์ถ์ด'์ '์ฃผ์ ์งํ'๋ฅผ ํฉ์นฉ๋๋ค.
|
| 62 |
+
# how='outer'๋ ํ์ชฝ์๋ง ์๋ ์ถ์ ์ ๋ณด๋ ๋๋ฝ์ํค์ง ์๊ธฐ ์ํจ์
๋๋ค.
|
| 63 |
+
final_df = pd.merge(df_demographics_wide, df_trend_wide, on='์ถ์ ๋ช
', how='outer')
|
| 64 |
+
final_df = pd.merge(final_df, df_indicators_wide, on='์ถ์ ๋ช
', how='outer')
|
| 65 |
+
print("โ
๋ชจ๋ ๋ฐ์ดํฐ๋ฅผ ํ๋์ DataFrame์ผ๋ก ์ต์ข
๋ณํฉํ์ต๋๋ค.")
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
# --- 5. ๊ฒฐ๊ณผ ํ์ธ ๋ฐ ์ ์ฅ ---
|
| 69 |
+
print("\n๐ ์ต์ข
ํตํฉ ๋ฐ์ดํฐ(Wide) ์ํ")
|
| 70 |
+
# ์ถ์ ๋ช
๊ณผ ์ฐ๋ ๊ด๋ จ ์ปฌ๋ผ ์ผ๋ถ๋ง ์ํ๋ก ์ถ๋ ฅ
|
| 71 |
+
sample_cols = [col for col in final_df.columns if '2023' in col or '์ถ์ ๋ช
' in col or '๋จ์ฑ' in col]
|
| 72 |
+
print(final_df[sample_cols].head())
|
| 73 |
+
|
| 74 |
+
print(f"\n- ์ต์ข
๋ฐ์ดํฐ๋ ์ด {len(final_df.columns)}๊ฐ์ ์ปฌ๋ผ๊ณผ {len(final_df)}๊ฐ์ ํ์ผ๋ก ๊ตฌ์ฑ๋ฉ๋๋ค.")
|
| 75 |
+
|
| 76 |
+
# ์ต์ข
๋ฐ์ดํฐ๋ฅผ ์๋ก์ด CSV ํ์ผ๋ก ์ ์ฅ
|
| 77 |
+
final_df.to_csv('C:/projects/shcard_2025_bigcontest/data/festival_df.csv', index=False, encoding='utf-8-sig')
|
| 78 |
+
print("\n๐พ 'festival_df.csv' ํ์ผ์ด ์ฑ๊ณต์ ์ผ๋ก ์ ์ฅ๋์์ต๋๋ค.")
|