hyeonjoo commited on
Commit
9b1e3db
·
0 Parent(s):

Initial project commit with LFS

Browse files
.devcontainer/devcontainer.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "Python 3",
3
+ // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
4
+ "image": "mcr.microsoft.com/devcontainers/python:1-3.11-bullseye",
5
+ "customizations": {
6
+ "codespaces": {
7
+ "openFiles": [
8
+ "README.md",
9
+ "streamlit_app.py"
10
+ ]
11
+ },
12
+ "vscode": {
13
+ "settings": {},
14
+ "extensions": [
15
+ "ms-python.python",
16
+ "ms-python.vscode-pylance"
17
+ ]
18
+ }
19
+ },
20
+ "updateContentCommand": "[ -f packages.txt ] && sudo apt update && sudo apt upgrade -y && sudo xargs apt install -y <packages.txt; [ -f requirements.txt ] && pip3 install --user -r requirements.txt; pip3 install --user streamlit; echo '✅ Packages installed and Requirements met'",
21
+ "postAttachCommand": {
22
+ "server": "streamlit run streamlit_app.py --server.enableCORS false --server.enableXsrfProtection false"
23
+ },
24
+ "portsAttributes": {
25
+ "8501": {
26
+ "label": "Application",
27
+ "onAutoForward": "openPreview"
28
+ }
29
+ },
30
+ "forwardPorts": [
31
+ 8501
32
+ ]
33
+ }
.gitattributes ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ data/*.csv filter=lfs diff=lfs merge=lfs -text
2
+ data/final_df.csv filter=lfs diff=lfs merge=lfs -text
3
+ vectorstore/faiss_festival/* filter=lfs diff=lfs merge=lfs -text
4
+ vectorstore/faiss_marketing/* filter=lfs diff=lfs merge=lfs -text
5
+ assets/*.png filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,232 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # poetry
98
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102
+ #poetry.lock
103
+
104
+ # pdm
105
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106
+ #pdm.lock
107
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108
+ # in version control.
109
+ # https://pdm.fming.dev/#use-with-ide
110
+ .pdm.toml
111
+
112
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113
+ __pypackages__/
114
+
115
+ # Celery stuff
116
+ celerybeat-schedule
117
+ celerybeat.pid
118
+
119
+ # SageMath parsed files
120
+ *.sage.py
121
+
122
+ # Environments
123
+ .env
124
+ .venv
125
+ env/
126
+ venv/
127
+ ENV/
128
+ env.bak/
129
+ venv.bak/
130
+
131
+ # Spyder project settings
132
+ .spyderproject
133
+ .spyproject
134
+
135
+ # Rope project settings
136
+ .ropeproject
137
+
138
+ # mkdocs documentation
139
+ /site
140
+
141
+ # mypy
142
+ .mypy_cache/
143
+ .dmypy.json
144
+ dmypy.json
145
+
146
+ # Pyre type checker
147
+ .pyre/
148
+
149
+ # pytype static type analyzer
150
+ .pytype/
151
+
152
+ # Cython debug symbols
153
+ cython_debug/
154
+
155
+ # PyCharm
156
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157
+ # be added to the global gitignore or merged into this project gitignore. For a PyCharm
158
+ # project, it is recommended to include the following files:
159
+ # .idea/
160
+ # *.iml
161
+ # *.ipr
162
+ # *.iws
163
+
164
+ # VS Code
165
+ .vscode/
166
+
167
+ # macOS
168
+ .DS_Store
169
+ .AppleDouble
170
+ .LSOverride
171
+
172
+ # Windows
173
+ Thumbs.db
174
+ Thumbs.db:encryptable
175
+ ehthumbs.db
176
+ ehthumbs_vista.db
177
+ *.tmp
178
+ *.temp
179
+ Desktop.ini
180
+ $RECYCLE.BIN/
181
+ *.cab
182
+ *.msi
183
+ *.msix
184
+ *.msm
185
+ *.msp
186
+ *.lnk
187
+
188
+ # Linux
189
+ *~
190
+
191
+ # temporary files which can be created if a process still has a handle open of a deleted file
192
+ .fuse_hidden*
193
+
194
+ # KDE directory preferences
195
+ .directory
196
+
197
+ # Linux trash folder which might appear on any partition or disk
198
+ .Trash-*
199
+
200
+ # .nfs files are created when an open file is removed but is still being accessed
201
+ .nfs*
202
+
203
+ # Streamlit
204
+ .streamlit/
205
+
206
+ # Project specific
207
+ *.log
208
+ *.tmp
209
+ temp/
210
+ tmp/
211
+
212
+ # Data files (if they contain sensitive information)
213
+ # data/
214
+ # *.csv
215
+ # *.json
216
+ # *.pkl
217
+ # *.npy
218
+
219
+ # Model files (if they are large)
220
+ # *.model
221
+ # *.pkl
222
+ # *.h5
223
+ # *.pt
224
+ # *.pth
225
+
226
+ # Jupyter notebook checkpoints
227
+ .ipynb_checkpoints/
228
+
229
+ # Virtual environment
230
+ .venv/
231
+ venv/
232
+ env/
.python-version ADDED
@@ -0,0 +1 @@
 
 
1
+ 3.11
LICENSE ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright [yyyy] [name of copyright owner]
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
README.md ADDED
@@ -0,0 +1,429 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 🎉 MarketSync (마켓싱크)
2
+
3
+ ### Agentic RAG 기반 소상공인 맞춤형 지역 축제 추천 & 마케팅 AI 컨설턴트
4
+
5
+ 신한카드 빅데이터와 전국 축제 정보를 통합 분석하여, **AI 에이전트**가 가게별로 참여할 만한 지역 축제를 추천하고 최적의 마케팅 전략 보고서를 자동 생성합니다. 🤖
6
+
7
+ ---
8
+
9
+ ## ๐Ÿงญ ํ”„๋กœ์ ํŠธ ๊ฐœ์š”
10
+
11
+ MarketSync๋Š” **Streamlit ์›น ์ธํ„ฐํŽ˜์ด์Šค, FastAPI ๋ฐ์ดํ„ฐ ์„œ๋ฒ„, LangChain ์—์ด์ „ํŠธ**๋ฅผ ๊ฒฐํ•ฉํ•˜์—ฌ ์†Œ์ƒ๊ณต์ธ์„ ์œ„ํ•œ AI ์ปจ์„คํŒ… ์„œ๋น„์Šค๋ฅผ ์ œ๊ณตํ•ฉ๋‹ˆ๋‹ค. ์‚ฌ์šฉ์ž๋Š” ์ž์‹ ์˜ ๊ฐ€๊ฒŒ๋ฅผ ์„ ํƒํ•˜์—ฌ ์ƒ์„ธ ํ”„๋กœํ•„๊ณผ ๋ถ„์„ ๊ทธ๋ž˜ํ”„๋ฅผ ํ™•์ธํ•œ ๋’ค, "10์›”์— ์—ด๋ฆฌ๋Š” ์ถ•์ œ ์ถ”์ฒœํ•ด์ค˜", "์ถ”์ฒœ๋œ ์ถ•์ œ๋“ค์˜ ๋งˆ์ผ€ํŒ… ์ „๋žต ์•Œ๋ ค์ค˜" ์™€ ๊ฐ™์€ ์ž์—ฐ์–ด ์งˆ๋ฌธ์„ ํ†ตํ•ด ๋งž์ถคํ˜• ์ปจ์„คํŒ…์„ ๋ฐ›์„ ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.
12
+
13
+ **ํ•ต์‹ฌ ์•„ํ‚คํ…์ฒ˜๋Š” Agentic RAG**์ž…๋‹ˆ๋‹ค. AI ์—์ด์ „ํŠธ(`Orchestrator`)๋Š” ์‚ฌ์šฉ์ž์˜ ์งˆ๋ฌธ๊ณผ ๊ฐ€๊ฒŒ์˜ ์ƒ์„ธ ํ”„๋กœํ•„(JSON)์„ ๋ฐ”ํƒ•์œผ๋กœ ์ƒํ™ฉ์— ๋งž๋Š” **๋„๊ตฌ(Tool)**๋ฅผ ์ž์œจ์ ์œผ๋กœ ์„ ํƒํ•˜๊ณ , ์‚ฌ์šฉ์ž์˜ ์งˆ๋ฌธ์— ๋”ฐ๋ผ ํ•„์š”ํ•˜๋‹ค๋ฉด ์—ฌ๋Ÿฌ ๋„๊ตฌ๋ฅผ ์ˆœ์ฐจ์ ์œผ๋กœ ํ˜ธ์ถœํ•˜์—ฌ ์ตœ์ข… ์ปจ์„คํŒ… ๋ณด๊ณ ์„œ๋ฅผ ์ƒ์„ฑํ•ฉ๋‹ˆ๋‹ค.
14
+
15
+ ---
16
+
17
+ ## ๐Ÿ› ๏ธ ํ•ต์‹ฌ ๋„๊ตฌ ๋ฐ ์ž‘๋™ ๋ฐฉ์‹
18
+
19
+ AI ์—์ด์ „ํŠธ๊ฐ€ ์‚ฌ์šฉํ•˜๋Š” ์ฃผ์š” ๋„๊ตฌ์™€ ๋‚ด๋ถ€ ์ฒ˜๋ฆฌ ๊ณผ์ •์€ ๋‹ค์Œ๊ณผ ๊ฐ™์Šต๋‹ˆ๋‹ค.
20
+
21
+ | ๊ธฐ๋Šฅ ๋ถ„๋ฅ˜ | ๋„๊ตฌ ํ•จ์ˆ˜๋ช… (`tools/`) | ์ฃผ์š” ์ฒ˜๋ฆฌ ๊ณผ์ • (`modules/`) |
22
+ | :--------------- | :------------------------------------------------------- | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
23
+ | **์ถ•์ œ ์ถ”์ฒœ** | `recommend_festivals` (festival\_recommender.py) | **ํ•˜์ด๋ธŒ๋ฆฌ๋“œ 5๋‹จ๊ณ„ ํŒŒ์ดํ”„๋ผ์ธ (`filtering.py`)**: <br> 1. LLM ์ฟผ๋ฆฌ ์žฌ์ž‘์„ฑ <br> 2. FAISS ๋ฒกํ„ฐ ๊ฒ€์ƒ‰ (์œ ์‚ฌ ์ถ•์ œ ํ›„๋ณด ์„ ์ •) <br> 3. LLM ๋™์  ์†์„ฑ ํ‰๊ฐ€ (๊ฐ€๊ฒŒ ๋งž์ถค์„ฑ) <br> 4. ํ•˜์ด๋ธŒ๋ฆฌ๋“œ ์ ์ˆ˜ ๊ณ„์‚ฐ <br> 5. ์ตœ์ข… Top3 ๊ฒฐ๊ณผ ํฌ๋งทํŒ… (2026 ์˜ˆ์ธก ํฌํ•จ) |
24
+ | **๋งˆ์ผ€ํŒ… (RAG)** | `search_contextual_marketing_strategy` (marketing\_strategy.py) | **์ปจํ…์ŠคํŠธ ๊ธฐ๋ฐ˜ RAG (`knowledge_base.py`)**: <br> 1. ๊ฐ€๊ฒŒ ํ”„๋กœํ•„ + ์งˆ๋ฌธ โ†’ LLM ๊ฒ€์ƒ‰ ์ฟผ๋ฆฌ ์ƒ์„ฑ <br> 2. FAISS ๋ฒกํ„ฐ ๊ฒ€์ƒ‰ (๊ด€๋ จ ์ „๋žต ๋ฌธ์„œ ์ถ”์ถœ) <br> 3. LLM ๋‹ต๋ณ€ ์ƒ์„ฑ (์ถ”์ถœ๋œ ๋‚ด์šฉ์„ ๋ฐ”ํƒ•์œผ๋กœ ์ž์—ฐ์Šค๋Ÿฌ์šด ์ „๋žต ์ œ์•ˆ) |
25
+ | **๋งˆ์ผ€ํŒ… (์ƒ์„ฑ)** | `create_festival_specific_marketing_strategy` (marketing\_strategy.py) | **LLM ๊ธฐ๋ฐ˜ ์ „๋žต ์ƒ์„ฑ**: <br> 1. ์ถ•์ œ ํ”„๋กœํ•„ ์กฐํšŒ (`profile_analyzer.py`) <br> 2. ๊ฐ€๊ฒŒ ํ”„๋กœํ•„ + ์ถ•์ œ ํ”„๋กœํ•„ + RAG ๊ฒ€์ƒ‰ โ†’ LLM ํ”„๋กฌํ”„ํŠธ ๊ตฌ์„ฑ <br> 3. LLM์ด ํŠน์ • ์ถ•์ œ ๋งž์ถค ์ „๋žต ์ƒ์„ฑ |
26
+ | **๋งˆ์ผ€ํŒ… (์ƒ์„ฑ)** | `create_marketing_strategies_for_multiple_festivals` (marketing\_strategy.py) | **LLM ๊ธฐ๋ฐ˜ ์ „๋žต ์ƒ์„ฑ (๋‹ค์ˆ˜)**: <br> 1. ์—ฌ๋Ÿฌ ์ถ•์ œ ์ด๋ฆ„ ์ž…๋ ฅ๋ฐ›์Œ <br> 2. ๊ฐ ์ถ•์ œ๋ณ„๋กœ `create_festival_specific_marketing_strategy` ๋ฐ˜๋ณต ํ˜ธ์ถœ <br> 3. ๋ชจ๋“  ์ „๋žต์„ ํ•˜๋‚˜์˜ ๋ณด๊ณ ์„œ๋กœ ์ทจํ•ฉ |
27
+ | **๊ฐ€๊ฒŒ ๋ถ„์„** | `analyze_merchant_profile` (profile\_analyzer.py) | **LLM ๊ธฐ๋ฐ˜ ๋ถ„์„**: <br> ๊ฐ€๊ฒŒ ํ”„๋กœํ•„(JSON) ์ž…๋ ฅ โ†’ LLM์ด SWOT ๋ถ„์„ ๋ฐ ํ•ต์‹ฌ ๊ณ ๊ฐ ํŠน์„ฑ ์š”์•ฝ ๋ณด๊ณ ์„œ ์ƒ์„ฑ |
28
+ | **์ถ•์ œ ๋ถ„์„** | `analyze_festival_profile` (profile\_analyzer.py) | **LLM ๊ธฐ๋ฐ˜ ๋ถ„์„**: <br> ์ถ•์ œ ํ”„๋กœํ•„(JSON) ์ž…๋ ฅ โ†’ LLM์ด ์ถ•์ œ์˜ ํ•ต์‹ฌ ํŠน์ง• ๋ฐ ์ฃผ์š” ๋ฐฉ๋ฌธ๊ฐ ํŠน์„ฑ ์š”์•ฝ ๋ณด๊ณ ์„œ ์ƒ์„ฑ |
29
+ | **์ถ•์ œ ์กฐํšŒ** | `get_festival_profile_by_name` (profile\_analyzer.py) | **๋‹จ์ˆœ ๋ฐ์ดํ„ฐ ์กฐํšŒ**: ์ถ•์ œ ์ด๋ฆ„ ์ž…๋ ฅ โ†’ `festival_df.csv`์—์„œ ํ•ด๋‹น ์ถ•์ œ ์ •๋ณด(JSON) ๋ฐ˜ํ™˜ (์บ์‹ฑ ํ™œ์šฉ) |
30
+
31
+ ---
32
+
33
+ ## ๐Ÿ“‚ ํ”„๋กœ์ ํŠธ ๊ตฌ์กฐ ๋ฐ ์ฝ”๋“œ ์„ค๋ช…
34
+
35
+ ```plaintext
36
+ MarketSync/
37
+ โ”œโ”€โ”€ streamlit_app.py # Streamlit ์›น ์ธํ„ฐํŽ˜์ด์Šค (UI)
38
+ โ”œโ”€โ”€ orchestrator.py # AI ์—์ด์ „ํŠธ: LangChain AgentExecutor, ๋„๊ตฌ ๋ผ์šฐํŒ…, ์ตœ์ข… ๋‹ต๋ณ€ ์ƒ์„ฑ ๋กœ์ง
39
+ โ”œโ”€โ”€ config.py # ์„ค์ • ์ค‘์•™ํ™”: ๊ฒฝ๋กœ, API ํ‚ค, ๋ชจ๋ธ๋ช…, ๋กœ๊น… ์„ค์ • ๋“ฑ
40
+ โ”‚
41
+ โ”œโ”€โ”€ api/ # ๋ฐ์ดํ„ฐ ์ œ๊ณต ๋ฐ ์ „์ฒ˜๋ฆฌ ์„œ๋ฒ„
42
+ โ”‚ โ”œโ”€โ”€ server.py # FastAPI ์„œ๋ฒ„: /profile, /merchants ์—”๋“œํฌ์ธํŠธ ์ œ๊ณต
43
+ โ”‚ โ””โ”€โ”€ data_loader.py # ๋ฐ์ดํ„ฐ ๋กœ๋”ฉ ๋ฐ ์ „์ฒ˜๋ฆฌ (final_df.csv, festival_df.csv)
44
+ โ”‚
45
+ โ”œโ”€โ”€ tools/ # LangChain @tool ๋„๊ตฌ ์ •์˜ ๋ ˆ์ด์–ด
46
+ โ”‚ โ”œโ”€โ”€ festival_recommender.py # [Tool] recommend_festivals ๋„๊ตฌ ์ •์˜ (filtering.py ํ˜ธ์ถœ)
47
+ โ”‚ โ”œโ”€โ”€ marketing_strategy.py # [Tool] ๋งˆ์ผ€ํŒ… ์ „๋žต ๊ด€๋ จ ๋„๊ตฌ 3๊ฐœ ์ •์˜ (knowledge_base.py, profile_analyzer.py ๋“ฑ ํ˜ธ์ถœ)
48
+ โ”‚ โ”œโ”€โ”€ profile_analyzer.py # [Tool] ๊ฐ€๊ฒŒ/์ถ•์ œ ๋ถ„์„ ๋ฐ ์ถ•์ œ ํ”„๋กœํ•„ ์กฐํšŒ ๋„๊ตฌ 3๊ฐœ ์ •์˜ (LLM ํ˜ธ์ถœ, ๋ฐ์ดํ„ฐ ์กฐํšŒ)
49
+ โ”‚ โ””โ”€โ”€ tool_loader.py # ๋ชจ๋“  ๋„๊ตฌ(@tool)๋ฅผ ๋ฆฌ์ŠคํŠธ๋กœ ๋ฌถ์–ด Orchestrator์— ์ œ๊ณต
50
+ โ”‚
51
+ โ”œโ”€โ”€ modules/ # ํ•ต์‹ฌ ๋กœ์ง ๊ตฌํ˜„ ๋ชจ๋“ˆ
52
+ โ”‚ โ”œโ”€โ”€ filtering.py # [์ถ•์ œ ์ถ”์ฒœ] FestivalRecommender ํด๋ž˜์Šค (5๋‹จ๊ณ„ ํŒŒ์ดํ”„๋ผ์ธ ๊ตฌํ˜„)
53
+ โ”‚ โ”œโ”€โ”€ knowledge_base.py # [RAG] FAISS ๋ฒกํ„ฐ ์Šคํ† ์–ด ๋กœ๋”ฉ (์ถ•์ œ, ๋งˆ์ผ€ํŒ…), ์ž„๋ฒ ๋”ฉ ๋ชจ๋ธ ๊ด€๋ฆฌ
54
+ โ”‚ โ”œโ”€โ”€ llm_provider.py # LLM ์ธ์Šคํ„ด์Šค ๊ด€๋ฆฌ (์ „์—ญ ๊ณต์œ  ๋ฐ Temperature ์กฐ์ ˆ)
55
+ โ”‚ โ”œโ”€โ”€ profile_utils.py # ๊ฐ€๊ฒŒ ํ”„๋กœํ•„ JSON ๊ฐ€๊ณต ์œ ํ‹ธ๋ฆฌํ‹ฐ (์ฑ„ํŒ…์šฉ/๋ถ„์„์šฉ)
56
+ โ”‚ โ””โ”€โ”€ visualization.py # Streamlit ์‹œ๊ฐํ™”: Matplotlib ๊ทธ๋ž˜ํ”„ ์ƒ์„ฑ ํ•จ์ˆ˜
57
+ โ”‚
58
+ โ”œโ”€โ”€ utils/ # ๊ณตํ†ต ์œ ํ‹ธ๋ฆฌํ‹ฐ
59
+ โ”‚ โ””โ”€โ”€ parser_utils.py # LLM ์‘๋‹ต์—์„œ JSON ์ถ”์ถœ ํŒŒ์„œ
60
+ โ”‚
61
+ โ”œโ”€โ”€ data/ # ์›๋ณธ ๋ฐ์ดํ„ฐ
62
+ โ”‚ โ”œโ”€โ”€ final_df.csv # ์‹ ํ•œ์นด๋“œ ๊ฐ€๋งน์  ๋ฐ์ดํ„ฐ
63
+ โ”‚ โ””โ”€โ”€ festival_df.csv # ์ „๊ตญ ์ถ•์ œ ์ •๋ณด ๋ฐ์ดํ„ฐ
64
+ โ”‚
65
+ โ””โ”€โ”€ vectorstore/ # FAISS ๋ฒกํ„ฐ ๋ฐ์ดํ„ฐ๋ฒ ์ด์Šค ์ €์žฅ ํด๋”
66
+ โ”œโ”€โ”€ faiss_festival # ์ถ•์ œ ์ •๋ณด ๋ฒกํ„ฐ DB
67
+ โ””โ”€โ”€ faiss_marketing # ๋งˆ์ผ€ํŒ… ์ „๋žต ๋ฒกํ„ฐ DB
68
+ ```
69
+
70
+ ------------------------------------------------------------------------
71
+
72
+ ## ๐Ÿ”„ ์•„ํ‚คํ…์ฒ˜ ๋ฐ ๋ฐ์ดํ„ฐ ํ๋ฆ„
73
+
74
+ ์ด ์‹œ์Šคํ…œ์€ **์—์ด์ „ํŠธ ์ค‘์‹ฌ์˜ ๋„๊ตฌ ํ˜ธ์ถœ (Tool-Calling)** ์•„ํ‚คํ…์ฒ˜๋ฅผ ๊ธฐ๋ฐ˜์œผ๋กœ ์ž‘๋™ํ•ฉ๋‹ˆ๋‹ค. ์‚ฌ์šฉ์ž์˜ ์ž์—ฐ์–ด ์งˆ๋ฌธ์€ `Orchestrator`๋ผ๋Š” AI ์—์ด์ „ํŠธ์— ์˜ํ•ด ํ•ด์„๋˜๋ฉฐ, ์—์ด์ „ํŠธ๋Š” ์ œ๊ณต๋œ `[๊ฐ€๊ฒŒ ํ”„๋กœํ•„]` ์ปจํ…์ŠคํŠธ์™€ **์‹œ์Šคํ…œ ํ”„๋กฌํ”„ํŠธ**์˜ ์ง€์นจ์— ๋”ฐ๋ผ ๊ฐ€์žฅ ์ ์ ˆํ•œ ๋„๊ตฌ๋ฅผ ์„ ํƒํ•˜๊ณ  ์‹คํ–‰ํ•ฉ๋‹ˆ๋‹ค. ํ•„์š”ํ•˜๋‹ค๋ฉด ์—ฌ๋Ÿฌ ๋„๊ตฌ๋ฅผ ์ˆœ์ฐจ์ ์œผ๋กœ ํ˜ธ์ถœํ•˜์—ฌ ์–ป์€ ์ •๋ณด๋ฅผ ์ข…ํ•ฉํ•œ ๋’ค, ์ตœ์ข… ์ปจ์„คํŒ… ๋‹ต๋ณ€์„ ์ƒ์„ฑํ•ฉ๋‹ˆ๋‹ค.
75
+
76
+ ---
77
+
78
+ ## ๐Ÿงฉ ์‹œ์Šคํ…œ ๊ตฌ์„ฑ๋„
79
+
80
+ ### (1) ์ „์ฒด ๊ฐœ์š”: UI - ์—์ด์ „ํŠธ - API ์ƒํ˜ธ์ž‘์šฉ
81
+
82
+ ์‚ฌ์šฉ์ž๊ฐ€ UI๋ฅผ ํ†ตํ•ด ์งˆ๋ฌธํ•˜๋ฉด, **์—์ด์ „ํŠธ(Orchestrator)** ๊ฐ€ ์ž‘๋™ํ•˜๊ณ , ํ•„์š” ์‹œ FastAPI ์„œ๋ฒ„๋‚˜ **์—ฌ๋Ÿฌ ๋„๊ตฌ(Tool)** ์™€ ์ƒํ˜ธ์ž‘์šฉํ•ฉ๋‹ˆ๋‹ค.
83
+
84
+ ```mermaid
85
+ graph TD
86
+ %% ========================
87
+ %% AI ์ปจ์„คํŒ… ์—”์ง„ (์ตœ์ƒ๋‹จ)
88
+ %% ========================
89
+ subgraph SG_Engine ["๐Ÿง  AI ์ปจ์„คํŒ… ์—”์ง„"]
90
+ direction TB
91
+ C["๐Ÿค– Orchestrator (ํ•ต์‹ฌ ์—์ด์ „ํŠธ)\n(orchestrator.py)\nAgentExecutor (LangChain)"]
92
+ D{"๐Ÿšฆ Tool Routing\nLLM ์˜๋„ ๋ถ„์„ & ๋„๊ตฌ ์„ ํƒ"}
93
+
94
+ subgraph SG_Tools ["๐Ÿ”ง ๋“ฑ๋ก๋œ ๋„๊ตฌ ๋ชฉ๋ก (tools/)"]
95
+ T1["recommend_festivals\n(์ถ•์ œ ์ถ”์ฒœ)"]
96
+ T2["search_contextual_marketing_strategy\n(RAG ๋งˆ์ผ€ํŒ… ์ „๋žต)"]
97
+ T3["create_festival_specific_marketing_strategy\n(๋‹จ์ผ ์ถ•์ œ ์ „๋žต)"]
98
+ T3_multi["create_marketing_strategies_for_multiple_festivals\n(๋‹ค์ˆ˜ ์ถ•์ œ ์ „๋žต)"]
99
+ T4["analyze_merchant_profile\n(๊ฐ€๊ฒŒ ๋ถ„์„)"]
100
+ T5["analyze_festival_profile\n(์ถ•์ œ ๋ถ„์„)"]
101
+ T6["get_festival_profile_by_name\n(์ถ•์ œ ํ”„๋กœํ•„ ์กฐํšŒ)"]
102
+ end
103
+
104
+ LLM_Final["๐Ÿช„ LLM (Final Report Generation)\n์ตœ์ข… ๋ณด๊ณ ์„œ ์ƒ์„ฑ"]
105
+ end
106
+
107
+ %% ========================
108
+ %% ์‚ฌ์šฉ์ž ์ธํ„ฐํŽ˜์ด์Šค & ๋ฐ์ดํ„ฐ ์„œ๋ฒ„ (ํ•˜๋‹จ)
109
+ %% ========================
110
+ subgraph SG_UserServer ["๐Ÿ’ป ์‚ฌ์šฉ์ž ์ธํ„ฐํŽ˜์ด์Šค & ๋ฐ์ดํ„ฐ ์„œ๋ฒ„"]
111
+ direction LR
112
+ A["๐Ÿ–ฅ๏ธ Streamlit UI\n(streamlit_app.py)\n์‚ฌ์šฉ์ž ์ƒํ˜ธ์ž‘์šฉ"] <--> B["๐Ÿš€ FastAPI Server\n(api/server.py)\n๐Ÿ“Š ๊ฐ€๊ฒŒ ํ”„๋กœํ•„ / ๋ชฉ๋ก ์กฐํšŒ"]
113
+ end
114
+
115
+ %% ========================
116
+ %% ์—ฐ๊ฒฐ ๊ด€๊ณ„ (์ˆ˜์ •)
117
+ %% ========================
118
+ A -- "์ž์—ฐ์–ด ์งˆ๋ฌธ ์ž…๋ ฅ" --> C
119
+ C -- "์˜๋„ ๋ถ„์„ ์š”์ฒญ" --> D
120
+ D -- "์ ํ•ฉ ๋„๊ตฌ ์„ ํƒ/์‹คํ–‰" --> SG_Tools
121
+ SG_Tools -- "๋„๊ตฌ ์‹คํ–‰ ๊ฒฐ๊ณผ" --> C
122
+ C -- "์ตœ์ข… ๋ณด๊ณ ์„œ ์ƒ์„ฑ ์š”์ฒญ" --> LLM_Final
123
+ LLM_Final -- "์ตœ์ข… ๊ฒฐ๊ณผ ์ „๋‹ฌ" --> A
124
+
125
+ %% ========================
126
+ %% ์Šคํƒ€์ผ ์ง€์ • (GitHub ํ˜ธํ™˜)
127
+ %% ========================
128
+ style A fill:#4CAF50,color:#fff,stroke:#388E3C,stroke-width:2px
129
+ style B fill:#FF9800,color:#fff,stroke:#EF6C00,stroke-width:2px
130
+ style C fill:#E91E63,color:#fff,stroke:#C2185B,stroke-width:2px
131
+ style D fill:#9C27B0,color:#fff,stroke:#7B1FA2,stroke-width:2px,shape:diamond
132
+ style SG_Tools fill:#E1F5FE, stroke:#0277BD,color:#000
133
+ style T1,T2,T3,T3_multi,T4,T5,T6 fill:#03A9F4,color:#fff,stroke:#0288D1,stroke-width:2px,shape:hexagon
134
+ style LLM_Final fill:#BA68C8,color:#fff,stroke:#8E24AA,stroke-width:2px
135
+ ```
136
+
137
+ ---
138
+
139
+ ### (2) ์ถ•์ œ ์ถ”์ฒœ ๋„๊ตฌ ์ƒ์„ธ โ€” `recommend_festivals`
140
+
141
+ LLM ๊ธฐ๋ฐ˜ **ํ•˜์ด๋ธŒ๋ฆฌ๋“œ 5๋‹จ๊ณ„ ํŒŒ์ดํ”„๋ผ์ธ**์„ ํ†ตํ•ด,
142
+ ๊ฐ€๊ฒŒ ๋งž์ถคํ˜• ์ถ•์ œ๋ฅผ ์ถ”์ฒœํ•ฉ๋‹ˆ๋‹ค.
143
+ ```mermaid
144
+ graph TD
145
+ %% ========================
146
+ %% Orchestrator ์š”์ฒญ
147
+ %% ========================
148
+ subgraph SG_Orchestrator_Req ["๐Ÿง  Orchestrator ์š”์ฒญ"]
149
+ Agent["๐Ÿค– AgentExecutor"] -- "์ถ•์ œ ์ถ”์ฒœ ์š”์ฒญ" --> Tool_Rec["๐Ÿงฉ Tool: recommend_festivals"]
150
+ end
151
+
152
+ %% ========================
153
+ %% ์ง€์‹ ๋ฒ ์ด์Šค (์ˆ˜์ •)
154
+ %% ========================
155
+ subgraph SG_KnowledgeBase ["๐Ÿ“š ์ง€์‹ ๋ฒ ์ด์Šค (modules/knowledge_base.py)"]
156
+ direction LR
157
+ EM["๐Ÿงฌ Embedding Model\n(HuggingFace)"]
158
+ VSF["๐Ÿ“‚ FAISS (์ถ•์ œ DB)"]
159
+ EM -- "์ž„๋ฒ ๋”ฉ ์ƒ์„ฑ (Offline)" --> VSF
160
+ end
161
+
162
+ %% ========================
163
+ %% Filtering Pipeline
164
+ %% ========================
165
+ subgraph SG_Filtering_Pipeline ["๐Ÿ” Filtering Pipeline (modules/filtering.py)"]
166
+ Tool_Rec --> Step1["1๏ธโƒฃ LLM ์ฟผ๋ฆฌ ์žฌ์ž‘์„ฑ"]
167
+ Step1 --> Step2["2๏ธโƒฃ FAISS ๋ฒกํ„ฐ ๊ฒ€์ƒ‰\n(์œ ์‚ฌ ์ถ•์ œ ํ›„๋ณด ํƒ์ƒ‰)"]
168
+
169
+ %% RAG ํ๋ฆ„ ๋ช…ํ™•ํ™” (์ˆ˜์ •)
170
+ Step2 -- "์ฟผ๋ฆฌ ์ž„๋ฒ ๋”ฉ" --> EM
171
+ Step2 -- "์œ ์‚ฌ๋„ ๊ฒ€์ƒ‰" --> VSF
172
+
173
+ Step2 --> Step3["3๏ธโƒฃ LLM ๋™์  ์†์„ฑ ํ‰๊ฐ€\n(๊ฐ€๊ฒŒ ๋งž์ถค์„ฑ ํŒ๋‹จ)"]
174
+ Step3 --> LLM1["๐Ÿค– LLM (Dynamic Evaluation)"]
175
+ Step3 --> Step4["4๏ธโƒฃ ํ•˜์ด๋ธŒ๋ฆฌ๋“œ ์ ์ˆ˜ ๊ณ„์‚ฐ\n(์œ ์‚ฌ๋„ + ๋งž์ถค์„ฑ)"]
176
+ Step4 --> Step5["5๏ธโƒฃ ์ตœ์ข… ๊ฒฐ๊ณผ ํฌ๋งทํŒ…\n(Top3 + 2026 ์˜ˆ์ธก ํฌํ•จ)"]
177
+ end
178
+
179
+ %% ========================
180
+ %% ๊ฒฐ๊ณผ ๋ฐ˜ํ™˜
181
+ %% ========================
182
+ subgraph SG_Result_Return ["๐Ÿ“ฆ ๊ฒฐ๊ณผ ๋ฐ˜ํ™˜"]
183
+ Step5 -- "Top3 ์ถ•์ œ ์ถ”์ฒœ ๊ฒฐ๊ณผ" --> Agent
184
+ end
185
+
186
+ %% ========================
187
+ %% ์Šคํƒ€์ผ
188
+ %% ========================
189
+ style Agent fill:#E91E63,color:#fff
190
+ style Tool_Rec fill:#03A9F4,color:#fff
191
+ style Step1,Step2,Step3,Step4,Step5 fill:#81D4FA,color:#000
192
+ style VSF fill:#FFC107,color:#000
193
+ style EM fill:#4DD0E1,color:#000
194
+ style LLM1 fill:#BA68C8,color:#fff
195
+ style SG_KnowledgeBase fill:#F5F5F5,stroke:#9E9E9E
196
+ ```
197
+
198
+ ---
199
+
200
+ ### (3) ๋งˆ์ผ€ํŒ… ์ „๋žต (RAG) ๋„๊ตฌ ์ƒ์„ธ โ€” `search_contextual_marketing_strategy`
201
+
202
+ **RAG** ๊ธฐ๋ฐ˜์œผ๋กœ **๊ฐ€๊ฒŒ ํ”„๋กœํ•„ + ์งˆ๋ฌธ ์ปจํ…์ŠคํŠธ**๋ฅผ ์ด์šฉํ•ด
203
+ ๊ฐ€์žฅ ๊ด€๋ จ์„ฑ ๋†’์€ ๋งˆ์ผ€ํŒ… ์ „๋žต ๋ฌธ์„œ๋ฅผ ๊ฒ€์ƒ‰ํ•˜๊ณ , LLM์ด ์ž์—ฐ์Šค๋Ÿฝ๊ฒŒ ์š”์•ฝ/์ œ์•ˆํ•ฉ๋‹ˆ๋‹ค.
204
+
205
+ ```mermaid
206
+ graph TD
207
+ %% ========================
208
+ %% Orchestrator ์š”์ฒญ
209
+ %% ========================
210
+ subgraph SG_Orchestrator_Req_RAG ["๐Ÿง  Orchestrator ์š”์ฒญ"]
211
+ Agent["๐Ÿค– AgentExecutor"] -- "๋งˆ์ผ€ํŒ… ์ „๋žต (RAG) ์š”์ฒญ" --> Tool_RAG["๐Ÿงฉ Tool: search_contextual_marketing_strategy"]
212
+ end
213
+
214
+ %% ========================
215
+ %% ์ง€์‹ ๋ฒ ์ด์Šค (์ˆ˜์ •)
216
+ %% ========================
217
+ subgraph SG_KnowledgeBase_RAG ["๐Ÿ“š ์ง€์‹ ๋ฒ ์ด์Šค (modules/knowledge_base.py)"]
218
+ direction LR
219
+ EM["๐Ÿงฌ Embedding Model\n(HuggingFace)"]
220
+ VSM["๐Ÿ“‚ FAISS (๋งˆ์ผ€ํŒ… DB)"]
221
+ EM -- "์ž„๋ฒ ๋”ฉ ์ƒ์„ฑ (Offline)" --> VSM
222
+ end
223
+
224
+ %% ========================
225
+ %% RAG Logic
226
+ %% ========================
227
+ subgraph SG_RAG_Logic ["โš™๏ธ RAG Logic (tools/marketing_strategy.py)"]
228
+ Tool_RAG --> Step1["1๏ธโƒฃ LLM ๊ฒ€์ƒ‰ ์ฟผ๋ฆฌ ์ƒ์„ฑ\n(๊ฐ€๊ฒŒ ํ”„๋กœํ•„ + ์งˆ๋ฌธ ๊ธฐ๋ฐ˜)"]
229
+ Step1 --> Step2["2๏ธโƒฃ FAISS ๋ฒกํ„ฐ ๊ฒ€์ƒ‰\n(๋งˆ์ผ€ํŒ… DB ํƒ์ƒ‰)"]
230
+
231
+ %% RAG ํ๋ฆ„ ๋ช…ํ™•ํ™” (์ˆ˜์ •)
232
+ Step2 -- "์ฟผ๋ฆฌ ์ž„๋ฒ ๋”ฉ" --> EM
233
+ Step2 -- "์œ ์‚ฌ๋„ ๊ฒ€์ƒ‰" --> VSM
234
+
235
+ Step2 --> Step3["3๏ธโƒฃ LLM ๋‹ต๋ณ€ ์ƒ์„ฑ\n(๊ฒ€์ƒ‰๋œ ์ปจํ…์ŠคํŠธ ๊ธฐ๋ฐ˜)"]
236
+ Step3 --> LLM2["๐Ÿค– LLM (Answer Synthesis)"]
237
+ end
238
+
239
+ %% ========================
240
+ %% ๊ฒฐ๊ณผ ๋ฐ˜ํ™˜
241
+ %% ========================
242
+ subgraph SG_Result_Return_RAG ["๐Ÿ“ฆ ๊ฒฐ๊ณผ ๋ฐ˜ํ™˜"]
243
+ Step3 -- "์ƒ์„ฑ๋œ ๋งˆ์ผ€ํŒ… ์ „๋žต ํ…์ŠคํŠธ" --> Agent
244
+ end
245
+
246
+ %% ========================
247
+ %% ์Šคํƒ€์ผ
248
+ %% ========================
249
+ style Agent fill:#E91E63,color:#fff
250
+ style Tool_RAG fill:#03A9F4,color:#fff
251
+ style Step1,Step2,Step3 fill:#81D4FA,color:#000
252
+ style VSM fill:#FFC107,color:#000
253
+ style EM fill:#4DD0E1,color:#000
254
+ style LLM2 fill:#BA68C8,color:#fff
255
+ style SG_KnowledgeBase_RAG fill:#F5F5F5,stroke:#9E9E9E
256
+ ```
257
+
258
+ ---
259
+
260
+ ### (4) LLM ๊ธฐ๋ฐ˜ ๋ถ„์„ ๋„๊ตฌ ์ƒ์„ธ โ€” `analyze_merchant_profile` / `analyze_festival_profile`
261
+
262
+ ๊ฐ€๊ฒŒ ๋˜๋Š” ์ถ•์ œ์˜ ํ”„๋กœํ•„(JSON)์„ ์ž…๋ ฅ๋ฐ›์•„
263
+ LLM์ด **SWOT ๋ถ„์„ / ์ฃผ์š” ํŠน์ง• ์š”์•ฝ**์„ ์ˆ˜ํ–‰ํ•ฉ๋‹ˆ๋‹ค.
264
+
265
+ ```mermaid
266
+ graph TD
267
+ %% ========================
268
+ %% Orchestrator ์š”์ฒญ
269
+ %% ========================
270
+ subgraph SG_Orchestrator_Req_Analyze ["๐Ÿง  Orchestrator ์š”์ฒญ"]
271
+ Agent["๐Ÿค– AgentExecutor"] -- "๊ฐ€๊ฒŒ/์ถ•์ œ ๋ถ„์„ ์š”์ฒญ" --> Tool_Analyze["๐Ÿงฉ Tool: analyze_merchant_profile / analyze_festival_profile"]
272
+ end
273
+
274
+ %% ========================
275
+ %% LLM ๋ถ„์„
276
+ %% ========================
277
+ subgraph SG_LLM_Analysis ["๐Ÿ“Š LLM ๋ถ„์„ (tools/profile_analyzer.py)"]
278
+ Tool_Analyze -- "ํ”„๋กœํ•„(JSON) ์ „๋‹ฌ" --> LLM_Analyze["๐Ÿค– LLM (SWOT / ์š”์•ฝ ๋ถ„์„)"]
279
+ end
280
+
281
+ %% ========================
282
+ %% ๊ฒฐ๊ณผ ๋ฐ˜ํ™˜
283
+ %% ========================
284
+ subgraph SG_Result_Return_Analyze ["๐Ÿ“ฆ ๊ฒฐ๊ณผ ๋ฐ˜ํ™˜"]
285
+ LLM_Analyze -- "๋ถ„์„ ๋ณด๊ณ ์„œ ํ…์ŠคํŠธ" --> Agent
286
+ end
287
+
288
+ %% ========================
289
+ %% ์Šคํƒ€์ผ
290
+ %% ========================
291
+ style Agent fill:#E91E63,color:#fff
292
+ style Tool_Analyze fill:#03A9F4,color:#fff
293
+ style LLM_Analyze fill:#BA68C8,color:#fff
294
+ ```
295
+
296
+ ------------------------------------------------------------------------
297
+
298
+ ## ๐Ÿ“ ๋ฐ์ดํ„ฐ ํ๋ฆ„ ์ƒ์„ธ
299
+
300
+ 1. **์ดˆ๊ธฐ ์„ค์ • (UI โ†’ API โ†’ UI)**
301
+ * `streamlit_app.py` ์‹คํ–‰ ์‹œ `load_data()` ํ•จ์ˆ˜๊ฐ€ FastAPI ์„œ๋ฒ„(`api/server.py`)์˜ `/merchants` ์—”๋“œํฌ์ธํŠธ๋ฅผ ํ˜ธ์ถœํ•˜์—ฌ ์ „์ฒด ๊ฐ€๋งน์  ๋ชฉ๋ก(ID, ์ด๋ฆ„)์„ ๋ฐ›์•„์˜ต๋‹ˆ๋‹ค.
302
+ * ์‚ฌ์šฉ์ž๊ฐ€ Streamlit ๋“œ๋กญ๋‹ค์šด ๋ฉ”๋‰ด์—์„œ ์ž์‹ ์˜ ๊ฐ€๊ฒŒ๋ฅผ ์„ ํƒํ•ฉ๋‹ˆ๋‹ค.
303
+ * ์„ ํƒ๋œ ๊ฐ€๊ฒŒ ID๋กœ FastAPI ์„œ๋ฒ„์˜ `/profile` ์—”๋“œํฌ์ธํŠธ๋ฅผ ํ˜ธ์ถœํ•˜์—ฌ ํ•ด๋‹น ๊ฐ€๊ฒŒ์˜ ์ƒ์„ธ ํ”„๋กœํ•„(JSON)๊ณผ ์ƒ๊ถŒ/์—…์ข… ํ‰๊ท  ๋ฐ์ดํ„ฐ๋ฅผ ๋ฐ›์•„์˜ต๋‹ˆ๋‹ค.
304
+ * ๋ฐ›์•„์˜จ ํ”„๋กœํ•„ ๋ฐ์ดํ„ฐ๋Š” `modules/visualization.py`๋ฅผ ํ†ตํ•ด ๊ทธ๋ž˜ํ”„์™€ ํ‘œ๋กœ ์‹œ๊ฐํ™”๋˜์–ด ์‚ฌ์šฉ์ž์—๊ฒŒ ๋ณด์—ฌ์ง€๊ณ , `st.session_state.profile_data`์— ์ €์žฅ๋ฉ๋‹ˆ๋‹ค.
305
+
306
+ 2. **์ปจ์„คํŒ… ์š”์ฒญ (UI โ†’ Orchestrator)**
307
+ * ์‚ฌ์šฉ์ž๊ฐ€ Streamlit ์ฑ„ํŒ… ์ž…๋ ฅ์ฐฝ์— ์งˆ๋ฌธ์„ ์ž…๋ ฅํ•ฉ๋‹ˆ๋‹ค.
308
+ * `streamlit_app.py`๋Š” `orchestrator.invoke_agent()` ํ•จ์ˆ˜๋ฅผ ํ˜ธ์ถœํ•ฉ๋‹ˆ๋‹ค.
309
+ * ์ด๋•Œ **์‚ฌ์šฉ์ž ์งˆ๋ฌธ(Query)**, **์ฑ„ํŒ…์šฉ์œผ๋กœ ๊ฐ€๊ณต๋œ ๊ฐ€๊ฒŒ ํ”„๋กœํ•„(JSON ๋ฌธ์ž์—ด)**, **์ด์ „ ๋Œ€ํ™” ๊ธฐ๋ก(History)**, **๋งˆ์ง€๋ง‰ ์ถ”์ฒœ ์ถ•์ œ ๋ชฉ๋ก(์„ ํƒ์ )**์ด `Orchestrator`๋กœ ์ „๋‹ฌ๋ฉ๋‹ˆ๋‹ค.
310
+
311
+ 3. **์˜๋„ ๋ถ„์„ ๋ฐ ๋„๊ตฌ ๋ผ์šฐํŒ… (Orchestrator โ†’ LLM โ†’ Tool)**
312
+ * `orchestrator.py`์˜ `AgentExecutor`๋Š” ์‹œ์Šคํ…œ ํ”„๋กฌํ”„ํŠธ์™€ ์ „๋‹ฌ๋œ ์ปจํ…์ŠคํŠธ(๊ฐ€๊ฒŒ ํ”„๋กœํ•„, ์งˆ๋ฌธ ๋“ฑ)๋ฅผ ์กฐํ•ฉํ•˜์—ฌ **์ฒซ ๋ฒˆ์งธ LLM(๋„๊ตฌ ์„ ํƒ์šฉ)**์„ ํ˜ธ์ถœํ•ฉ๋‹ˆ๋‹ค.
313
+ * LLM์€ ์งˆ๋ฌธ์˜ ์˜๋„๋ฅผ ๋ถ„์„ํ•˜๊ณ , ์‹œ์Šคํ…œ ํ”„๋กฌํ”„ํŠธ์˜ ๊ฐ€์ด๋“œ๋ผ์ธ์— ๋”ฐ๋ผ `tools/tool_loader.py`์— ์ •์˜๋œ **๋„๊ตฌ ๋ชฉ๋ก ์ค‘ ๊ฐ€์žฅ ์ ํ•ฉํ•œ ๋„๊ตฌ๋ฅผ ์„ ํƒ**ํ•˜๊ณ  ํ•„์š”ํ•œ ์ž…๋ ฅ๊ฐ’(Arguments)์„ ๊ฒฐ์ •ํ•ฉ๋‹ˆ๋‹ค.
314
+
315
+ 4. **๋„๊ตฌ ์‹คํ–‰ (Tool โ†’ Modules/API/VectorDB/LLM)**
316
+ * ์„ ํƒ๋œ ๋„๊ตฌ ํ•จ์ˆ˜(`tools/*.py`)๊ฐ€ ์‹คํ–‰๋ฉ๋‹ˆ๋‹ค.
317
+ * ๋„๊ตฌ๋Š” ํ•„์š”์— ๋”ฐ๋ผ `modules/*.py`์˜ ํ•ต์‹ฌ ๋กœ์ง(์˜ˆ: `FestivalRecommender`), ์™ธ๋ถ€ API(๋‚ ์”จ ๋“ฑ), VectorDB(`modules/knowledge_base.py` ๊ฒฝ์œ ), ๋˜๋Š” ๋ณ„๋„์˜ LLM(`modules/llm_provider.py` ๊ฒฝ์œ )์„ ํ˜ธ์ถœํ•˜์—ฌ ์ž‘์—…์„ ์ˆ˜ํ–‰ํ•ฉ๋‹ˆ๋‹ค.
318
+
319
+ 5. **๊ฒฐ๊ณผ ์ทจํ•ฉ ๋ฐ ๋ฐ˜๋ณต (Tool โ†’ Orchestrator โ†’ LLM โ†’ Tool ...)**
320
+ * ๋„๊ตฌ ์‹คํ–‰ ๊ฒฐ๊ณผ(Observation)๋Š” ๋‹ค์‹œ `AgentExecutor`๋กœ ๋ฐ˜ํ™˜๋ฉ๋‹ˆ๋‹ค.
321
+ * ์—์ด์ „ํŠธ๋Š” ์ด ๊ฒฐ๊ณผ๋ฅผ ๋ฐ”ํƒ•์œผ๋กœ **๋‹ค์Œ ํ–‰๋™์„ ๊ฒฐ์ •**ํ•ฉ๋‹ˆ๋‹ค. (์˜ˆ: ์ถ”๊ฐ€ ์ •๋ณด๊ฐ€ ํ•„์š”ํ•˜๋ฉด ๋‹ค๋ฅธ ๋„๊ตฌ๋ฅผ ํ˜ธ์ถœํ•˜๊ฑฐ๋‚˜, ๋ชจ๋“  ์ •๋ณด๊ฐ€ ๋ชจ์˜€๋‹ค๊ณ  ํŒ๋‹จ๋˜๋ฉด ์ตœ์ข… ๋‹ต๋ณ€ ์ƒ์„ฑ์„ ์ค€๋น„)
322
+ * ์ด "LLM ํŒ๋‹จ โ†’ ๋„๊ตฌ ํ˜ธ์ถœ โ†’ ๊ฒฐ๊ณผ ํ™•์ธ" ๊ณผ์ •์€ ์‚ฌ์šฉ์ž์˜ ์š”์ฒญ์ด ์™„์ „ํžˆ ํ•ด๊ฒฐ๋  ๋•Œ๊นŒ์ง€ **์—ฌ๋Ÿฌ ๋ฒˆ ๋ฐ˜๋ณต**๋  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค (Agentic ํŠน์„ฑ).
323
+
324
+ 6. **์ตœ์ข… ๋‹ต๋ณ€ ์ƒ์„ฑ ๋ฐ ์ถœ๋ ฅ (Orchestrator โ†’ LLM โ†’ UI)**
325
+ * `AgentExecutor`๊ฐ€ ์ตœ์ข…์ ์œผ๋กœ ๋„์ถœํ•œ ๊ฒฐ๊ณผ(`response['output']`) ๋˜๋Š” ํ•„์š” ์‹œ `orchestrator.py`๊ฐ€ ์ง์ ‘ **๋‘ ๋ฒˆ์งธ LLM(๋‹ต๋ณ€ ์ƒ์„ฑ์šฉ)**์„ ํ˜ธ์ถœํ•˜์—ฌ, ๋ชจ๋“  ์ค‘๊ฐ„ ๊ฒฐ๊ณผ์™€ ์ปจํ…์ŠคํŠธ๋ฅผ ์ข…ํ•ฉํ•œ **์ตœ์ข… ์ปจ์„คํŒ… ๋ณด๊ณ ์„œ(์ž์—ฐ์–ด)**๋ฅผ ์ƒ์„ฑํ•ฉ๋‹ˆ๋‹ค.
326
+ * ์ƒ์„ฑ๋œ ๋ณด๊ณ ์„œ๋Š” `streamlit_app.py`๋กœ ๋ฐ˜ํ™˜๋˜์–ด ์‚ฌ์šฉ์ž ํ™”๋ฉด์— ์ถœ๋ ฅ๋ฉ๋‹ˆ๋‹ค.
327
+
328
+ ---
329
+
330
+ ## โš™๏ธ ์ฃผ์š” ํŠน์ง• ์š”์•ฝ
331
+
332
+ | ๊ธฐ๋Šฅ | ์„ค๋ช… |
333
+ | :--------------------- | :------------------------------------------------------------------------------------------- |
334
+ | **Agentic RAG** | LLM ์—์ด์ „ํŠธ๊ฐ€ ๊ฐ€๊ฒŒ ํ”„๋กœํ•„ ์ปจํ…์ŠคํŠธ๋ฅผ ๋ฐ”ํƒ•์œผ๋กœ ์Šค์Šค๋กœ ๋„๊ตฌ๋ฅผ ์„ ํƒํ•˜๊ณ , ๋™์ ์œผ๋กœ RAG ๊ฒ€์ƒ‰ ์ฟผ๋ฆฌ๋ฅผ ์ƒ์„ฑํ•˜์—ฌ ์‹คํ–‰ |
335
+ | **Tool Calling Agent** | LangChain์˜ `create_tool_calling_agent`๋ฅผ ์‚ฌ์šฉํ•˜์—ฌ ์—ฌ๋Ÿฌ ๋„๊ตฌ๋ฅผ ์ž์œจ์ ์œผ๋กœ ํ˜ธ์ถœ ๋ฐ ์—ฐ๊ณ„ |
336
+ | **ํ•˜์ด๋ธŒ๋ฆฌ๋“œ ์ถ”์ฒœ** | FAISS ๋ฒกํ„ฐ ๊ฒ€์ƒ‰(์œ ์‚ฌ๋„) + LLM ๋™์  ํ‰๊ฐ€(๋งž์ถค์„ฑ) ์ ์ˆ˜๋ฅผ ๊ฒฐํ•ฉํ•˜์—ฌ ์ถ•์ œ ์ถ”์ฒœ ์ •ํ™•๋„ ํ–ฅ์ƒ |
337
+ | **์ปจํ…์ŠคํŠธ ๊ธฐ๋ฐ˜ ๋ถ„์„** | ๋ชจ๋“  ๋„๊ตฌ ํ˜ธ์ถœ ๋ฐ ์ตœ์ข… ๋‹ต๋ณ€ ์ƒ์„ฑ ์‹œ, ํ˜„์žฌ ๋ถ„์„ ์ค‘์ธ ๊ฐ€๊ฒŒ์˜ ํ”„๋กœํ•„(JSON)์„ ํ•ต์‹ฌ ์ปจํ…์ŠคํŠธ๋กœ ํ™œ์šฉ |
338
+ | **๋ชจ๋“ˆํ™”๋œ ๊ตฌ์กฐ** | ๊ธฐ๋Šฅ๋ณ„(UI, API, Orchestrator, Modules, Tools)๋กœ ์ฝ”๋“œ๋ฅผ ๋ถ„๋ฆฌํ•˜์—ฌ ์œ ์ง€๋ณด์ˆ˜์„ฑ ๋ฐ ํ™•์žฅ์„ฑ ์ฆ๋Œ€ |
339
+ | **๋ฐ์ดํ„ฐ ์บ์‹ฑ** | Streamlit์˜ `@st.cache_data` / `@st.cache_resource`๋ฅผ ํ™œ์šฉํ•˜์—ฌ ๋ฐ์ดํ„ฐ ๋ฐ ๋ชจ๋ธ ๋กœ๋”ฉ ์†๋„ ์ตœ์ ํ™” |
340
+
341
+ ---
342
+
343
+ ## ๐Ÿ’ก ๊ธฐ์ˆ  ์Šคํƒ
344
+
345
+ * **Frontend:** Streamlit
346
+ * **Backend (Data API):** FastAPI
347
+ * **LLM:** Google Gemini 2.5 Flash (`gemini-2.5-flash`)
348
+ * **AI Framework:** LangChain (Agents, Tool Calling, Prompts)
349
+ * **VectorStore:** FAISS (Facebook AI Similarity Search)
350
+ * **Embedding model:** HuggingFace `dragonkue/BGE-m3-ko` (ํ•œ๊ตญ์–ด ํŠนํ™” ๋ชจ๋ธ)
351
+ * **Data Handling:** Pandas, NumPy
352
+ * **Visualization:** Matplotlib
353
+
354
+ ---
355
+
356
+ ## ๐Ÿš€ ์‹คํ–‰ ๋ฐฉ๋ฒ•
357
+
358
+ ### 1๏ธโƒฃ ์‚ฌ์ „ ์ค€๋น„
359
+
360
+ * Python 3.11 ์ด์ƒ ์„ค์น˜
361
+ * `uv` (Python ํŒจํ‚ค์ง€ ์„ค์น˜ ๋„๊ตฌ) ์„ค์น˜ (`pip install uv`)
362
+ * Google API Key ๋ฐœ๊ธ‰ (Gemini ๋ชจ๋ธ ์‚ฌ์šฉ)
363
+
364
+ ### 2๏ธโƒฃ FastAPI ์„œ๋ฒ„ ์‹คํ–‰
365
+
366
+ FastAPI ์„œ๋ฒ„๋Š” ๊ฐ€๋งน์  ๋ฐ์ดํ„ฐ(`final_df.csv`)๋ฅผ ๋กœ๋“œํ•˜๊ณ , `/profile` (๊ฐ€๊ฒŒ ์ƒ์„ธ ์ •๋ณด), `/merchants` (๊ฐ€๊ฒŒ ๋ชฉ๋ก) ์—”๋“œํฌ์ธํŠธ๋ฅผ ์ œ๊ณตํ•ฉ๋‹ˆ๋‹ค.
367
+
368
+ ```bash
369
+ # 1. ํ”„๋กœ์ ํŠธ ๋ฃจํŠธ ํด๋”๋กœ ์ด๋™
370
+ cd C:(๋‹ค์šด๋ฐ›์€ ํด๋” ์œ„์น˜)
371
+
372
+ # 2. ๊ฐ€์ƒํ™˜๊ฒฝ ์ƒ์„ฑ ๋ฐ ํ™œ์„ฑํ™” (์ตœ์ดˆ 1ํšŒ)
373
+ uv venv
374
+
375
+ # 3. ๊ฐ€์ƒํ™˜๊ฒฝ ํ™œ์„ฑํ™” (Windows)
376
+ .\.venv\Scripts\activate.bat
377
+ # (macOS/Linux: source .venv/bin/activate)
378
+
379
+ # 4. ํ•„์š”ํ•œ ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ ์„ค์น˜
380
+ uv pip install -r requirements.txt
381
+
382
+ # 5. FastAPI ์„œ๋ฒ„ ์‹คํ–‰ (api ํด๋”์˜ server.py๋ฅผ ๋ชจ๋“ˆ๋กœ ์‹คํ–‰)
383
+ python -m api.server
+ ```
384
+
385
+ ### 3๏ธโƒฃ Streamlit ์•ฑ ์‹คํ–‰
386
+
387
+ Streamlit ์•ฑ์€ ์‚ฌ์šฉ์ž ์ธํ„ฐํŽ˜์ด์Šค๋ฅผ ์ œ๊ณตํ•˜๊ณ , FastAPI ์„œ๋ฒ„์—์„œ ๋ฐ์ดํ„ฐ๋ฅผ ๊ฐ€์ ธ์˜ค๋ฉฐ, `Orchestrator`๋ฅผ ํ†ตํ•ด AI ์ปจ์„คํŒ…์„ ์ˆ˜ํ–‰ํ•ฉ๋‹ˆ๋‹ค.
388
+
389
+ ```bash
390
+ # 1. (FastAPI ์„œ๋ฒ„์™€ ๋‹ค๋ฅธ ํ„ฐ๋ฏธ๋„์—์„œ) ํ”„๋กœ์ ํŠธ ๋ฃจํŠธ ํด๋”๋กœ ์ด๋™
391
+ cd C:\(๋‹ค์šด๋ฐ›์€ ํด๋” ์œ„์น˜)
392
+
393
+ # 2. ๊ฐ€์ƒํ™˜๊ฒฝ ํ™œ์„ฑํ™” (Windows)
394
+ .\.venv\Scripts\activate.bat
395
+ # (macOS/Linux: source .venv/bin/activate)
396
+
397
+ # 3. Streamlit secrets ํŒŒ์ผ ์ƒ์„ฑ (์ตœ์ดˆ 1ํšŒ)
398
+ # - .streamlit ํด๋”๋ฅผ ์ƒ์„ฑํ•ฉ๋‹ˆ๋‹ค.
399
+ mkdir .streamlit
400
+ # ์•„๋ž˜ ๋ช…๋ น์–ด์˜ "(๋ฐœ๊ธ‰๋ฐ›์€ gemini API key)" ๋ถ€๋ถ„์„ ์‹ค์ œ ํ‚ค๋กœ ๋Œ€์ฒดํ•˜์„ธ์š”.
401
+ echo GOOGLE_API_KEY="(๋ฐœ๊ธ‰๋ฐ›์€ gemini API key)" > .streamlit\secrets.toml
402
+
403
+ # 4. Streamlit ์•ฑ ์‹คํ–‰
404
+ uv run streamlit run streamlit_app.py
405
+ ```
406
+ ์ด์ œ ์›น ๋ธŒ๋ผ์šฐ์ €์—์„œ Streamlit ์•ฑ ์ฃผ์†Œ(๋ณดํ†ต http://localhost:8501)๋กœ ์ ‘์†ํ•˜์—ฌ MarketSync๋ฅผ ์‚ฌ์šฉํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.
407
+
408
+ ------------------------------------------------------------------------
409
+
410
+ ## ๐Ÿ“ˆ ์˜ˆ์‹œ ์‹œ๋‚˜๋ฆฌ์˜ค
411
+
412
+ | ์‚ฌ์šฉ์ž ์ž…๋ ฅ | ์ฃผ์š” ์‹คํ–‰ ๋„๊ตฌ | ์˜ˆ์ƒ ๊ฒฐ๊ณผ |
413
+ | :---------------------------------- | :------------------------------------------------------ | :-------------------------------------- |
414
+ | "์šฐ๋ฆฌ ๊ฐ€๊ฒŒ ๋ถ„์„ํ•ด์ค˜" | `analyze_merchant_profile` | ๊ฐ€๊ฒŒ SWOT ๋ถ„์„ ๋ฐ ํ•ต์‹ฌ ๊ณ ๊ฐ ๋ฆฌํฌํŠธ |
415
+ | "์ฃผ๋ง ๋ฐฉ๋ฌธ๊ฐ ๋Š˜๋ฆด ๋งŒํ•œ ์ถ•์ œ ์ถ”์ฒœํ•ด์ค˜" | `recommend_festivals` | Top 3 ๋งž์ถค ์ถ•์ œ ์ถ”์ฒœ ๋ฆฌ์ŠคํŠธ |
416
+ | "`์„œ์šธ๋””์ €ํŠธํŽ˜์–ด` ๋งˆ์ผ€ํŒ… ์ „๋žต ์•Œ๋ ค์ค˜" | `create_festival_specific_marketing_strategy` | ํ•ด๋‹น ์ถ•์ œ ๋งž์ถคํ˜• ๋งˆ์ผ€ํŒ… ์ „๋žต ์ œ์•ˆ |
417
+ | "์ถ”์ฒœ๋œ ์ถ•์ œ๋“ค ๋งˆ์ผ€ํŒ… ๋ฐฉ๋ฒ• ์•Œ๋ ค์ค˜" | `create_marketing_strategies_for_multiple_festivals` | ์—ฌ๋Ÿฌ ์ถ•์ œ์— ๋Œ€ํ•œ ํ†ตํ•ฉ ๋งˆ์ผ€ํŒ… ์ „๋žต ์ œ์•ˆ |
418
+ | "์š”์ฆ˜ ๋œจ๋Š” ํ™๋ณด ๋ฐฉ๋ฒ• ์•Œ๋ ค์ค˜" | `search_contextual_marketing_strategy` (RAG) | ๊ฐ€๊ฒŒ ํŠน์„ฑ ๊ธฐ๋ฐ˜ ์ตœ์‹  ๋งˆ์ผ€ํŒ… ํŠธ๋ Œ๋“œ/ํŒ |
419
+
420
+ ---
421
+
422
+ ## ๐Ÿง  ํ•ต์‹ฌ ์•„์ด๋””์–ด
423
+
424
+ > "LLM์ด ์Šค์Šค๋กœ ๋„๊ตฌ๋ฅผ ์„ ํƒํ•˜๊ณ  ์‹คํ–‰ํ•˜๋Š” **Agentic RAG**"
425
+
426
+ * **LangChain์˜ Tool-Calling Agent ๊ตฌ์กฐ**: LLM์ด ์‚ฌ์šฉ์ž์˜ ๋ณต์žกํ•œ ์š”์ฒญ์„ ์ดํ•ดํ•˜๊ณ , ํ•„์š”ํ•œ ๊ธฐ๋Šฅ(๋„๊ตฌ)์„ ์ž์œจ์ ์œผ๋กœ ํ˜ธ์ถœํ•˜๋ฉฐ ์ž‘์—…์„ ์ˆ˜ํ–‰ํ•ฉ๋‹ˆ๋‹ค.
427
+ * **์ปจํ…์ŠคํŠธ ๊ธฐ๋ฐ˜ ์˜์‚ฌ๊ฒฐ์ •**: ๊ฐ€๊ฒŒ ํ”„๋กœํ•„(JSON) ๋ฐ์ดํ„ฐ๋ฅผ ํ•ต์‹ฌ ์ปจํ…์ŠคํŠธ๋กœ ํ™œ์šฉํ•˜์—ฌ, ๋ชจ๋“  ๋ถ„์„๊ณผ ์ถ”์ฒœ์ด ํ˜„์žฌ ๋ถ„์„ ์ค‘์ธ ๊ฐ€๊ฒŒ์— ๋งž์ถฐ ์ด๋ฃจ์–ด์ง‘๋‹ˆ๋‹ค.
428
+ * **ํ•˜์ด๋ธŒ๋ฆฌ๋“œ ์ถ”์ฒœ ์—”์ง„**: FAISS ๋ฒกํ„ฐ ๊ฒ€์ƒ‰(์œ ์‚ฌ๋„ ๊ธฐ๋ฐ˜)๊ณผ LLM ์žฌํ‰๊ฐ€(๊ฐ€๊ฒŒ ๋งž์ถค์„ฑ ๊ธฐ๋ฐ˜)๋ฅผ ๊ฒฐํ•ฉํ•˜์—ฌ ์ถ”์ฒœ์˜ ์ •ํ™•์„ฑ๊ณผ ๊ด€๋ จ์„ฑ์„ ๊ทน๋Œ€ํ™”ํ•ฉ๋‹ˆ๋‹ค.
429
+ "# MarketSync"
assets/ShinhanCard_Logo.png ADDED

Git LFS Details

  • SHA256: d4eb7bcd57834946c7ca4db1328261c1d8ab1ca79823b76d8079468b581a994b
  • Pointer size: 130 Bytes
  • Size of remote file: 10.7 kB
assets/Synapse.png ADDED

Git LFS Details

  • SHA256: 40a2922f539dcb542a7df7d06a8d1c88d3b4372c368e2ccb87238e504ad15d99
  • Pointer size: 131 Bytes
  • Size of remote file: 262 kB
config.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # config.py
2
+
3
+ import logging
4
+ from pathlib import Path
5
+
6
+ # --- Paths ---
7
+ PROJECT_ROOT = Path(__file__).resolve().parent
8
+ PATH_DATA_DIR = PROJECT_ROOT / 'data'
9
+ PATH_VECTORSTORE_DIR = PROJECT_ROOT / 'vectorstore'
10
+ ASSETS = PROJECT_ROOT / "assets"
11
+
12
+ # Data Files
13
+ PATH_FINAL_DF = PATH_DATA_DIR / 'final_df.csv'
14
+ PATH_FESTIVAL_DF = PATH_DATA_DIR / 'festival_df.csv'
15
+
16
+ # Vectorstore Paths
17
+ PATH_FAISS_MARKETING = PATH_VECTORSTORE_DIR / 'faiss_marketing'
18
+ PATH_FAISS_FESTIVAL = PATH_VECTORSTORE_DIR / 'faiss_festival'
19
+
20
+
21
+ # --- API ---
22
+ API_SERVER_URL = "http://127.0.0.1:8000"
23
+ API_PROFILE_ENDPOINT = f"{API_SERVER_URL}/profile"
24
+ API_MERCHANTS_ENDPOINT = f"{API_SERVER_URL}/merchants"
25
+
26
+
27
+ # --- Models ---
28
+ LLM_MODEL_NAME = "gemini-2.5-pro"
29
+ EMBEDDING_MODEL = "dragonkue/BGE-m3-ko"
30
+
31
+
32
+ # --- RAG Weights ---
33
+ FESTIVAL_EMBEDDING_WEIGHT = 0.4
34
+ FESTIVAL_DYNAMIC_WEIGHT = 0.6
35
+
36
+
37
+ # --- Logging ---
38
+ LOGGING_LEVEL = logging.INFO
39
+ LOGGING_FORMAT = "%(asctime)s - [%(levelname)s] - %(name)s (%(funcName)s): %(message)s"
40
+
41
+ def get_logger(name: str):
42
+ """
43
+ ํ‘œ์ค€ํ™”๋œ ํฌ๋งท์œผ๋กœ ๋กœ๊ฑฐ๋ฅผ ๋ฐ˜ํ™˜ํ•ฉ๋‹ˆ๋‹ค.
44
+ """
45
+ logging.basicConfig(level=LOGGING_LEVEL, format=LOGGING_FORMAT)
46
+ logger = logging.getLogger(name)
47
+ logger.setLevel(LOGGING_LEVEL)
48
+ return logger
data/big_data_set1_f.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70c7eb4e4ab4351dfa309f26589738e7b1f804cabd8499bc37308be4cc45d510
3
+ size 377760
data/big_data_set2_f.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:caf8ba53f32c65d1666703b60c503d4e3f57eddfa322679ecf3a64498a9bb7b7
3
+ size 10299462
data/big_data_set3_f.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ef850fcc6a15db9366603f077bd59214454401a4d57b93276e46452516bedfd
3
+ size 9622819
data/festival_df.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6805b51be222cf84d60b01aeb7c4ec7f99d4cde1662a82fd9c9d145d3bad2eb
3
+ size 259519
data/final_df.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ccc3ea7f1a9f714b6d9bd538faabfcf30d4995055e86ea027f386c5c4855999
3
+ size 34118076
dict ADDED
File without changes
format ADDED
File without changes
list ADDED
File without changes
modules/filtering.py ADDED
@@ -0,0 +1,397 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # modules/filtering.py
2
+
3
+ import json
4
+ import traceback
5
+ from typing import List, Dict, Any, Optional, Tuple
6
+ import pandas as pd
7
+
8
+ from langchain_core.messages import HumanMessage
9
+ from langchain_community.vectorstores import FAISS
10
+ from langchain_core.documents import Document
11
+
12
+ import config
13
+ from modules.knowledge_base import load_festival_vectorstore
14
+ from modules.llm_provider import get_llm
15
+ from utils.parser_utils import extract_json_from_llm_response
16
+
17
+ logger = config.get_logger(__name__)
18
+
19
+ # 4๋ฒˆ ์ œ์•ˆ: ํŒŒ์ดํ”„๋ผ์ธ ๋กœ์ง์„ ํด๋ž˜์Šค๋กœ ์บก์Аํ™”
20
+ class FestivalRecommender:
21
+ """
22
+ ํ•˜์ด๋ธŒ๋ฆฌ๋“œ ์ถ•์ œ ์ถ”์ฒœ ํŒŒ์ดํ”„๋ผ์ธ์„ ์บก์Аํ™”ํ•œ ํด๋ž˜์Šค.
23
+ """
24
+ def __init__(self, store_profile: str, user_query: str, specific_intent: Optional[str] = None):
25
+ self.store_profile = store_profile
26
+ self.user_query = user_query
27
+ self.specific_intent = specific_intent
28
+
29
+ # LLM ์ธ์Šคํ„ด์Šค๋ฅผ ๋ฏธ๋ฆฌ ์ƒ์„ฑ
30
+ self.llm_temp_01 = get_llm(0.1)
31
+ self.llm_temp_03 = get_llm(0.3)
32
+
33
+ # VectorStore ๋กœ๋“œ
34
+ self.vectorstore = load_festival_vectorstore()
35
+
36
+ # ๊ฐ€์ค‘์น˜ (config์—์„œ ๋กœ๋“œ)
37
+ self.embedding_weight = config.FESTIVAL_EMBEDDING_WEIGHT
38
+ self.dynamic_weight = config.FESTIVAL_DYNAMIC_WEIGHT
39
+
40
+ def _rewrite_query(self) -> str:
41
+ """
42
+ (1๋‹จ๊ณ„) ๊ฐ€๊ฒŒ ํ”„๋กœํ•„๊ณผ ์‚ฌ์šฉ์ž ์งˆ๋ฌธ์„ ๋ฐ”ํƒ•์œผ๋กœ Vector Store ๊ฒ€์ƒ‰์šฉ ์ฟผ๋ฆฌ๋ฅผ LLM์ด ์žฌ์ž‘์„ฑํ•ฉ๋‹ˆ๋‹ค.
43
+ """
44
+ logger.info("--- [Filter 1/5] ์ฟผ๋ฆฌ ์žฌ์ž‘์„ฑ ์‹œ์ž‘ ---")
45
+
46
+ intent_prompt = f"์‚ฌ์šฉ์ž์˜ ๊ตฌ์ฒด์ ์ธ ์š”์ฒญ: {self.specific_intent}" if self.specific_intent else ""
47
+
48
+ # --- (์‚ฌ์šฉ์ž ์š”์ฒญ) ํ”„๋กฌํ”„ํŠธ ์›๋ณธ ์œ ์ง€ ---
49
+ prompt = f"""
50
+ ๋‹น์‹ ์€ ์†Œ์ƒ๊ณต์ธ ๋งˆ์ผ€ํŒ…์„ ์œ„ํ•œ AI ์ปจ์„คํ„ดํŠธ์ž…๋‹ˆ๋‹ค.
51
+ ๋‹น์‹ ์˜ ์ž„๋ฌด๋Š” [๊ฐ€๊ฒŒ ํ”„๋กœํ•„]๊ณผ [์‚ฌ์šฉ์ž ์งˆ๋ฌธ]์˜ ์˜๋„๋ฅผ ์™„๋ฒฝํ•˜๊ฒŒ ์ดํ•ดํ•˜๊ณ ,
52
+ ์ด ๊ฐ€๊ฒŒ์— ๊ฐ€์žฅ ์ ํ•ฉํ•œ ์ถ•์ œ๋ฅผ ์ฐพ๊ธฐ ์œ„ํ•œ '์ตœ์ ์˜ ๊ฒ€์ƒ‰ ํ‚ค์›Œ๋“œ'๋ฅผ ์ƒ์„ฑํ•˜๋Š” ๊ฒƒ์ž…๋‹ˆ๋‹ค.
53
+ ๊ฒ€์ƒ‰ ์—”์ง„์€ '์ถ•์ œ ์†Œ๊ฐœ ๋‚ด์šฉ'์„ ๊ธฐ๋ฐ˜์œผ๋กœ ์œ ์‚ฌ๋„๋ฅผ ์ธก์ •ํ•˜์—ฌ ์ถ•์ œ๋ฅผ ์ฐพ์•„๋ƒ…๋‹ˆ๋‹ค.
54
+
55
+ [๊ฐ€๊ฒŒ ํ”„๋กœํ•„]
56
+ {self.store_profile}
57
+
58
+ [์‚ฌ์šฉ์ž ์งˆ๋ฌธ]
59
+ {self.user_query}
60
+ {intent_prompt}
61
+
62
+ [๊ฒ€์ƒ‰ ํ‚ค์›Œ๋“œ ์ƒ์„ฑ ๊ฐ€์ด๋“œ]
63
+ 1. ๊ฐ€๊ฒŒ์˜ '์—…์ข…', '์ƒ๊ถŒ', '์ฃผ์š” ๊ณ ๊ฐ์ธต(์„ฑ๋ณ„/์—ฐ๋ น)'์„ ํ•ต์‹ฌ ํ‚ค์›Œ๋“œ๋กœ ์‚ฌ์šฉํ•˜์„ธ์š”.
64
+ 2. ๊ฐ€๊ฒŒ์˜ '๊ฐ•์ '์ด๋‚˜ '์•ฝ์ '์„ ๋ณด์™„ํ•  ์ˆ˜ ์žˆ๋Š” ๋ฐฉํ–ฅ์„ ๊ณ ๋ คํ•˜์„ธ์š”.
65
+ (์˜ˆ: '์‹ ๊ทœ ๊ณ ๊ฐ ํ™•๋ณด'๊ฐ€ ํ•„์š”ํ•˜๋ฉด '์œ ๋™ ์ธ๊ตฌ', '๊ด€๊ด‘๊ฐ', '๋Œ€๊ทœ๋ชจ' ๋“ฑ)
66
+ (์˜ˆ: '๊ฐ๋‹จ๊ฐ€'๊ฐ€ ๋‚ฎ์œผ๋ฉด '๊ตฌ๋งค๋ ฅ ๋†’์€', '3040๋Œ€ ์ง์žฅ์ธ' ๋“ฑ)
67
+ 3. ์‚ฌ์šฉ์ž ์งˆ๋ฌธ์˜ ์˜๋„๋ฅผ ๋ฐ˜์˜ํ•˜์„ธ์š”. (์˜ˆ: '์—ฌ๋ฆ„' ์ถ•์ œ, 'ํŠน์ • ์ง€์—ญ' ์ถ•์ œ)
68
+ 4. 5~8๊ฐœ์˜ ํ•ต์‹ฌ ํ‚ค์›Œ๋“œ๋ฅผ ์กฐํ•ฉํ•˜์—ฌ ์ž์—ฐ์Šค๋Ÿฌ์šด ๋ฌธ์žฅ์ด๋‚˜ ๊ตฌ๋ฌธ์œผ๋กœ ๋งŒ๋“œ์„ธ์š”.
69
+
70
+ [๊ฒ€์ƒ‰ ํ‚ค์›Œ๋“œ ์ƒ์„ฑ ๋‹จ๊ณ„ ๋ฐ ๊ฐ€์ด๋“œ]
71
+ 1. **๋ถ„์„:** ๊ฐ€๊ฒŒ ํ”„๋กœํ•„(์—…์ข…, ์ƒ๊ถŒ, ์ฃผ์š” ๊ณ ๊ฐ)์„ ๋ฐ”ํƒ•์œผ๋กœ ํ˜„์žฌ ๊ฐ€๊ฒŒ๊ฐ€ ๋งˆ์ผ€ํŒ…์ ์œผ๋กœ ๊ฐ€์žฅ ํ•„์š”๋กœ ํ•˜๋Š” ๊ฒƒ(์˜ˆ: ์‹ ๊ทœ ๊ณ ๊ฐ ์œ ์ž…, ๊ฐ๋‹จ๊ฐ€ ์ƒ์Šน, ํŠน์ • ์—ฐ๋ น๋Œ€ ํ™•๋ณด)์ด ๋ฌด์—‡์ธ์ง€ ๋‚ด๋ถ€์ ์œผ๋กœ ๋ถ„์„ํ•ฉ๋‹ˆ๋‹ค.
72
+ 2. **๋ชฉํ‘œ ์„ค์ •:** ๋ถ„์„ ๊ฒฐ๊ณผ์™€ ์‚ฌ์šฉ์ž ์งˆ๋ฌธ์˜ ์˜๋„๋ฅผ ๊ฒฐํ•ฉํ•˜์—ฌ, ์ถ•์ œ์— ๊ธฐ๋Œ€ํ•˜๋Š” ์ตœ์ข…์ ์ธ ๋ชฉํ‘œ๋ฅผ ๋ช…ํ™•ํžˆ ํ•ฉ๋‹ˆ๋‹ค. (์˜ˆ: "20๋Œ€ ์—ฌ์„ฑ์˜ ์œ ์ž…์„ ์ฆ๊ฐ€์‹œํ‚ฌ ์ถ•์ œ", "๊ฐ€์กฑ ๋‹จ์œ„ ๊ด€๊ด‘๊ฐ์ด ๋งŽ์€ ์ถ•์ œ")
73
+ 3. **ํ‚ค์›Œ๋“œ ์ถ”์ถœ:** ์„ค์ •๋œ ๋ชฉํ‘œ์— ๋ถ€ํ•ฉํ•˜๋Š” **ํ•ต์‹ฌ ํ‚ค์›Œ๋“œ 7๊ฐœ๋ฅผ ๋ช…์‚ฌ ํ˜•ํƒœ๋กœ ์ถ”์ถœ**ํ•ฉ๋‹ˆ๋‹ค.
74
+ - '์—…์ข…', '์ฃผ์š” ๊ณ ๊ฐ์ธต(์„ฑ๋ณ„/์—ฐ๋ น)', 'ํ•„์š”ํ•œ ๊ณ ๊ฐ ์œ ์ž… ํ˜•ํƒœ(์˜ˆ: ๊ด€๊ด‘๊ฐ, ๊ฐ€์กฑ๋‹จ์œ„, ์ง์žฅ์ธ)', '์‹œ์ฆŒ/ํ…Œ๋งˆ'๋ฅผ ํฌํ•จํ•˜์—ฌ ๊ตฌ์ฒด์ ์œผ๋กœ ๋งŒ๋“ญ๋‹ˆ๋‹ค.
75
+
76
+
77
+ [์ถœ๋ ฅ ํ˜•์‹]
78
+ (์˜ค์ง ์žฌ์ž‘์„ฑ๋œ ์ฟผ๋ฆฌ๋งŒ ์ถœ๋ ฅ)
79
+ """
80
+
81
+ try:
82
+ response = self.llm_temp_01.invoke([HumanMessage(content=prompt)])
83
+ rewritten_query = response.content.strip().replace('"', '').replace("'", "")
84
+
85
+ if not rewritten_query:
86
+ logger.warning("--- [Filter 1/5 ERROR] ์ฟผ๋ฆฌ ์žฌ์ž‘์„ฑ ์‹คํŒจ, ์›๋ณธ ์ฟผ๋ฆฌ ์‚ฌ์šฉ ---")
87
+ return self.user_query
88
+
89
+ return rewritten_query
90
+
91
+ except Exception as e:
92
+ logger.critical(f"--- [Filter 1/5 CRITICAL ERROR] {e} ---", exc_info=True)
93
+ return self.user_query # ์‹คํŒจ ์‹œ ์›๋ณธ ์ฟผ๋ฆฌ ๋ฐ˜ํ™˜
94
+
95
+ def _search_candidates(self, query: str, k: int) -> List[Tuple[Document, float]]:
96
+ """
97
+ (2๋‹จ๊ณ„) ์žฌ์ž‘์„ฑ๋œ ์ฟผ๋ฆฌ๋ฅผ ์‚ฌ์šฉํ•˜์—ฌ Vector Store์—์„œ K๊ฐœ์˜ ํ›„๋ณด๋ฅผ ๊ฒ€์ƒ‰ํ•ฉ๋‹ˆ๋‹ค.
98
+ """
99
+ logger.info(f"--- [Filter 2/5] ํ›„๋ณด ๊ฒ€์ƒ‰ (์ž„๋ฒ ๋”ฉ ์ ์ˆ˜) ์‹œ์ž‘ (Query: {query}) ---")
100
+ try:
101
+ if self.vectorstore is None:
102
+ raise RuntimeError("์ถ•์ œ ๋ฒกํ„ฐ์Šคํ† ์–ด๊ฐ€ ๋กœ๋“œ๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.")
103
+
104
+ candidates_with_scores = self.vectorstore.similarity_search_with_relevance_scores(query, k=k)
105
+ return candidates_with_scores
106
+
107
+ except Exception as e:
108
+ logger.critical(f"--- [Filter 2/5 CRITICAL ERROR] {e} ---", exc_info=True)
109
+ return []
110
+
111
+ def _evaluate_candidates_dynamically(self, candidates: List[Document]) -> Dict[str, Dict[str, Any]]:
112
+ """
113
+ (3๋‹จ๊ณ„) LLM์„ ์‚ฌ์šฉํ•˜์—ฌ ํ›„๋ณด๋“ค์˜ '๋™์  ์†์„ฑ'์„ ํ‰๊ฐ€ํ•ฉ๋‹ˆ๋‹ค.
114
+ """
115
+ logger.info(f"--- [Filter 3/5] ๋™์  ์†์„ฑ ํ‰๊ฐ€ (LLM) ์‹œ์ž‘ (ํ›„๋ณด {len(candidates)}๊ฐœ) ---")
116
+
117
+ candidates_data = []
118
+ for doc in candidates:
119
+ meta = doc.metadata
120
+ candidates_data.append({
121
+ "์ถ•์ œ๋ช…": meta.get('์ถ•์ œ๋ช…'),
122
+ "์ฃผ์š”์„ฑ๋ณ„": meta.get('์ฃผ์š”์„ฑ๋ณ„'),
123
+ "์ฃผ์š”์—ฐ๋ น๋Œ€": meta.get('์ฃผ์š”์—ฐ๋ น๋Œ€'),
124
+ "์ฃผ์š”๊ณ ๊ฐ์ธต": meta.get('์ฃผ์š”๊ณ ๊ฐ์ธต'),
125
+ "์ฃผ์š”๋ฐฉ๋ฌธ์ž": meta.get('์ฃผ์š”๋ฐฉ๋ฌธ์ž'),
126
+ "์ถ•์ œ์ธ๊ธฐ": meta.get('์ถ•์ œ์ธ๊ธฐ'),
127
+ "์ถ•์ œ์ธ๊ธฐ๋„": meta.get('์ถ•์ œ์ธ๊ธฐ๋„'),
128
+ "์ธ๊ธฐ๋„_์ ์ˆ˜": meta.get('์ธ๊ธฐ๋„_์ ์ˆ˜')
129
+ })
130
+
131
+ candidates_json_str = json.dumps(candidates_data, ensure_ascii=False, indent=2)
132
+
133
+ # --- (์‚ฌ์šฉ์ž ์š”์ฒญ) ํ”„๋กฌํ”„ํŠธ ์›๋ณธ ์œ ์ง€ ---
134
+ prompt = f"""
135
+ ๋‹น์‹ ์€ ๋ƒ‰์ฒ ํ•œ ์ถ•์ œ ๋ฐ์ดํ„ฐ ๋ถ„์„๊ฐ€์ž…๋‹ˆ๋‹ค. [๊ฐ€๊ฒŒ ํ”„๋กœํ•„]๊ณผ [์‚ฌ์šฉ์ž ์š”์ฒญ]์„ ๋ฐ”ํƒ•์œผ๋กœ,
136
+ ๊ฐ [์ถ•์ œ ํ›„๋ณด]๊ฐ€ ์ด ๊ฐ€๊ฒŒ์˜ 'ํƒ€๊ฒŸ ๊ณ ๊ฐ' ๋ฐ '๋งˆ์ผ€ํŒ… ๋ชฉํ‘œ'์™€ ์–ผ๋งˆ๋‚˜ ์ž˜ ๋งž๋Š”์ง€
137
+ **์˜ค์ง ์ œ๊ณต๋œ '๋™์  ์†์„ฑ' (์ฃผ์š”์„ฑ๋ณ„, ์ฃผ์š”์—ฐ๋ น๋Œ€, ์ฃผ์š”๊ณ ๊ฐ์ธต, ์ฃผ์š”๋ฐฉ๋ฌธ์ž, ์ธ๊ธฐ๋„)๋งŒ์„
138
+ ๊ธฐ์ค€์œผ๋กœ** ํ‰๊ฐ€ํ•˜๊ณ  '๋™์ _์ ์ˆ˜' (0~100์ )๋ฅผ ๋งค๊ธฐ์„ธ์š”.
139
+
140
+ [๊ฐ€๊ฒŒ ํ”„๋กœํ•„]
141
+ {self.store_profile}
142
+
143
+ [์‚ฌ์šฉ์ž ์š”์ฒญ]
144
+ {self.user_query}
145
+
146
+ [ํ‰๊ฐ€ ๋Œ€์ƒ ์ถ•์ œ ํ›„๋ณด ๋ชฉ๋ก (JSON)]
147
+ {candidates_json_str}
148
+
149
+ [๋™์  ์ ์ˆ˜ ํ‰๊ฐ€ ๊ฐ€์ด๋“œ]
150
+ 1. **ํƒ€๊ฒŸ ์ผ์น˜ (์„ฑ๋ณ„/์—ฐ๋ น)**: ๊ฐ€๊ฒŒ์˜ 'ํ•ต์‹ฌ๊ณ ๊ฐ'(์˜ˆ: 30๋Œ€ ์—ฌ์„ฑ)๊ณผ ์ถ•์ œ์˜ '์ฃผ์š”์„ฑ๋ณ„', '์ฃผ์š”์—ฐ๋ น๋Œ€'๊ฐ€ ์ผ์น˜ํ• ์ˆ˜๋ก ๋†’์€ ์ ์ˆ˜๋ฅผ ์ฃผ์„ธ์š”.
151
+ 2. **๊ณ ๊ฐ์ธต ์ผ์น˜**: ๊ฐ€๊ฒŒ์˜ '์—…์ข…'(์˜ˆ: ์นดํŽ˜)๊ณผ ์ถ•์ œ์˜ '์ฃผ์š”๊ณ ๊ฐ์ธต'(์˜ˆ: 2030 ์—ฌ์„ฑ, ์—ฐ์ธ)์ด ์‹œ๋„ˆ์ง€๊ฐ€ ๋‚ ์ˆ˜๋ก ๋†’์€ ์ ์ˆ˜๋ฅผ ์ฃผ์„ธ์š”.
152
+ 3. **๋ฐฉ๋ฌธ์ž ํŠน์„ฑ**: ๊ฐ€๊ฒŒ๊ฐ€ '์‹ ๊ทœ ๊ณ ๊ฐ ํ™•๋ณด'๊ฐ€ ํ•„์š”ํ•˜๊ณ  ์ถ•์ œ์˜ '์ฃผ์š”๋ฐฉ๋ฌธ์ž'๊ฐ€ '์™ธ์ง€์ธ'์ด๋ผ๋ฉด ๋†’์€ ์ ์ˆ˜๋ฅผ ์ฃผ์„ธ์š”. ๋ฐ˜๋Œ€๋กœ '๋‹จ๊ณจ ํ™•๋ณด'๊ฐ€ ๋ชฉํ‘œ์ธ๋ฐ 'ํ˜„์ง€์ธ' ๋ฐฉ๋ฌธ์ž๊ฐ€ ๋งŽ๋‹ค๋ฉด ๋†’์€ ์ ์ˆ˜๋ฅผ ์ฃผ์„ธ์š”.
153
+ 4. **์ธ๊ธฐ๋„**: '์ถ•์ œ์ธ๊ธฐ', '์ถ•์ œ์ธ๊ธฐ๋„', '์ธ๊ธฐ๋„_์ ์ˆ˜'๊ฐ€ ๋†’์„์ˆ˜๋ก ๋ฐฉ๋ฌธ๊ฐ ์ˆ˜๊ฐ€ ๋ณด์žฅ๋˜๋ฏ€๋กœ ๋†’์€ ์ ์ˆ˜๋ฅผ ์ฃผ์„ธ์š”.
154
+ 5. **๋ณตํ•ฉ ํ‰๊ฐ€**: ์ด ๋ชจ๋“  ์š”์†Œ๋ฅผ ์ข…ํ•ฉํ•˜์—ฌ 0์ ์—์„œ 100์  ์‚ฌ์ด์˜ '๋™์ _์ ์ˆ˜'๋ฅผ ๋ถ€์—ฌํ•˜์„ธ์š”.
155
+ 6. **์ด์œ  ์ž‘์„ฑ**: ์™œ ๊ทธ๋Ÿฐ ์ ์ˆ˜๋ฅผ ์ฃผ์—ˆ๋Š”์ง€ 'ํ‰๊ฐ€_์ด์œ '์— ๊ฐ„๋žตํžˆ ์š”์•ฝํ•˜์„ธ์š”.
156
+
157
+ [์ถœ๋ ฅ ํ˜•์‹ (JSON ๋ฆฌ์ŠคํŠธ)]
158
+ [
159
+ {{
160
+ "์ถ•์ œ๋ช…": "[์ถ•์ œ ์ด๋ฆ„]",
161
+ "๋™์ _์ ์ˆ˜": 85,
162
+ "ํ‰๊ฐ€_์ด์œ ": "๊ฐ€๊ฒŒ์˜ ํ•ต์‹ฌ ๊ณ ๊ฐ์ธ 30๋Œ€ ์—ฌ์„ฑ๊ณผ ์ถ•์ œ์˜ ์ฃผ์š”์—ฐ๋ น๋Œ€/์ฃผ์š”์„ฑ๋ณ„์ด ์ผ์น˜ํ•˜๋ฉฐ, '์™ธ์ง€์ธ' ๋ฐฉ๋ฌธ์ž ํŠน์„ฑ์ด ์‹ ๊ทœ ๊ณ ๊ฐ ํ™•๋ณด ๋ชฉํ‘œ์— ๋ถ€ํ•ฉํ•จ."
163
+ }},
164
+ ...
165
+ ]
166
+ """
167
+
168
+ try:
169
+ response = self.llm_temp_01.invoke([HumanMessage(content=prompt)])
170
+ response_text = response.content.strip()
171
+
172
+ # 5๋ฒˆ ์ œ์•ˆ: ๊ณตํ†ต ํŒŒ์„œ ์‚ฌ์šฉ
173
+ scores_list = extract_json_from_llm_response(response_text)
174
+
175
+ scores_dict = {
176
+ item['์ถ•์ œ๋ช…']: {
177
+ "dynamic_score": item.get('๋™์ _์ ์ˆ˜', 0),
178
+ "dynamic_reason": item.get('ํ‰๊ฐ€_์ด์œ ', 'N/A')
179
+ }
180
+ for item in scores_list if isinstance(item, dict) and '์ถ•์ œ๋ช…' in item
181
+ }
182
+ return scores_dict
183
+
184
+ except (ValueError, json.JSONDecodeError) as e:
185
+ logger.error(f"--- [Filter 3/5 CRITICAL ERROR] ๋™์  ์ ์ˆ˜ JSON ํŒŒ์‹ฑ ์‹คํŒจ: {e} ---")
186
+ logger.debug(f"LLM ์›๋ณธ ์‘๋‹ต (์•ž 500์ž): {response_text[:500]} ...")
187
+ return {} # ์˜ค๋ฅ˜ ๋ฐœ์ƒ ์‹œ ๋นˆ ๋”•์…”๋„ˆ๋ฆฌ ๋ฐ˜ํ™˜ (Fallback)
188
+ except Exception as e:
189
+ logger.critical(f"--- [Filter 3/5 CRITICAL ERROR] (Outer Catch) {e} ---", exc_info=True)
190
+ return {}
191
+
192
+ def _calculate_hybrid_scores(
193
+ self,
194
+ embedding_candidates: List[Tuple[Document, float]],
195
+ dynamic_scores: Dict[str, Dict[str, Any]]
196
+ ) -> List[Dict[str, Any]]:
197
+ """
198
+ (4๋‹จ๊ณ„) Score 1(์ž„๋ฒ ๋”ฉ)๊ณผ Score 2(๋™์ )๋ฅผ ๊ฐ€์ค‘ ํ•ฉ์‚ฐํ•˜์—ฌ ์ตœ์ข… 'ํ•˜์ด๋ธŒ๋ฆฌ๋“œ ์ ์ˆ˜'๋ฅผ ๊ณ„์‚ฐํ•ฉ๋‹ˆ๋‹ค.
199
+ """
200
+ logger.info("--- [Filter 4/5] ํ•˜์ด๋ธŒ๋ฆฌ๋“œ ์ ์ˆ˜ ๊ณ„์‚ฐ ์‹œ์ž‘ ---")
201
+ hybrid_results = []
202
+
203
+ for doc, embedding_score in embedding_candidates:
204
+ festival_name = doc.metadata.get('์ถ•์ œ๋ช…')
205
+ if not festival_name:
206
+ continue
207
+
208
+ normalized_embedding_score = embedding_score * 100
209
+ dynamic_eval = dynamic_scores.get(festival_name, {"dynamic_score": 0, "dynamic_reason": "N/A"})
210
+ dynamic_score = dynamic_eval["dynamic_score"]
211
+
212
+ hybrid_score = (normalized_embedding_score * self.embedding_weight) + \
213
+ (dynamic_score * self.dynamic_weight)
214
+
215
+ hybrid_results.append({
216
+ "document": doc,
217
+ "metadata": doc.metadata,
218
+ "score_embedding": normalized_embedding_score,
219
+ "score_dynamic": dynamic_score,
220
+ "score_dynamic_reason": dynamic_eval["dynamic_reason"],
221
+ "score_hybrid": hybrid_score
222
+ })
223
+
224
+ hybrid_results.sort(key=lambda x: x.get("score_hybrid", 0), reverse=True)
225
+ return hybrid_results
226
+
227
+
228
+ # 2026๋…„ ๋‚ ์งœ ์˜ˆ์ธก ํ—ฌํผ ํ•จ์ˆ˜
229
+ def _predict_next_year_date(self, date_str_2025: Optional[str]) -> str:
230
+ """2025๋…„ ๋‚ ์งœ ๋ฌธ์ž์—ด(YYYY.MM.DD~...)์„ ๋ฐ›์•„ 2026๋…„ ์˜ˆ์ƒ ์‹œ๊ธฐ๋ฅผ ํ…์ŠคํŠธ๋กœ ๋ฐ˜ํ™˜ํ•ฉ๋‹ˆ๋‹ค."""
231
+ if not date_str_2025 or not isinstance(date_str_2025, str):
232
+ return "2026๋…„ ์ •๋ณด ์—†์Œ" # ๋‚ ์งœ ์ •๋ณด ์—†์œผ๋ฉด ๋ช…์‹œ์  ๋ฐ˜ํ™˜
233
+
234
+ try:
235
+ # "~" ์•ž๋ถ€๋ถ„๋งŒ ์‚ฌ์šฉํ•˜์—ฌ ์‹œ์ž‘ ๋‚ ์งœ ํŒŒ์‹ฑ (YYYY.MM.DD ํ˜•์‹ ๊ฐ€์ •)
236
+ start_date_str = date_str_2025.split('~')[0].strip()
237
+ date_2025 = pd.to_datetime(start_date_str, format='%Y.%m.%d', errors='coerce')
238
+
239
+ if pd.isna(date_2025): # YYYY.MM.DD ํŒŒ์‹ฑ ์‹คํŒจ ์‹œ ๋‹ค๋ฅธ ํ˜•์‹ ์‹œ๋„ (์˜ˆ: YYYY-MM-DD)
240
+ date_2025 = pd.to_datetime(start_date_str, errors='coerce')
241
+
242
+ if pd.isna(date_2025): # ์ตœ์ข… ํŒŒ์‹ฑ ์‹คํŒจ ์‹œ
243
+ logger.warning(f"๋‚ ์งœ ์˜ˆ์ธก ์‹คํŒจ: '{start_date_str}' (์›๋ณธ: '{date_str_2025}') ํ˜•์‹์„ ์ธ์‹ํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค.")
244
+ return f"2026๋…„ ์ •๋ณด ์—†์Œ (2025๋…„: {date_str_2025})"
245
+
246
+ month = date_2025.month
247
+ day = date_2025.day
248
+
249
+ if day <= 10:
250
+ timing = f"{month}์›” ์ดˆ"
251
+ elif day <= 20:
252
+ timing = f"{month}์›” ์ค‘์ˆœ"
253
+ else:
254
+ timing = f"{month}์›” ๋ง"
255
+
256
+ return f"2026๋…„ {timing}๊ฒฝ ์˜ˆ์ƒ (2025๋…„: {date_str_2025})"
257
+ except Exception as e:
258
+ logger.error(f"๋‚ ์งœ ์˜ˆ์ธก ์ค‘ ์˜ค๋ฅ˜ ({date_str_2025}): {e}")
259
+ return f"2026๋…„ ์ •๋ณด ์—†์Œ (์˜ค๋ฅ˜: {e})"
260
+
261
+ def _format_recommendation_results(
262
+ self,
263
+ ranked_list: List[Dict[str, Any]],
264
+ top_k: int
265
+ ) -> List[Dict[str, Any]]:
266
+
267
+ """ (5๋‹จ๊ณ„) ์ตœ์ข… ๋‹ต๋ณ€ ํฌ๋งทํŒ… (LLM) """
268
+ logger.info(f"--- [Filter 5/5] ์ตœ์ข… ๋‹ต๋ณ€ ํฌ๋งทํŒ… (LLM) ์‹œ์ž‘ (Top {top_k}) ---")
269
+ top_candidates = ranked_list[:top_k]
270
+ candidates_data = []
271
+ for candidate in top_candidates:
272
+ meta = candidate["metadata"]
273
+ date_2025 = meta.get('2025_๊ธฐ๊ฐ„')
274
+ predicted_2026_timing = self._predict_next_year_date(date_2025)
275
+ candidates_data.append({
276
+ "์ถ•์ œ๋ช…": meta.get('์ถ•์ œ๋ช…'),
277
+ "์†Œ๊ฐœ": meta.get('์†Œ๊ฐœ'),
278
+ "predicted_2026_timing": predicted_2026_timing,
279
+ "์ฃผ์š”๊ณ ๊ฐ์ธต": meta.get('์ฃผ์š”๊ณ ๊ฐ์ธต'),
280
+ "์ฃผ์š”๋ฐฉ๋ฌธ์ž": meta.get('์ฃผ์š”๋ฐฉ๋ฌธ์ž'),
281
+ "์ถ•์ œ์ธ๊ธฐ": meta.get('์ถ•์ œ์ธ๊ธฐ'),
282
+ "ํ™ˆํŽ˜์ด์ง€": meta.get('ํ™ˆํŽ˜์ด์ง€'),
283
+ "์ถ”์ฒœ_์ ์ˆ˜": round(candidate["score_hybrid"], 1),
284
+ "์ถ”์ฒœ_๊ทผ๊ฑฐ_ํ‚ค์›Œ๋“œ": f"ํ‚ค์›Œ๋“œ/์†Œ๊ฐœ ์ผ์น˜๋„ ({round(candidate['score_embedding'], 0)}์ )",
285
+ "์ถ”์ฒœ_๊ทผ๊ฑฐ_๋™์ ": f"๊ฐ€๊ฒŒ ๋งž์ถค์„ฑ({round(candidate['score_dynamic'], 0)}์ ): {candidate['score_dynamic_reason']}"
286
+ })
287
+ candidates_json_str = json.dumps(candidates_data, ensure_ascii=False, indent=2)
288
+
289
+ prompt = f"""
290
+ ๋‹น์‹ ์€ ์†Œ์ƒ๊ณต์ธ ์ปจ์„คํ„ดํŠธ์ž…๋‹ˆ๋‹ค. [๊ฐ€๊ฒŒ ํ”„๋กœํ•„]๊ณผ AI๊ฐ€ ๋ถ„์„ํ•œ [์ตœ์ข… ์ถ”์ฒœ ์ถ•์ œ ๋ชฉ๋ก]์„ ๋ฐ”ํƒ•์œผ๋กœ,
291
+ ์‚ฌ์žฅ๋‹˜๊ป˜ ์ œ์•ˆํ•  ์ตœ์ข… ์ถ”์ฒœ ๋‹ต๋ณ€์„ ์ƒ์„ฑํ•˜์„ธ์š”.
292
+
293
+ [๊ฐ€๊ฒŒ ํ”„๋กœํ•„]
294
+ {self.store_profile}
295
+
296
+ [์ตœ์ข… ์ถ”์ฒœ ์ถ•์ œ ๋ชฉ๋ก (JSON) - ์†Œ๊ฐœ, 2026๋…„ ์˜ˆ์ƒ ์‹œ๊ธฐ ํฌํ•จ]
297
+ {candidates_json_str}
298
+
299
+ [์ตœ์ข… ๋‹ต๋ณ€ ์ƒ์„ฑ ๊ฐ€์ด๋“œ๋ผ์ธ]
300
+ 1. **[์ตœ์ข… ์ถ”์ฒœ ์ถ•์ œ ๋ชฉ๋ก]์˜ ๋ชจ๋“  ์ •๋ณด**๋ฅผ ์‚ฌ์šฉํ•˜์—ฌ ์ตœ์ข… ๋‹ต๋ณ€์„ JSON ํ˜•์‹์œผ๋กœ ์ƒ์„ฑํ•ฉ๋‹ˆ๋‹ค.
301
+ 2. '์ถ”์ฒœ_์ด์œ '๋Š” '์ถ”์ฒœ_๊ทผ๊ฑฐ_ํ‚ค์›Œ๋“œ'์™€ '์ถ”์ฒœ_๊ทผ๊ฑฐ_๋™์ '์„ ์กฐํ•ฉํ•˜์—ฌ **์ž์—ฐ์Šค๋Ÿฌ์šด ์„œ์ˆ ํ˜• ๋ฌธ์žฅ**์œผ๋กœ ์ž‘์„ฑํ•˜์„ธ์š”.
302
+ 3. **(์ˆ˜์ •) '์ถ•์ œ_๊ธฐ๋ณธ์ •๋ณด'**: ์ž…๋ ฅ JSON์˜ **'์†Œ๊ฐœ', '์ฃผ์š”๊ณ ๊ฐ์ธต', '์ฃผ์š”๋ฐฉ๋ฌธ์ž', '์ถ•์ œ์ธ๊ธฐ'** ์ •๋ณด๋ฅผ ์กฐํ•ฉํ•˜์—ฌ ์ถ•์ œ๋ฅผ ์„ค๋ช…ํ•˜๋Š” ์ž์—ฐ์Šค๋Ÿฌ์šด ๋ฌธ์žฅ์œผ๋กœ ์ž‘์„ฑํ•˜์„ธ์š”. '์†Œ๊ฐœ' ๋‚ด์šฉ์„ ๋ฐ”ํƒ•์œผ๋กœ ์ถ•์ œ์˜ ํ•ต์‹ฌ ๋‚ด์šฉ์„ ์š”์•ฝํ•˜๊ณ , ๊ณ ๊ฐ์ธต/๋ฐฉ๋ฌธ์ž/์ธ๊ธฐ๋„ ์ •๋ณด๋ฅผ ๋ง๋ถ™์ž…๋‹ˆ๋‹ค. (์˜ˆ: "**'{{์†Œ๊ฐœ ์š”์•ฝ}}'**์„(๋ฅผ) ์ฃผ์ œ๋กœ ํ•˜๋Š” ์ถ•์ œ์ด๋ฉฐ, ์ฃผ๋กœ **{{์ฃผ์š”๊ณ ๊ฐ์ธต}}**์ด ๋ฐฉ๋ฌธํ•˜๊ณ  **{{์ฃผ์š”๋ฐฉ๋ฌธ์ž}}** ํŠน์„ฑ์„ ๋ณด์ž…๋‹ˆ๋‹ค. (์ธ๊ธฐ๋„: **{{์ถ•์ œ์ธ๊ธฐ}}**)")
303
+ 4. **(์ค‘์š”) '2026๋…„ ์˜ˆ์ƒ ์‹œ๊ธฐ'**: ์ž…๋ ฅ JSON์— ์žˆ๋Š” **`predicted_2026_timing` ๊ฐ’์„ ๊ทธ๋Œ€๋กœ** ๊ฐ€์ ธ์™€์„œ ์ถœ๋ ฅ JSON์˜ `'2026๋…„ ์˜ˆ์ƒ ์‹œ๊ธฐ'` ํ•„๋“œ ๊ฐ’์œผ๋กœ ์‚ฌ์šฉํ•˜์„ธ์š”. **์ ˆ๋Œ€ ์ง์ ‘ ๊ณ„์‚ฐํ•˜๊ฑฐ๋‚˜ ์ˆ˜์ •ํ•˜์ง€ ๋งˆ์„ธ์š”.**
304
+ 5. **(์ค‘์š”) ๋‹จ์  ์ œ์™ธ**: '๋‹จ์ '์ด๋‚˜ '๋ถ€์ ํ•ฉํ•œ ์ด์œ '๋Š” ์ ˆ๋Œ€ ์ถœ๋ ฅํ•˜์ง€ ๋งˆ์„ธ์š”.
305
+ 6. **(์ค‘์š”) ์ทจ์†Œ์„  ๊ธˆ์ง€**: ์ ˆ๋Œ€๋กœ `~~text~~`์™€ ๊ฐ™์€ ์ทจ์†Œ์„  ๋งˆํฌ๋‹ค์šด์„ ์‚ฌ์šฉํ•˜์ง€ ๋งˆ์„ธ์š”.
306
+ 7. **์ถœ๋ ฅ ํ˜•์‹ (JSON)**: ๋ฐ˜๋“œ์‹œ ์•„๋ž˜์˜ JSON ๋ฆฌ์ŠคํŠธ ํ˜•์‹์œผ๋กœ๋งŒ ์‘๋‹ตํ•˜์„ธ์š”. ๋‹ค๋ฅธ ์„ค๋ช… ์—†์ด JSON๋งŒ ์ถœ๋ ฅํ•ด์•ผ ํ•ฉ๋‹ˆ๋‹ค.
307
+
308
+ [์‘๋‹ต ํ˜•์‹ (JSON ๋ฆฌ์ŠคํŠธ)]
309
+ [
310
+ {{
311
+ "์ถ•์ œ๋ช…": "[์ถ•์ œ ์ด๋ฆ„]",
312
+ "์ถ”์ฒœ_์ ์ˆ˜": 95.2,
313
+ "์ถ•์ œ_๊ธฐ๋ณธ์ •๋ณด": "[์ถ•์ œ ์†Œ๊ฐœ ์š”์•ฝ, ์ฃผ์š” ๊ณ ๊ฐ์ธต, ์ฃผ์š” ๋ฐฉ๋ฌธ์ž, ์ธ๊ธฐ๋„๋ฅผ ์กฐํ•ฉํ•œ ์„œ์ˆ ํ˜• ๋ฌธ์žฅ]",
314
+ "์ถ”์ฒœ_์ด์œ ": "[๊ฐ€๊ฒŒ ํ”„๋กœํ•„๊ณผ ์ถ”์ฒœ ๊ทผ๊ฑฐ๋ฅผ ๋ฐ”ํƒ•์œผ๋กœ ์ด ์ถ•์ œ๋ฅผ ์ถ”์ฒœํ•˜๋Š” ์ด์œ ๋ฅผ ์„œ์ˆ ํ˜•์œผ๋กœ ์ž‘์„ฑ.]",
315
+ "ํ™ˆํŽ˜์ด์ง€": "[์ถ•์ œ ํ™ˆํŽ˜์ด์ง€ URL]",
316
+ "2026๋…„ ์˜ˆ์ƒ ์‹œ๊ธฐ": "[์ž…๋ ฅ JSON์˜ predicted_2026_timing ๊ฐ’์„ ๊ทธ๋Œ€๋กœ ์‚ฌ์šฉ]"
317
+ }},
318
+ ...
319
+ ]
320
+ """
321
+ response_text = ""
322
+ try:
323
+ response = self.llm_temp_03.invoke([HumanMessage(content=prompt)])
324
+ response_text = response.content.strip()
325
+ final_list = extract_json_from_llm_response(response_text)
326
+ return final_list
327
+ except (ValueError, json.JSONDecodeError) as e:
328
+ logger.error(f"--- [Filter 5/5 CRITICAL ERROR] ์ตœ์ข… ๋‹ต๋ณ€ JSON ํŒŒ์‹ฑ ์‹คํŒจ: {e} ---")
329
+ logger.debug(f"LLM ์›๋ณธ ์‘๋‹ต (์•ž 500์ž): {response_text[:500]} ...")
330
+ return [{"error": f"์ตœ์ข… ๋‹ต๋ณ€ ์ƒ์„ฑ ์ค‘ JSON ํŒŒ์‹ฑ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}", "details": response_text}]
331
+ except Exception as e:
332
+ logger.critical(f"--- [Filter 5/5 CRITICAL ERROR] (Outer Catch) {e} ---", exc_info=True)
333
+ return [{"error": f"์ตœ์ข… ๋‹ต๋ณ€ ์ƒ์„ฑ ์ค‘ ์•Œ ์ˆ˜ ์—†๋Š” ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}"}]
334
+
335
+
336
+ def run(self, search_k: int = 10, top_k: int = 3) -> List[Dict[str, Any]]:
337
+ """
338
+ ํŒŒ์ดํ”„๋ผ์ธ 1~5๋‹จ๊ณ„๋ฅผ ์ˆœ์ฐจ์ ์œผ๋กœ ์‹คํ–‰ํ•ฉ๋‹ˆ๋‹ค.
339
+ """
340
+ try:
341
+ # 1๋‹จ๊ณ„: ์ฟผ๋ฆฌ ์žฌ์ž‘์„ฑ
342
+ rewritten_query = self._rewrite_query()
343
+ logger.info(f"--- [Filter 1/5] ์ฟผ๋ฆฌ ์žฌ์ž‘์„ฑ ์™„๋ฃŒ: {rewritten_query} ---")
344
+
345
+ # 2๋‹จ๊ณ„: ํ›„๋ณด ๊ฒ€์ƒ‰
346
+ embedding_candidates = self._search_candidates(query=rewritten_query, k=search_k)
347
+ if not embedding_candidates:
348
+ logger.warning("--- [Filter 2/5] ํ›„๋ณด ๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ ์—†์Œ ---")
349
+ return [{"error": "์ถ”์ฒœํ•  ๋งŒํ•œ ์ถ•์ œ๋ฅผ ์ฐพ์ง€ ๋ชปํ–ˆ์Šต๋‹ˆ๋‹ค."}]
350
+
351
+ logger.info(f"--- [Filter 2/5] ํ›„๋ณด ๊ฒ€์ƒ‰ ์™„๋ฃŒ (ํ›„๋ณด {len(embedding_candidates)}๊ฐœ) ---")
352
+
353
+ # 3๋‹จ๊ณ„: ๋™์  ์†์„ฑ ํ‰๊ฐ€
354
+ candidate_docs = [doc for doc, score in embedding_candidates]
355
+ dynamic_scores_dict = self._evaluate_candidates_dynamically(candidates=candidate_docs)
356
+
357
+ if not dynamic_scores_dict:
358
+ logger.warning("--- [Filter 3/5 WARNING] ๋™์  ์†์„ฑ ํ‰๊ฐ€ ์‹คํŒจ. ์ž„๋ฒ ๋”ฉ ์ ์ˆ˜๋งŒ์œผ๋กœ ์ถ”์ฒœ์„ ์ง„ํ–‰ํ•ฉ๋‹ˆ๋‹ค. ---")
359
+ # dynamic_scores_dict = {} (๋นˆ ๋”•์…”๋„ˆ๋ฆฌ๋กœ ๊ณ„์† ์ง„ํ–‰)
360
+
361
+ logger.info(f"--- [Filter 3/5] ๋™์  ์†์„ฑ ํ‰๊ฐ€ ์™„๋ฃŒ ({len(dynamic_scores_dict)}๊ฐœ) ---")
362
+
363
+ # 4๋‹จ๊ณ„: ํ•˜์ด๋ธŒ๋ฆฌ๋“œ ์ ์ˆ˜ ๊ณ„์‚ฐ
364
+ hybrid_results = self._calculate_hybrid_scores(
365
+ embedding_candidates=embedding_candidates,
366
+ dynamic_scores=dynamic_scores_dict
367
+ )
368
+ logger.info(f"--- [Filter 4/5] ํ•˜์ด๋ธŒ๋ฆฌ๋“œ ์ ์ˆ˜ ๊ณ„์‚ฐ ๋ฐ ์ •๋ ฌ ์™„๋ฃŒ ---")
369
+
370
+ # 5๋‹จ๊ณ„: ์ตœ์ข… ๋‹ต๋ณ€ ํฌ๋งทํŒ…
371
+ final_recommendations = self._format_recommendation_results(
372
+ ranked_list=hybrid_results,
373
+ top_k=top_k
374
+ )
375
+ logger.info(f"--- [Filter 5/5] ์ตœ์ข… ๋‹ต๋ณ€ ํฌ๋งทํŒ… ์™„๋ฃŒ ---")
376
+
377
+ # 5๋‹จ๊ณ„(LLM ํฌ๋งทํŒ…) ์‹คํŒจ ์‹œ Fallback
378
+ if final_recommendations and isinstance(final_recommendations, list) and "error" in final_recommendations[0]:
379
+ logger.warning(f"--- [Tool WARNING] ์ตœ์ข… ๋‹ต๋ณ€ ํฌ๋งทํŒ… ์‹คํŒจ. 4๋‹จ๊ณ„ ์›๋ณธ ๋ฐ์ดํ„ฐ๋กœ Fallback. ({final_recommendations[0]['error']}) ---")
380
+
381
+ fallback_results = []
382
+ for item in hybrid_results[:top_k]:
383
+ meta = item.get("metadata", {})
384
+ fallback_results.append({
385
+ "์ถ•์ œ๋ช…": meta.get("์ถ•์ œ๋ช…", "N/A"),
386
+ "์ถ”์ฒœ_์ ์ˆ˜": round(item.get("score_hybrid", 0), 1),
387
+ "์ถ”์ฒœ_์ด์œ ": f"์ž„๋ฒ ๋”ฉ({round(item.get('score_embedding',0),0)}์ ), ๋งž์ถค์„ฑ({round(item.get('score_dynamic',0),0)}์ ): {item.get('score_dynamic_reason', 'N/A')}",
388
+ "์ถ•์ œ_๊ธฐ๋ณธ์ •๋ณด": meta.get("์†Œ๊ฐœ", "N/A")[:100] + "...",
389
+ "ํ™ˆํŽ˜์ด์ง€": meta.get("ํ™ˆํŽ˜์ด์ง€", "N/A")
390
+ })
391
+ return fallback_results
392
+
393
+ return final_recommendations
394
+
395
+ except Exception as e:
396
+ logger.critical(f"--- [Tool CRITICAL] ์ถ•์ œ ์ถ”์ฒœ ํŒŒ์ดํ”„๋ผ์ธ ์ „์ฒด ์˜ค๋ฅ˜: {e} ---", exc_info=True)
397
+ return [{"error": f"์ถ•์ œ๋ฅผ ์ถ”์ฒœํ•˜๋Š” ๊ณผ์ •์—์„œ ์˜ˆ๊ธฐ์น˜ ๋ชปํ•œ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {e}"}]
modules/knowledge_base.py ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # modules/knowledge_base.py
2
+
3
+ import os
4
+ import streamlit as st
5
+ from pathlib import Path
6
+ from langchain_community.vectorstores import FAISS
7
+ from langchain_community.embeddings import HuggingFaceEmbeddings
8
+ import traceback
9
+
10
+ import config
11
+
12
+ logger = config.get_logger(__name__)
13
+
14
@st.cache_resource
def _load_embedding_model():
    """
    Load (and cache) the HuggingFace embedding model.

    Kept as a separate cached function so that both FAISS loaders reuse a
    single model instance. Returns None when loading fails.
    """
    try:
        logger.info("--- [Cache] HuggingFace ์ž„๋ฒ ๋”ฉ ๋ชจ๋ธ ์ตœ์ดˆ ๋กœ๋”ฉ ์‹œ์ž‘ ---")

        model_name = config.EMBEDDING_MODEL
        embeddings = HuggingFaceEmbeddings(
            model_name=model_name,
            model_kwargs={'device': 'cpu'},
            encode_kwargs={'normalize_embeddings': True},
        )

        logger.info(f"--- [Cache] HuggingFace ์ž„๋ฒ ๋”ฉ ๋ชจ๋ธ ({model_name}) ๋กœ๋”ฉ ์„ฑ๊ณต ---")
        return embeddings

    except Exception as e:
        logger.critical(f"--- [CRITICAL ERROR] ์ž„๋ฒ ๋”ฉ ๋ชจ๋ธ ๋กœ๋”ฉ ์‹คํŒจ: {e} ---", exc_info=True)
        st.error(f"์ž„๋ฒ ๋”ฉ ๋ชจ๋ธ('{config.EMBEDDING_MODEL}') ๋กœ๋”ฉ ์ค‘ ์‹ฌ๊ฐํ•œ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {e}")
        return None
37
+
38
@st.cache_resource
def load_marketing_vectorstore():
    """
    Load the 'marketing strategy' FAISS vector store and build a retriever.

    Returns a retriever over the marketing index (top-2 hits per query),
    or None when the embedding model or the index cannot be loaded.
    """
    try:
        logger.info("--- [Cache] '๋งˆ์ผ€ํŒ…' FAISS Vector Store ์ตœ์ดˆ ๋กœ๋”ฉ ์‹œ์ž‘ ---")

        embeddings = _load_embedding_model()
        if embeddings is None:
            raise RuntimeError("์ž„๋ฒ ๋”ฉ ๋ชจ๋ธ ๋กœ๋”ฉ์— ์‹คํŒจํ•˜์—ฌ Retriever๋ฅผ ์ƒ์„ฑํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค.")

        index_path = config.PATH_FAISS_MARKETING
        if not index_path.exists():
            logger.critical(f"--- [CRITICAL ERROR] '๋งˆ์ผ€ํŒ…' Vector DB ๊ฒฝ๋กœ๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค: {index_path}")
            st.error(f"'๋งˆ์ผ€ํŒ…' Vector DB ํŒŒ์ผ์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค. (๊ฒฝ๋กœ: {index_path})")
            return None

        # allow_dangerous_deserialization: the index is a trusted local
        # artifact shipped with the app (FAISS pickles metadata on disk).
        marketing_db = FAISS.load_local(
            folder_path=str(index_path),
            embeddings=embeddings,
            allow_dangerous_deserialization=True,
        )
        marketing_retriever = marketing_db.as_retriever(search_kwargs={"k": 2})

        logger.info("--- [Cache] '๋งˆ์ผ€ํŒ…' FAISS Vector Store ๋กœ๋”ฉ ์„ฑ๊ณต ---")
        return marketing_retriever

    except Exception as e:
        logger.critical(f"--- [CRITICAL ERROR] '๋งˆ์ผ€ํŒ…' FAISS ๋กœ๋”ฉ ์‹คํŒจ: {e} ---", exc_info=True)
        st.error(f"'๋งˆ์ผ€ํŒ…' Vector Store ๋กœ๋”ฉ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}")
        return None
72
+
73
@st.cache_resource
def load_festival_vectorstore():
    """
    Load the 'festival information' FAISS vector store.

    Unlike load_marketing_vectorstore(), this returns the raw FAISS store
    (not a retriever), or None when loading fails.
    """
    try:
        logger.info("--- [Cache] '์ถ•์ œ' FAISS Vector Store ์ตœ์ดˆ ๋กœ๋”ฉ ์‹œ์ž‘ ---")

        embeddings = _load_embedding_model()
        if embeddings is None:
            raise RuntimeError("์ž„๋ฒ ๋”ฉ ๋ชจ๋ธ ๋กœ๋”ฉ์— ์‹คํŒจํ•˜์—ฌ '์ถ•์ œ' Vector Store๋ฅผ ๋กœ๋“œํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค.")

        index_path = config.PATH_FAISS_FESTIVAL
        if not index_path.exists():
            logger.critical(f"--- [CRITICAL ERROR] '์ถ•์ œ' Vector DB ๊ฒฝ๋กœ๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค: {index_path}")
            st.error(f"'์ถ•์ œ' Vector DB ํŒŒ์ผ์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค. (๊ฒฝ๋กœ: {index_path})")
            return None

        # allow_dangerous_deserialization: trusted local artifact (see the
        # marketing loader above).
        festival_db = FAISS.load_local(
            folder_path=str(index_path),
            embeddings=embeddings,
            allow_dangerous_deserialization=True,
        )

        logger.info("--- [Cache] '์ถ•์ œ' FAISS Vector Store ๋กœ๋”ฉ ์„ฑ๊ณต ---")
        return festival_db

    except Exception as e:
        logger.critical(f"--- [CRITICAL ERROR] '์ถ•์ œ' FAISS ๋กœ๋”ฉ ์‹คํŒจ: {e} ---", exc_info=True)
        st.error(f"'์ถ•์ œ' Vector Store ๋กœ๋”ฉ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}")
        return None
modules/llm_provider.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # modules/llm_provider.py
2
+
3
+ from langchain_google_genai import ChatGoogleGenerativeAI
4
+ from typing import Optional
5
+
6
+ import config
7
+
8
+ logger = config.get_logger(__name__)
9
+
10
+ _llm_instance: Optional[ChatGoogleGenerativeAI] = None
11
+
12
def set_llm(llm: ChatGoogleGenerativeAI):
    """
    Store the default LLM instance created by the Orchestrator in the
    module-level global. The first call wins; later calls are ignored.
    """
    global _llm_instance
    if _llm_instance is not None:
        logger.info("--- [LLM Provider] Global LLM instance already set. ---")
        return

    logger.info(f"--- [LLM Provider] Global LLM instance set. (Model: {llm.model}, Temp: {llm.temperature}) ---")
    _llm_instance = llm
23
+
24
+
25
def get_llm(temperature: float = 0.1) -> ChatGoogleGenerativeAI:
    """
    Return the shared global LLM instance.

    When the requested temperature differs from the stored instance's
    default, return a copy with only the temperature overridden (the API
    key and other settings are reused).

    Raises:
        RuntimeError: if set_llm() has not been called yet.
    """
    global _llm_instance
    if _llm_instance is None:
        logger.error("--- [LLM Provider] LLM not initialized. ---")
        raise RuntimeError(
            "LLM not initialized. The Orchestrator must call set_llm() before any tools are used."
        )

    if _llm_instance.temperature == temperature:
        logger.debug(f"--- [LLM Provider] Reusing global LLM instance (temp={temperature}) ---")
        return _llm_instance

    logger.info(f"--- [LLM Provider] Creating new LLM instance with temp={temperature} (default was {_llm_instance.temperature}) ---")

    try:
        # Pydantic v2+ (langchain-core 0.1.23+) exposes model_copy().
        return _llm_instance.model_copy(update={"temperature": temperature})
    except AttributeError:
        # Pydantic v1 fallback: the deprecated .copy() API.
        logger.warning("--- [LLM Provider] Using .copy() fallback (Pydantic v1) ---")
        return _llm_instance.copy(update={"temperature": temperature})
modules/profile_utils.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # modules/profile_utils.py
2
+
3
+ from typing import Dict, Any
4
+ import config
5
+
6
+ logger = config.get_logger(__name__)
7
+
8
def get_chat_profile_dict(store_profile_dict: Dict[str, Any]) -> Dict[str, Any]:
    """
    Build the common 'chat profile' dictionary from the 'store_profile'
    dict received from FastAPI (server.py).

    Shared by visualization.py and orchestrator.py; this function is the
    single source of truth for the chat-profile shape. On failure it
    returns a minimal dict carrying an "error" entry.
    """
    try:
        def _ratio(key: str):
            # Ratio fields may be absent or None in the raw profile.
            return store_profile_dict.get(key) or 0

        def _field(key: str):
            return store_profile_dict.get(key, 'N/A')

        return {
            # Basic identification / level fields.
            "๊ฐ€๋งน์ ๋ช…": _field('๊ฐ€๋งน์ ๋ช…'),
            "๊ฐ€๋งน์ ID": _field('๊ฐ€๋งน์ ID'),
            "์ƒ๊ถŒ": _field('์ƒ๊ถŒ'),
            "์—…์ข…": _field('์—…์ข…'),
            "์ฃผ์†Œ": _field('๊ฐ€๋งน์ ์ฃผ์†Œ'),
            "์šด์˜ ๊ธฐ๊ฐ„ ์ˆ˜์ค€": _field('์šด์˜๊ฐœ์›”์ˆ˜_์ˆ˜์ค€'),
            "๋งค์ถœ ์ˆ˜์ค€": _field('๋งค์ถœ๊ตฌ๊ฐ„_์ˆ˜์ค€'),
            "๋งค์ถœ ๊ฑด์ˆ˜ ์ˆ˜์ค€": _field('์›”๋งค์ถœ๊ฑด์ˆ˜_์ˆ˜์ค€'),
            "๋ฐฉ๋ฌธ ๊ณ ๊ฐ์ˆ˜ ์ˆ˜์ค€": _field('์›”์œ ๋‹ˆํฌ๊ณ ๊ฐ์ˆ˜_์ˆ˜์ค€'),
            "๊ฐ๋‹จ๊ฐ€ ์ˆ˜์ค€": _field('์›”๊ฐ๋‹จ๊ฐ€_์ˆ˜์ค€'),
            # Derived, human-readable ratio strings.
            "์‹ ๊ทœ/์žฌ๋ฐฉ๋ฌธ์œจ": f"์‹ ๊ทœ {_ratio('์‹ ๊ทœ๊ณ ๊ฐ๋น„์œจ'):.1f}% / ์žฌ๋ฐฉ๋ฌธ {_ratio('์žฌ์ด์šฉ๊ณ ๊ฐ๋น„์œจ'):.1f}%",
            "๋™์ผ ์ƒ๊ถŒ ๋Œ€๋น„ ๋งค์ถœ ์ˆœ์œ„": f"์ƒ์œ„ {_ratio('๋™์ผ์ƒ๊ถŒ๋‚ด๋งค์ถœ์ˆœ์œ„๋น„์œจ'):.1f}%",
            "๋™์ผ ์—…์ข… ๋Œ€๋น„ ๋งค์ถœ ์ˆœ์œ„": f"์ƒ์œ„ {_ratio('๋™์ผ์—…์ข…๋‚ด๋งค์ถœ์ˆœ์œ„๋น„์œจ'):.1f}%",
            # Auto-extracted traits pass through unchanged (kept last).
            "์ž๋™์ถ”์ถœํŠน์ง•": store_profile_dict.get('์ž๋™์ถ”์ถœํŠน์ง•', {}),
        }

    except Exception as e:
        logger.critical(f"--- [Profile Utils CRITICAL] ์ฑ„ํŒ… ํ”„๋กœํ•„ ๋”•์…”๋„ˆ๋ฆฌ ์ƒ์„ฑ ์‹คํŒจ: {e} ---", exc_info=True)
        return {
            "์—…์ข…": store_profile_dict.get('์—…์ข…', '์•Œ ์ˆ˜ ์—†์Œ'),
            "์ž๋™์ถ”์ถœํŠน์ง•": store_profile_dict.get('์ž๋™์ถ”์ถœํŠน์ง•', {}),
            "์ฃผ์†Œ": store_profile_dict.get('๊ฐ€๋งน์ ์ฃผ์†Œ', '์•Œ ์ˆ˜ ์—†์Œ'),
            "error": "ํ”„๋กœํ•„ ์š”์•ฝ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ"
        }
modules/visualization.py ADDED
@@ -0,0 +1,230 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # modules/visualization.py
2
+
3
+ import pandas as pd
4
+ import matplotlib.pyplot as plt
5
+ from matplotlib import font_manager
6
+ import numpy as np
7
+ import streamlit as st
8
+
9
+ import config
10
+ from modules.profile_utils import get_chat_profile_dict
11
+
12
+ logger = config.get_logger(__name__)
13
+
14
def set_korean_font():
    """
    Find an installed Korean font and register it with Matplotlib.
    """
    preferred_fonts = ['Malgun Gothic', 'AppleGothic', 'NanumGothic']
    installed_names = {font.name for font in font_manager.fontManager.ttflist}

    chosen = next((name for name in preferred_fonts if name in installed_names), None)
    if chosen is not None:
        plt.rc('font', family=chosen)
        logger.info(f"โœ… ํ•œ๊ธ€ ํฐํŠธ '{chosen}'์„(๋ฅผ) ์ฐพ์•„ ๊ทธ๋ž˜ํ”„์— ์ ์šฉํ•ฉ๋‹ˆ๋‹ค.")
    else:
        logger.warning("โš ๏ธ ๊ฒฝ๊ณ : Malgun Gothic, AppleGothic, NanumGothic ํฐํŠธ๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค.")

    # Keep the minus sign renderable when a non-ASCII font family is active.
    plt.rcParams['axes.unicode_minus'] = False
32
+
33
+
34
def display_merchant_profile(profile_data: dict):
    """
    Render the full analyzed merchant profile to the Streamlit page.

    Args:
        profile_data: Payload that must contain a 'store_profile' dict;
            an error banner is shown and rendering stops when it is missing.
    """
    # BUG FIX: the docstring used to sit *after* this call, which made it a
    # dead string expression instead of the function's docstring.
    set_korean_font()

    if not profile_data or "store_profile" not in profile_data:
        st.error("๋ถ„์„ํ•  ๊ฐ€๋งน์  ๋ฐ์ดํ„ฐ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.")
        return

    store_data = profile_data["store_profile"]
    store_name = store_data.get('๊ฐ€๋งน์ ๋ช…', '์„ ํƒ ๋งค์žฅ')

    st.info(f"**'{store_name}'**์˜ ์ƒ์„ธ ๋ถ„์„ ๊ฒฐ๊ณผ์ž…๋‹ˆ๋‹ค.")

    tab1, tab2, tab3, tab4 = st.tabs([
        "๐Ÿ“‹ ๊ธฐ๋ณธ ์ •๋ณด",
        "๐Ÿง‘โ€๐Ÿคโ€๐Ÿง‘ ์ฃผ์š” ๊ณ ๊ฐ์ธต (์„ฑ๋ณ„/์—ฐ๋ น๋Œ€)",
        "๐Ÿšถ ์ฃผ์š” ๊ณ ๊ฐ ์œ ํ˜• (์ƒ๊ถŒ)",
        "๐Ÿ” ๊ณ ๊ฐ ์ถฉ์„ฑ๋„ (์‹ ๊ทœ/์žฌ๋ฐฉ๋ฌธ)"
    ])

    with tab1:
        render_basic_info_table(store_data)

    with tab2:
        st.subheader("๐Ÿง‘โ€๐Ÿคโ€๐Ÿง‘ ์ฃผ์š” ๊ณ ๊ฐ์ธต ๋ถ„ํฌ (์„ฑ๋ณ„/์—ฐ๋ น๋Œ€)")
        fig2 = plot_customer_distribution(store_data)
        st.pyplot(fig2)

    with tab3:
        st.subheader("๐Ÿšถ ์ฃผ์š” ๊ณ ๊ฐ ์œ ํ˜• (์ƒ๊ถŒ)")
        fig3 = plot_customer_type_pie(store_data)
        st.pyplot(fig3)

    with tab4:
        st.subheader("๐Ÿ” ์‹ ๊ทœ vs ์žฌ๋ฐฉ๋ฌธ ๊ณ ๊ฐ ๋น„์œจ")
        fig4 = plot_loyalty_donut(store_data)
        st.pyplot(fig4)
73
+
74
+
75
def get_main_customer_segment(store_data):
    """Return a text label for the dominant customer segment (gender/age), or None.

    Returns a string like "'์—ฌ์„ฑ 30๋Œ€(40.0%)'", or None when no segment has
    a positive share.
    """
    def _pct(key):
        # BUG FIX: ratio values may be None in the raw profile; coerce to 0
        # (the plotting helpers use the same `or 0` convention). Without
        # this, None + float and max() over None raise TypeError.
        return store_data.get(key) or 0

    segments = {
        '๋‚จ์„ฑ 20๋Œ€ ์ดํ•˜': _pct('๋‚จ์„ฑ20๋Œ€์ดํ•˜๋น„์œจ'),
        '๋‚จ์„ฑ 30๋Œ€': _pct('๋‚จ์„ฑ30๋Œ€๋น„์œจ'),
        '๋‚จ์„ฑ 40๋Œ€': _pct('๋‚จ์„ฑ40๋Œ€๋น„์œจ'),
        '๋‚จ์„ฑ 50๋Œ€ ์ด์ƒ': _pct('๋‚จ์„ฑ50๋Œ€๋น„์œจ') + _pct('๋‚จ์„ฑ60๋Œ€์ด์ƒ๋น„์œจ'),
        '์—ฌ์„ฑ 20๋Œ€ ์ดํ•˜': _pct('์—ฌ์„ฑ20๋Œ€์ดํ•˜๋น„์œจ'),
        '์—ฌ์„ฑ 30๋Œ€': _pct('์—ฌ์„ฑ30๋Œ€๋น„์œจ'),
        '์—ฌ์„ฑ 40๋Œ€': _pct('์—ฌ์„ฑ40๋Œ€๋น„์œจ'),
        '์—ฌ์„ฑ 50๋Œ€ ์ด์ƒ': _pct('์—ฌ์„ฑ50๋Œ€๋น„์œจ') + _pct('์—ฌ์„ฑ60๋Œ€์ด์ƒ๋น„์œจ')
    }

    if not any(segments.values()):
        return None

    max_segment = max(segments, key=segments.get)
    max_value = segments[max_segment]

    if max_value == 0:
        return None

    return f"'{max_segment}({max_value:.1f}%)'"
98
+
99
+
100
def render_basic_info_table(store_data):
    """(Tab 1) Render the basic-info summary table and the summary text."""

    summary_data = get_chat_profile_dict(store_data)

    st.subheader("๐Ÿ“‹ ๊ฐ€๋งน์  ๊ธฐ๋ณธ ์ •๋ณด")
    # The auto-extracted traits dict is shown elsewhere; exclude it here.
    table_rows = [(item, value) for item, value in summary_data.items() if item != '์ž๋™์ถ”์ถœํŠน์ง•']
    summary_df = pd.DataFrame(table_rows, columns=["ํ•ญ๋ชฉ", "๋‚ด์šฉ"]).astype(str)
    st.table(summary_df.set_index('ํ•ญ๋ชฉ'))

    st.subheader("๐Ÿ“Œ ๋ถ„์„ ์š”์•ฝ")
    st.write(f"โœ… **{summary_data.get('๊ฐ€๋งน์ ๋ช…', 'N/A')}**์€(๋Š”) '{summary_data.get('์ƒ๊ถŒ', 'N/A')}' ์ƒ๊ถŒ์˜ '{summary_data.get('์—…์ข…', 'N/A')}' ์—…์ข… ๊ฐ€๋งน์ ์ž…๋‹ˆ๋‹ค.")
    st.write(f"๐Ÿ“ˆ ๋งค์ถœ ์ˆ˜์ค€์€ **{summary_data.get('๋งค์ถœ ์ˆ˜์ค€', 'N/A')}**์ด๋ฉฐ, ๋™์ผ ์ƒ๊ถŒ ๋‚ด ๋งค์ถœ ์ˆœ์œ„๋Š” **{summary_data.get('๋™์ผ ์ƒ๊ถŒ ๋Œ€๋น„ ๋งค์ถœ ์ˆœ์œ„', 'N/A')}**์ž…๋‹ˆ๋‹ค.")
    st.write(f"๐Ÿ’ฐ ๋ฐฉ๋ฌธ ๊ณ ๊ฐ์ˆ˜๋Š” **{summary_data.get('๋ฐฉ๋ฌธ ๊ณ ๊ฐ์ˆ˜ ์ˆ˜์ค€', 'N/A')}** ์ˆ˜์ค€์ด๋ฉฐ, ๊ฐ๋‹จ๊ฐ€๋Š” **{summary_data.get('๊ฐ๋‹จ๊ฐ€ ์ˆ˜์ค€', 'N/A')}** ์ˆ˜์ค€์ž…๋‹ˆ๋‹ค.")

    main_customer = get_main_customer_segment(store_data)
    if main_customer:
        st.write(f"๐Ÿ‘ฅ ์ฃผ์š” ๊ณ ๊ฐ์ธต์€ **{main_customer}**์ด(๊ฐ€) ๊ฐ€์žฅ ๋งŽ์Šต๋‹ˆ๋‹ค.")
119
+
120
+
121
def plot_customer_distribution(store_data):
    """(Tab 2) Grouped bar chart of customer share by gender and age band.

    Returns the Matplotlib figure (caller passes it to st.pyplot).
    """
    labels = ['20๋Œ€ ์ดํ•˜', '30๋Œ€', '40๋Œ€', '50๋Œ€ ์ด์ƒ']

    def _pct(key):
        # BUG FIX: ratio values may be None in the raw profile; coerce to 0
        # like plot_customer_type_pie/plot_loyalty_donut do, otherwise the
        # `+` for the 50+ bucket raises TypeError.
        return store_data.get(key) or 0

    male_percents = [
        _pct('๋‚จ์„ฑ20๋Œ€์ดํ•˜๋น„์œจ'), _pct('๋‚จ์„ฑ30๋Œ€๋น„์œจ'),
        _pct('๋‚จ์„ฑ40๋Œ€๋น„์œจ'),
        _pct('๋‚จ์„ฑ50๋Œ€๋น„์œจ') + _pct('๋‚จ์„ฑ60๋Œ€์ด์ƒ๋น„์œจ')
    ]
    female_percents = [
        _pct('์—ฌ์„ฑ20๋Œ€์ดํ•˜๋น„์œจ'), _pct('์—ฌ์„ฑ30๋Œ€๋น„์œจ'),
        _pct('์—ฌ์„ฑ40๋Œ€๋น„์œจ'),
        _pct('์—ฌ์„ฑ50๋Œ€๋น„์œจ') + _pct('์—ฌ์„ฑ60๋Œ€์ด์ƒ๋น„์œจ')
    ]

    x = np.arange(len(labels))
    width = 0.35
    fig, ax = plt.subplots(figsize=(10, 6))
    rects1 = ax.bar(x - width/2, male_percents, width, label='๋‚จ์„ฑ', color='cornflowerblue')
    rects2 = ax.bar(x + width/2, female_percents, width, label='์—ฌ์„ฑ', color='salmon')

    ax.set_ylabel('๊ณ ๊ฐ ๋น„์œจ (%)')
    ax.set_title('์ฃผ์š” ๊ณ ๊ฐ์ธต ๋ถ„ํฌ (์„ฑ๋ณ„/์—ฐ๋ น๋Œ€)', fontsize=16)
    ax.set_xticks(x)
    ax.set_xticklabels(labels, fontsize=12)
    ax.legend()
    ax.grid(axis='y', linestyle='--', alpha=0.7)

    # Print the percentage above each bar.
    ax.bar_label(rects1, padding=3, fmt='%.1f')
    ax.bar_label(rects2, padding=3, fmt='%.1f')

    fig.tight_layout()
    return fig
153
+
154
+
155
def plot_customer_type_pie(store_data):
    """(Tab 3) Pie chart of primary customer types (residents, workers, foot traffic)."""

    raw_shares = {
        '์œ ๋™์ธ๊ตฌ': store_data.get("์œ ๋™์ธ๊ตฌ์ด์šฉ๋น„์œจ", 0),
        '๊ฑฐ์ฃผ์ž': store_data.get("๊ฑฐ์ฃผ์ž์ด์šฉ๋น„์œจ", 0),
        '์ง์žฅ์ธ': store_data.get("์ง์žฅ์ธ์ด์šฉ๋น„์œจ", 0)
    }

    # Coerce None to 0 and keep only strictly positive shares.
    labels, sizes = [], []
    for name, share in raw_shares.items():
        share = share or 0
        if share > 0:
            labels.append(name)
            sizes.append(share)

    if not sizes or sum(sizes) == 0:
        # No usable data: render a placeholder figure instead of an empty pie.
        fig, ax = plt.subplots(figsize=(6, 6))
        ax.text(0.5, 0.5, "๋ฐ์ดํ„ฐ ์—†์Œ", horizontalalignment='center', verticalalignment='center', transform=ax.transAxes)
        ax.set_title("์ฃผ์š” ๊ณ ๊ฐ ์œ ํ˜•", fontsize=13)
        return fig

    pie_labels = [f"{label} ({size:.1f}%)" for label, size in zip(labels, sizes)]

    fig, ax = plt.subplots(figsize=(6, 6))
    wedges, texts, autotexts = ax.pie(
        sizes,
        labels=pie_labels,
        autopct='%1.1f%%',
        startangle=90,
        pctdistance=0.8
    )

    plt.setp(autotexts, size=9, weight="bold", color="white")
    ax.set_title("์ฃผ์š” ๊ณ ๊ฐ ์œ ํ˜•", fontsize=13)
    ax.axis('equal')

    return fig
193
+
194
+
195
def plot_loyalty_donut(store_data):
    """(Tab 4) Donut chart of the new vs returning customer split."""

    labels = ['์‹ ๊ทœ ๊ณ ๊ฐ', '์žฌ์ด์šฉ ๊ณ ๊ฐ']
    sizes = [
        store_data.get('์‹ ๊ทœ๊ณ ๊ฐ๋น„์œจ') or 0,
        store_data.get('์žฌ์ด์šฉ๊ณ ๊ฐ๋น„์œจ') or 0,
    ]

    if not sizes or sum(sizes) == 0:
        # No usable data: render a placeholder figure.
        fig, ax = plt.subplots(figsize=(5, 5))
        ax.text(0.5, 0.5, "๋ฐ์ดํ„ฐ ์—†์Œ", horizontalalignment='center', verticalalignment='center', transform=ax.transAxes)
        ax.set_title("์‹ ๊ทœ vs ์žฌ๋ฐฉ๋ฌธ ๊ณ ๊ฐ ๋น„์œจ")
        return fig

    fig, ax = plt.subplots(figsize=(5, 5))
    wedges, texts, autotexts = ax.pie(
        sizes,
        labels=labels,
        autopct='%1.1f%%',
        startangle=90,
        pctdistance=0.85,
        colors=['lightcoral', 'skyblue']
    )

    # A white disc over the center turns the pie into a donut.
    donut_hole = plt.Circle((0, 0), 0.70, fc='white')
    ax.add_artist(donut_hole)

    plt.setp(autotexts, size=10, weight="bold")
    ax.set_title("์‹ ๊ทœ vs ์žฌ๋ฐฉ๋ฌธ ๊ณ ๊ฐ ๋น„์œจ", fontsize=14)
    ax.axis('equal')

    return fig
orchestrator.py ADDED
@@ -0,0 +1,247 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # orchestrator.py
2
+
3
+ import json
4
+ import traceback
5
+ from typing import List, Optional, Dict, Any
6
+ from pydantic import ValidationError
7
+
8
+ from langchain.agents import AgentExecutor, create_tool_calling_agent
9
+ from langchain_core.prompts import ChatPromptTemplate
10
+ from langchain_google_genai import ChatGoogleGenerativeAI
11
+ from langchain.tools.render import render_text_description
12
+
13
+ import config
14
+ from modules.llm_provider import set_llm
15
+ from modules.profile_utils import get_chat_profile_dict
16
+
17
+ # tools/tool_loader.py ์—์„œ ๋ชจ๋“  ๋„๊ตฌ๋ฅผ ๊ฐ€์ ธ์˜ด
18
+ from tools.tool_loader import ALL_TOOLS
19
+
20
+ logger = config.get_logger(__name__)
21
+
22
+ # --- ํ—ฌํผ ํ•จ์ˆ˜๋ฅผ ๊ณตํ†ต ์œ ํ‹ธ๋ฆฌํ‹ฐ ํ˜ธ์ถœ๋กœ ๋ณ€๊ฒฝ ---
23
def _get_chat_profile_json_string(store_profile_dict: Dict[str, Any]) -> str:
    """
    Build the chat-profile dict via the shared utility (profile_utils.py)
    and serialize it to a JSON string. Falls back to a minimal payload
    with an "error" entry when summarization fails.
    """
    try:
        return json.dumps(get_chat_profile_dict(store_profile_dict), ensure_ascii=False)

    except Exception as e:
        logger.critical(f"--- [Orchestrator CRITICAL] ์ฑ„ํŒ…์šฉ JSON ์ƒ์„ฑ ์‹คํŒจ: {e} ---", exc_info=True)
        return json.dumps(
            {
                "์—…์ข…": store_profile_dict.get('์—…์ข…', '์•Œ ์ˆ˜ ์—†์Œ'),
                "์ž๋™์ถ”์ถœํŠน์ง•": store_profile_dict.get('์ž๋™์ถ”์ถœํŠน์ง•', {}),
                "์ฃผ์†Œ": store_profile_dict.get('๊ฐ€๋งน์ ์ฃผ์†Œ', '์•Œ ์ˆ˜ ์—†์Œ'),
                "error": "ํ”„๋กœํ•„ ์š”์•ฝ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ",
            },
            ensure_ascii=False,
        )
41
+
42
+
43
class AgentOrchestrator:
    """Tool-calling agent orchestrator built on Gemini Flash.

    Wires the LLM, the tool list (tools/tool_loader.py) and the system
    prompt into a LangChain AgentExecutor, and exposes invoke_agent() as
    the single entry point for the Streamlit chat UI.
    """

    def __init__(self, google_api_key):
        """Initialize the Gemini Flash based Agent Orchestrator."""
        # Default LLM; temperature 0.1 is the process-wide default that
        # modules.llm_provider.get_llm() compares against.
        self.llm = ChatGoogleGenerativeAI(
            model=config.LLM_MODEL_NAME,
            google_api_key=google_api_key,
            temperature=0.1
        )
        # Register this instance as the global default so tools can reuse it.
        set_llm(self.llm)

        # Tool list comes from tools/tool_loader.py.
        self.tools = ALL_TOOLS

        # Plain-text rendering of the tool descriptions (kept for reference).
        self.rendered_tools = render_text_description(self.tools)

        # ChatPromptTemplate placeholders: {base_system_prompt},
        # {store_profile_context}, {last_recommended_festivals} are filled
        # per call in invoke_agent().
        self.system_prompt_template = """
{base_system_prompt}

---
๐Ÿ“ฆ [ํ˜„์žฌ ๊ฐ€๊ฒŒ ํ”„๋กœํ•„ (JSON)]
{store_profile_context}

๐Ÿ“œ [์ด์ „ ์ถ”์ฒœ ์ถ•์ œ ๋ฆฌ์ŠคํŠธ]
{last_recommended_festivals}

---
๐Ÿ’ก ๋ฐ˜๋“œ์‹œ ์œ„ ์ •๋ณด๋ฅผ ๊ธฐ๋ฐ˜์œผ๋กœ ํŒ๋‹จํ•˜๋˜,
๋„๊ตฌ ๋ผ์šฐํŒ… ๊ทœ์น™(1~4์ˆœ์œ„)์— ๋”ฐ๋ผ *์ ์ ˆํ•œ ๋‹จ ํ•˜๋‚˜์˜ ๋„๊ตฌ๋ฅผ ํ˜ธ์ถœ*ํ•ด์•ผ ํ•ฉ๋‹ˆ๋‹ค.
"""

        self.prompt = ChatPromptTemplate.from_messages([
            ("system", self.system_prompt_template),
            ("placeholder", "{chat_history}"),
            ("human", "{input}"),
            ("placeholder", "{agent_scratchpad}"),
        ])

        self.agent = create_tool_calling_agent(self.llm, self.tools, self.prompt)

        self.agent_executor = AgentExecutor(
            agent=self.agent,
            tools=self.tools,
            verbose=True,
            handle_parsing_errors=True,
            return_intermediate_steps=True,
        )
        logger.info(f"--- [Streamlit] AgentOrchestrator ์ดˆ๊ธฐํ™” ์™„๋ฃŒ (Model: {config.LLM_MODEL_NAME}) ---")


    def setup_system_prompt(self):
        """Return the reinforced base system prompt for Gemini Flash."""

        logger.info("--- [Orchestrator] ์‹œ์Šคํ…œ ํ”„๋กฌํ”„ํŠธ ์„ค์ • ์™„๋ฃŒ ---")

        # --- (user request) keep the prompt text verbatim ---
        return f"""
๋‹น์‹ ์€ **์‹ ํ•œ์นด๋“œ ๋ฐ์ดํ„ฐ ๊ธฐ๋ฐ˜ ์ง€์—ญ์ถ•์ œ ์ „๋ฌธ AI ์ปจ์„คํ„ดํŠธ**์ž…๋‹ˆ๋‹ค.
๋‹น์‹ ์˜ ์ž„๋ฌด๋Š” ์‚ฌ์žฅ๋‹˜์˜ ๊ฐ€๊ฒŒ ์ •๋ณด๋ฅผ ๊ธฐ๋ฐ˜์œผ๋กœ
**๊ฐ€๊ฒŒ ๋ถ„์„ โ†’ ์ถ•์ œ ์ถ”์ฒœ โ†’ ์ถ•์ œ ๋ถ„์„ โ†’ ๋งˆ์ผ€ํŒ… ์ „๋žต ์ œ์•ˆ**์„ ์ˆ˜ํ–‰ํ•˜๋Š” ๊ฒƒ์ž…๋‹ˆ๋‹ค.

---
๐Ÿ”ง [์‚ฌ์šฉ ๊ฐ€๋Šฅํ•œ ๋„๊ตฌ ๋ชฉ๋ก]
(๋„๊ตฌ ๋ชฉ๋ก์€ ์—์ด์ „ํŠธ์— ๋‚ด์žฅ๋˜์–ด ์žˆ์œผ๋ฉฐ, ์•„๋ž˜ [๋„๊ตฌ ๋ผ์šฐํŒ… ๊ทœ์น™]์— ๋”ฐ๋ผ ํ˜ธ์ถœ๋ฉ๋‹ˆ๋‹ค.)

---
๐ŸŽฏ **[ํ•ต์‹ฌ ์ž„๋ฌด ์š”์•ฝ]**
1๏ธโƒฃ ์‚ฌ์šฉ์ž์˜ ์š”์ฒญ์„ ์™„์ˆ˜ํ•˜๊ธฐ ์œ„ํ•ด **ํ•„์š”ํ•œ ๋ชจ๋“  ๋„๊ตฌ๋ฅผ ์ž์œจ์ ์œผ๋กœ ํ˜ธ์ถœ**ํ•ด์•ผ ํ•ฉ๋‹ˆ๋‹ค. ๋•Œ๋กœ๋Š” **์—ฌ๋Ÿฌ ๋„๊ตฌ๋ฅผ ์ˆœ์ฐจ์ ์œผ๋กœ ํ˜ธ์ถœ**ํ•ด์•ผ ํ•  ์ˆ˜๋„ ์žˆ์Šต๋‹ˆ๋‹ค. (์˜ˆ: ์ถ•์ œ ์ถ”์ฒœ โ†’ ๋งˆ์ผ€ํŒ… ์ „๋žต ์ƒ์„ฑ)
2๏ธโƒฃ **๋„๊ตฌ ํ˜ธ์ถœ ์—†์ด** "์ฃ„์†กํ•ฉ๋‹ˆ๋‹ค" ๋˜๋Š” "์ž˜ ๋ชจ๋ฅด๊ฒ ์Šต๋‹ˆ๋‹ค" ๊ฐ™์€ ๋‹ต๋ณ€์„ ์ƒ์„ฑํ•˜๋Š” ๊ฒƒ์€ ์ ˆ๋Œ€ ๊ธˆ์ง€์ž…๋‹ˆ๋‹ค.
3๏ธโƒฃ ๋ชจ๋“  ์š”์ฒญ์€ ๋ฐ˜๋“œ์‹œ ์ ํ•ฉํ•œ ๋„๊ตฌ ํ˜ธ์ถœ๋กœ ์ด์–ด์ ธ์•ผ ํ•ฉ๋‹ˆ๋‹ค.
4๏ธโƒฃ ๋ชจ๋“  ๋„๊ตฌ ์‹คํ–‰ ๊ฒฐ๊ณผ๋ฅผ ๋ฐ”ํƒ•์œผ๋กœ, ์‚ฌ์žฅ๋‹˜์—๊ฒŒ ์ œ๊ณตํ•  [์ตœ์ข… ๋‹ต๋ณ€]์„
**์ž์—ฐ์Šค๋Ÿฌ์šด ํ•œ๊ตญ์–ด(๋งˆํฌ๋‹ค์šด ํ˜•์‹)**๋กœ ์ƒ์„ฑํ•ฉ๋‹ˆ๋‹ค.

---
๐Ÿงญ **[๋„๊ตฌ ๋ผ์šฐํŒ… ๊ทœ์น™ (์šฐ์„ ์ˆœ์œ„ ์ ์šฉ)]**

**[1์ˆœ์œ„] ์ถ•์ œ ์ถ”์ฒœ ์š”์ฒญ**
- ํ‚ค์›Œ๋“œ: "์ถ•์ œ ์ถ”์ฒœ", "์ฐธ์—ฌํ•  ๋งŒํ•œ ์ถ•์ œ", "์–ด๋–ค ์ถ•์ œ", "ํ–‰์‚ฌ ์ฐพ์•„์ค˜", "์–ด๋””๊ฐ€ ์ข‹์•„"
- โ†’ `recommend_festivals`

**[2์ˆœ์œ„] ํŠน์ • ์ถ•์ œ ๋ถ„์„/์ „๋žต ์š”์ฒญ**
- **2-1. ๋งˆ์ผ€ํŒ… ์ „๋žต ์š”์ฒญ (์ถ•์ œ 1๊ฐœ)**: ์ถ•์ œ ์ด๋ฆ„์ด 1๊ฐœ ํฌํ•จ๋˜์–ด ์žˆ๊ณ  '๋งˆ์ผ€ํŒ…', '์ „๋žต' ๋“ฑ์˜ ํ‚ค์›Œ๋“œ๊ฐ€ ์žˆ๋Š” ๊ฒฝ์šฐ
- โ†’ `create_festival_specific_marketing_strategy`
- **2-2. ๋งˆ์ผ€ํŒ… ์ „๋žต ์š”์ฒญ (์ถ•์ œ 2๊ฐœ ์ด์ƒ)**: ์ถ•์ œ ์ด๋ฆ„์ด 2๊ฐœ ์ด์ƒ ํฌํ•จ๋˜์–ด ์žˆ๊ณ  '๋งˆ์ผ€ํŒ…', '์ „๋žต' ๋“ฑ์˜ ํ‚ค์›Œ๋“œ๊ฐ€ ์žˆ๋Š” ๊ฒฝ์šฐ
- โ†’ `create_marketing_strategies_for_multiple_festivals`
- **2-3. ์ถ•์ œ ์ƒ์„ธ ๋ถ„์„ ์š”์ฒญ**: "~์ถ•์ œ ์–ด๋•Œ?", "๋ถ„์„ํ•ด์ค˜"
- โ†’ `analyze_festival_profile`

**[3์ˆœ์œ„] ๊ฐ€๊ฒŒ ๋ถ„์„ ์š”์ฒญ**
- ํ‚ค์›Œ๋“œ: โ€œ์šฐ๋ฆฌ ๊ฐ€๊ฒŒโ€, โ€œSWOTโ€, โ€œ๊ณ ๊ฐ ํŠน์„ฑโ€, โ€œ๋ถ„์„ํ•ด์ค˜โ€
- โ†’ `analyze_merchant_profile`

**[4์ˆœ์œ„] ์ผ๋ฐ˜ ๋งˆ์ผ€ํŒ…/ํ™๋ณด ์š”์ฒญ**
- ํ‚ค์›Œ๋“œ: โ€œ๋งˆ์ผ€ํŒ…โ€, โ€œํ™๋ณดโ€, โ€œ๋งค์ถœโ€, โ€œ์ „๋žตโ€
- โ†’ `search_contextual_marketing_strategy`

**[๊ธฐํƒ€]**
- ๋ช…ํ™•ํžˆ ๋ถ„๋ฅ˜๋˜์ง€ ์•Š์œผ๋ฉด 4์ˆœ์œ„ ๋„๊ตฌ ์‚ฌ์šฉ
- โ†’ `search_contextual_marketing_strategy`

---
โœ… **[ํ–‰๋™ ์ฒดํฌ๋ฆฌ์ŠคํŠธ]**
- 1๏ธโƒฃ ์‚ฌ์šฉ์ž์˜ ์š”์ฒญ์ด **์™„์ „ํžˆ ํ•ด๊ฒฐ๋  ๋•Œ๊นŒ์ง€** ํ•„์š”ํ•œ ๋ชจ๋“  ๋„๊ตฌ๋ฅผ ํ˜ธ์ถœํ•  ๊ฒƒ
- 2๏ธโƒฃ [1์ˆœ์œ„] ์ž‘์—… ์‹œ, ๋งˆ์ผ€ํŒ… ์ „๋žต ์š”์ฒญ์ด ์žˆ์—ˆ๋Š”์ง€ **๋ฐ˜๋“œ์‹œ ์žฌํ™•์ธ**ํ•˜๊ณ  2๋‹จ๊ณ„ ๋„๊ตฌ ํ˜ธ์ถœ์„ ๊ฒฐ์ •ํ•  ๊ฒƒ
- 3๏ธโƒฃ ๋„๊ตฌ ํ˜ธ์ถœ ์—†์ด ์ข…๋ฃŒํ•˜์ง€ ๋ง ๊ฒƒ
- 4๏ธโƒฃ ์ตœ์ข… ๋‹ต๋ณ€์€ ์ž์—ฐ์Šค๋Ÿฌ์šด ํ•œ๊ตญ์–ด(๋งˆํฌ๋‹ค์šด)๋กœ ์ƒ์„ฑํ•  ๊ฒƒ

---
โœ๏ธ **[์ตœ์ข… ๋‹ต๋ณ€ ๊ฐ€์ด๋“œ๋ผ์ธ] (๋งค์šฐ ์ค‘์š”)**
1. **์นœ์ ˆํ•œ ์ „๋ฌธ๊ฐ€ ๋งํˆฌ**: ํ•ญ์ƒ ์‚ฌ์žฅ๋‹˜์„ ๋Œ€ํ•˜๋“ฏ, ์ „๋ฌธ์ ์ด๋ฉด์„œ๋„ ์นœ์ ˆํ•˜๊ณ  ์ดํ•ดํ•˜๊ธฐ ์‰ฌ์šด ๋งํˆฌ๋ฅผ ์‚ฌ์šฉํ•ฉ๋‹ˆ๋‹ค.
2. **(์š”์ฒญ 2) ์ถ”์ฒœ ์ ์ˆ˜ ํ‘œ์‹œ**: `recommend_festivals` ๋„๊ตฌ์˜ ๊ฒฐ๊ณผ๋ฅผ ํฌ๋งทํŒ…ํ•  ๋•Œ, ๊ฐ ์ถ•์ œ ์ด๋ฆ„ ์˜†์ด๋‚˜ ๋ฐ”๋กœ ์•„๋ž˜์— **(์ถ”์ฒœ ์ ์ˆ˜: XX.X์ )**๊ณผ ๊ฐ™์ด '์ถ”์ฒœ_์ ์ˆ˜'๋ฅผ **๋ฐ˜๋“œ์‹œ** ๋ช…์‹œํ•˜์„ธ์š”.
3. **(์š”์ฒญ 4) ์ทจ์†Œ์„  ๊ธˆ์ง€**: ์ ˆ๋Œ€๋กœ `~~text~~`์™€ ๊ฐ™์€ ์ทจ์†Œ์„  ๋งˆํฌ๋‹ค์šด์„ ์‚ฌ์šฉํ•˜์ง€ ๋งˆ์„ธ์š”.
4. **(์š”์ฒญ 3) ๋‹ค์Œ ์งˆ๋ฌธ ์ œ์•ˆ**: ์‚ฌ์šฉ์ž๊ฐ€ ๋‹ค์Œ์— ๋ฌด์—‡์„ ํ•  ์ˆ˜ ์žˆ์„์ง€ ์•Œ ์ˆ˜ ์žˆ๋„๋ก, ๋‹ต๋ณ€์˜ **๊ฐ€์žฅ ๋งˆ์ง€๋ง‰**์— ์•„๋ž˜์™€ ๊ฐ™์€ [๋‹ค์Œ ์งˆ๋ฌธ ์˜ˆ์‹œ]๋ฅผ 2~3๊ฐœ ์ œ์•ˆํ•˜์„ธ์š”.

[๋‹ค์Œ ์งˆ๋ฌธ ์˜ˆ์‹œ]
* "๋ฐฉ๊ธˆ ์ถ”์ฒœํ•ด์ค€ ์ถ•์ œ๋“ค์˜ ๋งˆ์ผ€ํŒ… ์ „๋žต์„ ์•Œ๋ ค์ค˜"
* "[์ถ•์ œ์ด๋ฆ„]์— ๋Œ€ํ•œ ๋งˆ์ผ€ํŒ… ์ „๋žต์„ ์งœ์ค˜"
* "๋‚ด ๊ฐ€๊ฒŒ์˜ ๊ฐ•์ ์„ ํ™œ์šฉํ•œ ๋‹ค๋ฅธ ํ™๋ณด ๋ฐฉ๋ฒ•์€?"
"""

    def invoke_agent(
        self,
        user_query: str,
        store_profile_dict: dict,
        chat_history: list,
        last_recommended_festivals: Optional[List[str]] = None,
    ):

        """Run the agent for a user query and return its result.

        Returns a dict with 'final_response' (markdown string) and
        'intermediate_steps' (tool-call trace; empty on error).
        """
        logger.info(f"--- [Orchestrator] Agent ์‹คํ–‰ ์‹œ์ž‘ (Query: {user_query[:30]}...) ---")

        base_system_prompt = self.setup_system_prompt()
        store_profile_chat_json_str = _get_chat_profile_json_string(store_profile_dict)
        last_recommended_festivals_str = (
            "์—†์Œ" if not last_recommended_festivals else str(last_recommended_festivals)
        )

        try:
            # First attempt: fill every prompt placeholder and run the executor.
            response = self.agent_executor.invoke({
                "input": user_query,
                "chat_history": chat_history,
                "store_profile_context": store_profile_chat_json_str,
                "store_profile": store_profile_chat_json_str,
                "last_recommended_festivals": last_recommended_festivals_str,
                "base_system_prompt": base_system_prompt,
            })

            output_text = response.get("output", "").strip()

            # Heuristic: very short outputs containing "}" or "`" are treated
            # as degenerate model responses (e.g. stray JSON/markdown residue).
            is_garbage_response = (
                len(output_text) < 10 and ("}" in output_text or "`" in output_text)
            )

            if not output_text or is_garbage_response:

                if is_garbage_response:
                    logger.warning(f"--- [Orchestrator WARNING] ๋น„์ •์ƒ ์‘๋‹ต ๊ฐ์ง€ ('{output_text}') โ†’ ์žฌ์‹œ๋„ ์ˆ˜ํ–‰ ---")
                else:
                    logger.warning("--- [Orchestrator WARNING] ์‘๋‹ต ๋น„์–ด์žˆ์Œ โ†’ ์žฌ์‹œ๋„ ์ˆ˜ํ–‰ ---")

                # Retry once with an explicit instruction to call a tool.
                retry_input = f"""
[์žฌ์‹œ๋„ ์š”์ฒญ]
์ด์ „ ์‘๋‹ต์ด ๋น„์–ด์žˆ๊ฑฐ๋‚˜ ๋น„์ •์ƒ์ ์ธ ๊ฐ’('{output_text}')์ด์—ˆ์Šต๋‹ˆ๋‹ค.
์‚ฌ์šฉ์ž ์งˆ๋ฌธ: "{user_query}"

๋‹น์‹ ์€ ๋ฐ˜๋“œ์‹œ ํ•˜๋‚˜์˜ ๋„๊ตฌ๋ฅผ ํ˜ธ์ถœํ•ด์•ผ ํ•ฉ๋‹ˆ๋‹ค.
๋„๊ตฌ ๋ผ์šฐํŒ… ๊ทœ์น™(1~4์ˆœ์œ„)์— ๋”ฐ๋ผ ์ ์ ˆํ•œ ๋„๊ตฌ๋ฅผ ์„ ํƒํ•˜๊ณ  ํ˜ธ์ถœํ•˜์‹ญ์‹œ์˜ค.
"""

                response = self.agent_executor.invoke({
                    "input": retry_input,
                    "chat_history": chat_history,
                    "store_profile_context": store_profile_chat_json_str,
                    "store_profile": store_profile_chat_json_str,
                    "last_recommended_festivals": last_recommended_festivals_str,
                    "base_system_prompt": base_system_prompt,
                })

                final_response = response.get("output", "").strip()

            else:
                final_response = output_text

            # Last-resort user-facing message when even the retry was empty.
            if not final_response:
                final_response = "์ฃ„์†กํ•ฉ๋‹ˆ๋‹ค. ์š”์ฒญ์„ ์ฒ˜๋ฆฌํ•˜๋Š” ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค. ์งˆ๋ฌธ์„ ์กฐ๊ธˆ ๋” ๋ช…ํ™•ํžˆ ๋ง์”€ํ•ด์ฃผ์‹œ๊ฒ ์–ด์š”?"

            logger.info("--- [Orchestrator] Agent ์‹คํ–‰ ์™„๋ฃŒ ---\n")

            return {
                "final_response": final_response,
                "intermediate_steps": response.get("intermediate_steps", [])
            }

        except ValidationError as e:
            # Tool argument validation failed (Pydantic schema mismatch).
            logger.error(f"--- [Orchestrator Pydantic ERROR] {e} ---\n", exc_info=True)
            return {
                "final_response": f"์ฃ„์†กํ•ฉ๋‹ˆ๋‹ค. ๋„๊ตฌ ์ž…๋ ฅ๊ฐ’(Pydantic) ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {e}",
                "intermediate_steps": []
            }

        except Exception as e:
            logger.critical(f"--- [Orchestrator CRITICAL ERROR] {e} ---\n", exc_info=True)
            return {
                "final_response": f"์ฃ„์†กํ•ฉ๋‹ˆ๋‹ค. ์•Œ ์ˆ˜ ์—†๋Š” ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {e}",
                "intermediate_steps": []
            }
pyproject.toml ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
[project]
name = "shcard_2025_bigcontest_chatbot"
version = "0.1.0"
description = "신한카드 가맹점 추천 챗봇"
readme = "README.md"
requires-python = ">=3.11"
# NOTE: the former "asyncio>=4.0.0" entry was removed. asyncio is part of the
# Python standard library; the PyPI package of that name is an obsolete shim
# whose last release is 3.4.3, so ">=4.0.0" can never be satisfied and made
# the project uninstallable. LangChain pins are aligned with requirements.txt.
dependencies = [
    "streamlit>=1.38.0",
    "google-generativeai>=0.8.0",
    "pandas>=2.2.0",
    "mcp>=1.13.1",
    "fastmcp>=2.11.0",
    "langchain>=0.2.0",
    "langchain-google-genai>=1.0.0",
    "langchain-mcp-adapters>=0.1.0",
    "langchain-core>=0.2.0",
    "langgraph>=0.1.0",
    "pillow>=10.0.0",
    "matplotlib>=3.7.2",
]
requirements.txt ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Core app
streamlit>=1.38.0
google-generativeai>=0.8.0
pandas>=2.2.0
numpy>=1.24.0

# MCP tooling
mcp>=1.13.1
fastmcp>=2.11.0

# LangChain / LangGraph stack
langchain>=0.2.0
langchain-core>=0.2.0
langchain-google-genai>=1.0.0
langchain-mcp-adapters>=0.1.0
langgraph>=0.1.0
langchain-community>=0.2.0

# RAG: vector store, PDF parsing, embeddings
faiss-cpu>=1.8.0
pypdf>=4.2.0
sentence-transformers
torch

# Imaging
Pillow>=10.0.0

# Legacy API server (logic now runs inside streamlit_app.py) + plotting
fastapi>=0.111.0
uvicorn>=0.26.0
matplotlib>=3.7.2
streamlit_app.py ADDED
@@ -0,0 +1,500 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # streamlit_app.py (FastAPI ํ†ตํ•ฉ ๋ฒ„์ „)
2
+
3
+ import streamlit as st
4
+ import os
5
+ import pandas as pd
6
+ import numpy as np # api/server.py์—์„œ ํ•„์š”
7
+ import math # api/server.py์—์„œ ํ•„์š”
8
+ import json
9
+ import traceback
10
+ # import requests # ๋” ์ด์ƒ API ํ˜ธ์ถœ์— ํ•„์š”ํ•˜์ง€ ์•Š์Œ
11
+ from PIL import Image
12
+ from pathlib import Path
13
+
14
+ from langchain_core.messages import HumanMessage, AIMessage
15
+
16
+ import config
17
+ from orchestrator import AgentOrchestrator
18
+ from modules.visualization import display_merchant_profile
19
+ from modules.knowledge_base import load_marketing_vectorstore, load_festival_vectorstore
20
+
21
+ logger = config.get_logger(__name__)
22
+
23
# --- Page configuration (must be the first st.* call in the script) ---
st.set_page_config(
    page_title="MarketSync(마켓싱크)",
    page_icon="🎉",
    layout="wide",
    initial_sidebar_state="expanded"
)
30
+
31
+ # --- (1) api/data_loader.py์—์„œ ๊ฐ€์ ธ์˜จ ํ•จ์ˆ˜ ---
32
+ # config.py๋ฅผ ์ง์ ‘ ์ž„ํฌํŠธํ•˜๋ฏ€๋กœ sys.path ์กฐ์ž‘ ํ•„์š” ์—†์Œ
33
def load_and_preprocess_data():
    """
    Safely locate and load the pre-built ``final_df.csv`` master file,
    defending against every error that can occur while processing the data.
    (Original function from api/data_loader.py.)

    Returns:
        The cleaned DataFrame, or None when the file is missing or unreadable
        (callers treat None as a fatal startup condition).
    """
    try:
        file_path = config.PATH_FINAL_DF

        # Fail fast with a clear log message when the CSV is not where config says.
        if not file_path.exists():
            logger.critical(f"--- [CRITICAL DATA ERROR] 데이터 파일을 찾을 수 없습니다. 예상 경로: {file_path}")
            logger.critical(f"--- 현재 작업 경로: {Path.cwd()} ---")
            return None

        df = pd.read_csv(file_path)

    except Exception as e:
        logger.critical(f"--- [CRITICAL DATA ERROR] 데이터 파일 로딩 중 예측하지 못한 오류 발생: {e} ---", exc_info=True)
        return None

    # Clean object columns so Streamlit's Arrow conversion does not fail on
    # mixed content: strip '%', thousands separators and whitespace, convert
    # what parses to numbers, and keep the cleaned string otherwise.
    logger.info("--- [Preprocess] Streamlit Arrow 변환 오류 방지용 데이터 클리닝 시작 ---")
    for col in df.select_dtypes(include='object').columns:
        temp_series = (
            df[col]
            .astype(str)
            .str.replace('%', '', regex=False)
            .str.replace(',', '', regex=False)
            .str.strip()
        )
        numeric_series = pd.to_numeric(temp_series, errors='coerce')
        # Rows that failed numeric parsing fall back to their cleaned string
        # form, so a column may keep mixed numeric/string values (dtype object).
        df[col] = numeric_series.fillna(temp_series)

    logger.info("--- [Preprocess] 데이터 클리닝 완료 ---")

    # Bucketed columns look like "<number>_<label>"; keep only the leading integer.
    cols_to_process = ['월매출금액_구간', '월매출건수_구간', '월유니크고객수_구간', '월객단가_구간']

    for col in cols_to_process:
        if col in df.columns:
            try:
                series_str = df[col].astype(str).fillna('')
                series_split = series_str.str.split('_').str[0]
                series_numeric = pd.to_numeric(series_split, errors='coerce')
                # Unparseable buckets become 0 rather than aborting the load.
                df[col] = series_numeric.fillna(0).astype(int)
            except Exception as e:
                logger.warning(f"--- [DATA WARNING] '{col}' 컬럼 처리 중 오류 발생: {e}. 해당 컬럼을 건너뜁니다. ---", exc_info=True)
                continue

    logger.info(f"--- [Preprocess] 데이터 로드 및 전처리 최종 완료. (Shape: {df.shape}) ---")
    return df
82
+
83
+ # --- (2) api/server.py์—์„œ ๊ฐ€์ ธ์˜จ ํ—ฌํผ ํ•จ์ˆ˜ ---
84
def replace_nan_with_none(data):
    """
    Recursively convert every NaN value inside nested dicts/lists to None.

    Non-container, non-NaN values are returned unchanged.
    (Original helper from api/server.py; NaN is not representable in JSON.)
    """
    if isinstance(data, list):
        return [replace_nan_with_none(item) for item in data]
    if isinstance(data, dict):
        return {key: replace_nan_with_none(value) for key, value in data.items()}
    if isinstance(data, float) and math.isnan(data):
        return None
    return data
96
+
97
+ # --- (3) api/server.py์˜ POST /profile ๋กœ์ง์„ ๋ณ€ํ™˜ํ•œ ํ•จ์ˆ˜ ---
98
def get_merchant_profile_logic(merchant_id: str, df_merchant: pd.DataFrame):
    """
    Build the profiling payload for one merchant from the master DataFrame.
    (Logic of the former ``POST /profile`` endpoint in api/server.py.)

    Returns:
        dict with keys ``store_profile`` (the merchant's latest row plus
        derived gender/age ratio fields and ``자동추출특징``) and
        ``average_profile`` (numeric means over the same 상권+업종 peer
        group), with all NaN values replaced by None.

    Raises:
        ValueError: unknown merchant id (the old HTTP 404 case).
        Exception: any unexpected internal failure, re-wrapped with a message.
    """
    logger.info(f"✅ [Local Logic] 가맹점 ID '{merchant_id}' 프로파일링 요청 수신")
    try:
        store_df_multiple = df_merchant[df_merchant['가맹점ID'] == merchant_id]

        if store_df_multiple.empty:
            logger.warning(f"⚠️ [Local Logic] 404 - '{merchant_id}' 가맹점 ID를 찾을 수 없습니다.")
            raise ValueError(f"'{merchant_id}' 가맹점 ID를 찾을 수 없습니다.")

        # Several monthly rows may exist per merchant; keep only the latest 기준년월.
        if len(store_df_multiple) > 1:
            logger.info(f" [INFO] '{merchant_id}'에 대해 {len(store_df_multiple)}개의 데이터 발견. 최신 데이터로 필터링합니다.")
            temp_df = store_df_multiple.copy()
            temp_df['기준년월_dt'] = pd.to_datetime(temp_df['기준년월'])
            latest_store_df = temp_df.sort_values(by='기준년월_dt', ascending=False).iloc[[0]]
        else:
            latest_store_df = store_df_multiple

        store_data = latest_store_df.iloc[0].to_dict()

        # (Customer-ratio and auto-extracted-feature computation, same as original.)
        # 4-1. Aggregate per-gender customer ratios across the age buckets.
        store_data['남성고객비율'] = (
            store_data.get('남성20대이하비율', 0) + store_data.get('남성30대비율', 0) +
            store_data.get('남성40대비율', 0) + store_data.get('남성50대비율', 0) +
            store_data.get('남성60대이상비율', 0)
        )
        store_data['여성고객비율'] = (
            store_data.get('여성20대이하비율', 0) + store_data.get('여성30대비율', 0) +
            store_data.get('여성40대비율', 0) + store_data.get('여성50대비율', 0) +
            store_data.get('여성60대이상비율', 0)
        )

        # 4-2. Aggregate per-age-band ratios (20s and under, 30s, 40s, 50s+;
        # the 50s band also absorbs the 60+ buckets).
        store_data['연령대20대이하고객비율'] = store_data.get('남성20대이하비율', 0) + store_data.get('여성20대이하비율', 0)
        store_data['연령대30대고객비율'] = store_data.get('남성30대비율', 0) + store_data.get('여성30대비율', 0)
        store_data['연령대40대고객비율'] = store_data.get('남성40대비율', 0) + store_data.get('여성40대비율', 0)
        store_data['연령대50대고객비율'] = (
            store_data.get('남성50대비율', 0) + store_data.get('여성50대비율', 0) +
            store_data.get('남성60대이상비율', 0) + store_data.get('여성60대이상비율', 0)
        )

        male_ratio = store_data.get('남성고객비율', 0)
        female_ratio = store_data.get('여성고객비율', 0)
        # Strict '>' means a tie is reported as '여성 중심'.
        핵심고객_성별 = '남성 중심' if male_ratio > female_ratio else '여성 중심'

        age_ratios = {
            '20대이하': store_data.get('연령대20대이하고객비율', 0),
            '30대': store_data.get('연령대30대고객비율', 0),
            '40대': store_data.get('연령대40대고객비율', 0),
            '50대이상': store_data.get('연령대50대고객비율', 0),
        }
        핵심연령대_결과 = max(age_ratios, key=age_ratios.get)

        store_data['자동추출특징'] = {
            "핵심고객": 핵심고객_성별,
            "핵심연령대": 핵심연령대_결과,
            "매출순위": f"상권 내 상위 {store_data.get('동일상권내매출순위비율', 0):.1f}%, 업종 내 상위 {store_data.get('동일업종내매출순위비율', 0):.1f}%"
        }

        # Peer-group averages over the same 상권 + 업종 combination.
        area = store_data.get('상권')
        category = store_data.get('업종')

        average_df = df_merchant[(df_merchant['상권'] == area) & (df_merchant['업종'] == category)]

        if average_df.empty:
            average_data = {}
        else:
            numeric_cols = average_df.select_dtypes(include=np.number).columns
            average_data = average_df[numeric_cols].mean().to_dict()

        average_data['가맹점명'] = f"{area} {category} 업종 평균"

        final_result = {
            "store_profile": store_data,
            "average_profile": average_data
        }

        # JSON-friendliness: NaN is not valid JSON, so map it to None throughout.
        clean_result = replace_nan_with_none(final_result)

        logger.info(f"✅ [Local Logic] '{store_data.get('가맹점명')}({merchant_id})' 프로파일링 성공 (기준년월: {store_data.get('기준년월')})")
        return clean_result

    except ValueError as e:  # former HTTPException(404), now a plain ValueError
        logger.error(f"❌ [Local Logic ERROR] 처리 중 오류: {e}", exc_info=True)
        raise e
    except Exception as e:
        logger.critical(f"❌ [Local Logic CRITICAL] 예측하지 못한 오류: {e}\n{traceback.format_exc()}", exc_info=True)
        raise Exception(f"서버 내부 오류 발생: {e}")
190
+
191
+ # --- (๋) API ๋กœ์ง ํ†ตํ•ฉ ---
192
+
193
+
194
+ # --- ์ด๋ฏธ์ง€ ๋กœ๋“œ ํ•จ์ˆ˜ ---
195
@st.cache_data
def load_image(image_name: str) -> Image.Image | None:
    """
    Load an image from the assets folder, caching the result via Streamlit.

    Returns None (after logging) when the file is missing or unreadable.
    """
    try:
        image_path = config.ASSETS / image_name
        if image_path.is_file():
            return Image.open(image_path)
        logger.error(f"이미지 파일을 찾을 수 없습니다: {image_path}")
        return None
    except Exception as e:
        logger.error(f"이미지 로딩 중 오류 발생 ({image_name}): {e}", exc_info=True)
        return None
208
+
209
+ # --- (4) ๋ฐ์ดํ„ฐ ๋กœ๋“œ ํ•จ์ˆ˜ ์ˆ˜์ • ---
210
+
211
@st.cache_data
def load_master_dataframe():
    """
    Load and preprocess the entire 'final_df.csv' master dataset at app
    startup (taking over the former FastAPI server's role), cached by
    Streamlit. Returns None when loading fails.
    """
    logger.info("마스터 데이터프레임 로드 시도...")
    master_df = load_and_preprocess_data()  # helper copied from (1) api/data_loader.py
    if master_df is not None:
        logger.info("--- [Streamlit] 마스터 데이터프레임 로드 및 캐시 완료 ---")
        return master_df
    logger.critical("--- [Streamlit Error] 마스터 데이터 로딩 실패! ---")
    return None
224
+
225
@st.cache_data
def load_merchant_list_for_ui(_df_master: pd.DataFrame):
    """
    Extract the deduplicated (가맹점ID, 가맹점명) pairs used by the search UI.
    (Logic of the former ``GET /merchants`` endpoint in api/server.py.)
    Returns a two-column DataFrame, or None on failure / None input.
    """
    try:
        if _df_master is None:
            return None
        logger.info(f"✅ [Local Logic] '/merchants' 가맹점 목록 요청 수신")
        records = _df_master[['가맹점ID', '가맹점명']].drop_duplicates().to_dict('records')
        logger.info(f"✅ [Local Logic] 가맹점 목록 {len(records)}개 반환 완료")
        return pd.DataFrame(records)
    except Exception as e:
        st.error(f"가게 목록을 불러오는 데 실패했습니다: {e}")
        logger.critical(f"가게 목록 로딩 실패: {e}", exc_info=True)
        return None
242
+
243
+ # --- ๋ฐ์ดํ„ฐ ๋กœ๋“œ ์‹คํ–‰ (์ˆ˜์ •) ---
244
+ # ๋งˆ์Šคํ„ฐ ๋ฐ์ดํ„ฐํ”„๋ ˆ์ž„์„ ๋จผ์ € ๋กœ๋“œํ•ฉ๋‹ˆ๋‹ค.
245
+ MASTER_DF = load_master_dataframe()
246
+ if MASTER_DF is None:
247
+ st.error("๐Ÿšจ ๋ฐ์ดํ„ฐ ๋กœ๋”ฉ ์‹คํŒจ! data/final_df.csv ํŒŒ์ผ์„ ํ™•์ธํ•ด์ฃผ์„ธ์š”.")
248
+ st.stop()
249
+
250
+ # UI์šฉ ๊ฐ€๋งน์  ๋ชฉ๋ก์„ ๋งˆ์Šคํ„ฐ์—์„œ ์ถ”์ถœํ•ฉ๋‹ˆ๋‹ค.
251
+ merchant_df = load_merchant_list_for_ui(MASTER_DF)
252
+ if merchant_df is None:
253
+ st.error("๐Ÿšจ ๊ฐ€๋งน์  ๋ชฉ๋ก ์ถ”์ถœ ์‹คํŒจ!")
254
+ st.stop()
255
+
256
+ # --- ์„ธ์…˜ ์ดˆ๊ธฐํ™” ํ•จ์ˆ˜ ---
257
def initialize_session():
    """
    One-time session bootstrap: build the AgentOrchestrator (after enabling
    the LLM cache and warming both vector stores) and seed the Streamlit
    session-state variables that drive the two-step UI.
    """
    if "orchestrator" not in st.session_state:
        google_api_key = os.environ.get("GOOGLE_API_KEY")
        if not google_api_key:
            st.error("🔑 GOOGLE_API_KEY 환경변수가 설정되지 않았습니다!")
            st.stop()
        with st.spinner("🧠 AI 모델과 빅데이터를 로딩하고 있어요... 잠시만 기다려주세요!"):
            try:
                # Enable a process-wide in-memory LLM response cache. This is
                # optional: an ImportError only disables caching, it never
                # blocks startup.
                try:
                    from langchain.cache import InMemoryCache
                    from langchain.globals import set_llm_cache
                    set_llm_cache(InMemoryCache())
                    logger.info("--- [Streamlit] 전역 LLM 캐시(InMemoryCache) 활성화 ---")
                except ImportError:
                    logger.warning("--- [Streamlit] langchain.cache 임포트 실패. LLM 캐시 비활성화 ---")


                # Warm both vector stores; only the festival DB is mandatory.
                load_marketing_vectorstore()
                db = load_festival_vectorstore()
                if db is None:
                    st.error("💾 축제 벡터 DB 로딩 실패! 'build_vector_store.py' 실행 여부를 확인하세요.")
                    st.stop()
                logger.info("--- [Streamlit] 모든 AI 모듈 로딩 완료 ---")
            except Exception as e:
                st.error(f"🤯 AI 모듈 초기화 중 오류 발생: {e}")
                logger.critical(f"AI 모듈 초기화 실패: {e}", exc_info=True)
                st.stop()
        st.session_state.orchestrator = AgentOrchestrator(google_api_key)

    # Session-state defaults: 'step' selects which screen main() renders.
    if "step" not in st.session_state:
        st.session_state.step = "get_merchant_name"
        st.session_state.messages = []
        st.session_state.merchant_id = None
        st.session_state.merchant_name = None
        st.session_state.profile_data = None
        st.session_state.consultation_result = None
    if "last_recommended_festivals" not in st.session_state:
        st.session_state.last_recommended_festivals = []
298
+
299
+ # --- ์ฒ˜์Œ์œผ๋กœ ๋Œ์•„๊ฐ€๊ธฐ ํ•จ์ˆ˜ ---
300
def restart_consultation():
    """Drop all consultation-related session state so the app returns to step 1."""
    keys_to_reset = ["step", "merchant_name", "merchant_id", "profile_data", "messages", "consultation_result", "last_recommended_festivals"]
    for state_key in keys_to_reset:
        # pop() with a default removes the key when present, no-op otherwise.
        st.session_state.pop(state_key, None)
306
+
307
+ # --- ์‚ฌ์ด๋“œ๋ฐ” ๋ Œ๋”๋ง ํ•จ์ˆ˜ ---
308
def render_sidebar():
    """Render the sidebar: Synapse/ShinhanCard logos, contest captions, restart button."""
    with st.sidebar:
        # Load logo images (cached by load_image; None when file is missing).
        synapse_logo = load_image("Synapse.png")
        shinhancard_logo = load_image("ShinhanCard_Logo.png")

        col1, col2, col3 = st.columns([1, 5, 1])  # widen the center column
        with col2:
            if synapse_logo:
                st.image(synapse_logo, use_container_width=True)

        st.write("")
        st.markdown(" ")
        col_sh1, col_sh2, col_sh3 = st.columns([1, 5, 1])
        with col_sh2:
            if shinhancard_logo:
                st.image(shinhancard_logo, use_container_width=True)  # fit to column width

        st.markdown("<p style='text-align: center; color: grey; margin-top: 20px;'>2025 Big Contest</p>", unsafe_allow_html=True)
        st.markdown("<p style='text-align: center; color: grey;'>AI DATA 활용분야</p>", unsafe_allow_html=True)
        st.markdown("---")

        # Full session reset, then rerun so the UI returns to the search step.
        if st.button('처음으로 돌아가기', key='restart_button_styled', use_container_width=True):
            restart_consultation()
            st.rerun()
334
+
335
+ # --- ๊ฐ€๊ฒŒ ๊ฒ€์ƒ‰ UI ํ•จ์ˆ˜ (์ˆ˜์ •) ---
336
def render_get_merchant_name_step():
    """
    UI step 1: search the merchant list and pick a store to analyze.

    Fix: the selectbox placeholder and the string it was compared against
    used two visually identical but different down-arrow characters
    ("⬇" vs "⬇️" with a variation selector), so the placeholder guard could
    never match and the code only worked via its IndexError fallback. Both
    sides now share one constant, and selecting the placeholder simply
    renders no button.
    """
    # Single source of truth for the placeholder entry (see docstring).
    placeholder = "⬇ 아래 목록에서 가게를 선택해주세요..."

    st.subheader("🔍 컨설팅 받을 가게를 검색해주세요")
    st.caption("가게 이름 또는 가맹점 ID의 일부를 입력하여 검색할 수 있습니다.")

    search_query = st.text_input(
        "가게 이름 또는 가맹점 ID 검색",
        placeholder="예: 메가커피, 스타벅스, 003AC99735 등",
        label_visibility="collapsed"
    )

    if search_query:
        # Literal (non-regex) case-insensitive substring match on name or id.
        mask = (
            merchant_df['가맹점명'].str.contains(search_query, case=False, na=False, regex=False) |
            merchant_df['가맹점ID'].str.contains(search_query, case=False, na=False, regex=False)
        )
        search_results = merchant_df[mask].copy()

        if not search_results.empty:
            search_results['display'] = search_results['가맹점명'] + " (" + search_results['가맹점ID'] + ")"
            options = [placeholder] + search_results['display'].tolist()
            selected_display_name = st.selectbox(
                "가게 선택:",
                options,
                label_visibility="collapsed"
            )

            if selected_display_name != placeholder:
                try:
                    selected_row = search_results[search_results['display'] == selected_display_name].iloc[0]
                    selected_merchant_id = selected_row['가맹점ID']
                    selected_merchant_name = selected_row['가맹점명']
                    button_label = f"🚀 '{selected_merchant_name}' 분석 시작하기"
                    is_selection_valid = True
                except (IndexError, KeyError):
                    # Defensive: the selected display string no longer maps to a row.
                    button_label = "분석 시작하기"
                    is_selection_valid = False

                if st.button(button_label, disabled=not is_selection_valid, type="primary", use_container_width=True):
                    with st.spinner(f"📈 '{selected_merchant_name}' 가게 정보를 분석 중입니다... 잠시만 기다려주세요!"):
                        profile_data = None
                        try:
                            # Local replacement for the old API POST /profile call.
                            profile_data = get_merchant_profile_logic(selected_merchant_id, MASTER_DF)

                            if "store_profile" not in profile_data or "average_profile" not in profile_data:
                                st.error("프로필 생성 형식이 올바르지 않습니다.")
                                profile_data = None
                        except ValueError as e:  # unknown merchant id (former 404)
                            st.error(f"가게 프로필 로딩 실패: {e}")
                        except Exception as e:
                            st.error(f"가게 프로필 로딩 중 예상치 못한 오류 발생: {e}")
                            logger.critical(f"가게 프로필 로컬 로직 실패: {e}", exc_info=True)

                        if profile_data:
                            st.session_state.merchant_name = selected_merchant_name
                            st.session_state.merchant_id = selected_merchant_id
                            st.session_state.profile_data = profile_data
                            st.session_state.step = "show_profile_and_chat"
                            st.success(f"✅ '{selected_merchant_name}' 분석 완료!")
                            st.rerun()
        else:
            st.info("💡 검색 결과가 없습니다. 다른 검색어를 시도해보세요.")
400
+
401
+ # --- ํ”„๋กœํ•„ ๋ฐ ์ฑ„ํŒ… UI ํ•จ์ˆ˜ ---
402
def render_show_profile_and_chat_step():
    """UI step 2: show the profiling report and run the AI consulting chat."""
    st.subheader(f"✨ '{st.session_state.merchant_name}' 가게 분석 완료")
    with st.expander("📊 상세 데이터 분석 리포트 보기", expanded=True):
        try:
            display_merchant_profile(st.session_state.profile_data)
        except Exception as e:
            st.error(f"프로필 시각화 중 오류 발생: {e}")
            logger.error(f"--- [Visualize ERROR]: {e}\n{traceback.format_exc()}", exc_info=True)

    st.divider()
    st.subheader("💬 AI 컨설턴트와 상담을 시작하세요.")
    st.info("가게 분석 정보를 바탕으로 궁금한 점을 질문해보세요. (예: '20대 여성 고객을 늘리고 싶어요')")

    # Replay the conversation so far.
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    if prompt := st.chat_input("요청사항을 입력하세요..."):
        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.markdown(prompt)

        with st.chat_message("assistant"):
            with st.spinner("AI 컨설턴트가 답변을 생성 중입니다...(최대 1~2분)"):
                orchestrator = st.session_state.orchestrator

                if "store_profile" not in st.session_state.profile_data:
                    st.error("세션에 'store_profile' 데이터가 없습니다. 다시 시작해주세요.")
                    st.stop()

                # Convert the last 10 prior turns (excluding the prompt just
                # appended) into LangChain message objects for the agent.
                agent_history = []
                history_to_convert = st.session_state.messages[:-1][-10:]

                for msg in history_to_convert:
                    if msg["role"] == "user":
                        agent_history.append(HumanMessage(content=msg["content"]))
                    elif msg["role"] == "assistant":
                        agent_history.append(AIMessage(content=msg["content"]))

                result = orchestrator.invoke_agent(
                    user_query=prompt,
                    store_profile_dict=st.session_state.profile_data["store_profile"],
                    chat_history=agent_history,
                    last_recommended_festivals=st.session_state.last_recommended_festivals,
                )

                response_text = ""
                # Reset before inspecting this turn's tool calls.
                st.session_state.last_recommended_festivals = []

                if "error" in result:
                    response_text = f"오류 발생: {result['error']}"

                elif "final_response" in result:
                    response_text = result.get("final_response", "응답을 생성하지 못했습니다.")
                    intermediate_steps = result.get("intermediate_steps", [])

                    # If the agent called the festival recommender, remember
                    # the recommended festival names for follow-up questions.
                    try:
                        for step in intermediate_steps:
                            action = step[0]
                            tool_output = step[1]

                            if hasattr(action, 'tool') and action.tool == "recommend_festivals":
                                if tool_output and isinstance(tool_output, list) and isinstance(tool_output[0], dict):
                                    recommended_list = [
                                        f.get("축제명") for f in tool_output if f.get("축제명")
                                    ]

                                    st.session_state.last_recommended_festivals = recommended_list
                                    logger.info(f"--- [Streamlit] 추천 축제 저장됨 (Intermediate Steps): {recommended_list} ---")
                                    break

                    except Exception as e:
                        logger.critical(f"--- [Streamlit CRITICAL] Intermediate steps 처리 중 예외 발생: {e} ---", exc_info=True)

                else:
                    response_text = "알 수 없는 오류가 발생했습니다."

                st.markdown(response_text)
                st.session_state.messages.append({"role": "assistant", "content": response_text})
482
+
483
+ # --- ๋ฉ”์ธ ์‹คํ–‰ ํ•จ์ˆ˜ ---
484
def main():
    """Top-level page layout: header, session/sidebar setup, then step routing."""
    st.title("🎉 MarketSync (마켓싱크)")
    st.subheader("소상공인 맞춤형 축제 추천 & 마케팅 AI 컨설턴트")
    st.caption("신한카드 빅데이터와 AI 에이전트를 활용하여, 사장님 가게에 꼭 맞는 지역 축제와 마케팅 전략을 찾아드립니다.")
    st.divider()

    initialize_session()
    render_sidebar()

    # Dispatch on the session 'step' instead of an if/elif chain.
    step_renderers = {
        "get_merchant_name": render_get_merchant_name_step,
        "show_profile_and_chat": render_show_profile_and_chat_step,
    }
    renderer = step_renderers.get(st.session_state.step)
    if renderer is not None:
        renderer()
497
+
498
+ # --- ์•ฑ ์‹คํ–‰ ---
499
+ if __name__ == "__main__":
500
+ main()
tools/festival_recommender.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # tools/festival_recommender.py
2
+
3
+ from langchain_core.tools import tool
4
+ from typing import List, Dict, Any
5
+
6
+ import config
7
+ from modules.filtering import FestivalRecommender
8
+
9
+ logger = config.get_logger(__name__)
10
+
11
@tool
def recommend_festivals(user_query: str, store_profile: str) -> List[Dict[str, Any]]:
    """
    (도구) 사용자의 질문과 가게 프로필을 바탕으로 맞춤형 축제를 추천하는
    [하이브리드 5단계 파이프라인]을 실행합니다.
    1. 쿼리 재작성 (프로필 기반)
    2. 후보 검색 (임베딩 점수 - Score 1)
    3. 동적 속성 평가 (LLM 기반 - Score 2)
    4. 하이브리드 점수 계산 (Score 1 + Score 2)
    5. 최종 답변 포맷팅 (LLM 기반)

    이 도구는 '축제 추천해줘'와 같은 요청 시 단독으로 사용되어야 합니다.
    """
    # NOTE(review): the Korean docstring above is the LangChain tool
    # description the agent LLM uses for routing, so it is kept verbatim.
    logger.info(f"--- [Tool] (신규) 하이브리드 축제 추천 파이프라인 시작 (Query: {user_query[:30]}...) ---")

    # Proposal #4: instantiate the pipeline class and run it.
    pipeline = FestivalRecommender(store_profile, user_query)

    # .run() contains all of the pipeline's own exception handling.
    return pipeline.run()
tools/marketing_strategy.py ADDED
@@ -0,0 +1,230 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # tools/marketing_strategy.py
2
+
3
+ import traceback
4
+ import json
5
+ from typing import List
6
+
7
+ from langchain_core.tools import tool
8
+
9
+ import config
10
+ from modules.llm_provider import get_llm
11
+ from modules.knowledge_base import load_marketing_vectorstore
12
+
13
+ from tools.profile_analyzer import get_festival_profile_by_name
14
+
15
+ logger = config.get_logger(__name__)
16
+
17
+
18
@tool
def search_contextual_marketing_strategy(user_query: str, store_profile: str) -> str:
    """
    (RAG Tool) 사용자의 질문과 가게 프로필(JSON 문자열)을 바탕으로 '마케팅 전략' Vector DB에서
    관련성이 높은 컨텍스트(전략)를 검색하고, LLM을 통해 최종 답변을 생성하여 반환합니다.
    """
    # NOTE(review): the Korean docstring above doubles as the LangChain tool
    # description used for agent routing, so it is kept verbatim.
    logger.info("--- [Tool] RAG 마케팅 전략 검색 호출됨 ---")

    try:
        retriever = load_marketing_vectorstore()
        if retriever is None:
            raise RuntimeError("마케팅 Retriever가 로드되지 않았습니다.")

        # 1. Build a context-aware search query from the store profile.
        #    If the profile is not valid JSON, fall back to the raw string.
        try:
            profile_dict = json.loads(store_profile)
            profile_for_query = (
                f"가게 위치: {profile_dict.get('주소', '알 수 없음')}\n"
                f"가게 업종: {profile_dict.get('업종', '알 수 없음')}\n"
                f"핵심 고객: {profile_dict.get('자동추출특징', {}).get('핵심고객', '알 수 없음')}"
            )
        except Exception:
            profile_for_query = store_profile

        contextual_query = f"[가게 정보:\n{profile_for_query}\n]에 대한 [질문: {user_query}]"
        logger.info(f"--- [Tool] RAG 검색 쿼리: {contextual_query} ---")

        # 2. Vector DB search.
        docs = retriever.invoke(contextual_query)

        if not docs:
            logger.warning("--- [Tool] RAG 검색 결과 없음 ---")
            return "죄송합니다. 사장님의 가게 프로필과 질문에 맞는 마케팅 전략을 찾지 못했습니다. 가게의 특징을 조금 더 알려주시거나, 다른 질문을 시도해보시겠어요?"

        # 3. Join the retrieved documents into one context blob for the LLM.
        context = "\n\n---\n\n".join([doc.page_content for doc in docs])
        logger.info("--- [Tool] RAG 컨텍스트 생성 완료 ---")

        # 4. Re-compose the final answer with the LLM.
        llm = get_llm(temperature=0.3)

        # --- (user request) keep the original prompt verbatim ---
        prompt = f"""
    당신은 소상공인 전문 마케팅 컨설턴트입니다.
    아래 [가게 프로필]과 [참고 마케팅 전략]을 바탕으로, 사용자의 [질문]에 대한 맞춤형 마케팅 전략 3가지를 제안해주세요.

    [가게 프로필]
    {store_profile}

    [질문]
    {user_query}

    [참고 마케팅 전략]
    {context}

    [작성 가이드라인]
    1. [참고 마케팅 전략]을 그대로 복사하지 말고, [가게 프로필]의 특징(예: 업종, 핵심 고객, 상권)과 [질문]의 의도를 조합하여 **가게에 특화된 새로운 아이디어**로 재구성해주세요.
    2. 각 전략은 구체적인 실행 방안을 포함해야 합니다.
    3. 친절하고 전문적인 말투를 사용하세요.
    4. 아래 [출력 형식]을 정확히 지켜주세요.
    5. **취소선 금지**: 절대로 `~~text~~`와 같은 취소선 마크다운을 사용하지 마세요.

    [출력 형식]
    사장님 가게의 특성을 고려한 3가지 마케팅 아이디어를 제안해 드립니다.

    **1. [전략 제목 1]**
    * **전략 내용:** (가게의 어떤 특징을 활용하여 어떻게 실행하는지 구체적으로 서술)
    * **기대 효과:** (이 전략을 통해 얻을 수 있는 구체적인 효과)

    **2. [전략 제목 2]**
    * **전략 내용:** (가게의 어떤 특징을 활용하여 어떻게 실행하는지 구체적으로 서술)
    * **기대 효과:** (이 전략을 통해 얻을 수 있는 구체적인 효과)

    **3. [전략 제목 3]**
    * **전략 내용:** (가게의 어떤 특징을 활용하여 어떻게 실행하는지 구체적으로 서술)
    * **기대 효과:** (이 전략을 통해 얻을 수 있는 구체적인 효과)
    """


        try:
            response = llm.invoke(prompt)
            logger.info("--- [Tool] RAG + LLM 답변 생성 완료 ---")
            return response.content
        except Exception as llm_e:
            logger.critical(f"--- [Tool CRITICAL] RAG LLM 호출 중 오류: {llm_e} ---", exc_info=True)
            return f"오류: 검색된 전략을 처리하는 중 오류가 발생했습니다. (LLM 오류: {llm_e})"

    except Exception as e:
        logger.critical(f"--- [Tool CRITICAL] RAG 마케팅 전략 검색 중 오류: {e} ---", exc_info=True)
        return f"죄송합니다. 마케팅 전략을 생성하는 중 오류가 발생했습니다: {e}"
108
+
109
+
110
@tool
def create_festival_specific_marketing_strategy(festival_name: str, store_profile: str) -> str:
    """
    (RAG x2 Tool) ํŠน์ • ์ถ•์ œ ์ด๋ฆ„(์˜ˆ: '๊ด€์•…๊ฐ•๊ฐ์ฐฌ์ถ•์ œ')๊ณผ ๊ฐ€๊ฒŒ ํ”„๋กœํ•„(JSON ๋ฌธ์ž์—ด)์„ ์ž…๋ ฅ๋ฐ›์•„,
    '์ถ•์ œ DB'์™€ '๋งˆ์ผ€ํŒ… DB'๋ฅผ *๋™์‹œ์—* RAG๋กœ ์ฐธ์กฐํ•˜์—ฌ,
    ํ•ด๋‹น ์ถ•์ œ ๊ธฐ๊ฐ„ ๋™์•ˆ ์‹คํ–‰ํ•  ์ˆ˜ ์žˆ๋Š” ๋งž์ถคํ˜• ๋งˆ์ผ€ํŒ… ์ „๋žต *1๊ฐœ*๋ฅผ ์ƒ์„ฑํ•ฉ๋‹ˆ๋‹ค.
    """
    # NOTE: the docstring above doubles as the LLM-facing tool description,
    # so it is intentionally kept in Korean.
    logger.info(f"--- [Tool] '*๋‹จ์ผ* ์ถ•์ œ ๋งž์ถคํ˜• ์ „๋žต ์ƒ์„ฑ (RAGx2)' ๋„๊ตฌ ํ˜ธ์ถœ (๋Œ€์ƒ: {festival_name}) ---")

    try:
        # 1. (RAG 1) Fetch the festival profile (re-uses the lookup tool).
        festival_profile_str = get_festival_profile_by_name.invoke({"festival_name": festival_name})

        # BUGFIX: the previous check looked for the substrings "์˜ค๋ฅ˜" / "์ฐพ์„ ์ˆ˜ ์—†์Œ",
        # but get_festival_profile_by_name reports failure as JSON with an "error"
        # key whose message reads "...์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค..." — so not-found responses
        # slipped through undetected. Parse the JSON once and inspect the "error"
        # key instead; this also lets us derive the display name safely without
        # a second json.loads inside the prompt f-string (which could raise).
        try:
            festival_profile = json.loads(festival_profile_str)
        except (TypeError, ValueError):
            festival_profile = None

        if not isinstance(festival_profile, dict) or "error" in festival_profile:
            logger.warning(f"--- [Tool WARNING] ์ถ•์ œ ํ”„๋กœํ•„์„ ์ฐพ์ง€ ๋ชปํ•จ: {festival_name} ---")
            # Fall back to a minimal, valid profile so the prompt stays usable.
            festival_profile_str = json.dumps(
                {"์ถ•์ œ๋ช…": festival_name, "์ •๋ณด": "์ƒ์„ธ ์ •๋ณด๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."},
                ensure_ascii=False,
            )
            festival_display_name = festival_name
        else:
            logger.info(f"--- [Tool] (RAG 1) ์ถ•์ œ ํ”„๋กœํ•„ ๋กœ๋“œ ์„ฑ๊ณต: {festival_name} ---")
            festival_display_name = festival_profile.get('์ถ•์ œ๋ช…', festival_name)

        # 2. (RAG 2) Retrieve related marketing strategies.
        marketing_retriever = load_marketing_vectorstore()
        if marketing_retriever is None:
            raise RuntimeError("๋งˆ์ผ€ํŒ… Retriever๊ฐ€ ๋กœ๋“œ๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.")

        combined_query = f"""
        ์ถ•์ œ ์ •๋ณด: {festival_profile_str}
        ๊ฐ€๊ฒŒ ํ”„๋กœํ•„: {store_profile}
        ์งˆ๋ฌธ: ์œ„ ๊ฐ€๊ฒŒ๊ฐ€ ์œ„ ์ถ•์ œ ๊ธฐ๊ฐ„ ๋™์•ˆ ํ•  ์ˆ˜ ์žˆ๋Š” ์ตœ๊ณ ์˜ ๋งˆ์ผ€ํŒ… ์ „๋žต์€?
        """
        marketing_docs = marketing_retriever.invoke(combined_query)

        if not marketing_docs:
            marketing_context = "์ฐธ๊ณ ํ•  ๋งŒํ•œ ๋งˆ์ผ€ํŒ… ์ „๋žต์„ ์ฐพ์ง€ ๋ชปํ–ˆ์Šต๋‹ˆ๋‹ค."
            logger.warning("--- [Tool] (RAG 2) ๋งˆ์ผ€ํŒ… ์ „๋žต ๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ ์—†์Œ ---")
        else:
            marketing_context = "\n\n---\n\n".join([doc.page_content for doc in marketing_docs])
            logger.info(f"--- [Tool] (RAG 2) ๋งˆ์ผ€ํŒ… ์ „๋žต ์ปจํ…์ŠคํŠธ {len(marketing_docs)}๊ฐœ ํ™•๋ณด ---")

        # 3. Generate the final strategy with the LLM.
        # temperature=0.5: the strategy should be creative but grounded.
        llm = get_llm(temperature=0.5)

        # --- (user request) original prompt wording preserved ---
        prompt = f"""
        ๋‹น์‹ ์€ ์ถ•์ œ ์—ฐ๊ณ„ ๋งˆ์ผ€ํŒ… ์ „๋ฌธ ์ปจ์„คํ„ดํŠธ์ž…๋‹ˆ๋‹ค.
        ์•„๋ž˜ [๊ฐ€๊ฒŒ ํ”„๋กœํ•„], [์ถ•์ œ ํ”„๋กœํ•„], [์ฐธ๊ณ  ๋งˆ์ผ€ํŒ… ์ „๋žต]์„ ๋ชจ๋‘ ๊ณ ๋ คํ•˜์—ฌ,
        [๊ฐ€๊ฒŒ ํ”„๋กœํ•„]์˜ ์‚ฌ์žฅ๋‹˜์ด [์ถ•์ œ ํ”„๋กœํ•„] ๊ธฐ๊ฐ„ ๋™์•ˆ ์‹คํ–‰ํ•  ์ˆ˜ ์žˆ๋Š”
        **์ฐฝ์˜์ ์ด๊ณ  ๊ตฌ์ฒด์ ์ธ ๋งž์ถคํ˜• ๋งˆ์ผ€ํŒ… ์ „๋žต 1๊ฐ€์ง€**๋ฅผ ์ œ์•ˆํ•ด์ฃผ์„ธ์š”.

        [๊ฐ€๊ฒŒ ํ”„๋กœํ•„]
        {store_profile}

        [์ถ•์ œ ํ”„๋กœํ•„]
        {festival_profile_str}

        [์ฐธ๊ณ  ๋งˆ์ผ€ํŒ… ์ „๋žต]
        {marketing_context}

        [์ž‘์„ฑ ๊ฐ€์ด๋“œ๋ผ์ธ]
        1. **๋งค์šฐ ์ค‘์š”:** [๊ฐ€๊ฒŒ ํ”„๋กœํ•„]์˜ ํŠน์ง•(์—…์ข…, ์œ„์น˜, ํ•ต์‹ฌ ๊ณ ๊ฐ)๊ณผ [์ถ•์ œ ํ”„๋กœํ•„]์˜ ํŠน์ง•(์ฃผ์ œ, ์ฃผ์š” ๋ฐฉ๋ฌธ๊ฐ)์„
        **๋ฐ˜๋“œ์‹œ ์—ฐ๊ด€์ง€์–ด** ๊ตฌ์ฒด์ ์ธ ์ „๋žต์„ ๋งŒ๋“œ์„ธ์š”.
        2. [์ฐธ๊ณ  ๋งˆ์ผ€ํŒ… ์ „๋žต]์€ ์•„์ด๋””์–ด ๋ฐœ์ƒ์—๋งŒ ํ™œ์šฉํ•˜๊ณ , ๋ณต์‚ฌํ•˜์ง€ ๋งˆ์„ธ์š”.
        3. ์ „๋žต์€ 1๊ฐ€์ง€๋งŒ ๊นŠ์ด ์žˆ๊ฒŒ ์ œ์•ˆํ•ฉ๋‹ˆ๋‹ค.
        4. ์นœ์ ˆํ•˜๊ณ  ์ „๋ฌธ์ ์ธ ๋งํˆฌ๋ฅผ ์‚ฌ์šฉํ•˜์„ธ์š”.
        5. ์•„๋ž˜ [์ถœ๋ ฅ ํ˜•์‹]์„ ์ •ํ™•ํžˆ ์ง€์ผœ์ฃผ์„ธ์š”.
        6. **์ทจ์†Œ์„  ๊ธˆ์ง€**: ์ ˆ๋Œ€๋กœ `~~text~~`์™€ ๊ฐ™์€ ์ทจ์†Œ์„  ๋งˆํฌ๋‹ค์šด์„ ์‚ฌ์šฉํ•˜์ง€ ๋งˆ์„ธ์š”.

        [์ถœ๋ ฅ ํ˜•์‹]
        ### ๐ŸŽˆ {festival_display_name} ๋งž์ถคํ˜• ๋งˆ์ผ€ํŒ… ์ „๋žต

        **1. (์ „๋žต ์•„์ด๋””์–ด ์ œ๋ชฉ)**
        * **์ „๋žต ๊ฐœ์š”:** (๊ฐ€๊ฒŒ์˜ ์–ด๋–ค ํŠน์ง•๊ณผ ์ถ•์ œ์˜ ์–ด๋–ค ํŠน์ง•์„ ์—ฐ๊ด€์ง€์—ˆ๋Š”์ง€ ์„ค๋ช…)
        * **๊ตฌ์ฒด์  ์‹คํ–‰ ๋ฐฉ์•ˆ:** (์‚ฌ์žฅ๋‹˜์ด '๋ฌด์—‡์„', '์–ด๋–ป๊ฒŒ' ํ•ด์•ผ ํ•˜๋Š”์ง€ ๋‹จ๊ณ„๋ณ„๋กœ ์„ค๋ช…. ์˜ˆ: ๋ฉ”๋‰ด ๊ฐœ๋ฐœ, ํ™๋ณด ๋ฌธ๊ตฌ, SNS ์ด๋ฒคํŠธ ๋“ฑ)
        * **ํƒ€๊ฒŸ ๊ณ ๊ฐ:** (์ด ์ „๋žต์ด ์ถ•์ œ ๋ฐฉ๋ฌธ๊ฐ ์ค‘ ๋ˆ„๊ตฌ์—๊ฒŒ ๋งค๋ ฅ์ ์ผ์ง€)
        * **๊ธฐ๋Œ€ ํšจ๊ณผ:** (์˜ˆ์ƒ๋˜๋Š” ๊ฒฐ๊ณผ, ์˜ˆ: ์‹ ๊ทœ ๊ณ ๊ฐ ์œ ์ž…, ๊ฐ๋‹จ๊ฐ€ ์ƒ์Šน ๋“ฑ)
        """

        try:
            response = llm.invoke(prompt)
            logger.info("--- [Tool] (RAGx2) ์ตœ์ข… ์ „๋žต ์ƒ์„ฑ ์™„๋ฃŒ ---")
            return response.content
        except Exception as llm_e:
            logger.critical(f"--- [Tool CRITICAL] '์ถ•์ œ ๋งž์ถคํ˜• ์ „๋žต ์ƒ์„ฑ (RAGx2)' LLM ํ˜ธ์ถœ ์ค‘ ์˜ค๋ฅ˜: {llm_e} ---", exc_info=True)
            return f"์˜ค๋ฅ˜: ๊ฒ€์ƒ‰๋œ ์ „๋žต์„ ์ฒ˜๋ฆฌํ•˜๋Š” ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค. (LLM ์˜ค๋ฅ˜: {llm_e})"

    except Exception as e:
        logger.critical(f"--- [Tool CRITICAL] '์ถ•์ œ ๋งž์ถคํ˜• ์ „๋žต ์ƒ์„ฑ (RAG)' ์ค‘ ์˜ค๋ฅ˜: {e} ---", exc_info=True)
        return f"์ฃ„์†กํ•ฉ๋‹ˆ๋‹ค. '{festival_name}' ์ถ•์ œ ์ „๋žต์„ ์ƒ์„ฑํ•˜๋Š” ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {e}"
197
+
198
+
199
@tool
def create_marketing_strategies_for_multiple_festivals(festival_names: List[str], store_profile: str) -> str:
    """
    ์—ฌ๋Ÿฌ ๊ฐœ์˜ ์ถ•์ œ ์ด๋ฆ„ ๋ฆฌ์ŠคํŠธ์™€ ๊ฐ€๊ฒŒ ํ”„๋กœํ•„(JSON ๋ฌธ์ž์—ด)์„ ์ž…๋ ฅ๋ฐ›์•„,
    ๊ฐ ์ถ•์ œ์— ํŠนํ™”๋œ ๋งž์ถคํ˜• ๋งˆ์ผ€ํŒ… ์ „๋žต์„ *๋ชจ๋‘* ์ƒ์„ฑํ•˜๊ณ  ํ•˜๋‚˜์˜ ๋ฌธ์ž์—ด๋กœ ์ทจํ•ฉํ•˜์—ฌ ๋ฐ˜ํ™˜ํ•ฉ๋‹ˆ๋‹ค.
    (์˜ˆ: ["์ฒญ์†ก์‚ฌ๊ณผ์ถ•์ œ", "๋ถ€์ฒœ๊ตญ์ œ๋งŒํ™”์ถ•์ œ"])
    """
    # NOTE: the docstring is the agent-facing tool description; kept in Korean.
    logger.info(f"--- [Tool] '*๋‹ค์ˆ˜* ์ถ•์ œ ๋งž์ถคํ˜• ์ „๋žต ์ƒ์„ฑ' ๋„๊ตฌ ํ˜ธ์ถœ (๋Œ€์ƒ: {festival_names}) ---")

    # Guard clause: nothing to do for an empty festival list.
    if not festival_names:
        logger.warning("--- [Tool] ์ถ•์ œ ์ด๋ฆ„ ๋ชฉ๋ก์ด ๋น„์–ด์žˆ์Œ ---")
        return "์˜ค๋ฅ˜: ์ถ•์ œ ์ด๋ฆ„ ๋ชฉ๋ก์ด ๋น„์–ด์žˆ์Šต๋‹ˆ๋‹ค. ์ „๋žต์„ ์ƒ์„ฑํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."

    # Delegate each festival to the single-festival tool; one failure must
    # not abort the remaining festivals, so errors become report sections.
    sections = []
    for name in festival_names:
        try:
            strategy = create_festival_specific_marketing_strategy.invoke({
                "festival_name": name,
                "store_profile": store_profile
            })
            sections.append(strategy)
        except Exception as e:
            error_message = f"--- [์˜ค๋ฅ˜] '{name}'์˜ ์ „๋žต ์ƒ์„ฑ ์ค‘ ๋ฌธ์ œ๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {e} ---"
            logger.critical(f"--- [Tool CRITICAL] '{name}' ์ „๋žต ์ƒ์„ฑ ์ค‘ ์˜ค๋ฅ˜: {e} ---", exc_info=True)
            sections.append(error_message)

    logger.info("--- [Tool] '๋‹ค์ˆ˜ ์ถ•์ œ ๋งž์ถคํ˜• ์ „๋žต ์ƒ์„ฑ' ์™„๋ฃŒ ---")
    return "\n\n---\n\n".join(sections)
tools/profile_analyzer.py ADDED
@@ -0,0 +1,205 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # tools/profile_analyzer.py
2
+
3
+ import json
4
+ import traceback
5
+ import pandas as pd
6
+ import math
7
+ import streamlit as st
8
+ from langchain_core.tools import tool
9
+ from langchain_core.messages import HumanMessage
10
+
11
+ import config
12
+ from modules.llm_provider import get_llm
13
+ # filtering ๋ชจ๋“ˆ์—์„œ ๋‚ ์งœ ์˜ˆ์ธก ํ•จ์ˆ˜ ๊ฐ€์ ธ์˜ค๊ธฐ
14
+ from modules.filtering import FestivalRecommender
15
+
16
+ logger = config.get_logger(__name__)
17
+
18
+ # nan ๊ฐ’ ์ฒ˜๋ฆฌ๊ธฐ
19
def replace_nan_with_none(data):
    """Recursively replace float NaN values with None.

    Walks dicts and lists depth-first; any float that is NaN becomes None
    so the structure can be serialized as valid JSON (NaN is not legal
    JSON). Every other value is returned unchanged.
    """
    # NaN check first: mutually exclusive with the container branches,
    # so the reordering does not change behavior.
    if isinstance(data, float) and math.isnan(data):
        return None
    if isinstance(data, dict):
        return {key: replace_nan_with_none(value) for key, value in data.items()}
    if isinstance(data, list):
        return [replace_nan_with_none(item) for item in data]
    return data
27
+
28
+ # ์ถ•์ œ ๋ฐ์ดํ„ฐ ๋กœ๋”
29
@st.cache_data
def _load_festival_data():
    """Load the festival CSV once (Streamlit-cached) as a name-keyed dict.

    Returns {festival_name: row_dict} on success, or None when the file is
    missing, the '์ถ•์ œ๋ช…' column is absent, or any read error occurs.
    """
    try:
        csv_path = config.PATH_FESTIVAL_DF
        if not csv_path.exists():
            logger.error(f"--- [Tool Definition ERROR] '{config.PATH_FESTIVAL_DF}' ํŒŒ์ผ์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค.")
            return None

        frame = pd.read_csv(csv_path)
        if '์ถ•์ œ๋ช…' not in frame.columns:
            logger.error("--- [Tool Definition ERROR] '์ถ•์ œ๋ช…' ์ปฌ๋Ÿผ์ด df์— ์—†์Šต๋‹ˆ๋‹ค.")
            return None

        # Index by festival name so per-festival lookup is O(1).
        festival_map = frame.set_index('์ถ•์ œ๋ช…').to_dict(orient='index')
        logger.info(f"--- [Cache] ์ถ•์ œ ์›๋ณธ CSV ๋กœ๋“œ ๋ฐ ๋”•์…”๋„ˆ๋ฆฌ ๋ณ€ํ™˜ ์™„๋ฃŒ (์ด {len(festival_map)}๊ฐœ) ---")
        return festival_map
    except Exception as e:
        logger.critical(f"--- [Tool Definition CRITICAL ERROR] ์ถ•์ œ ๋ฐ์ดํ„ฐ ๋กœ๋“œ ์‹คํŒจ: {e} ---", exc_info=True)
        return None
46
+
47
+ # ----------------------------
48
+ # Tool 1: ํŠน์ • ์ถ•์ œ ์ •๋ณด ์กฐํšŒ
49
@tool
def get_festival_profile_by_name(festival_name: str) -> str:
    """
    ์ถ•์ œ ์ด๋ฆ„์„ ์ž…๋ ฅ๋ฐ›์•„, ํ•ด๋‹น ์ถ•์ œ์˜ ์ƒ์„ธ ํ”„๋กœํ•„(์†Œ๊ฐœ, ์ง€์—ญ, ํ‚ค์›Œ๋“œ, ๊ธฐ๊ฐ„, ๊ณ ๊ฐ์ธต ๋“ฑ)์„
    JSON ๋ฌธ์ž์—ด๋กœ ๋ฐ˜ํ™˜ํ•ฉ๋‹ˆ๋‹ค. ๋ฐ์ดํ„ฐ๋ฒ ์ด์Šค์—์„œ ์ •ํ™•ํ•œ ์ด๋ฆ„์„ ์ฐพ์•„์•ผ ํ•ฉ๋‹ˆ๋‹ค.
    (์˜ˆ: "๋ณด๋ น๋จธ๋“œ์ถ•์ œ ์ƒ์„ธ ์ •๋ณด ์•Œ๋ ค์ค˜")
    """
    # NOTE: the docstring is the agent-facing tool description; kept in Korean.
    # Errors are returned as JSON strings with an "error" key, never raised.
    logger.info(f"--- [Tool] 'ํŠน์ • ์ถ•์ œ ์ •๋ณด ์กฐํšŒ' ๋„๊ตฌ ํ˜ธ์ถœ (๋Œ€์ƒ: {festival_name}) ---")
    try:
        festival_db = _load_festival_data()
        if festival_db is None:
            return json.dumps({"error": "์ถ•์ œ ๋ฐ์ดํ„ฐ๋ฒ ์ด์Šค๋ฅผ ๋กœ๋“œํ•˜์ง€ ๋ชปํ–ˆ์Šต๋‹ˆ๋‹ค."})

        record = festival_db.get(festival_name)
        if not record:
            return json.dumps({"error": f"'{festival_name}' ์ถ•์ œ๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค. ์ฒ ์ž๋ฅผ ํ™•์ธํ•ด์ฃผ์„ธ์š”."})

        # NaN values would make the output invalid JSON; scrub them first,
        # then echo the name back so callers get a self-contained profile.
        record = replace_nan_with_none(record)
        record['์ถ•์ œ๋ช…'] = festival_name
        return json.dumps(record, ensure_ascii=False)
    except Exception as e:
        logger.critical(f"--- [Tool CRITICAL] 'ํŠน์ • ์ถ•์ œ ์ •๋ณด ์กฐํšŒ' ์ค‘ ์˜ค๋ฅ˜: {e} ---", exc_info=True)
        return json.dumps({"error": f"'{festival_name}' ์ถ•์ œ ๊ฒ€์ƒ‰ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}"})
71
+
72
+ # ----------------------------
73
+ # Tool 2: ๊ฐ€๋งน์  ํ”„๋กœํ•„ ๋ถ„์„ (LLM)
74
@tool
def analyze_merchant_profile(store_profile: str) -> str:
    """
    ๊ฐ€๋งน์ (๊ฐ€๊ฒŒ)์˜ ํ”„๋กœํ•„ ๋ฐ์ดํ„ฐ(JSON ๋ฌธ์ž์—ด)๋ฅผ ์ž…๋ ฅ๋ฐ›์•„, LLM์„ ์‚ฌ์šฉํ•˜์—ฌ
    [๊ฐ•์ , ์•ฝ์ , ๊ธฐํšŒ ์š”์ธ]์„ ๋ถ„์„ํ•˜๋Š” ์ปจ์„คํŒ… ๋ฆฌํฌํŠธ๋ฅผ ์ƒ์„ฑํ•ฉ๋‹ˆ๋‹ค.
    ์ด ๋„๊ตฌ๋Š” ๊ฐ€๊ฒŒ์˜ ํ˜„์žฌ ์ƒํƒœ๋ฅผ ์ง„๋‹จํ•˜๊ณ  ๋งˆ์ผ€ํŒ… ์ „๋žต์„ ์ œ์•ˆํ•˜๋Š” ๋ฐ ์‚ฌ์šฉ๋ฉ๋‹ˆ๋‹ค.
    """
    # NOTE(review): the docstring above is the LLM-facing tool description
    # consumed by the agent for tool selection, hence it stays in Korean.
    logger.info("--- [Tool] '๊ฐ€๋งน์  ํ”„๋กœํ•„ ๋ถ„์„' ๋„๊ตฌ ํ˜ธ์ถœ ---")
    try:
        # Low temperature: the report should be analytical, not creative.
        llm = get_llm(temperature=0.3)
        # Single-shot prompt asking for a SWOT-style (strengths/weaknesses/
        # opportunities) markdown report grounded in the given profile JSON.
        # The store_profile string is interpolated verbatim — assumed to be
        # the JSON produced upstream; TODO confirm callers never pass raw text.
        prompt = f"""
        ๋‹น์‹ ์€ ์ตœ๊ณ ์˜ ์ƒ๊ถŒ ๋ถ„์„ ์ „๋ฌธ๊ฐ€์ž…๋‹ˆ๋‹ค.
        ์•„๋ž˜ [๊ฐ€๊ฒŒ ํ”„๋กœํ•„] ๋ฐ์ดํ„ฐ๋ฅผ ๋ฐ”ํƒ•์œผ๋กœ, ์ด ๊ฐ€๊ฒŒ์˜ [๊ฐ•์ ], [์•ฝ์ ], [๊ธฐํšŒ ์š”์ธ]์„
        ์‚ฌ์žฅ๋‹˜์ด ์ดํ•ดํ•˜๊ธฐ ์‰ฝ๊ฒŒ ์ปจ์„คํŒ… ๋ฆฌํฌํŠธ ํ˜•์‹์œผ๋กœ ์š”์•ฝํ•ด์ฃผ์„ธ์š”.

        [๊ฐ€๊ฒŒ ํ”„๋กœํ•„]
        {store_profile}

        [๋ถ„์„ ๊ฐ€์ด๋“œ๋ผ์ธ]
        1. **๊ฐ•์  (Strengths)**: '๋™์ผ ์ƒ๊ถŒ/์—…์ข… ๋Œ€๋น„' ๋†’์€ ์ˆ˜์น˜(๋งค์ถœ, ๋ฐฉ๋ฌธ๊ฐ, ๊ฐ๋‹จ๊ฐ€ ๋“ฑ)๋‚˜ '์žฌ๋ฐฉ๋ฌธ์œจ' ๋“ฑ์„ ์ฐพ์•„ **๊ฒฝ์Ÿ ์šฐ์œ„**๊ฐ€ ๋˜๋Š” ํ•ต์‹ฌ ์š”์†Œ ๊ฐ•์กฐํ•˜์„ธ์š”.
        2. **์•ฝ์  (Weaknesses)**: '๋™์ผ ์ƒ๊ถŒ/์—…์ข… ๋Œ€๋น„' ๋‚ฎ์€ ์ˆ˜์น˜๋‚˜ '์‹ ๊ทœ ๊ณ ๊ฐ ๋น„์œจ' ๋“ฑ์„ ์ฐพ์•„ **๊ฐœ์„ ์ด ์‹œ๊ธ‰ํ•œ ์˜์—ญ**์„ ์–ธ๊ธ‰ํ•˜์„ธ์š”.
        3. **๊ธฐํšŒ (Opportunities)**: ๊ฐ€๊ฒŒ์˜ ํ˜„์žฌ ๊ฐ•์ ๊ณผ '์ฃผ์š” ๊ณ ๊ฐ์ธต'์ด๋‚˜ '์ƒ๊ถŒ' ํŠน์„ฑ์„ ๋ฐ”ํƒ•์œผ๋กœ, **๊ฐ€๊ฒŒ๊ฐ€ ํ™œ์šฉํ•  ์ˆ˜ ์žˆ๋Š” ๋งˆ์ผ€ํŒ…(์˜ˆ: ํŠน์ • ์—ฐ๋ น๋Œ€ ํƒ€๊ฒŸ, ์‹ ๊ทœ ๊ณ ๊ฐ ์œ ์น˜)์ด ํšจ๊ณผ์ ์ผ์ง€ ์ œ์•ˆํ•˜๊ณ  ์ด๋ฅผ ๋‹ฌ์„ฑํ•˜๊ธฐ ์œ„ํ•œ ๋ฐฉํ–ฅ์„ฑ์„ ์ œ์‹œํ•˜์„ธ์š”.
        4. **ํ˜•์‹**: ๋งˆํฌ๋‹ค์šด์„ ์‚ฌ์šฉํ•˜์—ฌ ๋ช…ํ™•ํ•˜๊ณ  ๊ฐ€๋…์„ฑ ์ข‹๊ฒŒ ์ž‘์„ฑํ•˜์„ธ์š”.
        5. **์ „๋ฌธ์„ฑ/์นœ์ ˆํ•จ**: ์ „๋ฌธ์ ์ธ ๋ถ„์„ ์šฉ์–ด๋ฅผ ์‚ฌ์šฉํ•˜๋˜, ์‚ฌ์žฅ๋‹˜์ด ์‰ฝ๊ฒŒ ์ดํ•ดํ•  ์ˆ˜ ์žˆ๋„๋ก ์นœ์ ˆํ•˜๊ณ  ๋ช…ํ™•ํ•˜๊ฒŒ ์„ค๋ช…ํ•˜์„ธ์š”.
        6. **(์š”์ฒญ 4) ์ทจ์†Œ์„  ๊ธˆ์ง€**: ์ ˆ๋Œ€๋กœ `~~text~~`์™€ ๊ฐ™์€ ์ทจ์†Œ์„  ๋งˆํฌ๋‹ค์šด์„ ์‚ฌ์šฉํ•˜์ง€ ๋งˆ์„ธ์š”.

        [๋‹ต๋ณ€ ํ˜•์‹]
        ### ๐Ÿช ์‚ฌ์žฅ๋‹˜ ๊ฐ€๊ฒŒ ํ”„๋กœํ•„ ๋ถ„์„ ๋ฆฌํฌํŠธ

        **1. ๊ฐ•์  (Strengths)**
        * [๋ถ„์„๋œ ๊ฐ•์  1] (๋ถ„์„ ๊ทผ๊ฑฐ ๋ช…์‹œ)
        * [๋ถ„์„๋œ ๊ฐ•์  2] (๋ถ„์„ ๊ทผ๊ฑฐ ๋ช…์‹œ)
        * [ํ•„์š”์‹œ ์ถ”๊ฐ€ ๊ฐ•์ ]

        **2. ์•ฝ์  (Weaknesses)**
        * [๋ถ„์„๋œ ์•ฝ์  1] (๊ฐœ์„  ํ•„์š”์„ฑ ๋ช…์‹œ)
        * [๋ถ„์„๋œ ์•ฝ์  2] (๊ฐœ์„  ํ•„์š”์„ฑ ๋ช…์‹œ)
        * [ํ•„์š”์‹œ ์ถ”๊ฐ€ ์•ฝ์ ]

        **3. ๊ธฐํšŒ (Opportunities)**
        * [๋ถ„์„๋œ ๊ธฐํšŒ ์š”์ธ 1] (ํ™œ์šฉ ๋ฐฉ์•ˆ ์ œ์‹œ)
        * [๋ถ„์„๋œ ๊ธฐํšŒ ์š”์ธ 2] (ํ™œ์šฉ ๋ฐฉ์•ˆ ์ œ์‹œ)
        * [ํ•„์š”์‹œ ์ถ”๊ฐ€ ๊ธฐํšŒ ์š”์ธ]
        """
        response = llm.invoke([HumanMessage(content=prompt)])
        # strip(): remove leading/trailing whitespace the LLM may add.
        analysis_report = response.content.strip()
        return analysis_report
    except Exception as e:
        # Return a user-facing error string instead of raising — tools must
        # not blow up the agent loop.
        logger.critical(f"--- [Tool CRITICAL] '๊ฐ€๋งน์  ํ”„๋กœํ•„ ๋ถ„์„' ์ค‘ ์˜ค๋ฅ˜: {e} ---", exc_info=True)
        return f"๊ฐ€๊ฒŒ ํ”„๋กœํ•„์„ ๋ถ„์„ํ•˜๋Š” ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {e}"
124
+
125
+ # ----------------------------
126
+ # Tool 3: ์ถ•์ œ ํ”„๋กœํ•„ ๋ถ„์„ (LLM)
127
@tool
def analyze_festival_profile(festival_name: str) -> str:
    """
    ์ถ•์ œ ์ด๋ฆ„์„ ์ž…๋ ฅ๋ฐ›์•„, ํ•ด๋‹น ์ถ•์ œ์˜ ์ƒ์„ธ ํ”„๋กœํ•„์„ ์กฐํšŒํ•˜๊ณ ,
    LLM์„ ์‚ฌ์šฉํ•˜์—ฌ [ํ•ต์‹ฌ ํŠน์ง•]๊ณผ [์ฃผ์š” ๋ฐฉ๋ฌธ๊ฐ ํŠน์„ฑ]์„ ์š”์•ฝ ๋ฆฌํฌํŠธ๋กœ ๋ฐ˜ํ™˜ํ•ฉ๋‹ˆ๋‹ค.
    (์˜ˆ: "๋ณด๋ น๋จธ๋“œ์ถ•์ œ๋Š” ์–ด๋–ค ์ถ•์ œ์•ผ?")
    """
    # NOTE: the docstring is the agent-facing tool description; kept in Korean.
    logger.info(f"--- [Tool] '์ถ•์ œ ํ”„๋กœํ•„ ๋ถ„์„' ๋„๊ตฌ ํ˜ธ์ถœ (๋Œ€์ƒ: {festival_name}) ---")
    try:
        # 1. Call Tool 1 for the raw profile.
        # CONSISTENCY FIX: invoke with a kwargs dict (as every other call site
        # in this project does) instead of a bare positional string.
        profile_json = get_festival_profile_by_name.invoke({"festival_name": festival_name})

        profile_dict = json.loads(profile_json)

        # Lookup failed: propagate the error JSON unchanged to the caller.
        if "error" in profile_dict:
            return profile_json

        # 2. Extract only the fields the LLM summary needs.
        summary = {
            "์ถ•์ œ๋ช…": profile_dict.get('์ถ•์ œ๋ช…'),
            "์†Œ๊ฐœ": profile_dict.get('์†Œ๊ฐœ'),
            "์ง€์—ญ": profile_dict.get('์ง€์—ญ'),
            "ํ‚ค์›Œ๋“œ": profile_dict.get('ํ‚ค์›Œ๋“œ'),
            "2025_๊ธฐ๊ฐ„": profile_dict.get('2025_๊ธฐ๊ฐ„'),
            "์ฃผ์š”_๊ณ ๊ฐ์ธต": profile_dict.get('์ฃผ์š”๊ณ ๊ฐ์ธต', 'N/A'),
            "์ฃผ์š”_๋ฐฉ๋ฌธ์ž": profile_dict.get('์ฃผ์š”๋ฐฉ๋ฌธ์ž', 'N/A'),
            "์ถ•์ œ_์ธ๊ธฐ๋„": profile_dict.get('์ถ•์ œ์ธ๊ธฐ', 'N/A'),
            "์ธ๊ธฐ๋„_์ ์ˆ˜": profile_dict.get('์ธ๊ธฐ๋„_์ ์ˆ˜', 'N/A'),
            "ํ™ˆํŽ˜์ด์ง€": profile_dict.get('ํ™ˆํŽ˜์ด์ง€')
        }

        # Predict the 2026 window from the 2025 period string.
        # NOTE(review): uses a private method of FestivalRecommender with
        # placeholder constructor args — consider exposing a public helper.
        temp_recommender = FestivalRecommender("", "")
        predicted_2026_timing = temp_recommender._predict_next_year_date(summary["2025_๊ธฐ๊ฐ„"])

        summary_str = json.dumps(summary, ensure_ascii=False, indent=2)

        # temperature=0.1: summaries should stay close to the source data.
        llm = get_llm(temperature=0.1)

        prompt = f"""
        ๋‹น์‹ ์€ ์ถ•์ œ ์ „๋ฌธ ๋ถ„์„๊ฐ€์ž…๋‹ˆ๋‹ค. ์•„๋ž˜ [์ถ•์ œ ํ”„๋กœํ•„ ์š”์•ฝ]์„ ๋ฐ”ํƒ•์œผ๋กœ,
        ์ด ์ถ•์ œ์˜ **ํ•ต์‹ฌ ํŠน์ง•**๊ณผ **์ฃผ์š” ๋ฐฉ๋ฌธ๊ฐ(ํƒ€๊ฒŸ ๊ณ ๊ฐ) ํŠน์„ฑ**์„
        ์ดํ•ดํ•˜๊ธฐ ์‰ฝ๊ฒŒ ์š”์•ฝํ•ด์ฃผ์„ธ์š”.

        [์ถ•์ œ ํ”„๋กœํ•„ ์š”์•ฝ]
        {summary_str}

        [๋ถ„์„ ๊ฐ€์ด๋“œ๋ผ์ธ]
        1. **ํ•ต์‹ฌ ํŠน์ง•**: ์ž…๋ ฅ๋œ **'์†Œ๊ฐœ'** ๋‚ด์šฉ์„ ๋ฐ”ํƒ•์œผ๋กœ ์ถ•์ œ์˜ ์ฃผ์ œ์™€ ์ฃผ์š” ๋‚ด์šฉ์„ **2~3๋ฌธ์žฅ์œผ๋กœ ์ƒ์„ธํžˆ ์š”์•ฝ**ํ•˜๊ณ , 'ํ‚ค์›Œ๋“œ'์™€ '์ถ•์ œ_์ธ๊ธฐ๋„', '์ธ๊ธฐ๋„_์ ์ˆ˜'๋ฅผ ์–ธ๊ธ‰ํ•˜์—ฌ ๋ถ€์—ฐ ์„ค๋ช…ํ•ฉ๋‹ˆ๋‹ค. (์˜ˆ: "'{summary.get("์†Œ๊ฐœ", "์†Œ๊ฐœ ์ •๋ณด ์—†์Œ")[:50]}...'์„(๋ฅผ) ์ฃผ์ œ๋กœ ํ•˜๋Š” ์ถ•์ œ์ž…๋‹ˆ๋‹ค. ์ฃผ์š” ํ‚ค์›Œ๋“œ๋Š” '{summary.get("ํ‚ค์›Œ๋“œ", "N/A")}'์ด๋ฉฐ, ์ธ๊ธฐ๋„๋Š” '{summary.get("์ถ•์ œ_์ธ๊ธฐ๋„", "N/A")}' ์ˆ˜์ค€์ž…๋‹ˆ๋‹ค.")
        2. **์ฃผ์š” ๋ฐฉ๋ฌธ๊ฐ**: '์ฃผ์š”_๊ณ ๊ฐ์ธต'๊ณผ '์ฃผ์š”_๋ฐฉ๋ฌธ์ž' ์ปฌ๋Ÿผ์„ ์ง์ ‘ ์ธ์šฉํ•˜์—ฌ ์„ค๋ช…ํ•ฉ๋‹ˆ๋‹ค.
        (์˜ˆ: {summary.get("์ฃผ์š”_๊ณ ๊ฐ์ธต", "N/A")}์ด ์ฃผ๋กœ ๋ฐฉ๋ฌธํ•˜๋ฉฐ, {summary.get("์ฃผ์š”_๋ฐฉ๋ฌธ์ž", "N/A")} ๋น„์œจ์ด ๋†’์Šต๋‹ˆ๋‹ค.)
        3. **ํ˜•์‹**: ์•„๋ž˜์™€ ๊ฐ™์€ ๋งˆํฌ๋‹ค์šด ํ˜•์‹์œผ๋กœ ๋‹ต๋ณ€์„ ์ž‘์„ฑํ•˜์„ธ์š”.
        4. **์ทจ์†Œ์„  ๊ธˆ์ง€**: ์ ˆ๋Œ€๋กœ `~~text~~`์™€ ๊ฐ™์€ ์ทจ์†Œ์„  ๋งˆํฌ๋‹ค์šด์„ ์‚ฌ์šฉํ•˜์ง€ ๋งˆ์„ธ์š”.

        [๋‹ต๋ณ€ ํ˜•์‹]
        ### ๐ŸŽˆ ์ถ•์ œ ํ”„๋กœํ•„ ๋ถ„์„ ๋ฆฌํฌํŠธ: {summary.get("์ถ•์ œ๋ช…")}

        **1. ์ถ•์ œ ํ•ต์‹ฌ ํŠน์ง•**
        * [์ถ•์ œ ์†Œ๊ฐœ ๋‚ด์šฉ์„ ๋ฐ”ํƒ•์œผ๋กœ 2~3๋ฌธ์žฅ ์š”์•ฝ. ํ‚ค์›Œ๋“œ์™€ ์ธ๊ธฐ๋„ ํฌํ•จ]

        **2. ์ฃผ์š” ๋ฐฉ๋ฌธ๊ฐ ํŠน์„ฑ**
        * **์ฃผ์š” ๊ณ ๊ฐ์ธต:** {summary.get("์ฃผ์š”_๊ณ ๊ฐ์ธต")}
        * **์ฃผ์š” ๋ฐฉ๋ฌธ์ž:** {summary.get("์ฃผ์š”_๋ฐฉ๋ฌธ์ž")}

        **3. 2026๋…„ ๊ฐœ์ตœ ๊ธฐ๊ฐ„ (์˜ˆ์ƒ)**
        * {predicted_2026_timing}

        **4. ํ™ˆํŽ˜์ด์ง€**
        * {summary.get("ํ™ˆํŽ˜์ด์ง€", "์ •๋ณด ์—†์Œ")}
        """

        response = llm.invoke([HumanMessage(content=prompt)])
        analysis_report = response.content.strip()
        return analysis_report

    except Exception as e:
        # BUGFIX: the original log message contained mojibake
        # ('์ถ•์ œ ํ”„๋กœํ•„ ๋ถ„๏ฟฝ๏ฟฝ๏ฟฝ' — U+FFFD replacement characters from a broken
        # encoding); restored to the intended '์ถ•์ œ ํ”„๋กœํ•„ ๋ถ„์„'.
        logger.critical(f"--- [Tool CRITICAL] '์ถ•์ œ ํ”„๋กœํ•„ ๋ถ„์„' ์ค‘ ์˜ค๋ฅ˜: {e} ---", exc_info=True)
        return f"'{festival_name}' ์ถ•์ œ ํ”„๋กœํ•„์„ ๋ถ„์„ํ•˜๋Š” ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {e}"
tools/tool_loader.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # tools/tool_loader.py
2
+
3
+ from .festival_recommender import recommend_festivals
4
+ from .marketing_strategy import (
5
+ search_contextual_marketing_strategy,
6
+ create_festival_specific_marketing_strategy,
7
+ create_marketing_strategies_for_multiple_festivals
8
+ )
9
+ from .profile_analyzer import (
10
+ get_festival_profile_by_name,
11
+ analyze_merchant_profile,
12
+ analyze_festival_profile,
13
+ )
14
+
15
+ # ์˜ค์ผ€์ŠคํŠธ๋ ˆ์ดํ„ฐ๊ฐ€ ์‚ฌ์šฉํ•  ์ตœ์ข… ๋„๊ตฌ ๋ฆฌ์ŠคํŠธ
16
+ ALL_TOOLS = [
17
+ recommend_festivals, # (ํ†ตํ•ฉ) ๊ฐ€๊ฒŒ ๋งž์ถคํ˜• ์ถ•์ œ ์ถ”์ฒœ (์ฟผ๋ฆฌ ์žฌ์ž‘์„ฑ ~ ์ตœ์ข… ๋žญํ‚น)
18
+ get_festival_profile_by_name, # (DB์กฐํšŒ) ์ถ•์ œ ์ด๋ฆ„์œผ๋กœ ์ƒ์„ธ ํ”„๋กœํ•„(JSON) ๊ฒ€์ƒ‰
19
+ search_contextual_marketing_strategy, # (RAG) ์ผ๋ฐ˜์ ์ธ ๋งˆ์ผ€ํŒ…/ํ™๋ณด ์ „๋žต์„ Vector DB์—์„œ ๊ฒ€์ƒ‰
20
+ create_festival_specific_marketing_strategy, # (LLM) *๋‹จ์ผ* ์ถ•์ œ์— ๋Œ€ํ•œ ๋งž์ถคํ˜• ๋งˆ์ผ€ํŒ… ์ „๋žต ์ƒ์„ฑ
21
+ create_marketing_strategies_for_multiple_festivals, # (LLM) *์—ฌ๋Ÿฌ* ์ถ•์ œ์— ๋Œ€ํ•œ ๋งž์ถคํ˜• ๋งˆ์ผ€ํŒ… ์ „๋žต ๋™์‹œ ์ƒ์„ฑ
22
+ analyze_merchant_profile, # (LLM) ๊ฐ€๊ฒŒ ํ”„๋กœํ•„(JSON)์„ ๋ฐ›์•„ SWOT/๊ณ ๊ฐ ํŠน์„ฑ ๋ถ„์„
23
+ analyze_festival_profile, # (LLM) ์ถ•์ œ ํ”„๋กœํ•„(JSON)์„ ๋ฐ›์•„ ํ•ต์‹ฌ ํŠน์ง•/๋ฐฉ๋ฌธ๊ฐ ๋ถ„์„
24
+ ]
utils/parser_utils.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # utils/parser_utils.py
2
+
3
+ import re
4
+ import json
5
+ from typing import List, Dict, Any, Union
6
+
7
def extract_json_from_llm_response(response_text: str) -> Union[List[Dict[str, Any]], Dict[str, Any]]:
    """Safely extract and parse the JSON payload from an LLM response.

    Search order:
      1. A fenced ```json ... ``` markdown block (preferred).
      2. Otherwise, the first '{' or '[' in the text and the greedy
         bracketed span starting there.

    Returns:
        The parsed dict or list.

    Raises:
        ValueError: when no JSON block can be located, or the located
            block fails to parse.
    """
    json_str = None

    # 1. Fenced markdown block — most reliable when present.
    json_match = re.search(
        r'```json\s*([\s\S]*?)\s*```',
        response_text,
        re.DOTALL | re.IGNORECASE
    )

    if json_match:
        json_str = json_match.group(1).strip()
    else:
        # 2. No fence: locate the first opening brace/bracket.
        # BUGFIX: the previous pattern r'[{|\[]' also matched a literal '|'
        # (inside a character class '|' is not alternation), so a stray pipe
        # before the JSON made extraction silently fail with "block not found".
        first_bracket_match = re.search(r'[{\[]', response_text)
        if first_bracket_match:
            start_index = first_bracket_match.start()
            tail = response_text[start_index:]

            # Greedy match to the LAST closing bracket of the same kind,
            # tolerating trailing prose before it.
            if tail[0] == '[':
                list_match = re.search(r'(\[[\s\S]*\])', tail, re.DOTALL)
                if list_match:
                    json_str = list_match.group(0)
            else:  # tail[0] == '{'
                dict_match = re.search(r'(\{[\s\S]*\})', tail, re.DOTALL)
                if dict_match:
                    json_str = dict_match.group(0)

    if json_str is None:
        raise ValueError(f"์‘๋‹ต์—์„œ JSON ๋ธ”๋ก์„ ์ฐพ์ง€ ๋ชปํ–ˆ์Šต๋‹ˆ๋‹ค. (์‘๋‹ต ์‹œ์ž‘: {response_text[:150]}...)")

    try:
        return json.loads(json_str)
    except json.JSONDecodeError as e:
        # Re-raise as ValueError so callers handle one exception type.
        raise ValueError(f"JSON ํŒŒ์‹ฑ์— ์‹คํŒจํ–ˆ์Šต๋‹ˆ๋‹ค: {e}. (์ถ”์ถœ๋œ ๋ฌธ์ž์—ด: {json_str[:150]}...)")
uv.lock ADDED
The diff for this file is too large to render. See raw diff
 
vectorstore/faiss_festival/index.faiss ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78a06f6935bd51302619b61b155efa818a2ce0aaa5b3b8c2f7b498151b8a2619
3
+ size 364589
vectorstore/faiss_festival/index.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2bc78f137c400ceb373cc99cae9b9a016bd74ecce05cf9b40460b1847af6b19d
3
+ size 503563
vectorstore/faiss_marketing/index.faiss ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:997ab4e4061ea89a33e067f80e45b1ee05865a7f8839ee9914c0e43fee705df3
3
+ size 4288557
vectorstore/faiss_marketing/index.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6349770bbc7f676dee108982fe181e1af15ee1485eb539ad9eb8226f799e9fbd
3
+ size 1494859
๊ธฐํƒ€/create_faiss_festival.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import os
3
+ import traceback
4
+ from pathlib import Path
5
+ import time
6
+
7
+ from langchain_community.embeddings import HuggingFaceEmbeddings
8
+ from langchain_community.vectorstores import FAISS
9
+ from langchain.docstore.document import Document
10
+
11
+ # --- 1. ์ถ•์ œ ๋ฐ์ดํ„ฐ ๋กœ๋” ---
12
def _load_and_process_festivals_for_indexing():
    """Read data/festival_df.csv verbatim and return its rows as dicts.

    Intentionally does NO date handling, dropna, or column renaming: NaN
    cells are replaced with "" so every original column survives into the
    Document metadata used at the filtering stage.

    Returns a list of row dicts, or None when loading fails.
    """
    print("--- [Indexer] 'festival_df.csv' ๋กœ๋”ฉ ๋ฐ ์ „์ฒ˜๋ฆฌ ์‹œ์ž‘... ---")
    try:
        csv_path = Path(__file__).resolve().parent / 'data' / 'festival_df.csv'
        if not csv_path.exists():
            raise FileNotFoundError(f"๋ฐ์ดํ„ฐ ํŒŒ์ผ์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค: {csv_path}")

        frame = pd.read_csv(csv_path)
        if frame.empty:
            raise ValueError("'festival_df.csv' ํŒŒ์ผ์— ๋ฐ์ดํ„ฐ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.")

        # Replace every NaN with "" to avoid losing rows/columns downstream.
        frame = frame.fillna("")

        print(f"--- [Indexer] 'festival_df.csv' ๋กœ๋”ฉ ์„ฑ๊ณต. {len(frame)}๊ฐœ ์ถ•์ œ ๋ฐœ๊ฒฌ ---")
        return frame.to_dict('records')

    except Exception as e:
        print(f"--- [Indexer CRITICAL] 'festival_df.csv' ๋กœ๋”ฉ ์‹คํŒจ: {e}\n{traceback.format_exc()} ---")
        return None
38
+
39
+ # --- 2. ์ž„๋ฒ ๋”ฉ ๋ชจ๋ธ ์ค€๋น„ (์œ ์ง€) ---
40
def get_embeddings_model():
    """Build the local HuggingFace embedding model used for indexing.

    Uses 'dragonkue/BGE-m3-ko' on CPU with normalized embeddings
    (normalization is strongly recommended for BGE-family retrieval).
    """
    print("--- [Indexer] HuggingFace ์ž„๋ฒ ๋”ฉ ๋ชจ๋ธ ๋กœ๋”ฉ ์‹œ์ž‘... ---")
    embeddings = HuggingFaceEmbeddings(
        model_name="dragonkue/BGE-m3-ko",
        model_kwargs={'device': 'cpu'},
        encode_kwargs={'normalize_embeddings': True},
    )
    print("--- [Indexer] HuggingFace ์ž„๋ฒ ๋”ฉ ๋ชจ๋ธ ๋กœ๋”ฉ ์™„๋ฃŒ ---")
    return embeddings
53
+
54
+ # --- 3. ๋ฒกํ„ฐ ์Šคํ† ์–ด ๊ตฌ์ถ• ๋ฐ ์ €์žฅ ---
55
def build_and_save_vector_store():
    """Build the festival FAISS index from festival_df.csv and save it locally.

    Pipeline: load raw rows -> load the embedding model -> wrap each festival
    as a LangChain Document (name/keywords/intro in the embedded text; the
    full original row kept as metadata) -> build a FAISS store -> save to
    ./faiss_festival next to this script.
    """
    start_time = time.time()

    # 1. Load the raw festival rows (NaN already replaced with "").
    festivals = _load_and_process_festivals_for_indexing()
    if not festivals:
        print("--- [Indexer ERROR] ์ถ•์ œ ๋ฐ์ดํ„ฐ๊ฐ€ ์—†์–ด ์ธ๋ฑ์‹ฑ์„ ์ค‘๋‹จํ•ฉ๋‹ˆ๋‹ค.")
        return

    # 2. Load the embedding model.
    embeddings = get_embeddings_model()

    # 3. Convert each festival into a LangChain Document.
    documents = []
    print("--- [Indexer] ์ถ•์ œ ์ •๋ณด -> ๋ฌธ์„œ(Document) ๋ณ€ํ™˜ ์‹œ์ž‘ ---")
    for festival in festivals:

        # The festival name is deliberately part of the embedded text so
        # that name-based queries score well in similarity search.
        content = (
            f"์ถ•์ œ๋ช…: {festival.get('์ถ•์ œ๋ช…', '')}\n"
            f"์ถ•์ œ ํ‚ค์›Œ๋“œ: {festival.get('ํ‚ค์›Œ๋“œ', '')}\n"
            f"์ถ•์ œ ์†Œ๊ฐœ: {festival.get('์†Œ๊ฐœ', '')}"
        )

        # Metadata keeps EVERY original column (name, target customers,
        # popularity score, ...) for the downstream dynamic-scoring step.
        metadata = festival

        documents.append(Document(page_content=content, metadata=metadata))

    print(f"--- [Indexer] ๋ฌธ์„œ ๋ณ€ํ™˜ ์™„๋ฃŒ. ์ด {len(documents)}๊ฐœ ๋ฌธ์„œ ์ƒ์„ฑ ---")

    # 4. Build the FAISS vector store (slow on CPU for large corpora).
    print("--- [Indexer] FAISS ๋ฒกํ„ฐ ์Šคํ† ์–ด ์ƒ์„ฑ ์‹œ์ž‘ (์‹œ๊ฐ„์ด ๊ฑธ๋ฆด ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค)... ---")
    vector_store = FAISS.from_documents(documents, embeddings)
    print("--- [Indexer] FAISS ๋ฒกํ„ฐ ์Šคํ† ์–ด ์ƒ์„ฑ ์™„๋ฃŒ ---")

    # 5. Persist next to this script.
    project_root = Path(__file__).resolve().parent
    save_path = project_root / 'faiss_festival'

    os.makedirs(save_path.parent, exist_ok=True)
    vector_store.save_local(str(save_path))

    end_time = time.time()
    print("=" * 50)
    print(f"๐ŸŽ‰ ์„ฑ๊ณต! FAISS ๋ฒกํ„ฐ ์Šคํ† ์–ด๋ฅผ ์ƒ์„ฑํ•˜์—ฌ '{save_path}'์— ์ €์žฅํ–ˆ์Šต๋‹ˆ๋‹ค.")
    print(f"์ด ์†Œ์š” ์‹œ๊ฐ„: {end_time - start_time:.2f}์ดˆ")
    print("=" * 50)
104
+
105
+ if __name__ == "__main__":
106
+ build_and_save_vector_store()
๊ธฐํƒ€/create_faiss_marketing.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # create_marketing_retriever.py
2
+ # -*- coding: utf-8 -*-
3
+
4
+ import os
5
+ import time
6
+ from langchain_community.document_loaders import DirectoryLoader, PyPDFLoader
7
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
8
+ from langchain_community.vectorstores import FAISS
9
+ # [๋ณ€๊ฒฝ] Google ๋Œ€์‹  HuggingFace ์ž„๋ฒ ๋”ฉ์„ ๊ฐ€์ ธ์˜ต๋‹ˆ๋‹ค.
10
+ from langchain_community.embeddings import HuggingFaceEmbeddings
11
+ # [์‚ญ์ œ] from dotenv import load_dotenv (๋” ์ด์ƒ ํ•„์š” ์—†์Œ)
12
+
13
def create_and_save_retriever():
    """
    Build a FAISS retriever over the marketing PDFs using the local
    Hugging Face embedding model ('dragonkue/BGE-m3-ko') and save it to disk.

    Pipeline: load ./marketing/**/*.pdf -> split into 1000-char chunks
    (100 overlap) -> embed locally (no API key needed) -> FAISS store ->
    save to ./retriever/marketing_retriever. Errors are caught and printed;
    the function itself never raises.
    """
    try:
        # 0. No API-key loading needed: embeddings are computed locally.
        print("โœ… ๋กœ์ปฌ ์ž„๋ฒ ๋”ฉ ๋ชจ๋ธ์„ ์‚ฌ์šฉํ•ฉ๋‹ˆ๋‹ค. (API ํ‚ค ํ•„์š” ์—†์Œ)")

        # 1. Load every PDF under ./marketing (recursive).
        loader = DirectoryLoader(
            './marketing',
            glob="**/*.pdf",
            loader_cls=PyPDFLoader,
            show_progress=True,
            use_multithreading=True
        )
        documents = loader.load()
        print(f"โœ… ์ด {len(documents)}๊ฐœ์˜ PDF ๋ฌธ์„œ๋ฅผ ๋ถˆ๋Ÿฌ์™”์Šต๋‹ˆ๋‹ค.")

        if not documents:
            raise ValueError("๐Ÿšจ 'marketing' ํด๋”์— PDF ํŒŒ์ผ์ด ์—†์Šต๋‹ˆ๋‹ค. ๋ฌธ์„œ๋ฅผ ์ถ”๊ฐ€ํ•ด์ฃผ์„ธ์š”.")

        # 2. Split into overlapping chunks sized for retrieval.
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=100
        )
        docs = text_splitter.split_documents(documents)
        print(f"โœ… ๋ฌธ์„œ๋ฅผ ์ด {len(docs)}๊ฐœ์˜ ์ฒญํฌ(chunk)๋กœ ๋ถ„ํ• ํ–ˆ์Šต๋‹ˆ๋‹ค.")

        if not docs:
            raise ValueError("๐Ÿšจ ๋ฌธ์„œ๋ฅผ ์ฒญํฌ๋กœ ๋ถ„ํ• ํ•˜๋Š” ๋ฐ ์‹คํŒจํ–ˆ์Šต๋‹ˆ๋‹ค.")

        # 3. Configure the embedding model.
        print(f"โœ… ์ž„๋ฒ ๋”ฉ ๋ชจ๋ธ 'dragonkue/BGE-m3-ko' ๋กœ๋“œ๋ฅผ ์‹œ์ž‘ํ•ฉ๋‹ˆ๋‹ค...")

        model_name = "dragonkue/BGE-m3-ko"
        # Set {'device': 'cuda'} instead when a GPU is available.
        model_kwargs = {'device': 'cpu'}
        # BGE models strongly recommend normalized embeddings for retrieval.
        encode_kwargs = {'normalize_embeddings': True}

        embeddings = HuggingFaceEmbeddings(
            model_name=model_name,
            model_kwargs=model_kwargs,
            encode_kwargs=encode_kwargs
        )
        print(f"โœ… ์ž„๋ฒ ๋”ฉ ๋ชจ๋ธ์„ ์„ฑ๊ณต์ ์œผ๋กœ ๋กœ๋“œํ–ˆ์Šต๋‹ˆ๋‹ค.")

        # 4. Build the FAISS vector store.
        vectorstore = None

        # Local models batch quickly, so no API rate limiting (time.sleep)
        # is needed; FAISS.from_documents handles batching internally.
        print(f"๐Ÿ”„ ์ด {len(docs)}๊ฐœ์˜ ์ฒญํฌ์— ๋Œ€ํ•œ ์ž„๋ฒ ๋”ฉ์„ ์‹œ์ž‘ํ•ฉ๋‹ˆ๋‹ค. (์‹œ๊ฐ„์ด ๊ฑธ๋ฆด ์ˆ˜ ์žˆ์Œ)")

        vectorstore = FAISS.from_documents(docs, embeddings)

        # 5. Save locally (path matched to knowledge_base.py).
        save_dir = './retriever/marketing_retriever'
        os.makedirs(save_dir, exist_ok=True)

        vectorstore.save_local(save_dir)

        print(f"๐ŸŽ‰ Retriever๊ฐ€ ์„ฑ๊ณต์ ์œผ๋กœ ์ƒ์„ฑ๋˜์–ด '{save_dir}' ํด๋”์— ์ €์žฅ๋˜์—ˆ์Šต๋‹ˆ๋‹ค!")

    except Exception as e:
        # Best-effort tool script: report and continue, don't crash the caller.
        print(f"๐Ÿšจ๐Ÿšจ ์น˜๋ช…์ ์ธ ์˜ค๋ฅ˜ ๋ฐœ์ƒ ๐Ÿšจ๐Ÿšจ: {e}")
        import traceback
        traceback.print_exc()
85
if __name__ == '__main__':
    # 1. Verify the required libraries are installed before doing any work,
    # so the user gets an actionable install hint instead of a traceback.
    try:
        import langchain_community
        import sentence_transformers
        import faiss
        import torch
    except ImportError as e:
        print(f"๐Ÿšจ [์˜ค๋ฅ˜] {e.name} ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๊ฐ€ ์„ค์น˜๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.")
        print("๐Ÿ‘‰ ๋‹ค์Œ ๋ช…๋ น์–ด๋ฅผ ์‹คํ–‰ํ•˜์—ฌ ํ•„์š”ํ•œ ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๋ฅผ ์„ค์น˜ํ•ด์ฃผ์„ธ์š”:")
        print("pip install langchain-community sentence-transformers faiss-cpu torch")
        print("(GPU ์‚ฌ์šฉ ์‹œ: pip install langchain-community sentence-transformers faiss-gpu torch)")
        exit(1)

    create_and_save_retriever()
๊ธฐํƒ€/create_final_df.py ADDED
@@ -0,0 +1,530 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# final_df.py
#
# Builds the merged, cleaned Seongdong-gu merchant dataset (data/final_df.csv)
# from the three raw "big_data" CSV files in the data/ folder.
#
# NOTE(review): this resolves a previously committed, unresolved merge
# conflict. Both conflict branches were identical except that the second one
# carried a mojibake-damaged column name; the intact HEAD branch is kept.

import pandas as pd
import numpy as np
import os
import sys

# Resolve all paths relative to the directory containing this script.
script_path = os.path.abspath(sys.argv[0])
script_dir = os.path.dirname(script_path)

# Path of the data folder.
data_dir = os.path.join(script_dir, 'data')

# Safety check: make sure the 'data' folder actually exists.
if not os.path.exists(data_dir):
    print(f"Error: Data directory not found at {data_dir}. Please check your folder structure.")
    sys.exit(1)

def get_file_path(filename):
    """Return the full path of *filename* inside the data folder."""
    return os.path.join(data_dir, filename)

# --------------------------------------------------------------------------
# 1) Dataset 1 - merchant overview
# --------------------------------------------------------------------------

file_path1 = get_file_path('big_data_set1_f.csv')

try:
    df1 = pd.read_csv(file_path1, encoding="cp949")
except FileNotFoundError:
    print(f"Error: File not found at {file_path1}. Please ensure big_data_set1_f.csv is in the 'data' folder.")
    sys.exit(1)

# Raw column code -> human-readable (Korean) column name.
col_map1 = {
    "ENCODED_MCT": "๊ฐ€๋งน์ ID",
    "MCT_BSE_AR": "๊ฐ€๋งน์ ์ฃผ์†Œ",
    "MCT_NM": "๊ฐ€๋งน์ ๋ช…",
    "MCT_BRD_NUM": "๋ธŒ๋žœ๋“œ๊ตฌ๋ถ„์ฝ”๋“œ",
    "MCT_SIGUNGU_NM": "์ง€์—ญ๋ช…",
    "HPSN_MCT_ZCD_NM": "์—…์ข…",
    "HPSN_MCT_BZN_CD_NM": "์ƒ๊ถŒ",
    "ARE_D": "๊ฐœ์„ค์ผ",
    "MCT_ME_D": "ํ์—…์—ฌ๋ถ€"
}

df1 = df1.rename(columns=col_map1)

# Preprocessing: fill missing categorical values.
df1['๋ธŒ๋žœ๋“œ๊ตฌ๋ถ„์ฝ”๋“œ'] = df1['๋ธŒ๋žœ๋“œ๊ตฌ๋ถ„์ฝ”๋“œ'].fillna('๋ฏธํ™•์ธ')
df1['์ƒ๊ถŒ'] = df1['์ƒ๊ถŒ'].fillna('๋ฏธํ™•์ธ')

df1['๊ฐœ์„ค์ผ'] = df1['๊ฐœ์„ค์ผ'].astype(str)
# errors='coerce' turns unparsable dates into NaT instead of raising.
df1['๊ฐœ์„ค์ผ'] = pd.to_datetime(df1['๊ฐœ์„ค์ผ'], format='%Y%m%d', errors='coerce')

# Closure date: only strictly numeric values are parsed; anything else
# (including NaN) becomes NaT, avoiding int() errors on malformed rows.
df1['ํ์—…์—ฌ๋ถ€'] = df1['ํ์—…์—ฌ๋ถ€'].apply(lambda x: pd.to_datetime(int(x), format='%Y%m%d', errors='coerce') if pd.notna(x) and str(x).isdigit() else pd.NaT)
# A missing closure date means the merchant is still operating.
df1['์šด์˜์ƒํƒœ'] = df1['ํ์—…์—ฌ๋ถ€'].apply(lambda x: '์šด์˜์ค‘' if pd.isna(x) else 'ํ์—…')

# --------------------------------------------------------------------------
# 2) Dataset 2 - monthly merchant usage
# --------------------------------------------------------------------------

file_path2 = get_file_path('big_data_set2_f.csv')

try:
    df2 = pd.read_csv(file_path2, encoding="cp949")
except FileNotFoundError:
    print(f"Error: File not found at {file_path2}. Please ensure big_data_set2_f.csv is in the 'data' folder.")
    sys.exit(1)

col_map2 = {
    "ENCODED_MCT": "๊ฐ€๋งน์ ID",
    "TA_YM": "๊ธฐ์ค€๋…„์›”",
    "MCT_OPE_MS_CN": "์šด์˜๊ฐœ์›”์ˆ˜_๊ตฌ๊ฐ„",
    "RC_M1_SAA": "์›”๋งค์ถœ๊ธˆ์•ก_๊ตฌ๊ฐ„",
    "RC_M1_TO_UE_CT": "์›”๋งค์ถœ๊ฑด์ˆ˜_๊ตฌ๊ฐ„",
    "RC_M1_UE_CUS_CN": "์›”์œ ๋‹ˆํฌ๊ณ ๊ฐ์ˆ˜_๊ตฌ๊ฐ„",
    "RC_M1_AV_NP_AT": "์›”๊ฐ๋‹จ๊ฐ€_๊ตฌ๊ฐ„",
    "APV_CE_RAT": "์ทจ์†Œ์œจ_๊ตฌ๊ฐ„",
    "DLV_SAA_RAT": "๋ฐฐ๋‹ฌ๋งค์ถœ๋น„์œจ",
    "M1_SME_RY_SAA_RAT": "๋™์ผ์—…์ข…๋งค์ถœ๋Œ€๋น„๋น„์œจ",
    "M1_SME_RY_CNT_RAT": "๋™์ผ์—…์ข…๊ฑด์ˆ˜๋Œ€๋น„๋น„์œจ",
    "M12_SME_RY_SAA_PCE_RT": "๋™์ผ์—…์ข…๋‚ด๋งค์ถœ์ˆœ์œ„๋น„์œจ",
    "M12_SME_BZN_SAA_PCE_RT": "๋™์ผ์ƒ๊ถŒ๋‚ด๋งค์ถœ์ˆœ์œ„๋น„์œจ",
    "M12_SME_RY_ME_MCT_RAT": "๋™์ผ์—…์ข…ํ•ด์ง€๊ฐ€๋งน์ ๋น„์ค‘",
    "M12_SME_BZN_ME_MCT_RAT": "๋™์ผ์ƒ๊ถŒํ•ด์ง€๊ฐ€๋งน์ ๋น„์ค‘"
}

df2 = df2.rename(columns=col_map2)

# Preprocessing: parse the YYYYMM reference month.
df2['๊ธฐ์ค€๋…„์›”'] = pd.to_datetime(df2['๊ธฐ์ค€๋…„์›”'].astype(str), format='%Y%m')

# -999999.9 is the data provider's sentinel for "not available".
df2.replace(-999999.9, np.nan, inplace=True)

# --------------------------------------------------------------------------
# 3) Dataset 3 - monthly merchant customer profile
# --------------------------------------------------------------------------

file_path3 = get_file_path('big_data_set3_f.csv')

try:
    df3 = pd.read_csv(file_path3, encoding="cp949")
except FileNotFoundError:
    print(f"Error: File not found at {file_path3}. Please ensure big_data_set3_f.csv is in the 'data' folder.")
    sys.exit(1)

col_map3 = {
    "ENCODED_MCT": "๊ฐ€๋งน์ ID",
    "TA_YM": "๊ธฐ์ค€๋…„์›”",
    "M12_MAL_1020_RAT": "๋‚จ์„ฑ20๋Œ€์ดํ•˜๋น„์œจ",
    "M12_MAL_30_RAT": "๋‚จ์„ฑ30๋Œ€๋น„์œจ",
    "M12_MAL_40_RAT": "๋‚จ์„ฑ40๋Œ€๋น„์œจ",
    "M12_MAL_50_RAT": "๋‚จ์„ฑ50๋Œ€๋น„์œจ",
    "M12_MAL_60_RAT": "๋‚จ์„ฑ60๋Œ€์ด์ƒ๋น„์œจ",
    "M12_FME_1020_RAT": "์—ฌ์„ฑ20๋Œ€์ดํ•˜๋น„์œจ",
    "M12_FME_30_RAT": "์—ฌ์„ฑ30๋Œ€๋น„์œจ",
    "M12_FME_40_RAT": "์—ฌ์„ฑ40๋Œ€๋น„์œจ",
    "M12_FME_50_RAT": "์—ฌ์„ฑ50๋Œ€๋น„์œจ",
    "M12_FME_60_RAT": "์—ฌ์„ฑ60๋Œ€์ด์ƒ๋น„์œจ",
    "MCT_UE_CLN_REU_RAT": "์žฌ์ด์šฉ๊ณ ๊ฐ๋น„์œจ",
    "MCT_UE_CLN_NEW_RAT": "์‹ ๊ทœ๊ณ ๊ฐ๋น„์œจ",
    "RC_M1_SHC_RSD_UE_CLN_RAT": "๊ฑฐ์ฃผ์ž์ด์šฉ๋น„์œจ",
    "RC_M1_SHC_WP_UE_CLN_RAT": "์ง์žฅ์ธ์ด์šฉ๋น„์œจ",
    "RC_M1_SHC_FLP_UE_CLN_RAT": "์œ ๋™์ธ๊ตฌ์ด์šฉ๋น„์œจ"
}

df3 = df3.rename(columns=col_map3)

# Preprocessing: parse the YYYYMM reference month.
df3['๊ธฐ์ค€๋…„์›”'] = pd.to_datetime(df3['๊ธฐ์ค€๋…„์›”'].astype(str), format='%Y%m')

df3.replace(-999999.9, np.nan, inplace=True)

# --------------------------------------------------------------------------
# Data integration
# --------------------------------------------------------------------------

# Monthly usage + customer profile share the (merchant, month) key.
df23 = pd.merge(df2, df3, on=["๊ฐ€๋งน์ ID", "๊ธฐ์ค€๋…„์›”"], how="inner")

# Attach the static merchant overview to every monthly row.
final_df = pd.merge(df23, df1, on="๊ฐ€๋งน์ ID", how="left")

# --------------------------------------------------------------------------
# Outlier handling
# --------------------------------------------------------------------------

# Business-zone labels that cannot belong to Seongdong-gu.
non_seongdong_areas = [
    '์••๊ตฌ์ •๋กœ๋ฐ์˜ค', 'ํ’์‚ฐ์ง€๊ตฌ', '๋ฏธ์•„์‚ฌ๊ฑฐ๋ฆฌ', '๋ฐฉ๋ฐฐ์—ญ',
    '์ž์–‘', '๋™๋Œ€๋ฌธ์—ญ์‚ฌ๋ฌธํ™”๊ณต์›์—ญ', '๊ฑด๋Œ€์ž…๊ตฌ',
    '์„œ๋ฉด์—ญ', '์˜ค๋‚จ'
]

# Step 1: keep only rows whose address contains '์„ฑ๋™๊ตฌ'.
mask_seongdong_addr = final_df['๊ฐ€๋งน์ ์ฃผ์†Œ'].str.contains('์„ฑ๋™๊ตฌ', na=False)
seongdong_df = final_df[mask_seongdong_addr].copy()

# Step 2: zone label says non-Seongdong but the address is in Seongdong
# -> correct the label.
mask_mislabel = seongdong_df['์ƒ๊ถŒ'].isin(non_seongdong_areas)
seongdong_df.loc[mask_mislabel, '์ƒ๊ถŒ'] = '๋ฏธํ™•์ธ(์„ฑ๋™๊ตฌ)'

# Step 3: drop rows whose zone is 'unknown' while the address is outside
# Seongdong. Kept for parity with the notebook; after the Step 1 filter it is
# effectively a no-op.
final_clean_df = seongdong_df[
    ~(
        (seongdong_df['์ƒ๊ถŒ'].str.contains('๋ฏธํ™•์ธ')) &
        (~seongdong_df['๊ฐ€๋งน์ ์ฃผ์†Œ'].str.contains('์„ฑ๋™๊ตฌ', na=False))
    )
].copy()

# Industry category treated as an outlier (a single 100%-share category).
final_clean_df = final_clean_df[final_clean_df['์—…์ข…'] != '์œ ์ œํ’ˆ'].copy()

# Distinct monthly-sales bins (kept for inspection/debugging).
unique_sales_bins = final_clean_df['์›”๋งค์ถœ๊ธˆ์•ก_๊ตฌ๊ฐ„'].dropna().unique()

# --------------------------------------------------------------------------
# Percentile bin -> level label (a different wording per column)
# --------------------------------------------------------------------------

# 1. '์›”๋งค์ถœ๊ธˆ์•ก_๊ตฌ๊ฐ„' (sales volume / rank)
sales_volume_map = {
    '1_10%์ดํ•˜': '์ตœ์ƒ์œ„',
    '2_10-25%': '์ƒ์œ„',
    '3_25-50%': '์ค‘์ƒ์œ„',
    '4_50-75%': '์ค‘ํ•˜์œ„',
    '5_75-90%': 'ํ•˜์œ„',
    '6_90%์ดˆ๊ณผ(ํ•˜์œ„ 10% ์ดํ•˜)': '์ตœํ•˜์œ„'
}

# 2. '์›”๊ฐ๋‹จ๊ฐ€_๊ตฌ๊ฐ„' (price level)
price_level_map = {
    '1_10%์ดํ•˜': '์ตœ๊ณ ๊ฐ€',
    '2_10-25%': '๊ณ ๊ฐ€',
    '3_25-50%': '์ค‘๊ฐ€',
    '4_50-75%': '์ค‘์ €๊ฐ€',
    '5_75-90%': '์ €๊ฐ€',
    '6_90%์ดˆ๊ณผ(ํ•˜์œ„ 10% ์ดํ•˜)': '์ตœ์ €๊ฐ€'
}

# 3. '์šด์˜๊ฐœ์›”์ˆ˜_๊ตฌ๊ฐ„' (operating tenure)
operation_period_map = {
    '1_10%์ดํ•˜': '์ตœ์žฅ๊ธฐ',  # longest-running merchants
    '2_10-25%': '์žฅ๊ธฐ',
    '3_25-50%': '์ค‘๊ธฐ',
    '4_50-75%': '๋‹จ๊ธฐ',
    '5_75-90%': '์‹ ๊ทœ',
    '6_90%์ดˆ๊ณผ(ํ•˜์œ„ 10% ์ดํ•˜)': '์ตœ์‹ ๊ทœ'  # most recently opened
}

# 4. '์›”๋งค์ถœ๊ฑด์ˆ˜_๊ตฌ๊ฐ„' (transaction volume)
transaction_count_map = {
    '1_10%์ดํ•˜': '๊ฑฐ๋ž˜ ์ตœ๋‹ค',  # highest transaction count
    '2_10-25%': '๊ฑฐ๋ž˜ ๋งŽ์Œ',
    '3_25-50%': '๊ฑฐ๋ž˜ ๋ณดํ†ต',
    '4_50-75%': '๊ฑฐ๋ž˜ ์ ์Œ',
    '5_75-90%': '๊ฑฐ๋ž˜ ํฌ์†Œ',
    '6_90%์ดˆ๊ณผ(ํ•˜์œ„ 10% ์ดํ•˜)': '๊ฑฐ๋ž˜ ์ตœ์ €'
}

# 5. '์›”์œ ๋‹ˆํฌ๊ณ ๊ฐ์ˆ˜_๊ตฌ๊ฐ„' (customer-base size)
customer_count_map = {
    '1_10%์ดํ•˜': '๊ณ ๊ฐ ์ตœ๋‹ค',  # largest customer base
    '2_10-25%': '๊ณ ๊ฐ ๋งŽ์Œ',
    '3_25-50%': '๊ณ ๊ฐ ๋ณดํ†ต',
    '4_50-75%': '๊ณ ๊ฐ ์ ์Œ',
    '5_75-90%': '๊ณ ๊ฐ ํฌ์†Œ',
    '6_90%์ดˆ๊ณผ(ํ•˜์œ„ 10% ์ดํ•˜)': '๊ณ ๊ฐ ์ตœ์ €'
}


# --- Derived level columns ---

final_clean_df['๋งค์ถœ๊ตฌ๊ฐ„_์ˆ˜์ค€'] = final_clean_df['์›”๋งค์ถœ๊ธˆ์•ก_๊ตฌ๊ฐ„'].map(sales_volume_map)
final_clean_df['์›”๊ฐ๋‹จ๊ฐ€_์ˆ˜์ค€'] = final_clean_df['์›”๊ฐ๋‹จ๊ฐ€_๊ตฌ๊ฐ„'].map(price_level_map)
final_clean_df['์šด์˜๊ฐœ์›”์ˆ˜_์ˆ˜์ค€'] = final_clean_df['์šด์˜๊ฐœ์›”์ˆ˜_๊ตฌ๊ฐ„'].map(operation_period_map)
final_clean_df['์›”๋งค์ถœ๊ฑด์ˆ˜_์ˆ˜์ค€'] = final_clean_df['์›”๋งค์ถœ๊ฑด์ˆ˜_๊ตฌ๊ฐ„'].map(transaction_count_map)
final_clean_df['์›”์œ ๋‹ˆํฌ๊ณ ๊ฐ์ˆ˜_์ˆ˜์ค€'] = final_clean_df['์›”์œ ๋‹ˆํฌ๊ณ ๊ฐ์ˆ˜_๊ตฌ๊ฐ„'].map(customer_count_map)

# --- Fill bins the maps did not cover ---
final_clean_df['๋งค์ถœ๊ตฌ๊ฐ„_์ˆ˜์ค€'] = final_clean_df['๋งค์ถœ๊ตฌ๊ฐ„_์ˆ˜์ค€'].fillna('๋ฏธํ™•์ธ')
final_clean_df['์›”๊ฐ๋‹จ๊ฐ€_์ˆ˜์ค€'] = final_clean_df['์›”๊ฐ๋‹จ๊ฐ€_์ˆ˜์ค€'].fillna('๋ฏธํ™•์ธ')
final_clean_df['์šด์˜๊ฐœ์›”์ˆ˜_์ˆ˜์ค€'] = final_clean_df['์šด์˜๊ฐœ์›”์ˆ˜_์ˆ˜์ค€'].fillna('๋ฏธํ™•์ธ')
final_clean_df['์›”๋งค์ถœ๊ฑด์ˆ˜_์ˆ˜์ค€'] = final_clean_df['์›”๋งค์ถœ๊ฑด์ˆ˜_์ˆ˜์ค€'].fillna('๋ฏธํ™•์ธ')
final_clean_df['์›”์œ ๋‹ˆํฌ๊ณ ๊ฐ์ˆ˜_์ˆ˜์ค€'] = final_clean_df['์›”์œ ๋‹ˆํฌ๊ณ ๊ฐ์ˆ˜_์ˆ˜์ค€'].fillna('๋ฏธํ™•์ธ')

# --------------------------------------------------------------------------
# Save final_df
# --------------------------------------------------------------------------

# Write into the 'data' folder; utf-8-sig keeps Hangul intact in Excel.
save_path = get_file_path("final_df.csv")
final_clean_df.to_csv(save_path, index=False, encoding="utf-8-sig")

print(f"CSV ํŒŒ์ผ ์ €์žฅ ์™„๋ฃŒ: {save_path}")
๊ธฐํƒ€/feastival_df_add_keywords.py ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import os
import pandas as pd
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.messages import HumanMessage
import time

# --- Configuration ---
# NOTE: verify the source file path and the name of the output file.
INPUT_CSV_PATH = 'festival_df.csv'
OUTPUT_CSV_PATH = 'festival_df_updated.csv'
# ----------------

def generate_keywords_from_description(llm, description: str) -> str:
    """Generate marketing keywords for a festival via the Gemini model.

    Returns a comma-separated keyword string, or "" when the description is
    empty / not a string or when the API call fails.
    """
    if not isinstance(description, str) or not description.strip():
        return ""

    # The prompt assigns the model a role and pins down both the content
    # categories and the exact output format we expect back.
    prompt = f"""
    ๋‹น์‹ ์€ ์ง€์—ญ ์ถ•์ œ ์ „๋ฌธ ๋งˆ์ผ€ํŒ… ๋ถ„์„๊ฐ€์ž…๋‹ˆ๋‹ค.
    ์•„๋ž˜ ์ œ๊ณต๋œ ์ถ•์ œ ์†Œ๊ฐœ๊ธ€์„ ์ฝ๊ณ , ๋ถ€์Šค ์ฐธ๊ฐ€๋ฅผ ๊ณ ๋ คํ•˜๋Š” ๊ฐ€๊ฒŒ ์‚ฌ์žฅ๋‹˜์—๊ฒŒ ๋„์›€์ด ๋  ๋งŒํ•œ ํ•ต์‹ฌ ํ‚ค์›Œ๋“œ๋ฅผ ์ถ”์ถœํ•ด์ฃผ์„ธ์š”.

    [์ถ”์ถœ ๊ฐ€์ด๋“œ๋ผ์ธ]
    1. ๋‹ค์Œ 5๊ฐ€์ง€ ์นดํ…Œ๊ณ ๋ฆฌ๋กœ ํ‚ค์›Œ๋“œ๋ฅผ ๋ถ„๋ฅ˜ํ•ด์ฃผ์„ธ์š”:
       - **ํƒ€๊ฒŸ ๊ณ ๊ฐ**: (์˜ˆ: 20๋Œ€, ๊ฐ€์กฑ ๋‹จ์œ„, ์นœ๊ตฌ, ์—ฐ์ธ, ์™ธ๊ตญ์ธ ๊ด€๊ด‘๊ฐ)
       - **๊ณ„์ ˆ**: (์˜ˆ: ๋ด„, ์—ฌ๋ฆ„, ๊ฐ€์„, ๊ฒจ์šธ)
       - **์ถ•์ œ ๋ถ„์œ„๊ธฐ**: (์˜ˆ: ํ™œ๊ธฐ์ฐฌ, ์ „ํ†ต์ ์ธ, ํž™ํ•œ, ์ž์—ฐ ์นœํ™”์ )
       - **์ฃผ์š” ์ฝ˜ํ…์ธ **: (์˜ˆ: ๋จน๊ฑฐ๋ฆฌ, ํ‘ธ๋“œํŠธ๋Ÿญ, ์ฒดํ—˜ ํ™œ๋™, ๊ณต์—ฐ, ์ „ํ†ต๋ฌธํ™”, ๋ถˆ๊ฝƒ๋†€์ด, ํŠน์‚ฐ๋ฌผ)
       - **ํ•ต์‹ฌ ํ…Œ๋งˆ**: (์˜ˆ: ์—ญ์‚ฌ, ๋ฌธํ™”, ์Œ์•…, ์˜ˆ์ˆ , ๊ณ„์ ˆ)
    2. ๋ชจ๋“  ํ‚ค์›Œ๋“œ๋ฅผ ์‰ผํ‘œ(,)๋กœ ๊ตฌ๋ถ„๋œ ํ•˜๋‚˜์˜ ๋ฌธ์ž์—ด๋กœ ๋งŒ๋“ค์–ด ๋ฐ˜ํ™˜ํ•ด์ฃผ์„ธ์š”.
       (์˜ˆ์‹œ: ๊ฐ€์กฑ ๋‹จ์œ„, ์—ฐ์ธ, ํ™œ๊ธฐ์ฐฌ, ์ „ํ†ต์ ์ธ, ๋จน๊ฑฐ๋ฆฌ, ์ฒดํ—˜, ์—ญ์‚ฌ, ๋ฌธํ™”)
    3. ์†Œ๊ฐœ๊ธ€์—์„œ ๊ทผ๊ฑฐ๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†๋Š” ๋‚ด์šฉ์€ ์ถ”์ธกํ•˜์—ฌ ๋งŒ๋“ค์ง€ ๋งˆ์„ธ์š”.

    [์ถ•์ œ ์†Œ๊ฐœ๊ธ€]
    {description}

    [์ถ”์ถœ๋œ ํ‚ค์›Œ๋“œ (์‰ผํ‘œ๋กœ ๊ตฌ๋ถ„)]
    """

    try:
        return llm.invoke([HumanMessage(content=prompt)]).content.strip()
    except Exception as e:
        print(f"  [์˜ค๋ฅ˜] API ํ˜ธ์ถœ ์ค‘ ๋ฌธ์ œ ๋ฐœ์ƒ: {e}")
        return ""

def main():
    """Entry point: enrich every festival row with AI-generated keywords."""
    print("--- ๐Ÿค– '์†Œ๊ฐœ' ๊ธฐ๋ฐ˜ AI ํ‚ค์›Œ๋“œ ์ž๋™ ์ƒ์„ฑ ์ž‘์—…์„ ์‹œ์ž‘ํ•ฉ๋‹ˆ๋‹ค. ---")

    # 1. Read the Google API key and initialise the LLM.
    try:
        # Look up the 'GOOGLE_API_KEY' environment variable.
        google_api_key = os.getenv("GOOGLE_API_KEY")
        if not google_api_key:
            raise ValueError("GOOGLE_API_KEY ํ™˜๊ฒฝ ๋ณ€์ˆ˜๊ฐ€ ์„ค์ •๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค. API ํ‚ค๋ฅผ ์„ค์ •ํ•ด์ฃผ์„ธ์š”.")

        # Low temperature for deterministic, conservative extraction.
        llm = ChatGoogleGenerativeAI(
            model="gemini-2.5-flash",
            google_api_key=google_api_key,
            temperature=0.1
        )
        print("โœ… Gemini ๋ชจ๋ธ ์ดˆ๊ธฐํ™” ์™„๋ฃŒ.")
    except Exception as e:
        print(f"โŒ [์น˜๋ช…์  ์˜ค๋ฅ˜] Gemini ๋ชจ๋ธ ์ดˆ๊ธฐํ™” ์‹คํŒจ: {e}")
        return

    # 2. Load the festival CSV.
    try:
        df = pd.read_csv(INPUT_CSV_PATH)
        print(f"โœ… '{INPUT_CSV_PATH}' ํŒŒ์ผ ๋กœ๋”ฉ ์™„๋ฃŒ. (์ด {len(df)}๊ฐœ ์ถ•์ œ)")
    except FileNotFoundError:
        print(f"โŒ [์น˜๋ช…์  ์˜ค๋ฅ˜] ํŒŒ์ผ์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค: '{INPUT_CSV_PATH}'")
        print("   ํ”„๋กœ์ ํŠธ ํด๋” ๋‚ด์— 'festival_df.csv' ํŒŒ์ผ์ด ์žˆ๋Š”์ง€ ํ™•์ธํ•ด์ฃผ์„ธ์š”.")
        return

    # 3. Generate keywords per festival and merge with any existing ones.
    merged_keyword_column = []
    total_rows = len(df)

    for position, (_, row) in enumerate(df.iterrows(), start=1):
        print(f"\n--- ({position}/{total_rows}) '{row['์ถ•์ œ๋ช…']}' ์ž‘์—… ์ค‘ ---")

        print("  - AI๋ฅผ ํ˜ธ์ถœํ•˜์—ฌ ํ‚ค์›Œ๋“œ๋ฅผ ์ƒ์„ฑํ•ฉ๋‹ˆ๋‹ค...")
        new_keywords = generate_keywords_from_description(llm, row['์†Œ๊ฐœ'])

        original_keywords = str(row.get('ํ‚ค์›Œ๋“œ', ''))

        # Union of old and new keywords, de-duplicated and sorted.
        candidates = original_keywords.split(',') + new_keywords.split(',')
        unique_keywords = sorted({token.strip() for token in candidates if token.strip()})

        final_keywords_str = ', '.join(unique_keywords)
        merged_keyword_column.append(final_keywords_str)

        print(f"  - [๊ธฐ์กด ํ‚ค์›Œ๋“œ]: {original_keywords if original_keywords else '์—†์Œ'}")
        print(f"  - [AI ์ƒ์„ฑ ํ‚ค์›Œ๋“œ]: {new_keywords}")
        print(f"  - [์ตœ์ข… ํ‚ค์›Œ๋“œ]: {final_keywords_str}")

        # Gentle rate limiting between API calls.
        time.sleep(0.5)

    # 4. Write the enriched keyword column back and save.
    df['ํ‚ค์›Œ๋“œ'] = merged_keyword_column

    df.to_csv(OUTPUT_CSV_PATH, index=False, encoding='utf-8-sig')
    print(f"\n--- ๐ŸŽ‰ ์ž‘์—… ์™„๋ฃŒ! ---")
    print(f"โœ… ์ƒˆ๋กœ์šด ํ‚ค์›Œ๋“œ๊ฐ€ ์ถ”๊ฐ€๋œ ํŒŒ์ผ์ด '{OUTPUT_CSV_PATH}' ๊ฒฝ๋กœ์— ์ €์žฅ๋˜์—ˆ์Šต๋‹ˆ๋‹ค.")

if __name__ == "__main__":
    main()
๊ธฐํƒ€/festival_df_first.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import pandas as pd
import glob
import os

# --- Configuration ---
# 1. Folder containing the per-festival source CSV files.
#    On Windows either double the backslashes (C:\\...) or use forward slashes.
folder_path = 'C:/projects/shcard_2025_bigcontest/data/festival'

# 2. Folder where the combined files are written (same project data folder).
output_path = 'C:/projects/shcard_2025_bigcontest/data'

# --- Data-combining helper ---
def combine_festival_data(path, pattern, output_filename, output_dir=None):
    """Find CSVs matching *pattern* under *path*, concatenate and save them.

    :param path: folder containing the input CSV files
    :param pattern: file-name glob pattern (e.g. '*_๋ฌธํ™”๊ด€๊ด‘์ถ•์ œ ์ฃผ์š” ์ง€ํ‘œ.csv')
    :param output_filename: name of the combined CSV file to create
    :param output_dir: folder to write the result into; defaults to the
        module-level ``output_path`` (backward-compatible generalization so
        callers are no longer tied to the hard-coded machine path)
    """
    if output_dir is None:
        output_dir = output_path

    # Collect the matching input files.
    file_list = glob.glob(os.path.join(path, pattern))

    if not file_list:
        print(f"โš ๏ธ ๊ฒฝ๊ณ : '{pattern}' ํŒจํ„ด์— ํ•ด๋‹นํ•˜๋Š” ํŒŒ์ผ์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค.")
        print(f"๊ฒฝ๋กœ๋ฅผ ํ™•์ธํ•ด์ฃผ์„ธ์š”: {path}\n")
        return

    # Read every file and stack them into a single DataFrame.
    df_list = [pd.read_csv(file) for file in file_list]
    combined_df = pd.concat(df_list, ignore_index=True)

    # utf-8-sig keeps Hangul readable when the CSV is opened in Excel.
    output_filepath = os.path.join(output_dir, output_filename)
    combined_df.to_csv(output_filepath, index=False, encoding='utf-8-sig')

    print(f"โœ… ์„ฑ๊ณต: {len(file_list)}๊ฐœ์˜ ํŒŒ์ผ์„ ํ†ตํ•ฉํ•˜์—ฌ '{output_filename}'์œผ๋กœ ์ €์žฅํ–ˆ์Šต๋‹ˆ๋‹ค.")
    print(f"   - ์ด {len(combined_df)}๊ฐœ์˜ ํ–‰์ด ์ƒ์„ฑ๋˜์—ˆ์Šต๋‹ˆ๋‹ค.")
    print(f"   - ์ €์žฅ ๊ฒฝ๋กœ: {output_filepath}\n")


# --- Main ---
print("===== ์ถ•์ œ ๋ฐ์ดํ„ฐ ํ†ตํ•ฉ์„ ์‹œ์ž‘ํ•ฉ๋‹ˆ๋‹ค. =====\n")

# 1. Combine the "key indicators" files.
combine_festival_data(folder_path, '*_๋ฌธํ™”๊ด€๊ด‘์ถ•์ œ ์ฃผ์š” ์ง€ํ‘œ.csv', 'ํ†ตํ•ฉ_๋ฌธํ™”๊ด€๊ด‘์ถ•์ œ_์ฃผ์š”_์ง€ํ‘œ.csv')

# 2. Combine the "domestic visitors by sex/age" files.
combine_festival_data(folder_path, '*_์„ฑ_์—ฐ๋ น๋ณ„ ๋‚ด๊ตญ์ธ ๋ฐฉ๋ฌธ์ž.csv', 'ํ†ตํ•ฉ_์„ฑ_์—ฐ๋ น๋ณ„_๋‚ด๊ตญ์ธ_๋ฐฉ๋ฌธ์ž.csv')

# 3. Combine the "yearly visitor trend" files.
combine_festival_data(folder_path, '*_์—ฐ๋„๋ณ„ ๋ฐฉ๋ฌธ์ž ์ถ”์ด.csv', 'ํ†ตํ•ฉ_์—ฐ๋„๋ณ„_๋ฐฉ๋ฌธ์ž_์ถ”์ด.csv')

print("===== ๋ชจ๋“  ๋ฐ์ดํ„ฐ ํ†ตํ•ฉ ์ž‘์—…์ด ์™„๋ฃŒ๋˜์—ˆ์Šต๋‹ˆ๋‹ค. =====")
๊ธฐํƒ€/festival_df_processing.py ADDED
@@ -0,0 +1,235 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """festival_processing.ipynb
3
+
4
+ Automatically generated by Colab.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/drive/1NnUdWSIUNLRY4O9PmcX5GFgeaTekcO7c
8
+ """
9
+
10
+ # 1. ๊ตฌ๊ธ€ ๋“œ๋ผ์ด๋ธŒ ๋งˆ์šดํŠธ
11
+ from google.colab import drive
12
+ drive.mount('/content/drive')
13
+
14
+
15
+ # 2. CSV ํŒŒ์ผ ๊ฒฝ๋กœ ์ง€์ •
16
+ file_path = '/content/drive/MyDrive/Colab Notebooks/BigContest_2025/festival_df.csv'
17
+
18
+ # 3. ๋ฐ์ดํ„ฐ ๋ถˆ๋Ÿฌ์˜ค๊ธฐ
19
+ import pandas as pd
20
+ festival_df = pd.read_csv(file_path, encoding='utf-8')
21
+
22
+ # 4. ๋ฐ์ดํ„ฐ ํ™•์ธ
23
+ print("===== ๋ฐ์ดํ„ฐ ์ •๋ณด =====")
24
+ print(festival_df.info())
25
+ print("\n===== ๋ฐ์ดํ„ฐ ์ƒ˜ํ”Œ =====")
26
+ print(festival_df.head())
27
+ print("\n===== ๊ธฐ๋ณธ ํ†ต๊ณ„ =====")
28
+ print(festival_df.describe())
29
+
30
+ """- ์ฃผ์š” ์„ฑ๋ณ„
31
+
32
+ - ์ฃผ์š” ๋‚˜์ด
33
+ """
34
+
35
+ # ์ฃผ์š” ์„ฑ๋ณ„
36
+ # ๋‚จ/์—ฌ ์ปฌ๋Ÿผ ๋ฆฌ์ŠคํŠธ
37
+ male_cols = [c for c in festival_df.columns if '๋‚จ์„ฑ๋น„์œจ' in c]
38
+ female_cols = [c for c in festival_df.columns if '์—ฌ์„ฑ๋น„์œจ' in c]
39
+
40
+ # ๋‚จ์„ฑ/์—ฌ์„ฑ ๋น„์œจ ํ•ฉ ๊ณ„์‚ฐ - ๋‚จ์„ฑ+์—ฌ์„ฑ์ด 100%์ธ์ง€ ์ฒดํฌ
41
+ festival_df['๋‚จ์„ฑํ•ฉ'] = festival_df[male_cols].sum(axis=1)
42
+ festival_df['์—ฌ์„ฑํ•ฉ'] = festival_df[female_cols].sum(axis=1)
43
+ festival_df['๋‚จ๋…€ํ•ฉ๊ณ„'] = festival_df['๋‚จ์„ฑํ•ฉ'] + festival_df['์—ฌ์„ฑํ•ฉ']
44
+
45
+ # ํ•ฉ๊ณ„๊ฐ€ 100 ๊ทผ์ฒ˜์ธ์ง€ ํ™•์ธ
46
+ print(festival_df[['์ถ•์ œ๋ช…', '๋‚จ์„ฑํ•ฉ', '์—ฌ์„ฑํ•ฉ', '๋‚จ๋…€ํ•ฉ๊ณ„']].head())
47
+
48
+ import numpy as np
49
+
50
+ # 1๏ธโƒฃ ์ฃผ์š” ์„ฑ๋ณ„ ์ปฌ๋Ÿผ ์ถ”๊ฐ€
51
+ festival_df['์ฃผ์š”์„ฑ๋ณ„'] = np.where(
52
+ festival_df['๋‚จ์„ฑํ•ฉ'] > festival_df['์—ฌ์„ฑํ•ฉ'], '๋‚จ์„ฑ', '์—ฌ์„ฑ'
53
+ )
54
+
55
+ # 2๏ธโƒฃ ์ฃผ์š” ์—ฐ๋ น๋Œ€ ์ปฌ๋Ÿผ ์ถ”๊ฐ€
56
+ age_groups = ['09์„ธ', '1019์„ธ', '2029์„ธ', '3039์„ธ', '4049์„ธ', '5059์„ธ', '6069์„ธ', '70์„ธ์ด์ƒ']
57
+
58
+ # ๊ฐ ์—ฐ๋ น๋Œ€๋ณ„ ๋‚จ+์—ฌ ํ•ฉ ๊ณ„์‚ฐ
59
+ for age in age_groups:
60
+ festival_df[f'์—ฐ๋ น๋Œ€_{age}_ํ•ฉ'] = (
61
+ festival_df[f'๋‚จ์„ฑ๋น„์œจ_{age}'] + festival_df[f'์—ฌ์„ฑ๋น„์œจ_{age}']
62
+ )
63
+
64
+ # ๊ฐ ํ–‰๋ณ„๋กœ ๊ฐ€์žฅ ํฐ ์—ฐ๋ น๋Œ€ ์ฐพ๊ธฐ (NaN ๋ฐฉ์–ด ํฌํ•จ)
65
+ festival_df['์ฃผ์š”์—ฐ๋ น๋Œ€'] = (
66
+ festival_df[[f'์—ฐ๋ น๋Œ€_{age}_ํ•ฉ' for age in age_groups]]
67
+ .fillna(0)
68
+ .idxmax(axis=1)
69
+ .str.replace('์—ฐ๋ น๋Œ€_', '', regex=False)
70
+ .str.replace('_ํ•ฉ', '', regex=False)
71
+ )
72
+
73
+ # 3๏ธโƒฃ ์ค‘๊ฐ„ ๊ณ„์‚ฐ ์ปฌ๋Ÿผ(์—ฐ๋ น๋Œ€_*_ํ•ฉ)์€ ์ •๋ฆฌํ•ด์„œ ์ œ๊ฑฐ ๊ฐ€๋Šฅ
74
+ festival_df.drop(columns=[f'์—ฐ๋ น๋Œ€_{age}_ํ•ฉ' for age in age_groups], inplace=True)
75
+
76
+ # โœ… ๊ฒฐ๊ณผ ๋ฏธ๋ฆฌ๋ณด๊ธฐ
77
+ print(festival_df[['์ถ•์ œ๋ช…', '์ฃผ์š”์„ฑ๋ณ„', '์ฃผ์š”์—ฐ๋ น๋Œ€']].head())
78
+ print('--------------------------------------------')
79
+ print(festival_df[['์ถ•์ œ๋ช…', '์ฃผ์š”์„ฑ๋ณ„', '์ฃผ์š”์—ฐ๋ น๋Œ€']].tail())
80
+
81
+ festival_df
82
+
83
+ """- ์ฃผ์š” ๊ณ ๊ฐ์ธต(์„ฑ๋ณ„+์—ฐ๋ น)"""
84
+
85
+ # ๋‚จ/์—ฌ ๊ฐ ์—ฐ๋ น๋Œ€ ์ปฌ๋Ÿผ ๋ฆฌ์ŠคํŠธ
86
+ gender_age_cols = [f'๋‚จ์„ฑ๋น„์œจ_{age}' for age in age_groups] + [f'์—ฌ์„ฑ๋น„์œจ_{age}' for age in age_groups]
87
+
88
+ def find_key_customer(row):
89
+ # ํ•ด๋‹น ํ–‰์—์„œ ์ตœ๋Œ€๊ฐ’์„ ๊ฐ€์ง€๋Š” ์ปฌ๋Ÿผ ์ฐพ๊ธฐ
90
+ max_col = row[gender_age_cols].idxmax()
91
+
92
+ # ์ปฌ๋Ÿผ๋ช…์—์„œ ์„ฑ๋ณ„๊ณผ ๋‚˜์ด ์ถ”์ถœ
93
+ gender, age = max_col.split('_')
94
+
95
+ # ๋‚˜์ด ํ‘œํ˜„
96
+ if age == '70์„ธ์ด์ƒ':
97
+ age_str = '70์„ธ ์ด์ƒ'
98
+ else:
99
+ age_str = age[:2] + '~' + age[2:]
100
+
101
+ return f"{gender} {age_str}"
102
+
103
+ festival_df['์ฃผ์š”๊ณ ๊ฐ์ธต'] = festival_df.apply(find_key_customer, axis=1)
104
+
105
+ # ๊ฒฐ๊ณผ ํ™•์ธ
106
+ print(festival_df[['์ถ•์ œ๋ช…', '์ฃผ์š”๊ณ ๊ฐ์ธต']].head())
107
+
108
+ """- ์ฃผ์š” ๋ฐฉ๋ฌธ์ž(ํ˜„์ง€์ธ/์™ธ์ง€์ธ)"""
109
+
110
+ # ์—ฐ๋„๋ณ„ ํ˜„์ง€์ธ ๋ฐฉ๋ฌธ์ž ์ˆ˜ ํ•ฉ๊ณ„
111
+ local_cols = ['2018_(ํ˜„์ง€์ธ)๋ฐฉ๋ฌธ์ž์ˆ˜', '2019_(ํ˜„์ง€์ธ)๋ฐฉ๋ฌธ์ž์ˆ˜',
112
+ '2022_(ํ˜„์ง€์ธ)๋ฐฉ๋ฌธ์ž์ˆ˜', '2023_(ํ˜„์ง€์ธ)๋ฐฉ๋ฌธ์ž์ˆ˜',
113
+ '2024_(ํ˜„์ง€์ธ)๋ฐฉ๋ฌธ์ž์ˆ˜']
114
+
115
+ # ์—ฐ๋„๋ณ„ ์™ธ๋ถ€ ๋ฐฉ๋ฌธ์ž ์ˆ˜ ํ•ฉ๊ณ„ (์™ธ์ง€์ธ + ์™ธ๊ตญ์ธ)
116
+ outside_cols = ['2018_(์™ธ์ง€์ธ)๋ฐฉ๋ฌธ์ž์ˆ˜', '2019_(์™ธ์ง€์ธ)๋ฐฉ๋ฌธ์ž์ˆ˜',
117
+ '2022_(์™ธ์ง€์ธ)๋ฐฉ๋ฌธ์ž์ˆ˜', '2023_(์™ธ์ง€์ธ)๋ฐฉ๋ฌธ์ž์ˆ˜',
118
+ '2024_(์™ธ์ง€์ธ)๋ฐฉ๋ฌธ์ž์ˆ˜']
119
+
120
+ foreign_cols = ['2018_(์™ธ๊ตญ์ธ)๋ฐฉ๋ฌธ์ž์ˆ˜', '2019_(์™ธ๊ตญ์ธ)๋ฐฉ๋ฌธ์ž์ˆ˜',
121
+ '2022_(์™ธ๊ตญ์ธ)๋ฐฉ๋ฌธ์ž์ˆ˜', '2023_(์™ธ๊ตญ์ธ)๋ฐฉ๋ฌธ์ž์ˆ˜',
122
+ '2024_(์™ธ๊ตญ์ธ)๋ฐฉ๋ฌธ์ž์ˆ˜']
123
+
124
+ festival_df['์ดํ˜„์ง€์ธ'] = festival_df[local_cols].sum(axis=1)
125
+ festival_df['์ด์™ธ๋ถ€๋ฐฉ๋ฌธ์ž'] = festival_df[outside_cols + foreign_cols].sum(axis=1)
126
+
127
+ # ์ฃผ์š” ๋ฐฉ๋ฌธ์ž ํŒ๋‹จ
128
+ festival_df['์ฃผ์š”๋ฐฉ๋ฌธ์ž'] = np.where(
129
+ festival_df['์ดํ˜„์ง€์ธ'] >= festival_df['์ด์™ธ๋ถ€๋ฐฉ๋ฌธ์ž'],
130
+ 'ํ˜„์ง€์ธ',
131
+ '์™ธ๋ถ€๋ฐฉ๋ฌธ์ž'
132
+ )
133
+
134
+ # ์ค‘๊ฐ„ ํ•ฉ๊ณ„ ์ปฌ๋Ÿผ ์‚ญ์ œ
135
+ festival_df.drop(columns=['์ดํ˜„์ง€์ธ', '์ด์™ธ๋ถ€๋ฐฉ๋ฌธ์ž'], inplace=True)
136
+
137
+ # ๊ฒฐ๊ณผ ํ™•์ธ
138
+ print(festival_df[['์ถ•์ œ๋ช…', '์ฃผ์š”๋ฐฉ๋ฌธ์ž']])
139
+
140
+ festival_df.columns
141
+
142
+ """- ์ถ•์ œ ์ธ๊ธฐ
143
+
144
+ - ์ „์ฒด ๋ฐฉ๋ฌธ์ž์ˆ˜(์ „์ฒด๋ฐฉ๋ฌธ์ž์ˆ˜)
145
+ - ์ผํ‰๊ท  ๋ฐฉ๋ฌธ์ž์ˆ˜
146
+ - ์ถ•์ œ๊ธฐ๊ฐ„ ๋‚ด๋น„๊ฒŒ์ด์…˜ ๊ฒ€์ƒ‰๋Ÿ‰
147
+ - ์ถ•์ œ๊ธฐ๊ฐ„ ๊ด€๊ด‘ ์†Œ๋น„
148
+
149
+ ์œ„ ์ปฌ๋Ÿผ ์ด์šฉํ•ด์„œ ์ธ๊ธฐ ๊ณ„์‚ฐ
150
+
151
+ 1) ์—ฐ๋„๋ณ„ ๋ฐ์ดํ„ฐ๋ฅผ ๋ฐ”ํƒ•์œผ๋กœ ๋ชจ๋“  ์ง€ํ‘œ๋ฅผ ๊ณ„์‚ฐ
152
+ 2) ๊ฐ ์—ฐ๋„๋ณ„ ์ง€ํ‘œ๋ฅผ ํ‘œ์ค€ํ™” -> ์ ์ˆ˜๏ฟฝ๏ฟฝ๏ฟฝํ•˜๊ณ  ํ‰๊ท  ๋‚ด์„œ ํ•˜๋‚˜์˜ ์ข…ํ•ฉ '์ธ๊ธฐ ์ ์ˆ˜'๋ฅผ ์ƒ์„ฑ
153
+ 3) ์ข…ํ•ฉ ์ธ๊ธฐ ์ ์ˆ˜๋ฅผ ๊ธฐ์ค€์œผ๋กœ '์ƒ/์ค‘/ํ•˜' ๋“ฑ๊ธ‰ ๋‚˜๋ˆ”
154
+ """
155
+
156
+ import numpy as np
157
+ from sklearn.preprocessing import MinMaxScaler
158
+
159
+ # 1๏ธโƒฃ ์ธ๊ธฐ ์ง€ํ‘œ ์ปฌ๋Ÿผ ์ •์˜ (์—ฐ๋„๋ณ„ ๋ฐฉ๋ฌธ์ž์ˆ˜, ์ผํ‰๊ท  ๋ฐฉ๋ฌธ์ž์ˆ˜, ๊ฒ€์ƒ‰๋Ÿ‰ ๋“ฑ)
160
+ years = ['2018', '2019', '2022', '2023', '2024']
161
+ visitor_cols = [f'{year}_(์ „์ฒด)๋ฐฉ๋ฌธ์ž์ˆ˜' for year in years]
162
+ daily_avg_cols = [f'{year}_์ผํ‰๊ท  ๋ฐฉ๋ฌธ์ž์ˆ˜' for year in years]
163
+ nav_cols = [f'{year}_์ถ•์ œ๊ธฐ๊ฐ„_๋‚ด๋น„๊ฒŒ์ด์…˜ ๊ฒ€์ƒ‰๋Ÿ‰' for year in years]
164
+
165
+ # 2๏ธโƒฃ ๋ชจ๋“  ์ง€ํ‘œ๋ฅผ ํ•ฉ์ณ์„œ ์ƒˆ๋กœ์šด ์ ์ˆ˜ ๋ฐ์ดํ„ฐํ”„๋ ˆ์ž„ ์ƒ์„ฑ
166
+ score_df = festival_df[visitor_cols + daily_avg_cols + nav_cols].fillna(0)
167
+
168
+ # 3๏ธโƒฃ MinMaxScaler๋กœ 0~1๋กœ ์ •๊ทœํ™”
169
+ scaler = MinMaxScaler()
170
+ score_scaled = scaler.fit_transform(score_df)
171
+
172
+ # 4๏ธโƒฃ ์—ฐ๋„/์ง€ํ‘œ๋ณ„ ์ ์ˆ˜ ํ‰๊ท  ๋‚ด๊ธฐ
173
+ festival_df['์ธ๊ธฐ๋„_์ ์ˆ˜'] = score_scaled.mean(axis=1)
174
+
175
+ # 5๏ธโƒฃ ์ ์ˆ˜๋ฅผ ๊ธฐ์ค€์œผ๋กœ ๋“ฑ๊ธ‰ ๋‚˜๋ˆ„๊ธฐ (์ƒ/์ค‘/ํ•˜)
176
+ festival_df['์ถ•์ œ์ธ๊ธฐ'] = pd.cut(
177
+ festival_df['์ธ๊ธฐ๋„_์ ์ˆ˜'],
178
+ bins=[-0.01, 0.33, 0.66, 1.01],
179
+ labels=['ํ•˜', '์ค‘', '์ƒ']
180
+ )
181
+
182
+ # โœ… ๊ฒฐ๊ณผ ๋ฏธ๋ฆฌ๋ณด๊ธฐ
183
+ print(festival_df[['์ถ•์ œ๋ช…', '์ธ๊ธฐ๋„_์ ์ˆ˜', '์ถ•์ œ์ธ๊ธฐ']])
184
+
185
+ """- ์ถ•์ œ ์ธ๊ธฐ๋„(์ƒ์Šน/ํ•˜๋ฝ/๋ฏธ๋ฏธ)
186
+
187
+ - ์—ฐ๋„๋ณ„ ๋ณ€ํ™”์œจ์˜ ํ‰๊ท ์œผ๋กœ ์ธ๊ธฐ๋„ ์ƒ์Šน/ํ•˜๋ฝ/๋ฏธ๋ฏธ ๊ฒฐ์ •
188
+
189
+ - ํ‰๊ท ์ด ์–‘์ˆ˜ โ†’ ์ „์ฒด์ ์œผ๋กœ ์ƒ์Šน ์ถ”์„ธ
190
+
191
+ - ํ‰๊ท ์ด ์Œ์ˆ˜ โ†’ ์ „์ฒด์ ์œผ๋กœ ํ•˜๋ฝ ์ถ”์„ธ
192
+
193
+ - ํ‰๊ท ์ด ๊ฑฐ์˜ 0 โ†’ ๋ฏธ๋ฏธํ•œ ์ถ”์„ธ
194
+ """
195
+
196
+ # 1๏ธโƒฃ ์—ฐ๋„๋ณ„ ์ธ๊ธฐ ์ง€ํ‘œ ์ปฌ๋Ÿผ๋“ค
197
+ pop_cols = ['์ „์ฒด๋ฐฉ๋ฌธ์ž์ˆ˜', '์ผํ‰๊ท  ๋ฐฉ๋ฌธ์ž์ˆ˜', '์ถ•์ œ๊ธฐ๊ฐ„_๋‚ด๋น„๊ฒŒ์ด์…˜ ๊ฒ€์ƒ‰๋Ÿ‰', '์ถ•์ œ๊ธฐ๊ฐ„_๊ด€๊ด‘์†Œ๋น„']
198
+
199
+ # 2๏ธโƒฃ ์—ฐ๋„ ๋ฆฌ์ŠคํŠธ (๋ฐ์ดํ„ฐ์— ๋งž์ถฐ ์กฐ์ •)
200
+ years = ['2018', '2019', '2022', '2023', '2024']
201
+
202
+ # 3๏ธโƒฃ ์—ฐ๋„๋ณ„ ๋ณ€ํ™”์œจ ๊ณ„์‚ฐ
203
+ trend_list = []
204
+ for idx, row in festival_df.iterrows():
205
+ change_rates = []
206
+ for col in pop_cols:
207
+ year_values = [row[f'{year}_{col}'] for year in years if f'{year}_{col}' in festival_df.columns]
208
+ # ์—ฐ์†๋œ ์—ฐ๋„ ๋ณ€ํ™”์œจ ๊ณ„์‚ฐ ((์ด๋ฒˆ์—ฐ๋„-์ด์ „์—ฐ๋„)/์ด์ „์—ฐ๋„)
209
+ for i in range(1, len(year_values)):
210
+ prev = year_values[i-1]
211
+ curr = year_values[i]
212
+ if prev and not np.isnan(prev) and curr and not np.isnan(curr) and prev != 0:
213
+ rate = (curr - prev) / prev
214
+ change_rates.append(rate)
215
+ # ํ‰๊ท  ๋ณ€ํ™”์œจ ๊ณ„์‚ฐ
216
+ avg_rate = np.mean(change_rates) if change_rates else 0
217
+ # ์ƒ์Šน/ํ•˜๋ฝ/๋ฏธ๋ฏธ ํŒ๋‹จ (์ž„๊ณ„๊ฐ’ 1% ์‚ฌ์šฉ)
218
+ if avg_rate > 0.01:
219
+ trend_list.append('์ƒ์Šน')
220
+ elif avg_rate < -0.01:
221
+ trend_list.append('ํ•˜๋ฝ')
222
+ else:
223
+ trend_list.append('๋ฏธ๋ฏธ')
224
+
225
+ # 4๏ธโƒฃ ์ปฌ๋Ÿผ ์ถ”๊ฐ€
226
+ festival_df['์ถ•์ œ์ธ๊ธฐ๋„'] = trend_list
227
+
228
+ # โœ… ๊ฒฐ๊ณผ ํ™•์ธ
229
+ print(festival_df[['์ถ•์ œ๋ช…', '์ถ•์ œ์ธ๊ธฐ๋„']])
230
+
231
# Colab cell display of the final frame.
festival_df

# Persist the enriched dataset; utf-8-sig keeps Korean readable in Excel.
festival_df.to_csv('festival_df_edit.csv', index=False, encoding='utf-8-sig')
235
+
๊ธฐํƒ€/festival_df_second.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import pandas as pd
import re

# --- 1. Load the data ---
# Paths to the three per-topic integrated CSVs produced by the combine step.
path_indicators = 'C:/projects/shcard_2025_bigcontest/data/ํ†ตํ•ฉ_๋ฌธํ™”๊ด€๊ด‘์ถ•์ œ_์ฃผ์š”_์ง€ํ‘œ.csv'
path_demographics = 'C:/projects/shcard_2025_bigcontest/data/ํ†ตํ•ฉ_์„ฑ_์—ฐ๋ น๋ณ„_๋‚ด๊ตญ์ธ_๋ฐฉ๋ฌธ์ž.csv'
path_trend = 'C:/projects/shcard_2025_bigcontest/data/ํ†ตํ•ฉ_์—ฐ๋„๋ณ„_๋ฐฉ๋ฌธ์ž_์ถ”์ด.csv'

# Long-format inputs: key indicators, gender/age shares, yearly visitor trend.
df_indicators = pd.read_csv(path_indicators)
df_demographics = pd.read_csv(path_demographics)
df_trend = pd.read_csv(path_trend)
print("โœ… 3๊ฐœ์˜ ํ†ตํ•ฉ ํŒŒ์ผ์„ ์„ฑ๊ณต์ ์œผ๋กœ ๋ถˆ๋Ÿฌ์™”์Šต๋‹ˆ๋‹ค.\n")
14
+
15
+
16
+ # --- 2. ์—ฐ๋„๋ณ„ ๋ฐ์ดํ„ฐ๋ฅผ Wide ํ˜•์‹์œผ๋กœ ๋ณ€ํ™˜ํ•˜๋Š” ํ•จ์ˆ˜ ---
17
+ def pivot_by_year(df, index_col, year_col, drop_cols=None):
18
+ """์—ฐ๋„๋ณ„ ๋ฐ์ดํ„ฐ๋ฅผ (๋…„๋„)_(์ปฌ๋Ÿผ๋ช…) ํ˜•ํƒœ๋กœ ๋ณ€ํ™˜ํ•˜๋Š” ํ•จ์ˆ˜"""
19
+ if drop_cols:
20
+ df = df.drop(columns=drop_cols)
21
+
22
+ df_wide = df.pivot_table(index=index_col, columns=year_col)
23
+
24
+ # ๋ฉ€ํ‹ฐ๋ ˆ๋ฒจ ์ปฌ๋Ÿผ์„ (๋…„๋„)_(์ปฌ๋Ÿผ๋ช…) ํ˜•์‹์œผ๋กœ ํ•ฉ์น˜๊ธฐ
25
+ df_wide.columns = [f"{int(col[1])}_{col[0]}" for col in df_wide.columns]
26
+ return df_wide.reset_index()
27
+
28
+
29
+ # --- 3. ๊ฐ ๋ฐ์ดํ„ฐ ์ •์ œ ๋ฐ ๋ณ€ํ™˜ ---
30
+
31
+ # 3-1. '์—ฐ๋„๋ณ„ ๋ฐฉ๋ฌธ์ž ์ถ”์ด' ๋ฐ์ดํ„ฐ ๋ณ€ํ™˜
32
+ # ๋ถˆํ•„์š”ํ•˜๊ฑฐ๋‚˜ ์ค‘๋ณต๋  ์ˆ˜ ์žˆ๋Š” ์ปฌ๋Ÿผ์€ ๋ฏธ๋ฆฌ ์ œ๊ฑฐ
33
+ trend_drop_cols = ['์ผํ‰๊ท  ๋ฐฉ๋ฌธ์ž์ˆ˜ ์ฆ๊ฐ๋ฅ ', '(์ด์ „)์ „์ฒด๋ฐฉ๋ฌธ์ž', '(์ „์ฒด)๋ฐฉ๋ฌธ์ž์ฆ๊ฐ', '์ „๋…„๋Œ€๋น„๋ฐฉ๋ฌธ์ž์ฆ๊ฐ๋น„์œจ']
34
+ df_trend_wide = pivot_by_year(df_trend, '์ถ•์ œ๋ช…', '๊ฐœ์ตœ๋…„๋„', drop_cols=trend_drop_cols)
35
+ print("โœ… '์—ฐ๋„๋ณ„ ๋ฐฉ๋ฌธ์ž ์ถ”์ด' ๋ฐ์ดํ„ฐ๋ฅผ Wide ํ˜•ํƒœ๋กœ ๋ณ€ํ™˜ํ–ˆ์Šต๋‹ˆ๋‹ค.")
36
+
37
+ # 3-2. '์ฃผ์š” ์ง€ํ‘œ' ๋ฐ์ดํ„ฐ ๋ณ€ํ™˜
38
+ # '๊ทธ๋ฃน๋ช…'๊ณผ '๊ตฌ๋ถ„๋ช…'์„ ํ•ฉ์ณ ์ƒˆ๋กœ์šด ์ปฌ๋Ÿผ ์ƒ์„ฑ
39
+ df_indicators['์ง€ํ‘œ๊ตฌ๋ถ„'] = df_indicators['๊ทธ๋ฃน๋ช…'] + '_' + df_indicators['๊ตฌ๋ถ„๋ช…']
40
+ df_indicators_intermediate = df_indicators.pivot_table(
41
+ index=['์ถ•์ œ๋ช…', '๊ฐœ์ตœ๋…„๋„'],
42
+ columns='์ง€ํ‘œ๊ตฌ๋ถ„',
43
+ values='์ง€ํ‘œ๊ฐ’'
44
+ ).reset_index()
45
+ df_indicators_wide = pivot_by_year(df_indicators_intermediate, '์ถ•์ œ๋ช…', '๊ฐœ์ตœ๋…„๋„')
46
+ print("โœ… '์ฃผ์š” ์ง€ํ‘œ' ๋ฐ์ดํ„ฐ๋ฅผ Wide ํ˜•ํƒœ๋กœ ๋ณ€ํ™˜ํ–ˆ์Šต๋‹ˆ๋‹ค.")
47
+
48
+ # 3-3. '์„ฑ_์—ฐ๋ น๋ณ„ ๋ฐฉ๋ฌธ์ž' ๋ฐ์ดํ„ฐ ๋ณ€ํ™˜ (์ด ๋ฐ์ดํ„ฐ๋Š” ์—ฐ๋„ ์ •๋ณด๊ฐ€ ์—†์œผ๋ฏ€๋กœ ์ด์ „๊ณผ ๋™์ผ)
49
+ df_demographics_wide = df_demographics.pivot_table(
50
+ index='์ถ•์ œ๋ช…',
51
+ columns='์—ฐ๋ น๋Œ€',
52
+ values=['๋‚จ์„ฑ๋น„์œจ', '์—ฌ์„ฑ๋น„์œจ']
53
+ ).reset_index()
54
+ # ์ปฌ๋Ÿผ๋ช… ์ •๋ฆฌ
55
+ df_demographics_wide.columns = [f'{col[0]}_{col[1]}' if col[1] else col[0] for col in df_demographics_wide.columns]
56
+ df_demographics_wide.columns = [re.sub(r'[^A-Za-z0-9_๊ฐ€-ํžฃ]', '', col) for col in df_demographics_wide.columns]
57
+ print("โœ… '์„ฑ_์—ฐ๋ น๋ณ„ ๋ฐฉ๋ฌธ์ž' ๋ฐ์ดํ„ฐ๋ฅผ Wide ํ˜•ํƒœ๋กœ ๋ณ€ํ™˜ํ–ˆ์Šต๋‹ˆ๋‹ค.\n")
58
+
59
+
60
# --- 4. Merge all wide frames ---
# Start from the demographics frame and join the trend and indicator frames.
# how='outer' keeps festivals that appear in only one of the sources.
final_df = pd.merge(df_demographics_wide, df_trend_wide, on='์ถ•์ œ๋ช…', how='outer')
final_df = pd.merge(final_df, df_indicators_wide, on='์ถ•์ œ๋ช…', how='outer')
print("โœ… ๋ชจ๋“  ๋ฐ์ดํ„ฐ๋ฅผ ํ•˜๋‚˜์˜ DataFrame์œผ๋กœ ์ตœ์ข… ๋ณ‘ํ•ฉํ–ˆ์Šต๋‹ˆ๋‹ค.")


# --- 5. Inspect and save ---
print("\n๐ŸŽ‰ ์ตœ์ข… ํ†ตํ•ฉ ๋ฐ์ดํ„ฐ(Wide) ์ƒ˜ํ”Œ")
# Print only a representative subset: festival name, 2023 columns, male shares.
sample_cols = [col for col in final_df.columns if '2023' in col or '์ถ•์ œ๋ช…' in col or '๋‚จ์„ฑ' in col]
print(final_df[sample_cols].head())

print(f"\n- ์ตœ์ข… ๋ฐ์ดํ„ฐ๋Š” ์ด {len(final_df.columns)}๊ฐœ์˜ ์ปฌ๋Ÿผ๊ณผ {len(final_df)}๊ฐœ์˜ ํ–‰์œผ๋กœ ๊ตฌ์„ฑ๋ฉ๋‹ˆ๋‹ค.")

# Write the merged dataset; utf-8-sig keeps Korean readable in Excel.
final_df.to_csv('C:/projects/shcard_2025_bigcontest/data/festival_df.csv', index=False, encoding='utf-8-sig')
print("\n๐Ÿ’พ 'festival_df.csv' ํŒŒ์ผ์ด ์„ฑ๊ณต์ ์œผ๋กœ ์ €์žฅ๋˜์—ˆ์Šต๋‹ˆ๋‹ค.")