Commit · 81fb8ba
1 Parent(s): 0d480c5
Add recommender system
Files changed:
- app/app.py +1 -1
- app/database/models/user.py +7 -0
- app/database/models/user_action.py +32 -0
- app/database/models/user_search.py +29 -0
- app/domains/properties/controller.py +18 -4
- app/domains/properties/service.py +26 -2
- app/domains/property_verification/controller.py +8 -0
- app/domains/user_action/__init__.py +0 -0
- app/domains/user_action/controller.py +24 -0
- app/domains/user_action/dtos.py +11 -0
- app/domains/user_action/service.py +21 -0
- app/domains/user_search/__init__.py +0 -0
- app/domains/user_search/service.py +9 -0
- app/migrations/versions/2025-04-10_make_table_for_recommender_system_997d3eb808d6.py +122 -0
- poetry.lock +0 -0
- pyproject.toml +15 -0
- recommender/__init__.py +0 -0
- recommender/data.py +23 -0
- recommender/faiss.py +16 -0
- recommender/main.py +231 -0
- recommender/reranker.py +31 -0
- recommender/tower.py +34 -0
app/app.py
CHANGED
@@ -68,7 +68,7 @@ def on_startup():
 @get(path="/schema", include_in_schema=False)
 async def schema(request: Request) -> dict:
-    schema = request.
+    schema = request.openapi_schema
     return schema.to_schema()
app/database/models/user.py
CHANGED
@@ -4,6 +4,7 @@ from typing import Optional
 import uuid
 from sqlalchemy import (
     DateTime,
+    Numeric,
     String,
     Text,
     Boolean,
@@ -92,6 +93,12 @@ class User(BaseModel):
     tags: Mapped[list[Tag]] = relationship(
         "Tag", secondary=UserTag.__table__, lazy="selectin"
     )
+    min_price: Mapped[float] = mapped_column(
+        Numeric(12, 2, asdecimal=False), nullable=True
+    )
+    max_price: Mapped[float] = mapped_column(
+        Numeric(12, 2, asdecimal=False), nullable=True
+    )


 class UserSchema(BaseSchema):
app/database/models/user_action.py
ADDED
@@ -0,0 +1,32 @@
+from __future__ import annotations
+from datetime import datetime
+from typing import Optional
+import uuid
+from sqlalchemy import (
+    DateTime,
+    String,
+    Text,
+    Boolean,
+    ForeignKey,
+)
+from sqlalchemy.dialects.postgresql import UUID as PG_UUID
+from sqlalchemy.orm import Mapped, mapped_column, relationship
+
+from database.models.base import BaseModel
+
+
+class UserAction(BaseModel):
+    __tablename__ = "user_actions"
+    user_id: Mapped[Optional[uuid.UUID]] = mapped_column(
+        PG_UUID(as_uuid=True),
+        ForeignKey("users.id", ondelete="SET NULL"),
+        nullable=True,
+        unique=False,
+    )
+    property_id: Mapped[uuid.UUID] = mapped_column(
+        PG_UUID(as_uuid=True),
+        ForeignKey("properties.id", ondelete="SET NULL"),
+        nullable=True,
+        unique=False,
+    )
+    action: Mapped[str] = mapped_column(String, nullable=False, unique=False)
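UserAction is the implicit-feedback log the recommender trains on. A minimal sketch of recording one action with a plain async session (the DSN, session setup, and UUIDs are assumptions, not part of the commit):

import uuid
from sqlalchemy.ext.asyncio import async_sessionmaker, create_async_engine
from database.models.user_action import UserAction

engine = create_async_engine("postgresql+asyncpg://localhost/app")  # assumed DSN

async def log_like(user_id: uuid.UUID, property_id: uuid.UUID) -> None:
    # One row per event; repeated likes/views/unlikes accumulate into the score.
    async with async_sessionmaker(engine)() as session:
        session.add(UserAction(user_id=user_id, property_id=property_id, action="like"))
        await session.commit()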
app/database/models/user_search.py
ADDED
@@ -0,0 +1,29 @@
+from __future__ import annotations
+from typing import Optional
+import uuid
+from sqlalchemy import (
+    Numeric,
+    String,
+    ForeignKey,
+)
+from sqlalchemy.dialects.postgresql import UUID as PG_UUID
+from sqlalchemy.orm import Mapped, mapped_column
+from database.models.base import BaseModel
+
+
+class UserSearch(BaseModel):
+    __tablename__ = "user_searches"
+    user_id: Mapped[Optional[uuid.UUID]] = mapped_column(
+        PG_UUID(as_uuid=True),
+        ForeignKey("users.id", ondelete="SET NULL"),
+        nullable=True,
+        unique=False,
+    )
+    search_query: Mapped[Optional[str]] = mapped_column(String, nullable=True, unique=False)
+    type: Mapped[Optional[str]] = mapped_column(String, nullable=True, unique=False)
+    min_price: Mapped[Optional[float]] = mapped_column(
+        Numeric(12, 2, asdecimal=False), nullable=True
+    )
+    max_price: Mapped[Optional[float]] = mapped_column(
+        Numeric(12, 2, asdecimal=False), nullable=True
+    )
app/domains/properties/controller.py
CHANGED
@@ -52,10 +52,19 @@ class PropertyController(Controller):
     async def get_properties(
         self,
         params: PropertySearchParams,
+        user_id: Annotated[
+            Optional[uuid.UUID],
+            Parameter(
+                default=None,
+                title="User id",
+            ),
+        ],
         property_service: PropertyService,
         pagination: LimitOffset,
     ) -> OffsetPagination[Property]:
-        return await property_service.search(
+        return await property_service.search(
+            search_param=params, pagination=pagination, user_id=user_id
+        )

     @post(
         "/",
@@ -102,7 +111,9 @@ class PropertyController(Controller):
         else:
             user_id = None
         return property_service.to_schema(
-            await property_service.update_property(
+            await property_service.update_property(
+                property_id, data=data, user_id=user_id
+            ),
             schema_type=PropertySchema,
         )
@@ -155,6 +166,9 @@ class PropertyController(Controller):
         return await property_service.update_activation(
             property_id=property_id, activate=data.active, user_id=user_id
         )
+
     @get("/count", no_auth=True)
-    async def count_by_city(
-
+    async def count_by_city(
+        self, property_service: PropertyService, type: Optional[str]
+    ) -> Any:
+        return await property_service.count_by_city(type=type)
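Exercising the updated listing endpoint; a hedged sketch that assumes the controller is mounted at /properties on localhost:8000 (neither appears in this diff) and uses a placeholder UUID:

import httpx
from uuid import uuid4

resp = httpx.get(
    "http://localhost:8000/properties",
    params={
        "search": "balcony",      # new description filter
        "user_id": str(uuid4()),  # optional; lets the service log the search
        "limit": 10,
        "offset": 0,
    },
)
print(resp.json()["total"], len(resp.json()["items"]))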
app/domains/properties/service.py
CHANGED
@@ -14,6 +14,7 @@ from litestar.params import Parameter
 from litestar.openapi.spec.example import Example
 from litestar.exceptions import ValidationException, NotAuthorizedException
 from sqlalchemy.orm import joinedload, selectinload
+from domains.user_search.service import UserSearchService
 from database.models.property_type import PropertyType
 from domains.address.service import AddressService
 from database.models.user import User
@@ -58,6 +59,10 @@ class PropertyRepository(SQLAlchemyAsyncRepository[Property]):


 class PropertySearchParams(BaseModel):
+
+    search: Optional[str] = Parameter(
+        title="Search Query", description="Search query to check in the description"
+    )
     lat: Optional[float] = Parameter(
         None,
         title="Latitude",
@@ -164,6 +169,7 @@ class PropertyService(SQLAlchemyAsyncRepositoryService[Property]):
         self,
         search_param: PropertySearchParams,
         pagination: LimitOffset,
+        user_id: uuid.UUID | None = None,
     ) -> OffsetPagination[Property]:
         query = select(Property).options(
             joinedload(Property.address),
@@ -208,6 +214,9 @@ class PropertyService(SQLAlchemyAsyncRepositoryService[Property]):
         if search_param.min_sqm:
             query = query.where(Property.sqm >= search_param.min_sqm)

+        # Search query
+        if search_param.search:
+            query = query.where(Property.description.contains(search_param.search))
         # Apply Vietnam-specific filters
         if search_param.city:
             query = query.where(Property.address.city.ilike(f"%{search_param.city}%"))
@@ -243,7 +252,18 @@ class PropertyService(SQLAlchemyAsyncRepositoryService[Property]):
             .all(),
         )
         total = await self.count()
-
+
+        user_search_service = UserSearchService(session=self.repository.session)
+        await user_search_service.create(
+            {
+                "user_id": user_id,
+                "search_query": search_param.search,
+                "type": search_param.property_category,
+                "min_price": search_param.min_price,
+                "max_price": search_param.max_price,
+            },
+            auto_commit=True,
+        )
         return OffsetPagination(
             items=items[0],
             total=total,
@@ -414,6 +434,9 @@ async def provide_property_service(


 async def query_params_extractor(
+    search: Optional[str] = Parameter(
+        title="Search Query", description="Search query to check in the description"
+    ),
     lat: Optional[float] = Parameter(
         None,
         title="Latitude",
@@ -493,6 +516,7 @@ async def query_params_extractor(
     ),
 ) -> PropertySearchParams:
     return PropertySearchParams(
+        search=search,
         lat=lat,
         lng=lng,
         radius=radius,
@@ -527,7 +551,7 @@ async def fetch_city_image(city_name: str) -> str:
     data = response.json()
     if data.get("results"):
         result = data["results"][0]["urls"]["regular"]
-        city_name = city_name.replace(
+        city_name = city_name.replace(" ", "_")
         store.set(f"city_{city_name}", result)
         return result
     return None
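For reference, Column.contains() compiles to an infix LIKE, so the new description filter is a substring match rather than full-text search. A standalone sketch with a throwaway table (not this app's models):

from sqlalchemy import Column, MetaData, String, Table, select

t = Table("properties", MetaData(), Column("description", String))
stmt = select(t).where(t.c.description.contains("balcony"))
print(stmt)
# SELECT ... WHERE properties.description LIKE '%' || :description_1 || '%'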
app/domains/property_verification/controller.py
CHANGED
@@ -8,6 +8,7 @@ from database.models.user import User
 from domains.property_verification.dtos import (
     VerificationConfirmDTO,
 )
+from litestar.exceptions import NotAuthorizedException
 from domains.property_verification.service import (
     VerificationService,
     provide_verification_service,
@@ -63,3 +64,10 @@ class VerificationController(Controller):
             validation_method=data.method,
             code=data.code,
         )
+
+    @get('/allow')
+    async def check_allow(self, property_id: UUID, verification_service: VerificationService, request: Request[User, Token, Any]) -> PropertyVerification:
+        verification = await verification_service.get_one_or_none(PropertyVerification.user_id == request.user.id, PropertyVerification.property_id == property_id)
+        if not verification:
+            raise NotAuthorizedException("You are not allowed to review this property.")
+        return verification
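Calling the new allow check; a sketch that assumes the controller's base path is /verifications and that a bearer token is accepted (both are assumptions; neither appears in this hunk):

import httpx
from uuid import uuid4

resp = httpx.get(
    "http://localhost:8000/verifications/allow",
    params={"property_id": str(uuid4())},
    headers={"Authorization": "Bearer <token>"},
)
# 2xx with the PropertyVerification payload, or a not-authorized error otherwise
print(resp.status_code)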
app/domains/user_action/__init__.py
ADDED
File without changes
app/domains/user_action/controller.py
ADDED
@@ -0,0 +1,24 @@
+from litestar import Controller, post
+from litestar.di import Provide
+
+from database.models.user_action import UserAction
+from domains.user_action.dtos import CreateUserActionDTO
+from domains.user_action.service import (
+    UserActionService,
+    provide_user_action_service,
+)
+
+
+class UserActionController(Controller):
+    path = "actions"
+    tags = ["actions"]
+
+    dependencies = {"user_action_service": Provide(provide_user_action_service)}
+
+    @post(no_auth=True)
+    async def createAction(
+        self, body: CreateUserActionDTO, user_action_service: UserActionService
+    ) -> UserAction:
+        return await user_action_service.create(
+            body.to_dict(), auto_commit=True, auto_refresh=True
+        )
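Since the route is no_auth, a guest can post an action; a hedged sketch (host and UUID are placeholders):

import httpx
from uuid import uuid4

resp = httpx.post(
    "http://localhost:8000/actions",
    json={
        "action": "view",
        "user_id": None,              # guests allowed; the trainer maps this to "guest"
        "property_id": str(uuid4()),
    },
)
print(resp.status_code, resp.json())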
app/domains/user_action/dtos.py
ADDED
@@ -0,0 +1,11 @@
+from typing import Optional
+from uuid import UUID
+from pydantic import ConfigDict
+from database.models.base import BaseModel
+
+
+class CreateUserActionDTO(BaseModel):
+    model_config = ConfigDict(arbitrary_types_allowed=True)
+    action: str
+    user_id: Optional[UUID]
+    property_id: UUID
app/domains/user_action/service.py
ADDED
@@ -0,0 +1,21 @@
+from collections.abc import AsyncGenerator
+from database.models.user_action import UserAction
+from advanced_alchemy.repository import SQLAlchemyAsyncRepository
+from advanced_alchemy.service import SQLAlchemyAsyncRepositoryService
+from sqlalchemy.ext.asyncio import AsyncSession
+
+
+class UserActionRepository(SQLAlchemyAsyncRepository[UserAction]):
+    model_type = UserAction
+
+
+class UserActionService(SQLAlchemyAsyncRepositoryService[UserAction]):
+    repository_type = UserActionRepository
+
+
+async def provide_user_action_service(
+    db_session: AsyncSession,
+) -> AsyncGenerator[UserActionService, None]:
+
+    async with UserActionService.new(session=db_session) as service:
+        yield service
app/domains/user_search/__init__.py
ADDED
File without changes
app/domains/user_search/service.py
ADDED
@@ -0,0 +1,9 @@
+from database.models.user_search import UserSearch
+from database.models.user_action import UserAction
+from advanced_alchemy.repository import SQLAlchemyAsyncRepository
+from advanced_alchemy.service import SQLAlchemyAsyncRepositoryService
+
+
+class UserSearchRepository(SQLAlchemyAsyncRepository[UserSearch]):
+    model_type = UserSearch
+
+
+class UserSearchService(SQLAlchemyAsyncRepositoryService[UserSearch]):
+    repository_type = UserSearchRepository
app/migrations/versions/2025-04-10_make_table_for_recommender_system_997d3eb808d6.py
ADDED
@@ -0,0 +1,122 @@
+# type: ignore
+"""Make table for recommender system
+
+Revision ID: 997d3eb808d6
+Revises: dfa22bca0d19
+Create Date: 2025-04-10 02:59:05.940463
+
+"""
+
+import warnings
+from typing import TYPE_CHECKING
+
+import sqlalchemy as sa
+from alembic import op
+from advanced_alchemy.types import EncryptedString, EncryptedText, GUID, ORA_JSONB, DateTimeUTC
+from sqlalchemy import Text  # noqa: F401
+
+if TYPE_CHECKING:
+    from collections.abc import Sequence
+
+__all__ = ["downgrade", "upgrade", "schema_upgrades", "schema_downgrades", "data_upgrades", "data_downgrades"]
+
+sa.GUID = GUID
+sa.DateTimeUTC = DateTimeUTC
+sa.ORA_JSONB = ORA_JSONB
+sa.EncryptedString = EncryptedString
+sa.EncryptedText = EncryptedText
+
+# revision identifiers, used by Alembic.
+revision = '997d3eb808d6'
+down_revision = 'dfa22bca0d19'
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    with warnings.catch_warnings():
+        warnings.filterwarnings("ignore", category=UserWarning)
+        with op.get_context().autocommit_block():
+            schema_upgrades()
+            data_upgrades()
+
+
+def downgrade() -> None:
+    with warnings.catch_warnings():
+        warnings.filterwarnings("ignore", category=UserWarning)
+        with op.get_context().autocommit_block():
+            data_downgrades()
+            schema_downgrades()
+
+
+def schema_upgrades() -> None:
+    """schema upgrade migrations go here."""
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.create_table('user_actions',
+    sa.Column('id', sa.UUID(), nullable=False),
+    sa.Column('user_id', sa.UUID(), nullable=True),
+    sa.Column('property_id', sa.UUID(), nullable=True),
+    sa.Column('action', sa.String(), nullable=False),
+    sa.Column('created_at', sa.DateTime(timezone=True), nullable=False),
+    sa.Column('updated_at', sa.DateTime(timezone=True), nullable=False),
+    sa.ForeignKeyConstraint(['property_id'], ['properties.id'], name=op.f('fk_user_actions_property_id_properties'), ondelete='SET NULL'),
+    sa.ForeignKeyConstraint(['user_id'], ['users.id'], name=op.f('fk_user_actions_user_id_users'), ondelete='SET NULL'),
+    sa.PrimaryKeyConstraint('id', name=op.f('pk_user_actions')),
+    sa.UniqueConstraint('id', name=op.f('uq_user_actions_id'))
+    )
+    op.create_table('user_searches',
+    sa.Column('id', sa.UUID(), nullable=False),
+    sa.Column('user_id', sa.UUID(), nullable=True),
+    sa.Column('search_query', sa.String(), nullable=True),
+    sa.Column('type', sa.String(), nullable=True),
+    sa.Column('min_price', sa.Numeric(precision=12, scale=2, asdecimal=False), nullable=True),
+    sa.Column('max_price', sa.Numeric(precision=12, scale=2, asdecimal=False), nullable=True),
+    sa.Column('created_at', sa.DateTime(timezone=True), nullable=False),
+    sa.Column('updated_at', sa.DateTime(timezone=True), nullable=False),
+    sa.ForeignKeyConstraint(['user_id'], ['users.id'], name=op.f('fk_user_searches_user_id_users'), ondelete='SET NULL'),
+    sa.PrimaryKeyConstraint('id', name=op.f('pk_user_searches')),
+    sa.UniqueConstraint('id', name=op.f('uq_user_searches_id'))
+    )
+    with op.batch_alter_table('banners', schema=None) as batch_op:
+        batch_op.create_unique_constraint(batch_op.f('uq_banners_id'), ['id'])
+
+    with op.batch_alter_table('users', schema=None) as batch_op:
+        batch_op.add_column(sa.Column('min_price', sa.Numeric(precision=12, scale=2, asdecimal=False), nullable=True))
+        batch_op.add_column(sa.Column('max_price', sa.Numeric(precision=12, scale=2, asdecimal=False), nullable=True))
+
+    # ### end Alembic commands ###
+
+
+def schema_downgrades() -> None:
+    """schema downgrade migrations go here."""
+    # ### commands auto generated by Alembic - please adjust! ###
+    with op.batch_alter_table('users', schema=None) as batch_op:
+        batch_op.drop_column('max_price')
+        batch_op.drop_column('min_price')
+
+    with op.batch_alter_table('banners', schema=None) as batch_op:
+        batch_op.drop_constraint(batch_op.f('uq_banners_id'), type_='unique')
+
+    op.drop_table('user_searches')
+    op.drop_table('user_actions')
+    # ### end Alembic commands ###
+
+
+def data_upgrades() -> None:
+    """Add any optional data upgrade migrations here!"""
+
+
+def data_downgrades() -> None:
+    """Add any optional data downgrade migrations here!"""
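Applying the revision with Alembic's Python API rather than the CLI; a sketch assuming an alembic.ini at the project root:

from alembic import command
from alembic.config import Config

cfg = Config("alembic.ini")              # assumed location
command.upgrade(cfg, "997d3eb808d6")     # or "head"
command.downgrade(cfg, "dfa22bca0d19")   # revert to the parent revision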
poetry.lock
CHANGED
The diff for this file is too large to render.
pyproject.toml
CHANGED
@@ -25,11 +25,26 @@ dependencies = [
     "requests (>=2.32.3,<3.0.0)",
     "qrcode (>=8.1,<9.0)",
     "pillow (>=11.1.0,<12.0.0)",
+    "torch (>=2.6.0,<3.0.0)",
+    "torchvision (>=0.21.0,<0.22.0)",
+    "scikit-learn (>=1.6.1,<2.0.0)",
+    "pandas (>=2.2.3,<3.0.0)",
+    "faiss-cpu (>=1.10.0,<2.0.0)",
+    "sentence-transformers (>=4.0.2,<5.0.0)",
 ]

 [tool.poetry]
 package-mode = false

+[[tool.poetry.source]]
+name = "pytorch_cpu"
+url = "https://download.pytorch.org/whl/cpu"
+priority = "explicit"
+
+
+[tool.poetry.dependencies]
+torch = {source = "pytorch_cpu"}
+torchvision = {source = "pytorch_cpu"}
 [build-system]
 requires = ["poetry-core>=2.0.0,<3.0.0"]
 build-backend = "poetry.core.masonry.api"
recommender/__init__.py
ADDED
File without changes
recommender/data.py
ADDED
@@ -0,0 +1,23 @@
+from sentence_transformers import SentenceTransformer
+from torch.utils.data import Dataset
+import pandas as pd
+import numpy as np
+from dotenv import load_dotenv
+
+load_dotenv()
+search_model = SentenceTransformer("all-MiniLM-L6-v2")
+
+
+class RecommenderDataset(Dataset):
+    def __init__(self, user_feats, prop_feats, targets):
+        self.user_feats = user_feats
+        self.prop_feats = prop_feats
+        self.targets = targets
+
+    def __len__(self):
+        return len(self.targets)
+
+    def __getitem__(self, idx):
+        return {"user": self.user_feats[idx],
+                "property": self.prop_feats[idx],
+                "target": self.targets[idx]}
+
+
+def get_search_embedding(query):
+    if pd.isnull(query) or query.strip() == "":
+        return np.zeros(16, dtype=np.float32)
+    # all-MiniLM-L6-v2 emits 384-dim vectors; keep the first 16 dims to match
+    # the 16-column search-embedding layout assumed downstream.
+    return search_model.encode(query)[:16]
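A shape check for the dataset and the truncated search embedding; toy arrays, assuming the recommender package is importable from the project root:

import numpy as np
from torch.utils.data import DataLoader
from recommender.data import RecommenderDataset, get_search_embedding

users = np.random.rand(8, 21).astype(np.float32)   # 5 profile dims + 16 search dims
props = np.random.rand(8, 5).astype(np.float32)    # price, rating, lat, lon, type_enc
scores = np.random.rand(8).astype(np.float32)
batch = next(iter(DataLoader(RecommenderDataset(users, props, scores), batch_size=4)))
print(batch["user"].shape, batch["property"].shape)       # [4, 21] and [4, 5]
print(get_search_embedding("2 bedroom apartment").shape)  # (16,)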
recommender/faiss.py
ADDED
@@ -0,0 +1,16 @@
+import torch
+import numpy as np
+import faiss
+
+
+def build_faiss_index(model, prop_df):
+    # Property features must match PropertyTower's input_dim (5).
+    prop_vec = prop_df[["price", "average_rating", "lat", "lon", "type_enc"]].values.astype(np.float32)
+    ids = prop_df["id"].values
+    prop_tensor = torch.tensor(prop_vec, dtype=torch.float32)
+    with torch.no_grad():
+        emb = model.prop_tower(prop_tensor).numpy()
+    faiss.normalize_L2(emb)
+    dim = emb.shape[1]
+    index = faiss.IndexFlatIP(dim)
+    index.add(emb)
+    faiss.write_index(index, "property_faiss.index")
+    np.save("property_id_map.npy", ids)
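Reading the artifacts back and querying; a sketch with untrained weights (allow_pickle is needed because the id array holds UUID objects):

import faiss
import numpy as np
import torch
from recommender.tower import TwoTowerRec

model = TwoTowerRec()  # load real weights in practice
with torch.no_grad():
    u_emb = model.user_tower(torch.rand(1, 21)).numpy()
faiss.normalize_L2(u_emb)
index = faiss.read_index("property_faiss.index")
ids = np.load("property_id_map.npy", allow_pickle=True)
dists, idxs = index.search(u_emb, 5)
print(ids[idxs[0]], dists[0])  # top-5 property ids by inner product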
recommender/main.py
ADDED
@@ -0,0 +1,231 @@
+import os
+import torch
+import torch.nn as nn
+import torch.optim as optim
+from torch.utils.data import Dataset, DataLoader
+import pandas as pd
+import numpy as np
+import faiss
+from sqlalchemy import create_engine
+from datetime import datetime
+from sklearn.model_selection import train_test_split
+from sentence_transformers import SentenceTransformer
+from dotenv import load_dotenv
+from recommender.data import RecommenderDataset, get_search_embedding
+from recommender.faiss import build_faiss_index
+from recommender.reranker import Reranker, build_rerank_data
+from recommender.tower import TwoTowerRec, cosine_loss
+
+load_dotenv()
+property_field_list = ["price", "average_rating", "lat_prop", "lon_prop", "type_enc"]
+user_field_list = ["age", "min_price", "max_price", "lat", "lon"]
+
+# -------------------- Step 1: Load Data from PostgreSQL --------------------
+engine = create_engine(os.environ.get("DB_URL"))
+user_df = pd.read_sql(
+    "SELECT u.id, u.name, u.email, u.phone, u.verified, u.age, u.min_price, u.max_price, u.address_id, a.street, a.city, a.postal_code, a.neighborhood, a.latitude, a.longitude, a.coordinates, a.geohash FROM users u LEFT JOIN addresses a ON u.address_id = a.id;",
+    engine,
+)
+property_df = pd.read_sql(
+    "SELECT p.title, p.id, p.property_category, p.property_type_id, p.transaction_type, p.price, p.bedrooms, p.bathrooms, p.sqm, p.description, p.average_rating, p.status, p.owner_id, p.address_id, a.street, a.city, a.postal_code, a.neighborhood, a.latitude, a.longitude, a.coordinates, a.geohash FROM properties p LEFT JOIN addresses a ON p.address_id = a.id;",
+    engine,
+)
+actions_df = pd.read_sql(
+    "SELECT user_id, property_id, action, created_at FROM user_actions", engine
+)
+search_df = pd.read_sql(
+    "SELECT user_id, search_query, created_at, min_price, max_price, type FROM user_searches",
+    engine,
+)
+
+# -------------------- Preprocess Interactions --------------------
+# Guest actions: fill NaN user_id with "guest"
+actions_df["user_id"] = actions_df["user_id"].fillna("guest")
+
+
+def compute_score(row):
+    # Exponential recency decay: exp(-age_in_days / 30)
+    decay = np.exp(-((datetime.now() - row["created_at"]).days) / 30)
+    if row["action"] == "like":
+        return 3 * decay
+    elif row["action"] == "view":
+        return 1 * decay
+    elif row["action"] == "unlike":
+        return -2 * decay
+    return 0
+
+
+actions_df["created_at"] = pd.to_datetime(actions_df["created_at"])
+actions_df["score"] = actions_df.apply(compute_score, axis=1)
+interaction_df = (
+    actions_df.groupby(["user_id", "property_id"])
+    .agg(score=("score", "sum"))
+    .reset_index()
+)
+
+# -------------------- Process Search History with SentenceTransformer --------------------
+
+search_df["created_at"] = pd.to_datetime(search_df["created_at"])
+search_embeddings = search_df["search_query"].apply(get_search_embedding)
+search_emb_df = pd.DataFrame(search_embeddings.tolist(), index=search_df.index)
+search_df = pd.concat([search_df, search_emb_df], axis=1)
+search_emb_cols = [f"s{i}" for i in range(16)]
+search_df_cols = list(search_df.columns[:-16]) + search_emb_cols
+search_df.columns = search_df_cols
+# Average each user's search-embedding columns; only these are used downstream.
+search_agg = search_df.groupby("user_id")[search_emb_cols].mean().reset_index()
+
+# Use Latitude/Longitude for Location
+user_df["lat"] = pd.to_numeric(user_df["latitude"], errors="coerce").fillna(0)
+user_df["lon"] = pd.to_numeric(user_df["longitude"], errors="coerce").fillna(0)
+user_df["min_price"] = pd.to_numeric(user_df["min_price"], errors="coerce").fillna(0)
+user_df["max_price"] = pd.to_numeric(user_df["max_price"], errors="coerce").fillna(0)
+property_df["lat"] = pd.to_numeric(property_df["latitude"], errors="coerce").fillna(0)
+property_df["lon"] = pd.to_numeric(property_df["longitude"], errors="coerce").fillna(0)
+# Keep property_category for the property type filter and encode it for the model.
+property_df["type_enc"] = property_df["property_category"].astype("category").cat.codes
+
+# Merge Data for Training
+df = interaction_df.merge(
+    user_df, left_on="user_id", right_on="id", how="left", suffixes=("", "_user")
+)
+df = df.merge(
+    property_df,
+    left_on="property_id",
+    right_on="id",
+    how="left",
+    suffixes=("", "_prop"),
+)
+df = df.merge(
+    search_agg[["user_id"] + search_emb_cols], on="user_id", how="left"
+).fillna(0)
+# For guest actions missing user data, fill defaults.
+default_age = user_df["age"].mean()
+default_lat = user_df["lat"].median()
+default_lon = user_df["lon"].median()
+default_min_price = user_df["min_price"].median()
+default_max_price = user_df["max_price"].median()
+df["age"].fillna(default_age, inplace=True)
+df["min_price"].fillna(default_min_price, inplace=True)
+df["max_price"].fillna(default_max_price, inplace=True)
+df["lat"].fillna(default_lat, inplace=True)
+df["lon"].fillna(default_lon, inplace=True)
+
+# Feature Setup
+# User features: [age, min_price, max_price, lat, lon] + search embedding (16 dims) = 5 + 16 = 21 dims
+user_feats = df[user_field_list].values.astype(np.float32)
+search_feats = df[search_emb_cols].values.astype(np.float32)
+user_features = np.concatenate([user_feats, search_feats], axis=1)
+
+# Property features: [price, rating, lat, lon, type_enc] = 5 dims
+property_feats = df[property_field_list].values.astype(np.float32)
+ratings = df["score"].values.astype(np.float32)
+
+dataset = RecommenderDataset(user_features, property_feats, ratings)
+train_loader = DataLoader(dataset, batch_size=256, shuffle=True)
+
+
+two_tower = TwoTowerRec()
+optimizer = optim.Adagrad(two_tower.parameters(), lr=0.1)
+for epoch in range(5):
+    total_loss = 0
+    for batch in train_loader:
+        optimizer.zero_grad()
+        user = batch["user"].float()
+        prop = batch["property"].float()
+        u_emb, p_emb = two_tower(user, prop)
+        loss = cosine_loss(u_emb, p_emb)
+        loss.backward()
+        optimizer.step()
+        total_loss += loss.item()
+    print(f"Epoch {epoch+1} Loss: {total_loss/len(train_loader):.4f}")
+torch.save(two_tower.state_dict(), "two_tower_rec.pth")
+
+build_faiss_index(two_tower, property_df)
+
+
+# Prepare data for training the reranker
+X_rerank, y_rerank = build_rerank_data(two_tower, df, search_emb_cols)
+X_train, X_val, y_train, y_val = train_test_split(X_rerank, y_rerank, test_size=0.2)
+X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
+y_train_tensor = torch.tensor(y_train, dtype=torch.float32).unsqueeze(1)
+X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
+y_val_tensor = torch.tensor(y_val, dtype=torch.float32).unsqueeze(1)
+
+# Train reranker
+reranker = Reranker()
+criterion = nn.MSELoss()
+opt_rerank = optim.Adam(reranker.parameters(), lr=0.001)
+for epoch in range(5):
+    reranker.train()
+    opt_rerank.zero_grad()
+    preds = reranker(X_train_tensor)
+    loss = criterion(preds, y_train_tensor)
+    loss.backward()
+    opt_rerank.step()
+    reranker.eval()
+    with torch.no_grad():
+        val_loss = criterion(reranker(X_val_tensor), y_val_tensor)
+    print(
+        f"Reranker Epoch {epoch+1}: Loss {loss.item():.4f}, Val Loss {val_loss.item():.4f}"
+    )
+torch.save(reranker.state_dict(), "reranker.pth")
+
+
+# -------------------- Inference: Retrieval + Reranking --------------------
+def retrieve_candidates(model, user_vector, top_k=20):
+    user_tensor = torch.tensor(user_vector, dtype=torch.float32)
+    with torch.no_grad():
+        u_emb = model.user_tower(user_tensor).numpy()
+    faiss.normalize_L2(u_emb)
+    index = faiss.read_index("property_faiss.index")
+    ids = np.load("property_id_map.npy", allow_pickle=True)
+    dists, idxs = index.search(u_emb, top_k)
+    return ids[idxs[0]], u_emb
+
+
+def recommend(
+    two_tower, reranker, user_raw_vector, property_type_filter=None, boost=0.1
+):
+    user_raw = np.array([user_raw_vector], dtype=np.float32)
+    candidate_ids, u_emb = retrieve_candidates(two_tower, user_raw, top_k=20)
+    results = []
+    for pid in candidate_ids:
+        prop = property_df[property_df["id"] == pid]
+        # Raw property_df columns (no "_prop" suffix outside the merged frame).
+        p_vec = prop[["price", "average_rating", "lat", "lon", "type_enc"]].values.astype(np.float32)
+        p_tensor = torch.tensor(p_vec, dtype=torch.float32)
+        with torch.no_grad():
+            p_emb = two_tower.prop_tower(p_tensor).numpy()
+        combined = np.concatenate([u_emb, p_emb], axis=1)
+        combined_tensor = torch.tensor(combined, dtype=torch.float32)
+        with torch.no_grad():
+            score = reranker(combined_tensor).numpy()[0][0]
+        # Boost score if property type matches filter
+        if property_type_filter is not None:
+            prop_type = prop["property_category"].iloc[0]
+            if prop_type == property_type_filter:
+                score += boost
+        results.append((pid, score))
+    results.sort(key=lambda x: -x[1])
+    return [pid for pid, _ in results]
+
+
+# -------------------- Example Inference Usage --------------------
+# For a user with: age=30, min_price=0, max_price=0, lat/lon from address,
+# plus aggregated search embedding (if no search history, zeros)
+example_user_features = np.array(
+    [30, 0, 0, 40.7128, -74.0060] + [0] * 16, dtype=np.float32
+)
+# If a property type filter is desired, e.g., "Apartment"
+recommended_properties = recommend(
+    two_tower, reranker, example_user_features, property_type_filter="Apartment"
+)
+print("Recommended property IDs:", recommended_properties)
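The recency weighting in compute_score is exp(-age_days / 30), so a like is worth its full 3 points today, about 1.1 after a month, and close to nothing after three; a quick check:

import numpy as np

for days in (0, 7, 30, 90):
    print(days, round(float(3 * np.exp(-days / 30)), 3))
# 0 3.0 / 7 2.376 / 30 1.104 / 90 0.149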
recommender/reranker.py
ADDED
@@ -0,0 +1,31 @@
+import torch.nn as nn
+import torch
+import numpy as np
+
+
+class Reranker(nn.Module):
+    def __init__(self, input_dim=64, hidden_dim=64):
+        super().__init__()
+        self.model = nn.Sequential(
+            nn.Linear(input_dim, hidden_dim),
+            nn.ReLU(),
+            nn.Dropout(0.2),
+            nn.Linear(hidden_dim, 1)
+        )
+
+    def forward(self, x):
+        return self.model(x)
+
+
+def build_rerank_data(two_tower, df, search_emb_cols):
+    u_input = df[["age", "min_price", "max_price", "lat", "lon"]].values.astype(np.float32)
+    search_input = df[search_emb_cols].values.astype(np.float32)
+    user_input = np.concatenate([u_input, search_input], axis=1)
+    # Property features from the merged frame (location columns carry the "_prop" suffix).
+    p_input = df[
+        ["price", "average_rating", "lat_prop", "lon_prop", "type_enc"]
+    ].values.astype(np.float32)
+    targets = df["score"].values.astype(np.float32)
+    u_tensor = torch.tensor(user_input, dtype=torch.float32)
+    p_tensor = torch.tensor(p_input, dtype=torch.float32)
+    with torch.no_grad():
+        u_emb = two_tower.user_tower(u_tensor).numpy()
+        p_emb = two_tower.prop_tower(p_tensor).numpy()
+    X = np.concatenate([u_emb, p_emb], axis=1)
+    return X, targets
recommender/tower.py
ADDED
@@ -0,0 +1,34 @@
+import torch
+import torch.nn as nn
+
+
+class UserTower(nn.Module):
+    def __init__(self, input_dim=21, hidden_dim=64, out_dim=32):
+        super().__init__()
+        self.model = nn.Sequential(
+            nn.Linear(input_dim, hidden_dim),
+            nn.ReLU(),
+            nn.Linear(hidden_dim, out_dim)
+        )
+
+    def forward(self, x):
+        return self.model(x)
+
+
+class PropertyTower(nn.Module):
+    def __init__(self, input_dim=5, hidden_dim=64, out_dim=32):
+        super().__init__()
+        self.model = nn.Sequential(
+            nn.Linear(input_dim, hidden_dim),
+            nn.ReLU(),
+            nn.Linear(hidden_dim, out_dim)
+        )
+
+    def forward(self, x):
+        return self.model(x)
+
+
+class TwoTowerRec(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.user_tower = UserTower()
+        self.prop_tower = PropertyTower()
+
+    def forward(self, user, prop):
+        return self.user_tower(user), self.prop_tower(prop)
+
+
+def cosine_loss(u, p, margin=0.5):
+    cos_sim = nn.functional.cosine_similarity(u, p)
+    loss = torch.clamp(margin - cos_sim, min=0).mean()
+    return loss
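A sanity check of the towers and the hinge-style cosine loss (random inputs; the loss goes to zero once the batch cosine similarity clears the 0.5 margin):

import torch
from recommender.tower import TwoTowerRec, cosine_loss

model = TwoTowerRec()
u = torch.rand(4, 21)   # [age, min_price, max_price, lat, lon] + 16 search dims
p = torch.rand(4, 5)    # [price, rating, lat, lon, type_enc]
with torch.no_grad():
    u_emb, p_emb = model(u, p)
    print(u_emb.shape, p_emb.shape)        # torch.Size([4, 32]) for both towers
    print(cosine_loss(u_emb, p_emb).item())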
|