Commit · 81fb8ba
1 Parent(s): 0d480c5
Add recommender system
Files changed:
- app/app.py +1 -1
- app/database/models/user.py +7 -0
- app/database/models/user_action.py +32 -0
- app/database/models/user_search.py +29 -0
- app/domains/properties/controller.py +18 -4
- app/domains/properties/service.py +26 -2
- app/domains/property_verification/controller.py +8 -0
- app/domains/user_action/__init__.py +0 -0
- app/domains/user_action/controller.py +24 -0
- app/domains/user_action/dtos.py +11 -0
- app/domains/user_action/service.py +21 -0
- app/domains/user_search/__init__.py +0 -0
- app/domains/user_search/service.py +9 -0
- app/migrations/versions/2025-04-10_make_table_for_recommender_system_997d3eb808d6.py +122 -0
- poetry.lock +0 -0
- pyproject.toml +15 -0
- recommender/__init__.py +0 -0
- recommender/data.py +23 -0
- recommender/faiss.py +16 -0
- recommender/main.py +231 -0
- recommender/reranker.py +31 -0
- recommender/tower.py +34 -0
app/app.py
CHANGED
@@ -68,7 +68,7 @@ def on_startup():
 @get(path="/schema", include_in_schema=False)
 async def schema(request: Request) -> dict:
-    schema = request.
+    schema = request.openapi_schema
     return schema.to_schema()
app/database/models/user.py
CHANGED
@@ -4,6 +4,7 @@ from typing import Optional
 import uuid
 from sqlalchemy import (
     DateTime,
+    Numeric,
     String,
     Text,
     Boolean,
@@ -92,6 +93,12 @@ class User(BaseModel):
     tags: Mapped[list[Tag]] = relationship(
         "Tag", secondary=UserTag.__table__, lazy="selectin"
     )
+    min_price: Mapped[float] = mapped_column(
+        Numeric(12, 2, asdecimal=False), nullable=True
+    )
+    max_price: Mapped[float] = mapped_column(
+        Numeric(12, 2, asdecimal=False), nullable=True
+    )


 class UserSchema(BaseSchema):
app/database/models/user_action.py
ADDED
@@ -0,0 +1,32 @@
+from __future__ import annotations
+from datetime import datetime
+from typing import Optional
+import uuid
+from sqlalchemy import (
+    DateTime,
+    String,
+    Text,
+    Boolean,
+    ForeignKey,
+)
+from sqlalchemy.dialects.postgresql import UUID as PG_UUID
+from sqlalchemy.orm import Mapped, mapped_column, relationship
+
+from database.models.base import BaseModel
+
+
+class UserAction(BaseModel):
+    __tablename__ = "user_actions"
+    user_id: Mapped[Optional[uuid.UUID]] = mapped_column(
+        PG_UUID(as_uuid=True),
+        ForeignKey("users.id", ondelete="SET NULL"),
+        nullable=True,
+        unique=False,
+    )
+    property_id: Mapped[uuid.UUID] = mapped_column(
+        PG_UUID(as_uuid=True),
+        ForeignKey("properties.id", ondelete="SET NULL"),
+        nullable=True,
+        unique=False,
+    )
+    action: Mapped[str] = mapped_column(String, nullable=False, unique=False)
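UserAction is the implicit-feedback log the recommender trains on. A minimal sketch of recording one action with a plain async session (the DSN, session setup, and UUIDs are assumptions, not part of the commit):

import uuid
from sqlalchemy.ext.asyncio import async_sessionmaker, create_async_engine
from database.models.user_action import UserAction

engine = create_async_engine("postgresql+asyncpg://localhost/app")  # assumed DSN

async def log_like(user_id: uuid.UUID, property_id: uuid.UUID) -> None:
    # One row per event; repeated likes/views/unlikes accumulate into the score.
    async with async_sessionmaker(engine)() as session:
        session.add(UserAction(user_id=user_id, property_id=property_id, action="like"))
        await session.commit()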
app/database/models/user_search.py
ADDED
@@ -0,0 +1,29 @@
+from __future__ import annotations
+from typing import Optional
+import uuid
+from sqlalchemy import (
+    Numeric,
+    String,
+    ForeignKey,
+)
+from sqlalchemy.dialects.postgresql import UUID as PG_UUID
+from sqlalchemy.orm import Mapped, mapped_column
+from database.models.base import BaseModel
+
+
+class UserSearch(BaseModel):
+    __tablename__ = "user_searches"
+    user_id: Mapped[Optional[uuid.UUID]] = mapped_column(
+        PG_UUID(as_uuid=True),
+        ForeignKey("users.id", ondelete="SET NULL"),
+        nullable=True,
+        unique=False,
+    )
+    search_query: Mapped[Optional[str]] = mapped_column(String, nullable=True, unique=False)
+    type: Mapped[Optional[str]] = mapped_column(String, nullable=True, unique=False)
+    min_price: Mapped[Optional[float]] = mapped_column(
+        Numeric(12, 2, asdecimal=False), nullable=True
+    )
+    max_price: Mapped[Optional[float]] = mapped_column(
+        Numeric(12, 2, asdecimal=False), nullable=True
+    )
app/domains/properties/controller.py
CHANGED
@@ -52,10 +52,19 @@ class PropertyController(Controller):
     async def get_properties(
         self,
         params: PropertySearchParams,
+        user_id: Annotated[
+            Optional[uuid.UUID],
+            Parameter(
+                default=None,
+                title="User id",
+            ),
+        ],
         property_service: PropertyService,
         pagination: LimitOffset,
     ) -> OffsetPagination[Property]:
-        return await property_service.search(
+        return await property_service.search(
+            search_param=params, pagination=pagination, user_id=user_id
+        )

     @post(
         "/",
@@ -102,7 +111,9 @@ class PropertyController(Controller):
         else:
             user_id = None
         return property_service.to_schema(
-            await property_service.update_property(
+            await property_service.update_property(
+                property_id, data=data, user_id=user_id
+            ),
             schema_type=PropertySchema,
         )
@@ -155,6 +166,9 @@ class PropertyController(Controller):
         return await property_service.update_activation(
             property_id=property_id, activate=data.active, user_id=user_id
         )
+
     @get("/count", no_auth=True)
-    async def count_by_city(
-
+    async def count_by_city(
+        self, property_service: PropertyService, type: Optional[str]
+    ) -> Any:
+        return await property_service.count_by_city(type=type)
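Exercising the updated listing endpoint; a hedged sketch that assumes the controller is mounted at /properties on localhost:8000 (neither appears in this diff) and uses a placeholder UUID:

import httpx
from uuid import uuid4

resp = httpx.get(
    "http://localhost:8000/properties",
    params={
        "search": "balcony",      # new description filter
        "user_id": str(uuid4()),  # optional; lets the service log the search
        "limit": 10,
        "offset": 0,
    },
)
print(resp.json()["total"], len(resp.json()["items"]))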
app/domains/properties/service.py
CHANGED
@@ -14,6 +14,7 @@ from litestar.params import Parameter
 from litestar.openapi.spec.example import Example
 from litestar.exceptions import ValidationException, NotAuthorizedException
 from sqlalchemy.orm import joinedload, selectinload
+from domains.user_search.service import UserSearchService
 from database.models.property_type import PropertyType
 from domains.address.service import AddressService
 from database.models.user import User
@@ -58,6 +59,10 @@ class PropertyRepository(SQLAlchemyAsyncRepository[Property]):


 class PropertySearchParams(BaseModel):
+
+    search: Optional[str] = Parameter(
+        title="Search Query", description="Search query to check in the description"
+    )
     lat: Optional[float] = Parameter(
         None,
         title="Latitude",
@@ -164,6 +169,7 @@ class PropertyService(SQLAlchemyAsyncRepositoryService[Property]):
         self,
         search_param: PropertySearchParams,
         pagination: LimitOffset,
+        user_id: uuid.UUID | None = None,
     ) -> OffsetPagination[Property]:
         query = select(Property).options(
             joinedload(Property.address),
@@ -208,6 +214,9 @@ class PropertyService(SQLAlchemyAsyncRepositoryService[Property]):
         if search_param.min_sqm:
             query = query.where(Property.sqm >= search_param.min_sqm)

+        # Search query
+        if search_param.search:
+            query = query.where(Property.description.contains(search_param.search))
         # Apply Vietnam-specific filters
         if search_param.city:
             query = query.where(Property.address.city.ilike(f"%{search_param.city}%"))
@@ -243,7 +252,18 @@ class PropertyService(SQLAlchemyAsyncRepositoryService[Property]):
             .all(),
         )
         total = await self.count()
-
+
+        user_search_service = UserSearchService(session=self.repository.session)
+        await user_search_service.create(
+            {
+                "user_id": user_id,
+                "search_query": search_param.search,
+                "type": search_param.property_category,
+                "min_price": search_param.min_price,
+                "max_price": search_param.max_price,
+            },
+            auto_commit=True,
+        )
         return OffsetPagination(
             items=items[0],
             total=total,
@@ -414,6 +434,9 @@ async def provide_property_service(


 async def query_params_extractor(
+    search: Optional[str] = Parameter(
+        title="Search Query", description="Search query to check in the description"
+    ),
     lat: Optional[float] = Parameter(
         None,
         title="Latitude",
@@ -493,6 +516,7 @@ async def query_params_extractor(
     ),
 ) -> PropertySearchParams:
     return PropertySearchParams(
+        search=search,
         lat=lat,
         lng=lng,
         radius=radius,
@@ -527,7 +551,7 @@ async def fetch_city_image(city_name: str) -> str:
     data = response.json()
     if data.get("results"):
         result = data["results"][0]["urls"]["regular"]
-        city_name = city_name.replace(
+        city_name = city_name.replace(" ", "_")
         store.set(f"city_{city_name}", result)
         return result
     return None
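For reference, Column.contains() compiles to an infix LIKE, so the new description filter is a substring match rather than full-text search. A standalone sketch with a throwaway table (not this app's models):

from sqlalchemy import Column, MetaData, String, Table, select

t = Table("properties", MetaData(), Column("description", String))
stmt = select(t).where(t.c.description.contains("balcony"))
print(stmt)
# SELECT ... WHERE properties.description LIKE '%' || :description_1 || '%'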
app/domains/property_verification/controller.py
CHANGED
@@ -8,6 +8,7 @@ from database.models.user import User
 from domains.property_verification.dtos import (
     VerificationConfirmDTO,
 )
+from litestar.exceptions import NotAuthorizedException
 from domains.property_verification.service import (
     VerificationService,
     provide_verification_service,
@@ -63,3 +64,10 @@ class VerificationController(Controller):
             validation_method=data.method,
             code=data.code,
         )
+
+    @get('/allow')
+    async def check_allow(self, property_id: UUID, verification_service: VerificationService, request: Request[User, Token, Any]) -> PropertyVerification:
+        verification = await verification_service.get_one_or_none(PropertyVerification.user_id == request.user.id, PropertyVerification.property_id == property_id)
+        if not verification:
+            raise NotAuthorizedException("You are not allowed to review this property.")
+        return verification
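Calling the new allow check; a sketch that assumes the controller's base path is /verifications and that a bearer token is accepted (both are assumptions; neither appears in this hunk):

import httpx
from uuid import uuid4

resp = httpx.get(
    "http://localhost:8000/verifications/allow",
    params={"property_id": str(uuid4())},
    headers={"Authorization": "Bearer <token>"},
)
# 2xx with the PropertyVerification payload, or a not-authorized error otherwise
print(resp.status_code)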
app/domains/user_action/__init__.py
ADDED
File without changes
app/domains/user_action/controller.py
ADDED
@@ -0,0 +1,24 @@
+from litestar import Controller, post
+from litestar.di import Provide
+
+from database.models.user_action import UserAction
+from domains.user_action.dtos import CreateUserActionDTO
+from domains.user_action.service import (
+    UserActionService,
+    provide_user_action_service,
+)
+
+
+class UserActionController(Controller):
+    path = "actions"
+    tags = ["actions"]
+
+    dependencies = {"user_action_service": Provide(provide_user_action_service)}
+
+    @post(no_auth=True)
+    async def createAction(
+        self, body: CreateUserActionDTO, user_action_service: UserActionService
+    ) -> UserAction:
+        return await user_action_service.create(
+            body.to_dict(), auto_commit=True, auto_refresh=True
+        )
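Since the route is no_auth, a guest can post an action; a hedged sketch (host and UUID are placeholders):

import httpx
from uuid import uuid4

resp = httpx.post(
    "http://localhost:8000/actions",
    json={
        "action": "view",
        "user_id": None,              # guests allowed; the trainer maps this to "guest"
        "property_id": str(uuid4()),
    },
)
print(resp.status_code, resp.json())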
app/domains/user_action/dtos.py
ADDED
@@ -0,0 +1,11 @@
+from typing import Optional
+from uuid import UUID
+from pydantic import ConfigDict
+from database.models.base import BaseModel
+
+
+class CreateUserActionDTO(BaseModel):
+    model_config = ConfigDict(arbitrary_types_allowed=True)
+    action: str
+    user_id: Optional[UUID]
+    property_id: UUID
app/domains/user_action/service.py
ADDED
@@ -0,0 +1,21 @@
+from collections.abc import AsyncGenerator
+from database.models.user_action import UserAction
+from advanced_alchemy.repository import SQLAlchemyAsyncRepository
+from advanced_alchemy.service import SQLAlchemyAsyncRepositoryService
+from sqlalchemy.ext.asyncio import AsyncSession
+
+
+class UserActionRepository(SQLAlchemyAsyncRepository[UserAction]):
+    model_type = UserAction
+
+
+class UserActionService(SQLAlchemyAsyncRepositoryService[UserAction]):
+    repository_type = UserActionRepository
+
+
+async def provide_user_action_service(
+    db_session: AsyncSession,
+) -> AsyncGenerator[UserActionService, None]:
+
+    async with UserActionService.new(session=db_session) as service:
+        yield service
app/domains/user_search/__init__.py
ADDED
File without changes
app/domains/user_search/service.py
ADDED
@@ -0,0 +1,9 @@
+from database.models.user_search import UserSearch
+from database.models.user_action import UserAction
+from advanced_alchemy.repository import SQLAlchemyAsyncRepository
+from advanced_alchemy.service import SQLAlchemyAsyncRepositoryService
+
+
+class UserSearchRepository(SQLAlchemyAsyncRepository[UserSearch]):
+    model_type = UserSearch
+
+
+class UserSearchService(SQLAlchemyAsyncRepositoryService[UserSearch]):
+    repository_type = UserSearchRepository
app/migrations/versions/2025-04-10_make_table_for_recommender_system_997d3eb808d6.py
ADDED
@@ -0,0 +1,122 @@
+# type: ignore
+"""Make table for recommender system
+
+Revision ID: 997d3eb808d6
+Revises: dfa22bca0d19
+Create Date: 2025-04-10 02:59:05.940463
+
+"""
+
+import warnings
+from typing import TYPE_CHECKING
+
+import sqlalchemy as sa
+from alembic import op
+from advanced_alchemy.types import EncryptedString, EncryptedText, GUID, ORA_JSONB, DateTimeUTC
+from sqlalchemy import Text  # noqa: F401
+
+if TYPE_CHECKING:
+    from collections.abc import Sequence
+
+__all__ = ["downgrade", "upgrade", "schema_upgrades", "schema_downgrades", "data_upgrades", "data_downgrades"]
+
+sa.GUID = GUID
+sa.DateTimeUTC = DateTimeUTC
+sa.ORA_JSONB = ORA_JSONB
+sa.EncryptedString = EncryptedString
+sa.EncryptedText = EncryptedText
+
+# revision identifiers, used by Alembic.
+revision = '997d3eb808d6'
+down_revision = 'dfa22bca0d19'
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    with warnings.catch_warnings():
+        warnings.filterwarnings("ignore", category=UserWarning)
+        with op.get_context().autocommit_block():
+            schema_upgrades()
+            data_upgrades()
+
+
+def downgrade() -> None:
+    with warnings.catch_warnings():
+        warnings.filterwarnings("ignore", category=UserWarning)
+        with op.get_context().autocommit_block():
+            data_downgrades()
+            schema_downgrades()
+
+
+def schema_upgrades() -> None:
+    """schema upgrade migrations go here."""
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.create_table('user_actions',
+    sa.Column('id', sa.UUID(), nullable=False),
+    sa.Column('user_id', sa.UUID(), nullable=True),
+    sa.Column('property_id', sa.UUID(), nullable=True),
+    sa.Column('action', sa.String(), nullable=False),
+    sa.Column('created_at', sa.DateTime(timezone=True), nullable=False),
+    sa.Column('updated_at', sa.DateTime(timezone=True), nullable=False),
+    sa.ForeignKeyConstraint(['property_id'], ['properties.id'], name=op.f('fk_user_actions_property_id_properties'), ondelete='SET NULL'),
+    sa.ForeignKeyConstraint(['user_id'], ['users.id'], name=op.f('fk_user_actions_user_id_users'), ondelete='SET NULL'),
+    sa.PrimaryKeyConstraint('id', name=op.f('pk_user_actions')),
+    sa.UniqueConstraint('id', name=op.f('uq_user_actions_id'))
+    )
+    op.create_table('user_searches',
+    sa.Column('id', sa.UUID(), nullable=False),
+    sa.Column('user_id', sa.UUID(), nullable=True),
+    sa.Column('search_query', sa.String(), nullable=True),
+    sa.Column('type', sa.String(), nullable=True),
+    sa.Column('min_price', sa.Numeric(precision=12, scale=2, asdecimal=False), nullable=True),
+    sa.Column('max_price', sa.Numeric(precision=12, scale=2, asdecimal=False), nullable=True),
+    sa.Column('created_at', sa.DateTime(timezone=True), nullable=False),
+    sa.Column('updated_at', sa.DateTime(timezone=True), nullable=False),
+    sa.ForeignKeyConstraint(['user_id'], ['users.id'], name=op.f('fk_user_searches_user_id_users'), ondelete='SET NULL'),
+    sa.PrimaryKeyConstraint('id', name=op.f('pk_user_searches')),
+    sa.UniqueConstraint('id', name=op.f('uq_user_searches_id'))
+    )
+    with op.batch_alter_table('banners', schema=None) as batch_op:
+        batch_op.create_unique_constraint(batch_op.f('uq_banners_id'), ['id'])
+
+    with op.batch_alter_table('users', schema=None) as batch_op:
+        batch_op.add_column(sa.Column('min_price', sa.Numeric(precision=12, scale=2, asdecimal=False), nullable=True))
+        batch_op.add_column(sa.Column('max_price', sa.Numeric(precision=12, scale=2, asdecimal=False), nullable=True))
+
+    # ### end Alembic commands ###
+
+
+def schema_downgrades() -> None:
+    """schema downgrade migrations go here."""
+    # ### commands auto generated by Alembic - please adjust! ###
+    with op.batch_alter_table('users', schema=None) as batch_op:
+        batch_op.drop_column('max_price')
+        batch_op.drop_column('min_price')
+
+    with op.batch_alter_table('banners', schema=None) as batch_op:
+        batch_op.drop_constraint(batch_op.f('uq_banners_id'), type_='unique')
+
+    op.drop_table('user_searches')
+    op.drop_table('user_actions')
+    # ### end Alembic commands ###
+
+
+def data_upgrades() -> None:
+    """Add any optional data upgrade migrations here!"""
+
+
+def data_downgrades() -> None:
+    """Add any optional data downgrade migrations here!"""
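Applying the revision with Alembic's Python API rather than the CLI; a sketch assuming an alembic.ini at the project root:

from alembic import command
from alembic.config import Config

cfg = Config("alembic.ini")              # assumed location
command.upgrade(cfg, "997d3eb808d6")     # or "head"
command.downgrade(cfg, "dfa22bca0d19")   # revert to the parent revision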
poetry.lock
CHANGED
The diff for this file is too large to render.
pyproject.toml
CHANGED
@@ -25,11 +25,26 @@ dependencies = [
     "requests (>=2.32.3,<3.0.0)",
     "qrcode (>=8.1,<9.0)",
     "pillow (>=11.1.0,<12.0.0)",
+    "torch (>=2.6.0,<3.0.0)",
+    "torchvision (>=0.21.0,<0.22.0)",
+    "scikit-learn (>=1.6.1,<2.0.0)",
+    "pandas (>=2.2.3,<3.0.0)",
+    "faiss-cpu (>=1.10.0,<2.0.0)",
+    "sentence-transformers (>=4.0.2,<5.0.0)",
 ]

 [tool.poetry]
 package-mode = false

+[[tool.poetry.source]]
+name = "pytorch_cpu"
+url = "https://download.pytorch.org/whl/cpu"
+priority = "explicit"
+
+
+[tool.poetry.dependencies]
+torch = {source = "pytorch_cpu"}
+torchvision = {source = "pytorch_cpu"}
 [build-system]
 requires = ["poetry-core>=2.0.0,<3.0.0"]
 build-backend = "poetry.core.masonry.api"
recommender/__init__.py
ADDED
File without changes
recommender/data.py
ADDED
@@ -0,0 +1,23 @@
+from sentence_transformers import SentenceTransformer
+from torch.utils.data import Dataset
+import pandas as pd
+import numpy as np
+from dotenv import load_dotenv
+
+load_dotenv()
+search_model = SentenceTransformer("all-MiniLM-L6-v2")
+
+
+class RecommenderDataset(Dataset):
+    def __init__(self, user_feats, prop_feats, targets):
+        self.user_feats = user_feats
+        self.prop_feats = prop_feats
+        self.targets = targets
+
+    def __len__(self):
+        return len(self.targets)
+
+    def __getitem__(self, idx):
+        return {"user": self.user_feats[idx],
+                "property": self.prop_feats[idx],
+                "target": self.targets[idx]}
+
+
+def get_search_embedding(query):
+    if pd.isnull(query) or query.strip() == "":
+        return np.zeros(16, dtype=np.float32)
+    # all-MiniLM-L6-v2 emits 384-dim vectors; keep the first 16 dims to match
+    # the 16-column search-embedding layout assumed downstream.
+    return search_model.encode(query)[:16]
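A shape check for the dataset and the truncated search embedding; toy arrays, assuming the recommender package is importable from the project root:

import numpy as np
from torch.utils.data import DataLoader
from recommender.data import RecommenderDataset, get_search_embedding

users = np.random.rand(8, 21).astype(np.float32)   # 5 profile dims + 16 search dims
props = np.random.rand(8, 5).astype(np.float32)    # price, rating, lat, lon, type_enc
scores = np.random.rand(8).astype(np.float32)
batch = next(iter(DataLoader(RecommenderDataset(users, props, scores), batch_size=4)))
print(batch["user"].shape, batch["property"].shape)       # [4, 21] and [4, 5]
print(get_search_embedding("2 bedroom apartment").shape)  # (16,)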
recommender/faiss.py
ADDED
@@ -0,0 +1,16 @@
+import torch
+import numpy as np
+import faiss
+
+
+def build_faiss_index(model, prop_df):
+    # Property features must match PropertyTower's input_dim (5).
+    prop_vec = prop_df[["price", "average_rating", "lat", "lon", "type_enc"]].values.astype(np.float32)
+    ids = prop_df["id"].values
+    prop_tensor = torch.tensor(prop_vec, dtype=torch.float32)
+    with torch.no_grad():
+        emb = model.prop_tower(prop_tensor).numpy()
+    faiss.normalize_L2(emb)
+    dim = emb.shape[1]
+    index = faiss.IndexFlatIP(dim)
+    index.add(emb)
+    faiss.write_index(index, "property_faiss.index")
+    np.save("property_id_map.npy", ids)
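Reading the artifacts back and querying; a sketch with untrained weights (allow_pickle is needed because the id array holds UUID objects):

import faiss
import numpy as np
import torch
from recommender.tower import TwoTowerRec

model = TwoTowerRec()  # load real weights in practice
with torch.no_grad():
    u_emb = model.user_tower(torch.rand(1, 21)).numpy()
faiss.normalize_L2(u_emb)
index = faiss.read_index("property_faiss.index")
ids = np.load("property_id_map.npy", allow_pickle=True)
dists, idxs = index.search(u_emb, 5)
print(ids[idxs[0]], dists[0])  # top-5 property ids by inner product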
recommender/main.py
ADDED
@@ -0,0 +1,231 @@
+import os
+import torch
+import torch.nn as nn
+import torch.optim as optim
+from torch.utils.data import Dataset, DataLoader
+import pandas as pd
+import numpy as np
+import faiss
+from sqlalchemy import create_engine
+from datetime import datetime
+from sklearn.model_selection import train_test_split
+from sentence_transformers import SentenceTransformer
+from dotenv import load_dotenv
+from recommender.data import RecommenderDataset, get_search_embedding
+from recommender.faiss import build_faiss_index
+from recommender.reranker import Reranker, build_rerank_data
+from recommender.tower import TwoTowerRec, cosine_loss
+
+load_dotenv()
+property_field_list = ["price", "average_rating", "lat_prop", "lon_prop", "type_enc"]
+user_field_list = ["age", "min_price", "max_price", "lat", "lon"]
+
+# -------------------- Step 1: Load Data from PostgreSQL --------------------
+engine = create_engine(os.environ.get("DB_URL"))
+user_df = pd.read_sql(
+    "SELECT u.id, u.name, u.email, u.phone, u.verified, u.age, u.min_price, u.max_price, u.address_id, a.street, a.city, a.postal_code, a.neighborhood, a.latitude, a.longitude, a.coordinates, a.geohash FROM users u LEFT JOIN addresses a ON u.address_id = a.id;",
+    engine,
+)
+property_df = pd.read_sql(
+    "SELECT p.title, p.id, p.property_category, p.property_type_id, p.transaction_type, p.price, p.bedrooms, p.bathrooms, p.sqm, p.description, p.average_rating, p.status, p.owner_id, p.address_id, a.street, a.city, a.postal_code, a.neighborhood, a.latitude, a.longitude, a.coordinates, a.geohash FROM properties p LEFT JOIN addresses a ON p.address_id = a.id;",
+    engine,
+)
+actions_df = pd.read_sql(
+    "SELECT user_id, property_id, action, created_at FROM user_actions", engine
+)
+search_df = pd.read_sql(
+    "SELECT user_id, search_query, created_at, min_price, max_price, type FROM user_searches",
+    engine,
+)
+
+# -------------------- Preprocess Interactions --------------------
+# Guest actions: fill NaN user_id with "guest"
+actions_df["user_id"] = actions_df["user_id"].fillna("guest")
+
+
+def compute_score(row):
+    # Exponential recency decay: exp(-age_in_days / 30)
+    decay = np.exp(-((datetime.now() - row["created_at"]).days) / 30)
+    if row["action"] == "like":
+        return 3 * decay
+    elif row["action"] == "view":
+        return 1 * decay
+    elif row["action"] == "unlike":
+        return -2 * decay
+    return 0
+
+
+actions_df["created_at"] = pd.to_datetime(actions_df["created_at"])
+actions_df["score"] = actions_df.apply(compute_score, axis=1)
+interaction_df = (
+    actions_df.groupby(["user_id", "property_id"])
+    .agg(score=("score", "sum"))
+    .reset_index()
+)
+
+# -------------------- Process Search History with SentenceTransformer --------------------
+
+search_df["created_at"] = pd.to_datetime(search_df["created_at"])
+search_embeddings = search_df["search_query"].apply(get_search_embedding)
+search_emb_df = pd.DataFrame(search_embeddings.tolist(), index=search_df.index)
+search_df = pd.concat([search_df, search_emb_df], axis=1)
+search_emb_cols = [f"s{i}" for i in range(16)]
+search_df_cols = list(search_df.columns[:-16]) + search_emb_cols
+search_df.columns = search_df_cols
+# Average each user's search-embedding columns; only these are used downstream.
+search_agg = search_df.groupby("user_id")[search_emb_cols].mean().reset_index()
+
+# Use Latitude/Longitude for Location
+user_df["lat"] = pd.to_numeric(user_df["latitude"], errors="coerce").fillna(0)
+user_df["lon"] = pd.to_numeric(user_df["longitude"], errors="coerce").fillna(0)
+user_df["min_price"] = pd.to_numeric(user_df["min_price"], errors="coerce").fillna(0)
+user_df["max_price"] = pd.to_numeric(user_df["max_price"], errors="coerce").fillna(0)
+property_df["lat"] = pd.to_numeric(property_df["latitude"], errors="coerce").fillna(0)
+property_df["lon"] = pd.to_numeric(property_df["longitude"], errors="coerce").fillna(0)
+# Keep property_category for the property type filter and encode it for the model.
+property_df["type_enc"] = property_df["property_category"].astype("category").cat.codes
+
+# Merge Data for Training
+df = interaction_df.merge(
+    user_df, left_on="user_id", right_on="id", how="left", suffixes=("", "_user")
+)
+df = df.merge(
+    property_df,
+    left_on="property_id",
+    right_on="id",
+    how="left",
+    suffixes=("", "_prop"),
+)
+df = df.merge(
+    search_agg[["user_id"] + search_emb_cols], on="user_id", how="left"
+).fillna(0)
+# For guest actions missing user data, fill defaults.
+default_age = user_df["age"].mean()
+default_lat = user_df["lat"].median()
+default_lon = user_df["lon"].median()
+default_min_price = user_df["min_price"].median()
+default_max_price = user_df["max_price"].median()
+df["age"].fillna(default_age, inplace=True)
+df["min_price"].fillna(default_min_price, inplace=True)
+df["max_price"].fillna(default_max_price, inplace=True)
+df["lat"].fillna(default_lat, inplace=True)
+df["lon"].fillna(default_lon, inplace=True)
+
+# Feature Setup
+# User features: [age, min_price, max_price, lat, lon] + search embedding (16 dims) = 5 + 16 = 21 dims
+user_feats = df[user_field_list].values.astype(np.float32)
+search_feats = df[search_emb_cols].values.astype(np.float32)
+user_features = np.concatenate([user_feats, search_feats], axis=1)
+
+# Property features: [price, rating, lat, lon, type_enc] = 5 dims
+property_feats = df[property_field_list].values.astype(np.float32)
+ratings = df["score"].values.astype(np.float32)
+
+dataset = RecommenderDataset(user_features, property_feats, ratings)
+train_loader = DataLoader(dataset, batch_size=256, shuffle=True)
+
+
+two_tower = TwoTowerRec()
+optimizer = optim.Adagrad(two_tower.parameters(), lr=0.1)
+for epoch in range(5):
+    total_loss = 0
+    for batch in train_loader:
+        optimizer.zero_grad()
+        user = batch["user"].float()
+        prop = batch["property"].float()
+        u_emb, p_emb = two_tower(user, prop)
+        loss = cosine_loss(u_emb, p_emb)
+        loss.backward()
+        optimizer.step()
+        total_loss += loss.item()
+    print(f"Epoch {epoch+1} Loss: {total_loss/len(train_loader):.4f}")
+torch.save(two_tower.state_dict(), "two_tower_rec.pth")
+
+build_faiss_index(two_tower, property_df)
+
+
+# Prepare data for training the reranker
+X_rerank, y_rerank = build_rerank_data(two_tower, df, search_emb_cols)
+X_train, X_val, y_train, y_val = train_test_split(X_rerank, y_rerank, test_size=0.2)
+X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
+y_train_tensor = torch.tensor(y_train, dtype=torch.float32).unsqueeze(1)
+X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
+y_val_tensor = torch.tensor(y_val, dtype=torch.float32).unsqueeze(1)
+
+# Train reranker
+reranker = Reranker()
+criterion = nn.MSELoss()
+opt_rerank = optim.Adam(reranker.parameters(), lr=0.001)
+for epoch in range(5):
+    reranker.train()
+    opt_rerank.zero_grad()
+    preds = reranker(X_train_tensor)
+    loss = criterion(preds, y_train_tensor)
+    loss.backward()
+    opt_rerank.step()
+    reranker.eval()
+    with torch.no_grad():
+        val_loss = criterion(reranker(X_val_tensor), y_val_tensor)
+    print(
+        f"Reranker Epoch {epoch+1}: Loss {loss.item():.4f}, Val Loss {val_loss.item():.4f}"
+    )
+torch.save(reranker.state_dict(), "reranker.pth")
+
+
+# -------------------- Inference: Retrieval + Reranking --------------------
+def retrieve_candidates(model, user_vector, top_k=20):
+    user_tensor = torch.tensor(user_vector, dtype=torch.float32)
+    with torch.no_grad():
+        u_emb = model.user_tower(user_tensor).numpy()
+    faiss.normalize_L2(u_emb)
+    index = faiss.read_index("property_faiss.index")
+    ids = np.load("property_id_map.npy", allow_pickle=True)
+    dists, idxs = index.search(u_emb, top_k)
+    return ids[idxs[0]], u_emb
+
+
+def recommend(
+    two_tower, reranker, user_raw_vector, property_type_filter=None, boost=0.1
+):
+    user_raw = np.array([user_raw_vector], dtype=np.float32)
+    candidate_ids, u_emb = retrieve_candidates(two_tower, user_raw, top_k=20)
+    results = []
+    for pid in candidate_ids:
+        prop = property_df[property_df["id"] == pid]
+        # Raw property_df columns (no "_prop" suffix outside the merged frame).
+        p_vec = prop[["price", "average_rating", "lat", "lon", "type_enc"]].values.astype(np.float32)
+        p_tensor = torch.tensor(p_vec, dtype=torch.float32)
+        with torch.no_grad():
+            p_emb = two_tower.prop_tower(p_tensor).numpy()
+        combined = np.concatenate([u_emb, p_emb], axis=1)
+        combined_tensor = torch.tensor(combined, dtype=torch.float32)
+        with torch.no_grad():
+            score = reranker(combined_tensor).numpy()[0][0]
+        # Boost score if property type matches filter
+        if property_type_filter is not None:
+            prop_type = prop["property_category"].iloc[0]
+            if prop_type == property_type_filter:
+                score += boost
+        results.append((pid, score))
+    results.sort(key=lambda x: -x[1])
+    return [pid for pid, _ in results]
+
+
+# -------------------- Example Inference Usage --------------------
+# For a user with: age=30, min_price=0, max_price=0, lat/lon from address,
+# plus aggregated search embedding (if no search history, zeros)
+example_user_features = np.array(
+    [30, 0, 0, 40.7128, -74.0060] + [0] * 16, dtype=np.float32
+)
+# If a property type filter is desired, e.g., "Apartment"
+recommended_properties = recommend(
+    two_tower, reranker, example_user_features, property_type_filter="Apartment"
+)
+print("Recommended property IDs:", recommended_properties)
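The recency weighting in compute_score is exp(-age_days / 30), so a like is worth its full 3 points today, about 1.1 after a month, and close to nothing after three; a quick check:

import numpy as np

for days in (0, 7, 30, 90):
    print(days, round(float(3 * np.exp(-days / 30)), 3))
# 0 3.0 / 7 2.376 / 30 1.104 / 90 0.149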
recommender/reranker.py
ADDED
@@ -0,0 +1,31 @@
+import torch.nn as nn
+import torch
+import numpy as np
+
+
+class Reranker(nn.Module):
+    def __init__(self, input_dim=64, hidden_dim=64):
+        super().__init__()
+        self.model = nn.Sequential(
+            nn.Linear(input_dim, hidden_dim),
+            nn.ReLU(),
+            nn.Dropout(0.2),
+            nn.Linear(hidden_dim, 1)
+        )
+
+    def forward(self, x):
+        return self.model(x)
+
+
+def build_rerank_data(two_tower, df, search_emb_cols):
+    u_input = df[["age", "min_price", "max_price", "lat", "lon"]].values.astype(np.float32)
+    search_input = df[search_emb_cols].values.astype(np.float32)
+    user_input = np.concatenate([u_input, search_input], axis=1)
+    # Property features from the merged frame (location columns carry the "_prop" suffix).
+    p_input = df[
+        ["price", "average_rating", "lat_prop", "lon_prop", "type_enc"]
+    ].values.astype(np.float32)
+    targets = df["score"].values.astype(np.float32)
+    u_tensor = torch.tensor(user_input, dtype=torch.float32)
+    p_tensor = torch.tensor(p_input, dtype=torch.float32)
+    with torch.no_grad():
+        u_emb = two_tower.user_tower(u_tensor).numpy()
+        p_emb = two_tower.prop_tower(p_tensor).numpy()
+    X = np.concatenate([u_emb, p_emb], axis=1)
+    return X, targets
recommender/tower.py
ADDED
@@ -0,0 +1,34 @@
+import torch
+import torch.nn as nn
+
+
+class UserTower(nn.Module):
+    def __init__(self, input_dim=21, hidden_dim=64, out_dim=32):
+        super().__init__()
+        self.model = nn.Sequential(
+            nn.Linear(input_dim, hidden_dim),
+            nn.ReLU(),
+            nn.Linear(hidden_dim, out_dim)
+        )
+
+    def forward(self, x):
+        return self.model(x)
+
+
+class PropertyTower(nn.Module):
+    def __init__(self, input_dim=5, hidden_dim=64, out_dim=32):
+        super().__init__()
+        self.model = nn.Sequential(
+            nn.Linear(input_dim, hidden_dim),
+            nn.ReLU(),
+            nn.Linear(hidden_dim, out_dim)
+        )
+
+    def forward(self, x):
+        return self.model(x)
+
+
+class TwoTowerRec(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.user_tower = UserTower()
+        self.prop_tower = PropertyTower()
+
+    def forward(self, user, prop):
+        return self.user_tower(user), self.prop_tower(prop)
+
+
+def cosine_loss(u, p, margin=0.5):
+    cos_sim = nn.functional.cosine_similarity(u, p)
+    loss = torch.clamp(margin - cos_sim, min=0).mean()
+    return loss
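A sanity check of the towers and the hinge-style cosine loss (random inputs; the loss goes to zero once the batch cosine similarity clears the 0.5 margin):

import torch
from recommender.tower import TwoTowerRec, cosine_loss

model = TwoTowerRec()
u = torch.rand(4, 21)   # [age, min_price, max_price, lat, lon] + 16 search dims
p = torch.rand(4, 5)    # [price, rating, lat, lon, type_enc]
with torch.no_grad():
    u_emb, p_emb = model(u, p)
    print(u_emb.shape, p_emb.shape)        # torch.Size([4, 32]) for both towers
    print(cosine_loss(u_emb, p_emb).item())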
|