Add model download script and local model files for Mahjong Soul Vision

- Implemented a script to download and save the Mahjong Soul Vision model locally.
- Added the local model files (config.json, preprocessor_config.json, and the model.safetensors weights) to the vision_transformer_local directory.
- Configured image processing parameters and model architecture settings for image classification tasks.

Files changed (7) hide show

__pycache__/tools.cpython-310.pyc +0 -0
debug_coordinates.py +64 -0
download_model.py +22 -0
live_feed.py +322 -71
vision_transformer_local/config.json +97 -0
vision_transformer_local/model.safetensors +3 -0
vision_transformer_local/preprocessor_config.json +23 -0

__pycache__/tools.cpython-310.pyc ADDED Viewed

Binary file (10.1 kB). View file

debug_coordinates.py ADDED Viewed

	@@ -0,0 +1,64 @@

+"""
+座標デバッグ用スクリプト
+雀魂のウィンドウから手牌領域をキャプチャして確認
+"""
+import pygetwindow as gw
+from PIL import ImageGrab
+import numpy as np
+import cv2
+# Locate the game window by its title.
+window_title = "雀魂"
+try:
+    window = gw.getWindowsWithTitle(window_title)[0]
+except IndexError:
+    print(f"✗ '{window_title}' が見つかりません")
+    # Raise SystemExit directly: the exit() helper is installed by the site
+    # module and is not guaranteed to exist in every interpreter setup.
+    raise SystemExit(1)
+print(f"✓ ウィンドウ検出: {window.title}")
+print(f"  位置: ({window.left}, {window.top})")
+print(f"  サイズ: {window.width}x{window.height}")
+# Grab the window area and convert it from PIL's RGB to OpenCV's BGR order.
+screenshot = ImageGrab.grab(bbox=(window.left, window.top, window.right, window.bottom), all_screens=True)
+frame = cv2.cvtColor(np.array(screenshot), cv2.COLOR_RGB2BGR)
+frame_height, frame_width = frame.shape[:2]
+# Hand-tile region (per the original note: same values as live_feed.py).
+PLAYER_HAND_X = 105
+PLAYER_HAND_Y = 759
+PLAYER_HAND_W = 627
+PLAYER_HAND_H = 84
+# Check that the region fits vertically inside the captured frame.
+if PLAYER_HAND_Y + PLAYER_HAND_H > frame_height:
+    print("\n⚠ 警告: 手牌のY座標が画面外です")
+    print(f"  画面の高さ: {frame_height}")
+    print(f"  手牌の範囲: Y {PLAYER_HAND_Y} - {PLAYER_HAND_Y + PLAYER_HAND_H}")
+    # Move the region up so it ends 10 px above the bottom edge. Clamp at 0:
+    # a negative start index would make the NumPy slice wrap around and
+    # silently select the wrong rows when the window is very short.
+    PLAYER_HAND_Y = max(0, frame_height - PLAYER_HAND_H - 10)
+    print(f"  修正後のY座標: {PLAYER_HAND_Y}")
+# Crop the hand region (NumPy truncates the slice at the frame edges).
+hand_region = frame[PLAYER_HAND_Y:PLAYER_HAND_Y+PLAYER_HAND_H,
+                     PLAYER_HAND_X:PLAYER_HAND_X+PLAYER_HAND_W]
+# Draw the region as a green rectangle on a copy of the full frame.
+debug_frame = frame.copy()
+cv2.rectangle(debug_frame,
+              (PLAYER_HAND_X, PLAYER_HAND_Y),
+              (PLAYER_HAND_X + PLAYER_HAND_W, PLAYER_HAND_Y + PLAYER_HAND_H),
+              (0, 255, 0), 3)
+# Save the annotated frame first so it is available even if the crop is unusable.
+cv2.imwrite("debug_full_screen.png", debug_frame)
+if hand_region.size == 0:
+    # cv2.imwrite rejects empty images; report the problem instead of crashing.
+    print(f"\n✗ エラー: 手牌領域が空です (画面サイズ: {frame_width}x{frame_height})")
+    raise SystemExit(1)
+cv2.imwrite("debug_hand_region.png", hand_region)
+print("\n保存完了:")
+print("  debug_full_screen.png - 全画面（緑の矩形が手牌領域）")
+print("  debug_hand_region.png - 手牌領域のみ")
+print("\n手牌領域:")
+print(f"  X: {PLAYER_HAND_X}")
+print(f"  Y: {PLAYER_HAND_Y}")
+print(f"  幅: {PLAYER_HAND_W}")
+print(f"  高さ: {PLAYER_HAND_H}")
+print(f"  サイズ: {hand_region.shape}")

download_model.py ADDED Viewed

	@@ -0,0 +1,22 @@

+# Download the model and keep a copy in a local directory.
+from transformers import AutoImageProcessor, AutoModelForImageClassification
+import os
+model_name = "krmin/mahjong_soul_vision"
+local_model_path = "./vision_transformer_local"
+print(f"モデルをダウンロード中: {model_name}", f"保存先: {local_model_path}", sep="\n")
+# Load the image processor first, then the classifier, from the named model.
+processor = AutoImageProcessor.from_pretrained(model_name)
+model = AutoModelForImageClassification.from_pretrained(model_name)
+# Write both artifacts into the same directory (processor first, then model).
+print("ローカルに保存中...")
+for artifact in (processor, model):
+    artifact.save_pretrained(local_model_path)
+print("✓ 完了!")
+# Show the caller how to load the saved copy on later runs.
+usage_hint = f'pipe = pipeline("image-classification", model="{local_model_path}", device=device)'
+print("\n次回からは以下のように読み込めます:", usage_hint, sep="\n")

live_feed.py CHANGED Viewed

@@ -2,6 +2,7 @@
 # %%
 import time
 import cv2
 from PIL import Image, ImageGrab
@@ -18,6 +19,105 @@ import torch.nn as nn
 from safetensors.torch import load_file
 # Load model directly
 from transformers import AutoModel
 class ImprovedNN(nn.Module):
     def __init__(self, input_dim, output_dim):
         super(ImprovedNN, self).__init__()
@@ -45,38 +145,68 @@ class ImprovedNN(nn.Module):
 if torch.cuda.is_available():
-    print("CUDA available")
     device = torch.device("cuda")
 else:
-    print("No CUDA")
     device = torch.device("cpu")
-pipe = pipeline("image-classification", model="pjura/mahjong_soul_vision", device=device)
 input_dim = 204
 output_dim = 34
-model = ImprovedNN(input_dim=input_dim, output_dim=output_dim)
 model_path = "model.safetensors"
 state_dict = load_file(model_path)
-model.load_state_dict(state_dict)
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-model.to(device)
 global_debug = False
-model.to(device)
-PLAYER_HAND_X = 300
-PLAYER_HAND_Y = 1048 - 200
-PLAYER_HAND_W = 1250
-PLAYER_HAND_H = 200
-PLAYER_PON_X = 300 + PLAYER_HAND_W
-PLAYER_PON_Y = 1048 - 200
 PLAYER_PON_W = 200
-PLAYER_PON_H = 200
 PLAYER_THROW_X = 790
 PLAYER_THROW_Y = 1048 - 490
@@ -114,10 +244,13 @@ def nothing(x):
 # Get the window by its title. Adjust this to the title of the window you want to capture.
-window_title = "MahjongSoul"
 try:
     window = gw.getWindowsWithTitle(window_title)[0]
 except IndexError:
     raise Exception(f"No window with title '{window_title}' found.")
 if global_debug:
@@ -126,7 +259,7 @@ if global_debug:
     cv2.createTrackbar('Upper', 'Trackbars', 255, 255, nothing)
-def analyze_region(frame, x, y, w, h, lower=150, upper=255, debug=False):
     if global_debug:
         lower = cv2.getTrackbarPos('Lower', 'Trackbars')
         upper = cv2.getTrackbarPos('Upper', 'Trackbars')
@@ -148,8 +281,11 @@ def analyze_region(frame, x, y, w, h, lower=150, upper=255, debug=False):
     rois = []  # Liste zur Sammlung von Regionen von Interesse (ROIs)
     boxes_temp = []  # Temporäre Liste zur Sammlung von Bounding-Box-Koordinaten
     contours, _ = cv2.findContours(roi_threshed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
     for contour in contours:
-        if cv2.contourArea(contour) > 500:
             x_rect, y_rect, w_rect, h_rect = cv2.boundingRect(contour)
             aspect_ratio = w_rect / h_rect
@@ -174,10 +310,13 @@ def analyze_region(frame, x, y, w, h, lower=150, upper=255, debug=False):
             label = predictions[0]['label']
             prob = predictions[0]['score']
-            if prob > 0.0:
                 boxes.append(boxes_temp[idx])  # idx wird hier verwendet
                 probs.append(prob)
                 labels.append(label)
     return boxes, labels, probs
@@ -239,7 +378,7 @@ def click_hand_tile(all_boxes, frame):
     translated_tensor = translate_boxes_to_tensors(all_boxes)
     # Stellen Sie sicher, dass Ihre make_prediction Funktion die Rohwahrscheinlichkeiten zurückgibt
-    probs = make_prediction(model, translated_tensor)
     # Sortieren Sie die Wahrscheinlichkeiten in absteigender Reihenfolge und erhalten Sie die Indizes
     sorted_indices = probs.argsort(descending=True)
@@ -308,10 +447,60 @@ def draw_boxes(frame, boxes, labels, probs):
 PLAYER_PON_X_TEMP = PLAYER_PON_X
 PLAYER_PON_W_TEMP = PLAYER_PON_W
-while True:
     screenshot = ImageGrab.grab(bbox=(window.left, window.top, window.right, window.bottom), all_screens=True)
     frame = np.array(screenshot)
     frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
     # Analyze regions and get boxes, labels, and probabilities
     player_pon_boxes, player_pon_labels, player_pon_probs = analyze_region(frame, PLAYER_PON_X_TEMP, PLAYER_PON_Y,
@@ -323,6 +512,13 @@ while True:
     player_hand_boxes, player_hand_labels, player_hand_probs = analyze_region(frame, PLAYER_HAND_X, PLAYER_HAND_Y,
                                                                               PLAYER_HAND_W_TEMP, PLAYER_HAND_H)
     player_throw_boxes, player_throw_labels, player_throw_probs = analyze_region(frame, PLAYER_THROW_X, PLAYER_THROW_Y,
                                                                                  PLAYER_THROW_W, PLAYER_THROW_H)
     right_player_throw_boxes, right_player_throw_labels, right_player_throw_probs = analyze_region(frame,
@@ -340,38 +536,30 @@ while True:
                                                                                                             OPPOSITE_PLAYER_THROW_Y,
                                                                                                             OPPOSITE_PLAYER_THROW_W,
                                                                                                             OPPOSITE_PLAYER_THROW_H)
-    # Draw bounding boxes, labels, and probabilities
-    draw_boxes(frame, player_hand_boxes, player_hand_labels, player_hand_probs)
-    draw_boxes(frame, player_pon_boxes, player_pon_labels, player_pon_probs)
-    draw_boxes(frame, player_throw_boxes, player_throw_labels, player_throw_probs)
-    draw_boxes(frame, right_player_throw_boxes, right_player_throw_labels, right_player_throw_probs)
-    draw_boxes(frame, left_player_throw_boxes, left_player_throw_labels, left_player_throw_probs)
-    draw_boxes(frame, opposite_player_throw_boxes, opposite_player_throw_labels, opposite_player_throw_probs)
-    cv2.rectangle(frame, (PLAYER_HAND_X, PLAYER_HAND_Y),
-                  (PLAYER_HAND_X + PLAYER_HAND_W_TEMP, PLAYER_HAND_Y + PLAYER_HAND_H),
-                  (0, 255, 0), 2)
-    cv2.rectangle(frame, (PLAYER_PON_X_TEMP, PLAYER_PON_Y),
-                  (PLAYER_PON_X_TEMP + PLAYER_PON_W_TEMP, PLAYER_PON_Y + PLAYER_PON_H),
-                  (255, 255, 0), 2)
-    cv2.rectangle(frame, (PLAYER_THROW_X, PLAYER_THROW_Y),
-                  (PLAYER_THROW_X + PLAYER_THROW_W, PLAYER_THROW_Y + PLAYER_THROW_H),
-                  (0, 0, 255), 2)
-    # Zeichnen Sie die Boxen für die anderen Spieler
-    cv2.rectangle(frame, (RIGHT_PLAYER_THROW_X, RIGHT_PLAYER_THROW_Y),
-                  (RIGHT_PLAYER_THROW_X + RIGHT_PLAYER_THROW_W, RIGHT_PLAYER_THROW_Y + RIGHT_PLAYER_THROW_H),
-                  (255, 0, 0), 2)  # Blaue Farbe für den rechten Spieler
-    cv2.rectangle(frame, (LEFT_PLAYER_THROW_X, LEFT_PLAYER_THROW_Y),
-                  (LEFT_PLAYER_THROW_X + LEFT_PLAYER_THROW_W, LEFT_PLAYER_THROW_Y + LEFT_PLAYER_THROW_H),
-                  (0, 255, 255), 2)  # Gelbe Farbe für den linken Spieler
-    cv2.rectangle(frame, (OPPOSITE_PLAYER_THROW_X, OPPOSITE_PLAYER_THROW_Y),
-                  (
-                      OPPOSITE_PLAYER_THROW_X + OPPOSITE_PLAYER_THROW_W,
-                      OPPOSITE_PLAYER_THROW_Y + OPPOSITE_PLAYER_THROW_H),
-                  (255, 0, 255), 2)  # Violette Farbe für den gegenüberliegenden Spieler
     all_boxes = {
         "player_hand": player_hand_boxes,
         "player_hand_labels": player_hand_labels,
@@ -387,28 +575,91 @@ while True:
         "opposite_player_throw_labels": opposite_player_throw_labels
     }
     if len(player_hand_labels) + len(player_pon_labels) >= 14:
-        print("Your turn!")
-        click_hand_tile(all_boxes, frame)
-        time.sleep(1)
-    if global_debug:
-        # Erstellt die Trackbars
-        lower = cv2.getTrackbarPos('Lower', 'Trackbars')
-        upper = cv2.getTrackbarPos('Upper', 'Trackbars')
-    if global_debug:
-        frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)  # Graustufen-Frame
-        _, frame_threshed = cv2.threshold(frame_gray, lower, upper, cv2.THRESH_BINARY)
-        cv2.imshow("Full Frame Gray", frame_gray)  # Zeigt den grauen Frame
-        cv2.imshow("Full Frame Threshold", frame_threshed)
-    else:
-        cv2.imshow("Mahjong Tile Recognition v2", frame)
-    # Break the loop if 'q' is pressed
-    if cv2.waitKey(1) & 0xFF == ord('q'):
-        break
-    # time.sleep(2)
-cv2.destroyAllWindows()
 # %%

 # %%
 import time
+import sys
 import cv2
 from PIL import Image, ImageGrab
 from safetensors.torch import load_file
 # Load model directly
 from transformers import AutoModel
+from PyQt5.QtWidgets import QApplication, QLabel, QVBoxLayout, QWidget
+from PyQt5.QtCore import Qt, QTimer
+from PyQt5.QtGui import QFont
+# Transparent overlay window class
+class TransparentOverlay(QWidget):
+    """Frameless, always-on-top widget with hand, recommendation and status labels."""
+    def __init__(self):
+        super().__init__()
+        self.initUI()
+    def _add_label(self, column, text, font, style_sheet):
+        """Create a QLabel with the given text, font and style sheet, append it to *column* and return it."""
+        label = QLabel(text)
+        label.setFont(font)
+        label.setStyleSheet(style_sheet)
+        column.addWidget(label)
+        return label
+    def initUI(self):
+        """Set window flags and geometry, then stack the three labels vertically."""
+        # Window flags: stay on top, no frame, and (per the original note)
+        # Qt.Tool keeps the window out of the taskbar.
+        overlay_flags = Qt.WindowStaysOnTopHint | Qt.FramelessWindowHint | Qt.Tool
+        self.setWindowFlags(overlay_flags)
+        self.setAttribute(Qt.WA_TranslucentBackground)  # transparent background
+        # Position and size (top-left corner)
+        self.setGeometry(10, 10, 400, 150)
+        # Vertical layout holding the labels
+        column = QVBoxLayout()
+        column.setContentsMargins(10, 10, 10, 10)
+        # Hand label
+        self.hand_label = self._add_label(
+            column,
+            "手牌: 雀魂で牌が配られるまで待機中...",
+            QFont("Yu Gothic UI", 12, QFont.Bold),
+            """
+            QLabel {
+                color: white;
+                background-color: rgba(0, 0, 0, 180);
+                padding: 8px;
+                border-radius: 5px;
+            }
+        """,
+        )
+        # Recommended-discard label
+        self.recommendation_label = self._add_label(
+            column,
+            "推奨: -",
+            QFont("Yu Gothic UI", 16, QFont.Bold),
+            """
+            QLabel {
+                color: #FFD700;
+                background-color: rgba(0, 0, 0, 180);
+                padding: 10px;
+                border-radius: 5px;
+                border: 2px solid #FFD700;
+            }
+        """,
+        )
+        # Status label
+        self.status_label = self._add_label(
+            column,
+            "✓ 起動完了 | Space: 自動クリック | 更新: 0.2秒毎",
+            QFont("Yu Gothic UI", 9),
+            """
+            QLabel {
+                color: #00FF00;
+                background-color: rgba(0, 0, 0, 150);
+                padding: 5px;
+                border-radius: 3px;
+            }
+        """,
+        )
+        self.setLayout(column)
+    def update_hand(self, tiles):
+        """Show the given tile labels in the hand label; an empty value leaves the text unchanged."""
+        if not tiles:
+            return
+        joined_tiles = " ".join(tiles)
+        self.hand_label.setText(f"手牌: {joined_tiles}")
+    def update_recommendation(self, tile):
+        """Show the recommended tile in the highlight style, or reset to the placeholder when *tile* is falsy."""
+        if tile:
+            # Active recommendation: red text with a thicker red border.
+            caption = f"推奨: {tile}"
+            style_sheet = """
+                QLabel {
+                    color: #FF4444;
+                    background-color: rgba(0, 0, 0, 200);
+                    padding: 10px;
+                    border-radius: 5px;
+                    border: 3px solid #FF4444;
+                }
+            """
+        else:
+            # No recommendation: gold placeholder style.
+            caption = "推奨: -"
+            style_sheet = """
+                QLabel {
+                    color: #FFD700;
+                    background-color: rgba(0, 0, 0, 180);
+                    padding: 10px;
+                    border-radius: 5px;
+                    border: 2px solid #FFD700;
+                }
+            """
+        self.recommendation_label.setText(caption)
+        self.recommendation_label.setStyleSheet(style_sheet)
 class ImprovedNN(nn.Module):
     def __init__(self, input_dim, output_dim):
         super(ImprovedNN, self).__init__()
 if torch.cuda.is_available():
+    print("✓ CUDA利用可能")
     device = torch.device("cuda")
 else:
+    print("⚠ CUDA利用不可 - CPUモード")
     device = torch.device("cpu")
+# Load the tile-recognition pipeline, preferring a complete local copy.
+print("モデル読み込み中...")
+import os
+local_model_path = "./vision_transformer_local"
+model_name = "krmin/mahjong_soul_vision"
+# Require the config files themselves rather than just the directory: if an
+# earlier save step failed part-way, the directory can exist without the
+# preprocessor config, and loading from it would then fail on every start.
+required_model_files = ("config.json", "preprocessor_config.json")
+has_local_model = all(
+    os.path.isfile(os.path.join(local_model_path, file_name))
+    for file_name in required_model_files
+)
+if has_local_model:
+    print(f"  ローカルモデルを使用: {local_model_path}")
+    pipe = pipeline("image-classification", model=local_model_path, device=device)
+else:
+    print(f"  HuggingFaceからダウンロード: {model_name}")
+    print("  初回は30-60秒かかります")
+    pipe = pipeline("image-classification", model=model_name, device=device)
+    # Cache the downloaded model for later runs. A failure here is reported
+    # but not fatal, because the pipeline above is already usable.
+    try:
+        print("  次回用にローカル保存中...")
+        # Depending on the transformers version the preprocessor may be
+        # exposed as image_processor, with feature_extractor left as None;
+        # use whichever attribute is actually set.
+        preprocessor = getattr(pipe, "image_processor", None)
+        if preprocessor is None:
+            preprocessor = getattr(pipe, "feature_extractor", None)
+        if preprocessor is None:
+            raise RuntimeError("pipeline has no image processor to save")
+        # Save the preprocessor before the model so a failure at this step
+        # does not leave model files behind without their preprocessor config.
+        preprocessor.save_pretrained(local_model_path)
+        pipe.model.save_pretrained(local_model_path)
+        print(f"  ✓ ローカルに保存完了: {local_model_path}")
+    except Exception as e:
+        print(f"  ⚠ ローカル保存失敗: {e}")
+print("  ✓ Vision Transformer読み込み完了")
 input_dim = 204
 output_dim = 34
+discard_model = ImprovedNN(input_dim=input_dim, output_dim=output_dim)
 model_path = "model.safetensors"
 state_dict = load_file(model_path)
+discard_model.load_state_dict(state_dict)
+print("  ✓ 打牌予測モデル読み込み完了")
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+discard_model.to(device)
 global_debug = False
+discard_model.to(device)
+# Global state
+window = None
+window_title = "雀魂"
+# Hand-tile region in the Mahjong Soul window (per the author: checked against the actual screen)
+# Window-relative coordinates: x=105, y=759, width=627, height=84
+PLAYER_HAND_X = 105
+PLAYER_HAND_Y = 759
+PLAYER_HAND_W = 627
+PLAYER_HAND_H = 84
+PLAYER_PON_X = PLAYER_HAND_X + PLAYER_HAND_W
+PLAYER_PON_Y = PLAYER_HAND_Y
 PLAYER_PON_W = 200
+PLAYER_PON_H = 84
 PLAYER_THROW_X = 790
 PLAYER_THROW_Y = 1048 - 490
 # Get the window by its title. Adjust this to the title of the window you want to capture.
+print(f"雀魂ウィンドウを検索中...")
 try:
     window = gw.getWindowsWithTitle(window_title)[0]
+    print(f"  ✓ ウィンドウ検出: {window.title}")
 except IndexError:
+    print(f"  ✗ エラー: '{window_title}' というタイトルのウィンドウが見つかりません")
+    print(f"  雀魂を起動してからもう一度お試しください")
     raise Exception(f"No window with title '{window_title}' found.")
 if global_debug:
     cv2.createTrackbar('Upper', 'Trackbars', 255, 255, nothing)
+def analyze_region(frame, x, y, w, h, lower=100, upper=255, debug=False):
     if global_debug:
         lower = cv2.getTrackbarPos('Lower', 'Trackbars')
         upper = cv2.getTrackbarPos('Upper', 'Trackbars')
     rois = []  # Liste zur Sammlung von Regionen von Interesse (ROIs)
     boxes_temp = []  # Temporäre Liste zur Sammlung von Bounding-Box-Koordinaten
     contours, _ = cv2.findContours(roi_threshed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
     for contour in contours:
+        area = cv2.contourArea(contour)
+        # 最小面積を200に下げて小さい牌も認識
+        if area > 200:
             x_rect, y_rect, w_rect, h_rect = cv2.boundingRect(contour)
             aspect_ratio = w_rect / h_rect
             label = predictions[0]['label']
             prob = predictions[0]['score']
+            # 確率が85%以上の認識結果のみ採用（Vision Transformerは99.7%の精度）
+            if prob > 0.85:
                 boxes.append(boxes_temp[idx])  # idx wird hier verwendet
                 probs.append(prob)
                 labels.append(label)
+                # デバッグ出力は手牌のみ（捨て牌は出力しない）
+                # print(f"認識: {label} ({prob*100:.1f}%)", end=" ")
     return boxes, labels, probs
     translated_tensor = translate_boxes_to_tensors(all_boxes)
     # Stellen Sie sicher, dass Ihre make_prediction Funktion die Rohwahrscheinlichkeiten zurückgibt
+    probs = make_prediction(discard_model, translated_tensor)
     # Sortieren Sie die Wahrscheinlichkeiten in absteigender Reihenfolge und erhalten Sie die Indizes
     sorted_indices = probs.argsort(descending=True)
 PLAYER_PON_X_TEMP = PLAYER_PON_X
 PLAYER_PON_W_TEMP = PLAYER_PON_W
+# Initialize the PyQt5 application and show the overlay window
+print("UIを初期化中...")
+app = QApplication(sys.argv)
+overlay = TransparentOverlay()
+overlay.show()
+print("  ✓ 透明オーバーレイウィンドウを表示")
+# Module-level state holding the currently recommended tile
+current_recommendation = None
+previous_hand_count = 0  # tile count remembered from the previous frame
+print("\n" + "="*60)
+print("起動完了！")
+print("="*60)
+print("左上の透明ウィンドウに手牌と推奨牌を表示します")
+print("Spaceキー: 推奨牌を自動クリック")
+print("Dキー: デバッグ用に画面キャプチャを保存")
+print("二値化閾値: 100 (lower) - 明るい牌を検出")
+print("認識閾値: 85% - 高精度のみ採用")
+print("ウィンドウを閉じる: 終了")
+print("="*60 + "\n")
+def process_frame():
+    """フレーム処理とUI更新"""
+    global PLAYER_PON_X_TEMP, PLAYER_PON_W_TEMP, PLAYER_HAND_W_TEMP, current_recommendation, previous_hand_count, window
+    # ウィンドウ位置を毎回更新（ウィンドウが移動しても追従）
+    try:
+        old_window = window
+        window = gw.getWindowsWithTitle(window_title)[0]
+        # デバッグ: ウィンドウ位置が変わったら通知
+        if old_window and (old_window.left != window.left or old_window.top != window.top):
+            print(f"\nウィンドウ移動検出: ({old_window.left}, {old_window.top}) → ({window.left}, {window.top})")
+    except IndexError:
+        print("\r雀魂ウィンドウが見つかりません     ", end="", flush=True)
+        return
     screenshot = ImageGrab.grab(bbox=(window.left, window.top, window.right, window.bottom), all_screens=True)
     frame = np.array(screenshot)
     frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
+    # デバッグ: 'd'キーでキャプチャを保存
+    if keyboard.is_pressed('d'):
+        timestamp = int(time.time())
+        filename = f"debug_capture_{timestamp}.png"
+        cv2.imwrite(filename, frame)
+        print(f"\n📷 キャプチャ保存: {filename} (座標: left={window.left}, top={window.top}, right={window.right}, bottom={window.bottom})")
+        # 手牌領域も保存
+        roi = frame[PLAYER_HAND_Y:PLAYER_HAND_Y + PLAYER_HAND_H, PLAYER_HAND_X:PLAYER_HAND_X + PLAYER_HAND_W]
+        cv2.imwrite(f"debug_hand_{timestamp}.png", roi)
+        print(f"📷 手牌領域保存: debug_hand_{timestamp}.png")
+        time.sleep(0.5)  # 連続保存を防ぐ
     # Analyze regions and get boxes, labels, and probabilities
     player_pon_boxes, player_pon_labels, player_pon_probs = analyze_region(frame, PLAYER_PON_X_TEMP, PLAYER_PON_Y,
     player_hand_boxes, player_hand_labels, player_hand_probs = analyze_region(frame, PLAYER_HAND_X, PLAYER_HAND_Y,
                                                                               PLAYER_HAND_W_TEMP, PLAYER_HAND_H)
+    # 手牌認識の詳細をコンソールに出力
+    if len(player_hand_labels) > 0:
+        print(f"\n手牌検出: ", end="")
+        for i, label in enumerate(player_hand_labels):
+            print(f"{label}({player_hand_probs[i]*100:.1f}%) ", end="")
     player_throw_boxes, player_throw_labels, player_throw_probs = analyze_region(frame, PLAYER_THROW_X, PLAYER_THROW_Y,
                                                                                  PLAYER_THROW_W, PLAYER_THROW_H)
     right_player_throw_boxes, right_player_throw_labels, right_player_throw_probs = analyze_region(frame,
                                                                                                             OPPOSITE_PLAYER_THROW_Y,
                                                                                                             OPPOSITE_PLAYER_THROW_W,
                                                                                                             OPPOSITE_PLAYER_THROW_H)
+    # UI更新: 手牌
+    current_hand_count = len(player_hand_labels) + len(player_pon_labels)
+    if len(player_hand_labels) > 0:
+        overlay.update_hand(player_hand_labels)
+        # シンプルな表示（タイムスタンプ付き）
+        hand_str = " ".join(player_hand_labels)
+        current_time = time.strftime("%H:%M:%S")
+        # 手牌枚数が変化したら通知
+        if current_hand_count != previous_hand_count:
+            if current_hand_count == 14:
+                print(f"\n★ツモ! 14枚になりました", end="")
+            print(f"\n[{current_time}] [{len(player_hand_labels)}枚] {hand_str}     ", end="", flush=True)
+            previous_hand_count = current_hand_count
+        # else:
+        #     print(f"\r[{len(player_hand_labels)}枚] {hand_str}     ", end="", flush=True)
+    else:
+        # 手牌が認識されていない場合のデバッグ情報
+        print(f"\r⚠ 手牌未検出 (座標: x={PLAYER_HAND_X}, y={PLAYER_HAND_Y}, w={PLAYER_HAND_W_TEMP}, h={PLAYER_HAND_H}) 二値化閾値=100     ", end="", flush=True)
+        previous_hand_count = 0
     all_boxes = {
         "player_hand": player_hand_boxes,
         "player_hand_labels": player_hand_labels,
         "opposite_player_throw_labels": opposite_player_throw_labels
     }
+    # 推奨牌の計算と表示
     if len(player_hand_labels) + len(player_pon_labels) >= 14:
+        print(f"\n自分の番 (手牌:{len(player_hand_labels)}+ポン:{len(player_pon_labels)}={len(player_hand_labels)+len(player_pon_labels)}枚)", end="")
+        # 推奨牌を計算
+        try:
+            translated_tensor = translate_boxes_to_tensors(all_boxes)
+            probs = make_prediction(discard_model, translated_tensor)
+            # 最も確率の高い牌を取得
+            sorted_indices = probs.argsort(descending=True).squeeze()
+            # デバッグ: モデルの上位推奨を表示
+            print(f"\nモデル推奨TOP5: ", end="")
+            for i, idx in enumerate(sorted_indices[:5]):
+                top_idx = int(idx.item())
+                tile = translate_to_vision(top_idx)
+                prob = probs[0][top_idx].item() * 100
+                in_hand = "✓" if tile in player_hand_labels else "✗"
+                print(f"{tile}({prob:.1f}%{in_hand}) ", end="")
+            # 手牌に存在する牌の中から最も確率の高いものを選択
+            found_recommendation = False
+            for idx in sorted_indices[:10]:  # 上位10個をチェック
+                top_idx = int(idx.item())
+                recommended_tile = translate_to_vision(top_idx)
+                # 手牌に存在する牌のみを推奨
+                if recommended_tile in player_hand_labels:
+                    current_recommendation = recommended_tile
+                    overlay.update_recommendation(recommended_tile)
+                    found_recommendation = True
+                    print(f" → 推奨:{recommended_tile}", end="")
+                    # Spaceキーで自動クリック
+                    if keyboard.is_pressed('space'):
+                        overlay.status_label.setText("クリック中...")
+                        overlay.status_label.setStyleSheet("""
+                            QLabel {
+                                color: #FF0000;
+                                background-color: rgba(0, 0, 0, 150);
+                                padding: 5px;
+                                border-radius: 3px;
+                            }
+                        """)
+                        # 実際のクリック処理
+                        for i, label in enumerate(player_hand_labels):
+                            if label == recommended_tile:
+                                box = player_hand_boxes[i]
+                                x, y = box[0] + (box[2] - box[0]) // 2, box[1] + (box[3] - box[1]) // 2
+                                abs_x = window.left + x
+                                abs_y = window.top + y
+                                pyautogui.click(abs_x, abs_y)
+                                print(f" クリック!", end="")
+                                break
+                        time.sleep(0.5)
+                        overlay.status_label.setText("✓ 起動完了 | Space: 自動クリック | 更新: 0.2秒毎")
+                        overlay.status_label.setStyleSheet("""
+                            QLabel {
+                                color: #00FF00;
+                                background-color: rgba(0, 0, 0, 150);
+                                padding: 5px;
+                                border-radius: 3px;
+                            }
+                        """)
+                    break
+            if not found_recommendation:
+                overlay.update_recommendation(None)
+                print(f"\n→ 手牌に該当する推奨牌が見つかりません（TOP10に手牌の牌なし）", end="")
+        except Exception as e:
+            print(f"\n推奨計算エラー: {e}", end="")
+            import traceback
+            traceback.print_exc()
+            overlay.update_recommendation(None)
+    else:
+        overlay.update_recommendation(None)
+# Run process_frame from a Qt timer (200 ms interval, for more frequent updates)
+timer = QTimer()
+timer.timeout.connect(process_frame)
+timer.start(200)
+# Start the Qt event loop and exit with its return code
+sys.exit(app.exec_())
 # %%

vision_transformer_local/config.json ADDED Viewed

	@@ -0,0 +1,97 @@

+{
+  "architectures": [
+    "ViTForImageClassification"
+  ],
+  "attention_probs_dropout_prob": 0.0,
+  "dtype": "float32",
+  "encoder_stride": 16,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.0,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "1b",
+    "1": "1n",
+    "2": "1p",
+    "3": "2b",
+    "4": "2n",
+    "5": "2p",
+    "6": "3b",
+    "7": "3n",
+    "8": "3p",
+    "9": "4b",
+    "10": "4n",
+    "11": "4p",
+    "12": "5b",
+    "13": "5n",
+    "14": "5p",
+    "15": "6b",
+    "16": "6n",
+    "17": "6p",
+    "18": "7b",
+    "19": "7n",
+    "20": "7p",
+    "21": "8b",
+    "22": "8n",
+    "23": "8p",
+    "24": "9b",
+    "25": "9n",
+    "26": "9p",
+    "27": "ew",
+    "28": "gd",
+    "29": "nw",
+    "30": "rd",
+    "31": "sw",
+    "32": "wd",
+    "33": "ww"
+  },
+  "image_size": 224,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    "1b": "0",
+    "1n": "1",
+    "1p": "2",
+    "2b": "3",
+    "2n": "4",
+    "2p": "5",
+    "3b": "6",
+    "3n": "7",
+    "3p": "8",
+    "4b": "9",
+    "4n": "10",
+    "4p": "11",
+    "5b": "12",
+    "5n": "13",
+    "5p": "14",
+    "6b": "15",
+    "6n": "16",
+    "6p": "17",
+    "7b": "18",
+    "7n": "19",
+    "7p": "20",
+    "8b": "21",
+    "8n": "22",
+    "8p": "23",
+    "9b": "24",
+    "9n": "25",
+    "9p": "26",
+    "ew": "27",
+    "gd": "28",
+    "nw": "29",
+    "rd": "30",
+    "sw": "31",
+    "wd": "32",
+    "ww": "33"
+  },
+  "layer_norm_eps": 1e-12,
+  "model_type": "vit",
+  "num_attention_heads": 12,
+  "num_channels": 3,
+  "num_hidden_layers": 12,
+  "patch_size": 16,
+  "pooler_act": "tanh",
+  "pooler_output_size": 768,
+  "problem_type": "single_label_classification",
+  "qkv_bias": true,
+  "transformers_version": "4.57.1"
+}

vision_transformer_local/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3d8e6c235fbcb30498788fac92f880c5c004b7861c3f90599dc724616ae09efd
+size 343322416

vision_transformer_local/preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,23 @@

+{
+  "do_convert_rgb": null,
+  "do_normalize": true,
+  "do_rescale": true,
+  "do_resize": true,
+  "image_mean": [
+    0.5,
+    0.5,
+    0.5
+  ],
+  "image_processor_type": "ViTImageProcessor",
+  "image_std": [
+    0.5,
+    0.5,
+    0.5
+  ],
+  "resample": 2,
+  "rescale_factor": 0.00392156862745098,
+  "size": {
+    "height": 224,
+    "width": 224
+  }
+}