OpenMedZoo
/

MedGo

+---
+# Model card metadata. Sequences below use the indented style so they match
+# the model-index section of this same front matter.
+license: apache-2.0
+# Language codes declared for the model.
+language:
+  - zh
+  - en
+# Metric identifiers listed for this card.
+metrics:
+  - accuracy
+# Model(s) declared as the base of this one.
+base_model:
+  - Qwen/Qwen3-30B-A3B-Instruct-2507
+pipeline_tag: text-generation
+library_name: transformers
+tags:
+  - medical
+# Evaluation results for the model card, grouped into three sections:
+# Medical Knowledge, Clinical Reasoning and Medical Standard.
+# Every entry uses the text-generation task and a single accuracy metric,
+# and none of the results is marked as verified.
+model-index:
+  # NOTE(review): the name says "32B" while base_model in this front matter is
+  # Qwen/Qwen3-30B-A3B-Instruct-2507 — confirm the intended model name.
+  - name: Med-Go-32B
+    results:
+      # --- Medical Knowledge ---
+      - task:
+          type: text-generation
+        dataset:
+          type: medical_eval_hle
+          name: Medical-Eval-HLE
+        metrics:
+          - type: accuracy
+            value: 19.4
+            name: accuracy
+            verified: false
+      - task:
+          type: text-generation
+        dataset:
+          type: supergpqa
+          name: SuperGPQA
+        metrics:
+          - type: accuracy
+            value: 37.2
+            name: accuracy
+            verified: false
+      - task:
+          type: text-generation
+        dataset:
+          type: medbullets
+          name: Medbullets
+        metrics:
+          - type: accuracy
+            value: 64.3
+            name: accuracy
+            verified: false
+      - task:
+          type: text-generation
+        dataset:
+          type: mmlu_pro
+          name: MMLU-pro
+        metrics:
+          - type: accuracy
+            value: 74.7
+            name: accuracy
+            verified: false
+      # NOTE(review): same value as the MMLU-pro entry above (74.7) — confirm
+      # this is the measured score and not a copy-paste slip.
+      - task:
+          type: text-generation
+        dataset:
+          type: afrimedqa
+          name: AfrimedQA
+        metrics:
+          - type: accuracy
+            value: 74.7
+            name: accuracy
+            verified: false
+      - task:
+          type: text-generation
+        dataset:
+          type: medmcqa
+          name: MedMCQA
+        metrics:
+          - type: accuracy
+            value: 68.3
+            name: accuracy
+            verified: false
+      - task:
+          type: text-generation
+        dataset:
+          type: medqa_usmle
+          name: MedQA-USMLE
+        metrics:
+          - type: accuracy
+            value: 76.8
+            name: accuracy
+            verified: false
+      - task:
+          type: text-generation
+        dataset:
+          type: cmb
+          name: CMB
+        metrics:
+          - type: accuracy
+            value: 92.5
+            name: accuracy
+            verified: false
+      - task:
+          type: text-generation
+        dataset:
+          type: cmexam
+          name: CMExam
+        metrics:
+          - type: accuracy
+            value: 87.4
+            name: accuracy
+            verified: false
+      - task:
+          type: text-generation
+        dataset:
+          type: pubmedqa
+          name: PubMedQA
+        metrics:
+          - type: accuracy
+            value: 76.6
+            name: accuracy
+            verified: false
+      - task:
+          type: text-generation
+        dataset:
+          type: medexqa
+          name: MedExQA
+        metrics:
+          - type: accuracy
+            value: 81.5
+            name: accuracy
+            verified: false
+      - task:
+          type: text-generation
+        dataset:
+          type: explaincpe
+          name: ExplainCPE
+        metrics:
+          - type: accuracy
+            value: 89.5
+            name: accuracy
+            verified: false
+      # NOTE(review): same value as the CMExam entry above (87.4) — confirm
+      # this is the measured score and not a copy-paste slip.
+      - task:
+          type: text-generation
+        dataset:
+          type: mmlu_med
+          name: MMLU-Med
+        metrics:
+          - type: accuracy
+            value: 87.4
+            name: accuracy
+            verified: false
+      # --- Clinical Reasoning ---
+      - task:
+          type: text-generation
+        dataset:
+          type: medxperqa
+          name: MedXperQA
+        metrics:
+          - type: accuracy
+            value: 20.7
+            name: accuracy
+            verified: false
+      - task:
+          type: text-generation
+        dataset:
+          type: anesbench
+          name: AnesBench
+        metrics:
+          - type: accuracy
+            value: 53.1
+            name: accuracy
+            verified: false
+      - task:
+          type: text-generation
+        dataset:
+          type: diagnosisarena
+          name: DiagnosisArena
+        metrics:
+          - type: accuracy
+            value: 64.4
+            name: accuracy
+            verified: false
+      - task:
+          type: text-generation
+        dataset:
+          type: clinbench_hbp
+          name: Clinbench-HBP
+        metrics:
+          - type: accuracy
+            value: 80.6
+            name: accuracy
+            verified: false
+      # --- Medical Standard ---
+      - task:
+          type: text-generation
+        dataset:
+          type: medpair
+          name: MedPAIR
+        metrics:
+          - type: accuracy
+            value: 32.3
+            name: accuracy
+            verified: false
+      - task:
+          type: text-generation
+        dataset:
+          type: amqa
+          name: AMQA
+        metrics:
+          - type: accuracy
+            value: 72.7
+            name: accuracy
+            verified: false
+      - task:
+          type: text-generation
+        dataset:
+          type: medethicaleval
+          name: MedethicalEval
+        metrics:
+          - type: accuracy
+            value: 92.2
+            name: accuracy
+            verified: false
+---