Spaces:

uc-ctds
/

GDC-Cohort-Copilot

Running on Zero

App Files Files Community

Add active selections box

by songs1 - opened Jul 16

base: refs/heads/main

←

from: refs/pr/2

Discussion Files changed

+122

-3

Files changed (2) hide show

app.py +118 -3
style.css +4 -0

app.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import json
 import os
 import gradio as gr
 import requests
@@ -11,9 +12,9 @@ from guidance import json as gen_json
 from guidance.models import Transformers
 from transformers import AutoTokenizer, GPT2LMHeadModel, set_seed
-from schema import GDCCohortSchema
-DEBUG = False
 EXAMPLE_INPUTS = [
     "bam files for TCGA-BRCA",
     "kidney or adrenal gland cancers with alcohol history",
@@ -23,7 +24,7 @@ EXAMPLE_INPUTS = [
 GDC_CASES_API_ENDPOINT = "https://api.gdc.cancer.gov/cases"
 MODEL_NAME = "uc-ctds/gdc-cohort-llm-gpt2-s1M"
 TOKENIZER_NAME = MODEL_NAME
-AUTH_TOKEN = os.environ.get("HF_TOKEN", False)
 with open("config.yaml", "r") as f:
     CONFIG = yaml.safe_load(f)
@@ -380,6 +381,79 @@ def update_cards_with_counts(cohort_filter: str, *selected_filters_per_card):
     return card_updates + [gr.update(value=f"{case_count} Cases")]
 def prepare_value_count(value, count):
     return f"{value} [{count}]"
@@ -448,6 +522,12 @@ with gr.Blocks(css_paths="style.css") as demo:
         with gr.Column(scale=7):
             text_input = gr.Textbox(
                 label="Describe the cohort you're looking for:",
                 submit_btn="Generate Cohort",
                 elem_id="description-input",
                 placeholder="Enter a cohort description to begin...",
@@ -483,9 +563,21 @@ with gr.Blocks(css_paths="style.css") as demo:
                 elem_id="json-output",
             )
     with gr.Row():
         gr.Markdown(
             "The generated cohort filter will autopopulate into the filter cards below. "
             "Refine your search using the interactive checkboxes. "
             "Note that many other options can be found by selecting the different tabs on the left."
         )
@@ -576,6 +668,10 @@ with gr.Blocks(css_paths="style.css") as demo:
         fn=process_query,
         inputs=text_input,
         outputs=filter_cards + [json_output],
     )
     # Update JSON based on cards
@@ -587,14 +683,33 @@ with gr.Blocks(css_paths="style.css") as demo:
                 fn=update_json_from_cards,
                 inputs=filter_cards,
                 outputs=json_output,
             )
         else:
             filter_card.input(
                 fn=update_json_from_cards,
                 inputs=filter_cards,
                 outputs=json_output,
             )
     # Update checkboxes after executing filter query
     json_output.change(
         fn=update_cards_with_counts,

 import json
 import os
+from collections import defaultdict
 import gradio as gr
 import requests
 from guidance.models import Transformers
 from transformers import AutoTokenizer, GPT2LMHeadModel, set_seed
+from schema import GDCCohortSchema  # isort: skip
+DEBUG = "DEBUG" in os.environ
 EXAMPLE_INPUTS = [
     "bam files for TCGA-BRCA",
     "kidney or adrenal gland cancers with alcohol history",
 GDC_CASES_API_ENDPOINT = "https://api.gdc.cancer.gov/cases"
 MODEL_NAME = "uc-ctds/gdc-cohort-llm-gpt2-s1M"
 TOKENIZER_NAME = MODEL_NAME
+AUTH_TOKEN = os.environ.get("HF_TOKEN", False)  # HF_TOKEN must be set to use auth
 with open("config.yaml", "r") as f:
     CONFIG = yaml.safe_load(f)
     return card_updates + [gr.update(value=f"{case_count} Cases")]
+def update_active_selections(*selected_filters_per_card):
+    choices = []
+    for card_name, selected_filters in zip(CARD_NAMES, selected_filters_per_card):
+        # use the default values to determine card type (checkbox, range, etc)
+        default_values = CARD_2_VALUES[card_name]
+        if isinstance(default_values, list):
+            # checkbox
+            for selected_value in selected_filters:
+                base_value = get_base_value(selected_value)
+                choices.append(f"{card_name.upper()}: {base_value}")
+        elif isinstance(default_values, dict):
+            # range-slider, maybe other options in the future?
+            assert (
+                default_values["type"] == "range"
+            ), f"Expected range slider for card {card_name}"
+            lo, hi = selected_filters
+            if lo != default_values["min"] or hi != default_values["max"]:
+                # only add range filter if not default
+                lo, hi = int(lo), int(hi)
+                choices.append(f"{card_name.upper()}: {lo}-{hi}")
+        else:
+            raise ValueError(f"Unknown values for card {card_name}")
+    return gr.update(choices=choices, value=choices)
+def update_cards_from_active(current_selections, *selected_filters_per_card):
+    # active selector uses a flattened list so re-agg values under card groups
+    grouped_selections = defaultdict(set)
+    for k_v in current_selections:
+        idx = k_v.find(": ")
+        k, v = k_v[:idx], k_v[idx + 2 :]
+        grouped_selections[k].add(v)
+    card_updates = []
+    for card_name, selected_filters in zip(CARD_NAMES, selected_filters_per_card):
+        # use the default values to determine card type (checkbox, range, etc)
+        default_values = CARD_2_VALUES[card_name]
+        if isinstance(default_values, list):
+            # checkbox
+            updated_values = []
+            for selected_value in selected_filters:
+                base_value = get_base_value(selected_value)
+                if base_value in grouped_selections[card_name.upper()]:
+                    updated_values.append(selected_value)
+            update_obj = gr.update(value=updated_values)
+        elif isinstance(default_values, dict):
+            # range-slider, maybe other options in the future?
+            assert (
+                default_values["type"] == "range"
+            ), f"Expected range slider for card {card_name}"
+            # the active selector cannot change range values
+            # so if present as an active selection, no action is needed
+            # otherwise, reset entire range selector
+            if card_name.upper() in grouped_selections:
+                update_obj = gr.update()
+            else:
+                update_obj = gr.update(
+                    value=(
+                        default_values["min"],
+                        default_values["max"],
+                    )
+                )
+        else:
+            raise ValueError(f"Unknown values for card {card_name}")
+        card_updates.append(update_obj)
+    # also remove unselected value as possible choice
+    active_selection_update = gr.update(choices=current_selections)
+    return [active_selection_update] + card_updates
 def prepare_value_count(value, count):
     return f"{value} [{count}]"
         with gr.Column(scale=7):
             text_input = gr.Textbox(
                 label="Describe the cohort you're looking for:",
+                info=(
+                    "Only provide the cohort characteristics. "
+                    "Do not include extraneous text. "
+                    "For example, write 'patients with X' "
+                    "instead of 'I would like patients with X':"
+                ),
                 submit_btn="Generate Cohort",
                 elem_id="description-input",
                 placeholder="Enter a cohort description to begin...",
                 elem_id="json-output",
             )
+    with gr.Row(equal_height=True):
+        with gr.Column(scale=1, min_width=250):
+            gr.Markdown("## Currently Selected Filters")
+        with gr.Column(scale=4):
+            active_selections = gr.CheckboxGroup(
+                choices=[],
+                show_label=False,
+                interactive=True,
+                elem_id="active-selections",
+            )
     with gr.Row():
         gr.Markdown(
             "The generated cohort filter will autopopulate into the filter cards below. "
+            "**GDC Cohort Copilot can make mistakes!** "
             "Refine your search using the interactive checkboxes. "
             "Note that many other options can be found by selecting the different tabs on the left."
         )
         fn=process_query,
         inputs=text_input,
         outputs=filter_cards + [json_output],
+    ).success(
+        fn=update_active_selections,
+        inputs=filter_cards,
+        outputs=[active_selections],
     )
     # Update JSON based on cards
                 fn=update_json_from_cards,
                 inputs=filter_cards,
                 outputs=json_output,
+            ).success(
+                fn=update_active_selections,
+                inputs=filter_cards,
+                outputs=[active_selections],
             )
         else:
             filter_card.input(
                 fn=update_json_from_cards,
                 inputs=filter_cards,
                 outputs=json_output,
+            ).success(
+                fn=update_active_selections,
+                inputs=filter_cards,
+                outputs=[active_selections],
             )
+    # Enable functionality of the active filter selectors
+    active_selections.input(
+        fn=update_cards_from_active,
+        inputs=[active_selections] + filter_cards,
+        outputs=[active_selections] + filter_cards,
+    ).success(
+        fn=update_json_from_cards,
+        inputs=filter_cards,
+        outputs=json_output,
+    )
     # Update checkboxes after executing filter query
     json_output.change(
         fn=update_cards_with_counts,

style.css CHANGED Viewed

@@ -27,6 +27,10 @@
     height: 80% !important;
 }
 .card-group, .card-group > div {
     background-color: transparent;
     border: 0px;

     height: 80% !important;
 }
+#active-selections { /* the checkbox group created with elem_id="active-selections" in app.py */
+    height: 50px !important; /* pin the box to a fixed 50px height */
+}
 .card-group, .card-group > div {
     background-color: transparent;
     border: 0px;