Spaces:
Runtime error
Runtime error
import logging
import os
from collections import OrderedDict
from io import StringIO
from pathlib import Path
from typing import Optional

import gradio as gr
import matplotlib.pyplot as plt
import networkx as nx
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from dotenv import load_dotenv
# from PIL import Image
# import cv2
# import numpy as np

from model import (
    dfg2networkx,
    discover_process_map,
    discover_process_map_activities_connections,
    discover_process_map_variants,
    view_process_map,
)
# Load environment settings from a local .env file, if one exists; the launch
# code at the bottom of this module reads PORT, USER_NAME and PASSWORD.
load_dotenv()

# Module-level logger; DEBUG so that this logger does not filter out the
# logger.debug(...) dumps emitted by get_stats.
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
def get_data(temp_file, case_col, activity_col, timestamp_col, state: dict):
    """Load the uploaded CSV event log and refresh the three column selectors.

    Args:
        temp_file: Either a filesystem path (``str``) or an object exposing
            the path through its ``name`` attribute, as delivered by the
            upload button.
        case_col: Current value of the "Case" dropdown. Unused; it is only
            part of the signature because the dropdown is wired as an input.
        activity_col: Current value of the "Activity" dropdown. Unused.
        timestamp_col: Current value of the "Timestamp" dropdown. Unused.
        state: Per-session dict; the parsed frame is stored under ``'df'``.

    Returns:
        A 5-tuple ``(df, case_dropdown, activity_dropdown,
        timestamp_dropdown, state)`` where each dropdown offers every column
        of the freshly loaded frame as a choice.
    """
    csv_path = temp_file if isinstance(temp_file, str) else temp_file.name

    # ';|,' is a regular expression ("split on ';' or ','").  Only the python
    # parser engine supports regex separators, so request it explicitly
    # instead of relying on pandas' implicit fallback, which emits a
    # ParserWarning on every upload.
    # NOTE: regex separators ignore quoting, so a quoted field that contains
    # ',' or ';' will still be split.
    df = pd.read_csv(csv_path, sep=';|,', engine='python')

    state['df'] = df
    return (
        df,
        gr.Dropdown(choices=list(df.columns), multiselect=False, label="Case", info="選擇 Case ID"),
        gr.Dropdown(choices=list(df.columns), multiselect=False, label="Activity", info="選擇 Activity ID"),
        gr.Dropdown(choices=list(df.columns), multiselect=False, label="Timestamp", info="選擇 Timestamp"),
        state,
    )
def get_stats(state: dict):
    """Build the summary table and the three per-case bar charts.

    Expects ``state['df']`` to already carry the normalised columns
    ``case_id``, ``activity`` and ``timestamp`` (produced by ``etl``).

    Args:
        state: Per-session dict holding the event log under ``'df'``.

    Returns:
        A 5-tuple ``(summary, case_stats_plot, lead_time_plot,
        avg_duration_plot, state)``:

        * ``summary`` - DataFrame with row count, number of distinct cases
          and activities, and the first / last timestamp.
        * ``case_stats_plot`` - number of events per case.
        * ``lead_time_plot`` - whole hours between the first and the last
          event of each case.
        * ``avg_duration_plot`` - see the NOTE(review) in the body.

    Raises:
        gr.Error: If any of the required columns is missing, i.e. no file
            was uploaded or the columns were not selected yet.
    """
    # The frame is only read here, so no defensive copy is needed.
    df = state.get('df', pd.DataFrame())

    required = ('case_id', 'activity', 'timestamp')
    missing = [col for col in required if col not in df.columns]
    if missing:
        # Surface a readable message in the UI instead of a bare KeyError.
        raise gr.Error(
            f"Missing column(s) {missing}: upload a CSV and press "
            "'Select Columns' first."
        )

    summary = pd.DataFrame({
        "metric": ["資料筆數", "Case 數量", "Activity 數量", "起始時間", "結束時間"],
        "value": [
            df.shape[0],
            df['case_id'].nunique(),
            df['activity'].nunique(),
            df['timestamp'].min(),
            df['timestamp'].max(),
        ],
    })

    def _lead_time_hours(timestamps):
        # Whole hours between the first and the last event of one case.
        return (timestamps.max() - timestamps.min()).total_seconds() // 3600

    # as_index=False already keeps 'case_id' as a regular column, so the
    # former trailing reset_index() only added a spurious 'index' column.
    per_case = df.groupby('case_id', as_index=False)
    case_stats = per_case.agg(count=('activity', 'size'))
    logger.debug("case stats: %s", case_stats)

    case_lead_time = per_case.agg(duration=('timestamp', _lead_time_hours))

    # NOTE(review): the previous groupby.apply helper computed exactly the
    # same max-min span as the lead time, so the values are reused here
    # (this also avoids the deprecated apply-on-grouping-columns path).  The
    # chart is labelled as an *average* duration - confirm the intended
    # metric.
    case_avg_duration = case_lead_time.rename(columns={'duration': 'avg_duration'})
    logger.debug("case lead time: %s", case_lead_time)

    return (
        summary,
        gr.BarPlot(case_stats, x="case_id", y="count", title="Case Stats", tooltip=["case_id", "count"], width=None),
        gr.BarPlot(case_lead_time, x="case_id", y="duration", title="Case Lead Time", tooltip=["case_id", "duration"], width=None),
        gr.BarPlot(case_avg_duration, x="case_id", y="avg_duration", title="Case Average Duration", tooltip=["case_id", "avg_duration"], width=None),
        state,
    )
def get_process_map(state: Optional[dict] = None):
    """Run process-model discovery on the current event log.

    Args:
        state: Per-session dict holding the event log under ``'df'``.  When
            omitted, a fresh empty dict is used (an empty DataFrame is then
            passed to the discovery helper).

    Returns:
        ``(img, state)`` where ``img`` is the second element returned by
        ``discover_process_map(df, type='petrinet')``.
    """
    # A literal {} default would be one dict shared by every call.
    if state is None:
        state = {}

    df = state.get('df', pd.DataFrame()).copy()
    # The first element of the result (the net itself) is not used here.
    _net, img = discover_process_map(df, type='petrinet')
    return img, state
def get_process_map_variants(top_k: int = 1, state: Optional[dict] = None):
    """Discover the directly-follows graph of the top-k variants and plot it.

    Args:
        top_k: Number of variants forwarded to
            ``discover_process_map_variants``.
        state: Per-session dict holding the event log under ``'df'``.  When
            omitted, a fresh empty dict is used.

    Returns:
        ``(chart, state)`` where ``chart`` is the figure produced by
        ``view_process_map``.  ``state`` is updated in place:

        * ``'top_variant_connections'`` - the DFG entries sorted by their
          value, largest first.
        * ``'top_variant'`` - the DFG plus start / end activities; written
          only on the first call made with ``top_k == 1``.
    """
    # This function writes into ``state``; with a literal {} default those
    # writes would leak from one call into the next.
    if state is None:
        state = {}

    df = state.get('df', pd.DataFrame()).copy()
    dfg, start_activities, end_activities = discover_process_map_variants(df, top_k, type='dfg')

    state['top_variant_connections'] = OrderedDict(
        sorted(dfg.items(), key=lambda item: item[1], reverse=True)
    )

    # NOTE(review): 'top_variant' is never refreshed once set, so it may
    # describe a previously uploaded file - confirm this is intended.
    if 'top_variant' not in state and top_k == 1:
        state['top_variant'] = {
            'dfg': dfg,
            'start_activities': start_activities,
            'end_activities': end_activities,
        }

    nx_graph = dfg2networkx(dfg, start_activities, end_activities)
    chart = view_process_map(nx_graph, process_type='dfg', layout_type='sfdp')
    return chart, state
def get_process_map_activities_connections(activity_rank: int = 0, connection_rank: int = 0, state: Optional[dict] = None):
    """Discover a DFG filtered by activity / connection rank and plot it.

    Args:
        activity_rank: Forwarded unchanged to
            ``discover_process_map_activities_connections``.
        connection_rank: Forwarded unchanged to the same helper.
        state: Per-session dict holding the event log under ``'df'``; it is
            also handed to the discovery helper.  When omitted, a fresh
            empty dict is used.

    Returns:
        ``(chart, state)`` where ``chart`` is the figure produced by
        ``view_process_map``.
    """
    # The dict is passed on to the discovery helper; a literal {} default
    # would be one object shared by every call.
    if state is None:
        state = {}

    df = state.get('df', pd.DataFrame()).copy()
    dfg, start_activities, end_activities = discover_process_map_activities_connections(
        df,
        activity_rank=activity_rank,
        connection_rank=connection_rank,
        state=state,
    )
    nx_graph = dfg2networkx(dfg, start_activities, end_activities)
    chart = view_process_map(nx_graph, process_type='dfg', layout_type='sfdp')
    return chart, state
def etl(case_col, activity_col, timestamp_col, state: dict):
    """Normalise the user-selected columns into the canonical event-log schema.

    The case and activity columns are converted to strings, the timestamp
    column is parsed with ``pd.to_datetime``, and the three columns are
    renamed to ``case_id``, ``activity`` and ``timestamp``.  The frame
    previously stored in ``state['df']`` is left untouched; a converted copy
    replaces it.

    Args:
        case_col: Name of the column that identifies the case.
        activity_col: Name of the column that identifies the activity.
        timestamp_col: Name of the column holding the event time.
        state: Per-session dict; must contain the raw frame under ``'df'``.

    Returns:
        ``(df, state)`` - the normalised frame and the updated state dict.

    Raises:
        KeyError: If ``state`` has no ``'df'`` entry or a selected column
            does not exist in the frame.
    """
    df = state['df'].copy()

    # Use plain column assignment so the column (and its dtype) is replaced.
    # ``df.loc[:, col] = values`` writes into the existing column in place on
    # pandas >= 2.0, which keeps parsed timestamps inside an object column
    # and triggers incompatible-dtype warnings for the string conversions.
    df[case_col] = df[case_col].astype(str)
    df[activity_col] = df[activity_col].astype(str)
    df[timestamp_col] = pd.to_datetime(df[timestamp_col])  # format='%Y-%m-%d %H:%M:%S'

    df = df.rename(columns={
        case_col: 'case_id',
        activity_col: 'activity',
        timestamp_col: 'timestamp',
    })

    state['df'] = df
    return df, state
## --- block --- ##
# NOTE(review): the nesting below was reconstructed from a flattened copy of
# this file - confirm the container each component belongs to.

# Centre the page title.
css = """
h1 {
text-align: center;
display:block;
}
"""

with gr.Blocks(css=css) as demo:
    gr.Markdown("# 🌟 Process Discovery 🌟")

    # Per-session storage shared by every callback (holds the event log).
    state = gr.State(value={})

    with gr.Row():
        upl_btn = gr.UploadButton(
            label="Upload",
            file_types=['.csv'],
            file_count="single",
        )

    with gr.Accordion('Data Preview'):
        df = gr.Dataframe()

    with gr.Row():
        case_col = gr.Dropdown(multiselect=False, label="Case", info="選擇 Case ID")
        activity_col = gr.Dropdown(multiselect=False, label="Activity", info="選擇 Activity ID")
        timestamp_col = gr.Dropdown(multiselect=False, label="Timestamp", info="選擇 Timestamp")

    # Uploading a file fills the preview and repopulates the three selectors.
    upl_btn.upload(
        fn=get_data,
        inputs=[upl_btn, case_col, activity_col, timestamp_col, state],
        outputs=[df, case_col, activity_col, timestamp_col, state],
    )

    # Confirming the selection normalises the chosen columns via etl.
    column_btn = gr.Button("Select Columns")
    column_btn.click(
        fn=etl,
        inputs=[case_col, activity_col, timestamp_col, state],
        outputs=[df, state],
    )

    with gr.Row():
        with gr.Tab('Data Explorer'):
            de_btn = gr.Button("Get Stats")
            with gr.Row():
                summary = gr.Dataframe(label="Summary", interactive=False, height=300)
                chart1 = gr.BarPlot(label="Case Stats")
                chart2 = gr.BarPlot(label="Case Lead Time Stats")
                chart3 = gr.BarPlot(label="Case Average Activity Time Stats")
            de_btn.click(
                fn=get_stats,
                inputs=[state],
                outputs=[summary, chart1, chart2, chart3, state],
            )

        with gr.Tab('Variant Explorer'):
            ve_btn = gr.Button("Get Variants")
            top_k_variant_selector = gr.Slider(
                0,
                10,
                value=1,
                step=1,
                label="Top-K",
                info="選擇 Variant 數量(0: 全選)",
            )
            pmchart = gr.Plot(label="Process Map")
            ve_btn.click(
                fn=get_process_map_variants,
                inputs=[top_k_variant_selector, state],
                outputs=[pmchart, state],
            )

        # TODO: two further tabs are currently disabled:
        #   * 'Process Explorer' - a button plus two 0-10 sliders (activity
        #     and connection rank) feeding
        #     get_process_map_activities_connections into a gr.Plot.
        #   * 'Process Model' - a button feeding get_process_map into a
        #     gr.Image.
if __name__ == "__main__":
    # PORT is optional.  int(None) raised TypeError when it was unset; passing
    # None instead lets Gradio pick its own default port.
    port = os.environ.get("PORT")

    # Enable the login only when both credentials are configured.  A
    # (None, None) pair would switch authentication on with credentials that
    # nobody can enter.
    user_name = os.environ.get("USER_NAME")
    password = os.environ.get("PASSWORD")
    if user_name and password:
        auth = (user_name, password)
    else:
        auth = None
        logger.warning(
            "USER_NAME and/or PASSWORD not set - launching without authentication"
        )

    demo.launch(
        # share=True,
        server_name="0.0.0.0",
        server_port=int(port) if port else None,
        auth=auth,
    )