Update app.py
Browse files
app.py
CHANGED
|
@@ -1,12 +1,12 @@
|
|
| 1 |
import pandas as pd
|
| 2 |
import numpy as np
|
|
|
|
| 3 |
import gradio as gr
|
| 4 |
import matplotlib.pyplot as plt
|
| 5 |
import seaborn as sns
|
| 6 |
from datetime import datetime
|
| 7 |
from sklearn.metrics import confusion_matrix, precision_score, recall_score
|
| 8 |
|
| 9 |
-
# Sample data preparation
|
| 10 |
data = {
|
| 11 |
'transaction_amount': [2500, 799, 9338, 11749, 8999, 1500, 3000, 4000, 300, 5000, 24990],
|
| 12 |
'transaction_date': ['01-11-2024 16:08', '01-11-2024 16:15', '02-11-2024 14:43', '03-11-2024 11:14',
|
|
@@ -25,13 +25,50 @@ data = {
|
|
| 25 |
}
|
| 26 |
|
| 27 |
df = pd.DataFrame(data)
|
| 28 |
-
|
| 29 |
df['transaction_date'] = pd.to_datetime(df['transaction_date'], format='%d-%m-%Y %H:%M')
|
| 30 |
|
| 31 |
np.random.seed(42)
|
| 32 |
df['is_fraud_predicted'] = np.random.choice([0, 1], size=len(df), p=[0.3, 0.7])
|
| 33 |
df['is_fraud_reported'] = np.random.choice([0, 1], size=len(df), p=[0.4, 0.6])
|
| 34 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
def filter_data(start_date, end_date, payer_id, payee_id, transaction_id):
|
| 36 |
filtered_df = df.copy()
|
| 37 |
|
|
|
|
| 1 |
import pandas as pd
|
| 2 |
import numpy as np
|
| 3 |
+
import json
|
| 4 |
import gradio as gr
|
| 5 |
import matplotlib.pyplot as plt
|
| 6 |
import seaborn as sns
|
| 7 |
from datetime import datetime
|
| 8 |
from sklearn.metrics import confusion_matrix, precision_score, recall_score
|
| 9 |
|
|
|
|
| 10 |
data = {
|
| 11 |
'transaction_amount': [2500, 799, 9338, 11749, 8999, 1500, 3000, 4000, 300, 5000, 24990],
|
| 12 |
'transaction_date': ['01-11-2024 16:08', '01-11-2024 16:15', '02-11-2024 14:43', '03-11-2024 11:14',
|
|
|
|
| 25 |
}
|
| 26 |
|
| 27 |
# Working frame for the dashboard; timestamps arrive as day-first strings
# ("01-11-2024 16:08") and are parsed into datetimes in place.
df = pd.DataFrame(data)
df['transaction_date'] = pd.to_datetime(
    df['transaction_date'],
    format='%d-%m-%Y %H:%M',
)

# Seed once so the two synthetic label columns are reproducible. The draws
# must stay in this order: both consume the same global NumPy random stream.
np.random.seed(42)
df['is_fraud_predicted'] = np.random.choice(
    [0, 1], p=[0.3, 0.7], size=len(df)
)
df['is_fraud_reported'] = np.random.choice(
    [0, 1], p=[0.4, 0.6], size=len(df)
)
|
| 33 |
|
| 34 |
+
# Second frame built from the same `data`: every row is given the constant
# reason 'Suspicious Activity' and a random score drawn from [0.6, 0.95).
df_fraud = pd.DataFrame(data).assign(fraud_reason='Suspicious Activity')
df_fraud['fraud_score'] = np.random.uniform(0.6, 0.95, size=len(df_fraud))

# Index the fraud records by transaction id.
# NOTE(review): assumes `data` provides a 'transaction_id_anonymous' column;
# the full dict is not visible in this chunk — confirm.
fraud_dict = {}
for tx_id, reason, score in zip(
    df_fraud['transaction_id_anonymous'],
    df_fraud['fraud_reason'],
    df_fraud['fraud_score'],
):
    fraud_dict[tx_id] = {
        'is_fraud': True,
        'fraud_reason': reason,
        # Plain float so the record can be serialised with json.
        'fraud_score': float(score),
    }
|
| 46 |
+
|
| 47 |
+
def generate_non_fraud_transactions(n, start_id=1000, exclude_ids=None):
    """Build synthetic non-fraud records keyed by ``ANON_<number>`` ids.

    Candidate ids are ``ANON_<start_id>`` through ``ANON_<start_id + n - 1>``.
    A candidate that already appears in ``exclude_ids`` is skipped, not
    replaced, so the result can contain fewer than ``n`` entries.

    Args:
        n: Number of candidate ids to generate.
        start_id: Numeric suffix of the first candidate id.
        exclude_ids: Container of ids that must not be reused. When omitted,
            the module-level ``fraud_dict`` is used, which matches the
            original behaviour.

    Returns:
        A dict mapping each kept id to a record with ``'is_fraud'`` set to
        ``False``, ``'fraud_reason'`` set to ``'Not Fraud'`` and a
        ``'fraud_score'`` float drawn from ``np.random.uniform(0.01, 0.4)``.
    """
    if exclude_ids is None:
        exclude_ids = fraud_dict

    candidate_ids = (f'ANON_{start_id + offset}' for offset in range(n))
    # One random draw per kept id, in id order, exactly as the original loop.
    return {
        tx_id: {
            'is_fraud': False,
            'fraud_reason': 'Not Fraud',
            'fraud_score': float(np.random.uniform(0.01, 0.4)),
        }
        for tx_id in candidate_ids
        if tx_id not in exclude_ids
    }
|
| 62 |
+
|
| 63 |
+
# Record counts: a hard-coded total, the number of fraud records built above,
# and the difference between the two.
# NOTE(review): presumably 172927 is the size of the full training set —
# TODO confirm where this figure comes from.
total_records = 172927
fraud_records = len(fraud_dict)
non_fraud_needed = total_records - fraud_records

# Small batch of non-fraud records with ids starting at ANON_200000.
# NOTE(review): neither `non_fraud_needed` nor `sample_non_fraud` is used in
# the visible part of the file, and only `fraud_dict` is written below —
# confirm whether the non-fraud records were meant to be exported too.
sample_non_fraud = generate_non_fraud_transactions(5, start_id=200000)

# NOTE(review): the payload is JSON although the file name ends in `.csv`.
# The path is left unchanged because other code may read it; rename it (or
# write real CSV) once the consumers are known.
# The encoding is explicit so the result does not depend on the platform
# default.
with open('transactions_train.csv', 'w', encoding='utf-8') as f:
    json.dump(fraud_dict, f, indent=2)
|
| 71 |
+
|
| 72 |
def filter_data(start_date, end_date, payer_id, payee_id, transaction_id):
|
| 73 |
filtered_df = df.copy()
|
| 74 |
|