Spaces:
Runtime error
Runtime error
da03
committed on
Commit
·
64a144d
1
Parent(s):
888f299
- dispatcher.py +1 -1
- start_system.sh +1 -1
- start_workers.py +2 -2
- worker.py +2 -2
dispatcher.py
CHANGED
|
@@ -1210,7 +1210,7 @@ if __name__ == "__main__":
|
|
| 1210 |
import argparse
|
| 1211 |
|
| 1212 |
parser = argparse.ArgumentParser(description="Dispatcher for Neural OS")
|
| 1213 |
-
parser.add_argument("--port", type=int, default=
|
| 1214 |
args = parser.parse_args()
|
| 1215 |
|
| 1216 |
logger.info(f"🌐 Starting dispatcher on 0.0.0.0:{args.port}")
|
|
|
|
| 1210 |
import argparse
|
| 1211 |
|
| 1212 |
parser = argparse.ArgumentParser(description="Dispatcher for Neural OS")
|
| 1213 |
+
parser.add_argument("--port", type=int, default=7860, help="Port to run the dispatcher on")
|
| 1214 |
args = parser.parse_args()
|
| 1215 |
|
| 1216 |
logger.info(f"🌐 Starting dispatcher on 0.0.0.0:{args.port}")
|
start_system.sh
CHANGED
|
@@ -148,7 +148,7 @@ echo "✅ Dispatcher started (PID: $DISPATCHER_PID)"
|
|
| 148 |
|
| 149 |
# Start workers
|
| 150 |
echo "🔧 Starting $NUM_GPUS GPU workers..."
|
| 151 |
-
python start_workers.py --num-gpus $NUM_GPUS --no-monitor > workers.log 2>&1
|
| 152 |
WORKER_START_EXIT_CODE=$?
|
| 153 |
|
| 154 |
# Wait for workers to fully load models and register (60 seconds)
|
|
|
|
| 148 |
|
| 149 |
# Start workers
|
| 150 |
echo "🔧 Starting $NUM_GPUS GPU workers..."
|
| 151 |
+
python start_workers.py --num-gpus $NUM_GPUS --dispatcher-url "http://localhost:$DISPATCHER_PORT" --no-monitor > workers.log 2>&1
|
| 152 |
WORKER_START_EXIT_CODE=$?
|
| 153 |
|
| 154 |
# Wait for workers to fully load models and register (60 seconds)
|
start_workers.py
CHANGED
|
@@ -13,7 +13,7 @@ import os
|
|
| 13 |
from typing import List
|
| 14 |
|
| 15 |
class WorkerManager:
|
| 16 |
-
def __init__(self, num_gpus: int, dispatcher_url: str = "http://localhost:
|
| 17 |
self.num_gpus = num_gpus
|
| 18 |
self.dispatcher_url = dispatcher_url
|
| 19 |
self.processes: List[subprocess.Popen] = []
|
|
@@ -143,7 +143,7 @@ def main():
|
|
| 143 |
parser = argparse.ArgumentParser(description="Start multiple GPU workers")
|
| 144 |
parser.add_argument("--num-gpus", type=int, required=True,
|
| 145 |
help="Number of GPU workers to start")
|
| 146 |
-
parser.add_argument("--dispatcher-url", type=str, default="http://localhost:
|
| 147 |
help="URL of the dispatcher service")
|
| 148 |
parser.add_argument("--no-monitor", action="store_true",
|
| 149 |
help="Start workers but don't monitor them")
|
|
|
|
| 13 |
from typing import List
|
| 14 |
|
| 15 |
class WorkerManager:
|
| 16 |
+
def __init__(self, num_gpus: int, dispatcher_url: str = "http://localhost:7860"):
|
| 17 |
self.num_gpus = num_gpus
|
| 18 |
self.dispatcher_url = dispatcher_url
|
| 19 |
self.processes: List[subprocess.Popen] = []
|
|
|
|
| 143 |
parser = argparse.ArgumentParser(description="Start multiple GPU workers")
|
| 144 |
parser.add_argument("--num-gpus", type=int, required=True,
|
| 145 |
help="Number of GPU workers to start")
|
| 146 |
+
parser.add_argument("--dispatcher-url", type=str, default="http://localhost:7860",
|
| 147 |
help="URL of the dispatcher service")
|
| 148 |
parser.add_argument("--no-monitor", action="store_true",
|
| 149 |
help="Start workers but don't monitor them")
|
worker.py
CHANGED
|
@@ -27,7 +27,7 @@ torch.backends.cuda.matmul.allow_tf32 = True
|
|
| 27 |
torch.backends.cudnn.allow_tf32 = True
|
| 28 |
|
| 29 |
class GPUWorker:
|
| 30 |
-
def __init__(self, worker_address: str, dispatcher_url: str = "http://localhost:
|
| 31 |
self.worker_address = worker_address # e.g., "localhost:8001", "192.168.1.100:8002"
|
| 32 |
# Parse port from worker address
|
| 33 |
if ':' in worker_address:
|
|
@@ -771,7 +771,7 @@ if __name__ == "__main__":
|
|
| 771 |
# Parse command line arguments
|
| 772 |
parser = argparse.ArgumentParser(description="GPU Worker for Neural OS")
|
| 773 |
parser.add_argument("--worker-address", type=str, required=True, help="Worker address (e.g., 'localhost:8001', '192.168.1.100:8002')")
|
| 774 |
-
parser.add_argument("--dispatcher-url", type=str, default="http://localhost:
|
| 775 |
args = parser.parse_args()
|
| 776 |
|
| 777 |
# Parse port from worker address for validation
|
|
|
|
| 27 |
torch.backends.cudnn.allow_tf32 = True
|
| 28 |
|
| 29 |
class GPUWorker:
|
| 30 |
+
def __init__(self, worker_address: str, dispatcher_url: str = "http://localhost:7860"):
|
| 31 |
self.worker_address = worker_address # e.g., "localhost:8001", "192.168.1.100:8002"
|
| 32 |
# Parse port from worker address
|
| 33 |
if ':' in worker_address:
|
|
|
|
| 771 |
# Parse command line arguments
|
| 772 |
parser = argparse.ArgumentParser(description="GPU Worker for Neural OS")
|
| 773 |
parser.add_argument("--worker-address", type=str, required=True, help="Worker address (e.g., 'localhost:8001', '192.168.1.100:8002')")
|
| 774 |
+
parser.add_argument("--dispatcher-url", type=str, default="http://localhost:7860", help="Dispatcher URL")
|
| 775 |
args = parser.parse_args()
|
| 776 |
|
| 777 |
# Parse port from worker address for validation
|