Cannot load model

#1
by FrenzyBiscuit - opened

I am on the latest tabbyapi and cannot load your model with tensor parallelism.

Loading model modules ╸━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 2% 1/43 -:--:--
Traceback (most recent call last):
File "/mnt/nvme1/temp/tabbyAPI/start.py", line 297, in
entrypoint(args, parser)
File "/mnt/nvme1/temp/tabbyAPI/main.py", line 177, in entrypoint
asyncio.run(entrypoint_async())
File "/usr/lib/python3.11/asyncio/runners.py", line 190, in run
return runner.run(main)
^^^^^^^^^^^^^^^^
File "/usr/lib/python3.11/asyncio/runners.py", line 118, in run
return self._loop.run_until_complete(task)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "uvloop/loop.pyx", line 1518, in uvloop.loop.Loop.run_until_complete
File "/mnt/nvme1/temp/tabbyAPI/main.py", line 61, in entrypoint_async
await model.load_model(
File "/mnt/nvme1/temp/tabbyAPI/common/model.py", line 229, in load_model
async for _ in load_model_gen(model_path, **kwargs):
File "/mnt/nvme1/temp/tabbyAPI/common/model.py", line 205, in load_model_gen
async for module, modules in load_status:
File "/mnt/nvme1/temp/tabbyAPI/backends/exllamav3/model.py", line 441, in load_gen
async for value in iterate_in_threadpool(generator):
File "/mnt/nvme1/temp/tabbyAPI/common/concurrency.py", line 30, in iterate_in_threadpool
yield await asyncio.to_thread(gen_next, generator)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/lib/python3.11/asyncio/threads.py", line 25, in to_thread
return await loop.run_in_executor(None, func_call)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/lib/python3.11/concurrent/futures/thread.py", line 58, in run
result = self.fn(*self.args, **self.kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/mnt/nvme1/temp/tabbyAPI/common/concurrency.py", line 20, in gen_next
return next(generator)
^^^^^^^^^^^^^^^
File "/mnt/nvme1/temp/tabbyAPI/venv/lib/python3.11/site-packages/torch/utils/_contextlib.py", line 59, in generator_context
response = gen.send(request)
^^^^^^^^^^^^^^^^^
File "/mnt/nvme1/temp/tabbyAPI/backends/exllamav3/model.py", line 488, in load_model_sync
for value in self.model.load_gen(
File "/mnt/nvme1/temp/tabbyAPI/venv/lib/python3.11/site-packages/exllamav3/model/model.py", line 310, in load_gen
yield from self._load_tp(
File "/mnt/nvme1/temp/tabbyAPI/venv/lib/python3.11/site-packages/exllamav3/model/model_tp.py", line 310, in _load_tp
exported = module.tp_export(plan, producer)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/mnt/nvme1/temp/tabbyAPI/venv/lib/python3.11/site-packages/exllamav3/modules/transformer.py", line 140, in tp_export
**{name: _export(getattr(self, name, None)) for name in (
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/mnt/nvme1/temp/tabbyAPI/venv/lib/python3.11/site-packages/exllamav3/modules/transformer.py", line 140, in
**{name: _export(getattr(self, name, None)) for name in (
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/mnt/nvme1/temp/tabbyAPI/venv/lib/python3.11/site-packages/exllamav3/modules/transformer.py", line 132, in _export
return child.tp_export(plan, producer) if child is not None else None
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/mnt/nvme1/temp/tabbyAPI/venv/lib/python3.11/site-packages/exllamav3/modules/attn.py", line 639, in tp_export
**{name: _export(getattr(self, name, None)) for name in (
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/mnt/nvme1/temp/tabbyAPI/venv/lib/python3.11/site-packages/exllamav3/modules/attn.py", line 639, in
**{name: _export(getattr(self, name, None)) for name in (
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/mnt/nvme1/temp/tabbyAPI/venv/lib/python3.11/site-packages/exllamav3/modules/attn.py", line 623, in _export
return child.tp_export(plan, producer) if child is not None else None
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/mnt/nvme1/temp/tabbyAPI/venv/lib/python3.11/site-packages/exllamav3/modules/linear.py", line 405, in tp_export
"inner": self.inner.tp_export(plan, producer),
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/mnt/nvme1/temp/tabbyAPI/venv/lib/python3.11/site-packages/exllamav3/modules/quant/exl3.py", line 206, in tp_export
"mcg": producer.send(self.mcg_tensor),
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/mnt/nvme1/temp/tabbyAPI/venv/lib/python3.11/site-packages/exllamav3/model/model_tp_shared.py", line 72, in send
src = t_cpu.view(torch.uint8).numpy().view(np.uint8).ravel()
^^^^^^^^^^^^^^^^^^^^^^^
RuntimeError: self.dim() cannot be 0 to view Int as Byte (different element sizes)
/usr/lib/python3.11/multiprocessing/resource_tracker.py:224: UserWarning: resource_tracker: There appear to be 1 leaked shared_memory objects to clean up at shutdown
warnings.warn('resource_tracker: There appear to be %d '

This is with the 8.0 BPW quant.

There is an issue with the latest exllamav3 (starting from 0.0.9). The model was requanted with the latest stable version, so closing this for now.

ArtusDev changed discussion status to closed

Sign up or log in to comment