diff --git a/backends/exllamav3/model.py b/backends/exllamav3/model.py index 23e90ed8..9a9e72b6 100644 --- a/backends/exllamav3/model.py +++ b/backends/exllamav3/model.py @@ -527,7 +527,7 @@ def load_model_sync(self, progress_callback=None): if self.use_draft_model: for value in self.draft_model.load_gen( reserve_per_device=self.autosplit_reserve, - use_per_device=self.gpu_split, + use_per_device=self.draft_gpu_split, callback=progress_callback, ): if value: