---------------------------------------------------------------------------
ProcessRaisedException                    Traceback (most recent call last)
File , line 15
      3 models = AutoNHITS(h=horizon,
      4                    config=nhits_config,
      5                    loss=HuberMQLoss(quantiles=quantiles),  # Robust Huber Loss
   (...)
      8                    backend='optuna',
      9                    num_samples=100)
     11 nf = NeuralForecast(
     12     models=[models],
     13     freq='D')
---> 15 Y_hat_df = nf.cross_validation(df=Y_df, val_size=val_size,
     16                                test_size=test_size, n_windows=None)

File /local_disk0/.ephemeral_nfs/envs/pythonEnv-62e53c8d-508c-4bb8-9f36-487f73bd3e4b/lib/python3.10/site-packages/neuralforecast/core.py:520, in NeuralForecast.cross_validation(self, df, static_df, n_windows, step_size, val_size, test_size, sort_df, use_init_models, verbose, **data_kwargs)
    515 fcsts = np.full(
    516     (self.dataset.n_groups * h * n_windows, len(cols)), np.nan, dtype=np.float32
    517 )
    519 for model in self.models:
--> 520     model.fit(dataset=self.dataset, val_size=val_size, test_size=test_size)
    521     model_fcsts = model.predict(
    522         self.dataset, step_size=step_size, **data_kwargs
    523     )
    525     # Append predictions in memory placeholder

File /local_disk0/.ephemeral_nfs/envs/pythonEnv-62e53c8d-508c-4bb8-9f36-487f73bd3e4b/lib/python3.10/site-packages/neuralforecast/common/_base_auto.py:375, in BaseAuto.fit(self, dataset, val_size, test_size, random_seed)
    373     best_config = results.get_best_result().config
    374 else:
--> 375     results = self._optuna_tune_model(
    376         cls_model=self.cls_model,
    377         dataset=dataset,
    378         val_size=val_size,
    379         test_size=test_size,
    380         verbose=self.verbose,
    381         num_samples=self.num_samples,
    382         search_alg=search_alg,
    383         config=self.config,
    384     )
    385     best_config = results.best_trial.user_attrs["ALL_PARAMS"]
    386 self.model = self._fit_model(
    387     cls_model=self.cls_model,
    388     config=best_config,
   (...)
    391     test_size=test_size,
    392 )

File /local_disk0/.ephemeral_nfs/envs/pythonEnv-62e53c8d-508c-4bb8-9f36-487f73bd3e4b/lib/python3.10/site-packages/neuralforecast/common/_base_auto.py:327, in BaseAuto._optuna_tune_model(self, cls_model, dataset, val_size, test_size, verbose, num_samples, search_alg, config)
    324     sampler = None
    326 study = optuna.create_study(sampler=sampler, direction="minimize")
--> 327 study.optimize(
    328     objective,
    329     n_trials=num_samples,
    330     show_progress_bar=verbose,
    331 )
    332 return study

File /local_disk0/.ephemeral_nfs/envs/pythonEnv-62e53c8d-508c-4bb8-9f36-487f73bd3e4b/lib/python3.10/site-packages/optuna/study/study.py:451, in Study.optimize(self, func, n_trials, timeout, n_jobs, catch, callbacks, gc_after_trial, show_progress_bar)
    348 def optimize(
    349     self,
    350     func: ObjectiveFuncType,
   (...)
    357     show_progress_bar: bool = False,
    358 ) -> None:
    359     """Optimize an objective function.
    360
    361     Optimization is done by choosing a suitable set of hyperparameter values from a given
   (...)
    449         If nested invocation of this method occurs.
    450     """
--> 451     _optimize(
    452         study=self,
    453         func=func,
    454         n_trials=n_trials,
    455         timeout=timeout,
    456         n_jobs=n_jobs,
    457         catch=tuple(catch) if isinstance(catch, Iterable) else (catch,),
    458         callbacks=callbacks,
    459         gc_after_trial=gc_after_trial,
    460         show_progress_bar=show_progress_bar,
    461     )

File /local_disk0/.ephemeral_nfs/envs/pythonEnv-62e53c8d-508c-4bb8-9f36-487f73bd3e4b/lib/python3.10/site-packages/optuna/study/_optimize.py:66, in _optimize(study, func, n_trials, timeout, n_jobs, catch, callbacks, gc_after_trial, show_progress_bar)
     64 try:
     65     if n_jobs == 1:
---> 66         _optimize_sequential(
     67             study,
     68             func,
     69             n_trials,
     70             timeout,
     71             catch,
     72             callbacks,
     73             gc_after_trial,
     74             reseed_sampler_rng=False,
     75             time_start=None,
     76             progress_bar=progress_bar,
     77         )
     78     else:
     79         if n_jobs == -1:

File /local_disk0/.ephemeral_nfs/envs/pythonEnv-62e53c8d-508c-4bb8-9f36-487f73bd3e4b/lib/python3.10/site-packages/optuna/study/_optimize.py:163, in _optimize_sequential(study, func, n_trials, timeout, catch, callbacks, gc_after_trial, reseed_sampler_rng, time_start, progress_bar)
    160         break
    162     try:
--> 163         frozen_trial = _run_trial(study, func, catch)
    164     finally:
    165         # The following line mitigates memory problems that can be occurred in some
    166         # environments (e.g., services that use computing containers such as GitHub Actions).
    167         # Please refer to the following PR for further details:
    168         # https://github.com/optuna/optuna/pull/325.
    169         if gc_after_trial:

File /local_disk0/.ephemeral_nfs/envs/pythonEnv-62e53c8d-508c-4bb8-9f36-487f73bd3e4b/lib/python3.10/site-packages/optuna/study/_optimize.py:251, in _run_trial(study, func, catch)
    244     assert False, "Should not reach."
    246 if (
    247     frozen_trial.state == TrialState.FAIL
    248     and func_err is not None
    249     and not isinstance(func_err, catch)
    250 ):
--> 251     raise func_err
    252 return frozen_trial

File /local_disk0/.ephemeral_nfs/envs/pythonEnv-62e53c8d-508c-4bb8-9f36-487f73bd3e4b/lib/python3.10/site-packages/optuna/study/_optimize.py:200, in _run_trial(study, func, catch)
    198 with get_heartbeat_thread(trial._trial_id, study._storage):
    199     try:
--> 200         value_or_values = func(trial)
    201     except exceptions.TrialPruned as e:
    202         # TODO(mamu): Handle multi-objective cases.
    203         state = TrialState.PRUNED

File /local_disk0/.ephemeral_nfs/envs/pythonEnv-62e53c8d-508c-4bb8-9f36-487f73bd3e4b/lib/python3.10/site-packages/neuralforecast/common/_base_auto.py:311, in BaseAuto._optuna_tune_model.<locals>.objective(trial)
    309 def objective(trial):
    310     cfg = config(trial)
--> 311     fitted_model = self._fit_model(
    312         cls_model=cls_model,
    313         config=cfg,
    314         dataset=dataset,
    315         val_size=val_size,
    316         test_size=test_size,
    317     )
    318     trial.set_user_attr("ALL_PARAMS", cfg)
    319     return fitted_model.trainer.callback_metrics["valid_loss"].item()

File /local_disk0/.ephemeral_nfs/envs/pythonEnv-62e53c8d-508c-4bb8-9f36-487f73bd3e4b/lib/python3.10/site-packages/neuralforecast/common/_base_auto.py:336, in BaseAuto._fit_model(self, cls_model, config, dataset, val_size, test_size)
    334 def _fit_model(self, cls_model, config, dataset, val_size, test_size):
    335     model = cls_model(**config)
--> 336     model.fit(dataset, val_size=val_size, test_size=test_size)
    337     return model

File /local_disk0/.ephemeral_nfs/envs/pythonEnv-62e53c8d-508c-4bb8-9f36-487f73bd3e4b/lib/python3.10/site-packages/neuralforecast/common/_base_windows.py:734, in BaseWindows.fit(self, dataset, val_size, test_size, random_seed)
    731     self.trainer_kwargs["check_val_every_n_epoch"] = None
    733 trainer = pl.Trainer(**self.trainer_kwargs)
--> 734 trainer.fit(self, datamodule=datamodule)

File /databricks/python/lib/python3.10/site-packages/mlflow/utils/autologging_utils/safety.py:432, in safe_patch.<locals>.safe_patch_function(*args, **kwargs)
    417 if (
    418     active_session_failed
    419     or autologging_is_disabled(autologging_integration)
   (...)
    426     # warning behavior during original function execution, since autologging is being
    427     # skipped
    428     with set_non_mlflow_warnings_behavior_for_current_thread(
    429         disable_warnings=False,
    430         reroute_warnings=False,
    431     ):
--> 432         return original(*args, **kwargs)
    434 # Whether or not the original / underlying function has been called during the
    435 # execution of patched code
    436 original_has_been_called = False

File /local_disk0/.ephemeral_nfs/envs/pythonEnv-62e53c8d-508c-4bb8-9f36-487f73bd3e4b/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py:544, in Trainer.fit(self, model, train_dataloaders, val_dataloaders, datamodule, ckpt_path)
    542 self.state.status = TrainerStatus.RUNNING
    543 self.training = True
--> 544 call._call_and_handle_interrupt(
    545     self, self._fit_impl, model, train_dataloaders, val_dataloaders, datamodule, ckpt_path
    546 )

File /local_disk0/.ephemeral_nfs/envs/pythonEnv-62e53c8d-508c-4bb8-9f36-487f73bd3e4b/lib/python3.10/site-packages/pytorch_lightning/trainer/call.py:43, in _call_and_handle_interrupt(trainer, trainer_fn, *args, **kwargs)
     41 try:
     42     if trainer.strategy.launcher is not None:
---> 43         return trainer.strategy.launcher.launch(trainer_fn, *args, trainer=trainer, **kwargs)
     44     return trainer_fn(*args, **kwargs)
     46 except _TunerExitException:

File /local_disk0/.ephemeral_nfs/envs/pythonEnv-62e53c8d-508c-4bb8-9f36-487f73bd3e4b/lib/python3.10/site-packages/pytorch_lightning/strategies/launchers/multiprocessing.py:144, in _MultiProcessingLauncher.launch(self, function, trainer, *args, **kwargs)
    136 process_context = mp.start_processes(
    137     self._wrapping_function,
    138     args=process_args,
   (...)
    141     join=False,  # we will join ourselves to get the process references
    142 )
    143 self.procs = process_context.processes
--> 144 while not process_context.join():
    145     pass
    147 worker_output = return_queue.get()

File /local_disk0/.ephemeral_nfs/envs/pythonEnv-62e53c8d-508c-4bb8-9f36-487f73bd3e4b/lib/python3.10/site-packages/torch/multiprocessing/spawn.py:158, in ProcessContext.join(self, timeout)
    156 msg = "\n\n-- Process %d terminated with the following error:\n" % error_index
    157 msg += original_trace
--> 158 raise ProcessRaisedException(msg, error_index, failed_process.pid)

ProcessRaisedException:

-- Process 0 terminated with the following error:
Traceback (most recent call last):
  File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-62e53c8d-508c-4bb8-9f36-487f73bd3e4b/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 68, in _wrap
    fn(i, *args)
  File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-62e53c8d-508c-4bb8-9f36-487f73bd3e4b/lib/python3.10/site-packages/pytorch_lightning/strategies/launchers/multiprocessing.py", line 173, in _wrapping_function
    results = function(*args, **kwargs)
  File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-62e53c8d-508c-4bb8-9f36-487f73bd3e4b/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py", line 580, in _fit_impl
    self._run(model, ckpt_path=ckpt_path)
  File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-62e53c8d-508c-4bb8-9f36-487f73bd3e4b/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py", line 949, in _run
    call._call_setup_hook(self)  # allow user to set up LightningModule in accelerator environment
  File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-62e53c8d-508c-4bb8-9f36-487f73bd3e4b/lib/python3.10/site-packages/pytorch_lightning/trainer/call.py", line 86, in _call_setup_hook
    if hasattr(logger, "experiment"):
  File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-62e53c8d-508c-4bb8-9f36-487f73bd3e4b/lib/python3.10/site-packages/lightning_fabric/loggers/logger.py", line 118, in experiment
    return fn(self)
  File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-62e53c8d-508c-4bb8-9f36-487f73bd3e4b/lib/python3.10/site-packages/lightning_fabric/loggers/tensorboard.py", line 193, in experiment
    from torch.utils.tensorboard import SummaryWriter
  File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-62e53c8d-508c-4bb8-9f36-487f73bd3e4b/lib/python3.10/site-packages/torch/utils/tensorboard/__init__.py", line 12, in <module>
    from .writer import FileWriter, SummaryWriter  # noqa: F401
  File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-62e53c8d-508c-4bb8-9f36-487f73bd3e4b/lib/python3.10/site-packages/torch/utils/tensorboard/writer.py", line 12, in <module>
    from tensorboard.compat.proto import event_pb2
  File "/databricks/python/lib/python3.10/site-packages/tensorboard/compat/proto/event_pb2.py", line 17, in <module>
    from tensorboard.compat.proto import summary_pb2 as tensorboard_dot_compat_dot_proto_dot_summary__pb2
  File "/databricks/python/lib/python3.10/site-packages/tensorboard/compat/proto/summary_pb2.py", line 17, in <module>
    from tensorboard.compat.proto import histogram_pb2 as tensorboard_dot_compat_dot_proto_dot_histogram__pb2
  File "/databricks/python/lib/python3.10/site-packages/tensorboard/compat/proto/histogram_pb2.py", line 36, in <module>
    _descriptor.FieldDescriptor(
  File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-62e53c8d-508c-4bb8-9f36-487f73bd3e4b/lib/python3.10/site-packages/google/protobuf/descriptor.py", line 553, in __new__
    _message.Message._CheckCalledFromGeneratedFile()
TypeError: Descriptors cannot be created directly.
If this call came from a _pb2.py file, your generated code is out of date and must be regenerated with protoc >= 3.19.0.
If you cannot immediately regenerate your protos, some other possible workarounds are:
 1. Downgrade the protobuf package to 3.20.x or lower.
 2. Set PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python (but this will use pure-Python parsing and will be much slower).

More information: https://developers.google.com/protocol-buffers/docs/news/2022-05-06#python-updates
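For reference, the two workarounds listed at the end of the error message can be applied from the driver notebook before anything imports torch.utils.tensorboard. This is a minimal sketch under the assumption that the cluster's preinstalled tensorboard protos were generated for protobuf 3.x while a newer protobuf is installed in the notebook environment; the specific pin "protobuf<=3.20.3" is only one example of a version satisfying "3.20.x or lower":

# Workaround 1 from the error message: downgrade protobuf to 3.20.x or lower,
# e.g. in a Databricks notebook cell (then restart the Python process so the
# downgraded package is the one that gets imported):
#   %pip install "protobuf<=3.20.3"

# Workaround 2 from the error message: force the pure-Python protobuf parser.
# This must run before the first import of torch.utils.tensorboard / the
# tensorboard protos, and the error message warns that parsing will be slower.
import os
os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"

Because the failure happens inside the worker spawned by PyTorch Lightning's multiprocessing launcher, setting the variable in the driver should still take effect, since spawned child processes inherit the parent's environment.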