I tried to load the Hugging Face dataset mentioned in the course in my Databricks environment, but it throws the error below.
Code tried:
dataset = load_dataset('knkarthick/dialogsum')
Package versions:
%pip install --upgrade pip
%pip install --disable-pip-version-check torch==1.13.1 torchdata==0.5.1 transformers==4.27.2 datasets==2.11.0 --quiet
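Since the traceback below ends in a ConnectionError, I suspect the cluster may not have outbound access to the Hub. A quick diagnostic I could run in the same notebook (not part of my original code; the URL is the public Hub API endpoint for this dataset, and requests is assumed to be available in the environment):

import requests

# Hypothetical check: can this cluster reach the Hugging Face Hub at all?
url = "https://huggingface.co/api/datasets/knkarthick/dialogsum"
try:
    resp = requests.get(url, timeout=10)
    print(resp.status_code)  # 200 would mean the Hub itself is reachable
except requests.exceptions.ConnectionError as err:
    print(f"No route to the Hub from this cluster: {err}")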
The error I get:
Couldn't reach 'knkarthick/dialogsum' on the Hub (ConnectionError)
ConnectionError Traceback (most recent call last)
File :1
----> 1 dataset = load_dataset('knkarthick/dialogsum')
File /local_disk0/.ephemeral_nfs/envs/pythonEnv-2125ae42-4a67-4f5e-8a88-b6c4dbd4e20e/lib/python3.9/site-packages/datasets/load.py:1767, in load_dataset(path, name, data_dir, data_files, split, cache_dir, features, download_config, download_mode, verification_mode, ignore_verifications, keep_in_memory, save_infos, revision, use_auth_token, task, streaming, num_proc, storage_options, **config_kwargs)
1762 verification_mode = VerificationMode(
1763 (verification_mode or VerificationMode.BASIC_CHECKS) if not save_infos else VerificationMode.ALL_CHECKS
1764 )
1766 # Create a dataset builder
-> 1767 builder_instance = load_dataset_builder(
1768 path=path,
1769 name=name,
1770 data_dir=data_dir,
1771 data_files=data_files,
1772 cache_dir=cache_dir,
1773 features=features,
1774 download_config=download_config,
1775 download_mode=download_mode,
1776 revision=revision,
1777 use_auth_token=use_auth_token,
1778 storage_options=storage_options,
1779 **config_kwargs,
1780 )
1782 # Return iterable dataset in case of streaming
1783 if streaming:
File /local_disk0/.ephemeral_nfs/envs/pythonEnv-2125ae42-4a67-4f5e-8a88-b6c4dbd4e20e/lib/python3.9/site-packages/datasets/load.py:1498, in load_dataset_builder(path, name, data_dir, data_files, cache_dir, features, download_config, download_mode, revision, use_auth_token, storage_options, **config_kwargs)
1496 download_config = download_config.copy() if download_config else DownloadConfig()
1497 download_config.use_auth_token = use_auth_token
-> 1498 dataset_module = dataset_module_factory(
1499 path,
1500 revision=revision,
1501 download_config=download_config,
1502 download_mode=download_mode,
1503 data_dir=data_dir,
1504 data_files=data_files,
1505 )
1507 # Get dataset builder class from the processing script
1508 builder_cls = import_main_class(dataset_module.module_path)
File /local_disk0/.ephemeral_nfs/envs/pythonEnv-2125ae42-4a67-4f5e-8a88-b6c4dbd4e20e/lib/python3.9/site-packages/datasets/load.py:1215, in dataset_module_factory(path, revision, download_config, download_mode, dynamic_modules_path, data_dir, data_files, **download_kwargs)
1210 if isinstance(e1, FileNotFoundError):
1211 raise FileNotFoundError(
1212 f"Couldn’t find a dataset script at {relative_to_absolute_path(combined_path)} or any data file in the same directory. "
1213 f"Couldn’t find ‘{path}’ on the Hugging Face Hub either: {type(e1).name}: {e1}"
1214 ) from None
-> 1215 raise e1 from None
1216 else:
1217 raise FileNotFoundError(
1218 f"Couldn’t find a dataset script at {relative_to_absolute_path(combined_path)} or any data file in the same directory."
1219 )
File /local_disk0/.ephemeral_nfs/envs/pythonEnv-2125ae42-4a67-4f5e-8a88-b6c4dbd4e20e/lib/python3.9/site-packages/datasets/load.py:1171, in dataset_module_factory(path, revision, download_config, download_mode, dynamic_modules_path, data_dir, data_files, **download_kwargs)
1162 except Exception as e: # noqa: catch any exception of hf_hub and consider that the dataset doesn't exist
1163 if isinstance(
1164 e,
1165 (
(…)
1169 ),
1170 ):
-> 1171 raise ConnectionError(f"Couldn't reach '{path}' on the Hub ({type(e).__name__})")
1172 elif "404" in str(e):
1173 msg = f"Dataset '{path}' doesn't exist on the Hub"
ConnectionError: Couldn't reach 'knkarthick/dialogsum' on the Hub (ConnectionError)
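If the Hub really is unreachable from the cluster, the workaround I'm considering (only a sketch; the CSV split names and DBFS paths are my own assumptions, not verified) is to download the dataset files from a machine that does have internet access, upload them to DBFS, and load them locally instead of from the Hub:

from datasets import load_dataset

# Sketch of a possible workaround: load manually uploaded CSV splits
# from DBFS instead of pulling the dataset from the Hub.
dataset = load_dataset(
    "csv",
    data_files={
        "train": "/dbfs/FileStore/dialogsum/train.csv",        # hypothetical paths
        "validation": "/dbfs/FileStore/dialogsum/validation.csv",
        "test": "/dbfs/FileStore/dialogsum/test.csv",
    },
)
print(dataset)

Loading with the generic "csv" builder avoids the Hub lookup entirely, so it should sidestep the ConnectionError as long as the files are already on DBFS.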