Getting errors while trying to load the DialogSum dataset. I reached out to Coursera support; they confirmed that the lab is working fine on their end and advised me to ask here for support.
ValueError Traceback (most recent call last)
Cell In[6], line 3
1 huggingface_dataset_name = "knkarthick/dialogsum"
----> 3 dataset = load_dataset(huggingface_dataset_name)
File /opt/conda/lib/python3.10/site-packages/datasets/load.py:1767, in load_dataset(path, name, data_dir, data_files, split, cache_dir, features, download_config, download_mode, verification_mode, ignore_verifications, keep_in_memory, save_infos, revision, use_auth_token, task, streaming, num_proc, storage_options, **config_kwargs)
1762 verification_mode = VerificationMode(
1763 (verification_mode or VerificationMode.BASIC_CHECKS) if not save_infos else VerificationMode.ALL_CHECKS
1764 )
1766 # Create a dataset builder
→ 1767 builder_instance = load_dataset_builder(
1768 path=path,
1769 name=name,
1770 data_dir=data_dir,
1771 data_files=data_files,
1772 cache_dir=cache_dir,
1773 features=features,
1774 download_config=download_config,
1775 download_mode=download_mode,
1776 revision=revision,
1777 use_auth_token=use_auth_token,
1778 storage_options=storage_options,
1779 **config_kwargs,
1780 )
1782 # Return iterable dataset in case of streaming
1783 if streaming:
File /opt/conda/lib/python3.10/site-packages/datasets/load.py:1498, in load_dataset_builder(path, name, data_dir, data_files, cache_dir, features, download_config, download_mode, revision, use_auth_token, storage_options, **config_kwargs)
1496 download_config = download_config.copy() if download_config else DownloadConfig()
1497 download_config.use_auth_token = use_auth_token
→ 1498 dataset_module = dataset_module_factory(
1499 path,
1500 revision=revision,
1501 download_config=download_config,
1502 download_mode=download_mode,
1503 data_dir=data_dir,
1504 data_files=data_files,
1505 )
1507 # Get dataset builder class from the processing script
1508 builder_cls = import_main_class(dataset_module.module_path)
File /opt/conda/lib/python3.10/site-packages/datasets/load.py:1215, in dataset_module_factory(path, revision, download_config, download_mode, dynamic_modules_path, data_dir, data_files, **download_kwargs)
1210 if isinstance(e1, FileNotFoundError):
1211 raise FileNotFoundError(
1212 f"Couldn't find a dataset script at {relative_to_absolute_path(combined_path)} or any data file in the same directory. "
1213 f"Couldn't find '{path}' on the Hugging Face Hub either: {type(e1).__name__}: {e1}"
1214 ) from None
→ 1215 raise e1 from None
1216 else:
1217 raise FileNotFoundError(
1218 f"Couldn't find a dataset script at {relative_to_absolute_path(combined_path)} or any data file in the same directory."
1219 )
File /opt/conda/lib/python3.10/site-packages/datasets/load.py:1199, in dataset_module_factory(path, revision, download_config, download_mode, dynamic_modules_path, data_dir, data_files, **download_kwargs)
1184 return HubDatasetModuleFactoryWithScript(
1185 path,
1186 revision=revision,
(…)
1189 dynamic_modules_path=dynamic_modules_path,
1190 ).get_module()
1191 else:
1192 return HubDatasetModuleFactoryWithoutScript(
1193 path,
1194 revision=revision,
1195 data_dir=data_dir,
1196 data_files=data_files,
1197 download_config=download_config,
1198 download_mode=download_mode,
→ 1199 ).get_module()
1200 except (
1201 Exception
1202 ) as e1: # noqa: all the attempts failed, before raising the error we should check if the module is already cached.
1203 try:
File /opt/conda/lib/python3.10/site-packages/datasets/load.py:765, in HubDatasetModuleFactoryWithoutScript.get_module(self)
755 def get_module(self) → DatasetModule:
756 hfh_dataset_info = HfApi(config.HF_ENDPOINT).dataset_info(
757 self.name,
758 revision=self.revision,
759 token=self.download_config.use_auth_token,
760 timeout=100.0,
761 )
762 patterns = (
763 sanitize_patterns(self.data_files)
764 if self.data_files is not None
→ 765 else get_data_patterns_in_dataset_repository(hfh_dataset_info, self.data_dir)
766 )
767 data_files = DataFilesDict.from_hf_repo(
768 patterns,
769 dataset_info=hfh_dataset_info,
770 base_path=self.data_dir,
771 allowed_extensions=ALL_ALLOWED_EXTENSIONS,
772 )
773 module_names = {
774 key: infer_module_for_data_files(data_files_list, use_auth_token=self.download_config.use_auth_token)
775 for key, data_files_list in data_files.items()
776 }
File /opt/conda/lib/python3.10/site-packages/datasets/data_files.py:675, in get_data_patterns_in_dataset_repository(dataset_info, base_path)
673 resolver = partial(_resolve_single_pattern_in_dataset_repository, dataset_info, base_path=base_path)
674 try:
→ 675 return _get_data_files_patterns(resolver)
676 except FileNotFoundError:
677 raise EmptyDatasetError(
678 f"The dataset repository at ‘{dataset_info.id}’ doesn’t contain any data files"
679 ) from None
File /opt/conda/lib/python3.10/site-packages/datasets/data_files.py:236, in _get_data_files_patterns(pattern_resolver)
234 try:
235 for pattern in patterns:
→ 236 data_files = pattern_resolver(pattern)
237 if len(data_files) > 0:
238 non_empty_splits.append(split)
File /opt/conda/lib/python3.10/site-packages/datasets/data_files.py:486, in _resolve_single_pattern_in_dataset_repository(dataset_info, pattern, base_path, allowed_extensions)
484 else:
485 base_path = "/"
→ 486 glob_iter = [PurePath(filepath) for filepath in fs.glob(PurePath(pattern).as_posix()) if fs.isfile(filepath)]
487 matched_paths = [
488 filepath
489 for filepath in glob_iter
(…)
496 )
497 ] # ignore .ipynb and __pycache__, but keep /../
498 if allowed_extensions is not None:
File /opt/conda/lib/python3.10/site-packages/fsspec/spec.py:606, in AbstractFileSystem.glob(self, path, maxdepth, **kwargs)
602 depth = None
604 allpaths = self.find(root, maxdepth=depth, withdirs=True, detail=True, **kwargs)
→ 606 pattern = glob_translate(path + ("/" if ends_with_sep else ""))
607 pattern = re.compile(pattern)
609 out = {
610 p: info
611 for p, info in sorted(allpaths.items())
(…)
618 )
619 }
File /opt/conda/lib/python3.10/site-packages/fsspec/utils.py:734, in glob_translate(pat)
732 continue
733 elif "**" in part:
→ 734 raise ValueError(
735 "Invalid pattern: '**' can only be an entire path component"
736 )
737 if part:
738 results.extend(_translate(part, f"{not_sep}*", not_sep))
ValueError: Invalid pattern: '**' can only be an entire path component