When creating a post, please add:
- Week #: add it in the tags option of the post.
- Link to the classroom item you are referring to:
- Description (include relevant info but please do not post solution code or your entire notebook):
I am in the middle of the lab, using the Coursera framework, and I am unable to load the dataset. The call fails with the error below. Please help.
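For reference, here is the failing cell (Cell In[16] in the traceback below). I've added the `load_dataset` import from the earlier setup cell so the snippet is self-contained:

```python
# Minimal reproduction, copied from my notebook.
from datasets import load_dataset

huggingface_dataset_name = "knkarthick/dialogsum"

# This call raises the ValueError shown in the traceback below.
dataset = load_dataset(huggingface_dataset_name)

dataset
```

The full traceback is: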
ValueError                                Traceback (most recent call last)
Cell In[16], line 3
      1 huggingface_dataset_name = "knkarthick/dialogsum"
----> 3 dataset = load_dataset(huggingface_dataset_name)
      5 dataset

File /opt/conda/lib/python3.12/site-packages/datasets/load.py:1704, in load_dataset(path, name, data_dir, data_files, split, cache_dir, features, download_config, download_mode, ignore_verifications, keep_in_memory, save_infos, revision, use_auth_token, task, streaming, **config_kwargs)
   1701 ignore_verifications = ignore_verifications or save_infos
   1703 # Create a dataset builder
-> 1704 builder_instance = load_dataset_builder(
   1705     path=path,
   1706     name=name,
   1707     data_dir=data_dir,
   1708     data_files=data_files,
   1709     cache_dir=cache_dir,
   1710     features=features,
   1711     download_config=download_config,
   1712     download_mode=download_mode,
   1713     revision=revision,
   1714     use_auth_token=use_auth_token,
   1715     **config_kwargs,
   1716 )
   1718 # Return iterable dataset in case of streaming
   1719 if streaming:

File /opt/conda/lib/python3.12/site-packages/datasets/load.py:1530, in load_dataset_builder(path, name, data_dir, data_files, cache_dir, features, download_config, download_mode, revision, use_auth_token, **config_kwargs)
   1528 download_config = download_config.copy() if download_config else DownloadConfig()
   1529 download_config.use_auth_token = use_auth_token
-> 1530 dataset_module = dataset_module_factory(
   1531     path,
   1532     revision=revision,
   1533     download_config=download_config,
   1534     download_mode=download_mode,
   1535     data_dir=data_dir,
   1536     data_files=data_files,
   1537 )
   1539 # Get dataset builder class from the processing script
   1540 builder_cls = import_main_class(dataset_module.module_path)

File /opt/conda/lib/python3.12/site-packages/datasets/load.py:1282, in dataset_module_factory(path, revision, download_config, download_mode, force_local_path, dynamic_modules_path, data_dir, data_files, **download_kwargs)
   1277 if isinstance(e1, FileNotFoundError):
   1278     raise FileNotFoundError(
   1279         f"Couldn't find a dataset script at {relative_to_absolute_path(combined_path)} or any data file in the same directory. "
   1280         f"Couldn't find '{path}' on the Hugging Face Hub either: {type(e1).__name__}: {e1}"
   1281     ) from None
-> 1282 raise e1 from None
   1283 else:
   1284     raise FileNotFoundError(
   1285         f"Couldn't find a dataset script at {relative_to_absolute_path(combined_path)} or any data file in the same directory."
   1286     )

File /opt/conda/lib/python3.12/site-packages/datasets/load.py:1270, in dataset_module_factory(path, revision, download_config, download_mode, force_local_path, dynamic_modules_path, data_dir, data_files, **download_kwargs)
   1255     return HubDatasetModuleFactoryWithScript(
   1256         path,
   1257         revision=revision,
   (...)
   1260         dynamic_modules_path=dynamic_modules_path,
   1261     ).get_module()
   1262 else:
   1263     return HubDatasetModuleFactoryWithoutScript(
   1264         path,
   1265         revision=revision,
   1266         data_dir=data_dir,
   1267         data_files=data_files,
   1268         download_config=download_config,
   1269         download_mode=download_mode,
-> 1270     ).get_module()
   1271 except Exception as e1:  # noqa: all the attempts failed, before raising the error we should check if the module is already cached.
   1272     try:

File /opt/conda/lib/python3.12/site-packages/datasets/load.py:883, in HubDatasetModuleFactoryWithoutScript.get_module(self)
    873 token = self.download_config.use_auth_token
    874 hfh_dataset_info = HfApi(config.HF_ENDPOINT).dataset_info(
    875     self.name,
    876     revision=self.revision,
    877     token=token,
    878     timeout=100.0,
    879 )
    880 patterns = (
    881     sanitize_patterns(self.data_files)
    882     if self.data_files is not None
--> 883     else get_patterns_in_dataset_repository(hfh_dataset_info, self.data_dir)
    884 )
    885 data_files = DataFilesDict.from_hf_repo(
    886     patterns,
    887     dataset_info=hfh_dataset_info,
    888     base_path=self.data_dir,
    889     allowed_extensions=ALL_ALLOWED_EXTENSIONS,
    890 )
    891 module_names = {
    892     key: infer_module_for_data_files(data_files_list, use_auth_token=self.download_config.use_auth_token)
    893     for key, data_files_list in data_files.items()
    894 }

File /opt/conda/lib/python3.12/site-packages/datasets/data_files.py:482, in get_patterns_in_dataset_repository(dataset_info, base_path)
    480 resolver = partial(_resolve_single_pattern_in_dataset_repository, dataset_info, base_path=base_path)
    481 try:
--> 482     return _get_data_files_patterns(resolver)
    483 except FileNotFoundError:
    484     raise FileNotFoundError(
    485         f"The dataset repository at '{dataset_info.id}' doesn't contain any data file."
    486     ) from None

File /opt/conda/lib/python3.12/site-packages/datasets/data_files.py:99, in _get_data_files_patterns(pattern_resolver)
     97 try:
     98     for pattern in patterns:
---> 99         data_files = pattern_resolver(pattern)
    100         if len(data_files) > 0:
    101             non_empty_splits.append(split)

File /opt/conda/lib/python3.12/site-packages/datasets/data_files.py:306, in _resolve_single_pattern_in_dataset_repository(dataset_info, pattern, base_path, allowed_extensions)
    304 if base_path:
    305     pattern = f"{base_path}/{pattern}"
--> 306 glob_iter = [PurePath(filepath) for filepath in fs.glob(PurePath(pattern).as_posix()) if fs.isfile(filepath)]
    307 matched_paths = [
    308     filepath
    309     for filepath in glob_iter
    310     if filepath.name not in data_files_ignore and not filepath.name.startswith(".")
    311 ]
    312 if allowed_extensions is not None:

File /opt/conda/lib/python3.12/site-packages/fsspec/spec.py:611, in AbstractFileSystem.glob(self, path, maxdepth, **kwargs)
    607     depth = None
    609 allpaths = self.find(root, maxdepth=depth, withdirs=True, detail=True, **kwargs)
--> 611 pattern = glob_translate(path + ("/" if ends_with_sep else ""))
    612 pattern = re.compile(pattern)
    614 out = {
    615     p: info
    616     for p, info in sorted(allpaths.items())
    (...)
    623     )
    624 }

File /opt/conda/lib/python3.12/site-packages/fsspec/utils.py:729, in glob_translate(pat)
    727     continue
    728 elif "**" in part:
--> 729     raise ValueError(
    730         "Invalid pattern: '**' can only be an entire path component"
    731     )
    732 if part:
    733     results.extend(_translate(part, f"{not_sep}*", not_sep))

ValueError: Invalid pattern: '**' can only be an entire path component