Can you post a screenshot of the error you got? If the error log is too long, take two screenshots and post them both.
There you go:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
Cell In[3], line 1
----> 1 dataset = load_dataset("librispeech_asr",
2 split="train.clean.100",
3 streaming=True,
4 trust_remote_code=True)
File /usr/local/lib/python3.9/site-packages/datasets/load.py:2523, in load_dataset(path, name, data_dir, data_files, split, cache_dir, features, download_config, download_mode, verification_mode, ignore_verifications, keep_in_memory, save_infos, revision, token, use_auth_token, task, streaming, num_proc, storage_options, trust_remote_code, **config_kwargs)
2518 verification_mode = VerificationMode(
2519 (verification_mode or VerificationMode.BASIC_CHECKS) if not save_infos else VerificationMode.ALL_CHECKS
2520 )
2522 # Create a dataset builder
-> 2523 builder_instance = load_dataset_builder(
2524 path=path,
2525 name=name,
2526 data_dir=data_dir,
2527 data_files=data_files,
2528 cache_dir=cache_dir,
2529 features=features,
2530 download_config=download_config,
2531 download_mode=download_mode,
2532 revision=revision,
2533 token=token,
2534 storage_options=storage_options,
2535 trust_remote_code=trust_remote_code,
2536 _require_default_config_name=name is None,
2537 **config_kwargs,
2538 )
2540 # Return iterable dataset in case of streaming
2541 if streaming:
File /usr/local/lib/python3.9/site-packages/datasets/load.py:2195, in load_dataset_builder(path, name, data_dir, data_files, cache_dir, features, download_config, download_mode, revision, token, use_auth_token, storage_options, trust_remote_code, _require_default_config_name, **config_kwargs)
2193 download_config = download_config.copy() if download_config else DownloadConfig()
2194 download_config.storage_options.update(storage_options)
-> 2195 dataset_module = dataset_module_factory(
2196 path,
2197 revision=revision,
2198 download_config=download_config,
2199 download_mode=download_mode,
2200 data_dir=data_dir,
2201 data_files=data_files,
2202 cache_dir=cache_dir,
2203 trust_remote_code=trust_remote_code,
2204 _require_default_config_name=_require_default_config_name,
2205 _require_custom_configs=bool(config_kwargs),
2206 )
2207 # Get dataset builder class from the processing script
2208 builder_kwargs = dataset_module.builder_kwargs
File /usr/local/lib/python3.9/site-packages/datasets/load.py:1846, in dataset_module_factory(path, revision, download_config, download_mode, dynamic_modules_path, data_dir, data_files, cache_dir, trust_remote_code, _require_default_config_name, _require_custom_configs, **download_kwargs)
1841 if isinstance(e1, FileNotFoundError):
1842 raise FileNotFoundError(
1843 f"Couldn't find a dataset script at {relative_to_absolute_path(combined_path)} or any data file in the same directory. "
1844 f"Couldn't find '{path}' on the Hugging Face Hub either: {type(e1).__name__}: {e1}"
1845 ) from None
-> 1846 raise e1 from None
1847 else:
1848 raise FileNotFoundError(
1849 f"Couldn't find a dataset script at {relative_to_absolute_path(combined_path)} or any data file in the same directory."
1850 )
File /usr/local/lib/python3.9/site-packages/datasets/load.py:1791, in dataset_module_factory(path, revision, download_config, download_mode, dynamic_modules_path, data_dir, data_files, cache_dir, trust_remote_code, _require_default_config_name, _require_custom_configs, **download_kwargs)
1787 raise DatasetNotFoundError(
1788 msg + ". If the repo is private or gated, make sure to log in with `huggingface-cli login`."
1789 )
1790 else:
-> 1791 raise e
1792 if filename in [sibling.rfilename for sibling in dataset_info.siblings]: # contains a dataset script
1793 fs = HfFileSystem(endpoint=config.HF_ENDPOINT, token=download_config.token)
File /usr/local/lib/python3.9/site-packages/datasets/load.py:1765, in dataset_module_factory(path, revision, download_config, download_mode, dynamic_modules_path, data_dir, data_files, cache_dir, trust_remote_code, _require_default_config_name, _require_custom_configs, **download_kwargs)
1763 hf_api = HfApi(config.HF_ENDPOINT)
1764 try:
-> 1765 dataset_info = hf_api.dataset_info(
1766 repo_id=path,
1767 revision=revision,
1768 token=download_config.token,
1769 timeout=100.0,
1770 )
1771 except Exception as e: # noqa catch any exception of hf_hub and consider that the dataset doesn't exist
1772 if isinstance(
1773 e,
1774 (
(...)
1778 ),
1779 ):
File /usr/local/lib/python3.9/site-packages/huggingface_hub/utils/_validators.py:118, in validate_hf_hub_args.<locals>._inner_fn(*args, **kwargs)
115 if check_use_auth_token:
116 kwargs = smoothly_deprecate_use_auth_token(fn_name=fn.__name__, has_token=has_token, kwargs=kwargs)
--> 118 return fn(*args, **kwargs)
File /usr/local/lib/python3.9/site-packages/huggingface_hub/hf_api.py:2150, in HfApi.dataset_info(self, repo_id, revision, timeout, files_metadata, token)
2148 hf_raise_for_status(r)
2149 data = r.json()
-> 2150 return DatasetInfo(**data)
File /usr/local/lib/python3.9/site-packages/huggingface_hub/hf_api.py:770, in DatasetInfo.__init__(self, **kwargs)
768 self.likes = kwargs.pop("likes")
769 self.paperswithcode_id = kwargs.pop("paperswithcode_id", None)
--> 770 self.tags = kwargs.pop("tags")
771 card_data = kwargs.pop("cardData", None) or kwargs.pop("card_data", None)
772 self.card_data = (
773 DatasetCardData(**card_data, ignore_metadata_errors=True) if isinstance(card_data, dict) else card_data
774 )
KeyError: 'tags'
The traceback indicates that the code is failing because the `tags` key is missing from the dataset information returned by the Hub. The root cause is most likely a version mismatch between the `datasets` and `huggingface_hub` libraries you are using: the Hub API response no longer includes the `tags` field that your installed `huggingface_hub` version expects.
To fix the problem, upgrade both libraries to their latest versions. In practice, just add the following line at the beginning of the notebook and restart the kernel:
%pip install --upgrade datasets huggingface_hub
For reference, I have forwarded you the screenshots:
I hope this helps.
Thanks for the help — I was able to load the dataset and run the notebook’s code. Someone should update the course notebook’s underlying setup so that other learners don’t run into this issue.
If the issue arises in the course environment, please let me know and I will report it to the staff for an update.
Thanks for reporting
Yes, I’ve experienced this issue in the course environment, kindly refer to the screen capture in my original post.
@lesly.zerna please update this notebook for dataset version issue as it caused error on the course environment
The fix has been implemented! You can try the notebook again.