Getting an error when using load_dataset

Course: Building Applications with Vector databases
When I run the lab, I consistently get the following error:
File /usr/local/lib/python3.10/site-packages/datasets/load.py:2128, in load_dataset(path, name, data_dir, data_files, split, cache_dir, features, download_config, download_mode, verification_mode, ignore_verifications, keep_in_memory, save_infos, revision, token, use_auth_token, task, streaming, num_proc, storage_options, **config_kwargs)
2123 verification_mode = VerificationMode(
2124 (verification_mode or VerificationMode.BASIC_CHECKS) if not save_infos else VerificationMode.ALL_CHECKS
2125 )
2127 # Create a dataset builder
→ 2128 builder_instance = load_dataset_builder(
2129 path=path,
2130 name=name,
2131 data_dir=data_dir,
2132 data_files=data_files,
2133 cache_dir=cache_dir,
2134 features=features,
2135 download_config=download_config,
2136 download_mode=download_mode,
2137 revision=revision,
2138 token=token,
2139 storage_options=storage_options,
2140 **config_kwargs,
2141 )
2143 # Return iterable dataset in case of streaming
2144 if streaming:

File /usr/local/lib/python3.10/site-packages/datasets/load.py:1814, in load_dataset_builder(path, name, data_dir, data_files, cache_dir, features, download_config, download_mode, revision, token, use_auth_token, storage_options, **config_kwargs)
1812 download_config = download_config.copy() if download_config else DownloadConfig()
1813 download_config.storage_options.update(storage_options)
→ 1814 dataset_module = dataset_module_factory(
1815 path,
1816 revision=revision,
1817 download_config=download_config,
1818 download_mode=download_mode,
1819 data_dir=data_dir,
1820 data_files=data_files,
1821 )
1822 # Get dataset builder class from the processing script
1823 builder_kwargs = dataset_module.builder_kwargs

File /usr/local/lib/python3.10/site-packages/datasets/load.py:1511, in dataset_module_factory(path, revision, download_config, download_mode, dynamic_modules_path, data_dir, data_files, **download_kwargs)
1506 if isinstance(e1, FileNotFoundError):
1507 raise FileNotFoundError(
1508 f"Couldn't find a dataset script at {relative_to_absolute_path(combined_path)} or any data file in the same directory. "
1509 f"Couldn't find '{path}' on the Hugging Face Hub either: {type(e1).__name__}: {e1}"
1510 ) from None
→ 1511 raise e1 from None
1512 else:
1513 raise FileNotFoundError(
1514 f"Couldn't find a dataset script at {relative_to_absolute_path(combined_path)} or any data file in the same directory."
1515 )

File /usr/local/lib/python3.10/site-packages/datasets/load.py:1478, in dataset_module_factory(path, revision, download_config, download_mode, dynamic_modules_path, data_dir, data_files, **download_kwargs)
1474 raise FileNotFoundError(
1475 msg + ". If the repo is private or gated, make sure to log in with `huggingface-cli login`."
1476 )
1477 else:
→ 1478 raise e
1479 if filename in [sibling.rfilename for sibling in dataset_info.siblings]:
1480 return HubDatasetModuleFactoryWithScript(
1481 path,
1482 revision=revision,
(…)
1485 dynamic_modules_path=dynamic_modules_path,
1486 ).get_module()

File /usr/local/lib/python3.10/site-packages/datasets/load.py:1452, in dataset_module_factory(path, revision, download_config, download_mode, dynamic_modules_path, data_dir, data_files, **download_kwargs)
1450 hf_api = HfApi(config.HF_ENDPOINT)
1451 try:
→ 1452 dataset_info = hf_api.dataset_info(
1453 repo_id=path,
1454 revision=revision,
1455 token=download_config.token,
1456 timeout=100.0,
1457 )
1458 except Exception as e: # noqa catch any exception of hf_hub and consider that the dataset doesn’t exist
1459 if isinstance(
1460 e,
1461 (
(…)
1465 ),
1466 ):

File /usr/local/lib/python3.10/site-packages/huggingface_hub/utils/_validators.py:118, in validate_hf_hub_args.._inner_fn(*args, **kwargs)
115 if check_use_auth_token:
116 kwargs = smoothly_deprecate_use_auth_token(fn_name=fn.__name__, has_token=has_token, kwargs=kwargs)
→ 118 return fn(*args, **kwargs)

File /usr/local/lib/python3.10/site-packages/huggingface_hub/hf_api.py:2150, in HfApi.dataset_info(self, repo_id, revision, timeout, files_metadata, token)
2148 hf_raise_for_status(r)
2149 data = r.json()
→ 2150 return DatasetInfo(**data)

File /usr/local/lib/python3.10/site-packages/huggingface_hub/hf_api.py:770, in DatasetInfo.__init__(self, **kwargs)
768 self.likes = kwargs.pop("likes")
769 self.paperswithcode_id = kwargs.pop("paperswithcode_id", None)
→ 770 self.tags = kwargs.pop("tags")
771 card_data = kwargs.pop(“cardData”, None) or kwargs.pop(“card_data”, None)
772 self.card_data = (
773 DatasetCardData(**card_data, ignore_metadata_errors=True) if isinstance(card_data, dict) else card_data
774 )

KeyError: 'tags'

It would be better if you move your post to the forum topic for that assignment.

You can move your thread using the little “pencil” icon in the thread title. Select the course from the drop-down list, and then add a tag for the week number.

Mentors generally don’t monitor the “Study Group” forum area.