The error below came up when I ran the cell that projects the embeddings using UMAP in the course environment. Any suggestions on how to solve this? Thank you.
Lesson 2 Visualizing High-dimensional Data with UMAP
OSError Traceback (most recent call last)
Cell In[37], line 7
1 from utils import prepare_dataset_for_umap_visualization as data_prep
3 # prepare image_text pairs
4
5 # for the first 50 data of Huggingface dataset
6 # "yashikota/cat-image-dataset"
----> 7 cat_img_txt_pairs = data_prep("yashikota/cat-image-dataset",
8 "cat", test_size=50)
10 # for the first 50 data of Huggingface dataset
11 # "tanganke/stanford_cars"
12 car_img_txt_pairs = data_prep("tanganke/stanford_cars",
13 "car", test_size=50)
File ~/work/L2/utils.py:75, in prepare_dataset_for_umap_visualization(hf_dataset, class_name, templates, test_size)
73 def prepare_dataset_for_umap_visualization(hf_dataset, class_name, templates=templates, test_size=1000):
74 # load Huggingface dataset (download if needed)
---> 75 dataset = load_dataset(hf_dataset, trust_remote_code=True)
76 # split dataset with specific test_size
77 train_test_dataset = dataset['train'].train_test_split(test_size=test_size)
File /usr/local/lib/python3.11/site-packages/datasets/load.py:2594, in load_dataset(path, name, data_dir, data_files, split, cache_dir, features, download_config, download_mode, verification_mode, ignore_verifications, keep_in_memory, save_infos, revision, token, use_auth_token, task, streaming, num_proc, storage_options, trust_remote_code, **config_kwargs)
2589 verification_mode = VerificationMode(
2590 (verification_mode or VerificationMode.BASIC_CHECKS) if not save_infos else VerificationMode.ALL_CHECKS
2591 )
2593 # Create a dataset builder
→ 2594 builder_instance = load_dataset_builder(
2595 path=path,
2596 name=name,
2597 data_dir=data_dir,
2598 data_files=data_files,
2599 cache_dir=cache_dir,
2600 features=features,
2601 download_config=download_config,
2602 download_mode=download_mode,
2603 revision=revision,
2604 token=token,
2605 storage_options=storage_options,
2606 trust_remote_code=trust_remote_code,
2607 _require_default_config_name=name is None,
2608 **config_kwargs,
2609 )
2611 # Return iterable dataset in case of streaming
2612 if streaming:
File /usr/local/lib/python3.11/site-packages/datasets/load.py:2266, in load_dataset_builder(path, name, data_dir, data_files, cache_dir, features, download_config, download_mode, revision, token, use_auth_token, storage_options, trust_remote_code, _require_default_config_name, **config_kwargs)
2264 download_config = download_config.copy() if download_config else DownloadConfig()
2265 download_config.storage_options.update(storage_options)
→ 2266 dataset_module = dataset_module_factory(
2267 path,
2268 revision=revision,
2269 download_config=download_config,
2270 download_mode=download_mode,
2271 data_dir=data_dir,
2272 data_files=data_files,
2273 cache_dir=cache_dir,
2274 trust_remote_code=trust_remote_code,
2275 _require_default_config_name=_require_default_config_name,
2276 _require_custom_configs=bool(config_kwargs),
2277 )
2278 # Get dataset builder class from the processing script
2279 builder_kwargs = dataset_module.builder_kwargs
File /usr/local/lib/python3.11/site-packages/datasets/load.py:1914, in dataset_module_factory(path, revision, download_config, download_mode, dynamic_modules_path, data_dir, data_files, cache_dir, trust_remote_code, _require_default_config_name, _require_custom_configs, **download_kwargs)
1909 if isinstance(e1, FileNotFoundError):
1910 raise FileNotFoundError(
1911 f"Couldn't find a dataset script at {relative_to_absolute_path(combined_path)} or any data file in the same directory. "
1912 f"Couldn't find '{path}' on the Hugging Face Hub either: {type(e1).__name__}: {e1}"
1913 ) from None
→ 1914 raise e1 from None
1915 else:
1916 raise FileNotFoundError(
1917 f"Couldn’t find a dataset script at {relative_to_absolute_path(combined_path)} or any data file in the same directory."
1918 )
File /usr/local/lib/python3.11/site-packages/datasets/load.py:1896, in dataset_module_factory(path, revision, download_config, download_mode, dynamic_modules_path, data_dir, data_files, cache_dir, trust_remote_code, _require_default_config_name, _require_custom_configs, **download_kwargs)
1880 return HubDatasetModuleFactoryWithScript(
1881 path,
1882 revision=revision,
(…)
1886 trust_remote_code=trust_remote_code,
1887 ).get_module()
1888 else:
1889 return HubDatasetModuleFactoryWithoutScript(
1890 path,
1891 revision=revision,
1892 data_dir=data_dir,
1893 data_files=data_files,
1894 download_config=download_config,
1895 download_mode=download_mode,
→ 1896 ).get_module()
1897 except Exception as e1:
1898 # All the attempts failed, before raising the error we should check if the module is already cached
1899 try:
File /usr/local/lib/python3.11/site-packages/datasets/load.py:1214, in HubDatasetModuleFactoryWithoutScript.get_module(self)
1212 download_config.download_desc = "Downloading readme"
1213 try:
→ 1214 dataset_readme_path = cached_path(
1215 hf_dataset_url(self.name, config.REPOCARD_FILENAME, revision=revision),
1216 download_config=download_config,
1217 )
1218 dataset_card_data = DatasetCard.load(Path(dataset_readme_path)).data
1219 except FileNotFoundError:
File /usr/local/lib/python3.11/site-packages/datasets/utils/file_utils.py:201, in cached_path(url_or_filename, download_config, **download_kwargs)
197 url_or_filename = strip_protocol(url_or_filename)
199 if is_remote_url(url_or_filename):
200 # URL, so get it from the cache (downloading if necessary)
→ 201 output_path = get_from_cache(
202 url_or_filename,
203 cache_dir=cache_dir,
204 force_download=download_config.force_download,
205 proxies=download_config.proxies,
206 resume_download=download_config.resume_download,
207 user_agent=download_config.user_agent,
208 local_files_only=download_config.local_files_only,
209 use_etag=download_config.use_etag,
210 max_retries=download_config.max_retries,
211 token=download_config.token,
212 ignore_url_params=download_config.ignore_url_params,
213 storage_options=download_config.storage_options,
214 download_desc=download_config.download_desc,
215 disable_tqdm=download_config.disable_tqdm,
216 )
217 elif os.path.exists(url_or_filename):
218 # File, and it exists.
219 output_path = url_or_filename
File /usr/local/lib/python3.11/site-packages/datasets/utils/file_utils.py:537, in get_from_cache(url, cache_dir, force_download, proxies, etag_timeout, resume_download, user_agent, local_files_only, use_etag, max_retries, token, use_auth_token, ignore_url_params, storage_options, download_desc, disable_tqdm)
534 if isinstance(cache_dir, Path):
535 cache_dir = str(cache_dir)
→ 537 os.makedirs(cache_dir, exist_ok=True)
539 if ignore_url_params:
540 # strip all query parameters and #fragments from the URL
541 cached_url = urljoin(url, urlparse(url).path)
File <frozen os>:215, in makedirs(name, mode, exist_ok)
File <frozen os>:225, in makedirs(name, mode, exist_ok)
OSError: [Errno 30] Read-only file system: '/home/jovyan/work/models/.cache'