Here is the error I got when running this cell:
Cell starts on line 12:
url="https://www.youtube.com/watch?v=jGwO_UgTS7I"
save_dir="docs/youtube/"
loader = GenericLoader(
YoutubeAudioLoader([url],save_dir),
OpenAIWhisperParser()
)
docs = loader.load()
Error:
JSONDecodeError                           Traceback (most recent call last)
File /usr/local/lib/python3.9/site-packages/openai/api_requestor.py:669, in APIRequestor._interpret_response_line(self, rbody, rcode, rheaders, stream)
668 try:
--> 669     data = json.loads(rbody)
670 except (JSONDecodeError, UnicodeDecodeError) as e:
File /usr/local/lib/python3.9/json/__init__.py:346, in loads(s, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw)
343 if (cls is None and object_hook is None and
344         parse_int is None and parse_float is None and
345         parse_constant is None and object_pairs_hook is None and not kw):
 → 346     return _default_decoder.decode(s)
347 if cls is None:
File /usr/local/lib/python3.9/json/decoder.py:337, in JSONDecoder.decode(self, s, _w)
333 """Return the Python representation of s (a str instance
334 containing a JSON document).
335
336 """
 → 337 obj, end = self.raw_decode(s, idx=_w(s, 0).end())
338 end = _w(s, end).end()
File /usr/local/lib/python3.9/json/decoder.py:355, in JSONDecoder.raw_decode(self, s, idx)
354 except StopIteration as err:
--> 355     raise JSONDecodeError("Expecting value", s, err.value) from None
356 return obj, end
JSONDecodeError: Expecting value: line 1 column 1 (char 0)
The above exception was the direct cause of the following exception:
APIError                                  Traceback (most recent call last)
Cell In[12], line 7
2 save_dir="docs/youtube/"
3 loader = GenericLoader(
4     YoutubeAudioLoader([url],save_dir),
5     OpenAIWhisperParser()
6 )
----> 7 docs = loader.load()
File /usr/local/lib/python3.9/site-packages/langchain/document_loaders/generic.py:90, in GenericLoader.load(self)
88 def load(self) -> List[Document]:
89     """Load all documents."""
---> 90     return list(self.lazy_load())
File /usr/local/lib/python3.9/site-packages/langchain/document_loaders/generic.py:86, in GenericLoader.lazy_load(self)
84 """Load documents lazily. Use this when working at a large scale."""
85 for blob in self.blob_loader.yield_blobs():
—> 86     yield from self.blob_parser.lazy_parse(blob)
File /usr/local/lib/python3.9/site-packages/langchain/document_loaders/parsers/audio.py:51, in OpenAIWhisperParser.lazy_parse(self, blob)
49 # Transcribe
50 print(f"Transcribing part {split_number+1}!")
---> 51 transcript = openai.Audio.transcribe("whisper-1", file_obj)
53 yield Document(
54     page_content=transcript.text,
55     metadata={"source": blob.source, "chunk": split_number},
56 )
File /usr/local/lib/python3.9/site-packages/openai/api_resources/audio.py:57, in Audio.transcribe(cls, model, file, api_key, api_base, api_type, api_version, organization, **params)
55 requestor, files, data = cls._prepare_request(file, file.name, model, **params)
56 url = cls._get_url("transcriptions")
---> 57 response, _, api_key = requestor.request("post", url, files=files, params=data)
58 return util.convert_to_openai_object(
59     response, api_key, api_version, organization
60 )
File /usr/local/lib/python3.9/site-packages/openai/api_requestor.py:226, in APIRequestor.request(self, method, url, params, headers, files, stream, request_id, request_timeout)
205 def request(
206     self,
207     method,
(…)
214     request_timeout: Optional[Union[float, Tuple[float, float]]] = None,
215 ) -> Tuple[Union[OpenAIResponse, Iterator[OpenAIResponse]], bool, str]:
216     result = self.request_raw(
217         method.lower(),
218         url,
(…)
224         request_timeout=request_timeout,
225     )
 → 226     resp, got_stream = self._interpret_response(result, stream)
227     return resp, got_stream, self.api_key
File /usr/local/lib/python3.9/site-packages/openai/api_requestor.py:619, in APIRequestor._interpret_response(self, result, stream)
611     return (
612         self._interpret_response_line(
613             line, result.status_code, result.headers, stream=True
614         )
615         for line in parse_stream(result.iter_lines())
616     ), True
617 else:
618     return (
 → 619         self._interpret_response_line(
620             result.content.decode("utf-8"),
621             result.status_code,
622             result.headers,
623             stream=False,
624         ),
625         False,
626     )
File /usr/local/lib/python3.9/site-packages/openai/api_requestor.py:671, in APIRequestor._interpret_response_line(self, rbody, rcode, rheaders, stream)
669     data = json.loads(rbody)
670 except (JSONDecodeError, UnicodeDecodeError) as e:
 → 671     raise error.APIError(
672         f"HTTP code {rcode} from API ({rbody})", rbody, rcode, headers=rheaders
673     ) from e
674 resp = OpenAIResponse(data, rheaders)
675 # In the future, we might add a "status" parameter to errors
676 # to better handle the "error while streaming" case.
APIError: HTTP code 504 from API (
504 Gateway Time-out
