Here is the error I got when running this cell:
Cell starts on line 12:
url="https://www.youtube.com/watch?v=jGwO_UgTS7I"
save_dir="docs/youtube/"
loader = GenericLoader(
YoutubeAudioLoader([url],save_dir),
OpenAIWhisperParser()
)
docs = loader.load()
Error:
JSONDecodeError Traceback (most recent call last)
File /usr/local/lib/python3.9/site-packages/openai/api_requestor.py:669, in APIRequestor._interpret_response_line(self, rbody, rcode, rheaders, stream)
668 try:
→ 669 data = json.loads(rbody)
670 except (JSONDecodeError, UnicodeDecodeError) as e:
File /usr/local/lib/python3.9/json/__init__.py:346, in loads(s, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw)
343 if (cls is None and object_hook is None and
344 parse_int is None and parse_float is None and
345 parse_constant is None and object_pairs_hook is None and not kw):
→ 346 return _default_decoder.decode(s)
347 if cls is None:
File /usr/local/lib/python3.9/json/decoder.py:337, in JSONDecoder.decode(self, s, _w)
333 """Return the Python representation of ``s`` (a ``str`` instance
334 containing a JSON document).
335
336 """
→ 337 obj, end = self.raw_decode(s, idx=_w(s, 0).end())
338 end = _w(s, end).end()
File /usr/local/lib/python3.9/json/decoder.py:355, in JSONDecoder.raw_decode(self, s, idx)
354 except StopIteration as err:
→ 355 raise JSONDecodeError("Expecting value", s, err.value) from None
356 return obj, end
JSONDecodeError: Expecting value: line 1 column 1 (char 0)
The above exception was the direct cause of the following exception:
APIError Traceback (most recent call last)
Cell In[12], line 7
2 save_dir=“docs/youtube/”
3 loader = GenericLoader(
4 YoutubeAudioLoader([url],save_dir),
5 OpenAIWhisperParser()
6 )
----> 7 docs = loader.load()
File /usr/local/lib/python3.9/site-packages/langchain/document_loaders/generic.py:90, in GenericLoader.load(self)
88 def load(self) → List[Document]:
89 “”“Load all documents.”“”
—> 90 return list(self.lazy_load())
File /usr/local/lib/python3.9/site-packages/langchain/document_loaders/generic.py:86, in GenericLoader.lazy_load(self)
84 “”“Load documents lazily. Use this when working at a large scale.”“”
85 for blob in self.blob_loader.yield_blobs():
—> 86 yield from self.blob_parser.lazy_parse(blob)
File /usr/local/lib/python3.9/site-packages/langchain/document_loaders/parsers/audio.py:51, in OpenAIWhisperParser.lazy_parse(self, blob)
49 # Transcribe
50 print(f"Transcribing part {split_number+1}!")
—> 51 transcript = openai.Audio.transcribe(“whisper-1”, file_obj)
53 yield Document(
54 page_content=transcript.text,
55 metadata={“source”: blob.source, “chunk”: split_number},
56 )
File /usr/local/lib/python3.9/site-packages/openai/api_resources/audio.py:57, in Audio.transcribe(cls, model, file, api_key, api_base, api_type, api_version, organization, **params)
55 requestor, files, data = cls._prepare_request(file, file.name, model, **params)
56 url = cls._get_url(“transcriptions”)
—> 57 response, _, api_key = requestor.request(“post”, url, files=files, params=data)
58 return util.convert_to_openai_object(
59 response, api_key, api_version, organization
60 )
File /usr/local/lib/python3.9/site-packages/openai/api_requestor.py:226, in APIRequestor.request(self, method, url, params, headers, files, stream, request_id, request_timeout)
205 def request(
206 self,
207 method,
(…)
214 request_timeout: Optional[Union[float, Tuple[float, float]]] = None,
215 ) → Tuple[Union[OpenAIResponse, Iterator[OpenAIResponse]], bool, str]:
216 result = self.request_raw(
217 method.lower(),
218 url,
(…)
224 request_timeout=request_timeout,
225 )
→ 226 resp, got_stream = self._interpret_response(result, stream)
227 return resp, got_stream, self.api_key
File /usr/local/lib/python3.9/site-packages/openai/api_requestor.py:619, in APIRequestor._interpret_response(self, result, stream)
611 return (
612 self._interpret_response_line(
613 line, result.status_code, result.headers, stream=True
614 )
615 for line in parse_stream(result.iter_lines())
616 ), True
617 else:
618 return (
→ 619 self._interpret_response_line(
620 result.content.decode(“utf-8”),
621 result.status_code,
622 result.headers,
623 stream=False,
624 ),
625 False,
626 )
File /usr/local/lib/python3.9/site-packages/openai/api_requestor.py:671, in APIRequestor._interpret_response_line(self, rbody, rcode, rheaders, stream)
669 data = json.loads(rbody)
670 except (JSONDecodeError, UnicodeDecodeError) as e:
→ 671 raise error.APIError(
672 f"HTTP code {rcode} from API ({rbody})", rbody, rcode, headers=rheaders
673 ) from e
674 resp = OpenAIResponse(data, rheaders)
675 # In the future, we might add a “status” parameter to errors
676 # to better handle the “error while streaming” case.
APIError: HTTP code 504 from API (
504 Gateway Time-out