I am getting a "We expect a single channel audio input for AutomaticSpeechRecognitionPipeline" error when running the asr(audio) cell. From the traceback, the pipeline raises this because the audio ndarray is not one-dimensional — presumably the file was loaded as stereo (shape like (n_samples, 2)) rather than mono (shape (n_samples,)).
Details: ---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In[26], line 1
----> 1 asr(audio)
File /usr/local/lib/python3.11/site-packages/transformers/pipelines/automatic_speech_recognition.py:292, in AutomaticSpeechRecognitionPipeline.__call__(self, inputs, **kwargs)
229 def __call__(
230 self,
231 inputs: Union[np.ndarray, bytes, str],
232 **kwargs,
233 ):
234 """
235 Transcribe the audio sequence(s) given as inputs to text. See the [`AutomaticSpeechRecognitionPipeline`]
236 documentation for more information.
(...)
290 `"".join(chunk["text"] for chunk in output["chunks"])`.
291 """
--> 292 return super().__call__(inputs, **kwargs)
File /usr/local/lib/python3.11/site-packages/transformers/pipelines/base.py:1154, in Pipeline.__call__(self, inputs, num_workers, batch_size, *args, **kwargs)
1152 return self.iterate(inputs, preprocess_params, forward_params, postprocess_params)
1153 elif self.framework == "pt" and isinstance(self, ChunkPipeline):
-> 1154 return next(
1155 iter(
1156 self.get_iterator(
1157 [inputs], num_workers, batch_size, preprocess_params, forward_params, postprocess_params
1158 )
1159 )
1160 )
1161 else:
1162 return self.run_single(inputs, preprocess_params, forward_params, postprocess_params)
File /usr/local/lib/python3.11/site-packages/transformers/pipelines/pt_utils.py:124, in PipelineIterator.__next__(self)
121 return self.loader_batch_item()
123 # We're out of items within a batch
--> 124 item = next(self.iterator)
125 processed = self.infer(item, **self.params)
126 # We now have a batch of "inferred things".
File /usr/local/lib/python3.11/site-packages/transformers/pipelines/pt_utils.py:266, in PipelinePackIterator.__next__(self)
263 return accumulator
265 while not is_last:
--> 266 processed = self.infer(next(self.iterator), **self.params)
267 if self.loader_batch_size is not None:
268 if isinstance(processed, torch.Tensor):
File /usr/local/lib/python3.11/site-packages/torch/utils/data/dataloader.py:631, in _BaseDataLoaderIter.__next__(self)
628 if self._sampler_iter is None:
629 # TODO(https://github.com/pytorch/pytorch/issues/76750)
630 self._reset() # type: ignore[call-arg]
--> 631 data = self._next_data()
632 self._num_yielded += 1
633 if self._dataset_kind == _DatasetKind.Iterable and \
634 self._IterableDataset_len_called is not None and \
635 self._num_yielded > self._IterableDataset_len_called:
File /usr/local/lib/python3.11/site-packages/torch/utils/data/dataloader.py:675, in _SingleProcessDataLoaderIter._next_data(self)
673 def _next_data(self):
674 index = self._next_index() # may raise StopIteration
--> 675 data = self._dataset_fetcher.fetch(index) # may raise StopIteration
676 if self._pin_memory:
677 data = _utils.pin_memory.pin_memory(data, self._pin_memory_device)
File /usr/local/lib/python3.11/site-packages/torch/utils/data/_utils/fetch.py:32, in _IterableDatasetFetcher.fetch(self, possibly_batched_index)
30 for _ in possibly_batched_index:
31 try:
--> 32 data.append(next(self.dataset_iter))
33 except StopIteration:
34 self.ended = True
File /usr/local/lib/python3.11/site-packages/transformers/pipelines/pt_utils.py:183, in PipelineChunkIterator.__next__(self)
180 self.subiterator = self.infer(next(self.iterator), **self.params)
181 try:
182 # Try to return next item
--> 183 processed = next(self.subiterator)
184 except StopIteration:
185 # When a preprocess iterator ends, we can start looking at the next item
186 # ChunkIterator will keep feeding until ALL elements of iterator
(...)
189 # Another way to look at it, is we're basically flattening lists of lists
190 # into a single list, but with generators
191 self.subiterator = self.infer(next(self.iterator), **self.params)
File /usr/local/lib/python3.11/site-packages/transformers/pipelines/automatic_speech_recognition.py:419, in AutomaticSpeechRecognitionPipeline.preprocess(self, inputs, chunk_length_s, stride_length_s)
417 raise ValueError(f"We expect a numpy ndarray as input, got `{type(inputs)}`")
418 if len(inputs.shape) != 1:
--> 419 raise ValueError("We expect a single channel audio input for AutomaticSpeechRecognitionPipeline")
421 if chunk_length_s:
422 if stride_length_s is None:
ValueError: We expect a single channel audio input for AutomaticSpeechRecognitionPipeline