L_3 Notebook Fails

Hi, the 6th cell of notebook L_3, from the “Retry-based structured output” section, fails for me. I tried restarting the kernel and logging in on a different account.

It is the cell with

response = instructor_client.chat.completions.create(
    model='meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo',
    messages = [{
        "role": "user", 
        "content": 'sup'
    }], 
    # Note: OpenAI uses response_format, instructor
    #       uses response_model!
    response_model=Greeting
)

response

The error message is as follows:

---------------------------------------------------------------------------
BadRequestError                           Traceback (most recent call last)
File /usr/local/lib/python3.11/site-packages/instructor/retry.py:168, in retry_sync(func, response_model, args, kwargs, context, max_retries, strict, mode, hooks)
    167 hooks.emit_completion_arguments(*args, **kwargs)
--> 168 response = func(*args, **kwargs)
    169 hooks.emit_completion_response(response)

File /usr/local/lib/python3.11/site-packages/openai/_utils/_utils.py:279, in required_args.<locals>.inner.<locals>.wrapper(*args, **kwargs)
    278     raise TypeError(msg)
--> 279 return func(*args, **kwargs)

File /usr/local/lib/python3.11/site-packages/openai/resources/chat/completions/completions.py:914, in Completions.create(self, messages, model, audio, frequency_penalty, function_call, functions, logit_bias, logprobs, max_completion_tokens, max_tokens, metadata, modalities, n, parallel_tool_calls, prediction, presence_penalty, reasoning_effort, response_format, seed, service_tier, stop, store, stream, stream_options, temperature, tool_choice, tools, top_logprobs, top_p, user, web_search_options, extra_headers, extra_query, extra_body, timeout)
    913 validate_response_format(response_format)
--> 914 return self._post(
    915     "/chat/completions",
    916     body=maybe_transform(
    917         {
    918             "messages": messages,
    919             "model": model,
    920             "audio": audio,
    921             "frequency_penalty": frequency_penalty,
    922             "function_call": function_call,
    923             "functions": functions,
    924             "logit_bias": logit_bias,
    925             "logprobs": logprobs,
    926             "max_completion_tokens": max_completion_tokens,
    927             "max_tokens": max_tokens,
    928             "metadata": metadata,
    929             "modalities": modalities,
    930             "n": n,
    931             "parallel_tool_calls": parallel_tool_calls,
    932             "prediction": prediction,
    933             "presence_penalty": presence_penalty,
    934             "reasoning_effort": reasoning_effort,
    935             "response_format": response_format,
    936             "seed": seed,
    937             "service_tier": service_tier,
    938             "stop": stop,
    939             "store": store,
    940             "stream": stream,
    941             "stream_options": stream_options,
    942             "temperature": temperature,
    943             "tool_choice": tool_choice,
    944             "tools": tools,
    945             "top_logprobs": top_logprobs,
    946             "top_p": top_p,
    947             "user": user,
    948             "web_search_options": web_search_options,
    949         },
    950         completion_create_params.CompletionCreateParams,
    951     ),
    952     options=make_request_options(
    953         extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
    954     ),
    955     cast_to=ChatCompletion,
    956     stream=stream or False,
    957     stream_cls=Stream[ChatCompletionChunk],
    958 )

File /usr/local/lib/python3.11/site-packages/openai/_base_client.py:1242, in SyncAPIClient.post(self, path, cast_to, body, options, files, stream, stream_cls)
   1239 opts = FinalRequestOptions.construct(
   1240     method="post", url=path, json_data=body, files=to_httpx_files(files), **options
   1241 )
-> 1242 return cast(ResponseT, self.request(cast_to, opts, stream=stream, stream_cls=stream_cls))

File /usr/local/lib/python3.11/site-packages/openai/_base_client.py:919, in SyncAPIClient.request(self, cast_to, options, remaining_retries, stream, stream_cls)
    917     retries_taken = 0
--> 919 return self._request(
    920     cast_to=cast_to,
    921     options=options,
    922     stream=stream,
    923     stream_cls=stream_cls,
    924     retries_taken=retries_taken,
    925 )

File /usr/local/lib/python3.11/site-packages/openai/_base_client.py:1023, in SyncAPIClient._request(self, cast_to, options, retries_taken, stream, stream_cls)
   1022     log.debug("Re-raising status error")
-> 1023     raise self._make_status_error_from_response(err.response) from None
   1025 return self._process_response(
   1026     cast_to=cast_to,
   1027     options=options,
   (...)
   1031     retries_taken=retries_taken,
   1032 )

BadRequestError: Error code: 400 - {'id': 'nuGfNuB-2j9zxn-940aead85e573023', 'error': {'message': 'invalid tools grammar: Model supports only tool_choice auto', 'type': 'invalid_request_error', 'param': 'tools', 'code': None}}

The above exception was the direct cause of the following exception:

RetryError                                Traceback (most recent call last)
File /usr/local/lib/python3.11/site-packages/instructor/retry.py:163, in retry_sync(func, response_model, args, kwargs, context, max_retries, strict, mode, hooks)
    162 response = None
--> 163 for attempt in max_retries:
    164     with attempt:

File /usr/local/lib/python3.11/site-packages/tenacity/__init__.py:443, in BaseRetrying.__iter__(self)
    442 while True:
--> 443     do = self.iter(retry_state=retry_state)
    444     if isinstance(do, DoAttempt):

File /usr/local/lib/python3.11/site-packages/tenacity/__init__.py:376, in BaseRetrying.iter(self, retry_state)
    375 for action in self.iter_state.actions:
--> 376     result = action(retry_state)
    377 return result

File /usr/local/lib/python3.11/site-packages/tenacity/__init__.py:419, in BaseRetrying._post_stop_check_actions.<locals>.exc_check(rs)
    418     raise retry_exc.reraise()
--> 419 raise retry_exc from fut.exception()

RetryError: RetryError[<Future at 0x7f455b32b110 state=finished raised BadRequestError>]

The above exception was the direct cause of the following exception:

InstructorRetryException                  Traceback (most recent call last)
Cell In[6], line 1
----> 1 response = instructor_client.chat.completions.create(
      2     model='meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo',
      3     messages = [{
      4         "role": "user", 
      5         "content": 'sup'
      6     }], 
      7     # Note: OpenAI uses response_format, instructor
      8     #       uses response_model!
      9     response_model=Greeting
     10 )
     12 response

File /usr/local/lib/python3.11/site-packages/instructor/client.py:180, in Instructor.create(self, response_model, messages, max_retries, validation_context, context, strict, **kwargs)
    168 def create(
    169     self,
    170     response_model: type[T] | None,
   (...)
    176     **kwargs: Any,
    177 ) -> T | Any | Awaitable[T] | Awaitable[Any]:
    178     kwargs = self.handle_kwargs(kwargs)
--> 180     return self.create_fn(
    181         response_model=response_model,
    182         messages=messages,
    183         max_retries=max_retries,
    184         validation_context=validation_context,
    185         context=context,
    186         strict=strict,
    187         hooks=self.hooks,
    188         **kwargs,
    189     )

File /usr/local/lib/python3.11/site-packages/instructor/patch.py:193, in patch.<locals>.new_create_sync(response_model, validation_context, context, max_retries, strict, hooks, *args, **kwargs)
    187 response_model, new_kwargs = handle_response_model(
    188     response_model=response_model, mode=mode, **kwargs
    189 )  # type: ignore
    191 new_kwargs = handle_templating(new_kwargs, context)
--> 193 response = retry_sync(
    194     func=func,  # type: ignore
    195     response_model=response_model,
    196     context=context,
    197     max_retries=max_retries,
    198     args=args,
    199     hooks=hooks,
    200     strict=strict,
    201     kwargs=new_kwargs,
    202     mode=mode,
    203 )
    204 return response

File /usr/local/lib/python3.11/site-packages/instructor/retry.py:194, in retry_sync(func, response_model, args, kwargs, context, max_retries, strict, mode, hooks)
    192 except RetryError as e:
    193     logger.debug(f"Retry error: {e}")
--> 194     raise InstructorRetryException(
    195         e.last_attempt._exception,
    196         last_completion=response,
    197         n_attempts=attempt.retry_state.attempt_number,
    198         #! deprecate messages soon
    199         messages=extract_messages(
    200             kwargs
    201         ),  # Use the optimized function instead of nested lookups
    202         create_kwargs=kwargs,
    203         total_usage=total_usage,
    204     ) from e

InstructorRetryException: Error code: 400 - {'id': 'nuGfNuB-2j9zxn-940aead85e573023', 'error': {'message': 'invalid tools grammar: Model supports only tool_choice auto', 'type': 'invalid_request_error', 'param': 'tools', 'code': None}}