Lab 7: Llama Stack errors in this cell:
import os
import json

from llama_stack_client import LlamaStackClient
from llama_stack_client.lib.inference.event_logger import EventLogger
from llama_stack_client.types import UserMessage

LLAMA_STACK_API_TOGETHER_URL = os.getenv("DLAI_LLAMA_STACK_API_TOGETHER_URL", …)
LLAMA31_8B_INSTRUCT = "Llama3.1-8B-Instruct"

async def run_main():
    client = LlamaStackClient(
        base_url=LLAMA_STACK_API_TOGETHER_URL,
    )
    iterator = client.inference.chat_completion(
        messages=[
            UserMessage(
                content="Who wrote the book Innovator's Dilemma? How about Charlotte's Web?",
                role="user",
            ),
            UserMessage(
                content="which book was published first?",
                role="user",
            ),
        ],
        model=LLAMA31_8B_INSTRUCT,
        stream=True,
        x_llama_stack_provider_data=json.dumps({"together_api_key": os.getenv("TOGETHER_API_KEY")}),
    )
    async for log in EventLogger().log(iterator):
        log.print()
    # print("?")

await run_main()
The error is as follows:
NotFoundError                             Traceback (most recent call last)
Cell In[6], line 36
     33     log.print()
     34 # print("?")
---> 36 await run_main()

Cell In[6], line 15, in run_main()
     10 async def run_main():
     11     client = LlamaStackClient(
     12         base_url=LLAMA_STACK_API_TOGETHER_URL,
     13     )
---> 15     iterator = client.inference.chat_completion(
     16         messages=[
     17             UserMessage(
     18                 content="Who wrote the book Innovator's Dilemma? How about Charlotte's Web?",
     19                 role="user",
     20             ),
     21
     22             UserMessage(
     23                 content="which book was published first?",
     24                 role="user",
     25             ),
     26         ],
     27         model=LLAMA31_8B_INSTRUCT,
     28         stream=True,
     29         x_llama_stack_provider_data=json.dumps({"together_api_key": os.getenv('TOGETHER_API_KEY')})
     30     )
     32     async for log in EventLogger().log(iterator):
     33         log.print()

File /usr/local/lib/python3.11/site-packages/llama_stack_client/_utils/_utils.py:274, in required_args.<locals>.inner.<locals>.wrapper(*args, **kwargs)
    272         msg = f"Missing required argument: {quote(missing[0])}"
    273         raise TypeError(msg)
--> 274 return func(*args, **kwargs)

File /usr/local/lib/python3.11/site-packages/llama_stack_client/resources/inference/inference.py:220, in InferenceResource.chat_completion(self, messages, model, logprobs, sampling_params, stream, tool_choice, tool_prompt_format, tools, x_llama_stack_provider_data, extra_headers, extra_query, extra_body, timeout)
    213 extra_headers = {"Accept": "text/event-stream", **(extra_headers or {})}
    214 extra_headers = {
    215     **strip_not_given({"X-LlamaStack-ProviderData": x_llama_stack_provider_data}),
    216     **(extra_headers or {}),
    217 }
    218 return cast(
    219     InferenceChatCompletionResponse,
--> 220     self._post(
    221         "/inference/chat_completion",
    222         body=maybe_transform(
    223             {
    224                 "messages": messages,
    225                 "model": model,
    226                 "logprobs": logprobs,
    227                 "sampling_params": sampling_params,
    228                 "stream": stream,
    229                 "tool_choice": tool_choice,
    230                 "tool_prompt_format": tool_prompt_format,
    231                 "tools": tools,
    232             },
    233             inference_chat_completion_params.InferenceChatCompletionParams,
    234         ),
    235         options=make_request_options(
    236             extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
    237         ),
    238         cast_to=cast(
    239             Any, InferenceChatCompletionResponse
    240         ),  # Union types cannot be passed in as arguments in the type system
    241         stream=stream or False,
    242         stream_cls=Stream[InferenceChatCompletionResponse],
    243     ),
    244 )

File /usr/local/lib/python3.11/site-packages/llama_stack_client/_base_client.py:1251, in SyncAPIClient.post(self, path, cast_to, body, options, files, stream, stream_cls)
   1237 def post(
   1238     self,
   1239     path: str,
   (…)
   1246     stream_cls: type[_StreamT] | None = None,
   1247 ) -> ResponseT | _StreamT:
   1248     opts = FinalRequestOptions.construct(
   1249         method="post", url=path, json_data=body, files=to_httpx_files(files), **options
   1250     )
-> 1251     return cast(ResponseT, self.request(cast_to, opts, stream=stream, stream_cls=stream_cls))

File /usr/local/lib/python3.11/site-packages/llama_stack_client/_base_client.py:943, in SyncAPIClient.request(self, cast_to, options, remaining_retries, stream, stream_cls)
    940 else:
    941     retries_taken = 0
--> 943 return self._request(
    944     cast_to=cast_to,
    945     options=options,
    946     stream=stream,
    947     stream_cls=stream_cls,
    948     retries_taken=retries_taken,
    949 )

File /usr/local/lib/python3.11/site-packages/llama_stack_client/_base_client.py:1046, in SyncAPIClient._request(self, cast_to, options, retries_taken, stream, stream_cls)
   1043     err.response.read()
   1045     log.debug("Re-raising status error")
-> 1046     raise self._make_status_error_from_response(err.response) from None
   1048 return self._process_response(
   1049     cast_to=cast_to,
   1050     options=options,
   (…)
   1054     retries_taken=retries_taken,
   1055 )

NotFoundError: Error code: 404 - {'detail': 'Not Found'}
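
The request body itself looks well-formed, so my working assumption is that the 404 means the POST never matched a route on the server, i.e. the base URL is wrong or unset. Here is the sanity check I put together; using httpx directly is my own debugging idea, not something from the lab (httpx is the HTTP library llama_stack_client is built on, so it should already be installed):

import os
import httpx

# First rule out an unset/odd env var: repr() makes stray
# whitespace or an empty string visible.
base_url = os.getenv("DLAI_LLAMA_STACK_API_TOGETHER_URL", "")
print("base_url:", repr(base_url))

# POST an empty body to the same route the client uses. A 404 means the
# route doesn't exist at this URL; a 4xx validation error would instead
# mean routing is fine and the problem is elsewhere.
resp = httpx.post(f"{base_url}/inference/chat_completion", json={})
print(resp.status_code, resp.text)

If the URL prints correctly and the raw POST still returns 404, my next guess would be a version mismatch between llama_stack_client and the server, since as far as I can tell the inference route path has changed between releases.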