Lab 7: Llama Stack errors in this cell:
import os
import json

from llama_stack_client import LlamaStackClient
from llama_stack_client.lib.inference.event_logger import EventLogger
from llama_stack_client.types import UserMessage

LLAMA_STACK_API_TOGETHER_URL = os.getenv("DLAI_LLAMA_STACK_API_TOGETHER_URL", …)
LLAMA31_8B_INSTRUCT = "Llama3.1-8B-Instruct"

async def run_main():
    client = LlamaStackClient(
        base_url=LLAMA_STACK_API_TOGETHER_URL,
    )
    iterator = client.inference.chat_completion(
        messages=[
            UserMessage(
                content="Who wrote the book Innovator's Dilemma? How about Charlotte's Web?",
                role="user",
            ),
            UserMessage(
                content="which book was published first?",
                role="user",
            ),
        ],
        model=LLAMA31_8B_INSTRUCT,
        stream=True,
        x_llama_stack_provider_data=json.dumps({"together_api_key": os.getenv("TOGETHER_API_KEY")}),
    )
    async for log in EventLogger().log(iterator):
        log.print()
    # print("?")

await run_main()
The error is as follows:
NotFoundError                             Traceback (most recent call last)
Cell In[6], line 36
     33     log.print()
     34 # print("?")
---> 36 await run_main()

Cell In[6], line 15, in run_main()
     10 async def run_main():
     11     client = LlamaStackClient(
     12         base_url=LLAMA_STACK_API_TOGETHER_URL,
     13     )
---> 15     iterator = client.inference.chat_completion(
     16         messages=[
     17             UserMessage(
     18                 content="Who wrote the book Innovator's Dilemma? How about Charlotte's Web?",
     19                 role="user",
     20             ),
     21
     22             UserMessage(
     23                 content="which book was published first?",
     24                 role="user",
     25             ),
     26         ],
     27         model=LLAMA31_8B_INSTRUCT,
     28         stream=True,
     29         x_llama_stack_provider_data=json.dumps({"together_api_key": os.getenv('TOGETHER_API_KEY')})
     30     )
     32     async for log in EventLogger().log(iterator):
     33         log.print()

File /usr/local/lib/python3.11/site-packages/llama_stack_client/_utils/_utils.py:274, in required_args.<locals>.inner.<locals>.wrapper(*args, **kwargs)
    272         msg = f"Missing required argument: {quote(missing[0])}"
    273         raise TypeError(msg)
--> 274 return func(*args, **kwargs)

File /usr/local/lib/python3.11/site-packages/llama_stack_client/resources/inference/inference.py:220, in InferenceResource.chat_completion(self, messages, model, logprobs, sampling_params, stream, tool_choice, tool_prompt_format, tools, x_llama_stack_provider_data, extra_headers, extra_query, extra_body, timeout)
    213 extra_headers = {"Accept": "text/event-stream", **(extra_headers or {})}
    214 extra_headers = {
    215     **strip_not_given({"X-LlamaStack-ProviderData": x_llama_stack_provider_data}),
    216     **(extra_headers or {}),
    217 }
    218 return cast(
    219     InferenceChatCompletionResponse,
--> 220     self._post(
    221         "/inference/chat_completion",
    222         body=maybe_transform(
    223             {
    224                 "messages": messages,
    225                 "model": model,
    226                 "logprobs": logprobs,
    227                 "sampling_params": sampling_params,
    228                 "stream": stream,
    229                 "tool_choice": tool_choice,
    230                 "tool_prompt_format": tool_prompt_format,
    231                 "tools": tools,
    232             },
    233             inference_chat_completion_params.InferenceChatCompletionParams,
    234         ),
    235         options=make_request_options(
    236             extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
    237         ),
    238         cast_to=cast(
    239             Any, InferenceChatCompletionResponse
    240         ),  # Union types cannot be passed in as arguments in the type system
    241         stream=stream or False,
    242         stream_cls=Stream[InferenceChatCompletionResponse],
    243     ),
    244 )

File /usr/local/lib/python3.11/site-packages/llama_stack_client/_base_client.py:1251, in SyncAPIClient.post(self, path, cast_to, body, options, files, stream, stream_cls)
   1237 def post(
   1238     self,
   1239     path: str,
   (…)
   1246     stream_cls: type[_StreamT] | None = None,
   1247 ) -> ResponseT | _StreamT:
   1248     opts = FinalRequestOptions.construct(
   1249         method="post", url=path, json_data=body, files=to_httpx_files(files), **options
   1250     )
-> 1251     return cast(ResponseT, self.request(cast_to, opts, stream=stream, stream_cls=stream_cls))

File /usr/local/lib/python3.11/site-packages/llama_stack_client/_base_client.py:943, in SyncAPIClient.request(self, cast_to, options, remaining_retries, stream, stream_cls)
    940 else:
    941     retries_taken = 0
--> 943 return self._request(
    944     cast_to=cast_to,
    945     options=options,
    946     stream=stream,
    947     stream_cls=stream_cls,
    948     retries_taken=retries_taken,
    949 )

File /usr/local/lib/python3.11/site-packages/llama_stack_client/_base_client.py:1046, in SyncAPIClient._request(self, cast_to, options, retries_taken, stream, stream_cls)
   1043     err.response.read()
   1045     log.debug("Re-raising status error")
-> 1046     raise self._make_status_error_from_response(err.response) from None
   1048 return self._process_response(
   1049     cast_to=cast_to,
   1050     options=options,
   (…)
   1054     retries_taken=retries_taken,
   1055 )

NotFoundError: Error code: 404 - {'detail': 'Not Found'}
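
The request body itself looks well-formed, so my working assumption is that the 404 means the POST never matched a route on the server, i.e. the base URL is wrong or unset. Here is the sanity check I put together; using httpx directly is my own debugging idea, not something from the lab (httpx is the HTTP library llama_stack_client is built on, so it should already be installed):

import os
import httpx

# First rule out an unset/odd env var: repr() makes stray
# whitespace or an empty string visible.
base_url = os.getenv("DLAI_LLAMA_STACK_API_TOGETHER_URL", "")
print("base_url:", repr(base_url))

# POST an empty body to the same route the client uses. A 404 means the
# route doesn't exist at this URL; a 4xx validation error would instead
# mean routing is fine and the problem is elsewhere.
resp = httpx.post(f"{base_url}/inference/chat_completion", json={})
print(resp.status_code, resp.text)

If the URL prints correctly and the raw POST still returns 404, my next guess would be a version mismatch between llama_stack_client and the server, since as far as I can tell the inference route path has changed between releases.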