Here is the detailed description of the problem.
- When I send a single text prompt, it works fine.
- When I send a follow-up text prompt, it remembers the context and answers it, so that works fine too.
- The same holds for further text prompts, again and again.
- When I send an image with a text prompt, it works fine and answers it.
- When I send a text prompt after sending (image with text), it forgets the context and says "I don't remember any image which you have sent."
Has anyone faced a similar issue, or does anyone have a solution to this problem?
import warnings
warnings.filterwarnings('ignore')
from dotenv import load_dotenv
import os
import asyncio
import requests
import base64
from typing import Optional
from llama_stack_client import LlamaStackClient
from llama_stack_client.types import UserMessage
from llama_stack_client.types.agent_create_params import AgentConfig
from PIL import Image
import matplotlib.pyplot as plt
# Load environment variables via python-dotenv; the return value is discarded on purpose
_ = load_dotenv()
# Constants
# Base URL handed to LlamaStackClient when an Agent is constructed
LLAMA_STACK_API_TOGETHER_URL = "https://llama-stack.together.ai"
# Model identifier placed in the AgentConfig built by run_main()
LLAMA32_11B_INSTRUCT = "Llama3.2-11B-Vision-Instruct"
def display_image(path):
    """Show the image stored at *path* in a matplotlib figure with the axes hidden.

    Args:
        path: Location of the image file; passed straight to ``Image.open``.
    """
    # Use the image as a context manager so the underlying file handle is
    # released deterministically (multi-frame formats otherwise keep it open).
    # imshow is handed the image while the file is still open.
    with Image.open(path) as img:
        plt.imshow(img)
    plt.axis('off')
    plt.show()
# Encode image from file path to base64
def encode_image_path(image_path: str) -> str:
    """Return the contents of the file at *image_path* as base64 text.

    The file is read in binary mode and the base64 bytes are decoded as UTF-8
    so the result is a plain ``str``.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")
def encode_image(image_url: str, timeout: float = 30.0) -> str:
    """Download the image at *image_url* and return its bytes as base64 text.

    Args:
        image_url: URL fetched with an HTTP GET request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block forever on an unresponsive host.

    Returns:
        The response body, base64-encoded and decoded to a UTF-8 ``str``.

    Raises:
        Exception: If the response status code is not 200.
        requests.exceptions.RequestException: If the request itself fails
            (including a timeout).
    """
    response = requests.get(image_url, timeout=timeout)
    if response.status_code != 200:
        raise Exception(f"Failed to retrieve image. Status code: {response.status_code}")
    return base64.b64encode(response.content).decode('utf-8')
# Agent class to handle creating and managing agents
class Agent:
    """Wrapper around one Llama Stack agent and a single session on it.

    ``create_agent`` must be called before any turn is executed: it is what
    sets ``agent_id`` and ``session_id``, which every turn request sends.
    """

    def __init__(self):
        self.client = LlamaStackClient(base_url=LLAMA_STACK_API_TOGETHER_URL)
        self.max_retries = 3  # Maximum number of attempts per turn

    def create_agent(self, agent_config: "AgentConfig"):
        """Create the agent from *agent_config* and open the session used by later turns."""
        agent = self.client.agents.create(agent_config=agent_config)
        self.agent_id = agent.agent_id
        session = self.client.agents.sessions.create(
            agent_id=self.agent_id,
            session_name="example_session",
        )
        self.session_id = session.session_id

    async def execute_turn_with_image_path(self, content: str, image_path: str):
        """Send one user turn made of a local image plus the text *content*.

        Args:
            content: Text part of the message.
            image_path: Path of the image file; it is read and base64-encoded
                into a data URI.
        """
        base64_image = encode_image_path(image_path)
        messages = [{
            "role": "user",
            "content": [
                {
                    "image": {
                        # NOTE(review): the MIME type is hard-coded to JPEG
                        # whatever the actual format of the file is.
                        "uri": f"data:image/jpeg;base64,{base64_image}"
                    }
                },
                content,
            ]
        }]
        await self._execute_image_turn(messages)

    async def execute_turn_text_only(self, content: str):
        """Send one text-only user turn."""
        messages = [UserMessage(content=content, role="user")]
        await self._execute_turn(messages)

    async def _execute_image_turn(self, messages):
        """Run a turn carrying image content; see ``_run_turn`` for the retry rules."""
        # Kept as a coroutine so existing ``await`` call sites keep working;
        # nothing is awaited inside, the client call below is synchronous.
        self._run_turn(messages)

    async def _execute_turn(self, messages):
        """Run a text turn; see ``_run_turn`` for the retry rules."""
        self._run_turn(messages)

    def _run_turn(self, messages) -> bool:
        """Stream one turn and print the assistant's final message.

        The request is attempted at most ``self.max_retries`` times. An
        attempt is used up when the call raises, when an incomplete chunk
        arrives, or when the stream ends without a ``turn_complete`` event.
        The last case previously consumed no retry, so the same turn was
        resent in an endless loop.

        Returns:
            True once a ``turn_complete`` event was printed, False when every
            attempt failed.
        """
        for _attempt in range(self.max_retries):
            try:
                response = self.client.agents.turns.create(
                    agent_id=self.agent_id,
                    session_id=self.session_id,
                    messages=messages,
                    stream=True,
                )
                for chunk in response:
                    if not (chunk and chunk.event and chunk.event.payload):
                        print("Warning: Received an incomplete response chunk.")
                        break
                    payload = chunk.event.payload
                    if payload.event_type == "turn_complete":
                        output_message = self._read_field(payload.turn, "output_message")
                        print(self._read_field(output_message, "content"))
                        return True
                else:
                    # Stream exhausted without a bad chunk and without completion
                    print("Warning: Response stream ended before the turn completed.")
            except Exception as e:
                # Best-effort: report the failure and spend one attempt on it
                print("Error during turn execution:", e)
        print("Failed to complete the query after multiple retries.")
        return False

    @staticmethod
    def _read_field(container, name):
        """Return ``container[name]`` for a dict, otherwise the attribute ``name``.

        NOTE(review): the completed turn is accepted both as a plain dict and
        as an object with attributes; which shape the client actually
        delivers presumably depends on the SDK version - confirm.
        """
        if isinstance(container, dict):
            return container[name]
        return getattr(container, name)

    def close_session(self):
        """Delete the current session and report it on stdout."""
        # NOTE(review): only session_id is sent here; confirm against the
        # installed llama_stack_client whether agent_id is required as well.
        self.client.agents.sessions.delete(session_id=self.session_id)
        print("Session closed.")
# Main function to run queries within the same session
def _split_image_prompt(prompt: str) -> Optional[tuple]:
    """Split ``"<text> image: <path>"`` into ``(text, path)``; return None without a marker."""
    marker = "image:"
    for start in range(len(prompt) - len(marker) + 1):
        # Compare case-insensitively against slices of the ORIGINAL string so
        # "Image:" / "IMAGE:" are accepted and the cut offsets stay valid.
        if prompt[start:start + len(marker)].lower() == marker:
            return prompt[:start].strip(), prompt[start + len(marker):].strip()
    return None


async def run_main():
    """Run an interactive loop that sends every query to one agent session.

    Typing ``exit`` closes the session and leaves the loop. A query that
    contains ``image:`` (any letter case) is split into the text before the
    marker and a local image path after it; anything else is sent as text.
    """
    agent_config = AgentConfig(
        model=LLAMA32_11B_INSTRUCT,
        instructions="You are a helpful assistant",
        enable_session_persistence=True,
    )
    agent = Agent()
    agent.create_agent(agent_config)
    while True:
        prompt = input("Enter your query (or type 'exit' to quit): ")
        if prompt.strip().lower() == 'exit':
            agent.close_session()
            print("Session ended.")
            break
        # If the prompt includes an "image:" keyword, expect the local image path as input
        image_request = _split_image_prompt(prompt)
        if image_request is not None:
            query_text, image_path = image_request
            try:
                await agent.execute_turn_with_image_path(content=query_text, image_path=image_path)
            except OSError as e:
                # A missing or unreadable image file must not end the whole session
                print("Error during image query:", e)
        else:
            try:
                await agent.execute_turn_text_only(content=prompt)
            except Exception as e:
                print("Error during text-only query:", e)
# Script entry point: start the interactive text/image query loop
if __name__ == "__main__":
    try:
        main_coroutine = run_main()
        asyncio.run(main_coroutine)
    except Exception as exc:
        # Last-resort report for anything the loop itself did not handle
        print(f"Error occurred: {exc}")