I'm wondering if we can use LangChain without an LLM from OpenAI. I've tried replacing OpenAI with "bloom-7b1" and "flan-t5-xl", and used an agent from LangChain following visual-chatgpt (https://github.com/microsoft/visual-chatgpt).
Here is my demo:
```
import os
import uuid

import torch
from diffusers import StableDiffusionPipeline

# `prompts` is the decorator from visual_chatgpt.py (sketched after this block).

class Text2Image:
    def __init__(self, device):
        print(f"Initializing Text2Image to {device}")
        self.device = device
        self.torch_dtype = torch.float16 if 'cuda' in device else torch.float32
        self.pipe = StableDiffusionPipeline.from_pretrained(
            "/dfs/data/llmcheckpoints/stable-diffusion-v1-5",
            torch_dtype=self.torch_dtype)
        self.pipe.to(device)
        self.a_prompt = 'best quality, extremely detailed'
        self.n_prompt = 'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, ' \
                        'fewer digits, cropped, worst quality, low quality'

    @prompts(name="Generate Image From User Input Text",
             description="useful when you want to generate an image from a user input text and save it to a file. "
                         "like: generate an image of an object or something, or generate an image that includes some objects. "
                         "The input to this tool should be a string, representing the text used to generate image. ")
    def inference(self, text):
        image_filename = os.path.join('image', f"{str(uuid.uuid4())[:8]}.png")
        prompt = text + ', ' + self.a_prompt
        image = self.pipe(prompt, negative_prompt=self.n_prompt).images[0]
        image.save(image_filename)
        print(f"\nProcessed Text2Image, Input Text: {text}, Output Image: {image_filename}")
        return image_filename
```
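For context, the `prompts` decorator above comes from visual_chatgpt.py; as far as I understand it, it only attaches a name and description to the tool method, roughly like this (my paraphrase, not copied verbatim from the repo):
```
def prompts(name, description):
    def decorator(func):
        # Attach metadata that ConversationBot later reads via func.name / func.description
        func.name = name
        func.description = description
        return func
    return decorator
```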
```
from typing import Any, List, Mapping, Optional

from pydantic import BaseModel, Extra
from langchain.llms.base import LLM
from langchain.llms.utils import enforce_stop_tokens


class CustomPipeline(LLM, BaseModel):
    model_id: str = "/dfs/data/llmcheckpoints/bloom-7b1/"

    class Config:
        """Configuration for this pydantic object."""
        extra = Extra.forbid

    def __init__(self, model_id):
        super().__init__()
        # from transformers import T5TokenizerFast, T5ForConditionalGeneration
        from transformers import AutoTokenizer, AutoModelForCausalLM
        # Kept as globals because pydantic (Extra.forbid) rejects undeclared instance attributes.
        global model, tokenizer
        # model = T5ForConditionalGeneration.from_pretrained(model_id)
        # tokenizer = T5TokenizerFast.from_pretrained(model_id)
        model = AutoModelForCausalLM.from_pretrained(model_id, device_map='auto')
        tokenizer = AutoTokenizer.from_pretrained(model_id)

    @property
    def _llm_type(self) -> str:
        return "custom_pipeline"

    def _call(self, prompt: str, stop: Optional[List[str]] = None, max_length=2048, num_return_sequences=1):
        input_ids = tokenizer.encode(prompt, return_tensors="pt").cuda()
        outputs = model.generate(input_ids, max_length=max_length, num_return_sequences=num_return_sequences)
        # Decode only the newly generated tokens; otherwise the whole agent prompt is echoed back.
        response = tokenizer.decode(outputs[0][input_ids.shape[1]:], skip_special_tokens=True)
        if stop is not None:
            # Truncate at the agent's stop sequences (e.g. "\nObservation:") so the model
            # does not make up the rest of the scratchpad.
            response = enforce_stop_tokens(response, stop)
        return response
```
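For comparison, LangChain also ships a built-in Hugging Face wrapper, so a custom LLM class may not even be necessary. A minimal sketch, assuming the LangChain version in use exposes `HuggingFacePipeline.from_model_id` (the local path and generation settings are placeholders, not tested values):
```
from langchain.llms import HuggingFacePipeline

# Sketch: wrap a local bloom-7b1 checkpoint with LangChain's built-in wrapper
# instead of a hand-rolled LLM subclass.
llm = HuggingFacePipeline.from_model_id(
    model_id="/dfs/data/llmcheckpoints/bloom-7b1/",
    task="text-generation",
    model_kwargs={"max_length": 2048},
)
```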
```
import argparse
import inspect
import re

import numpy as np
from PIL import Image
from langchain.agents import initialize_agent, Tool

# cut_dialogue_history, the ConversationStringBufferMemory import, ImageCaptioning and the
# AIMASTER_* prompt constants follow the visual-chatgpt repo and are omitted here.


class ConversationBot:
    def __init__(self, load_dict):
        print(f"Initializing AiMaster ChatBot, load_dict={load_dict}")
        model_id = "/dfs/data/llmcheckpoints/bloom-7b1/"
        self.llm = CustomPipeline(model_id=model_id)
        print(f'load {model_id} done!')
        self.memory = ConversationStringBufferMemory(memory_key="chat_history", output_key='output')
        self.models = {}
        # Load Basic Foundation Models
        for class_name, device in load_dict.items():
            self.models[class_name] = globals()[class_name](device=device)
        # Load Template Foundation Models
        for class_name, module in globals().items():
            if getattr(module, 'template_model', False):
                template_required_names = {k for k in inspect.signature(module.__init__).parameters.keys() if k != 'self'}
                loaded_names = set([type(e).__name__ for e in self.models.values()])
                if template_required_names.issubset(loaded_names):
                    self.models[class_name] = globals()[class_name](
                        **{name: self.models[name] for name in template_required_names})
        self.tools = []
        for instance in self.models.values():
            for e in dir(instance):
                if e.startswith('inference'):
                    func = getattr(instance, e)
                    self.tools.append(Tool(name=func.name, description=func.description, func=func))
        self.agent = initialize_agent(
            self.tools,
            self.llm,
            agent="conversational-react-description",
            verbose=True,
            memory=self.memory,
            return_intermediate_steps=True,
            # agent_kwargs={'format_instructions': AIMASTER_CHATBOT_FORMAT_INSTRUCTIONS},
            agent_kwargs={'prefix': AIMASTER_CHATBOT_PREFIX,
                          'format_instructions': AIMASTER_CHATBOT_FORMAT_INSTRUCTIONS},
        )

    def run_text(self, text, state):
        self.agent.memory.buffer = cut_dialogue_history(self.agent.memory.buffer, keep_last_n_words=500)
        res = self.agent({"input": text})
        res['output'] = res['output'].replace("\\", "/")
        response = re.sub(r'(image/\S*png)', lambda m: f'![](/file={m.group(0)})*{m.group(0)}*', res['output'])
        state = state + [(text, response)]
        print(f"\nProcessed run_text, Input text: {text}\nCurrent state: {state}\n"
              f"Current Memory: {self.agent.memory.buffer}")
        return state, state

    def run_image(self, image, state):
        # image_filename = os.path.join('image', f"{str(uuid.uuid4())[:8]}.png")
        image_filename = image
        print("======>Auto Resize Image...")
        # img = Image.open(image.name)
        img = Image.open(image_filename)
        width, height = img.size
        ratio = min(512 / width, 512 / height)
        width_new, height_new = (round(width * ratio), round(height * ratio))
        width_new = int(np.round(width_new / 64.0)) * 64
        height_new = int(np.round(height_new / 64.0)) * 64
        img = img.resize((width_new, height_new))
        img = img.convert('RGB')
        img.save(image_filename, "PNG")
        print(f"Resize image from {width}x{height} to {width_new}x{height_new}")
        description = self.models['ImageCaptioning'].inference(image_filename)
        Human_prompt = f'\nHuman: provide a figure named {image_filename}. The description is: {description}. This information helps you to understand this image, but you should use tools to finish following tasks, rather than directly imagine from my description. If you understand, say \"Received\". \n'
        AI_prompt = "Received. "
        self.agent.memory.buffer = self.agent.memory.buffer + Human_prompt + 'AI: ' + AI_prompt
        state = state + [(f"*{image_filename}*", AI_prompt)]
        print(f"\nProcessed run_image, Input image: {image_filename}\nCurrent state: {state}\n"
              f"Current Memory: {self.agent.memory.buffer}")
        return state, state, f' {image_filename} '


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--load', type=str, default="ImageCaptioning_cuda:0, Text2Image_cuda:0")
    args = parser.parse_args()
    load_dict = {e.split('_')[0].strip(): e.split('_')[1].strip() for e in args.load.split(',')}
    bot = ConversationBot(load_dict=load_dict)
    state = []
    while True:
        text = input('input:')
        if text.startswith("image:"):
            # keep the updated dialogue state returned by the bot
            state, _, _ = bot.run_image(text[6:], state)
        elif text == 'stop':
            break
        elif text == 'clear':
            bot.memory.clear()
        else:
            state, _ = bot.run_text(text, state)
```
It seems that both LLMs fail to use the tools I provide.
Any suggestions would help me a lot!
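In case it helps to narrow things down, one check I'd run is whether the raw model output even follows the ReAct format the agent expects, i.e. emits `Action:` / `Action Input:` lines and stops before `Observation:`. A sketch, assuming the agent's prompt template is reachable as `bot.agent.agent.llm_chain.prompt` (attribute paths vary across LangChain versions):
```
# Sanity-check the raw completion outside the agent loop.
prompt = bot.agent.agent.llm_chain.prompt.format(
    input="generate an image of a cat",
    chat_history="",
    agent_scratchpad="",
)
raw = bot.llm._call(prompt, stop=["\nObservation:"])
print(raw)  # expect an "Action: Generate Image From User Input Text" line and an "Action Input:" line
```
If the completion never contains an `Action:` line in that exact format, the problem is probably the model's instruction following rather than the tool wiring, and the agent's output parser will fail regardless of how the tools are registered.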