I am trying to reuse the code from the final lecture to build my own chatbot, but with open-source models instead of the OpenAI ones. However, I get errors when I run the code. I would really appreciate it if someone could tell me where the error is coming from. Thank you.
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import HuggingFaceHub
from langchain import HuggingFacePipeline
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import DocArrayInMemorySearch
from langchain.document_loaders import PyPDFLoader
from langchain.chains import ConversationalRetrievalChain
from langchain_community.chat_models.huggingface import ChatHuggingFace
import os
import panel as pn
import param
pn.extension()
from dotenv import load_dotenv, find_dotenv
# Load variables from the nearest .env file into the process environment.
_ = load_dotenv(find_dotenv())

# Raises KeyError when the token is not set.
# NOTE(review): this variable is not passed to HuggingFaceHub below;
# presumably the client reads the same environment variable -- confirm.
huggingface_token = os.environ["HUGGINGFACEHUB_API_TOKEN"]

# Text-generation model used by every chain built in this script.
llm = HuggingFaceHub(
    repo_id="EleutherAI/polyglot-ko-3.8b",
    task="text-generation",
    model_kwargs={
        "max_new_tokens": 128,
        "temperature": 0.5,
    },
)
def load_db(file, chain_type, k):
    """Build a conversational retrieval chain over the PDF at ``file``.

    The PDF is loaded, split into chunks, embedded, and indexed in an
    in-memory vector store; the resulting retriever is wired into a
    ``ConversationalRetrievalChain`` that uses the module-level ``llm``.

    Args:
        file: Path of the PDF passed to ``PyPDFLoader``.
        chain_type: Forwarded as ``chain_type`` to
            ``ConversationalRetrievalChain.from_llm``.
        k: Forwarded to the retriever as ``search_kwargs={"k": k}``.

    Returns:
        The chain, configured to also return the source documents and the
        generated question. Chat history is not stored in the chain; the
        caller supplies it on every call.
    """
    # Load the PDF and cut it into overlapping chunks.
    pages = PyPDFLoader(file).load()
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    chunks = splitter.split_documents(pages)

    # Sentence-embedding model, run on CPU, embeddings left un-normalised.
    embedder = HuggingFaceEmbeddings(
        model_name="jhgan/ko-sroberta-multitask",
        model_kwargs={"device": 'cpu'},
        encode_kwargs={'normalize_embeddings': False},
    )

    # Index the chunks and expose the index as a similarity retriever.
    store = DocArrayInMemorySearch.from_documents(chunks, embedder)
    retriever = store.as_retriever(
        search_type="similarity",
        search_kwargs={"k": k},
    )

    # Memory is managed externally by the caller.
    return ConversationalRetrievalChain.from_llm(
        llm=ChatHuggingFace(llm=llm),
        chain_type=chain_type,
        retriever=retriever,
        return_source_documents=True,
        return_generated_question=True,
    )
class cbfs(param.Parameterized):
    """State and callbacks behind the chatbot dashboard.

    Holds the retrieval chain (``self.qa``), the running chat history, and
    the most recent database query/response, and exposes methods that
    return Panel objects for each dashboard tab.

    The methods read the module-level widgets ``file_input``,
    ``button_load`` and ``inp``; these must exist before the callbacks run.
    """

    # List of (question, answer) tuples passed back to the chain each turn.
    chat_history = param.List([])
    # Most recent answer returned by the chain.
    answer = param.String("")
    # Most recent "generated_question" returned by the chain.
    db_query = param.String("")
    # Most recent "source_documents" returned by the chain.
    db_response = param.List([])

    def __init__(self, **params):
        """Initialise state and build the chain for the default PDF."""
        super().__init__(**params)
        self.panels = []
        self.loaded_file = "/Users/mzc01-dongkyun.im/Documents/LangChainProjects/■ MZC㈜_취업규칙_개정_2023.pdf"
        self.qa = load_db(self.loaded_file, "stuff", 4)

    def call_load_db(self, count):
        """Rebuild the chain from the uploaded PDF, if there is one.

        Args:
            count: Click count of the "Load DB" button; ``0`` means the
                callback is running for the initial render.

        Returns:
            A Markdown pane naming the currently loaded file.
        """
        # Initial render, or no file selected: just report the current file.
        if count == 0 or file_input.value is None:
            return pn.pane.Markdown(f"Loaded File: {self.loaded_file}")

        file_input.save("temp.pdf")  # local copy
        self.loaded_file = file_input.filename
        # Switch the button style while the database is being rebuilt.
        button_load.button_style = "outline"
        self.qa = load_db("temp.pdf", "stuff", 4)
        button_load.button_style = "solid"
        # History from the previous document no longer applies.
        self.clr_history()
        return pn.pane.Markdown(f"Loaded File: {self.loaded_file}")

    def convchain(self, query):
        """Send ``query`` to the chain and return the updated conversation.

        Args:
            query: Text entered by the user; an empty value yields an
                empty conversation box without calling the chain.

        Returns:
            A scrollable ``pn.WidgetBox`` holding every exchange so far.
        """
        if not query:
            return pn.WidgetBox(
                pn.Row('User:', pn.pane.Markdown("", width=600)),
                scroll=True,
            )

        result = self.qa({"question": query, "chat_history": self.chat_history})
        self.chat_history.append((query, result["answer"]))
        self.db_query = result["generated_question"]
        self.db_response = result["source_documents"]
        self.answer = result['answer']
        self.panels.extend([
            pn.Row('User:', pn.pane.Markdown(query, width=600)),
            # `styles` (not `style`) matches every other pane in this class.
            pn.Row('Chatbot:', pn.pane.Markdown(
                self.answer, width=600,
                styles={'background-color': '#F6F6F6'},
            )),
        ])
        inp.value = ''  # clears loading indicator when cleared
        return pn.WidgetBox(*self.panels, scroll=True)

    # The dependency name must match the parameter name exactly.
    @param.depends('db_query')
    def get_lquest(self):
        """Return a column showing the last question sent to the database."""
        if not self.db_query:
            return pn.Column(
                pn.Row(pn.pane.Markdown(
                    "Last question to DB:",
                    styles={'background-color': '#F6F6F6'},
                )),
                pn.Row(pn.pane.Str("no DB accesses so far")),
            )
        return pn.Column(
            pn.Row(pn.pane.Markdown(
                "DB query:",
                styles={'background-color': '#F6F6F6'},
            )),
            pn.pane.Str(self.db_query),
        )

    # The dependency name must match the parameter name exactly.
    @param.depends('db_response')
    def get_sources(self):
        """Return a box listing the last retrieved documents, or ``None``."""
        if not self.db_response:
            return
        rlist = [pn.Row(pn.pane.Markdown(
            "Result of DB lookup:",
            styles={'background-color': '#F6F6F6'},
        ))]
        rlist.extend(pn.Row(pn.pane.Str(doc)) for doc in self.db_response)
        return pn.WidgetBox(*rlist, width=600, scroll=True)

    # Dependencies are declared on the two methods that change the history.
    @param.depends('convchain', 'clr_history')
    def get_chats(self):
        """Return a box showing the current ``chat_history`` contents."""
        if not self.chat_history:
            return pn.WidgetBox(
                pn.Row(pn.pane.Str("No History Yet")),
                width=600,
                scroll=True,
            )
        rlist = [pn.Row(pn.pane.Markdown(
            "Current Chat History variable",
            styles={'background-color': '#F6F6F6'},
        ))]
        rlist.extend(
            pn.Row(pn.pane.Str(exchange)) for exchange in self.chat_history
        )
        return pn.WidgetBox(*rlist, width=600, scroll=True)

    def clr_history(self, count=0):
        """Reset the chat history.

        Args:
            count: Unused; accepts the argument a button click passes in.
        """
        self.chat_history = []
        return
# Application state; constructing it builds the chain for the default PDF.
cb = cbfs()

# Widgets referenced by name from the cbfs callbacks.
file_input = pn.widgets.FileInput(accept='.pdf')
button_load = pn.widgets.Button(name="Load DB", button_type='primary')
button_clearhistory = pn.widgets.Button(
    name="Clear History",
    button_type='warning',
)
button_clearhistory.on_click(cb.clr_history)
inp = pn.widgets.TextInput(placeholder='Enter text here…')

# Reactive bindings: re-evaluated when the button is clicked / text changes.
bound_button_load = pn.bind(cb.call_load_db, button_load.param.clicks)
conversation = pn.bind(cb.convchain, inp)

jpg_pane = pn.pane.Image('./img/convchain.jpg')

# Tab 1: the conversation itself.
tab1 = pn.Column(
    pn.Row(inp),
    pn.layout.Divider(),
    pn.panel(conversation, loading_indicator=True, height=300),
    pn.layout.Divider(),
)

# Tab 2: last database query and the documents it returned.
tab2 = pn.Column(
    pn.panel(cb.get_lquest),
    pn.layout.Divider(),
    pn.panel(cb.get_sources),
)

# Tab 3: raw chat history.
tab3 = pn.Column(
    pn.panel(cb.get_chats),
    pn.layout.Divider(),
)

# Tab 4: file upload and history reset.
tab4 = pn.Column(
    pn.Row(file_input, button_load, bound_button_load),
    pn.Row(
        button_clearhistory,
        pn.pane.Markdown("Clears chat history. Can use to start a new topic"),
    ),
    pn.layout.Divider(),
    pn.Row(jpg_pane.clone(width=400)),
)

dashboard = pn.Column(
    pn.Row(pn.pane.Markdown('# ChatWithYourData_Bot')),
    pn.Tabs(
        ('Conversation', tab1),
        ('Database', tab2),
        ('Chat History', tab3),
        ('Configure', tab4),
    ),
)

# Last expression of the cell, so the notebook renders the dashboard.
# NOTE(review): rendering inside a Jupyter / VS Code notebook presumably
# needs the `jupyter_bokeh` package installed in the kernel environment --
# confirm against the Panel installation docs.
dashboard
Running the code does not open the chatbot interface; instead, it produces this message:
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
[Open Browser Console for more detailed log - Double click to close this message]
Failed to load model class ‘BokehModel’ from module ‘@bokeh/jupyter_bokeh’
Error: No version of module @bokeh/jupyter_bokeh is registered
at ph.loadClass (https://file+.vscode-resource.vscode-cdn.net/Users/mzc01-dongkyun.im/.vscode/extensions/ms-toolsai.jupyter-renderers-1.0.17/out/node_modules/%40vscode/jupyter-ipywidgets8/dist/ipywidgets.js:2:4099813)
at ph.loadClass (https://file+.vscode-resource.vscode-cdn.net/Users/mzc01-dongkyun.im/.vscode/extensions/ms-toolsai.jupyter-renderers-1.0.17/out/node_modules/%40vscode/jupyter-ipywidgets8/dist/ipywidgets.js:2:4403287)
at ph.loadModelClass (https://file+.vscode-resource.vscode-cdn.net/Users/mzc01-dongkyun.im/.vscode/extensions/ms-toolsai.jupyter-renderers-1.0.17/out/node_modules/%40vscode/jupyter-ipywidgets8/dist/ipywidgets.js:2:4097773)
at ph._make_model (https://file+.vscode-resource.vscode-cdn.net/Users/mzc01-dongkyun.im/.vscode/extensions/ms-toolsai.jupyter-renderers-1.0.17/out/node_modules/%40vscode/jupyter-ipywidgets8/dist/ipywidgets.js:2:4094616)
at ph.new_model (https://file+.vscode-resource.vscode-cdn.net/Users/mzc01-dongkyun.im/.vscode/extensions/ms-toolsai.jupyter-renderers-1.0.17/out/node_modules/%40vscode/jupyter-ipywidgets8/dist/ipywidgets.js:2:4092246)
at ph.handle_comm_open (https://file+.vscode-resource.vscode-cdn.net/Users/mzc01-dongkyun.im/.vscode/extensions/ms-toolsai.jupyter-renderers-1.0.17/out/node_modules/%40vscode/jupyter-ipywidgets8/dist/ipywidgets.js:2:4091039)
at https://file+.vscode-resource.vscode-cdn.net/Users/mzc01-dongkyun.im/.vscode/extensions/ms-toolsai.jupyter-renderers-1.0.17/out/node_modules/%40vscode/jupyter-ipywidgets8/dist/ipywidgets.js:2:4402511
at n._handleCommOpen (https://file+.vscode-resource.vscode-cdn.net/Users/mzc01-dongkyun.im/.vscode/extensions/ms-toolsai.jupyter-2024.1.1-darwin-x64/dist/webviews/webview-side/ipywidgetsKernel/ipywidgetsKernel.js:3:80955)
at async n._handleMessage (https://file+.vscode-resource.vscode-cdn.net/Users/mzc01-dongkyun.im/.vscode/extensions/ms-toolsai.jupyter-2024.1.1-darwin-x64/dist/webviews/webview-side/ipywidgetsKernel/ipywidgetsKernel.js:3:82830)