05_Training_lab_student - Error with "Finetune a model in 3 lines of code using Lamini"

I tried running this in the workbook:
model = BasicModelRunner("EleutherAI/pythia-410m")
model.load_data_from_jsonlines("lamini_docs.jsonl", input_key="question", output_key="answer")
model.train(is_public=True)

and I got the following error:
LAMINI CONFIGURATION
{}
LAMINI CONFIGURATION
{}
LAMINI CONFIGURATION
{}
status code: 422

---------------------------------------------------------------------------
HTTPError                                 Traceback (most recent call last)
File /usr/local/lib/python3.9/site-packages/lamini/api/rest_requests.py:25, in make_web_request(key, url, http_method, json)
     24 try:
---> 25     resp.raise_for_status()
     26 except requests.exceptions.HTTPError as e:

File /usr/local/lib/python3.9/site-packages/requests/models.py:1021, in Response.raise_for_status(self)
   1020 if http_error_msg:
-> 1021     raise HTTPError(http_error_msg, response=self)

HTTPError: 422 Client Error: Unprocessable Entity for url: http://jupyter-api-proxy.internal.dlai/rev-proxy/lamini/v1/train

During handling of the above exception, another exception occurred:

UserError                                 Traceback (most recent call last)
Cell In[29], line 3
      1 model = BasicModelRunner("EleutherAI/pythia-410m")
      2 model.load_data_from_jsonlines("lamini_docs.jsonl", input_key="question", output_key="answer")
----> 3 model.train(is_public=True)

File /usr/local/lib/python3.9/site-packages/lamini/runners/base_runner.py:259, in BaseRunner.train(self, limit, is_public, **kwargs)
    254     final_status = self.lamini_api.train_and_wait(
    255         is_public=is_public,
    256         **kwargs,
    257     )
    258 else:
--> 259     final_status = self.lamini_api.train_and_wait(
    260         data,
    261         is_public=is_public,
    262         **kwargs,
    263     )
    264 try:
    265     self.model_name = final_status["model_name"]

File /usr/local/lib/python3.9/site-packages/lamini/api/lamini.py:128, in Lamini.train_and_wait(self, data, finetune_args, enable_peft, peft_args, is_public, use_cached_model, **kwargs)
    118 def train_and_wait(
    119     self,
    120     data: Optional[List] = None,
    (...)
    126     **kwargs,
    127 ):
--> 128     job = self.train(
    129         data,
    130         finetune_args=finetune_args,
    131         enable_peft=enable_peft,
    132         peft_args=peft_args,
    133         is_public=is_public,
    134         use_cached_model=use_cached_model,
    135     )
    137 try:
    138     status = self.check_job_status(job["job_id"])

File /usr/local/lib/python3.9/site-packages/lamini/api/lamini.py:106, in Lamini.train(self, data, finetune_args, enable_peft, peft_args, is_public, use_cached_model)
    103     self.upload_data(data)
    104     data = None
--> 106 return self.trainer.train(
    107     data,
    108     self.model_name,
    109     self.upload_file_path,
    110     finetune_args,
    111     enable_peft,
    112     peft_args,
    113     is_public,
    114     use_cached_model,
    115 )

File /usr/local/lib/python3.9/site-packages/lamini/api/train.py:54, in Train.train(self, data, model_name, upload_file_path, finetune_args, enable_peft, peft_args, is_public, use_cached_model)
     51 req_data["model_config"] = self.model_config.as_dict()
     52 url = self.api_prefix + "train"
---> 54 job = make_web_request(self.api_key, url, "post", req_data)
     55 self.job_id = job["job_id"]
     56 print(
     57     f"Training job submitted! Check status of job {self.job_id} here: {self.ui_url}/train/{self.job_id}"
     58 )

File /usr/local/lib/python3.9/site-packages/lamini/api/rest_requests.py:59, in make_web_request(key, url, http_method, json)
     57 except Exception:
     58     json_response = {}
---> 59 raise UserError(json_response.get("detail", "UserError"))
     60 if resp.status_code == 503:
     61     try:

UserError: [{'loc': ['body', 'dataset_id'], 'msg': 'field required', 'type': 'value_error.missing'}]

out = model.evaluate()

It looks like it's an HTTP error:
HTTPError: 422 Client Error: Unprocessable Entity for url: http://jupyter-api-proxy.internal.dlai/rev-proxy/lamini/v1/train

I'm not entirely sure, but I think you need to run the following lines before the ones you ran:

import os
import lamini

lamini.api_url = os.getenv("POWERML__PRODUCTION__URL")
lamini.api_key = os.getenv("POWERML__PRODUCTION__KEY")
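
For reference, here is the full sequence in one place, as an untested sketch (the environment variable names come from the lab's setup cell, and the import path for BasicModelRunner is assumed from the lamini paths in your traceback):

import os
import lamini
from lamini import BasicModelRunner  # import path assumed, matching the traceback above

# Point the client at the course's reverse proxy before any training call;
# the empty "LAMINI CONFIGURATION {}" printouts above suggest this step was skipped.
lamini.api_url = os.getenv("POWERML__PRODUCTION__URL")
lamini.api_key = os.getenv("POWERML__PRODUCTION__KEY")

model = BasicModelRunner("EleutherAI/pythia-410m")
model.load_data_from_jsonlines("lamini_docs.jsonl", input_key="question", output_key="answer")
model.train(is_public=True)

Without the key and URL set, the client cannot upload the dataset first, which would explain why the train request reaches the server without a dataset_id.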

To,
Mentors

2024-08-07 01:12:00,635 - INFO - lamini.api.inference_queue - Launching 1 batches onto the thread pool of size 12

LAMINI CONFIGURATION
{}
LAMINI CONFIGURATION
{}
LAMINI CONFIGURATION
{}
status code: 561

---------------------------------------------------------------------------
HTTPError                                 Traceback (most recent call last)
File /usr/local/lib/python3.9/site-packages/lamini/api/rest_requests.py:25, in make_web_request(key, url, http_method, json)
     24 try:
---> 25     resp.raise_for_status()
     26 except requests.exceptions.HTTPError as e:

File /usr/local/lib/python3.9/site-packages/requests/models.py:1021, in Response.raise_for_status(self)
   1020 if http_error_msg:
-> 1021     raise HTTPError(http_error_msg, response=self)

HTTPError: 561 Server Error: Unknown Status Code for url: http://jupyter-api-proxy.internal.dlai/rev-proxy/lamini/v1/completions

During handling of the above exception, another exception occurred:

APIError                                  Traceback (most recent call last)
Cell In[25], line 2
      1 bigger_finetuned_model = BasicModelRunner(model_name_to_id["bigger_model_name"])
----> 2 bigger_finetuned_output = bigger_finetuned_model(test_question)
      3 print("Bigger (2.8B) finetuned model (test): ", bigger_finetuned_output)

File /usr/local/lib/python3.9/site-packages/lamini/runners/base_runner.py:28, in BaseRunner.__call__(self, prompt, system_prompt, output_type, max_tokens)
     21 def __call__(
     22     self,
     23     prompt: Union[str, List[str]],
   (...)
     26     max_tokens: Optional[int] = None,
     27 ):
---> 28     return self.call(prompt, system_prompt, output_type, max_tokens)

File /usr/local/lib/python3.9/site-packages/lamini/runners/base_runner.py:39, in BaseRunner.call(self, prompt, system_prompt, output_type, max_tokens)
     30 def call(
     31     self,
     32     prompt: Union[str, List[str]],
   (...)
     35     max_tokens: Optional[int] = None,
     36 ):
     37     input_objects = self.create_final_prompts(prompt, system_prompt)
---> 39     return self.lamini_api.generate(
     40         prompt=input_objects,
     41         model_name=self.model_name,
     42         max_tokens=max_tokens,
     43         output_type=output_type,
     44     )

File /usr/local/lib/python3.9/site-packages/lamini/api/lamini.py:46, in Lamini.generate(self, prompt, model_name, output_type, max_tokens, stop_tokens)
     31 def generate(
     32     self,
     33     prompt: Union[str, List[str]],
   (...)
     37     stop_tokens: Optional[List[str]] = None,
     38 ):
     39     req_data = self.make_llm_req_map(
     40         prompt=prompt,
     41         model_name=model_name or self.model_name,
   (...)
     44         stop_tokens=stop_tokens,
     45     )
---> 46     result = self.inference_queue.submit(req_data)
     47     if isinstance(prompt, str) and len(result) == 1:
     48         if output_type is None:

File /usr/local/lib/python3.9/site-packages/lamini/api/inference_queue.py:41, in InferenceQueue.submit(self, request)
     39 # Wait for all the results to come back
     40 for result in results:
---> 41     result.result()
     43 # Combine the results and return them
     44 return self.combine_results(results)

File /usr/local/lib/python3.9/concurrent/futures/_base.py:446, in Future.result(self, timeout)
    444     raise CancelledError()
    445 elif self._state == FINISHED:
--> 446     return self.__get_result()
    447 else:
    448     raise TimeoutError()

File /usr/local/lib/python3.9/concurrent/futures/_base.py:391, in Future.__get_result(self)
    389 if self._exception:
    390     try:
--> 391         raise self._exception
    392     finally:
    393         # Break a reference cycle with the exception in self._exception
    394         self = None

File /usr/local/lib/python3.9/concurrent/futures/thread.py:58, in _WorkItem.run(self)
     55     return
     57 try:
---> 58     result = self.fn(*self.args, **self.kwargs)
     59 except BaseException as exc:
     60     self.future.set_exception(exc)

File /usr/local/lib/python3.9/site-packages/lamini/api/inference_queue.py:103, in process_batch(key, api_prefix, batch)
    101 def process_batch(key, api_prefix, batch):
    102     url = api_prefix + "completions"
--> 103     result = make_web_request(key, url, "post", batch)
    104     return result

File /usr/local/lib/python3.9/site-packages/lamini/api/rest_requests.py:76, in make_web_request(key, url, http_method, json)
     74             if description == {"detail": ""}:
     75                 raise APIError("500 Internal Server Error")
---> 76             raise APIError(f"API error {description}")
     78 return resp.json()

APIError: API error {'detail': "Currently this user has support for base models: ['EleutherAI/pythia-410m', 'EleutherAI/pythia-70m', 'hf-internal-testing/tiny-random-gpt2', 'meta-llama/Llama-2-13b-chat-hf', 'meta-llama/Llama-2-7b-chat-hf', 'meta-llama/Llama-2-7b-hf', 'meta-llama/Meta-Llama-3-8B-Instruct', 'meta-llama/Meta-Llama-3.1-8B-Instruct', 'microsoft/phi-2', 'microsoft/Phi-3-mini-4k-instruct', 'mistralai/Mistral-7B-Instruct-v0.1', 'mistralai/Mistral-7B-Instruct-v0.2', 'mistralai/Mistral-7B-Instruct-v0.3', 'Qwen/Qwen2-7B-Instruct']. Need help? Email us at info@lamini.ai"}

As per your suggestion above, I have verified that cell 1 (the configuration cell) was run.
Kindly help!
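
For anyone hitting the 561 above: the APIError says this account can only run inference against the listed base models, and the lab's "bigger_model_name" maps to a 2.8B checkpoint that is not on that list. One possible workaround, as an untested sketch assuming test_question is still defined in the notebook, is to query one of the supported models instead:

from lamini import BasicModelRunner  # import path assumed, matching the lab

# "EleutherAI/pythia-410m" is copied from the supported-models list in the APIError above.
supported_model = BasicModelRunner("EleutherAI/pythia-410m")
supported_output = supported_model(test_question)  # test_question comes from earlier notebook cells
print("Supported base model output:", supported_output)

Otherwise, per the error message itself, support can be reached at info@lamini.ai.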