These are the steps on Linux (jq is used for pretty-printing JSON):
# Install ollama
# this also runs ollama service once install is complete
# run `ollama serve` in a separate terminal if you stop the service.
curl -fsSL https://ollama.com/install.sh | sh
# output skipped
# check version
$ ollama --version
# output: ollama version is 0.12.5
# download model
$ ollama pull gemma3:270m
# output skipped
# check before loading model
$ curl http://localhost:11434/api/ps
# output: {"models":[]}
# load model into memory
$ curl http://localhost:11434/api/generate -d '{
"model": "gemma3:270m"
}'
# output: {"model":"gemma3:270m","created_at":"2025-10-15T18:15:10.920382548Z","response":"","done":true,"done_reason":"load"}
# check loaded model status
$ curl http://localhost:11434/api/ps | jq
# Output:
{
"models": [
{
"name": "gemma3:270m",
"model": "gemma3:270m",
"size": 550094976,
"digest": "e7d36fb2c3b3293cfe56d55889867a064b3a2b22e98335f2e6e8a387e081d6be",
"details": {
"parent_model": "",
"format": "gguf",
"family": "gemma3",
"families": [
"gemma3"
],
"parameter_size": "268.10M",
"quantization_level": "Q8_0"
},
"expires_at": "2025-10-15T23:50:10.920793022+05:30",
"size_vram": 550094976,
"context_length": 4096
}
]
}
# Use model
$ curl http://localhost:11434/api/generate -d '{
"model": "gemma3:270m",
"prompt": "What color is the sky at different times of the day? Respond using JSON",
"format": "json",
"stream": false
}' | jq
# Output:
{
"model": "gemma3:270m",
"created_at": "2025-10-15T18:17:14.08242417Z",
"response": "{\"color\": \"blue\", \"description\": \"The sky at different times of the day is typically blue.\"\n}",
"done": true,
"done_reason": "stop",
"context": [
105,
2364,
107,
3689,
2258,
563,
506,
7217,
657,
1607,
2782,
529,
506,
1719,
236881,
58025,
1699,
10434,
106,
107,
105,
4368,
107,
14937,
3001,
1083,
623,
9503,
827,
623,
7777,
1083,
623,
818,
7217,
657,
1607,
2782,
529,
506,
1719,
563,
11082,
3730,
1781,
107,
236783
],
"total_duration": 730446542,
"load_duration": 139217711,
"prompt_eval_count": 24,
"prompt_eval_duration": 27352765,
"eval_count": 25,
"eval_duration": 208710680
}
# Unload model
$ curl http://localhost:11434/api/generate -d '{
"model": "gemma3:270m",
"keep_alive": 0
}'
# Output: {"model":"gemma3:270m","created_at":"2025-10-15T18:19:03.251184622Z","response":"","done":true,"done_reason":"unload"}
# check model in memory
$ curl http://localhost:11434/api/ps
# Output: {"models":[]}