Skip to content

Register

Registering a model is as easy as:

1
2
3
4
5
service.register(
    model_name="mymodel",
    model=model,
    entrypoint="predict",
)
Register Multiple Models and Multiple Versions?

You can register multiple models with multiple versions:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# "my-model": registered without a version name, i.e. as the default version.
service.register(
    model_name="my-model",
    model=my_model,
    entrypoint="predict",
)
# "my-model": an additional version named "v1".
service.register(
    model_name="my-model",
    model=my_model_v1,
    entrypoint="predict",
    version_name="v1",
)

# "your-model": default version plus two named versions, "v1" and "v2".
service.register(
    model_name="your-model",
    model=your_model,
    entrypoint="predict",
)
service.register(
    model_name="your-model",
    model=your_model_v1,
    entrypoint="predict",
    version_name="v1",
)
service.register(
    model_name="your-model",
    model=your_model_v2,
    entrypoint="predict",
    version_name="v2",
)

Parameters

Parameter Type Default Details
model_name str Name of the model
model object Model object or path
version_name str None Name of the version
entrypoint str None Name of the function to use
metadata dict None Metadata of the model
handler object None A class to handle model loading and predicting
load_now bool True Whether to load the model on registration

Examples

Model Name

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
from pinferencia import Server


def predict(data):
    return sum(data)

service = Server()
service.register(
    model_name="mymodel",
    model=predict,
)

Model

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
from pinferencia import Server


class MyModel:
    def predict(self, data):
        return sum(data)


model = MyModel()

service = Server()
# Use the model object's `predict` method as the prediction function.
service.register(
    model_name="mymodel",
    model=model,
    entrypoint="predict",
)
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
from pinferencia import Server


def predict(data):
    return sum(data)

service = Server()
service.register(
    model_name="mymodel",
    model=predict,
)

Version Name

A model registered without a version name will be registered as the default version.

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
from pinferencia import Server


def add(data):
    return data[0] + data[1]

def substract(data):
    """Return the first element of ``data`` minus the second."""
    return data[0] - data[1]

service = Server()
service.register(
    model_name="mymodel",
    model=add,
    version_name="add", # (1)
)
service.register(
    model_name="mymodel",
    model=substract,
    version_name="substract", # (2)
)
  1. The predicting endpoint will be http://127.0.0.1/v1/models/mymodel/versions/add/predict
  2. The predicting endpoint will be http://127.0.0.1/v1/models/mymodel/versions/substract/predict

Entrypoint

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
from pinferencia import Server


class MyModel:
    def add(self, data):
        return data[0] + data[1]

    def substract(self, data):
        return data[0] - data[1]


model = MyModel()

service = Server()
service.register(
    model_name="mymodel",
    model=model,
    version_name="add", # (1)
    entrypoint="add", # (3)
)
service.register(
    model_name="mymodel",
    model=model,
    version_name="substract", # (2)
    entrypoint="substract", # (4)
)
  1. The predicting endpoint will be http://127.0.0.1/v1/models/mymodel/versions/add/predict
  2. The predicting endpoint will be http://127.0.0.1/v1/models/mymodel/versions/substract/predict
  3. add function of the model will be used to predict.
  4. substract function of the model will be used to predict.

Metadata

Default API

Pinferencia default metadata schema supports platform and device

This information is for display purposes only.

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
from pinferencia import Server


def predict(data):
    return sum(data)

service = Server()
service.register(
    model_name="mymodel",
    model=predict,
    metadata={
        "platform": "Linux",
        "device": "CPU+GPU",
    }
)

Kserve API

Pinferencia also supports Kserve API.

For Kserve V2, the metadata supports:

- platform
- inputs
- outputs

The inputs and outputs metadata determine the data and datatype the model receives and returns.

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
from pinferencia import Server


def predict(data):
    return sum(data)

service = Server(api="kserve") # (1)
service.register(
    model_name="mymodel",
    model=predict,
    metadata={
        "platform": "mac os",
        "inputs": [
            {
                "name": "integers", # (2)
                "datatype": "int64",
                "shape": [1],
                "data": [1, 2, 3],
            }
        ],
        "outputs": [
            {"name": "sum", "datatype": "int64", "shape": -1, "data": 6}, # (3)
            {"name": "product", "datatype": "int64", "shape": -1, "data": 6},
        ],
    }
)
  1. If you want to use kserve API, you need to set api="kserve" when initializing the service.
  2. In the request, if there are multiple inputs, only the input with the name integers will be passed to the model.
  3. Output data will be converted into int64. The datatype field only supports numpy data types. If the data cannot be converted, there will be an extra error field in the output, indicating the reason for the failure.

Handler

Details of handlers can be found at Handlers.

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
from pinferencia import Server
from pinferencia.handlers import PickleHandler


class MyPrintHandler(PickleHandler):

    def predict(self, data):
        print(data)
        return self.model.predict(data)


def predict(data):
    return sum(data)

service = Server()
service.register(
    model_name="mymodel",
    model=predict,
    handler=MyPrintHandler
)

Load Now

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
import joblib

from pinferencia import Server
from pinferencia.handlers import BaseHandler


class JoblibHandler(BaseHandler):
    """Handler that loads the model file with joblib."""

    def load_model(self):
        return joblib.load(self.model_path)


service = Server(model_dir="/opt/models")
service.register(
    model_name="mymodel",
    model="/path/to/model.joblib",
    entrypoint="predict",
    handler=JoblibHandler,
    load_now=True,  # load the model on registration (the documented default)
)