import io
import tempfile
import typing
import altair as alt
import pandas as pd
import tensorflow as tf
import mlui.classes.errors as errors
import mlui.enums as enums
import mlui.tools as tools
import mlui.types.classes as t
class Model:
    """
    Class representing a machine learning model.

    This class provides methods for managing and interacting with a TensorFlow
    machine learning model.
    """

    def __init__(self) -> None:
        """Initialize an empty model."""
        self.reset_state()

    def reset_state(self) -> None:
        """
        Reset the model state.

        This method resets the internal state of the model, including its
        configuration and assigned features.
        """
        self._object: t.Object = tf.keras.Model(inputs=list(), outputs=list())
        self._built: bool = False
        self._set_config()
        self.update_state()

    def _set_config(self) -> None:
        """Set the configuration attributes for the model."""
        self._name: str = self._object.name
        self._inputs: t.Layers = typing.cast(t.Layers, self._object.input_names)
        self._outputs: t.Layers = typing.cast(t.Layers, self._object.output_names)
        self._input_shape: t.LayerShape = self._get_processed_shape("input")
        self._output_shape: t.LayerShape = self._get_processed_shape("output")
        self._optimizer: t.Optimizer = None
        self._losses: t.LayerLosses = dict.fromkeys(self._outputs)
        # Use a comprehension instead of dict.fromkeys(..., list()): fromkeys
        # would make every layer share one and the same list object.
        self._metrics: t.LayerMetrics = {layer: list() for layer in self._outputs}
        self._callbacks: t.Callbacks = dict()
        self._compiled: bool = self._object._is_compiled
        self._history: t.DataFrame = pd.DataFrame()

    def update_state(self) -> None:
        """
        Update the internal state of the model, resetting the input and output
        features.
        """
        # Comprehensions give each layer its own independent feature list
        # (dict.fromkeys with a list default would share a single list).
        self._input_features: t.LayerFeatures = {
            layer: list() for layer in self._inputs
        }
        self._output_features: t.LayerFeatures = {
            layer: list() for layer in self._outputs
        }
        self._input_configured: t.LayerConfigured = dict.fromkeys(self._inputs, False)
        self._output_configured: t.LayerConfigured = dict.fromkeys(
            self._outputs, False
        )

    def _shapes_to_list(self, shapes: t.Shapes) -> list[t.Shape]:
        """
        Convert shapes from a dictionary or a single tuple to a list.

        Parameters
        ----------
        shapes : dict of tuples, list of tuples or tuple
            Input or output shapes. Single shape is a `tuple` of `(None, int)`.

        Returns
        -------
        list of tuples
            Converted shapes.
        """
        if isinstance(shapes, dict):
            return list(shapes.values())
        return [shapes] if isinstance(shapes, tuple) else shapes

    def _get_processed_shape(self, at: t.Side) -> t.LayerShape:
        """
        Retrieve and process the shapes of input or output layers.

        Parameters
        ----------
        at : {'input', 'output'}
            Side to retrieve shapes for.

        Returns
        -------
        dict of {str to int}
            Processed shapes for the specified side.
        """
        if at == "input":
            layers = self._inputs
            shapes = self._object.input_shape
        else:
            layers = self._outputs
            shapes = self._object.output_shape
        shapes = self._shapes_to_list(shapes)
        # Keep only the feature dimension (index 1); index 0 is the batch axis.
        return {layer: shape[1] for layer, shape in zip(layers, shapes)}

    def _get_processed_data(self, data: t.DataFrame, at: t.Side) -> t.LayerData:
        """
        Process the input or output data based on the specified side.

        Parameters
        ----------
        data : DataFrame
            Input or output data.
        at : {'input', 'output'}
            Side to process data for.

        Returns
        -------
        dict of {str to NDArray}
            Processed data.
        """
        if at == "input":
            layers = self._inputs
            features = self._input_features
        else:
            layers = self._outputs
            features = self._output_features
        return {layer: data[features[layer]].to_numpy() for layer in layers}

    def set_optimizer(self, entity: str, params: t.OptimizerParams) -> None:
        """
        Set the optimizer for the model.

        Parameters
        ----------
        entity : str
            Name of the optimizer type.
        params : OptimizerParams
            Parameters for the optimizer.

        Raises
        ------
        SetError
            If there is an issue setting the optimizer.
        """
        try:
            prototype = enums.optimizers.classes[entity]
            self._optimizer = prototype(**params)
        except KeyError as exc:
            raise errors.SetError(
                "There is no prototype for this optimizer!"
            ) from exc
        except (ValueError, AttributeError, TypeError) as exc:
            raise errors.SetError("Unable to set the optimizer!") from exc

    def get_optimizer(self) -> str | None:
        """
        Get the name of the current optimizer.

        Returns
        -------
        str or None
            Name of the optimizer.
        """
        return typing.cast(str, self._optimizer.name) if self._optimizer else None

    def set_loss(self, layer: str, entity: str) -> None:
        """
        Set the loss function for a specific output layer.

        Parameters
        ----------
        layer : str
            Name of the output layer.
        entity : str
            Name of the loss function type.
        """
        self._losses[layer] = entity

    def get_loss(self, layer: str) -> t.Loss:
        """
        Get the loss function for a specific output layer.

        Parameters
        ----------
        layer : str
            Name of the output layer.

        Returns
        -------
        str or None
            Name of the loss function.
        """
        # Falsy entries (unset layers, unknown layers) are reported as None.
        return self._losses.get(layer) or None

    def set_metrics(self, layer: str, entities: list[str]) -> None:
        """
        Set the metrics for a specific output layer.

        Parameters
        ----------
        layer : str
            Name of the output layer.
        entities : list of str
            Names of the metrics.
        """
        self._metrics[layer] = entities

    def get_metrics(self, layer: str) -> t.Metrics:
        """
        Get the metrics for a specific output layer.

        Parameters
        ----------
        layer : str
            Name of the output layer.

        Returns
        -------
        list of str
            Names of the metrics.
        """
        # Return a copy so callers cannot mutate the internal list.
        return self._metrics[layer].copy() if self._metrics.get(layer) else list()

    def compile(self) -> None:
        """
        Compile the model.

        Raises
        ------
        ModelError
            If there is an issue compiling the model.
        """
        if not self._optimizer_is_set:
            raise errors.ModelError("Please, set the model optimizer!")
        if not self._losses_are_set:
            raise errors.ModelError(
                "Please, set the loss function for each output layer!"
            )
        try:
            self._object.compile(
                optimizer=self._optimizer, loss=self._losses, metrics=self._metrics
            )
        except (ValueError, AttributeError, TypeError) as exc:
            raise errors.ModelError("Unable to compile the model!") from exc
        self._compiled = True

    def set_features(self, layer: str, columns: t.Columns, at: t.Side) -> None:
        """
        Set the input or output features for a specific layer.

        Parameters
        ----------
        layer : str
            Name of the layer.
        columns : list of str
            Names of the columns.
        at : {'input', 'output'}
            Side to set features for.

        Raises
        ------
        SetError
            If there is an issue setting the features.
        """
        if not columns:
            raise errors.SetError("Please, select at least one column!")
        if at == "input":
            shape = self._input_shape.get(layer)
            configured = self._input_configured
            features = self._input_features
        else:
            shape = self._output_shape.get(layer)
            configured = self._output_configured
            features = self._output_features
        if not shape:
            raise errors.SetError("There is no such layer in the model!")
        if len(columns) > shape:
            raise errors.SetError("Please, select fewer columns!")
        # A layer is fully configured once its column count matches its shape.
        configured[layer] = len(columns) == shape
        features[layer] = columns

    def get_features(self, layer: str, at: t.Side) -> t.Features:
        """
        Get the input or output features for a specific layer.

        Parameters
        ----------
        layer : str
            Name of the layer.
        at : {'input', 'output'}
            Side to get features for.

        Returns
        -------
        list of str
            Names of the features.
        """
        if at == "input":
            features = self._input_features
        else:
            features = self._output_features
        # Return a copy so callers cannot mutate the internal list.
        return features[layer].copy() if features.get(layer) else list()

    def set_callback(self, entity: str, params: t.CallbackParams) -> None:
        """
        Set the callback for the model.

        Parameters
        ----------
        entity : str
            Name of the callback type.
        params : CallbackParams
            Parameters for the callback.

        Raises
        ------
        SetError
            If there is an issue setting the callback.
        """
        try:
            prototype = enums.callbacks.classes[entity]
            self._callbacks[entity] = prototype(**params)
        except KeyError as exc:
            raise errors.SetError(
                "There is no prototype for this callback!"
            ) from exc
        except (ValueError, AttributeError, TypeError) as exc:
            raise errors.SetError("Unable to set the callback!") from exc

    def get_callback(self, entity: str) -> t.Callback:
        """
        Get the callback for the model.

        Parameters
        ----------
        entity : str
            Name of the callback type.

        Returns
        -------
        Callback or None
            Callback instance.
        """
        return self._callbacks.get(entity)

    def delete_callback(self, entity: str) -> None:
        """
        Delete the callback from the model.

        Parameters
        ----------
        entity : str
            Name of the callback type.
        """
        # pop with a default: deleting an absent callback is a no-op.
        self._callbacks.pop(entity, None)

    def fit(
        self, data: t.DataFrame, batch_size: int, num_epochs: int, val_split: float
    ) -> None:
        """
        Fit the model to the provided data.

        Parameters
        ----------
        data : DataFrame
            Input and output data.
        batch_size : int
            Batch size.
        num_epochs : int
            Number of epochs.
        val_split : float
            Validation split.

        Raises
        ------
        ModelError
            If there is an issue fitting the model.
        """
        if tools.data.contains_nonnumeric_dtypes(data):
            raise errors.ModelError("The data for fitting contains non-numeric values!")
        try:
            logs = self._object.fit(
                x=self._get_processed_data(data, "input"),
                y=self._get_processed_data(data, "output"),
                batch_size=batch_size,
                epochs=num_epochs,
                validation_split=val_split,
                callbacks=self._callbacks.values(),
            )
        except (RuntimeError, ValueError, AttributeError, TypeError) as exc:
            raise errors.ModelError("Unable to fit the model!") from exc
        self._update_history(pd.DataFrame(logs.history))

    def _update_history(self, logs: t.DataFrame) -> None:
        """
        Update the training history with new logs.

        Parameters
        ----------
        logs : DataFrame
            New logs to be added to the history.
        """
        history_len = len(self._history)
        # Number new epochs consecutively after the already-recorded ones.
        logs.insert(0, "epoch", range(history_len + 1, history_len + len(logs) + 1))
        self._history = pd.concat([self._history, logs])

    def plot_history(self, y: t.LogsNames, points: bool) -> t.Chart:
        """
        Plot the training history.

        Parameters
        ----------
        y : list of str
            Names of the logs to plot.
        points : bool
            Whether to include points on the plot.

        Raises
        ------
        PlotError
            If no logs are selected or the plot cannot be displayed.

        Returns
        -------
        Chart
            Altair chart representing the training history.
        """
        if not y:
            raise errors.PlotError("Please, select at least one log!")
        try:
            logs = self._history.loc[:, ["epoch", *y]]
            # Long format: one row per (epoch, log_name) pair for color encoding.
            melted_logs = logs.melt(
                "epoch", var_name="log_name", value_name="log_value"
            )
            chart = (
                alt.Chart(melted_logs)
                .mark_line(point=points)
                .encode(
                    x=alt.X("epoch").scale(zero=False).title("Epoch"),
                    y=alt.Y("log_value").scale(zero=False).title("Value"),
                    color=alt.Color("log_name")
                    .scale(scheme="set1")
                    .legend(title="Log"),
                )
                .interactive(bind_x=True, bind_y=True)
                .properties(height=500)
            )
        except (ValueError, AttributeError, TypeError) as exc:
            raise errors.PlotError("Unable to display the plot!") from exc
        return chart

    @property
    def name(self) -> str:
        """Name of the model."""
        return self._name

    @property
    def inputs(self) -> t.Layers:
        """Names of the input layers."""
        return self._inputs.copy()

    @property
    def outputs(self) -> t.Layers:
        """Names of the output layers."""
        return self._outputs.copy()

    @property
    def input_shape(self) -> t.LayerShape:
        """Shapes of the input layers."""
        return self._input_shape.copy()

    @property
    def output_shape(self) -> t.LayerShape:
        """Shapes of the output layers."""
        return self._output_shape.copy()

    @property
    def input_configured(self) -> bool:
        """True if all input layers are configured, False otherwise."""
        return bool(self._input_configured) and all(self._input_configured.values())

    @property
    def output_configured(self) -> bool:
        """True if all output layers are configured, False otherwise."""
        return bool(self._output_configured) and all(self._output_configured.values())

    @property
    def _optimizer_is_set(self) -> bool:
        """True if an optimizer is set, False otherwise."""
        return bool(self._optimizer)

    @property
    def _losses_are_set(self) -> bool:
        """True if losses are set for all output layers, False otherwise."""
        return bool(self._losses) and all(self._losses.values())

    @property
    def built(self) -> bool:
        """True if the model is built, False otherwise."""
        return self._built

    @property
    def compiled(self) -> bool:
        """True if the model is compiled, False otherwise."""
        return self._compiled

    @property
    def history(self) -> t.DataFrame:
        """Training history DataFrame."""
        return self._history.copy()

    @property
    def summary(self) -> None:
        """Summary of the model (printed to stdout, nothing is returned)."""
        self._object.summary()

    @property
    def graph(self) -> bytes:
        """Bytes representation of the model graph."""
        # NOTE(review): reopening a NamedTemporaryFile by name may fail on
        # Windows; assumed to run on a POSIX platform — confirm.
        with tempfile.NamedTemporaryFile(suffix=".pdf") as tmp:
            tf.keras.utils.plot_model(
                self._object, to_file=tmp.name, show_shapes=True, rankdir="LR", dpi=200
            )
            graph = tmp.read()
        return graph

    @property
    def as_bytes(self) -> bytes:
        """Bytes representation of the saved model."""
        with tempfile.NamedTemporaryFile() as tmp:
            self._object.save(filepath=tmp.name, save_format="h5")
            model_as_bytes = tmp.read()
        return model_as_bytes
class UploadedModel(Model):
    """Class representing the uploaded model."""

    def __init__(self) -> None:
        """Initialize an empty uploaded machine learning model."""
        super().__init__()

    def upload(self, buff: io.BytesIO) -> None:
        """
        Upload a model from the provided file.

        Parameters
        ----------
        buff : file-like object
            Byte buffer containing the model.

        Raises
        ------
        UploadError
            If there is an issue reading the model from the file. If there is an
            issue validating the shapes of the model.
        """
        try:
            with tempfile.NamedTemporaryFile() as tmp:
                tmp.write(buff.getbuffer())
                # compile=False: the user configures optimizer/losses later.
                model = typing.cast(
                    t.Object, tf.keras.models.load_model(tmp.name, compile=False)
                )
            tools.model.validate_shapes(model.input_shape)
            tools.model.validate_shapes(model.output_shape)
            self._object = model
            self._built = True
            self._set_config()
            self.update_state()
        except (ValueError, errors.ValidateModelError) as error:
            raise errors.UploadError(error) from error

    def evaluate(self, data: t.DataFrame, batch_size: int) -> t.EvaluationResults:
        """
        Evaluate the model on the provided data.

        Parameters
        ----------
        data : DataFrame
            Input and output data.
        batch_size : int
            Batch size.

        Raises
        ------
        ModelError
            If there is an issue evaluating the model.

        Returns
        -------
        DataFrame
            Evaluation results as a DataFrame.
        """
        if tools.data.contains_nonnumeric_dtypes(data):
            raise errors.ModelError(
                "The data for evaluation contains non-numeric values!"
            )
        try:
            logs = typing.cast(
                dict[str, float],
                self._object.evaluate(
                    x=self._get_processed_data(data, "input"),
                    y=self._get_processed_data(data, "output"),
                    batch_size=batch_size,
                    callbacks=self._callbacks.values(),
                    return_dict=True,
                ),
            )  # Type-cast the return value as 'return_dict' is set to True
            results = pd.DataFrame(logs.items(), columns=["Name", "Value"])
        except (RuntimeError, ValueError, AttributeError, TypeError) as exc:
            raise errors.ModelError("Unable to evaluate the model!") from exc
        return results

    def predict(self, data: t.DataFrame, batch_size: int) -> t.Predictions:
        """
        Make predictions using the model on the provided data.

        Parameters
        ----------
        data : DataFrame
            Input data.
        batch_size : int
            Batch size.

        Raises
        ------
        ModelError
            If there is an issue making predictions.

        Returns
        -------
        list of DataFrame
            Predictions as DataFrames.
        """
        if tools.data.contains_nonnumeric_dtypes(data):
            raise errors.ModelError(
                "The data for predictions contains non-numeric values!"
            )
        try:
            arrays = self._object.predict(
                x=self._get_processed_data(data, "input"),
                batch_size=batch_size,
                callbacks=self._callbacks.values(),
            )
            # Normalize Keras' three possible return shapes (dict, list, or a
            # single array) into a list of DataFrames, one per output layer.
            if isinstance(arrays, dict):
                predictions = [pd.DataFrame(array) for array in arrays.values()]
            elif isinstance(arrays, list):
                predictions = [pd.DataFrame(array) for array in arrays]
            else:
                predictions = [pd.DataFrame(arrays)]
        except (RuntimeError, ValueError, AttributeError, TypeError) as exc:
            raise errors.ModelError("Unable to make the prediction!") from exc
        return predictions
class CreatedModel(Model):
    """Class representing the created model."""

    def __init__(self) -> None:
        """Initialize an empty created machine learning model."""
        # super().__init__() calls reset_state(), which dispatches to the
        # overridden version below (same effect as calling it directly).
        super().__init__()

    def reset_state(self) -> None:
        """Reset the state of the created model."""
        super().reset_state()
        self._layers: t.LayerObject = dict()

    def set_name(self, name: str) -> None:
        """
        Set the name of the created model.

        Parameters
        ----------
        name : str
            Name of the model.
        """
        self._name = name

    def set_layer(
        self,
        entity: str,
        name: str,
        params: t.LayerParams,
        connection: t.LayerConnection,
    ) -> None:
        """
        Set the layer for the created model.

        Parameters
        ----------
        entity : str
            Name of the layer type.
        name : str
            Name of the layer.
        params : LayerParams
            Parameters for the layer.
        connection : Layer, list of Layer or None
            Layer(s) to connect.

        Raises
        ------
        SetError
            If there is an issue setting the layer.
        """
        if name in self._layers:
            raise errors.SetError("Layer with this name already exists!")
        try:
            prototype = enums.layers.classes[entity]
            if connection is None:
                layer = prototype(name=name, **params)
            else:
                # Functional API: instantiate the layer, then call it on the
                # connected layer(s) to wire it into the graph.
                layer = prototype(name=name, **params)(connection)
        except KeyError as exc:
            raise errors.SetError("There is no prototype for this layer!") from exc
        except (ValueError, AttributeError, TypeError) as exc:
            raise errors.SetError("Unable to set the layer!") from exc
        if entity == "Input":
            self._inputs.append(name)
        self._layers[name] = layer

    def delete_last_layer(self) -> None:
        """
        Delete the last layer from the created model.

        Raises
        ------
        DeleteError
            If there are no layers to remove.
        """
        try:
            # popitem() removes the most recently inserted layer (dicts keep
            # insertion order) and raises KeyError when empty.
            name = self._layers.popitem()[0]
        except KeyError as exc:
            raise errors.DeleteError("There are no layers to remove!") from exc
        # The removed layer may also be registered as an input and/or output;
        # drop it from those lists if present.
        try:
            self._inputs.remove(name)
        except ValueError:
            pass
        try:
            self._outputs.remove(name)
        except ValueError:
            pass

    def set_outputs(self, outputs: t.Layers) -> None:
        """
        Set the output layers for the created model.

        Parameters
        ----------
        outputs : list of str
            Names of the layers.

        Raises
        ------
        SetError
            If there is an issue setting the output layers.
        """
        if not outputs:
            raise errors.SetError("Please, select at least one output!")
        self._outputs = outputs

    def create(self) -> None:
        """
        Create the machine learning model.

        Raises
        ------
        CreateError
            If there is an issue creating the model.
        """
        input_layers = {name: self._layers[name] for name in self._inputs}
        output_layers = {name: self._layers[name] for name in self._outputs}
        if not input_layers or not output_layers:
            raise errors.CreateError("There are no input or output layers!")
        try:
            self._object = tf.keras.Model(
                inputs=input_layers, outputs=output_layers, name=self._name
            )
        except (ValueError, AttributeError, TypeError) as exc:
            raise errors.CreateError("Unable to create the model!") from exc
        self._built = True
        self._set_config()
        self.update_state()

    @property
    def layers(self) -> t.LayerObject:
        """Objects of the layers."""
        return self._layers.copy()