Source code for mlui.classes.model

import io
import tempfile
import typing

import altair as alt
import pandas as pd
import tensorflow as tf

import mlui.classes.errors as errors
import mlui.enums as enums
import mlui.tools as tools
import mlui.types.classes as t


class Model:
    """
    Class representing a machine learning model.

    This class provides methods for managing and interacting with a TensorFlow
    machine learning model.
    """

    def __init__(self) -> None:
        """Initialize an empty model."""
        self.reset_state()

    def reset_state(self) -> None:
        """
        Reset the model state.

        This method resets the internal state of the model, including its
        configuration and assigned features.
        """
        self._object: t.Object = tf.keras.Model(inputs=list(), outputs=list())
        self._built: bool = False

        self._set_config()
        self.update_state()

    def _set_config(self) -> None:
        """Set the configuration attributes for the model."""
        self._name: str = self._object.name
        self._inputs: t.Layers = typing.cast(t.Layers, self._object.input_names)
        self._outputs: t.Layers = typing.cast(t.Layers, self._object.output_names)
        self._input_shape: t.LayerShape = self._get_processed_shape("input")
        self._output_shape: t.LayerShape = self._get_processed_shape("output")
        self._optimizer: t.Optimizer = None
        self._losses: t.LayerLosses = dict.fromkeys(self._outputs)
        # One fresh list per layer: dict.fromkeys(keys, list()) would make
        # every layer share the very same list object.
        self._metrics: t.LayerMetrics = {layer: [] for layer in self._outputs}
        self._callbacks: t.Callbacks = dict()
        self._compiled: bool = self._object._is_compiled
        self._history: t.DataFrame = pd.DataFrame()

    def update_state(self) -> None:
        """
        Update the internal state of the model, resetting the input and output
        features.
        """
        # Each layer gets its own empty list so that an in-place change to one
        # layer's features can never leak into another layer.
        self._input_features: t.LayerFeatures = {layer: [] for layer in self._inputs}
        self._output_features: t.LayerFeatures = {
            layer: [] for layer in self._outputs
        }
        self._input_configured: t.LayerConfigured = dict.fromkeys(self._inputs, False)
        self._output_configured: t.LayerConfigured = dict.fromkeys(
            self._outputs, False
        )

    def _shapes_to_list(self, shapes: t.Shapes) -> list[t.Shape]:
        """
        Convert shapes from a dictionary or a single tuple to a list.

        Parameters
        ----------
        shapes : dict of tuples, list of tuples or tuple
            Input or output shapes. Single shape is a `tuple` of `(None, int)`.

        Returns
        -------
        list of tuples
            Converted shapes.
        """
        if isinstance(shapes, dict):
            return list(shapes.values())

        if isinstance(shapes, tuple):
            return [shapes]

        return shapes

    def _get_processed_shape(self, at: t.Side) -> t.LayerShape:
        """
        Retrieve and process the shapes of input or output layers.

        Parameters
        ----------
        at : {'input', 'output'}
            Side to retrieve shapes for.

        Returns
        -------
        dict of {str to int}
            Processed shapes for the specified side.
        """
        if at == "input":
            layers = self._inputs
            shapes = self._object.input_shape
        else:
            layers = self._outputs
            shapes = self._object.output_shape

        shapes = self._shapes_to_list(shapes)

        # Only the second element of every shape is kept for each layer.
        return {layer: shape[1] for layer, shape in zip(layers, shapes)}

    def _get_processed_data(self, data: t.DataFrame, at: t.Side) -> t.LayerData:
        """
        Process the input or output data based on the specified side.

        Parameters
        ----------
        data : DataFrame
            Input or output data.
        at : {'input', 'output'}
            Side to process data for.

        Returns
        -------
        dict of {str to NDArray}
            Processed data.
        """
        if at == "input":
            layers = self._inputs
            features = self._input_features
        else:
            layers = self._outputs
            features = self._output_features

        return {layer: data[features[layer]].to_numpy() for layer in layers}

    def set_optimizer(self, entity: str, params: t.OptimizerParams) -> None:
        """
        Set the optimizer for the model.

        Parameters
        ----------
        entity : str
            Name of the optimizer type.
        params : OptimizerParams
            Parameters for the optimizer.

        Raises
        ------
        SetError
            If there is an issue setting the optimizer.
        """
        try:
            prototype = enums.optimizers.classes[entity]
            self._optimizer = prototype(**params)
        except KeyError as error:
            raise errors.SetError("There is no prototype for this optimizer!") from error
        except (ValueError, AttributeError, TypeError) as error:
            raise errors.SetError("Unable to set the optimizer!") from error

    def get_optimizer(self) -> str | None:
        """
        Get the name of the current optimizer.

        Returns
        -------
        str or None
            Name of the optimizer.
        """
        if not self._optimizer:
            return None

        return typing.cast(str, self._optimizer.name)

    def set_loss(self, layer: str, entity: str) -> None:
        """
        Set the loss function for a specific output layer.

        Parameters
        ----------
        layer : str
            Name of the output layer.
        entity : str
            Name of the loss function type.
        """
        self._losses[layer] = entity

    def get_loss(self, layer: str) -> t.Loss:
        """
        Get the loss function for a specific output layer.

        Parameters
        ----------
        layer : str
            Name of the output layer.

        Returns
        -------
        str or None
            Name of the loss function.
        """
        return self._losses.get(layer) or None

    def set_metrics(self, layer: str, entities: list[str]) -> None:
        """
        Set the metrics for a specific output layer.

        Parameters
        ----------
        layer : str
            Name of the output layer.
        entities : list of str
            Names of the metrics.
        """
        # Store a copy so later changes to the caller's list do not silently
        # alter the model configuration.
        self._metrics[layer] = list(entities)

    def get_metrics(self, layer: str) -> t.Metrics:
        """
        Get the metrics for a specific output layer.

        Parameters
        ----------
        layer : str
            Name of the output layer.

        Returns
        -------
        list of str
            Names of the metrics.
        """
        return list(self._metrics.get(layer) or [])

    def compile(self) -> None:
        """
        Compile the model.

        Raises
        ------
        ModelError
            If there is an issue compiling the model.
        """
        if not self._optimizer_is_set:
            raise errors.ModelError("Please, set the model optimizer!")

        if not self._losses_are_set:
            raise errors.ModelError(
                "Please, set the loss function for each output layer!"
            )

        try:
            self._object.compile(
                optimizer=self._optimizer, loss=self._losses, metrics=self._metrics
            )
        except (ValueError, AttributeError, TypeError) as error:
            raise errors.ModelError("Unable to compile the model!") from error

        self._compiled = True

    def set_features(self, layer: str, columns: t.Columns, at: t.Side) -> None:
        """
        Set the input or output features for a specific layer.

        Parameters
        ----------
        layer : str
            Name of the layer.
        columns : list of str
            Names of the columns.
        at : {'input', 'output'}
            Side to set features for.

        Raises
        ------
        SetError
            If there is an issue setting the features.
        """
        if not columns:
            raise errors.SetError("Please, select at least one column!")

        if at == "input":
            shape = self._input_shape.get(layer)
            configured = self._input_configured
            features = self._input_features
        else:
            shape = self._output_shape.get(layer)
            configured = self._output_configured
            features = self._output_features

        if not shape:
            raise errors.SetError("There is no such layer in the model!")

        if len(columns) > shape:
            raise errors.SetError("Please, select fewer columns!")

        # A layer counts as configured only when every unit has a column.
        configured[layer] = len(columns) == shape
        # Keep a private copy so the caller's list stays independent.
        features[layer] = list(columns)

    def get_features(self, layer: str, at: t.Side) -> t.Features:
        """
        Get the input or output features for a specific layer.

        Parameters
        ----------
        layer : str
            Name of the layer.
        at : {'input', 'output'}
            Side to get features for.

        Returns
        -------
        list of str
            Names of the features.
        """
        features = self._input_features if at == "input" else self._output_features

        return list(features.get(layer) or [])

    def set_callback(self, entity: str, params: t.CallbackParams) -> None:
        """
        Set the callback for the model.

        Parameters
        ----------
        entity : str
            Name of the callback type.
        params : CallbackParams
            Parameters for the callback.

        Raises
        ------
        SetError
            If there is an issue setting the callback.
        """
        try:
            prototype = enums.callbacks.classes[entity]
            self._callbacks[entity] = prototype(**params)
        except KeyError as error:
            raise errors.SetError("There is no prototype for this callback!") from error
        except (ValueError, AttributeError, TypeError) as error:
            raise errors.SetError("Unable to set the callback!") from error

    def get_callback(self, entity: str) -> t.Callback:
        """
        Get the callback for the model.

        Parameters
        ----------
        entity : str
            Name of the callback type.

        Returns
        -------
        Callback or None
            Callback instance.
        """
        return self._callbacks.get(entity)

    def delete_callback(self, entity: str) -> None:
        """
        Delete the callback from the model.

        Parameters
        ----------
        entity : str
            Name of the callback type.
        """
        self._callbacks.pop(entity, None)

    def fit(
        self, data: t.DataFrame, batch_size: int, num_epochs: int, val_split: float
    ) -> None:
        """
        Fit the model to the provided data.

        Parameters
        ----------
        data : DataFrame
            Input and output data.
        batch_size : int
            Batch size.
        num_epochs : int
            Number of epochs.
        val_split : float
            Validation split.

        Raises
        ------
        ModelError
            If there is an issue fitting the model.
        """
        if tools.data.contains_nonnumeric_dtypes(data):
            raise errors.ModelError("The data for fitting contains non-numeric values!")

        try:
            logs = self._object.fit(
                x=self._get_processed_data(data, "input"),
                y=self._get_processed_data(data, "output"),
                batch_size=batch_size,
                epochs=num_epochs,
                validation_split=val_split,
                # Keras documents `callbacks` as a list, so hand it a real
                # list rather than a dictionary view.
                callbacks=list(self._callbacks.values()),
            )
        except (RuntimeError, ValueError, AttributeError, TypeError) as error:
            raise errors.ModelError("Unable to fit the model!") from error

        self._update_history(pd.DataFrame(logs.history))

    def _update_history(self, logs: t.DataFrame) -> None:
        """
        Update the training history with new logs.

        The new rows are numbered with consecutive epochs continuing from the
        current history, and the resulting history keeps a unique,
        monotonically increasing index.

        Parameters
        ----------
        logs : DataFrame
            New logs to be added to the history. The passed frame itself is
            left unmodified.
        """
        first_epoch = len(self._history) + 1

        # Work on a copy: inserting the column in place would modify the
        # frame owned by the caller.
        numbered = logs.copy()
        numbered.insert(0, "epoch", range(first_epoch, first_epoch + len(numbered)))

        if self._history.empty:
            # Nothing to append to; this also avoids concatenating with an
            # empty frame.
            self._history = numbered.reset_index(drop=True)
        else:
            # ignore_index keeps row labels unique across successive fits.
            self._history = pd.concat([self._history, numbered], ignore_index=True)

    def plot_history(self, y: t.LogsNames, points: bool) -> t.Chart:
        """
        Plot the training history.

        Parameters
        ----------
        y : list of str
            Names of the logs to plot.
        points : bool
            Whether to include points on the plot.

        Returns
        -------
        Chart
            Altair chart representing the training history.

        Raises
        ------
        PlotError
            If no logs are selected or the plot cannot be built (for example,
            when a requested log is absent from the history).
        """
        if not y:
            raise errors.PlotError("Please, select at least one log!")

        try:
            # Selecting a missing column (unknown log name, or "epoch" before
            # any training happened) raises KeyError.
            logs = self._history.loc[:, ["epoch", *y]]
            melted_logs = logs.melt(
                "epoch", var_name="log_name", value_name="log_value"
            )
            chart = (
                alt.Chart(melted_logs)
                .mark_line(point=points)
                .encode(
                    x=alt.X("epoch").scale(zero=False).title("Epoch"),
                    y=alt.Y("log_value").scale(zero=False).title("Value"),
                    color=alt.Color("log_name")
                    .scale(scheme="set1")
                    .legend(title="Log"),
                )
                .interactive(bind_x=True, bind_y=True)
                .properties(height=500)
            )
        except (KeyError, ValueError, AttributeError, TypeError) as error:
            raise errors.PlotError("Unable to display the plot!") from error

        return chart

    @property
    def name(self) -> str:
        """Name of the model."""
        return self._name

    @property
    def inputs(self) -> t.Layers:
        """Names of the input layers."""
        return self._inputs.copy()

    @property
    def outputs(self) -> t.Layers:
        """Names of the output layers."""
        return self._outputs.copy()

    @property
    def input_shape(self) -> t.LayerShape:
        """Shapes of the input layers."""
        return self._input_shape.copy()

    @property
    def output_shape(self) -> t.LayerShape:
        """Shapes of the output layers."""
        return self._output_shape.copy()

    @property
    def input_configured(self) -> bool:
        """True if all input layers are configured, False otherwise."""
        # An empty mapping (no input layers) is reported as not configured.
        return bool(self._input_configured and all(self._input_configured.values()))

    @property
    def output_configured(self) -> bool:
        """True if all output layers are configured, False otherwise."""
        # An empty mapping (no output layers) is reported as not configured.
        return bool(
            self._output_configured and all(self._output_configured.values())
        )

    @property
    def _optimizer_is_set(self) -> bool:
        """True if an optimizer is set, False otherwise."""
        return bool(self._optimizer)

    @property
    def _losses_are_set(self) -> bool:
        """True if losses are set for all output layers, False otherwise."""
        return bool(self._losses and all(self._losses.values()))

    @property
    def built(self) -> bool:
        """True if the model is built, False otherwise."""
        return self._built

    @property
    def compiled(self) -> bool:
        """True if the model is compiled, False otherwise."""
        return self._compiled

    @property
    def history(self) -> t.DataFrame:
        """Training history DataFrame."""
        return self._history.copy()

    @property
    def summary(self) -> None:
        """Summary of the model."""
        self._object.summary()

    @property
    def graph(self) -> bytes:
        """Bytes representation of the model graph."""
        # The plot is written to the temporary file by path and read back
        # through the still-open handle.
        # NOTE(review): reopening a NamedTemporaryFile by name while it is
        # open is platform dependent - confirm the supported platforms.
        with tempfile.NamedTemporaryFile(suffix=".pdf") as tmp:
            tf.keras.utils.plot_model(
                self._object, to_file=tmp.name, show_shapes=True, rankdir="LR", dpi=200
            )
            graph = tmp.read()

        return graph

    @property
    def as_bytes(self) -> bytes:
        """Bytes representation of the saved model."""
        # Same write-by-path / read-through-handle approach as `graph`.
        with tempfile.NamedTemporaryFile() as tmp:
            self._object.save(filepath=tmp.name, save_format="h5")
            model_as_bytes = tmp.read()

        return model_as_bytes
class UploadedModel(Model):
    """Class representing the uploaded model."""

    def __init__(self) -> None:
        """Initialize an empty uploaded machine learning model."""
        super().__init__()

    def upload(self, buff: io.BytesIO) -> None:
        """
        Upload a model from the provided file.

        Parameters
        ----------
        buff : file-like object
            Byte buffer containing the model.

        Raises
        ------
        UploadError
            If there is an issue reading the model from the file.
            If there is an issue validating the shapes of the model.
        """
        try:
            with tempfile.NamedTemporaryFile() as tmp:
                tmp.write(buff.getbuffer())
                # The loader opens the file by name, so the bytes must leave
                # Python's write buffer first; otherwise it may see a
                # truncated or even empty file.
                tmp.flush()

                model = typing.cast(
                    t.Object, tf.keras.models.load_model(tmp.name, compile=False)
                )

            tools.model.validate_shapes(model.input_shape)
            tools.model.validate_shapes(model.output_shape)

            self._object = model
            self._built = True

            self._set_config()
            self.update_state()
        except (OSError, ValueError, errors.ValidateModelError) as error:
            # OSError covers file-level failures (temporary file, unreadable
            # model file), reported through the documented UploadError.
            raise errors.UploadError(error) from error

    def evaluate(self, data: t.DataFrame, batch_size: int) -> t.EvaluationResults:
        """
        Evaluate the model on the provided data.

        Parameters
        ----------
        data : DataFrame
            Input and output data.
        batch_size : int
            Batch size.

        Raises
        ------
        ModelError
            If there is an issue evaluating the model.

        Returns
        -------
        DataFrame
            Evaluation results as a DataFrame.
        """
        if tools.data.contains_nonnumeric_dtypes(data):
            raise errors.ModelError(
                "The data for evaluation contains non-numeric values!"
            )

        try:
            # Type-cast the return value as 'return_dict' is set to True
            logs = typing.cast(
                dict[str, float],
                self._object.evaluate(
                    x=self._get_processed_data(data, "input"),
                    y=self._get_processed_data(data, "output"),
                    batch_size=batch_size,
                    # Keras documents `callbacks` as a list.
                    callbacks=list(self._callbacks.values()),
                    return_dict=True,
                ),
            )
            results = pd.DataFrame(logs.items(), columns=["Name", "Value"])
        except (RuntimeError, ValueError, AttributeError, TypeError) as error:
            raise errors.ModelError("Unable to evaluate the model!") from error

        return results

    def predict(self, data: t.DataFrame, batch_size: int) -> t.Predictions:
        """
        Make predictions using the model on the provided data.

        Parameters
        ----------
        data : DataFrame
            Input data.
        batch_size : int
            Batch size.

        Raises
        ------
        ModelError
            If there is an issue making predictions.

        Returns
        -------
        list of DataFrame
            Predictions as DataFrames.
        """
        if tools.data.contains_nonnumeric_dtypes(data):
            raise errors.ModelError(
                "The data for predictions contains non-numeric values!"
            )

        try:
            arrays = self._object.predict(
                x=self._get_processed_data(data, "input"),
                batch_size=batch_size,
                # Keras documents `callbacks` as a list.
                callbacks=list(self._callbacks.values()),
            )

            # Normalise the result (dict, list or single array) to a list of
            # DataFrames, one per returned array.
            if isinstance(arrays, dict):
                predictions = [pd.DataFrame(array) for array in arrays.values()]
            elif isinstance(arrays, list):
                predictions = [pd.DataFrame(array) for array in arrays]
            else:
                predictions = [pd.DataFrame(arrays)]
        except (RuntimeError, ValueError, AttributeError, TypeError) as error:
            raise errors.ModelError("Unable to make the prediction!") from error

        return predictions
class CreatedModel(Model):
    """Class representing the created model."""

    def __init__(self) -> None:
        """Initialize an empty created machine learning model."""
        # The base initializer calls `reset_state`, which resolves to the
        # override below and therefore also creates the layer registry.
        super().__init__()

    def reset_state(self) -> None:
        """Reset the state of the created model."""
        super().reset_state()

        self._layers: t.LayerObject = dict()

    def set_name(self, name: str) -> None:
        """
        Set the name of the created model.

        Parameters
        ----------
        name : str
            Name of the model.
        """
        self._name = name

    def set_layer(
        self,
        entity: str,
        name: str,
        params: t.LayerParams,
        connection: t.LayerConnection,
    ) -> None:
        """
        Set the layer for the created model.

        Parameters
        ----------
        entity : str
            Name of the layer type.
        name : str
            Name of the layer.
        params : LayerParams
            Parameters for the layer.
        connection : Layer, list of Layer or None
            Layer(s) to connect.

        Raises
        ------
        SetError
            If there is an issue setting the layer.
        """
        if name in self._layers:
            raise errors.SetError("Layer with this name already exists!")

        try:
            prototype = enums.layers.classes[entity]
            layer = prototype(name=name, **params)

            # With a connection, the stored object is the result of calling
            # the new layer on the connected layer(s).
            if connection is not None:
                layer = layer(connection)
        except KeyError as error:
            raise errors.SetError("There is no prototype for this layer!") from error
        except (ValueError, AttributeError, TypeError) as error:
            raise errors.SetError("Unable to set the layer!") from error

        if entity == "Input":
            self._inputs.append(name)

        self._layers[name] = layer

    def delete_last_layer(self) -> None:
        """
        Delete the last layer from the created model.

        The layer is also removed from the registered inputs and outputs, if
        present there.

        Raises
        ------
        DeleteError
            If there are no layers to remove.
        """
        try:
            name, _ = self._layers.popitem()
        except KeyError as error:
            raise errors.DeleteError("There are no layers to remove!") from error

        if name in self._inputs:
            self._inputs.remove(name)

        if name in self._outputs:
            self._outputs.remove(name)

    def set_outputs(self, outputs: t.Layers) -> None:
        """
        Set the output layers for the created model.

        Parameters
        ----------
        outputs : list of str
            Names of the layers.

        Raises
        ------
        SetError
            If there is an issue setting the output layers.
        """
        if not outputs:
            raise errors.SetError("Please, select at least one output!")

        # Keep a private copy: `delete_last_layer` removes names in place and
        # must not modify the list owned by the caller.
        self._outputs = list(outputs)

    def create(self) -> None:
        """
        Create the machine learning model.

        Raises
        ------
        CreateError
            If there is an issue creating the model.
        """
        try:
            input_layers = {name: self._layers[name] for name in self._inputs}
            output_layers = {name: self._layers[name] for name in self._outputs}
        except KeyError as error:
            # A registered input/output name has no matching layer object.
            raise errors.CreateError("There is no such layer in the model!") from error

        if not input_layers or not output_layers:
            raise errors.CreateError("There are no input or output layers!")

        try:
            self._object = tf.keras.Model(
                inputs=input_layers, outputs=output_layers, name=self._name
            )
        except (ValueError, AttributeError, TypeError) as error:
            raise errors.CreateError("Unable to create the model!") from error

        self._built = True

        self._set_config()
        self.update_state()

    @property
    def layers(self) -> t.LayerObject:
        """Objects of the layers."""
        return self._layers.copy()