Advanced Topics¶
Advanced usage patterns and best practices for the WebNN Python API.
Performance Optimization¶
Graph Compilation¶
Compile graphs once and reuse them:
import webnn

class ModelCache:
    def __init__(self):
        self.ml = webnn.ML()
        self.context = self.ml.create_context()
        self.graphs = {}

    def get_or_build_graph(self, name, builder_fn):
        """Cache compiled graphs for reuse."""
        if name not in self.graphs:
            builder = self.context.create_graph_builder()
            output = builder_fn(builder)
            self.graphs[name] = builder.build({name: output})
        return self.graphs[name]

# Usage
cache = ModelCache()

def build_relu(builder):
    x = builder.input("x", [100], "float32")
    return builder.relu(x)

# First call: compiles the graph
graph1 = cache.get_or_build_graph("relu", build_relu)

# Second call: returns the cached graph (fast!)
graph2 = cache.get_or_build_graph("relu", build_relu)

assert graph1 is graph2
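Once cached, a compiled graph can be executed repeatedly without paying the compilation cost again. A minimal sketch, reusing the cache's context and the execution pattern shown later in this page:

import numpy as np

# The output key matches the name passed to build() above
x_data = np.random.randn(100).astype(np.float32)
results = cache.context.compute(graph1, {"x": x_data})
print(results["relu"].shape)  # (100,)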
Memory-Efficient Constants¶
For large constant tensors, use the smallest data type your accuracy requirements allow:
import webnn
import numpy as np

ml = webnn.ML()
context = ml.create_context()
builder = context.create_graph_builder()

# Use float16 instead of float32 to halve memory usage
large_weights = np.random.randn(1000, 1000).astype('float16')
weights_op = builder.constant(large_weights)

print(f"Memory used: {large_weights.nbytes / 1024 / 1024:.2f} MB as float16 vs "
      f"{(large_weights.nbytes * 2) / 1024 / 1024:.2f} MB as float32")
Integration with Other Libraries¶
NumPy Integration with Execution¶
Seamless conversion between NumPy and WebNN:
import webnn
import numpy as np

ml = webnn.ML()
context = ml.create_context(accelerated=False)
builder = context.create_graph_builder()

# Build a simple matmul with NumPy weights
x = builder.input("x", [1, 100], "float32")
weights = np.random.randn(100, 50).astype('float32') * 0.01
bias = np.zeros(50, dtype='float32')

w_op = builder.constant(weights)
b_op = builder.constant(bias)
output = builder.add(builder.matmul(x, w_op), b_op)
graph = builder.build({"output": output})

# Execute with NumPy input
x_data = np.random.randn(1, 100).astype('float32')
results = context.compute(graph, {"x": x_data})

print(f"Input shape: {x_data.shape}")
print(f"Output shape: {results['output'].shape}")
print(f"Result is NumPy array: {isinstance(results['output'], np.ndarray)}")
ONNX Integration¶
Load existing ONNX models and convert them:
import webnn
import numpy as np

# Note: This is a conceptual example. Full ONNX loading
# would require parsing the ONNX protobuf format.

def load_onnx_weights(onnx_path):
    """
    Conceptual example of loading ONNX weights.
    In practice, you'd use onnx.load() to parse the model.
    """
    # This is a simplified example
    weights = {
        'fc1': np.random.randn(784, 128).astype('float32'),
        'fc1_bias': np.zeros(128, dtype='float32'),
        'fc2': np.random.randn(128, 10).astype('float32'),
        'fc2_bias': np.zeros(10, dtype='float32'),
    }
    return weights

def build_from_onnx_weights(weights):
    ml = webnn.ML()
    context = ml.create_context()
    builder = context.create_graph_builder()

    # Build graph using ONNX weights
    x = builder.input("input", [1, 784], "float32")

    w1 = builder.constant(weights['fc1'])
    b1 = builder.constant(weights['fc1_bias'])
    h1 = builder.matmul(x, w1)
    h1 = builder.add(h1, b1)
    h1 = builder.relu(h1)

    w2 = builder.constant(weights['fc2'])
    b2 = builder.constant(weights['fc2_bias'])
    output = builder.matmul(h1, w2)
    output = builder.add(output, b2)

    return builder.build({"output": output})

weights = load_onnx_weights("model.onnx")
graph = build_from_onnx_weights(weights)
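If the onnx package is installed, the placeholder loader above can be replaced with real weight extraction using the standard ONNX Python API. A minimal sketch (the function name is illustrative):

import onnx
from onnx import numpy_helper

def load_onnx_weights_for_real(onnx_path):
    """Extract initializer tensors from an ONNX model as NumPy arrays."""
    model = onnx.load(onnx_path)
    return {
        init.name: numpy_helper.to_array(init)
        for init in model.graph.initializer
    }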
Graph Introspection and Execution¶
Inspect and analyze compiled graphs, then execute them:
import webnn
import numpy as np

ml = webnn.ML()
context = ml.create_context(accelerated=False)
builder = context.create_graph_builder()

# Build a complex graph
x = builder.input("x", [2, 3], "float32")
y = builder.input("y", [3, 4], "float32")
z = builder.matmul(x, y)
w = builder.relu(z)
output = builder.sigmoid(w)
graph = builder.build({"final": output})

# Inspect the graph
print("Graph Analysis:")
print(f"  Inputs: {graph.get_input_names()}")
print(f"  Outputs: {graph.get_output_names()}")
print(f"  Total operands: {graph.operand_count}")
print(f"  Total operations: {graph.operation_count}")

# Execute the graph
x_data = np.random.randn(2, 3).astype('float32')
y_data = np.random.randn(3, 4).astype('float32')
results = context.compute(graph, {"x": x_data, "y": y_data})

print(f"\nExecution:")
print(f"  Output shape: {results['final'].shape}")
print(f"  Output range: [{results['final'].min():.4f}, {results['final'].max():.4f}]")
Custom Graph Patterns¶
Residual Connections¶
import webnn
import numpy as np

def residual_block(builder, x, hidden_size):
    """Create a residual block: output = relu(x + fc(x))"""
    # Linear transformation
    w = builder.constant(np.random.randn(hidden_size, hidden_size).astype('float32') * 0.01)
    transformed = builder.matmul(x, w)

    # Add residual connection
    residual = builder.add(x, transformed)

    # Activation
    output = builder.relu(residual)
    return output

ml = webnn.ML()
context = ml.create_context()
builder = context.create_graph_builder()

x = builder.input("x", [1, 128], "float32")
y = residual_block(builder, x, 128)
graph = builder.build({"output": y})
context.convert_to_onnx(graph, "residual.onnx")
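Because the helper takes the builder and an operand, blocks compose naturally. A sketch stacking three blocks into a deeper model (using a fresh builder, assuming one build() per builder):

# Stack three residual blocks into a single graph
builder2 = context.create_graph_builder()
h = builder2.input("x", [1, 128], "float32")
for _ in range(3):
    h = residual_block(builder2, h, 128)
deep_graph = builder2.build({"output": h})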
Attention Mechanism (Simplified)¶
import webnn
import numpy as np

def scaled_dot_product_attention(builder, query, key, value, d_k):
    """
    Simplified attention mechanism (without softmax for now).
    attention = (query @ key.T) @ value
    """
    # Transpose key (conceptually)
    key_t = key  # In practice, you'd need to handle transposition

    # Attention scores: query @ key.T
    scores = builder.matmul(query, key_t)

    # Apply the scaling factor as a scalar constant
    # (scores is a graph operand, not an array, so the scale must be
    # a constant that mul broadcasts across the scores)
    scale = 1.0 / np.sqrt(d_k)
    scale_tensor = builder.constant(np.array(scale, dtype=np.float32))
    scaled_scores = builder.mul(scores, scale_tensor)

    # Attention output: scores @ value
    output = builder.matmul(scaled_scores, value)
    return output
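A usage sketch with illustrative shapes; since the helper skips the transpose, the key is fed in already transposed ([d_k, seq_len]), and mul is assumed to broadcast the scalar:

ml = webnn.ML()
context = ml.create_context()
builder = context.create_graph_builder()

query = builder.input("query", [4, 64], "float32")   # [seq_len, d_k]
key_t = builder.input("key_t", [64, 4], "float32")   # [d_k, seq_len], pre-transposed
value = builder.input("value", [4, 64], "float32")   # [seq_len, d_k]

attn = scaled_dot_product_attention(builder, query, key_t, value, d_k=64)
graph = builder.build({"attention": attn})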
Error Handling Strategies¶
Comprehensive Error Handling¶
import webnn
import sys
import traceback

def safe_graph_export(graph_fn, output_path):
    """
    Safely build and export a graph with comprehensive error handling.
    """
    try:
        ml = webnn.ML()
        context = ml.create_context()
        builder = context.create_graph_builder()

        # Build the graph
        try:
            output = graph_fn(builder)
            graph = builder.build({"output": output})
        except ValueError as e:
            print(f"[ERROR] Graph validation failed: {e}", file=sys.stderr)
            traceback.print_exc()
            return False

        # Export to ONNX
        try:
            context.convert_to_onnx(graph, output_path)
            print(f"[OK] Successfully exported to {output_path}")
            return True
        except IOError as e:
            print(f"[ERROR] File I/O error: {e}", file=sys.stderr)
            return False
        except RuntimeError as e:
            print(f"[ERROR] Conversion failed: {e}", file=sys.stderr)
            return False

    except Exception as e:
        print(f"[ERROR] Unexpected error: {e}", file=sys.stderr)
        traceback.print_exc()
        return False

# Usage
def my_graph(builder):
    x = builder.input("x", [10], "float32")
    return builder.relu(x)

success = safe_graph_export(my_graph, "model.onnx")
sys.exit(0 if success else 1)
Testing Graphs¶
Unit Testing WebNN Graphs¶
import unittest
import webnn
import numpy as np
import os

class TestWebNNGraphs(unittest.TestCase):
    def setUp(self):
        """Set up test fixtures."""
        self.ml = webnn.ML()
        self.context = self.ml.create_context()

    def test_simple_relu(self):
        """Test ReLU graph creation and export."""
        builder = self.context.create_graph_builder()
        x = builder.input("x", [10], "float32")
        y = builder.relu(x)
        graph = builder.build({"y": y})

        self.assertEqual(graph.operand_count, 2)
        self.assertEqual(graph.operation_count, 1)
        self.assertIn("x", graph.get_input_names())
        self.assertIn("y", graph.get_output_names())

    def test_onnx_export(self):
        """Test ONNX export functionality."""
        builder = self.context.create_graph_builder()
        x = builder.input("x", [10], "float32")
        y = builder.relu(x)
        graph = builder.build({"y": y})

        output_path = "test_model.onnx"
        try:
            self.context.convert_to_onnx(graph, output_path)
            self.assertTrue(os.path.exists(output_path))
            self.assertGreater(os.path.getsize(output_path), 0)
        finally:
            if os.path.exists(output_path):
                os.remove(output_path)

    def test_valid_shapes(self):
        """Test that valid shapes, including scalars, are accepted."""
        builder = self.context.create_graph_builder()

        # A regular multi-dimensional shape should work
        x = builder.input("x", [10, 20], "float32")
        self.assertIsNotNone(x)

        # An empty shape is valid (scalar)
        scalar = builder.input("scalar", [], "float32")
        self.assertIsNotNone(scalar)

    def test_multiple_outputs(self):
        """Test graphs with multiple outputs."""
        builder = self.context.create_graph_builder()
        x = builder.input("x", [10], "float32")
        y1 = builder.relu(x)
        y2 = builder.sigmoid(x)
        graph = builder.build({"relu": y1, "sigmoid": y2})

        outputs = graph.get_output_names()
        self.assertIn("relu", outputs)
        self.assertIn("sigmoid", outputs)

if __name__ == '__main__':
    unittest.main()
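Structural assertions catch wiring mistakes, but it is also worth checking numerics against a NumPy reference. A minimal sketch, assuming a CPU context as in the execution examples above:

import numpy as np
import webnn

def test_relu_numerics():
    """Compare graph output against a NumPy reference implementation."""
    ml = webnn.ML()
    context = ml.create_context(accelerated=False)
    builder = context.create_graph_builder()
    x = builder.input("x", [10], "float32")
    graph = builder.build({"y": builder.relu(x)})

    x_data = np.linspace(-5, 5, 10, dtype=np.float32)
    results = context.compute(graph, {"x": x_data})
    np.testing.assert_allclose(results["y"], np.maximum(x_data, 0.0))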
Debugging Tips¶
Verbose Graph Building¶
import webnn

class VerboseBuilder:
    """Wrapper that logs all operations."""

    def __init__(self, context):
        self.context = context
        self.builder = context.create_graph_builder()
        self.op_count = 0

    def input(self, name, shape, dtype="float32"):
        result = self.builder.input(name, shape, dtype)
        print(f"[{self.op_count}] INPUT: {name} {shape} {dtype}")
        self.op_count += 1
        return result

    def constant(self, value, **kwargs):
        result = self.builder.constant(value, **kwargs)
        print(f"[{self.op_count}] CONSTANT: shape={value.shape}")
        self.op_count += 1
        return result

    def relu(self, x):
        result = self.builder.relu(x)
        print(f"[{self.op_count}] RELU")
        self.op_count += 1
        return result

    def matmul(self, a, b):
        result = self.builder.matmul(a, b)
        print(f"[{self.op_count}] MATMUL")
        self.op_count += 1
        return result

    # Add other operations as needed...

    def build(self, outputs):
        print(f"\nBuilding graph with {len(outputs)} output(s)...")
        return self.builder.build(outputs)

# Usage
ml = webnn.ML()
context = ml.create_context()
builder = VerboseBuilder(context)

x = builder.input("x", [10], "float32")
y = builder.relu(x)
graph = builder.build({"y": y})
Output:

[0] INPUT: x [10] float32
[1] RELU

Building graph with 1 output(s)...
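Wrapping each method by hand gets tedious as the operation set grows. A generic delegating wrapper can log every builder call automatically; a sketch (the class name is illustrative):

class AutoVerboseBuilder:
    """Log every builder method call via attribute delegation."""

    def __init__(self, context):
        self._builder = context.create_graph_builder()
        self._op_count = 0

    def __getattr__(self, name):
        attr = getattr(self._builder, name)
        if not callable(attr):
            return attr

        def logged(*args, **kwargs):
            print(f"[{self._op_count}] {name.upper()}")
            self._op_count += 1
            return attr(*args, **kwargs)

        return logged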
Platform-Specific Features¶
Backend Selection and Execution¶
Choose the best backend for your platform and execute models:
import webnn
import numpy as np
import platform

ml = webnn.ML()

# Try GPU/NPU acceleration first
context = ml.create_context(accelerated=True, power_preference="high-performance")

print(f"Platform: {platform.system()}")
print(f"Accelerated: {context.accelerated}")

# Build a simple graph
builder = context.create_graph_builder()
x = builder.input("x", [10], "float32")
y = builder.relu(x)
graph = builder.build({"y": y})

# Execute on selected backend
x_data = np.array([-5, -3, -1, 0, 1, 3, 5, 7, 9, 11], dtype=np.float32)
results = context.compute(graph, {"x": x_data})
print(f"Result: {results['y']}")

# Export for different platforms
context.convert_to_onnx(graph, "model.onnx")
print("[OK] Exported ONNX (cross-platform)")

if platform.system() == "Darwin":
    try:
        context.convert_to_coreml(graph, "model.mlmodel")
        print("[OK] Exported CoreML (macOS GPU/Neural Engine)")
    except Exception as e:
        print(f"[ERROR] CoreML export failed: {e}")
Best Practices Summary¶
- Compile once, reuse: Cache compiled graphs
- Use appropriate data types: float16 for memory efficiency
- Handle errors gracefully: Wrap operations in try-except blocks
- Test thoroughly: Write unit tests for your graphs
- Validate shapes: Check tensor dimensions before building
- Profile performance: Measure compilation and export times (see the timing sketch at the end of this page)
- Document graphs: Add comments explaining graph structure
- Use type hints: Leverage Python type hints for better IDE support
import webnn
import numpy as np

def build_classifier(
    input_size: int,
    hidden_size: int,
    num_classes: int
) -> webnn.MLGraph:
    """
    Build a simple classifier graph.

    Args:
        input_size: Size of input features
        hidden_size: Size of hidden layer
        num_classes: Number of output classes

    Returns:
        Compiled MLGraph ready for export
    """
    ml = webnn.ML()
    context = ml.create_context()
    builder = context.create_graph_builder()

    # Input -> hidden layer with ReLU
    x = builder.input("input", [1, input_size], "float32")
    w1 = builder.constant(np.random.randn(input_size, hidden_size).astype('float32') * 0.01)
    h = builder.relu(builder.matmul(x, w1))

    # Hidden -> output layer
    w2 = builder.constant(np.random.randn(hidden_size, num_classes).astype('float32') * 0.01)
    output = builder.matmul(h, w2)

    graph = builder.build({"output": output})
    return graph
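For the profiling bullet above, wall-clock timing with time.perf_counter from the standard library is usually enough to compare build and export costs. A minimal sketch (the output file name is illustrative):

import time
import webnn
import numpy as np

ml = webnn.ML()
context = ml.create_context()

# Time graph construction and compilation
start = time.perf_counter()
builder = context.create_graph_builder()
x = builder.input("x", [1, 784], "float32")
w = builder.constant(np.random.randn(784, 10).astype('float32') * 0.01)
graph = builder.build({"logits": builder.matmul(x, w)})
build_seconds = time.perf_counter() - start

# Time ONNX export separately
start = time.perf_counter()
context.convert_to_onnx(graph, "profiled.onnx")
export_seconds = time.perf_counter() - start

print(f"Build:  {build_seconds * 1000:.1f} ms")
print(f"Export: {export_seconds * 1000:.1f} ms")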