Rust - Smart Pointers

Overview

Estimated time: 45–55 minutes

Master Rust's smart pointers for managing heap memory and shared ownership. Learn about Box<T>, Rc<T>, Arc<T>, and interior mutability patterns that enable flexible memory management while maintaining safety.

Learning Objectives

Prerequisites

Box<T> - Heap Allocation

Basic Box Usage

Box<T> stores data on the heap instead of the stack:

/// Demonstrates the basics of `Box<T>`: heap allocation, dereferencing, and move semantics.
fn main() {
    // Store a single value on the heap
    let boxed_number = Box::new(42);
    // `Box<T>` forwards `Display` to the value it owns, so it prints like a plain integer
    println!("Boxed number: {}", boxed_number);

    // An explicit `*` copies the `i32` out of the box
    let value = *boxed_number;
    println!("Dereferenced value: {}", value);

    // Large data structures benefit from heap allocation.
    // Build the buffer directly on the heap: `Box::new([0; 1000000])` may first
    // materialize the 4 MB array on the stack, which can overflow a small stack.
    let large_array: Box<[i32]> = vec![0; 1_000_000].into_boxed_slice();
    assert_eq!(large_array.len(), 1_000_000);
    println!("Large array created on heap");

    // Box ownership works like any other value
    let box1 = Box::new(String::from("Hello"));
    let box2 = box1; // Ownership moved; using `box1` after this line is a compile error
    println!("Moved box: {}", box2);
}

Recursive Data Structures

Box enables recursive types by providing indirection:

// Binary tree using Box
/// A binary tree whose subtrees are boxed, giving the recursive type a known size.
#[derive(Debug)]
enum BinaryTree<T> {
    /// A tree with no nodes.
    Empty,
    /// A node holding a value plus its (possibly empty) left and right subtrees.
    Node {
        value: T,
        left: Box<BinaryTree<T>>,
        right: Box<BinaryTree<T>>,
    },
}

impl<T> BinaryTree<T> {
    /// Creates an empty tree.
    fn new() -> Self {
        BinaryTree::Empty
    }

    /// Creates a node with the given value and two empty subtrees.
    fn leaf(value: T) -> Self {
        BinaryTree::Node {
            value,
            left: Box::new(BinaryTree::Empty),
            right: Box::new(BinaryTree::Empty),
        }
    }

    /// Creates a node with the given value, taking ownership of both subtrees.
    fn node(value: T, left: BinaryTree<T>, right: BinaryTree<T>) -> Self {
        BinaryTree::Node {
            value,
            left: Box::new(left),
            right: Box::new(right),
        }
    }
}

// Linked list using Box
/// A singly linked list node that owns the remainder of the list through a `Box`.
#[derive(Debug)]
struct Node<T> {
    data: T,
    next: Option<Box<Node<T>>>,
}

impl<T> Node<T> {
    /// Creates a one-element list.
    fn new(data: T) -> Self {
        Node { data, next: None }
    }

    /// Appends `data` at the end of the list.
    ///
    /// Walks to the tail with a loop rather than recursion, so appending does not
    /// add one stack frame per existing element.
    fn append(&mut self, data: T) {
        let mut tail = &mut self.next;
        while let Some(node) = tail {
            tail = &mut node.next;
        }
        *tail = Some(Box::new(Node::new(data)));
    }

    /// Returns an iterator over references to the values, starting at this node.
    fn iter(&self) -> NodeIterator<'_, T> {
        NodeIterator { current: Some(self) }
    }
}

/// Borrowing iterator over a `Node` list; `current` is the next node to yield.
struct NodeIterator<'a, T> {
    current: Option<&'a Node<T>>,
}

impl<'a, T> Iterator for NodeIterator<'a, T> {
    type Item = &'a T;

    fn next(&mut self) -> Option<Self::Item> {
        let node = self.current?;
        // `as_deref` turns `&Option<Box<Node<T>>>` into `Option<&Node<T>>`
        self.current = node.next.as_deref();
        Some(&node.data)
    }
}

/// Builds a small binary tree and a four-element linked list, printing both.
fn main() {
    // Assemble the tree from its subtrees, then attach them to the root
    let right_subtree = BinaryTree::node(3, BinaryTree::leaf(4), BinaryTree::leaf(5));
    let tree = BinaryTree::node(1, BinaryTree::leaf(2), right_subtree);
    println!("Tree: {:?}", tree);

    // Start the list at 1 and append the remaining values in order
    let mut list = Node::new(1);
    for value in 2..=4 {
        list.append(value);
    }

    println!("Linked list values:");
    list.iter().for_each(|value| println!("  {}", value));
}

Rc<T> - Reference Counting

Shared Ownership

Rc<T> enables multiple owners of the same data:

use std::rc::Rc;

/// Example payload that `main` places behind an `Rc` and shares between several handles.
#[derive(Debug)]
struct SharedData {
    // Numeric identifier of the resource
    id: u32,
    // Human-readable label of the resource
    name: String,
}

/// Walks through the life cycle of an `Rc`: creation, cloning, comparison, and dropping a handle.
fn main() {
    // Wrap the value in an `Rc`; this first handle is the only owner so far
    let payload = SharedData {
        id: 1,
        name: String::from("Shared Resource"),
    };
    let data = Rc::new(payload);
    println!("Reference count: {}", Rc::strong_count(&data)); // 1

    // `Rc::clone` hands out another owner of the same allocation; the payload is not copied
    let data_ref1 = Rc::clone(&data);
    let data_ref2 = Rc::clone(&data);
    let owners = Rc::strong_count(&data);
    println!("Reference count after cloning: {}", owners); // 3

    println!("Data from original: {:?}", data);
    println!("Data from ref1: {:?}", data_ref1);
    println!("Data from ref2: {:?}", data_ref2);

    // Every handle points at the same allocation
    let same_allocation = Rc::ptr_eq(&data, &data_ref1);
    println!("Same data? {}", same_allocation); // true

    // Dropping one handle only lowers the count; the data lives until the last handle is gone
    drop(data_ref1);
    let remaining = Rc::strong_count(&data);
    println!("Reference count after dropping ref1: {}", remaining); // 2
}

Shared Data Structures

Use Rc for shared ownership in data structures:

use std::rc::Rc;

/// A tree node whose children are reference-counted, so a subtree can have several owners.
#[derive(Debug)]
struct Node<T> {
    value: T,
    children: Vec<Rc<Node<T>>>,
}

impl<T> Node<T> {
    /// Creates a childless node already wrapped in an `Rc`.
    fn new(value: T) -> Rc<Node<T>> {
        Rc::new(Node {
            value,
            children: Vec::new(),
        })
    }

    /// Pushes `child` onto `parent`'s child list.
    ///
    /// `Rc::get_mut` only grants mutable access while `parent` is the sole handle
    /// to its node. If the parent is already shared, this call does nothing and
    /// `child` is dropped. Mutating a shared node requires interior mutability
    /// (for example `RefCell`).
    fn add_child(parent: &mut Rc<Node<T>>, child: Rc<Node<T>>) {
        if let Some(parent_mut) = Rc::get_mut(parent) {
            parent_mut.children.push(child);
        }
    }
}

// Graph structure with shared nodes
/// Owns one `Rc` handle to every node that was added through `add_node`.
#[derive(Debug)]
struct Graph<T> {
    nodes: Vec<Rc<GraphNode<T>>>,
}

/// A graph vertex holding a value and shared handles to the nodes it points at.
#[derive(Debug)]
struct GraphNode<T> {
    value: T,
    edges: Vec<Rc<GraphNode<T>>>,
}

impl<T> Graph<T> {
    /// Creates a graph with no nodes.
    fn new() -> Self {
        Graph { nodes: Vec::new() }
    }

    /// Creates a node with no edges, stores one handle in the graph, and returns another.
    fn add_node(&mut self, value: T) -> Rc<GraphNode<T>> {
        let node = Rc::new(GraphNode {
            value,
            edges: Vec::new(),
        });
        self.nodes.push(Rc::clone(&node));
        node
    }

    /// Conceptual placeholder: prints a message and leaves both nodes unchanged.
    ///
    /// Adding an edge would mean mutating a node that is already shared through
    /// `Rc`, which needs interior mutability (for example `RefCell`).
    fn connect_nodes(from: &Rc<GraphNode<T>>, to: Rc<GraphNode<T>>) {
        // Explicitly discard the arguments; nothing is connected in this sketch
        let _ = (from, to);
        println!("Would connect nodes (need RefCell for mutation)");
    }
}

/// Adds three nodes to a graph and reports the node count and one node's reference count.
fn main() {
    let mut graph = Graph::new();

    // Each call stores one handle inside the graph and returns a second one to the caller
    let first = graph.add_node("Node 1");
    let _second = graph.add_node("Node 2");
    let _third = graph.add_node("Node 3");

    let node_total = graph.nodes.len();
    println!("Graph has {} nodes", node_total);

    let first_owners = Rc::strong_count(&first);
    println!("Node 1 reference count: {}", first_owners); // 2 (graph + local)
}

Arc<T> - Atomic Reference Counting

Thread-Safe Shared Ownership

Arc<T> is the thread-safe version of Rc<T>:

use std::sync::Arc;
use std::thread;
use std::time::Duration;

/// Example configuration value that `main` wraps in an `Arc` and hands to several threads.
#[derive(Debug)]
struct SharedConfig {
    // Display name of the configuration
    name: String,
    // Example numeric setting
    value: u32,
}

/// Shares one read-only configuration with three threads and prints the `Arc` counts seen.
fn main() {
    let config = Arc::new(SharedConfig {
        name: String::from("Global Config"),
        value: 42,
    });
    println!("Initial reference count: {}", Arc::strong_count(&config));

    // Start the workers one after another, giving each its own handle to the same config
    let workers: Vec<_> = (0..3)
        .map(|i| {
            let config_for_thread = Arc::clone(&config);
            thread::spawn(move || {
                println!("Thread {}: {:?}", i, config_for_thread);
                println!(
                    "Thread {}: Reference count: {}",
                    i,
                    Arc::strong_count(&config_for_thread)
                );
                thread::sleep(Duration::from_millis(100));
            })
        })
        .collect();

    // Block until every worker has finished (and dropped its handle)
    for worker in workers {
        worker.join().unwrap();
    }

    println!("Final reference count: {}", Arc::strong_count(&config));
}

Shared State Between Threads

Combine Arc with Mutex for shared mutable state:

use std::sync::{Arc, Mutex};
use std::thread;

/// Demonstrates `Arc<Mutex<T>>`: five threads bump a shared counter, then three threads
/// push into a shared vector.
fn main() {
    // Shared counter between threads
    let counter = Arc::new(Mutex::new(0));

    let incrementers: Vec<_> = (0..5)
        .map(|i| {
            let shared_counter = Arc::clone(&counter);
            thread::spawn(move || {
                for _ in 0..10 {
                    // The guard is held through the print so each line shows the value
                    // this thread just wrote
                    let mut guard = shared_counter.lock().unwrap();
                    *guard += 1;
                    println!("Thread {} incremented counter to {}", i, *guard);
                }
            })
        })
        .collect();

    // Wait for all threads
    for worker in incrementers {
        worker.join().unwrap();
    }

    println!("Final counter value: {}", *counter.lock().unwrap());

    // Shared data structure
    let shared_vec = Arc::new(Mutex::new(Vec::new()));

    let pushers: Vec<_> = (0..3)
        .map(|i| {
            let shared_items = Arc::clone(&shared_vec);
            thread::spawn(move || {
                let mut items = shared_items.lock().unwrap();
                items.push(format!("Item from thread {}", i));
            })
        })
        .collect();

    for worker in pushers {
        worker.join().unwrap();
    }

    println!("Shared vector: {:?}", *shared_vec.lock().unwrap());
}

Weak References

Breaking Reference Cycles

Use Weak references to prevent memory leaks from cycles:

use std::cell::RefCell;
use std::rc::{Rc, Weak};

/// Owns its children through strong `Rc` handles kept behind a `RefCell`.
#[derive(Debug)]
struct Parent {
    name: String,
    children: RefCell<Vec<Rc<Child>>>,
}

/// Points back at its parent through a `Weak` handle, so parent and child do not
/// keep each other alive.
#[derive(Debug)]
struct Child {
    name: String,
    parent: RefCell<Weak<Parent>>,
}

impl Parent {
    /// Creates a parent with no children, already wrapped in an `Rc`.
    fn new(name: String) -> Rc<Parent> {
        Rc::new(Parent {
            name,
            children: RefCell::new(Vec::new()),
        })
    }

    /// Creates a child that weakly references `parent`, registers it in the parent's
    /// child list, and returns a second strong handle to it.
    fn add_child(parent: &Rc<Parent>, name: String) -> Rc<Child> {
        let child = Rc::new(Child {
            name,
            // `downgrade` does not increase the parent's strong count
            parent: RefCell::new(Rc::downgrade(parent)),
        });

        parent.children.borrow_mut().push(Rc::clone(&child));
        child
    }
}

impl Child {
    /// Returns a strong handle to the parent, or `None` once the parent has been dropped.
    fn get_parent(&self) -> Option<Rc<Parent>> {
        self.parent.borrow().upgrade()
    }
}

/// Builds a parent with two children, inspects the counts, then drops the parent to show
/// that the children's weak back-references stop upgrading.
fn main() {
    let parent = Parent::new(String::from("Alice"));
    println!("Parent strong count: {}", Rc::strong_count(&parent));

    let child1 = Parent::add_child(&parent, String::from("Bob"));
    // Kept alive until the end of `main`, like the first child
    let _child2 = Parent::add_child(&parent, String::from("Charlie"));

    let parent_owners = Rc::strong_count(&parent);
    println!("Parent strong count after adding children: {}", parent_owners);
    println!("Child1 strong count: {}", Rc::strong_count(&child1));

    // Reach the parent through the child's weak handle; the upgraded `Rc` is a
    // temporary that is released when the `match` ends
    match child1.get_parent() {
        Some(parent_ref) => println!("Child1's parent: {}", parent_ref.name),
        None => {}
    }

    // One weak handle per child
    println!("Parent weak count: {}", Rc::weak_count(&parent));

    // Releasing the only strong handle destroys the parent; the weak handles stay
    // behind but can no longer be upgraded
    drop(parent);

    match child1.get_parent() {
        Some(parent_ref) => println!("Parent still exists: {}", parent_ref.name),
        None => println!("Parent has been dropped"),
    }
}

Choosing Smart Pointers

Decision Guide

When to use each smart pointer:

// Box - Single ownership, heap allocation
/// Shows three typical reasons to reach for `Box<T>`.
fn use_box_when() {
    // 1. Large data that would overflow the stack.
    // Built through `vec!` so the megabyte buffer never has to exist on the stack,
    // which `Box::new([0u8; 1_000_000])` may require before moving it to the heap.
    let big_data: Box<[u8]> = vec![0u8; 1_000_000].into_boxed_slice();
    assert_eq!(big_data.len(), 1_000_000);

    // 2. Trait objects (dynamic dispatch)
    trait Draw {
        fn draw(&self);
    }
    struct Circle;
    impl Draw for Circle {
        fn draw(&self) {
            println!("Drawing circle");
        }
    }

    // `dyn Draw` has no statically known size, so it must live behind a pointer
    let drawable: Box<dyn Draw> = Box::new(Circle);
    drawable.draw();

    // 3. Recursive data structures
    enum List {
        Cons(i32, Box<List>),
        Nil,
    }

    // Constructed only to show the shape of the type; the value itself is not used
    let _list = List::Cons(1, Box::new(List::Cons(2, Box::new(List::Nil))));
}

// Rc - Multiple ownership, single-threaded
/// Shares one vector between two read-only handles on a single thread and prints it via both.
fn use_rc_when() {
    use std::rc::Rc;

    // Several parts of a program can hold the same data without copying it
    let shared_data = Rc::new(vec![1, 2, 3, 4, 5]);
    let (reader1, reader2) = (Rc::clone(&shared_data), Rc::clone(&shared_data));

    // Each handle sees the very same vector
    println!("Reader1: {:?}", reader1);
    println!("Reader2: {:?}", reader2);
}

// Arc - Multiple ownership, multi-threaded
/// Shares one vector with three threads through `Arc` and waits for all of them.
fn use_arc_when() {
    use std::sync::Arc;
    use std::thread;

    // Data that several threads need to read
    let shared_data = Arc::new(vec![1, 2, 3, 4, 5]);

    let mut workers = Vec::with_capacity(3);
    for i in 0..3 {
        // Each thread takes ownership of its own handle
        let data = Arc::clone(&shared_data);
        workers.push(thread::spawn(move || {
            println!("Thread {}: {:?}", i, data);
        }));
    }

    for worker in workers {
        worker.join().unwrap();
    }
}

/// Runs the three decision-guide examples in order: `Box`, then `Rc`, then `Arc`.
fn main() {
    use_box_when();
    use_rc_when();
    use_arc_when();
}

Performance Considerations

Memory and Performance Trade-offs

Understanding the costs of smart pointers:

use std::rc::Rc;
use std::sync::Arc;
use std::time::Instant;

/// Times building one million values under each ownership model, then one million
/// reference-count clones, printing the elapsed wall-clock time of every phase.
///
/// Each result is passed through `std::hint::black_box` so the optimizer cannot
/// discard work whose outcome is otherwise unused. Without it, an optimized build
/// may remove the clone loops entirely and report meaningless timings.
fn benchmark_ownership_models() {
    use std::hint::black_box;

    const COUNT: usize = 1_000_000;

    // Direct ownership (fastest)
    let start = Instant::now();
    let mut values = Vec::new();
    for i in 0..COUNT {
        values.push(i);
    }
    black_box(&values);
    println!("Direct ownership: {:?}", start.elapsed());

    // Box overhead
    let start = Instant::now();
    let mut boxed_values = Vec::new();
    for i in 0..COUNT {
        boxed_values.push(Box::new(i));
    }
    black_box(&boxed_values);
    println!("Box overhead: {:?}", start.elapsed());

    // Rc overhead
    let start = Instant::now();
    let mut rc_values = Vec::new();
    for i in 0..COUNT {
        rc_values.push(Rc::new(i));
    }
    black_box(&rc_values);
    println!("Rc overhead: {:?}", start.elapsed());

    // Arc overhead (atomic operations)
    let start = Instant::now();
    let mut arc_values = Vec::new();
    for i in 0..COUNT {
        arc_values.push(Arc::new(i));
    }
    black_box(&arc_values);
    println!("Arc overhead: {:?}", start.elapsed());

    // Cloning costs: each clone is created and dropped again within one iteration
    let rc_value = Rc::new(42);
    let start = Instant::now();
    for _ in 0..COUNT {
        black_box(Rc::clone(&rc_value));
    }
    println!("Rc clone cost: {:?}", start.elapsed());

    let arc_value = Arc::new(42);
    let start = Instant::now();
    for _ in 0..COUNT {
        black_box(Arc::clone(&arc_value));
    }
    println!("Arc clone cost: {:?}", start.elapsed());
}

/// Runs the ownership-model benchmark once.
fn main() {
    benchmark_ownership_models();
}

Common Patterns

Smart Pointer Idioms

Common patterns when using smart pointers:

use std::rc::Rc;
use std::cell::RefCell;

// Configuration pattern
/// Application-wide settings that several services read but never modify.
struct AppConfig {
    database_url: String,
    api_key: String,
    max_connections: u32,
}

/// A cheaply clonable, single-threaded handle to the one `AppConfig` instance.
type SharedConfig = Rc<AppConfig>;

/// Example service that reads the database settings from the shared configuration.
struct DatabaseService {
    config: SharedConfig,
}

/// Example service that reads the API credentials from the shared configuration.
struct ApiService {
    config: SharedConfig,
}

impl DatabaseService {
    /// Creates the service, keeping its own handle to the shared configuration.
    fn new(config: SharedConfig) -> Self {
        DatabaseService { config }
    }

    /// Prints the database URL and connection limit taken from the configuration.
    fn connect(&self) {
        println!("Connecting to: {}", self.config.database_url);
        println!("Max connections: {}", self.config.max_connections);
    }
}

impl ApiService {
    /// Creates the service, keeping its own handle to the shared configuration.
    fn new(config: SharedConfig) -> Self {
        ApiService { config }
    }

    /// Prints the API key taken from the configuration.
    fn authenticate(&self) {
        // NOTE: demo only; real code should never write a secret to stdout or logs
        println!("Using API key: {}", self.config.api_key);
    }
}

// Cache pattern with interior mutability
/// A single-threaded cache that can be shared via `Rc` and mutated through `RefCell`.
type Cache<K, V> = Rc<RefCell<std::collections::HashMap<K, V>>>;

/// Creates an empty shared cache.
///
/// Clone the returned handle with `Rc::clone` to give other components access to
/// the same underlying map.
fn create_shared_cache<K, V>() -> Cache<K, V>
where
    K: std::hash::Hash + Eq,
{
    Rc::new(RefCell::new(std::collections::HashMap::new()))
}

/// Demonstrates both idioms: one configuration shared by two services, and one cache
/// written through one handle and read through another.
fn main() {
    // Shared configuration
    let config = Rc::new(AppConfig {
        database_url: "postgresql://localhost/myapp".to_string(),
        api_key: "secret-key-123".to_string(),
        max_connections: 10,
    });

    let db_service = DatabaseService::new(Rc::clone(&config));
    let api_service = ApiService::new(Rc::clone(&config));

    db_service.connect();
    api_service.authenticate();

    // Shared cache; the key and value types are spelled out because the alias is generic
    let cache: Cache<String, i32> = create_shared_cache();

    // Multiple components can share the cache
    let cache_ref1 = Rc::clone(&cache);
    let cache_ref2 = Rc::clone(&cache);

    // The mutable borrow ends with this statement, so the read below does not conflict
    cache_ref1.borrow_mut().insert("key1".to_string(), 42);

    if let Some(value) = cache_ref2.borrow().get("key1") {
        println!("Cached value: {}", value);
    }
}

Common Pitfalls

Mistakes to Avoid

Checks for Understanding

  1. When would you use Box<T> instead of direct ownership?
  2. What's the difference between Rc<T> and Arc<T>?
  3. How do you prevent memory leaks from reference cycles?
  4. What happens when the last Rc to some data is dropped?
Answers
  1. For heap allocation of large data, trait objects, or recursive data structures
  2. Rc is single-threaded; Arc is thread-safe with atomic reference counting
  3. Use Weak references to break cycles and prevent memory leaks
  4. The value is dropped as soon as the strong reference count reaches zero; its heap allocation is freed once no Weak references remain either