Rust - Standard Collections

Overview

Estimated time: 40–50 minutes

Master Rust's standard collection types. Learn when to use Vec, HashMap, BTreeMap, HashSet, and other collections. Understand performance characteristics and choose the right collection for your use case.

Learning Objectives

Prerequisites

Sequential Collections

Vec<T> - Dynamic Arrays

The most common collection for ordered, indexed data:

fn main() {
    // Creation and initialization
    let mut numbers = Vec::new();
    numbers.push(1);
    numbers.push(2);
    numbers.push(3);
    
    let names = vec!["Alice", "Bob", "Charlie"];
    let zeros = vec![0; 5]; // [0, 0, 0, 0, 0]
    
    // Capacity management
    let mut vec = Vec::with_capacity(10);
    println!("Capacity: {}, Length: {}", vec.capacity(), vec.len());
    
    vec.extend([1, 2, 3, 4, 5]);
    println!("After extending: Capacity: {}, Length: {}", vec.capacity(), vec.len());
    
    // Access and modification
    if let Some(first) = numbers.first() {
        println!("First element: {}", first);
    }
    
    if let Some(last) = numbers.last_mut() {
        *last = 10;
    }
    
    // Slicing
    let slice = &vec[1..4];
    println!("Slice: {:?}", slice);
    
    // Removal and insertion
    vec.insert(2, 100);
    let removed = vec.remove(1);
    println!("Removed: {}, Vec: {:?}", removed, vec);
}

VecDeque<T> - Double-Ended Queue

Efficient insertion and removal at both ends:

use std::collections::VecDeque;

fn main() {
    let mut deque = VecDeque::new();
    
    // Add to both ends
    deque.push_back(1);
    deque.push_back(2);
    deque.push_front(0);
    deque.push_front(-1);
    
    println!("Deque: {:?}", deque); // [-1, 0, 1, 2]
    
    // Remove from both ends
    let front = deque.pop_front();
    let back = deque.pop_back();
    
    println!("Front: {:?}, Back: {:?}", front, back); // Some(-1), Some(2)
    println!("Remaining: {:?}", deque); // [0, 1]
    
    // Use as a queue (FIFO)
    let mut queue = VecDeque::new();
    queue.push_back("first");
    queue.push_back("second");
    queue.push_back("third");
    
    while let Some(item) = queue.pop_front() {
        println!("Processing: {}", item);
    }
    
    // Use as a stack (LIFO)
    let mut stack = VecDeque::new();
    stack.push_back("first");
    stack.push_back("second");
    stack.push_back("third");
    
    while let Some(item) = stack.pop_back() {
        println!("Processing: {}", item);
    }
}

Key-Value Collections

HashMap<K, V> - Hash-Based Maps

Fast lookups with average O(1) performance:

use std::collections::HashMap;

fn main() {
    // Creation and initialization
    let mut scores = HashMap::new();
    scores.insert("Alice", 95);
    scores.insert("Bob", 87);
    scores.insert("Charlie", 92);
    
    // From iterator
    let teams = vec![("Red", 10), ("Blue", 20), ("Green", 15)];
    let team_scores: HashMap<_, _> = teams.into_iter().collect();
    
    // Access patterns
    match scores.get("Alice") {
        Some(score) => println!("Alice's score: {}", score),
        None => println!("Alice not found"),
    }
    
    // Default values
    let default_score = scores.get("David").unwrap_or(&0);
    println!("David's score (default): {}", default_score);
    
    // Entry API for complex updates
    let word = "hello world";
    let mut letter_count = HashMap::new();
    
    for ch in word.chars() {
        if ch != ' ' {
            let count = letter_count.entry(ch).or_insert(0);
            *count += 1;
        }
    }
    
    println!("Letter counts: {:?}", letter_count);
    
    // Update patterns
    scores.entry("Alice").and_modify(|score| *score += 5).or_insert(0);
    scores.entry("David").or_insert(85);
    
    println!("Updated scores: {:?}", scores);
}

BTreeMap<K, V> - Ordered Maps

Sorted key-value pairs with O(log n) operations:

use std::collections::BTreeMap;

fn main() {
    let mut grades = BTreeMap::new();
    grades.insert("Math", 95);
    grades.insert("Science", 88);
    grades.insert("English", 92);
    grades.insert("History", 87);
    
    // Keys are automatically sorted
    println!("Grades in alphabetical order:");
    for (subject, grade) in &grades {
        println!("  {}: {}", subject, grade);
    }
    
    // Range queries
    let stem_subjects: BTreeMap<_, _> = grades
        .range("Math".."Science")
        .map(|(k, v)| (*k, *v))
        .collect();
    
    println!("STEM subjects: {:?}", stem_subjects);
    
    // Split operations
    let mut all_grades = grades.clone();
    let upper_half = all_grades.split_off("History");
    
    println!("Lower half: {:?}", all_grades);
    println!("Upper half: {:?}", upper_half);
    
    // First and last elements
    if let Some((first_subject, first_grade)) = grades.first_key_value() {
        println!("First: {} - {}", first_subject, first_grade);
    }
    
    if let Some((last_subject, last_grade)) = grades.last_key_value() {
        println!("Last: {} - {}", last_subject, last_grade);
    }
}

Set Collections

HashSet<T> - Hash-Based Sets

Unique elements with fast lookup:

use std::collections::HashSet;

fn main() {
    // Creation and basic operations
    let mut fruits = HashSet::new();
    fruits.insert("apple");
    fruits.insert("banana");
    fruits.insert("orange");
    fruits.insert("apple"); // Duplicate ignored
    
    println!("Fruits: {:?}", fruits);
    println!("Contains apple: {}", fruits.contains("apple"));
    println!("Number of fruits: {}", fruits.len());
    
    // From iterator
    let numbers: HashSet = [1, 2, 3, 4, 5, 5, 4, 3].iter().cloned().collect();
    println!("Unique numbers: {:?}", numbers);
    
    // Set operations
    let set_a: HashSet = [1, 2, 3, 4].iter().cloned().collect();
    let set_b: HashSet = [3, 4, 5, 6].iter().cloned().collect();
    
    // Union
    let union: HashSet<_> = set_a.union(&set_b).cloned().collect();
    println!("Union: {:?}", union);
    
    // Intersection
    let intersection: HashSet<_> = set_a.intersection(&set_b).cloned().collect();
    println!("Intersection: {:?}", intersection);
    
    // Difference
    let difference: HashSet<_> = set_a.difference(&set_b).cloned().collect();
    println!("A - B: {:?}", difference);
    
    // Symmetric difference
    let sym_diff: HashSet<_> = set_a.symmetric_difference(&set_b).cloned().collect();
    println!("Symmetric difference: {:?}", sym_diff);
    
    // Subset checking
    let small_set: HashSet = [1, 2].iter().cloned().collect();
    println!("Is subset: {}", small_set.is_subset(&set_a));
    println!("Is disjoint: {}", set_a.is_disjoint(&set_b));
}

BTreeSet<T> - Ordered Sets

Sorted unique elements:

use std::collections::BTreeSet;

fn main() {
    let mut words = BTreeSet::new();
    words.insert("zebra");
    words.insert("apple");
    words.insert("banana");
    words.insert("cat");
    
    // Automatically sorted
    println!("Words in order: {:?}", words);
    
    // Range operations
    let middle_words: BTreeSet<_> = words
        .range("banana".."zebra")
        .cloned()
        .collect();
    
    println!("Middle words: {:?}", middle_words);
    
    // Split operations
    let mut all_words = words.clone();
    let latter_half = all_words.split_off("cat");
    
    println!("First half: {:?}", all_words);
    println!("Second half: {:?}", latter_half);
    
    // Navigation
    if let Some(first) = words.first() {
        println!("First word: {}", first);
    }
    
    if let Some(last) = words.last() {
        println!("Last word: {}", last);
    }
}

Collection Comparison

Performance Characteristics

Understanding when to use each collection:

use std::collections::{HashMap, BTreeMap, HashSet, BTreeSet, VecDeque};
use std::time::Instant;

fn benchmark_collections() {
    const SIZE: usize = 100_000;
    
    // Vec vs VecDeque for front insertion
    let start = Instant::now();
    let mut vec = Vec::new();
    for i in 0..1000 {
        vec.insert(0, i); // O(n) operation
    }
    println!("Vec front insertion: {:?}", start.elapsed());
    
    let start = Instant::now();
    let mut deque = VecDeque::new();
    for i in 0..1000 {
        deque.push_front(i); // O(1) operation
    }
    println!("VecDeque front insertion: {:?}", start.elapsed());
    
    // HashMap vs BTreeMap lookup
    let mut hash_map = HashMap::new();
    let mut btree_map = BTreeMap::new();
    
    for i in 0..SIZE {
        hash_map.insert(i, i * 2);
        btree_map.insert(i, i * 2);
    }
    
    // HashMap lookup (average O(1))
    let start = Instant::now();
    for i in 0..SIZE {
        let _ = hash_map.get(&i);
    }
    println!("HashMap lookup: {:?}", start.elapsed());
    
    // BTreeMap lookup (O(log n))
    let start = Instant::now();
    for i in 0..SIZE {
        let _ = btree_map.get(&i);
    }
    println!("BTreeMap lookup: {:?}", start.elapsed());
}

// Collection choice guide
fn collection_choice_examples() {
    // Use Vec when:
    // - You need indexed access
    // - Order matters
    // - You mostly append to the end
    let mut log_entries = Vec::new();
    log_entries.push("User logged in");
    log_entries.push("User updated profile");
    
    // Use VecDeque when:
    // - You need efficient front/back operations
    // - Implementing queues or stacks
    let mut task_queue = VecDeque::new();
    task_queue.push_back("Task 1");
    task_queue.push_back("Task 2");
    let next_task = task_queue.pop_front();
    
    // Use HashMap when:
    // - You need fast key-value lookup
    // - Order doesn't matter
    let mut user_sessions = HashMap::new();
    user_sessions.insert("session_123", "user_456");
    
    // Use BTreeMap when:
    // - You need sorted keys
    // - You need range queries
    let mut timestamp_events = BTreeMap::new();
    timestamp_events.insert(1642680000, "Event A");
    timestamp_events.insert(1642680060, "Event B");
    
    // Use HashSet when:
    // - You need unique elements
    // - Fast membership testing
    let mut unique_visitors = HashSet::new();
    unique_visitors.insert("192.168.1.1");
    unique_visitors.insert("10.0.0.1");
    
    // Use BTreeSet when:
    // - You need unique, sorted elements
    // - Range operations on sets
    let mut sorted_scores = BTreeSet::new();
    sorted_scores.insert(95);
    sorted_scores.insert(87);
    sorted_scores.insert(92);
}

fn main() {
    benchmark_collections();
    collection_choice_examples();
}

Real-World Examples

Data Processing Pipeline

Using collections together for complex data processing:

use std::collections::{HashMap, HashSet, BTreeSet};

#[derive(Debug, Clone)]
struct LogEntry {
    timestamp: u64,
    level: String,
    message: String,
    user_id: Option,
}

fn analyze_logs(logs: Vec) -> LogAnalysis {
    let mut analysis = LogAnalysis::new();
    
    for entry in logs {
        // Count by level
        *analysis.level_counts.entry(entry.level.clone()).or_insert(0) += 1;
        
        // Track unique users
        if let Some(user_id) = entry.user_id {
            analysis.unique_users.insert(user_id);
        }
        
        // Store chronologically
        analysis.chronological_entries.insert(entry.timestamp, entry.message.clone());
        
        // Track error messages
        if entry.level == "ERROR" {
            analysis.error_messages.insert(entry.message);
        }
    }
    
    analysis
}

struct LogAnalysis {
    level_counts: HashMap,
    unique_users: HashSet,
    chronological_entries: BTreeMap,
    error_messages: BTreeSet,
}

impl LogAnalysis {
    fn new() -> Self {
        LogAnalysis {
            level_counts: HashMap::new(),
            unique_users: HashSet::new(),
            chronological_entries: BTreeMap::new(),
            error_messages: BTreeSet::new(),
        }
    }
    
    fn print_summary(&self) {
        println!("=== Log Analysis Summary ===");
        
        // Level distribution
        println!("Log levels:");
        for (level, count) in &self.level_counts {
            println!("  {}: {}", level, count);
        }
        
        // User activity
        println!("Unique users: {}", self.unique_users.len());
        
        // Time range
        if let (Some((first_time, _)), Some((last_time, _))) = 
            (self.chronological_entries.first_key_value(), 
             self.chronological_entries.last_key_value()) {
            println!("Time range: {} to {}", first_time, last_time);
        }
        
        // Unique errors
        println!("Unique error types: {}", self.error_messages.len());
        for error in &self.error_messages {
            println!("  - {}", error);
        }
    }
}

fn main() {
    let logs = vec![
        LogEntry { timestamp: 1642680000, level: "INFO".to_string(), message: "User logged in".to_string(), user_id: Some(123) },
        LogEntry { timestamp: 1642680060, level: "WARN".to_string(), message: "High memory usage".to_string(), user_id: None },
        LogEntry { timestamp: 1642680120, level: "ERROR".to_string(), message: "Database connection failed".to_string(), user_id: Some(456) },
        LogEntry { timestamp: 1642680180, level: "INFO".to_string(), message: "User logged out".to_string(), user_id: Some(123) },
        LogEntry { timestamp: 1642680240, level: "ERROR".to_string(), message: "Database connection failed".to_string(), user_id: Some(789) },
    ];
    
    let analysis = analyze_logs(logs);
    analysis.print_summary();
}

Collection Conversion

Converting Between Collections

Transform data between different collection types:

use std::collections::{HashMap, HashSet, BTreeMap, BTreeSet, VecDeque};

fn main() {
    // Vec to other collections
    let numbers = vec![3, 1, 4, 1, 5, 9, 2, 6, 5, 3];
    
    // Vec to HashSet (removes duplicates)
    let unique_numbers: HashSet<_> = numbers.iter().cloned().collect();
    println!("Unique numbers: {:?}", unique_numbers);
    
    // Vec to BTreeSet (removes duplicates and sorts)
    let sorted_unique: BTreeSet<_> = numbers.iter().cloned().collect();
    println!("Sorted unique: {:?}", sorted_unique);
    
    // Vec to VecDeque
    let deque: VecDeque<_> = numbers.iter().cloned().collect();
    println!("As deque: {:?}", deque);
    
    // Creating maps from vectors
    let words = vec!["apple", "banana", "cherry", "date"];
    
    // Vec to HashMap with indices
    let word_indices: HashMap = words.iter().enumerate().collect();
    println!("Word indices: {:?}", word_indices);
    
    // Vec to HashMap with lengths
    let word_lengths: HashMap<&str, usize> = words.iter()
        .map(|word| (*word, word.len()))
        .collect();
    println!("Word lengths: {:?}", word_lengths);
    
    // Converting maps to other collections
    let scores: HashMap<&str, i32> = [("Alice", 95), ("Bob", 87), ("Charlie", 92)]
        .iter().cloned().collect();
    
    // Map keys to Vec
    let names: Vec<&str> = scores.keys().cloned().collect();
    println!("Names: {:?}", names);
    
    // Map values to sorted set
    let unique_scores: BTreeSet = scores.values().cloned().collect();
    println!("Unique scores: {:?}", unique_scores);
    
    // Chain collections
    let more_numbers = vec![7, 8, 9];
    let all_numbers: Vec = numbers.iter()
        .chain(more_numbers.iter())
        .cloned()
        .collect();
    println!("All numbers: {:?}", all_numbers);
}

Best Practices

Collection Usage Guidelines

Common Pitfalls

Mistakes to Avoid

Checks for Understanding

  1. When would you use VecDeque instead of Vec?
  2. What's the main difference between HashMap and BTreeMap?
  3. How do you remove duplicates from a Vec while preserving order?
  4. When should you use BTreeSet instead of HashSet?
Answers
  1. When you need efficient insertion/removal at the front, or implementing queues/stacks
  2. HashMap has O(1) average lookup but unordered; BTreeMap has O(log n) lookup but maintains sorted order
  3. Use a HashSet to track seen items while iterating: let mut seen = HashSet::new(); vec.retain(|x| seen.insert(*x))
  4. When you need the elements to be sorted, or when you need range operations