Rust - Strings

Overview

Estimated time: 50–60 minutes

Master Rust's string types and text handling. Learn the difference between String and &str, string creation and manipulation, UTF-8 encoding, concatenation patterns, and memory management for strings.

Learning Objectives

Prerequisites

String Types in Rust

Rust has two main string types: String, an owned and growable string, and &str, a borrowed slice of string data.

String vs &str

// Contrasts the borrowed `&str` slice type with the owned, growable `String`.
fn main() {
    // A string literal has type `&str`.
    let literal: &str = "Hello, world!";
    println!("String literal: {}", literal);

    // `String` owns its contents, so a `mut` binding can be appended to.
    let mut buffer = String::from("Hello");
    println!("Owned string: {}", buffer);
    buffer.push_str(", Rust!");
    println!("Modified string: {}", buffer);

    // `&str` has no `push_str`: `literal.push_str("!")` would not compile.

    // &str -> String makes an owned copy; String -> &str is only a borrow.
    let copied: String = literal.to_string();
    let borrowed: &str = &buffer;
    println!("Converted: {} -> {}", copied, borrowed);
}

Expected Output:

String literal: Hello, world!
Owned string: Hello
Modified string: Hello, Rust!
Converted: Hello, world! -> Hello, Rust!

Creating Strings

Different Ways to Create Strings

// Tours the common ways to construct `&str` and `String` values.
fn main() {
    // A literal is a borrowed `&str`.
    let literal = "Hello, world!";
    println!("literal: {}", literal);

    // Two equivalent ways to turn a literal into an owned `String`.
    let via_from = String::from("Hello, Rust!");
    let via_to_string = "Hello, Rust!".to_string();
    println!("from_literal: {}", via_from);
    println!("to_string: {}", via_to_string);

    // Any iterator of `char` can be collected into a `String`.
    let collected: String = "Hello".chars().collect();
    println!("from_chars: {}", collected);

    // `from_utf8` validates the bytes and returns a `Result`. These bytes are
    // the ASCII codes for "Hello", so the `unwrap` cannot fail here.
    let decoded = String::from_utf8(vec![72, 101, 108, 108, 111]).unwrap();
    println!("from_bytes: {}", decoded);

    // Start with an empty `String` and append to it afterwards.
    let mut empty = String::new();
    empty.push_str("Now I have content!");
    println!("empty (now filled): {}", empty);

    // Reserve room up front, then fill it.
    let mut reserved = String::with_capacity(50);
    println!("with_capacity capacity: {}", reserved.capacity());
    reserved.push_str("This string has pre-allocated space");
    println!("with_capacity: {}", reserved);
}

Expected Output:

literal: Hello, world!
from_literal: Hello, Rust!
to_string: Hello, Rust!
from_chars: Hello
from_bytes: Hello
empty (now filled): Now I have content!
with_capacity capacity: 50
with_capacity: This string has pre-allocated space

String Creation from Different Sources

// Builds strings from numbers, collections, repetition, `format!`, and raw literals.
fn main() {
    // Numbers: `to_string()` for the bare value, `format!` to embed it in text.
    let answer = 42;
    let as_text = answer.to_string();
    let sentence = format!("The answer is {}", answer);
    println!("from_number: {}", as_text);
    println!("formatted: {}", sentence);

    // Collections of string slices: `join` inserts the separator between items.
    let words = vec!["Hello", "beautiful", "world"];
    let spaced = words.join(" ");
    println!("joined: {}", spaced);

    // `repeat` returns a `String`, so `+ &str` can append to it directly.
    let chant = "Na".repeat(8) + " Batman!";
    println!("repeated: {}", chant);

    // `format!` with several arguments; `{:.2}` prints two decimal places.
    let profile = format!(
        "Name: {}, Age: {}, Score: {:.2}",
        "Alice", 30, 95.7
    );
    println!("formatted_complex: {}", profile);

    // Raw strings keep quotes and backslashes literally (handy for regexes and paths).
    let raw_string = r#"This is a "raw" string with \n and \t"#;
    println!("raw_string: {}", raw_string);

    // A raw string may span lines; the line breaks and indentation are part of it.
    let multiline_raw = r#"
        Line 1
        Line 2
        Line 3
    "#;
    println!("multiline_raw: {}", multiline_raw);
}

Expected Output:

from_number: 42
formatted: The answer is 42
joined: Hello beautiful world
repeated: NaNaNaNaNaNaNaNa Batman!
formatted_complex: Name: Alice, Age: 30, Score: 95.70
raw_string: This is a "raw" string with \n and \t
multiline_raw: 
        Line 1
        Line 2
        Line 3
    

String Manipulation

Adding to Strings

// Demonstrates the in-place and copy-producing ways to change a `String`.
fn main() {
    let mut text = String::from("Hello");

    // `push` appends exactly one `char` per call.
    for ch in " Rust".chars() {
        text.push(ch);
    }
    println!("After push: {}", text);

    // `push_str` appends a whole string slice.
    text.push_str("!");
    println!("After push_str: {}", text);

    // `+=` appends a `&str` as well.
    text += " Programming";
    println!("After +=: {}", text);

    // `insert` / `insert_str` take a byte position; 0 is the front and
    // `len()` is the position just past the end.
    text.insert(0, '[');
    let end = text.len();
    text.insert_str(end, "]");
    println!("After insert: {}", text);

    // `replace` returns a new `String` with every match substituted.
    let emphasized = text.replace("Rust", "Amazing Rust");
    println!("Replaced: {}", emphasized);

    // `replacen` substitutes at most the given number of matches (here: one).
    let first_l_upper = text.replacen("l", "L", 1);
    println!("Replace first 'l': {}", first_l_upper);
}

Expected Output:

After push: Hello Rust
After push_str: Hello Rust!
After +=: Hello Rust! Programming
After insert: [Hello Rust! Programming]
Replaced: [Hello Amazing Rust! Programming]
Replace first 'l': [HeLlo Rust! Programming]

String Concatenation

// Compares `+`, `format!`, `join`, and manual building for concatenation.
fn main() {
    // `+` consumes its left-hand `String`; `head` cannot be used afterwards.
    let head = String::from("Hello");
    let middle = " World";
    let tail = "!";
    let combined = head + middle + tail;
    println!("Using +: {}", combined);

    // `format!` only borrows its arguments, so they all stay usable.
    let greeting = "Hello";
    let name = "Alice";
    let punctuation = "!";
    let message = format!("{} {}{}", greeting, name, punctuation);
    println!("Using format!: {}", message);
    println!("Original: {} {} {}", greeting, name, punctuation);

    // `join` concatenates a whole collection with a separator in one call.
    let parts = vec!["The", "quick", "brown", "fox"];
    let sentence = parts.join(" ");
    println!("Joined: {}", sentence);

    // The same result built by hand: the first word goes in as-is and every
    // later word is preceded by a single space.
    let mut builder = String::new();
    let mut remaining = parts.iter();
    if let Some(first_word) = remaining.next() {
        builder.push_str(first_word);
        for word in remaining {
            builder.push(' ');
            builder.push_str(word);
        }
    }
    println!("Built: {}", builder);
}

Expected Output:

Using +: Hello World!
Using format!: Hello Alice!
Original: Hello Alice !
Joined: The quick brown fox
Built: The quick brown fox

String Slicing and Indexing

Working with String Slices

// Shows byte-range slicing of a `&str`, the non-panicking `get`, and
// per-character access through `chars()` / `char_indices()`.
fn main() {
    let text = "Hello, Rust Programming!";

    // Range indexing uses BYTE offsets and panics if a bound is out of range
    // or falls inside a multi-byte character. This text is pure ASCII, so
    // every byte offset is also a character boundary.
    let hello = &text[0..5];
    let rust = &text[7..11];
    let programming = &text[12..23];

    println!("Original: {}", text);
    println!("hello: {}", hello);
    println!("rust: {}", rust);
    println!("programming: {}", programming);

    // Either bound of the range may be omitted.
    let from_start = &text[..5]; // Same as &text[0..5]
    let to_end = &text[7..]; // From byte 7 to the end
    let full = &text[..]; // Entire string

    println!("from_start: {}", from_start);
    println!("to_end: {}", to_end);
    println!("full: {}", full);

    // `get` returns `None` instead of panicking on an invalid range.
    match text.get(0..5) {
        Some(slice) => println!("Safe slice: {}", slice),
        None => println!("Invalid slice range"),
    }

    // Collect the characters so they can be looked up by character position.
    // The element type must be spelled out (`Vec<char>`) or inferred (`Vec<_>`).
    let chars: Vec<char> = text.chars().collect();
    if let Some(first_char) = chars.first() {
        println!("First character: {}", first_char);
    }

    // `char_indices` pairs each character with the byte offset where it starts.
    // Offsets only increase, so `take_while` stops at the first one >= 20
    // (only the first few are shown for brevity).
    for (i, ch) in text.char_indices().take_while(|&(i, _)| i < 20) {
        println!("Char '{}' at byte index {}", ch, i);
    }
}

Expected Output:

Original: Hello, Rust Programming!
hello: Hello
rust: Rust
programming: Programming
from_start: Hello
to_end: Rust Programming!
full: Hello, Rust Programming!
Safe slice: Hello
First character: H
Char 'H' at byte index 0
Char 'e' at byte index 1
Char 'l' at byte index 2
Char 'l' at byte index 3
Char 'o' at byte index 4
Char ',' at byte index 5
Char ' ' at byte index 6
Char 'R' at byte index 7
Char 'u' at byte index 8
Char 's' at byte index 9
Char 't' at byte index 10
Char ' ' at byte index 11
Char 'P' at byte index 12
Char 'r' at byte index 13
Char 'o' at byte index 14
Char 'g' at byte index 15
Char 'r' at byte index 16
Char 'a' at byte index 17
Char 'm' at byte index 18
Char 'm' at byte index 19

UTF-8 and Unicode Handling

Working with Unicode

// Explores how byte length, `char` count, and visible symbols differ in UTF-8 text.
fn main() {
    let unicode_text = "Hello 世界 🦀 Rust!";

    // `len()` counts bytes; `chars().count()` counts Unicode scalar values.
    let byte_len = unicode_text.len();
    let char_len = unicode_text.chars().count();
    println!("Text: {}", unicode_text);
    println!("Length in bytes: {}", byte_len);
    println!("Length in characters: {}", char_len);

    // Each `char` with its position and code point.
    println!("\nCharacters:");
    for (position, ch) in unicode_text.chars().enumerate() {
        println!("{}: '{}' (U+{:04X})", position, ch, ch as u32);
    }

    // The raw UTF-8 encoding, limited to the first 20 bytes.
    println!("\nFirst 20 bytes:");
    for (position, byte) in unicode_text.bytes().take(20).enumerate() {
        println!("Byte {}: {} (0x{:02X})", position, byte, byte);
    }

    // What looks like one symbol can be several code points. Splitting text
    // into user-perceived characters (grapheme clusters) needs the
    // `unicode-segmentation` crate; here only the `char` count is shown.
    let emoji_text = "👨‍👩‍👧‍👦 family";
    println!("\nEmoji text: {}", emoji_text);
    println!("Character count: {}", emoji_text.chars().count());
    println!("Byte count: {}", emoji_text.len());

    // Case conversion is Unicode-aware, so accented letters are handled too.
    let mixed = "Café naïve résumé";
    println!("\nMixed text: {}", mixed);
    let upper = mixed.to_uppercase();
    let lower = mixed.to_lowercase();
    println!("Uppercase: {}", upper);
    println!("Lowercase: {}", lower);
}

Expected Output:

Text: Hello 世界 🦀 Rust!
Length in bytes: 23
Length in characters: 16

Characters:
0: 'H' (U+0048)
1: 'e' (U+0065)
2: 'l' (U+006C)
3: 'l' (U+006C)
4: 'o' (U+006F)
5: ' ' (U+0020)
6: '世' (U+4E16)
7: '界' (U+754C)
8: ' ' (U+0020)
9: '🦀' (U+1F980)
10: ' ' (U+0020)
11: 'R' (U+0052)
12: 'u' (U+0075)
13: 's' (U+0073)
14: 't' (U+0074)
15: '!' (U+0021)

First 20 bytes:
Byte 0: 72 (0x48)
Byte 1: 101 (0x65)
Byte 2: 108 (0x6C)
Byte 3: 108 (0x6C)
Byte 4: 111 (0x6F)
Byte 5: 32 (0x20)
Byte 6: 228 (0xE4)
Byte 7: 184 (0xB8)
Byte 8: 150 (0x96)
Byte 9: 231 (0xE7)
Byte 10: 149 (0x95)
Byte 11: 140 (0x8C)
Byte 12: 32 (0x20)
Byte 13: 240 (0xF0)
Byte 14: 159 (0x9F)
Byte 15: 166 (0xA6)
Byte 16: 128 (0x80)
Byte 17: 32 (0x20)
Byte 18: 82 (0x52)
Byte 19: 117 (0x75)

Emoji text: 👨‍👩‍👧‍👦 family
Character count: 14
Byte count: 32

Mixed text: Café naïve résumé
Uppercase: CAFÉ NAÏVE RÉSUMÉ
Lowercase: café naïve résumé

String Searching and Pattern Matching

Finding Substrings

// Demonstrates substring search: containment, positions, prefixes/suffixes,
// all matches, case-insensitive matching, and a closure-based word test.
fn main() {
    let text = "The quick brown fox jumps over the lazy dog";

    // Basic searching
    if text.contains("fox") {
        println!("Found 'fox' in the text");
    }

    // `find` returns the byte offset of the first match, if there is one.
    match text.find("fox") {
        Some(index) => println!("'fox' found at position {}", index),
        None => println!("'fox' not found"),
    }

    // `rfind` searches from the end. Matching is case-sensitive, so the
    // leading "The" is not a candidate.
    match text.rfind("the") {
        Some(index) => println!("Last 'the' found at position {}", index),
        None => println!("'the' not found"),
    }

    // Starts with / ends with
    println!("Starts with 'The': {}", text.starts_with("The"));
    println!("Ends with 'dog': {}", text.ends_with("dog"));

    // `match_indices` yields a (byte offset, matched text) pair per occurrence.
    let pattern = "the";
    let matches: Vec<_> = text.match_indices(pattern).collect();
    println!("All occurrences of '{}': {:?}", pattern, matches);

    // Case-insensitive search: lower-case both sides before comparing.
    let needle = "THE";
    let text_lower = text.to_lowercase();
    if text_lower.contains(&needle.to_lowercase()) {
        println!("Found '{}' (case-insensitive)", needle);
    }

    // Pattern matching with closures. `len()` would count bytes, so count
    // `char`s to match the "characters" wording even for non-ASCII words.
    let has_long_word = text
        .split_whitespace()
        .any(|word| word.chars().count() > 5);
    println!("Has word longer than 5 characters: {}", has_long_word);
}

Expected Output:

Found 'fox' in the text
'fox' found at position 16
Last 'the' found at position 31
Starts with 'The': true
Ends with 'dog': true
All occurrences of 'the': [(31, "the")]
Found 'THE' (case-insensitive)
Has word longer than 5 characters: false

String Splitting and Parsing

// Splits text by delimiter, whitespace, and lines, then parses the pieces
// into typed values with `str::parse`.
fn main() {
    let csv_data = "John,25,Engineer,New York";
    let multiline = "Line 1\nLine 2\r\nLine 3\n";
    let whitespace_text = "  hello   world  rust  ";

    // Split by delimiter
    let fields: Vec<&str> = csv_data.split(',').collect();
    println!("CSV fields: {:?}", fields);

    // `split_whitespace` skips leading, trailing, and repeated whitespace.
    let words: Vec<&str> = whitespace_text.split_whitespace().collect();
    println!("Words: {:?}", words);

    // `lines` accepts both "\n" and "\r\n" line endings.
    let lines: Vec<&str> = multiline.lines().collect();
    println!("Lines: {:?}", lines);

    // `splitn(3, ..)` yields at most 3 pieces; the last keeps the remainder.
    let limited: Vec<&str> = csv_data.splitn(3, ',').collect();
    println!("Limited split: {:?}", limited);

    // Parse numbers from string. Collecting into `Result<Vec<_>, _>` stops at
    // the first failure; "3.14" is not a valid `i32`, so this takes the
    // `Err` branch.
    let numbers_text = "42 3.14 100 -5";
    let numbers: Result<Vec<i32>, _> = numbers_text
        .split_whitespace()
        .map(|s| s.parse::<i32>())
        .collect();

    match numbers {
        Ok(nums) => println!("Parsed integers: {:?}", nums),
        Err(e) => println!("Parse error: {}", e),
    }

    // Parse mixed data: a name, an unsigned age, and a boolean flag.
    let person_data = "Alice:30:true";
    let parts: Vec<&str> = person_data.split(':').collect();

    // The slice pattern matches only when there are exactly three fields,
    // which avoids indexing that could panic.
    if let [name, age, active] = parts.as_slice() {
        match (age.parse::<u32>(), active.parse::<bool>()) {
            (Ok(a), Ok(act)) => {
                println!("Person: {} (age {}, active: {})", name, a, act);
            }
            _ => println!("Failed to parse person data"),
        }
    }
}

Expected Output:

CSV fields: ["John", "25", "Engineer", "New York"]
Words: ["hello", "world", "rust"]
Lines: ["Line 1", "Line 2", "Line 3"]
Limited split: ["John", "25", "Engineer,New York"]
Parse error: invalid digit found in string
Person: Alice (age 30, active: true)

String Formatting and Display

Format Macro and Display Traits

// Walks through the formatting mini-language shared by `format!` and `println!`.
fn main() {
    let name = "Alice";
    let age = 30;
    let height = 5.6;
    let is_employed = true;

    // `{}` placeholders are filled in argument order.
    let in_order = format!("Name: {}, Age: {}", name, age);
    println!("{}", in_order);

    // `{0}` / `{1}` pick arguments by index, so one can be reused.
    let by_index = format!("{0} is {1} years old. {0} works as a developer.", name, age);
    println!("{}", by_index);

    // `{label}` picks arguments by name.
    let by_name = format!(
        "{name} is {age} years old and {height} feet tall",
        name = name,
        age = age,
        height = height
    );
    println!("{}", by_name);

    // Floats: default, fixed precision, and scientific notation.
    let pi = 3.14159265359;
    println!("Pi: {}", pi);
    println!("Pi (2 decimals): {:.2}", pi);
    println!("Pi (6 decimals): {:.6}", pi);
    println!("Pi (scientific): {:e}", pi);

    // Integers in several bases.
    let value = 255;
    println!("Decimal: {}", value);
    println!("Binary: {:b}", value);
    println!("Octal: {:o}", value);
    println!("Hex (lowercase): {:x}", value);
    println!("Hex (uppercase): {:X}", value);

    // Width 10 with `>` (right), `<` (left), `^` (center); `05` pads with zeros.
    println!("Right aligned: '{:>10}'", name);
    println!("Left aligned: '{:<10}'", name);
    println!("Center aligned: '{:^10}'", name);
    println!("Zero padded: '{:05}'", 42);

    // `{:?}` is compact Debug output; `{:#?}` pretty-prints one item per line.
    let items = vec![1, 2, 3, 4, 5];
    println!("Debug: {:?}", items);
    println!("Pretty debug: {:#?}", items);

    // `bool` implements both Display and Debug.
    println!("Employed: {}", is_employed);
    println!("Employed (debug): {:?}", is_employed);
}

Expected Output:

Name: Alice, Age: 30
Alice is 30 years old. Alice works as a developer.
Alice is 30 years old and 5.6 feet tall
Pi: 3.14159265359
Pi (2 decimals): 3.14
Pi (6 decimals): 3.141593
Pi (scientific): 3.14159265359e0
Decimal: 255
Binary: 11111111
Octal: 377
Hex (lowercase): ff
Hex (uppercase): FF
Right aligned: '     Alice'
Left aligned: 'Alice     '
Center aligned: '  Alice   '
Zero padded: '00042'
Debug: [1, 2, 3, 4, 5]
Pretty debug: [
    1,
    2,
    3,
    4,
    5,
]
Employed: true
Employed (debug): true

String Memory Management

Capacity and Performance

// Tracks how a `String`'s length and capacity change as it grows, shrinks,
// and is cleared, then contrasts two ways of building a sentence.
fn main() {
    // Prints one "<label> - Length: .., Capacity: .." line for `s`.
    fn report(label: &str, s: &String) {
        println!("{} - Length: {}, Capacity: {}", label, s.len(), s.capacity());
    }

    // Growing from empty: capacity is raised whenever more room is needed.
    let mut text = String::new();
    report("Initial", &text);
    text.push_str("Hello");
    report("After 'Hello'", &text);
    text.push_str(" World and more text to trigger reallocation");
    report("After more text", &text);

    // Reserving up front: the capacity is there before any content.
    let mut efficient = String::with_capacity(100);
    report("Pre-allocated", &efficient);
    efficient.push_str("This won't cause reallocation");
    report("After adding text", &efficient);

    // `shrink_to_fit` gives back unused capacity.
    let mut big_string = String::with_capacity(1000);
    big_string.push_str("Small content");
    report("Before shrink", &big_string);
    big_string.shrink_to_fit();
    report("After shrink", &big_string);

    // `clear` empties the content; the printed capacity shows what remains.
    big_string.clear();
    report("After clear", &big_string);

    // Two ways to build the same sentence from words.
    let words = vec!["The", "quick", "brown", "fox", "jumps", "over", "the", "lazy", "dog"];

    // Inefficient: chained `+` inside a loop, reassigning the result each time.
    let mut inefficient = String::new();
    for word in &words {
        inefficient = inefficient + word + " ";
    }

    // More efficient: size the buffer once (each word plus one space), then append.
    let needed: usize = words.iter().map(|w| w.len() + 1).sum();
    let mut efficient_build = String::with_capacity(needed);
    for word in &words {
        efficient_build.push_str(word);
        efficient_build.push(' ');
    }

    // `trim` drops the trailing space both loops leave behind.
    println!("Inefficient result: '{}'", inefficient.trim());
    println!("Efficient result: '{}'", efficient_build.trim());
}

Expected Output:

Initial - Length: 0, Capacity: 0
After 'Hello' - Length: 5, Capacity: 8
After more text - Length: 49, Capacity: 49
Pre-allocated - Length: 0, Capacity: 100
After adding text - Length: 29, Capacity: 100
Before shrink - Length: 13, Capacity: 1000
After shrink - Length: 13, Capacity: 13
After clear - Length: 0, Capacity: 13
Inefficient result: 'The quick brown fox jumps over the lazy dog'
Efficient result: 'The quick brown fox jumps over the lazy dog'

Best Practices

Choosing String Types

Performance Tips

// Illustrates two habits: take `&str` parameters, and build strings with
// `join` / `format!` rather than repeated `+`.
fn main() {
    // Taking `&str` lets callers pass either a `&String` or a string slice.
    fn process_text(text: &str) -> String {
        let mut result = String::from("Processed: ");
        result.push_str(&text.to_uppercase());
        result
    }

    let owned = String::from("hello");
    let slice = "world";

    // `&owned` (a `&String`) is coerced to `&str` automatically.
    println!("{}", process_text(&owned));
    println!("{}", process_text(slice));

    let parts = vec!["alpha", "beta", "gamma", "delta"];

    // Best for simple concatenation: `join` with a separator.
    let dashed = parts.join("-");
    println!("Joined: {}", dashed);

    // Good for richer layouts: `format!` around the joined pieces.
    let listing = format!("Parts: [{}]", parts.join(", "));
    println!("{}", listing);

    // Avoid concatenating with `+` inside a loop, e.g.
    //     let mut bad = String::new();
    //     for part in &parts {
    //         bad = bad + part + "-";  // Creates new string each time
    //     }
}

Expected Output:

Processed: HELLO
Processed: WORLD
Joined: alpha-beta-gamma-delta
Parts: [alpha, beta, gamma, delta]

Common Pitfalls

Checks for Understanding

  1. What's the difference between String and &str?
  2. Why can't you index into a string with string[0]?
  3. What's the difference between push() and push_str()?
  4. How do you safely get a substring without panicking?
  5. What's the most efficient way to concatenate many strings?

Answers

  1. String is owned and mutable; &str is a borrowed slice of string data
  2. Because strings are UTF-8 encoded - a character can occupy several bytes, so an integer index cannot reliably identify a character and a byte index may land in the middle of one
  3. push() adds a single character; push_str() adds a string slice
  4. Use the get() method with a byte range (it returns an Option instead of panicking), or chars().nth() to access a single character
  5. Use join() for simple cases, or String::with_capacity() + push_str() for complex cases

← Previous | Next →