Strings and Other Collections

Strings »

Bytes and Chars

A bit is the smallest unit of storage, stores either 0 or 1
A byte is a collection of 8 bits, range from 0 to 255, u8 in Rust
ASCII char is a 7-bit code, and can be stored in 1 byte
The first 128 Unicode code points are identical to the ASCII codes
UTF-8 uses up to 4 bytes to store a character
A Rust char is a 32-bit (4-byte) value holding a Unicode code point

// Create char
char::from_digit(num, radix)         // radix <= 36
let b = char::from_digit(11, 16);   // Some('b')
char::from()
char::from_u32(0x2764);         // returns an Option


// Classifying char
.is_numeric()
.is_alphabetic()
.is_alphanumeric()
.is_whitespace()        // including \t and \n
.is_control()
.is_ascii()
.is_digit(radix)
.to_digit(radix) -> Option

// case conversion
.is_lowercase()
.is_uppercase()
// to_lowercase/to_uppercase return iterators of chars;
// call .to_string() on the result to get a String
.to_lowercase() -> ToLowercase
.to_uppercase() -> ToUppercase

// convert to integers
assert_eq!('B' as u32, 66);

String and str

Rust String and str types are guaranteed to hold only well-formed UTF-8
String is resizable, a wrapper over Vec<u8>
String does not support indexing (Unicode)
str is non-resizable, and mostly used as &str
- String impl Deref<Target=str> so it can use all methods of str
Other related types
- PathBuf and &Path for filenames
- Vec<u8> and &[u8] for binary data that is not UTF-8 encoded
- OsString and &OsStr for env var and cli arguments from OS
- CString and &CStr for C lib of null-terminated strings

// &str, String, and &String are all sized types.
// A &str is essentially:
struct StringSlice {
    ptr: *const u8,
    len: usize,
}
// A String is essentially:
struct String {
    ptr: *const u8,
    len: usize,
    cap: usize,
}
// A &String is essentially:
struct StringRef {
    ptr: *const String,
}

Methods

Create String

String::new()       // empty
String::from(str_slice)
String::with_capacity(n)
str_slice.to_string()
iter.collect::<String>()
slice.to_owned()
"literal".into() // free Into trait 
format!("{} {}", "String", "Literal") // String
// also write! and writeln! (writing into a String needs `use std::fmt::Write;`)
// however, write! and writeln! return a Result
let mut letter = String::new();
writeln!(letter, "Test {}", "write!")?;

// arrays/slices/vec can use .concat() or .join()
let bits = vec!["veni", "vidi", "vici"];
assert_eq!(bits.concat(), "venividivici");
assert_eq!(bits.join(", "), "veni, vidi, vici");

// from UTF-8 data
str::from_utf8(byte_slice) -> Result
String::from_utf8(vec) -> Result
// replace invalid UTF-8 with replacement char
String::from_utf8_lossy(byte_slice) -> Cow<'_, str>

Convert to Strings

// std::fmt::Display auto impl `ToString`, and `to_string` method
impl<T> ToString for T
where
    T: Display + ?Sized, 
pub trait ToString {
    fn to_string(&self) -> String;
}

Simple inspection

.len()
.is_empty()
slice[range]    // slice[index] is not okay
slice.split_at(i) -> tuple

Appending and inserting

string.push(ch)         // use single quote for 'c'
string.push_str(slice)
string.extend(iter)     // Extend trait
string.insert(i, ch)    
string.insert_str(i, slice)

// +: s1 must be a String; it is moved into `s` and cannot be used afterwards
// use `format!` or `write!` instead
let s = s1 + &s2;

Replacing

// removing
string.clear()
string.truncate(n)      // discard after byte offset n
string.pop() -> Option<char>
string.remove(i)        // remove by byte offset i
string.drain(range) -> Drain    // return iter
string.replace_range(range, replacement)

// Searching and replacing
// use `r` to search from right side, e.g. `rfind`
// patterns can be a `char`, a `&str`, a `&String`, a `&[char]` slice,
// or a `FnMut(char) -> bool` closure
slice.contains(pattern)
slice.starts_with(pattern), slice.ends_with(pattern)
slice.find(pattern), slice.rfind(pattern)
slice.replace(pattern, replacement)
slice.replacen(pattern, replacement, n) // replace the first n matches

assert_eq!("`Borrow` and `BorrowMut`"
           .replace(|ch:char| !ch.is_alphanumeric(), ""),
           "BorrowandBorrowMut");

Iterating

slice.bytes()       // return Iterator<u8>, its UTF-8 encoding
slice.chars()       // return Iterator<char>
slice.lines()       // "\n" or "\r\n"

// the following return Iterator<&str>

.split(pattern)
.rsplit(p)

// similar to split, but trailing substring is skipped if empty
// i.e. pattern is treated as a terminator rather than separator
.split_terminator(p)    
.rsplit_terminator(p)
let v: Vec<&str> = "A.B.".split_terminator('.').collect();
assert_eq!(v, ["A", "B"]);  // would be ["A", "B", ""] if use .split

.split_whitespace()
.split_ascii_whitespace()
.splitn(3, p)
.rsplitn(3, p)
.matches(p)

Trimming -> &str

slice.trim()    // will trim White Space type, not just ' ' 
.trim_start()
.trim_end()
.trim_matches(pattern)

Working with other types

Cow<'_, B>
AsRef
- Slices and Strings implement AsRef<str>, AsRef<[u8]>, AsRef<Path>, and AsRef<OsStr>

// std::str::FromStr trait with `from_str -> Result`
usize::from_str("112132")
f64::from_str("123.43")
bool::from_str("true")
char::from_str("erer").is_err()
IpAddr::from_str(...)

// access text as UTF-8
slice.as_bytes() -> &[u8]   // borrows
string.into_bytes() -> Vec<u8>  // consumes the String

// Slices and strings can be passed directly to fns with these trait bounds

// Cow<'_, str>
// See Ownership

Collections

Sequences
- Vec<T> Growable array
- VecDeque<T> Double-ended queue
- LinkedList<T> Doubly linked list
Maps
- HashMap<K: Eq + Hash, V> Key-value hash table
- BTreeMap<K: Ord, V> Sorted key-value table
Sets
- HashSet<T: Eq + Hash> Unordered, hash-based set
- BTreeSet<T: Ord> Sorted set
BinaryHeap<T: Ord> Max heap

Vec<T>

String is impl as Vec<u8>
Values are stored next to each other in memory
Size 24 bytes (ptr, len and capacity)

Create Vec

// Vec::new() or vec!
let v: Vec<i32> = Vec::new();
let v = vec![1, 2, 3];

// from [T]
fn to_vec(&self) -> Vec<T> where T: Clone

// with specified capacity
.with_capacity(usize)

// fill and fill_with
let mut buf = vec![0; 10];
buf.fill(1);
assert_eq!(buf, vec![1; 10]);

let mut buf = vec![1; 10];
buf.fill_with(Default::default);
assert_eq!(buf, vec![0; 10]);

// repeat a slice n times
fn repeat(&self, n: usize) -> Vec<T>
assert_eq!([1, 2].repeat(3), vec![1, 2, 1, 2, 1, 2]);

// add additional capacity
// may reserve more than asked
.reserve(usize)
.reserve_exact(usize)
let mut vec = vec![1];
vec.reserve(10);
assert!(vec.capacity() >= 11);

// append moves all elements of `other` into `Self`
// leaving `other` empty
fn append(&mut self, other: &mut Vec)
let mut vec = vec![1, 2, 3];
let mut vec2 = vec![4, 5, 6];
vec.append(&mut vec2);
assert_eq!(vec, [1, 2, 3, 4, 5, 6]);
assert_eq!(vec2, []);

// extend
fn extend_from_slice(&mut self, other: &[T])
fn extend_from_within(&mut self, range: RangeBounds<usize>)

// dedup: remove consecutive repeated elements
.dedup()
fn dedup_by(&mut self, same_bucket: FnMut(&mut T, &mut T) -> bool)
fn dedup_by_key(&mut self, key: F)
    where F: FnMut(&mut T) -> K, K: PartialEq<K>

// drain: removes the given range and returns an iter that yields the removed items
fn drain(&mut self, range: RangeBounds<usize>) -> Drain

// resize with new lens filled with Value if needed, otherwise truncated
fn resize(&mut self, new_len: usize, value: T)
fn resize_with(&mut self, new_len: usize, f: FnMut() -> T)

// splice: creates a splicing iter that replaces the specified range
// with the given `replace_with`; yields the removed items
pub fn splice<R, I>(
    &mut self,
    range: R,
    replace_with: I
) -> Splice<'_, <I as IntoIterator>::IntoIter, A>
where
    R: RangeBounds<usize>,
    I: IntoIterator<Item = T>, 
let mut v = vec![1, 2, 3];
let new = [7, 8];
let u: Vec<_> = v.splice(..2, new).collect();
assert_eq!(v, &[7, 8, 3]);
assert_eq!(u, &[1, 2]);

// split_off splits into two at given index; returning the 2nd half
// original vec will be left with the first half; capacity unchanged
let mut vec = vec![1, 2, 3];
let vec2 = vec.split_off(1);
assert_eq!(vec, [1]);
assert_eq!(vec2, [2, 3]);

// split takes F and returns an iter
fn split<F>(&self, pred: F) -> Split<'_, T, F>
where
    F: FnMut(&T) -> bool,

// split_at index returns a tuple of slices
fn split_at(&self, mid: usize) -> (&[T], &[T])
let v = [1, 2, 3, 4, 5, 6];
{
   let (left, right) = v.split_at(0);
   assert_eq!(left, []);
   assert_eq!(right, [1, 2, 3, 4, 5, 6]);
}
// other split methods
.split_first()
.split_inclusive(F)
.split_last()
.splitn(n: usize, pred: F)  // returns at most n items

// clear all values
fn clear(&mut self)

.is_empty()
.len()
.ends_with(&[T]) -> bool
.is_sorted()

Change type

.as_ptr
.as_slice       // same as &vec[..]
.into_boxed_slice
.into_raw_parts

// leak
// consumes the Vec, returns a mut ref to the contents
pub fn leak<'a>(self) -> &'a mut [T]
where
    A: 'a,
let x = vec![1, 2, 3];
let static_ref: &'static mut [usize] = x.leak();
static_ref[0] += 1;
assert_eq!(static_ref, &[2, 2, 3]);

// chunk: returning an iter over chunk_size elements of the slice at a time
// last chunk can be shorter
// chunks_exact omits the last chunk if it is shorter than chunk_size
// (the leftover elements are available via .remainder())
fn chunks(&self, chunk_size: usize) -> Chunks<'_, T>
let slice = ['l', 'o', 'r', 'e', 'm'];
let mut iter = slice.chunks(2);
assert_eq!(iter.next().unwrap(), &['l', 'o']);
assert_eq!(iter.next().unwrap(), &['r', 'e']);
assert_eq!(iter.next().unwrap(), &['m']);
assert!(iter.next().is_none());

// windows: returns an iter over all contiguous window of length size
// returns none if the slice is shorter than size
fn windows(&self, size: usize) -> Windows<'_, T>
let slice = ['r', 'u', 's', 't'];
let mut iter = slice.windows(2);
assert_eq!(iter.next().unwrap(), &['r', 'u']);
assert_eq!(iter.next().unwrap(), &['u', 's']);
assert_eq!(iter.next().unwrap(), &['s', 't']);
assert!(iter.next().is_none());
// returns None if slice is shorter than size
let slice = ['f', 'o', 'o'];
let mut iter = slice.windows(4);
assert!(iter.next().is_none());

// concat: into a single value of `Self::Output`
assert_eq!(["hello", "world"].concat(), "helloworld");
assert_eq!([[1, 2], [3, 4]].concat(), [1, 2, 3, 4]);

// join using specified separator
pub fn join<Separator>(
    &self,
    sep: Separator
) -> <[T] as Join<Separator>>::Output
where
    [T]: Join<Separator>,
assert_eq!(["hello", "world"].join(" "), "hello world");
assert_eq!([[1, 2], [3, 4]].join(&0), [1, 2, 0, 3, 4]);
assert_eq!([[1, 2], [3, 4]].join(&[0, 0][..]), [1, 2, 0, 0, 3, 4]);

// select_nth_unstable reorder the slice
// such that the element at index is at its final sorted position
// also related `select_nth_unstable_by<F>`, 
// `select_nth_unstable_by_key<K, F>`
fn select_nth_unstable(
        &mut self,
        index: usize
) -> (&mut [T], &mut T, &mut [T])
where
    T: Ord,

Elements

Vec<T> impl Index

&v[i]
.first() -> Option<&T>
.first_mut() -> Option<&mut T>
.last() -> Option<&T>
.get(i) -> Option<&T>    
// get_mut
.reverse()
.sort()
fn sort_by<F>(&mut self, compare: F)
    where
        F: FnMut(&T, &T) -> Ordering,


.push(T)
.insert(index: usize, element: T)   // slow
.remove(index: usize) -> T  // slow


v.pop() -> Option<T>

// retain only the elements based on F
fn retain<F>(&mut self, f:F)
    where
        F: FnMut(&T) -> bool,

// shrink capacity to either the supplied value or its length
fn shrink_to(&mut self, min_capacity: usize)
fn shrink_to_fit(&mut self)

// removes an element, and returns it
// the removed element is replaced by the last element
// O(1), does not preserve ordering
fn swap_remove(&mut self, index: usize) -> T

// truncate shortens the vec, keeping the first `len`, drop the rest
fn truncate(&mut self, len: usize)

// swap
fn swap(&mut self, a: usize, b: usize)
// swaps entire contents of both slices, must be same length (panics otherwise)
slice_a.swap_with_slice(&mut slice_b)

// Vec<T> can only store one type
// use Enum to store multiple type
enum SpreadsheetCell {
    Int(i32),
    Float(f64),
    Text(String),
}
let row = vec![
    SpreadsheetCell::Int(5),
    SpreadsheetCell::Float(3.2),
    SpreadsheetCell::Text("blue".to_string()),
];

HashMap

// all keys must be of the same type K; all values must be of the same type V
HashMap<K,V>
// must be imported from the std lib; it is not in the prelude
use std::collections::HashMap;

let mut scores = HashMap::new();
scores.insert(String::from("Blue"),10);
scores.insert(String::from("Red"),50);
scores.get("Blue");     // takes a reference to the key, returns Option<&V>
for (key, value) in &scores {
    println!("{}:{}",key,value);
}

// use collect to create a HashMap from an iterator of (key, value) tuples
tuple_slice.iter().cloned().collect::<HashMap<_, _>>()

// entry
scores.entry(String::from("Blue")).or_insert(50);

// count values of a &[&str]
let count_words = line.iter().fold(HashMap::new(), |mut words, str| {
    *words.entry(str).or_insert(0) += 1;
    words
});