Strings and Other Collections
Strings »
Bytes and Chars
- A
bit is the smallest unit of storage, stores either 0 or 1
- A
byte is a collection of 8 bits, range from 0 to 255, u8 in Rust
- ASCII char is a 7-bit code, and can be stored in 1
byte
- The first 128 Unicode code points match the ASCII codes
- UTF-8 uses up to 4 bytes to store a character
- A Rust
char is a 32-bit (4-byte) value holding a Unicode scalar value (any code point except surrogates)
// Create char
char::from_digit(num, radix) // radix <= 36
let b = char::from_digit(11, 16); // Some('b'), returns an Option
char::from()
char::from_u32(0x2764); // return an Option
// Classifying char
.is_numeric()
.is_alphabetic()
.is_alphanumeric()
.is_whitespace() // including \t and \n
.is_control()
.is_ascii()
.is_digit(radix)
.to_digit(radix) -> Option
// case conversion
.is_lowercase()
.is_uppercase()
// uppercase/lowercase use .to_string() to convert to String
.to_lowercase() -> ToLowercase
.to_uppercase() -> ToUppercase
// convert to integers
assert_eq!('B' as u32, 66);
String and str
- Rust
String and str types are guaranteed to hold only well-formed UTF-8
String is resizable, a wrapper over Vec<u8>
- String does not support indexing (Unicode)
str is non-resizable, and mostly used as &str
String impl Deref<Target=str> so it can use all methods of str
- Other types of related types
PathBuf and &Path for filenames
Vec<u8> and &[u8] for binary data of non UTF-8 encode
OsString and &OsStr for env var and cli arguments from OS
CString and &CStr for C lib of null-terminated strings
// &str, String, and &String are all sized types.
// A &str is essentially:
struct StringSlice {
ptr: *const u8,
len: usize,
}
// A String is essentially:
struct String {
ptr: *const u8,
len: usize,
cap: usize,
}
// A &String is essentially:
struct StringRef {
ptr: *const String,
}
Methods
String::new() // empty
String::from(str_slice)
String::with_capacity(n)
str_slice.to_string()
iter.collect::<String>()
slice.to_owned()
"literal".into() // free Into trait
format!("{} {}", "String", "Literal") // String
// also write! and writeln!
// however, write! and writeln! return Result (needs `use std::fmt::Write;` for String)
let mut letter = String::new();
writeln!(letter, "Test {}", "write!")?;
// arrays/slices/vec can use .concat() or .join()
let bits = vec!["veni", "vidi", "vici"];
assert_eq!(bits.concat(), "venividivici");
assert_eq!(bits.join(", "), "veni, vidi, vici");
// from UTF-8 data
str::from_utf8(byte_slice) -> Result
String::from_utf8(vec) -> Result
// replace invalid UTF-8 with replacement char
String::from_utf8_lossy(byte_slice) -> Cow<'_, str>
// std::fmt::Display auto impl `ToString`, and `to_string` method
impl<T> ToString for T
where
T: Display + ?Sized,
pub trait ToString {
fn to_string(&self) -> String;
}
.len()
.is_empty()
slice[range] // slice[index] is not okay
slice.split_at(i) -> tuple
string.push(ch) // use single quote for 'c'
string.push_str(slice)
string.extend(iter) // Extend trait
string.insert(i, ch)
string.insert_str(i, slice)
// +: s1 must be a String and is moved (unusable afterwards); s2 is only borrowed
// use `format!` or `write!` instead
let s = s1 + &s2;
// removing
string.clear()
string.truncate(n) // discard after byte offset n
string.pop() -> Option<char>
string.remove(i) // remove by byte offset i
string.drain(range) -> Drain // return iter
string.replace_range(range, replacement)
// Searching and replacing
// use `r` to search from right side, e.g. `rfind`
// patterns can be `char` pattern, `String`, `&str`
// `FnMut(char) -> bool` or `&[char].as_ref()`
slice.contains(pattern)
slice.starts_with(pattern), slice.ends_with(pattern)
slice.find(pattern), slice.rfind(pattern)
slice.replace(pattern, replacement)
slice.replacen(pattern, replacement, n) // replace the first n matches
assert_eq!("`Borrow` and `BorrowMut`"
.replace(|ch:char| !ch.is_alphanumeric(), ""),
"BorrowandBorrowMut");
slice.bytes() // return Iterator<Item = u8>, its UTF-8 encoding
slice.chars() // return Iterator<Item = char>
slice.lines() // "\n" or "\r\n"
// the following return Iterator<&str>
.split(pattern)
.rsplit(p)
// similar to split, but trailing substring is skipped if empty
// i.e. pattern is treated as a terminator rather than separator
.split_terminator(p)
.rsplit_terminator(p)
let v: Vec<&str> = "A.B.".split_terminator('.').collect();
assert_eq!(v, ["A", "B"]); // would be ["A", "B", ""] if use .split
.split_whitespace()
.split_ascii_whitespace()
.splitn(3, p)
.rsplitn(3, p)
.matches(p)
slice.trim() // will trim White Space type, not just ' '
.trim_start()
.trim_end()
.trim_matches(pattern)
Working with other types
Cow<'_, B>
AsRef
- Slices and Strings implement
AsRef<str>, AsRef<[u8]>, AsRef<Path>, and AsRef<OsStr>
// std::str::FromStr trait with `from_str -> Result`
usize::from_str("112132")
f64::from_str("123.43")
bool::from_str("true")
char::from_str("erer").is_err()
IpAddr::from_str(...)
// access text as UTF-8
slice.as_bytes() -> &[u8] // borrows
string.into_bytes() -> Vec<u8> // consumes the String
// Can pass slices and strings to fn with these traits bounds directly
// Cow<'_, str>
// See Ownership
Collections
- Sequences
Vec<T> Growable array
VecDeque<T> Double-ended queue
LinkedList<T> Doubly linked list
- Maps
HashMap<K: Eq + Hash, V> Key-value hash table
BTreeMap<K: Ord, V> Sorted key-value table
- Sets
HashSet<T: Eq + Hash> Unordered, hash-based set
BTreeSet<T: Ord> Sorted set
BinaryHeap<T: Ord> Max heap
Vec<T>
String is impl as Vec<u8>
- Values are stored next to each other in memory
- Size 24 bytes on 64-bit targets (ptr, len and capacity)
Create Vec
// Vec::new() or vec!
let v: Vec<i32> = Vec::new();
let v = vec![1, 2, 3];
// from [T]
fn to_vec(&self) -> Vec<T: Clone>
// with specified capacity
.with_capacity(usize)
// fill and fill_with
let mut buf = vec![0; 10];
buf.fill(1);
assert_eq!(buf, vec![1; 10]);
let mut buf = vec![1; 10];
buf.fill_with(Default::default);
assert_eq!(buf, vec![0; 10]);
// repeat a slice n times
fn repeat(&self, n: usize) -> Vec<T>
assert_eq!([1, 2].repeat(3), vec![1, 2, 1, 2, 1, 2]);
// add additional capacity
// may reserve more than asked
.reserve(usize)
.reserve_exact(usize)
let mut vec = vec![1];
vec.reserve(10);
assert!(vec.capacity() >= 11);
// append moves all elements of `other` into `Self`
// leaving `other` empty
fn append(&mut self, other: &mut Vec)
let mut vec = vec![1, 2, 3];
let mut vec2 = vec![4, 5, 6];
vec.append(&mut vec2);
assert_eq!(vec, [1, 2, 3, 4, 5, 6]);
assert_eq!(vec2, []);
// extend
fn extend_from_slice(&mut self, other: &[T])
fn extend_from_within(&mut self, range: RangeBounds<usize>)
// dedup: remove consecutive repeated elements
.dedup()
fn dedup_by(&mut self, same_bucket: FnMut(&mut T, &mut T) -> bool)
fn dedup_by_key(&mut self, key: F)
where F: FnMut(&mut T) -> K, K: PartialEq<K>
// drain: removes the given range and returns an iter yielding the removed items
fn drain(&mut self, range: RangeBounds<usize>) -> Drain
// resize with new lens filled with Value if needed, otherwise truncated
fn resize(&mut self, new_len: usize, value: T)
fn resize_with(&mut self, new_len: usize, f: FnMut() -> T)
// splice: creates a splicing iter that replaces the specified range
// with the given `replace_with`; yields the removed items
pub fn splice<R, I>(
&mut self,
range: R,
replace_with: I
) -> Splice<'_, <I as IntoIterator>::IntoIter, A>
where
R: RangeBounds<usize>,
I: IntoIterator<Item = T>,
let mut v = vec![1, 2, 3];
let new = [7, 8];
let u: Vec<_> = v.splice(..2, new).collect();
assert_eq!(v, &[7, 8, 3]);
assert_eq!(u, &[1, 2]);
// split_off splits into two at given index; returning the 2nd half
// original vec will be left with the first half; capacity unchanged
let mut vec = vec![1, 2, 3];
let vec2 = vec.split_off(1);
assert_eq!(vec, [1]);
assert_eq!(vec2, [2, 3]);
// split takes F and returns an iter
fn split<F>(&self, pred: F) -> Split<'_, T, F>
where
F: FnMut(&T) -> bool,
// split_at index returns a tuple of slices
fn split_at(&self, mid: usize) -> (&[T], &[T])
let v = [1, 2, 3, 4, 5, 6];
{
let (left, right) = v.split_at(0);
assert_eq!(left, []);
assert_eq!(right, [1, 2, 3, 4, 5, 6]);
}
// other split methods
.split_first()
.split_inclusive(F)
.split_last()
.splitn(n: usize, pred: F) // returns at most n items
// clear all values
fn clear(&mut self)
.is_empty()
.len()
.ends_with(&[T]) -> bool
.is_sorted()
Change type
.as_ptr
.as_slice // same as &vec[..]
.into_boxed_slice
.into_raw_parts
// leak
// consumes the Vec, returns a mut ref to the contents
pub fn leak<'a>(self) -> &'a mut [T]
where
A: 'a,
let x = vec![1, 2, 3];
let static_ref: &'static mut [usize] = x.leak();
static_ref[0] += 1;
assert_eq!(static_ref, &[2, 2, 3]);
// chunk: returning an iter over chunk_size elements of the slice at a time
// last chunk can be shorter
// chunks_exact omits the last chunk if it is shorter than chunk_size (see .remainder())
fn chunks(&self, chunk_size: usize) -> Chunks<'_, T>
let slice = ['l', 'o', 'r', 'e', 'm'];
let mut iter = slice.chunks(2);
assert_eq!(iter.next().unwrap(), &['l', 'o']);
assert_eq!(iter.next().unwrap(), &['r', 'e']);
assert_eq!(iter.next().unwrap(), &['m']);
assert!(iter.next().is_none());
// windows: returns an iter over all contiguous window of length size
// returns none if the slice is shorter than size
fn windows(&self, size: usize) -> Windows<'_, T>
let slice = ['r', 'u', 's', 't'];
let mut iter = slice.windows(2);
assert_eq!(iter.next().unwrap(), &['r', 'u']);
assert_eq!(iter.next().unwrap(), &['u', 's']);
assert_eq!(iter.next().unwrap(), &['s', 't']);
assert!(iter.next().is_none());
// returns None if slice is shorter than size
let slice = ['f', 'o', 'o'];
let mut iter = slice.windows(4);
assert!(iter.next().is_none());
// concat: into a single value of `Self::Output`
assert_eq!(["hello", "world"].concat(), "helloworld");
assert_eq!([[1, 2], [3, 4]].concat(), [1, 2, 3, 4]);
// join using specified separator
pub fn join<Separator>(
&self,
sep: Separator
) -> <[T] as Join<Separator>>::Output
where
[T]: Join<Separator>,
assert_eq!(["hello", "world"].join(" "), "hello world");
assert_eq!([[1, 2], [3, 4]].join(&0), [1, 2, 0, 3, 4]);
assert_eq!([[1, 2], [3, 4]].join(&[0, 0][..]), [1, 2, 0, 0, 3, 4]);
// select_nth_unstable reorder the slice
// such that the element at index is at its final sorted position
// also related `select_nth_unstable_by<F>`,
// `select_nth_unstable_by_key<K, F>`
fn select_nth_unstable(
&mut self,
index: usize
) -> (&mut [T], &mut T, &mut [T])
where
T: Ord,
Elements
&v[i]
.first() -> Option<&T>
.first_mut() -> Option<&mut T>
.last() -> Option<&T>
.get(i) -> Option<&T>
// get_mut
.reverse()
.sort()
fn sort_by<F>(&mut self, compare: F)
where
F: FnMut(&T, &T) -> Ordering,
.push(T)
.insert(index: usize, element: T) // slow
.remove(index: usize) -> T // slow
v.pop() -> Option<T>
// retain only the elements based on F
fn retain<F>(&mut self, f:F)
where
F: FnMut(&T) -> bool,
// shrink capacity to either the supplied value or its length
fn shrink_to(&mut self, min_capacity: usize)
fn shrink_to_fit(&mut self)
// removes an element, and returns it
// the removed element is replaced by the last element
// O(1), does not preserve ordering
fn swap_remove(&mut self, index: usize) -> T
// truncate shortens the vec, keeping the first `len`, drop the rest
fn truncate(&mut self, len: usize)
// swap
fn swap(&mut self, a: usize, b: usize)
// swaps entire contents of both slices, must be same length (panics otherwise)
slice_a.swap_with_slice(&mut slice_b)
// Vec<T> can only store one type
// use Enum to store multiple type
enum SpreadsheetCell {
Int(i32),
Float(f64),
Text(String),
}
let row = vec![
SpreadsheetCell::Int(5),
SpreadsheetCell::Float(3.2),
SpreadsheetCell::Text("blue".to_string()),
];
HashMap
// all keys (K) must be of the same type; likewise all values (V)
HashMap<K,V>
// must be imported from the std lib; it is not in the prelude
use std::collections::HashMap;
let mut scores = HashMap::new();
scores.insert(String::from("Blue"),10);
scores.insert(String::from("Red"),50);
scores.get("Blue"); // takes a borrowed key, returns Option<&V>
for (key, value) in &scores {
println!("{}:{}",key,value);
}
// use collect to create HashMap from tuple
tuple_slice.iter().cloned().collect::<HashMap<_, _>>() // needs owned (K, V) pairs
// entry
scores.entry(String::from("Blue")).or_insert(50);
// count values of a &[&str]
let count_words = line.iter().fold(HashMap::new(), |mut words, str| {
*words.entry(str).or_insert(0) += 1;
words
});