Shell Scripts

overall

#!/bin/bash
bash -c 'scripts' # run inline scripts
!! # repeat last command
$! # PID of the most recent background command
$$ # PID of current shell

sh -x file # debugging; trace execution
set -x # turn on and off(+x) trace mode

(...) # commands in subshell without affecting current env
{ ...; } # commands in current shell; notice the spaces and ;

set arg1 arg2 arg3 # set $1 $2 and $3

.   # dot command, same as source

here doc

# <<- strips leading tabs (not spaces) from the text and the closing delimiter
# quote the delimiter ("_EOF_") to disable variable/command expansion in the text
cat <<- _EOF_
    text
_EOF_

variables

# no space in definition
var=foo
readonly VAR # ro
readonly -p # list
unset var # remove 
CONST="string"

$var # same as ${var}
# use "" to prevent word splitting and globbing of the expanded value (the var is still expanded)
"$var"

$(command)

$((expression)) # arithmetic expansion
## use awk for float calculation
echo $num1 $num2 $num3 | awk '{ print $1*$2/$3 }'
awk 'BEGIN{ printf("%.3g", 1024*8096) }'

# array
declare -a array=($(ls))
array=(1 2 a b c)   # within () separated by space
echo ${array[0]}
echo ${array} # in bash, this is the same as ${array[0]} (POSIX sh has no arrays)
${#array[*]} #number of elements
${#array[i]} # length of ith element

# passing and returning array to function
# only the first item will be passed if using $array name
# use ${array[@]} instead
# Example of receiving an array in a function and handing it back via stdout.
# Arguments: the array elements, one per positional parameter
#            (call with "${array[@]}" so every element is passed)
# Outputs:   all elements on a single line, separated by single spaces
function function_name {
    local newarray
    newarray=("$@")
    # Quoted so elements are not re-split on whitespace or glob-expanded
    # (an element such as '*' must be printed literally, not as a file list).
    echo "${newarray[@]}"
}
# necessary to use () when calling the function
var=($(function_name "${array[@]}"))

string manipulation

# convert to array
# use IFS to change default delimiter (space)
array_type=($string_type)
echo ${array_type[@]}

# case conversion
# does not work in zsh
,,  to lowercase
^^  to uppercase
~~  transpose case
,   first letter to lowercase
^   first letter to uppercase
${old_string^^}

# string slicing
${String:START:LENGTH}
${String:START}  # to the end

# reverse
echo "$string" | rev

# search and replace
${STRING/X/Y}
${STRING//X/Y}   # replace all

# remove string
${var#pattern} # chop the shortest match for pattern from the front of var's value 
${var##pattern} # chop the longest match 
${var%pattern} # chop shortest from end
${var%%pattern} # chop longest from end
# example
FILENAME=/path/to/file.txt
${FILENAME##*/} #file.txt

${var:-value} # if unset or empty, default to value
${var:+value} # if not null, substitute with value
${var:=value} # if unset or empty, default to value and set var to value
${var:?value} # if unset or empty, print value as an error message to stderr and exit (non-interactive shell)
${#var} # number of char

reading and printing

read var1 var2
read # $REPLY holds the input when no variable name is given
read -r # do not treat backslash as an escape character
read -p "prompt text" # with prompt text
read -s -p "enter password: " # sensitive
read -n N # return after reading N characters instead of waiting for a newline

# read a multi-line here-doc into a variable without trimming leading and trailing whitespace
# [StackOverflow](https://stackoverflow.com/questions/57497365/what-does-the-bash-read-d-do)
# IFS='' prevent any trimming of leading and trailing whitespaces
# -d '' use NUL as delimiter
IFS='' read -r -d '' variable << EOF
text
EOF

echo -n text
printf "format" arg1 arg2...
printf "%+d\n%+d\n" 10 -10 #+10 \n -10

conditions

if [[ $var == string ]]; then
   : # null command; do nothing 
    -eq # for numbers
elif [[ $foo != string ]]; then
    command
fi

case $var in 
    case1 ) command
    ;;
    case2 | case3 ) command
    ;;
    * )  command ;;

esac

for NUMBER in 0 1 2 3 5 6 7; do
    { command; }
done

for ((a=1; a<10; a++)); do
    command
    continue # skip the rest of the loop
done

while [[ "$number" -lt 10 ]]; do 
    command
    break # break out of the loop; break n: nth innermost loop
done

function

function_name()
{
    local var   # define local var
    exit n # exit the program
    return n # complete the function
}

function name
{
    return value    # value between 0 and 255
}

# type in CLI
function name { command; } # need ;

var=$(function_name)    # captures the function's stdout (not its return status; use $? for that)

exit status: 0 success; non zero: error

$# # the number of arguments
$* # all arguments; "$*" joins them into a single word
$@ # all arguments; "$@" keeps each argument as a separate word
$0 # current program name
$1 # 1st argument
${10} # for the 10th argument and beyond, use {}
$? # exit status of the last command: 0 success; non zero: error
shift # ${n} value shifted to ${n-1}; $# decrease by one
shift 3 # shift 3 times

quotes

# single quote: ignores all special char
# double quote: ignore most char other than
    $ # var name substitutions
    ` # back quotes, not recommended, use $() instead
    \ # backslashes: escape $, `, " and \ (\$ \` \" \\); \n is a newline only if echo -e/printf interprets it

test


[ -f filename ] # regular file
-e # file exists; same as -a
-s # file is not zero size
-f # regular file
-d # a directory
-h # sym link -L
-r/w/x # file has read/write/execute permission
-n # not null
-z # is null

# integer comparison
-eq
-ne
-gt
-lt
-ge
-le

[[ test ]] && { action; } || { action; } # not a true if/else: the || branch also runs if the && action fails

-a # AND; lower precedence than string/integer operators; only for [ ] / test
-o # OR; lower precedence than -a; only for [ ] / test
# inside [[ ]] use && and || instead; -a and -o are syntax errors there

[ ! -f "$file1" -a -f "$file2" ]
[ \( "$count" -ge 10 \) -o \( "$count" -lt 1 \) ] # need \ for (), and spaces surround parentheses - treated as argument


test "$var" = "string" # must have space before and after = coz they are arguments; using double quote to cover when $var is null

regular expression (regex)

.   # matches any single char
?   # preceding item is matched zero or one time; extended expression
*   # preceding item is matched zero or more times
+   # preceding item is matched one or more times; extended
-   # range [a-z]
\b  # matches the empty string at the edge of a word
\B  # matches the empty string provided it's not at the edge of a word
\<  # match the empty string at the beginning of a word
\>  # matches the empty string at the end of word
^   # caret, matches start of string; also negate in a range
$   # matches end of string
()  # groups; extended
\   # escape special char: .*[]^${}\+?|()
|   # logical OR, no spaces around the pipe symbol; extended

# extended
{n} # matches n times, p{2}
{n,m}   # matches at least n and no more than m times
{n,}    # matches n or more times

[abc]   # matches either one
[a-z]   # range
[0-9][0-9][0-9] # multiple
[a-fm-z]    # multiple range
[^oi]t  # negate either char

# example to verify email
^([a-zA-Z0-9_\-\.\+]+)@([a-zA-Z0-9_\-\.]+)\.([a-zA-Z]{2,5})$

# special char classes
[[:alpha:]]	# Pattern for any alphabetical character, either upper or lower case.
[[:alnum:]]	# Pattern for 0–9, A–Z, or a–z.
[[:blank:]]	# Pattern for space or Tab only.
[[:digit:]]	# Pattern for 0 to 9.
[[:lower:]]	# Pattern for a–z lower case only.
[[:print:]]	# Pattern for any printable character.
[[:punct:]]	# Pattern for any punctuation character.
[[:space:]]	# Pattern for any whitespace character: space, Tab, NL, FF, VT, CR.
[[:upper:]]	# Pattern for A–Z upper case only.

file manipulation

# cut and paste
cut -cchars file
cut -c1,13,50 data 
cut -c10-40 data
cut -c5- data # till the end
cut -ddchar -ffields file
cut -d: -f1,6 /etc/passwd # delimiter :, first and sixth field

paste var1 var2 # separated by tab
paste -dchar var1 var2 # specify delimiter
paste -d'+' file1 file2
paste -s names # all in one line
ls | paste -d' ' -s - # read names and paste into one line

# find
find /path -maxdepth 1 -name name -type d -printf "%T+ %f\n"
# this will find the current root path (. or full path name) 
# %T time
# %p path: ./name
# %f file name only

# grep: text matching and printing
# sed: text transformation and substitution
# awk: everything else

# grep
grep PATTERN files
grep -E # extended regex
grep -e PATTERN #  use pattern; -f FILE: use patterns from FILE
env | grep ^HO # begins with HO
grep UNIX intro # same as sed -n '/UNIX/p' intro
grep '[tT]he' # either the or The; quoted so the shell does not glob-expand the brackets
grep -i the # ignore case
grep '[A-Z]' list
grep '[0-9]' data
grep '[A-Z]...[0-9]' list # five char, start with letter and end with digit
grep '\.pic$' file # end with .pic
grep -v 'UNIX' intro # reverse: does not contain UNIX
grep -l 'PATTERN' files # list files that contain PATTERN
grep -n 'PAT' files # print line number


# sed
sed options 'flag/regxp/flag' file
# options
-e 'scripts1; script2'  # separated by ; or new line
-f scriptfile   # separated by new line
-n  # quiet, does not produce output for each command
-i[SUFFIX]  # in place, make backup if extension supplied

# substitution 's/pattern/replacement/flags'
# default behavior for 's' is replace first occurrence for each line
sed 's/char1/char2/' data   # substitute flag
# flags
's/char1/char2/2'   # replace the second occurrence rather than first
's/char1/char2/g'   # global
sed -n 's/char1/char2/p'   # print out the line that contains a matching pattern, used with -n
's/char1/char2/w newfile'   # save to file
cat somefile.txt | sed 's/ *$//' > new_file # remove extra blank spaces at the end of each line; $ for end of line

# insert, append and change
sed 'i\new line' # insert "new line" before every line (use 1i\ to insert only at the top of the data)
sed '3i\new line'   # insert "new line" before line 3
sed '3a\new line'   # append "new line" after line3
sed 'a\new line'    # append "new line" after every line (use $a\ to append only at the end of the data)
sed '3c\new line'   # change line 3
sed '/UNIX/c\new line'  # change the line containing UNIX to "new line"
sed '2,3c\new line' # replace the whole range (lines 2 and 3) with a single "new line"
sed '2,3y/123/789/' # change 1 to 7, 2 to 8, and 3 to 9 for lines 2 and 3

# lines
# address: single number; n1,n2: range; n1,$: from n1 to the last line
[address]command
sed -i '1,5s/c1/c2/' file # replace c1 with c2 from lines 1-5 and save to the same file
sed -n '3,$p' intro # print from the third line onward
sed '/^$/d' intro # delete any blank lines   

# pattern filter
/pattern/command # command only applies to matching pattern lines
sed '/UNIX/d' intro # delete all lines containing UNIX
sed -n '/UNIX/p' intro # print lines containing UNIX
sed '/User/s/bash/csh/' /etc/passwd # replace bash with csh for User
sed -i '/en_US.UTF-8 UTF-8/s/# *//' /etc/locale.gen # remove # and its trailing spaces
sed 's/^ *//'       # remove all leading spaces (quoted so the shell does not split or glob the script)

# group commands with {}
sed '3,${
    s/brown/green/
    s/lazy/active/
    }'  datafile


# awk - a symlink to gawk in Arch
awk options '/regex/{ program }' file
# default behavior: reads a line, executes the program script
# options
-F fs   # specifies the field separator, default: whitespace (spaces or tabs)
-f file # file name to read the program (not the file to read data)
-v var=value    # defines var
-mf N   # max number of fields
-mr N   # max record size
-W  keyword # 
# examples
awk -F: '/user/{ print $1 }' /etc/passwd
awk -F: -f scriptfile datafile
# field var
$0  # whole line
$1  # 1st field; $n: nth field by FS char
# program - separate commands by ; or new line
awk '{ $2="newchar"; print $0 }'   # replace second field with newchar

# run script prior to or after reading data
# can have multiple BEGIN or END, only run once; but normal action will run for each line
awk 'BEGIN { print "Header Information" } 
    /test/{ print $0 } 
    END { print "END" }' data

## tricks
# [remove all leading whitespaces](https://unix.stackexchange.com/questions/102008/how-do-i-trim-leading-and-trailing-whitespace-from-each-line-of-some-output)
awk '{ $1=$1; print $0 }' # $1=$1 rebuilds the record: trims both ends and squeezes inner whitespace to single spaces

# tr
cat file | tr from-char to-char # replace characters
tr -d char # delete char from STDIN; e.g. tr -d ' ' < file is like sed 's/ //g' file

# sort
sort data
sort -u data # eliminate duplicate lines
sort -r # reverse
sort -o FILE # write output to FILE; unlike >, FILE may safely be the input file
sort -k3n -t: /etc/passwd # sort numerically by the third field, with : as the field delimiter

# uniq
uniq file # remove adjacent duplicate lines (sort first to catch all duplicates)
uniq -d file # list duplicate lines
sort /etc/passwd | cut -f1 -d: | uniq -d
uniq -c # count