Bash and Make tips for scripting reproducible workflows
$ echo "Make, etc." | \
  wall -g PCD
Seth Fitzsimmons
Stamen Design, etc.
[email protected]
View Slide
# slides$ open \http://bit.ly/2eFFvJt
# why?## * repeatable# * self-documenting# * transformations# <> data changes
# on Windows 10?# no worries## “Bash on Ubuntu# on Windows”
# shell fundamentals$ /bin/sh -c theory
# is a comment$ is a prompt
# --help usually works
# man is detailed
# do this, then that$ this; that
# if this, then that$ this && that
# that, unless this$ this || that
# standard file# descriptors# (stdio)
# stdin (fd 0)
$ cat > greeting
Hi!
⌃d
$ cat greeting
Hi!
# stdout (fd 1)
$ echo "Hi PCD!"
# stderr$ >&2 echo Error
# a data black hole/dev/null
# redirection$ echo hi > greeting
# append$ echo hey >> greeting
# stderr → stdout$ thing 2>&1 logs
# pipe
# this means everything can be combined!!!
$ cat /etc/passwd | \
  grep -i calendar
# exit codes$ thing; echo $?
# success (0)
$ thing && echo $?
0
# failure (not 0)
$ thing || echo $?
1
$ make basics
# same input (repeatedly),# same output$ make idempotencyYes$ make idempotencyYes
# target
this:
	touch $@
# prerequisite
this: that
# that exists
	cp that this
$ make -j2 vars fns
# target
this:
# outputs "this"
	echo $@
# all prereqs
this: that the_other
# "that the_other"
	echo $^
# first prereq
this: that the_other
# "that"
	echo $<
# nth prereq
this: that the_other
# "the_other"
	echo $(word 2, $^)
# catch-all
%:
	echo 42 > $@
$ ls /bin
# explore
# who am I?$ whoamiseth
# where am I?$ pwd/home/seth
# what time is it?$ dateTue Oct 18 23:58:42 MDT 2016
# what’s here?$ ls -lhtotal 8-rw-r--r-- 1 seth wheel …
# what’s in that file?$ cat /etc/passwd…
# …a page at a time$ less /etc/passwd…
# …just the beginning$ head /etc/passwd…
# …just the end$ tail /etc/passwd…
# what variables are set?$ envHOME=/home/seth…
# where’s that file?$ find . -type f -name hi
# where’d that file go?# (full-text search, macOS)$ mdfind Seattle
# get help$ man man…
# manipulation
# create a file$ touch file
# copy a file$ cp file file2
# move a file$ mv file2 file3
# delete a file$ rm file3
# create a directory$ mkdir -p my/stuff
# remove a directory$ rmdir my/stuff
# remove a directory# and everything in it$ rm -r my
# find lines in a file$ grep -i name file.txt
# find non-matching lines$ grep -v name file.txt
# count words, lines,# characters$ wc file.txt
# pretty-print JSON$ jq . file.json
# extract fields$ jq .name file.json
# replace things$ sed 's/this/that/' file
# extract columns$ cut -d , -f 1,3 file.csv
# compression
# open a zip file$ unzip file.zip
# list zip contents$ unzip -v file.zip
# create a zip (archive name first, then the files to add)
$ zip file.zip file
# open a tarball$ tar zxf file.tar.gz
# list a tarball$ tar ztf file.tar.gz
# create a tarball$ tar zcf file.tar.gz stuff/
# compress with gzip$ gzip file.tar
# uncompress with gzip$ gzip -d file.tar.gz
# misc
# always exit 0$ true; echo $?0
# always exit non-0$ false; echo $?1
# fetch and fail# if appropriate$ curl -f nacis.org
# download$ wget nacis.org
# “open” (macOS)$ open nacis.org
# display progress
$ cat /etc/passwd | \
  pv | wc -l
# also write to a file$ echo hi | tee file
$ man bash
#!/usr/bin/env bash
set -eo pipefail
set -x
NACIS=2016
# assignment
NACIS="2016"
# capture a command
NACIS=$(curl nacis.org)
echo $NACIS
echo ${NACIS}
# set a default value
echo ${NACIS:-PCD}
# replace
echo ${NACIS/2016/2017}
# replace all
echo ${NACIS//2016/2017}
# substring
echo ${NACIS:2:2}
# remove suffix
$ filename="world.tif"
$ echo ${filename%.tif}
world
# do math$ echo $[2 ** 3]8
if [[ "this" != "that" ]]; then
  echo Control Flow
elif [[ ! -f file ]]; then
  touch file
elif [[ $six -le $five ]]; then
  false
else
  rm -f file
fi
while true; do
  echo Control Flow
done
for f in $(ls); do
  echo $f
done
# man test
# extract filename$ basename /etc/passwdpasswd
# extract directory$ dirname /etc/passwd/etc
$ make recipes
$ make convert
source.json: source.shp
	ogr2ogr \
	  -t_srs EPSG:4326 \
	  -f GeoJSON \
	  $@ \
	  $<
$ make reproject
output.tif: source.tif
	gdalwarp \
	  -q \
	  -t_srs EPSG:3857 \
	  $< \
	  $@
$ make wilderness
data/S_USA.Wilderness.zip:
	@mkdir -p $$(dirname $@)
	@curl -sfL http://data.fs.usda.gov/geodata/edw/edw_resources/shp/S_USA.Wilderness.zip \
	  -o $@
$ make table
db/wilderness: sql/wilderness.sql
	psql \
	  -c "\d $(subst db/,,$@)" \
	  > /dev/null \
	  2>&1 || \
	psql \
	  -v ON_ERROR_STOP=1 \
	  -qX1f \
	  $<
# resources
• https://bost.ocks.org/mike/make/
• http://www.gregreda.com/2013/07/15/unix-commands-for-data-science/
• https://google.github.io/styleguide/shell.xml
• https://github.com/stamen/toner-carto/blob/master/Makefile (WARNING!)
• http://mojodna.net/2015/01/07/make-for-data-using-make.html