#!/bin/bash
#
# datasets/compress.sh
#
# Packs each job dataset CSV found in the current directory into its own
# "<name>.tar.xz" archive (tar stream piped through `xz -9`).
#
# Inputs considered, in this order:
#   job_codings_v3.csv
#   job_metadata.csv
#   job_similarities_*.csv   (every file matching the glob)
#
# Inputs that do not exist are silently ignored; inputs whose archive already
# exists are skipped, so the script can be re-run safely.
#
# Exit status: 0 if every attempted compression succeeded, 1 otherwise.

# A failure in `tar` must fail the whole `tar | xz` pipeline, not just `xz`.
set -o pipefail
# An unmatched job_similarities_*.csv glob must expand to nothing rather than
# to the literal pattern.
shopt -s nullglob

#######################################
# Compress a single file into "<file>.tar.xz".
# Arguments: $1 - path of the file to compress
# Outputs:   progress messages on stdout, failure message on stderr
# Returns:   0 on success or when there was nothing to do, non-zero on error
#######################################
compress_file() {
  local in_fn=$1
  local out_fn="${in_fn}.tar.xz"
  local partial_fn="${out_fn}.partial"

  # Nothing to do for inputs that are not present.
  [[ -f "${in_fn}" ]] || return 0

  if [[ -f "${out_fn}" ]]; then
    echo "Skipping ${out_fn}. File exists."
    return 0
  fi

  echo "Compressing ${in_fn}"
  # Write to a side file and rename only on success: a failed or interrupted
  # run must never leave a truncated archive under the final name, because
  # later runs would treat it as done and skip it. A stale .partial file from
  # an interrupted run is simply overwritten here.
  if tar -cf - "${in_fn}" | xz -9 > "${partial_fn}"; then
    mv -f -- "${partial_fn}" "${out_fn}"
  else
    rm -f -- "${partial_fn}"
    printf 'Failed to compress %s\n' "${in_fn}" >&2
    return 1
  fi
}

#######################################
# Build the list of dataset files, print it, and compress each entry.
# Outputs: the space-joined file list, then one progress line per file
# Returns: 0 if all compressions succeeded, 1 if any failed
#######################################
main() {
  # The glob is expanded by the shell itself (no `ls` parsing), so names
  # containing whitespace stay intact as single array elements.
  local -a filenames=(
    "job_codings_v3.csv"
    "job_metadata.csv"
    job_similarities_*.csv
  )
  local in_fn
  local status=0

  echo "${filenames[*]}"

  for in_fn in "${filenames[@]}"; do
    # Keep going after a failure so one bad file does not block the rest.
    compress_file "${in_fn}" || status=1
  done

  return "${status}"
}

main "$@"