#!/usr/bin/env bash

# Scratch file holding the intermediate one-record-per-line text.
# mktemp creates it atomically under an unpredictable name, so another user
# cannot pre-create or symlink the path the way they could with a name built
# from $USER and the PID.  The template stays under /tmp (no spaces in the
# path) because later commands in this script expand $tmpfile unquoted.
tmpfile=$(mktemp /tmp/locus2roster.XXXXXX) || exit 1
# Remove the scratch file on every exit path, including early exits.
trap 'rm -f -- "$tmpfile"' EXIT

# Proceed only when the first argument names an existing, non-empty file;
# otherwise leave quietly with a success status.
[[ -s $1 ]] || exit 0

# Reshape the roster text named by $1 into an aligned table on stdout.
#
# Separator bytes used while reshaping:
#   rec_sep  is put in front of every ID line (8 or more digits) and later
#            turned into a newline, so each record starts with its ID;
#   fld_sep  is put in front of "Graded" and splits "NUMBER NAME" from the
#            remaining columns when the table is laid out.
# NOTE(review): the text this was rewritten from showed no separator in the
# ID rule, an empty first set in `tr`, a Graded rule that glued the name to
# "Graded", and an `awk -F` with no value.  Those look like literal control
# characters that were lost; they are spelled out here as explicit escapes.
# Confirm the placement against a known-good copy of the script.
readonly rec_sep=$'\001'
readonly fld_sep=$'\002'

#######################################
# Turn raw roster text into one record per line.
# Globals:   rec_sep, fld_sep (read)
# Arguments: none
# Inputs:    raw roster text on stdin
# Outputs:   records on stdout, "NUMBER NAME<fld_sep>rest"; the last record
#            may lack a trailing newline
#######################################
extract_records() {
  # 1. delete tabs
  # 2. drop lines holding only "Photo", a 1-3 digit number, or "Row N"
  # 3. mark ID lines with rec_sep; replace the first " -" on a line by " "
  # 4. join all lines with spaces and squeeze runs of spaces
  # 5. start a new line at every rec_sep, then drop whatever preceded the
  #    first ID
  # 6. abbreviate, one substitution per rule and line
  #
  # Rule order matters in step 6: longer names must come before the shorter
  # names they contain ("English as a Second Language" before "English"),
  # and a few rules deliberately consume the output of earlier ones
  # ("CrimJ & Criminology", "Socio and Anth ").
  tr -d '\011' \
    | grep -E -v '^ *(Photo$|[0-9]{1,3}|Row *[0-9]*) *$' \
    | sed -e 's+^ *\([0-9]\{8,\}\)$+'"${rec_sep}"'\1+' \
          -e 's+ -+ +' \
    | tr '\012' ' ' \
    | tr -s ' ' \
    | tr "${rec_sep}" '\012' \
    | tail -n +2 \
    | sed -e '
        s+\.\([A-Z]\)+\1+
        s+^00++
        s+Dual ++
        s+.Psychology: .*/+Psychology/+
        s+Philosophy .*/+Philosophy/+
        s+\([^/]*\)/.* Freshman+\1   1+
        s+\([^/]*\)/.* Sophomore+\1   2+
        s+\([^/]*\)/.* Junior+\1   3+
        s+\([^/]*\)/.* Senior+\1   4+
        s+Undergraduate Arts \& Sciences+ A\&S+
        s+Environmental Sustainability+ IES+
        s+The Graduate School+Grad+
        s+Graduate School+Grad+
        s+Undergraduate Business+ Bus+
        s+Undergraduate Education+Educ+
        s+Sec Ed.*)+SecEd+
        s+Ugrd Schl of Communication+Comm+
        s+St Joseph Seminary+StJo+
        s+School of Professional Studies+SCPS+
        s+Undergraduate Engineering Sci+Engr+
        s+Mathematics \& Computer Science+MathC+
        s+Dual Physics \& Computer Sci+PhysC+
        s+Physics and Computer Science+PhysC+
        s+Accounting+Acct +
        s+Anthropology+Anth +
        s+Applied Mathematics+Amth +
        s+Biochemistry+Bioch+
        s+Bioinformatics+Bioin+
        s+Biology+Biolo+
        s+Chemistry+Chemi+
        s+Cognitve \& Behavioral Neurosci+Neur +
        s+Comm Networks \& Security+CommN+
        s+Computer Science+CompS+
        s+Criminal Justice+CrimJ+
        s+CrimJ \& Criminology+CrimJ+
        s+Cybersecurity+Csec +
        s+Data Science+Dsci +
        s+Economics+Econo+
        s+English as a Second Language+ESL  +
        s+English+Engl +
        s+Environmental Policy+EnviP+
        s+Environmental Science+EnviS+
        s+Environmntl Sci: [^0-9]*+EnviS   +
        s+Finance+Fin  +
        s+Forensic Science+Foren+
        s+Global \& International Studies+GlobI+
        s+History+Hist +
        s+Information Systms & Analytics+InfoS+
        s+Information Systems+InfoS+
        s+Information Technology+InfoT+
        s+International Studies+Inter+
        s+International Business+IBus +
        s+Journalism+Jour +
        s+Marketing+Mark +
        s+Mathematics+Math +
        s+Middle Grades Education+MidGr+
        s+Moleculr & Cellulr Neuroscienc+Neur +
        s+Philosophy+Philo+
        s+Physics+Physi+
        s+Political Science+Poli +
        s+Psychology+Psych+
        s+Sociology+Socio+
        s+Socio and Anth +So\&An+
        s+Software Development+Softw+
        s+Software Engineering+Softw+
        s+Statistics+Stat +
        s+Undecided Concentration+Undec+
        s+Undergraduate Non[ -]Degree+UNond+
        s+Engineering Sci Biomedical Eng+BiomE+
        s+Engineering Sci Computer Eng+CompE+
        s+Freshman+  1+
        s+Sophomore+  2+
        s+Junior+  3+
        s+Senior+  4+
        s+Graduate+  6+
        s+,+, +
        s+Engaged Learning++' \
          -e 's+\(.*\) Graded+\1'"${fld_sep}"'Graded +' \
          -e 's+ *$+ +'
}

#######################################
# Print the column header followed by the aligned records.
# Globals:   fld_sep (read)
# Arguments: $1 - file of records as written by extract_records
# Outputs:   the table on stdout
#######################################
format_roster() {
  local records=$1
  local namelen

  # Widest "NUMBER NAME" field.  It is measured with awk's length() so it
  # agrees with the padding awk applies to the rows below.
  namelen=$(awk -F "${fld_sep}" '
    { n = length($1); if (n > widest) widest = n }
    END { print widest + 0 }' "${records}")

  # NAME is right-aligned over the name column; a negative width (very short
  # or no records) makes printf left-align instead.
  printf '   NUMBER %*s   Descr Units Prog Major CLS EADDR\n' \
    "$(( namelen - 10 ))" 'NAME'

  # Pad every "NUMBER NAME" field to the widest one plus two spaces.  The
  # data is passed as printf arguments, never as the format string.
  awk -F "${fld_sep}" -v width="$(( namelen + 2 ))" '
    BEGIN { fmt = "%-" width "s%s\n" }
    { printf fmt, $1, $2 }' "${records}"
}

# Skip the report when the input or the scratch file cannot be opened; the
# shell has already reported the failing redirection on stderr.
if extract_records < "$1" > "${tmpfile}"; then
  format_roster "${tmpfile}"
fi
rm -- "${tmpfile}"
