#!/usr/bin/perl
# -*- Perl -*-

# $Id: make-peereval-reports 261 2018-11-05 21:21:15Z jrv $

# This script processes a csv file exported from the PeerEval tool in
# myCourses for a peer evaluation using the SWEN-261 Peer Evaluations template.
# It can generate three outputs: a file with the ratings and comments made by
# all team members, formatted so that it can be copied into an email message;
# a file with the ratings for each team member; and a file of the confidential
# comments made by evaluators. Command line arguments choose which
# outputs to generate. The script requires a team file and the CSV
# file exported from PeerEval.
#
# The team file is a tab-separated file with the following information on each
# line. Team is typically the letter designator, but can be any designator as
# long as it is consistent through the file. The loginid is the student's RIT
# loginid without @rit.edu appended. The name may optionally be surrounded by
# quotes.
#
#	team<tab>["]name["]<tab>loginid

use strict;
use warnings;

use Data::Dumper;
use Scalar::Util "reftype";
use File::Basename;
use Text::CSV;

use constant FALSE => 0;
use constant TRUE => (!FALSE);

# For getopts processing of the command line
use Getopt::Std;
$Getopt::Std::STANDARD_HELP_VERSION = TRUE;

my $cmdOptionKeys = 'b:cehi:k:p:rv';
use constant BOILERPLATE_NAME => 'b';
use constant CONFIDENTIAL_OPT => 'c';
use constant EVALUATIONS => 'e';
use constant SHOW_USAGE => 'h';
use constant INSTRUCTOR_FILE => 'i';
use constant SET_KEY_FORMAT => 'k';
use constant FILENAME_PREFIX => 'p';
use constant RATINGS_OUTPUT => 'r';
use constant VERBOSE => 'v';

# This is the default base filename used if the -p option is not provided, and
# the suffixes that will be appended to it.
my $filenamePrefix = 'PeerEvals';
my $confidentialFilename = '';
my $confidentialTail = '-confidential.txt';
my $ratingsFilename = '';
my $ratingsTail = '-ratings.csv';
my $instructorFilename = '';
my $evalsFilename = '';
my $evalsTextTail = '.txt';
my $evalsHTMLTail = '.html';
my $evalsExtension = '.txt';
my $studentKeyFormat = '';
my $boilerplateFilename;

# NOTE(review): this spot was evidently meant to define the default name of the
# boilerplate file for outputting evaluations, but no default is defined;
# $boilerplateFilename is declared above without a value.

# Command line option storage and the flags that record which outputs and
# behaviors were requested. All flags start out FALSE; presumably they are
# set from %cmdOptions after getopts runs (that code is not in this section).
my %cmdOptions;
my $confidential = FALSE;
my $ratingsOutput = FALSE;
my $evaluations = FALSE;
my $htmlOutput = FALSE;
my $instructorFile = FALSE;
my $setKeyFormat = FALSE;
my $verbose = FALSE;
my $verboseRaterAnnounced = FALSE;

# Global associations that point to student and team records.
my %ratedStudents;
my %teamsByName;
my @studentRatings;

# These are the indices into a student's entry
use constant FULLNAME => 0;
use constant LASTNAME => 1;
use constant FIRSTNAME => 2;
use constant LOGINID => 3;
use constant RATINGS => 4;
use constant FEEDBACK => 5;
use constant FACTOR => 6;
use constant POSITION => 7;
use constant CONFIDENTIAL => 8;
use constant KEY => 9;
use constant INSTRUCTOR_STUDENT => 10;

# Each team entry in %teamsByName has two elements: array of references to
# the students on the team, and any instructor team comments.
use constant STUDENTS => 0;
use constant INSTRUCTOR_TEAM => 1;

# These variables will help to flag a use of last name and first name in the
# student key when it was not identified in all member names.
my $lastOrFirstNameUsed = FALSE;
my $lastAndFirstNameIdentified = TRUE;

# We scan the header of the CSV file of survey data downloaded from Qualtrics.
# The indices for all the "Your name" columns get stored. For each of the
# three standard questions, we need to save the index in the student record
# for where comments from this question type get stored and the key for the
# student being rated. We also save the column that identifies the team if
# this is a multi-team survey.
my $teamColumn;
my @userNameColumns;
my @questionColumns;

use constant RATED_KEY => 0;
use constant QUESTION_INDEX => 1;
use constant IGNORE_FIELD => -1;

# This is the @questionColumns entry to ignore the column
my $ignoreColumn = ['', IGNORE_FIELD];

# These are the first words of the column headers in the raw output. Each
# indicates the question the column holds data for.
use constant TEAM_KEY => "Which team";
use constant NAME_KEY => "Your name";

# Each entry in this array holds the regexp to match the CSV column header
# for a rating or comment about a student. The second element of each entry
# is the index for where the response goes in the student record.
use constant HEADER_REGEXP => 0;
use constant COMMENT_INDEX => 1;
my @headerRegexp = (
    [qr/^Rate.*unprepared - (.*)$/, RATINGS],
    [qr/^Feedback.*member. - (.*)$/, FEEDBACK],
    [qr/^Confidential.*name. - (.*)$/, CONFIDENTIAL]
);

# @ratingCategories holds the labels for each rating category found in the raw
# output which reports the rating given as a number from 1 to 5.
#
# @ratingWeights holds the weight that each rating gets out of 100.
#
# The rating data is 1-based, and 0 is the default value that will be used
# when a team member has not submitted an evaluation.
my @ratingCategories =
    ("No Eval","Unsatisfactory","Marginal","Satisfactory","Very Good",
     "Excellent");
my @ratingWeights = (0.0, 0.0, 50.0, 75.0, 88.0, 100.0);

# When we switched to the Qualtrics surveys, the rating values were output as
# text and not as a number. This hash will make the conversion so that the
# old computation of the rating factors will not need to be modified.
my %ratingValues = (
    'Unsatisfactory' => 1,
    'Marginal' => 2,
    'Satisfactory' => 3,
    'Very Good' => 4,
    'Excellent' => 5
);

# The boilerplate file is read into this array. It is used to output each
# student's peer evaluation.
my @evalBoilerplate;

#-----------------------------------------------------------------------
#
#  Definition of subroutines
#
#-----------------------------------------------------------------------

# Version reporting in the form expected by getopts: the version string is the
# repository Id keyword and the script name is the basename of $0.
my $VERSION = '$Id: make-peereval-reports 261 2018-11-05 21:21:15Z jrv $';
my $scriptName = fileparse($0);

# Print "<script> version: <Id string>" on the currently selected output
# handle. Takes no arguments and returns nothing.
sub VERSION_MESSAGE() {
    my $versionLine = $scriptName . ' version: ' . $VERSION . "\n";
    print $versionLine;
    return;
}

# Print the usage and help text to standard output, then exit with status 0.
# The name and the empty prototype are the form getopts looks for when --help
# is given. The text interpolates $scriptName, so the usage line always shows
# the name the script was invoked with.
#
# The default instructor-comment text quoted near the end of the help must
# stay in step with the string actually substituted by the boilerplate
# output ("No additional instructor comments").
sub HELP_MESSAGE()
{
    print << "HELP MESSAGE";
Usage: $scriptName [-hcrev] [-b boilerplate] [-i instructor-file] \\
                   [-k FORMAT] [-p prefix] tabbed-team-info-file [raw-peerfile]

  -b\tprovide a boilerplate file to use for evaluation output
  -c\toutput confidential comments
  -e\toutput evaluation data
  -h\toutput this help message
  -i\tinstructor comments on team, individual, overriding factor
  -k\tset the format for the key used to index students
  -p\tprovide filename prefix for all generated files (default: PeerEvals)
  -r\toutput ratings and factor data to csv file
  -v\tgenerate verbose output

The tabbed-team-info-file is a required tab separated file with one line per
student holding team-id<tab>student-name<tab>rit-login-id. The team-id is a
unique id for each team, typically, the letter designating the team. The
student-name is the full name for the student. If a comma exists in the full
name, it is assumed to divide the full name into last-name, first-name which
would be available individually in substitutions. The script generates a
message if it finds that the boilerplate file uses last-name or first-name,
and some names were not provided using the comma notation. Blanks would be
output for those students instead of their names. The student-name can be
enclosed in quotes which are stripped off. The rit-login-id is just the id
part, i.e. it does not include \@rit.edu.

The raw-peerfile is the CSV data downloaded from the Qualtrics peer evaluation
project or projects. It can be given on the command line or read from standard
input.

By default, the full name is used as the key to identify each student in the
survey data. The -k option can define the format for a different key to
use. The format can include the keywords: FULLNAME, LASTNAME, FIRSTNAME,
LOGINID.

Without a boilerplate file specified, the evaluation data is output in a
compressed text format. It includes full name, login id, factor, ratings, and
the team member feedback comments.

If a boilerplate file is specified, it is used as the base for the output of
each student's peer evaluation information. In the boilerplate file, you can
specify the following keywords that will be replaced with the corresponding
information for a student: FULLNAME, LASTNAME, FIRSTNAME, LOGINID, EMAIL,
FACTOR, RATINGS, PEERCOMMENTS, INSTRUCTORTEAM, and INSTRUCTORSTUDENT. If the
extension on the boilerplate file is .html, the PEERCOMMENTS are output as a
<ul> with one student comment per <li> element.

The instructor file allows the instructor to provide comments about a team's
performance, the performance of an individual student, and to override the
calculated adjustment factor for a student. Headers in the file identify the
comments given on the lines following the line with the header. Headers have
the following formats:
  [[Team team-id]] team-id is the short id used to identify each team
  [[student-key]] student-key is the key that is used to identify each student
  [[student-key override-factor]] if a numeric value is given it will override
    the computed factor for that student

You only need to provide headers for comments that you make. The header is on
a line by itself. All of the following lines of text up to the next header or
the end of the instructor file are associated with that team or student and
will be substituted for keywords in a boilerplate file. A default message of
"No additional instructor comments" is inserted when no comments are given.

"PeerEvals" is the default prefix for all output files. This can be
changed with the -p option. The default filenames are:
  Evaluation data - PeerEvals.txt (.html if boilerplate extension is .html)
  Ratings data - PeerEvals-ratings.csv
  Confidential comments - PeerEvals-confidential.txt

HELP MESSAGE

    exit 0;
}

# Create the key for storing and finding students based on the key format
# held in $studentKeyFormat.
#
# Parameter: a reference to the array holding one student's information
#   (indexed by FULLNAME, LASTNAME, FIRSTNAME, LOGINID, ...).
# Returns: the key string. With an empty key format this is the full name.
#   Otherwise it is the format with the keywords FULLNAME, LASTNAME, FIRSTNAME
#   and LOGINID replaced by the student's values.
#
# When the format uses LASTNAME or FIRSTNAME but that name is empty for the
# student, the keyword is left in the key unchanged and one message is
# written to STDERR for the student.
#
# All keywords are expanded in a single pass over the format, so text that
# was inserted for one keyword is never scanned again for another keyword
# (e.g. a name that happens to contain the letters of a keyword).
sub makeStudentKey {
    my ($studentInfo) = @_;

    # No format given: the full name is the key.
    if ($studentKeyFormat eq '') {
        return $studentInfo->[FULLNAME];
    }

    my %valueFor = (
        'FULLNAME'  => $studentInfo->[FULLNAME],
        'LASTNAME'  => $studentInfo->[LASTNAME],
        'FIRSTNAME' => $studentInfo->[FIRSTNAME],
        'LOGINID'   => $studentInfo->[LOGINID],
    );

    my $lastOrFirstNameUnknown = FALSE;

    # Returns the replacement text for one keyword found in the format.
    my $expandKeyword = sub {
        my ($keyword) = @_;
        my $value = $valueFor{$keyword};

        # A last or first name that was not identified keeps its keyword in
        # the key and is reported once below.
        if (($keyword eq 'LASTNAME' || $keyword eq 'FIRSTNAME')
            && $value eq '') {
            $lastOrFirstNameUnknown = TRUE;
            return $keyword;
        }

        return $value;
    };

    my $studentKey = $studentKeyFormat;
    $studentKey =~
        s/(FULLNAME|LASTNAME|FIRSTNAME|LOGINID)/$expandKeyword->($1)/ge;

    if ($lastOrFirstNameUnknown) {
        print STDERR "Last or first name used for " .
            $studentInfo->[FULLNAME] . " but not identified\n";
    }

    return $studentKey;
}

# Gather information about the students from the tab-separated team file.
# The student records are stored in %ratedStudents under the key built by
# makeStudentKey. References to the same records are also stored by team in
# %teamsByName, in file order.
#
# Parameter: the name of the team information file.
# Returns: nothing.
# Exits: status 2 if the file cannot be opened, status 3 if no student record
#   was read.
#
# Lines that are not well-formed are reported on STDERR and skipped. A student
# key that occurs more than once is also reported, because %ratedStudents can
# hold only one record per key.
sub getStudents($)
{
    my ($teamInfoFilename) = @_;

    if ($verbose) {
        print "Reading team information from $teamInfoFilename\n";
    }

    # Three-argument open with a lexical handle: the filename is never
    # interpreted as an open mode and the handle does not leak as a global.
    my $teamInfo;
    if (!open($teamInfo, '<', $teamInfoFilename)) {
        print STDERR "Can not open team info file $teamInfoFilename: $!\n";
        exit 2;
    }

    while (my $line = <$teamInfo>) {
        # Remove line terminators (DOS or Unix) and any trailing whitespace
        $line =~ s/\s+$//;

        # Ignore blank lines in the tabbed team file
        next if $line !~ /\S/;

        # Make sure the line is well-formed as id<tab>name<tab>RIT id/email,
        # then parse the fields and strip any quotes around the name. A name
        # consisting only of quotes leaves nothing and is rejected too.
        my ($teamId, $name, $email);
        if ($line =~ /^\S[\S ]*\t\S[\S ]+\t[0-9a-zA-Z@\.]+$/) {
            ($teamId, $name, $email) = split(/\t/, $line);
            ($name) = $name =~ /"*([^"]+)"*/;
        }

        if (!defined $name) {
            print STDERR
              "Team info file line $. does not appear to be well-formed.\n";
            next;
        }

        # If the full name has an embedded comma this is considered to be
        # what separates the last name and first name.
        my $lastName = '';
        my $firstName = '';

        if ($name =~ /([^,]+), *(.*)/) {
            $lastName = $1;
            $firstName = $2;
        }
        else {
            $lastAndFirstNameIdentified = FALSE;
        }

        # There are constants at the top of this file that define indices
        # into this array of student information which is name, last name,
        # first name, login id, ratings, public comments, individual factor,
        # position in team array, confidential comments, key, and instructor
        # comments.
        my $student =
            [$name, $lastName, $firstName, $email, [], [], 0, 0, [], '', ''];

        my $studentKey = makeStudentKey($student);
        if (exists $ratedStudents{$studentKey}) {
            print STDERR "Duplicate student key $studentKey on team info " .
                "file line $.; only the last entry will receive ratings.\n";
        }
        $ratedStudents{$studentKey} = $student;
        $student->[KEY] = $studentKey;

        # Each team entry holds the array of its students and the instructor
        # team comment, which starts out empty.
        if (!defined($teamsByName{$teamId})) {
            $teamsByName{$teamId} = [[], ''];
        }

        # Remember the position of this teammate in the team array. This
        # will be the slot where this teammate will place ratings for
        # everyone on the team.
        my $teammates = $teamsByName{$teamId}[STUDENTS];
        $student->[POSITION] = scalar @{$teammates};
        push(@{$teammates}, $student);
    }
    close($teamInfo);

    # Fill the rating computation area of every student with one zero per
    # team member (zero means no evaluation was submitted).
    for my $teamName (keys(%teamsByName)) {
        my $teammates = $teamsByName{$teamName}[STUDENTS];
        my $numberStudents = @{$teammates};
        for my $student (@{$teammates}) {
            @{$student->[RATINGS]} = (0) x $numberStudents;
        }
    }

    if (keys(%ratedStudents) > 0) {
        if ($verbose) {
            print "Completed reading team information\n";
        }
    }
    else {
        print STDERR "No team information found.\n";
        exit 3;
    }

    return;
}

# Identify the data contained in each column in the survey data.
#
# Parameter: a reference to the array of column header texts.
# Returns: nothing. Sets $teamColumn and appends to @userNameColumns and
#   @questionColumns. Entries in both arrays are relative to the first column
#   to the right of the team column.
# Exits: status 3 if the team column, the user name columns or the question
#   columns could not be identified.
#
# Every column to the right of the team column gets exactly one entry in
# @questionColumns so that the entries stay aligned with the data fields.
sub identifyColumns {
    my ($fields) = @_;

    my $columnNum = 0;
    $teamColumn = -1;

    foreach my $columnText (@{$fields}) {
        # We will process the CSV data from the column to the right of the
        # team column to the end of the line. For a single team survey
        # without a team question, this will be the column to the left of
        # the user name column.
        if ($columnText =~ /^${\TEAM_KEY}/) {
            $teamColumn = $columnNum;
        }

        # For each row, we will first look in these columns to see who the
        # rater is.
        elsif ($columnText =~ /^${\NAME_KEY}/) {
            # Accommodate a single team survey that does not have a team
            # question.
            if ($teamColumn == -1) {
                $teamColumn = $columnNum - 1;
            }
            push(@userNameColumns, $columnNum - ($teamColumn + 1));
            push(@questionColumns, $ignoreColumn);
        }

        # In the information columns, we save who the rated student is and
        # the index in the student record where this type of question data
        # gets stored.
        else {
            my $knownColumn = FALSE;

            foreach my $headerSearchItem (@headerRegexp) {
                my ($nameKey) =
                    $columnText =~ $headerSearchItem->[HEADER_REGEXP];
                next if !defined $nameKey;

                # We recognized this column header
                $knownColumn = TRUE;

                if (exists $ratedStudents{$nameKey}) {
                    # This is a known student so save the column information
                    push(@questionColumns,
                         [$nameKey, $headerSearchItem->[COMMENT_INDEX]]);
                }
                else {
                    print STDERR
                        "Ignoring unknown student $nameKey in header column " .
                        $columnNum . "\n";
                    push(@questionColumns, $ignoreColumn);
                }

                # The column has been recorded. Stop here so that it can
                # never be recorded a second time by a later pattern, which
                # would shift every following column.
                last;
            }

            # If we are past the team column, and we did not recognize this
            # column header ignore it.
            if (($teamColumn != -1) && !$knownColumn) {
                push(@questionColumns, $ignoreColumn);

                if ($verbose) {
                    print 'Ignoring unrecognized column ',$columnNum,"\n";
                }
            }
        }

        $columnNum++;
    }

    if ($verbose) {
        print "Team column: $teamColumn\n";
        print "User name columns: ", join(', ',@userNameColumns), "\n";
        print "Question columns\n", Dumper(@questionColumns);
    }

    if (($teamColumn == -1) || (@userNameColumns == 0) ||
        (@questionColumns == 0)) {
        print STDERR "CSV columns were not identified correctly.\n";
        print STDERR ' Team column: ',$teamColumn,
            ' Question columns count: ', scalar @questionColumns,
            "\nUser name columns: ", join(', ',@userNameColumns),"\n";
        exit 3;
    }

    return;
}

# Read the next logical line from the CSV input (the files named on the
# command line, or standard input), handling line terminations, entries with
# funky characters, and newlines embedded in quoted fields.
#
# Returns: the logical line without its line terminator.
# Exits: status 3 if the input ends before a complete line could be built.
#
# A logical line is complete when it holds an even number of double quotes.
# Physical lines that continue a quoted field are joined with a single space
# so that the words on either side of the embedded newline stay separated.
sub readCSVLine
{
    my $lineRead = FALSE;
    my $csvLine = '';

    # A few things cause problems for parsing the CSV data: line terminators,
    # funky characters, and embedded newlines in fields. This loop will try
    # to deal with all of those.
    while (!$lineRead && !eof()) {
        # A lexical is used so the caller's $_ is left alone.
        my $line = <>;
        last if !defined $line;

        # Remove DOS or Unix line endings and any trailing whitespace
        $line =~ s/\s+$//;

        # Try to handle some known funky characters that are in the input
        $line =~ tr/\x{2018}-\x{201F}\x{2032}-\x{2037}/''''""""'""`""/;

        # Build up the full CSV line
        if (($csvLine ne '') && ($line ne '')) {
            $csvLine .= ' ';
        }
        $csvLine .= $line;

        # If there are no "s or an even number this should be a complete line
        if ((($csvLine =~ tr/"//) % 2) == 0) {
            $lineRead = TRUE;
        }
    }

    if (!$lineRead) {
        print STDERR "Could not build line from CSV file. Failed on line $.\n";
        print STDERR $csvLine,"\n";
        exit 3;
    }

    return $csvLine;
}

# Helper routine to report an error parsing a line from the CSV file into
# fields.
#
# Parameters: the text of the line that failed to parse, and the CSV parser
#   object whose parse just failed (it must provide error_diag and
#   error_input).
# Does not return: the diagnostics are written to STDERR and the script
#   exits with status 2.
sub reportCSVParseError
{
    # Both arguments are taken from @_. Assigning from ($_[0]) alone would
    # leave the parser undefined and make the calls below die.
    my ($csvLine, $csvParser) = @_;

    print STDERR "Failed to parse line $..\n";
    my ($code, $msg, $pos) = $csvParser->error_diag();
    print STDERR "code: $code  position: $pos\n";
    print STDERR $msg,"\n";

    # Fall back to the line we were given if the parser has no record of
    # the failing input.
    my $badInput = $csvParser->error_input();
    if (!defined $badInput) {
        $badInput = defined $csvLine ? $csvLine : '';
    }
    print STDERR "Bad input:\n",$badInput,"\n";

    exit 2;
}

# Collect peer evaluation data from the raw CSV output.
#
# Reads three heading rows (the second one identifies the columns) and then
# every data row. For each row whose rater is a known student, the ratings
# and comments in the row are stored in the record of the rated student, in
# the slot given by the rater's position on the team.
#
# Returns: nothing.
# Exits: status 2 if the parser cannot be created or a row cannot be parsed
#   (via reportCSVParseError); status 3 from the routines it calls.
#
# Rows that are shorter or longer than the heading row are tolerated: missing
# fields are treated as empty, and fields without a heading are ignored. A
# rating answer that is not one of the known rating texts is reported and
# skipped, which leaves the default "no evaluation" value in place.
sub getRatings
{
    # The text of the logical line currently being parsed.
    my $csvLine;

    # All the rest of the information we care about has the format of
    # Question-text Rater-id Rated-id Answer in CSV file. The binary
    # attribute lets comments contain characters outside plain ASCII without
    # causing a parse error.
    my $ratingsCSV = Text::CSV->new({ binary => 1 });
    if (!defined $ratingsCSV) {
        print STDERR "Could not create CSV parser: " .
            Text::CSV->error_diag() . "\n";
        exit 2;
    }

    # ToDo - if using single team surveys there will be multiple files on the
    # command line with CSV data. After each one, we need to reset the line
    # counter and parse the header lines again. It is not clear whether this
    # will actually work. May need to clear many of the global variables too.

    # The first and third heading rows get skipped. The second row holds the
    # headings which are parsed to identify the contents of each column.
    my $dummy = readCSVLine();

    $csvLine = readCSVLine();
    if ($ratingsCSV->parse($csvLine)) {
        my (@headingFields) = $ratingsCSV->fields();
        identifyColumns(\@headingFields);
    }
    else {
        print STDERR "Could not parse heading row.\n";
        reportCSVParseError($csvLine, $ratingsCSV);
        exit 2;
    }

    $dummy = readCSVLine();

    while (!eof()) {
        $csvLine = readCSVLine();

        # A parse error here is one that we do not know how to recover from.
        if (!$ratingsCSV->parse($csvLine)) {
            reportCSVParseError($csvLine, $ratingsCSV);
            next;
        }

        # Get the fields and slice off the left side through the team
        # column.
        my @csvFields = $ratingsCSV->fields();
        @csvFields = @csvFields[$teamColumn + 1 .. @csvFields - 1];

        # Look in all the user name fields to find who did the rating
        my $rater = '';
        my $raterOK = FALSE;
        foreach my $userNameIndex (@userNameColumns) {
            my $userName = $csvFields[$userNameIndex];
            next if !defined $userName || $userName eq '';

            if ($rater eq '') {
                $rater = $userName;
                if (defined $ratedStudents{$rater}) {
                    if ($verbose) {
                        print "Rater is $rater\n";
                    }
                    $raterOK = TRUE;
                }
                else {
                    print STDERR
                        "Rater $rater not known on line $. column ",
                        $userNameIndex + $teamColumn + 1, "\n";
                }
            }
            else {
                print STDERR "Second rater found on line $. column ",
                    $userNameIndex + $teamColumn + 1, "\n";
            }
        }

        # Process this line of rating data only if we knew the rater.
        next if !$raterOK;

        my $raterPosition = $ratedStudents{$rater}[POSITION];

        for my $fieldIndex (0 .. $#csvFields) {
            my $field = $csvFields[$fieldIndex];
            my $column = $questionColumns[$fieldIndex];

            # For each field, process the information if there is something
            # in the field more than a single word and it is not one being
            # ignored. All the rating answers are more than 4 characters
            # long. A field beyond the last heading has no column entry and
            # is ignored as well.
            next if !defined $field || length($field) <= 4;
            next if !defined $column
                || $column->[QUESTION_INDEX] == IGNORE_FIELD;

            my $rated = $column->[RATED_KEY];
            my $questionIndex = $column->[QUESTION_INDEX];

            # ToDo Checks to make:
            #   - the rated student is on the same team as the rater.
            #     Report this only once for each response. Could be caused
            #     by the student selecting the wrong team.

            # The overall rating question is handled differently: the
            # answer text is converted to its numeric rating value.
            if ($questionIndex == RATINGS) {
                if (!exists $ratingValues{$field}) {
                    print STDERR "Unknown rating '$field' for $rated " .
                        "on line $. ignored\n";
                    next;
                }
                $ratedStudents{$rated}[$questionIndex][$raterPosition] =
                    $ratingValues{$field};
            }
            else {
                $ratedStudents{$rated}[$questionIndex][$raterPosition] =
                    $field;
            }
        }
    }

    return;
}


# Parse states and header types for the instructor comment file. parseHeader
# returns TEAM_COMMENT or STUDENT_COMMENT as the type of a header, and
# readInstructorComments uses HEADER, TEAM_COMMENT and STUDENT_COMMENT as its
# parse state. PARSE_ERROR is not referenced in the code visible in this
# section.
use constant HEADER => 0;
use constant TEAM_COMMENT => 1;
use constant STUDENT_COMMENT => 2;
use constant PARSE_ERROR => 3;

# Check a line from the instructor comment file for the two types of headers.
#
# Parameter: the text of one line.
# Returns a list:
#   (TRUE, TEAM_COMMENT, team-id)                 for [[Team team-id]]
#   (TRUE, STUDENT_COMMENT, student-key, factor)  for [[student-key]] or
#                                                 [[student-key factor]]
#   (FALSE)                                       if the line is not a header
# The factor is the empty string when the header does not give one.
#
# The override factor must be separated from the student key by whitespace
# and must be a plain decimal number. Digits that are part of the key itself,
# such as a login id like abc1234, therefore stay in the key instead of being
# taken as a factor.
sub parseHeader {
    my ($text) = @_;

    # Team comment header [[Team id]]. Whitespace between the id and the
    # closing brackets is not part of the id.
    if ($text =~ /^\[\[Team\s+(.*?)\s*\]\]/) {
        return (TRUE, TEAM_COMMENT, $1);
    }

    # Student comment header [[student-key factor-override]]
    # Use a non-greedy match on student-key so that the factor override
    # can be isolated, if it exists.
    if ($text =~ /^\[\[(.+?)(?:\s+(\d+(?:\.\d*)?|\.\d+))?\s*\]\]/) {
        my $studentKey = $1;
        my $factorOverride = defined $2 ? $2 : '';
        return (TRUE, STUDENT_COMMENT, $studentKey, $factorOverride);
    }

    return (FALSE);
}


# This will read the instructor comments on teams and on individual team
# members, and any factor overrides, from the instructor file.
#
# Parameter: the name of the instructor comments file.
# Returns: nothing. Comment text is appended to the INSTRUCTOR_TEAM element of
#   the team entry or the INSTRUCTOR_STUDENT element of the student record,
#   with the lines of one comment joined by newlines. A factor given in a
#   student header replaces the FACTOR element of that student's record.
# Exits: status 2 if the file cannot be opened, status 4 on an unexpected
#   parse state.
#
# A header that names a team or student that is not known is reported on
# STDERR and the comment lines below it are discarded. No entry is created
# for it in %teamsByName or %ratedStudents.
sub readInstructorComments {
    my ($commentsFilename) = @_;

    if ($verbose) {
        print "Reading instructor comments from $commentsFilename\n";
    }

    # Three-argument open with a lexical handle: the filename is never
    # interpreted as an open mode and the handle is closed when we are done.
    my $comments;
    if (!open($comments, '<', $commentsFilename)) {
        print STDERR
            "Can not open instructor comments file $commentsFilename: $!\n";
        exit 2;
    }

    my $parseState = HEADER;

    # Reference to the string that the current comment is appended to
    my $currentComment;

    # Comment lines under a header for an unknown team or student go here
    my $discardedComment = '';

    while (my $line = <$comments>) {
        # Remove line terminators and any trailing whitespace
        $line =~ s/\s+$//;

        # Ignore blank lines in the comments file.
        next if $line !~ /\S/;

        # We are either looking for a header line or building up comment
        # text.
        my ($headerFound, $headerType, $payload1, $payload2) =
            parseHeader($line);

        # Determine which type of field is being started and save the
        # associated information for building up the comment.
        if ($headerFound) {
            if ($headerType == TEAM_COMMENT) {
                if ($verbose) {
                    print "Header for Team $payload1\n";
                }
                $parseState = TEAM_COMMENT;

                if (exists $teamsByName{$payload1}) {
                    $currentComment =
                        \$teamsByName{$payload1}[INSTRUCTOR_TEAM];
                }
                else {
                    print STDERR "Unknown team $payload1 in instructor " .
                        "file at line $.; comments ignored\n";
                    $discardedComment = '';
                    $currentComment = \$discardedComment;
                }
            }

            else {
                if ($verbose) {
                    print "Header for $payload1";
                }
                $parseState = STUDENT_COMMENT;

                if (exists $ratedStudents{$payload1}) {
                    # Override the factor if one was in the header
                    if (defined $payload2 && $payload2 ne '') {
                        if ($verbose) {
                            print " changing factor from " .
                                "$ratedStudents{$payload1}[FACTOR] to " .
                                "$payload2";
                        }

                        $ratedStudents{$payload1}[FACTOR] = $payload2;
                    }

                    $currentComment =
                        \$ratedStudents{$payload1}[INSTRUCTOR_STUDENT];
                    if ($verbose) {
                        print "\n";
                    }
                }
                else {
                    if ($verbose) {
                        print "\n";
                    }
                    print STDERR "Unknown student $payload1 in instructor " .
                        "file at line $.; comments ignored\n";
                    $discardedComment = '';
                    $currentComment = \$discardedComment;
                }
            }
        }

        # Build up the lines of text for student or team comments
        elsif (($parseState == STUDENT_COMMENT) ||
               ($parseState == TEAM_COMMENT)) {
            if (${$currentComment} ne '') {
                ${$currentComment} .= "\n";
            }
            ${$currentComment} .= $line;
        }

        # This state is only used at the start of reading the file which
        # should begin with a header
        elsif ($parseState == HEADER) {
            print STDERR "Expecting header in instructor file at line $.\n";
        }
        else {
            print STDERR "Error in parsing instructor file at line $.\n";
            exit 4;
        }
    }

    close($comments);

    return;
}


# This will output the peer evaluations for each student who was evaluated
# in a compressed text format.
#
# Takes no arguments and returns nothing. The output is written to the file
# named by $evalsFilename; the script exits with status 3 if that file cannot
# be opened for writing.
#
# For each team (in sorted team-name order) and each student on the team, one
# summary line is written holding the full name, login id, factor (two
# decimals) and the rating category labels joined by '/', followed by each
# defined feedback comment written through the COMPRESSED format.
sub outputEvalsCompressed()
{
    if($verbose) {
	print "Outputting evaluation comments in compressed text format.\n";
    }
    
    if(!open(COMPRESSED,">",$evalsFilename)) {
	print STDERR
	    "Error opening evaluations file $evalsFilename.\n\t$!\n";
	exit 3;
   }

    # Print out the information for all rated students grouped by team
    foreach my $teamName (sort(keys(%teamsByName))) {
	foreach my $student (@{$teamsByName{$teamName}[STUDENTS]}) {
	    my $ratings = @{$student}[RATINGS];

	   # NOTE(review): the name and login id are interpolated into the
	   # printf format string itself, so a '%' in either would be read
	   # as a conversion - confirm names can never contain one.
	   printf COMPRESSED "@{$student}[FULLNAME] @{$student}[LOGINID] %.2f ",
		"@{$student}[FACTOR]";
	    
	    # Ratings array has rating, weighted rating, normalized rating
	    # so it is three times larger than the number of raters.
	    my $numRaters = @{$ratings}/3;

	    # The raw ratings are sorted numerically, highest first, and
	    # each one is replaced by its category label.
	    print COMPRESSED join('/', map {$ratingCategories[$_]}
		   reverse sort {$a <=> $b} @{$ratings}[0 .. ($numRaters - 1)]);
	    print COMPRESSED "\n";
	    
            # Slots of raters who gave no feedback are undefined and are
            # skipped. The format below wraps each comment over as many
            # lines as it needs (~~) and follows it with a line holding a
            # single space.
            # NOTE(review): per perlform a ^ field removes the text it
            # prints from its variable, and $raterComment aliases the stored
            # feedback element - the stored comment is presumably emptied by
            # the write; confirm nothing needs it afterwards.
            foreach my $raterComment (@{@{$student}[FEEDBACK]}) {
		if(defined $raterComment) {
format COMPRESSED =
  ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< ~~
$raterComment
 
.
                    write COMPRESSED;
		}
            }
        }
    }

    close COMPRESSED;
    return;
}


# This will output the peer evaluations for each student who was evaluated
# substituting into fields in the supplied boilerplate file.
#
# Takes no arguments and returns nothing. The output is written to the file
# named by $evalsFilename; the script exits with status 3 if that file cannot
# be opened for writing.
#
# For each team (in sorted team-name order) and each student on the team,
# every line held in @evalBoilerplate is output once with the keywords
# replaced by that student's information. A line containing PEERCOMMENTS is
# not output itself; the student's feedback comments are output in its place.
sub outputEvalsBoilerplate()
{
    if($verbose) {
	print "Outputting evaluation comments using boilerplate file.\n";
    }
    
    if(!open(FORMATTED,">",$evalsFilename)) {
	print STDERR
	    "Error opening evaluations file $evalsFilename.\n\t$!\n";
	exit 3;
   }

    # Print out the information for all rated students grouped by team
    foreach my $teamName (sort(keys(%teamsByName))) {
	foreach my $student (@{$teamsByName{$teamName}[STUDENTS]}) {
	    my $ratings = @{$student}[RATINGS];
	    
	    foreach (@evalBoilerplate) {
		# Work on a copy so the boilerplate line in $_ stays
		# unchanged for the next student.
		my $outputText = "$_";
		my $printHandled = FALSE;

		# ToDo - we don't need the if test. Just the s/// will work.
		# Each substitution replaces only the first occurrence of
		# its keyword on the line (no /g), and the substitutions
		# run in this order on the same text, so a value inserted
		# earlier is scanned again by the later ones.
		$outputText =~ s/EMAIL/@{$student}[LOGINID]\@rit.edu/;
		$outputText =~ s/LOGINID/@{$student}[LOGINID]/;
		$outputText =~ s/FULLNAME/@{$student}[FULLNAME]/;
		$outputText =~ s/FIRSTNAME/@{$student}[FIRSTNAME]/;
		$outputText =~ s/LASTNAME/@{$student}[LASTNAME]/;
		if(@{$student}[INSTRUCTOR_STUDENT] ne '') {
		    $outputText =~
			s/INSTRUCTORSTUDENT/@{$student}[INSTRUCTOR_STUDENT]/;
		}
		else {
		    $outputText =~
			s/INSTRUCTORSTUDENT/No additional instructor comments/;
		}
		if($teamsByName{$teamName}[INSTRUCTOR_TEAM] ne '') {
		    $outputText =~
		     s/INSTRUCTORTEAM/$teamsByName{$teamName}[INSTRUCTOR_TEAM]/;
		}
		else {
		    $outputText =~
			s/INSTRUCTORTEAM/No additional instructor comments/;
		}
		# The keyword tests from here on look at the original
		# boilerplate line in $_, not at the substituted text.
		if(/FACTOR/) {
		    my $factor = sprintf('%.2f',@{$student}[FACTOR]);
		    $outputText =~ s/FACTOR/$factor/;
		}

		if(/RATINGS/) {
		    # Ratings array has rating, weighted rating, normalized
		    # rating so it is three times larger than the number of
		    # rating.
		    my $numRaters = @$ratings/3;
		    # This inner $ratings (the label string) shadows the
		    # outer array reference from here to the end of the
		    # block; the right-hand side still uses the outer one.
		    my $ratings =
			join(" / ",map {$ratingCategories[$_]}
		 reverse sort {$a <=> $b} @{$ratings}[0 .. ($numRaters - 1)]);
		    
		    $outputText =~ s/RATINGS/$ratings/;
		}

		# When doing HTML output, the peer comments are placed in an
		# unordered list with each comment as one list item. For text,
		# they are simply formatted text.
		# NOTE(review): the comment text is written into the HTML
		# as-is, without escaping - confirm that is acceptable.
		if(/PEERCOMMENTS/) {

		    # $newList is TRUE until the <ul> has been opened;
		    # $newLI is TRUE whenever no <li> is currently open.
		    my $newList = TRUE;
		    my $newLI = TRUE;

		    for my $raterComment (@{@{$student}[FEEDBACK]}) {
			if(defined $raterComment) {
			    if($htmlOutput) {
				if($newList) {
				    print FORMATTED "<ul>\n  <li>\n";
				    $newList = FALSE;
				    $newLI = FALSE;
				}
				elsif($newLI) {
				    print FORMATTED "  <li>\n";
				    $newLI = FALSE;
				}

				print FORMATTED "  $raterComment\n";
			    }
			    else {
				format FORMATTED =
    ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< ~~
    $raterComment

.
				    write FORMATTED;
			    }
			    

			    # Only HTML output ever opens a list item, so
			    # this closes it after each comment.
			    if(!$newLI) {
				print FORMATTED "  </li>\n";
				$newLI = TRUE;
			    }
			}
		    }

		    # Close any open structures in the list
		    if($htmlOutput) {
			if(!$newLI) {
			    print FORMATTED "  </li>\n";
			}
			if(!$newList) {
			    print FORMATTED "</ul>\n";
			}
		    }
		    
		    $printHandled = TRUE;
		}

		# If this line of boilerplate has not been printed, don't
		# forget to do it here.
		if(!$printHandled) {
		    print FORMATTED $outputText,"\n";;
		}
	    }
	}
    }

    close FORMATTED;
    return;
}

# This will output the confidential comments that were made.
#
# Takes no arguments and returns nothing. The output is written to the file
# named by $confidentialFilename; the script exits with status 4 if that file
# cannot be opened for writing.
#
# Teams are processed in sorted team-name order. For each student whose
# CONFIDENTIAL array is not empty, a heading line with the student's key is
# written, then one entry per defined comment labelled with the key of the
# rater who made it, then a blank line.
sub confidentialOutput()
{
    if(!open(CONFIDENTIALFILE, ">", $confidentialFilename)) {
 	print STDERR
	    "Error opening confidential file $confidentialFilename.\n\t$!\n";
	exit 4;
    }

    # Output the confidential comments by team then if any were stored in the 
    # information array for a student on the team.
    foreach my $teamName (sort(keys(%teamsByName))) {
	foreach my $student (@{$teamsByName{$teamName}[STUDENTS]}) {
	    my $rated = @{$student}[KEY];

	    if(@{$ratedStudents{$rated}[CONFIDENTIAL]}) {
		print CONFIDENTIALFILE "CONFIDENTIAL comments on $rated\n";
		# The index into the CONFIDENTIAL array is the rater's
		# position on the team, so it is advanced for undefined
		# slots too and used to look up the rater's key.
		my $raterIndex = 0;
		foreach my $confidentialInfo
		    (@{$ratedStudents{$rated}[CONFIDENTIAL]}) {
			if(defined $confidentialInfo) {
			    my $rater =
			$teamsByName{$teamName}[STUDENTS][$raterIndex][KEY];
			    format CONFIDENTIALFILE =
@* - ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
$rater, $confidentialInfo
  ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< ~~
$confidentialInfo
.
			    write CONFIDENTIALFILE;
			}
			$raterIndex++;
	        }

                print CONFIDENTIALFILE "\n";
            }
        }
    }

    close CONFIDENTIALFILE;
    return;
}

# Calculate the peer evaluation adjustment factor for each student.
#
# Takes no arguments and returns nothing. For every team in %teamsByName
# (processed in sorted team-name order) each student's RATINGS array is
# extended in place. With N students on the team the layout afterwards is:
#
#   [0 .. N-1]     the rating given by each rater, indexed by the rater's
#                  position in the team's STUDENTS array
#   [N .. 2N-1]    the rating weight looked up in @ratingWeights, or undef
#                  when that rater gave no rating (undefined or not > 0)
#   [2N .. 3N-1]   the weight divided by that rater's average weight, or
#                  undef when there is no rating or no usable average
#
# A rater's average weight is taken over the ratings that rater actually
# gave. A student's FACTOR is the average of the normalized weights received,
# truncated toward 1 in steps of 0.05. Messages are printed to STDOUT for
# raters who gave no ratings at all and for students who received none.
sub computeFactors()
{
    if($verbose) {
        print "Computing factors.\n";
    }

    foreach my $teamName (sort(keys(%teamsByName))) {
        my @teamData = @{$teamsByName{$teamName}[STUDENTS]};
        my $numStudents = @teamData;

        # Running total and count of the weights handed out by each rater.
        # They are kept separately so that one missing rating never discards
        # or skews what the rater has already contributed.
        my @weightSum = (undef) x $numStudents;
        my @weightCount = (0) x $numStudents;

        if($verbose) {
            print "$numStudents students on team $teamName.\n";
        }

        # Determine the rating weight for each student, and accumulate the
        # sum of weights for each rater.
        for my $student (@teamData) {
            for my $raterIndex (0 .. $numStudents - 1) {
                my $rating = $student->[RATINGS][$raterIndex];
                my $weight;

                if(defined $rating && $rating > 0) {
                    $weight = $ratingWeights[$rating];
                    $weightSum[$raterIndex] += $weight;
                    $weightCount[$raterIndex]++;
                }

                # Always store something so the weight section is exactly
                # $numStudents entries long, with undef marking "no rating".
                $student->[RATINGS][$numStudents + $raterIndex] = $weight;
            }
        }

        # Compute the average rating weight for each rater who did ratings.
        my @raterAverage;
        for my $raterIndex (0 .. $numStudents - 1) {
            if($weightCount[$raterIndex] > 0) {
                $raterAverage[$raterIndex] =
                    $weightSum[$raterIndex] / $weightCount[$raterIndex];
            }
            else {
                print
                  "$teamData[$raterIndex][FULLNAME] did not provide ratings.\n";
            }
        }

        # Compute the normalized weights and factor for each student.
        for my $student (@teamData) {
            my $numRated = 0;

            # Normalize the rating weights for each student by the average
            # weight given by the rater.
            for my $raterIndex (0 .. $numStudents - 1) {
                my $rating = $student->[RATINGS][$raterIndex];
                my $average = $raterAverage[$raterIndex];
                my $normalized;

                # The average must be positive to be usable as a divisor.
                if(defined $rating && $rating > 0 &&
                   defined $average && $average > 0) {
                    $normalized =
                        $student->[RATINGS][$numStudents + $raterIndex] /
                        $average;

                    $student->[FACTOR] += $normalized;
                    $numRated++;
                }

                $student->[RATINGS][(2 * $numStudents) + $raterIndex] =
                    $normalized;
            }

            # Average the normalized rating weights for a student to be the
            # factor. int() truncates toward zero, so the result is pulled
            # to the 0.05 step that lies closest to 1.
            if($numRated > 0) {
                my $factor = $student->[FACTOR] / $numRated;
                $student->[FACTOR] = int(20 * ($factor - 1))/20 + 1;
            }
            else {
                print
                    "There are no ratings for $student->[FULLNAME], ",
                    "$student->[LOGINID].\n";
            }

            if($verbose) {
                # FACTOR may be undefined when nobody rated this student.
                my $factorText =
                    defined $student->[FACTOR] ? $student->[FACTOR] : '';

                print Dumper($student->[RATINGS]);
                print "$student->[FULLNAME] $factorText\n";
            }
        }
    }
    return;
}

# Write the ratings file named by $ratingsFilename.
#
# Takes no arguments and returns nothing. If the file cannot be opened an
# error is printed to STDERR and the script exits with status 3.
#
# One CSV row is written per student, teams in sorted name order with a blank
# line after each team. With N students on the team a row holds: the quoted
# full name, the login id, the first 2N entries of the student's RATINGS
# array printed as stored, the next N entries printed to two decimals, and
# finally the student's FACTOR to two decimals. Undefined RATINGS entries
# produce an empty field.
sub outputRatings()
{
    print "Outputting ratings to $ratingsFilename.\n" if $verbose;

    unless(open(RATINGSFILE, ">", $ratingsFilename)) {
        print STDERR "Error opening ratings file $ratingsFilename.\n\t$!\n";
        exit 3;
    }

    for my $teamName (sort keys %teamsByName) {
        my $members = $teamsByName{$teamName}[STUDENTS];
        my $teamSize = @{$members};
        my $plainEnd = (2 * $teamSize) - 1;
        my $floatEnd = (3 * $teamSize) - 1;

        for my $member (@{$members}) {
            # Name quoted, then login id.
            my $row = "\"$member->[FULLNAME]\",$member->[LOGINID]";

            # The ratings and rating weights are printed as stored.
            for my $column (0 .. $plainEnd) {
                my $value = $member->[RATINGS][$column];
                $row .= defined $value ? ",$value" : ",";
            }

            # The normalized weights are floats to two decimals.
            for my $column ((2 * $teamSize) .. $floatEnd) {
                my $value = $member->[RATINGS][$column];
                $row .= defined $value ? sprintf(",%.2f", $value) : ",";
            }

            # The factor is a float to two decimals and ends the row.
            $row .= sprintf(",%.2f\n", $member->[FACTOR]);

            print RATINGSFILE $row;
        }

        # Blank line between teams.
        print RATINGSFILE "\n";
    }

    close RATINGSFILE;
    return;
}
   
# Load the boilerplate (survey template) file named by $boilerplateFilename
# into @evalBoilerplate, one entry per line with trailing whitespace removed.
#
# Takes no arguments and returns nothing. If the file cannot be opened an
# error is printed to STDERR and the script exits with status 3. Seeing
# LASTNAME or FIRSTNAME in any line sets $lastOrFirstNameUsed; a warning is
# printed when that flag is set but $lastAndFirstNameIdentified is not.
sub readBoilerplate
{
    unless(open(BOILERPLATE, "<", $boilerplateFilename)) {
        print STDERR
            "Error opening boilerplate file $boilerplateFilename.\n\t$!\n";
        exit 3;
    }

    @evalBoilerplate = ();

    while(defined(my $templateLine = <BOILERPLATE>)) {
        # \s covers carriage returns and newlines as well as blanks, so one
        # substitution strips the line terminator and any trailing whitespace.
        $templateLine =~ s/\s+$//;

        # Only needs to be noticed once.
        if(!$lastOrFirstNameUsed && $templateLine =~ /LASTNAME|FIRSTNAME/) {
            $lastOrFirstNameUsed = TRUE;
        }

        push(@evalBoilerplate, $templateLine);
    }

    close BOILERPLATE;

    print
   "LASTNAME or FIRSTNAME used in boilerplate but not provided in all names.\n"
        if $lastOrFirstNameUsed && !$lastAndFirstNameIdentified;

    print Dumper(@evalBoilerplate) if $verbose;

    return;
}

#---------------------------------------------------------------------
#
# Start processing of raw peer evaluation input data
#
#---------------------------------------------------------------------

# Process any command line options. The option letters are the constants
# declared at the top of the file; the unary plus in each hash subscript
# forces the constant to be evaluated instead of being quoted as a string.
if(!getopts($cmdOptionKeys, \%cmdOptions)) {
    HELP_MESSAGE(); # not expected to return
}

# The SHOW_USAGE option will print a usage message
if($cmdOptions{+SHOW_USAGE}) {
    HELP_MESSAGE(); # not expected to return
}

# The VERBOSE option will track progress through the processing of comments.
# It is handled before the remaining options so they can report what they set.
if($cmdOptions{+VERBOSE}) {
    $verbose = TRUE;
    print "Verbose mode is on.\n";
}

# Change the filename prefix used for the output files created by this script.
# This must come before the options below that build output filenames from it.
if($cmdOptions{+FILENAME_PREFIX}) {
    $filenamePrefix = $cmdOptions{+FILENAME_PREFIX};

    if($verbose) {
        print "Base filename set to $filenamePrefix\n";
    }
}

# Set the format of student information which is used as the unique key for a
# student. A format that mentions LASTNAME or FIRSTNAME is recorded in
# $lastOrFirstNameUsed.
if($cmdOptions{+SET_KEY_FORMAT}) {
    $studentKeyFormat = $cmdOptions{+SET_KEY_FORMAT};

    if(($studentKeyFormat =~ /LASTNAME/) ||
       ($studentKeyFormat =~ /FIRSTNAME/)) {
        $lastOrFirstNameUsed = TRUE;
    }

    if($verbose) {
        print 'Student key format set to ' . $studentKeyFormat . "\n";
    }
}

# The confidential option will output the confidential comments made by
# students to a text file with only the confidential comments.
if($cmdOptions{+CONFIDENTIAL_OPT}) {
    $confidential = TRUE;
    $confidentialFilename = $filenamePrefix . $confidentialTail;

    if($verbose) {
        print "Outputting confidential comments\n";
    }
}

# The RATINGS_OUTPUT option will output the student's name, and the ratings
# he or she received.  The ratings will be output by team into a CSV
# format for easier import into Excel for peer evaluation factor calculations.
if($cmdOptions{+RATINGS_OUTPUT}) {
    $ratingsOutput = TRUE;
    $ratingsFilename = $filenamePrefix . $ratingsTail;

    if($verbose) {
        print "Outputting rating information\n";
    }
}

# The INSTRUCTOR_FILE option will input a file of instructor comments on
# teams and individuals.
if($cmdOptions{+INSTRUCTOR_FILE}) {
    $instructorFile = TRUE;
    $instructorFilename = $cmdOptions{+INSTRUCTOR_FILE};

    if($verbose) {
        print "Reading instructor comments from $instructorFilename\n";
    }
}

# Provide a boilerplate file for output of evaluation comments. If the file
# extension is .html the comments will be output as a <ul> element. Otherwise,
# straight text is output. If there is no boilerplate file, the output is in
# a compressed text format
if($cmdOptions{+BOILERPLATE_NAME}) {
    $boilerplateFilename = $cmdOptions{+BOILERPLATE_NAME};

    if($verbose) {
        print "Evaluation output will use $boilerplateFilename\n";
    }

    # Evaluate the extension to determine whether to use HTML or text output
    # for the evaluations file. $evalsHTMLTail is a literal suffix (it is also
    # appended to the output filename below), so it is quoted with \Q...\E to
    # keep its "." from matching any character, and anchored with \z so that
    # only a name that really ends with the suffix selects HTML output.
    if($boilerplateFilename =~ /\Q$evalsHTMLTail\E\z/) {
        $htmlOutput = TRUE;
        $evalsExtension = $evalsHTMLTail;

        if($verbose) {
            print "Evaluation comments will be in HTML\n";
        }
    }
}

# The EVALUATIONS option will output the non-confidential comments made by
# students to a text file. Each student will be introduced with a header line
# that includes all of the ratings for that student. The default instructor
# comments of "None" ends a student's record. This file is ready for use by
# the make-peereval-reports script. The output filename uses $evalsExtension,
# which the boilerplate option above may have switched to the HTML suffix.
if($cmdOptions{+EVALUATIONS}) {
    $evaluations = TRUE;
    $evalsFilename = $filenamePrefix . $evalsExtension;

    if($verbose) {
        print "Outputting student peer evaluation comments to $evalsFilename\n";
    }
}

# Now the main processing of the input data.
print "Beginning to process data\n" if $verbose;

# The first remaining command line argument is the team info filename and is
# required. Without it, report the problem, show the usage and stop.
if(@ARGV < 1) {
    print STDERR "Missing team info filename.\n";
    HELP_MESSAGE();
    exit 1;
}

# Read the team file, then drop its name from @ARGV.
# NOTE(review): getRatings() takes no argument here - presumably it reads the
# exported CSV from whatever is left in @ARGV; confirm against its definition.
getStudents($ARGV[0]);
shift @ARGV;

# This creates the student ratings hash.
getRatings();

# From the input data compute the factor as the average of the normalized
# ratings weights assigned by all raters.
computeFactors();

# Read the file of instructor team and individual student comments.
# This needs to be done after the factors are computed so that the
# instructor student comments can override the factor.
readInstructorComments($instructorFilename) if $instructorFile;

# In verbose mode list every team with its members and dump the ratings hash.
if($verbose) {
    for my $teamName (sort keys %teamsByName) {
        print "Team $teamName:\n";
        print "\t$_->[FULLNAME]\n" for @{$teamsByName{$teamName}[STUDENTS]};
    }
    print Dumper(%ratedStudents);
}

# The confidential comments are stored in the student ratings
# hash. They can be output to a file at this point, if that was
# selected from the command line.
if($confidential) {
    print "Outputting confidential comments to $confidentialFilename.\n"
        if $verbose;

    confidentialOutput();
}

outputRatings() if $ratingsOutput;

# Select evaluation output using a boilerplate file or in compressed format.
if($evaluations) {
    if(defined $boilerplateFilename) {
        readBoilerplate();
        outputEvalsBoilerplate();
    }
    else {
        outputEvalsCompressed();
    }
}
