#!/usr/local/bin/perl
use strict;
use warnings;
#
# Please read all the comments down to the line that says "STOP".
# These comments are divided into three sections:
#
#     1. usage instructions
#     2. installation instructions
#     3. standard copyright
#
# Feel free to share this script with other instructors of programming
# classes, but please do not place the script in a publicly accessible
# place.  Comments, questions, and bug reports should be sent to
# moss-request@cs.berkeley.edu.
#
#
#
# Section 1.  Usage instructions
#
#  moss [-l language] [-d] [-b basefile1] ... [-b basefilen] [-m #] [-c "string"] [-x] file1 file2 file3 ...
#
# Submitted files can be in any subdirectory of the directory
# from which moss is run.  Please do not use absolute pathnames
# (e.g., /home/foo) or relative pathnames beginning with "..".
#
# The -l option specifies the source language of the tested programs.
# The current possibilities are: c (C), cc (C++), java, ml, lisp, scheme,
# pascal, ada, haskell, fortran, ascii, vhdl, html, perl, or matlab.  The default is c.
# Currently (as of 9/02) the last 6 of these are accessible using only the
# experimental server (see below).  After a period of testing the experimental
# server will be made the standard server.
#
# Example: Compare the lisp programs foo.lisp and bar.lisp:
#
#    moss -l lisp foo.lisp bar.lisp
#
#
# The -d option specifies that submissions are by directory, not by file.
# That is, files in a directory are taken to be part of the same program,
# and reported matches are organized accordingly by directory.
#
# Example: Compare the programs foo and bar, which consist of .c and .h
# files in the directories foo and bar respectively.
#
#    moss -d foo/*.c foo/*.h bar/*.c bar/*.h
#
# Example: Each program consists of the *.c and *.h files in a directory under
# the directory "assignment1."
#
#    moss -d assignment1/*/*.h assignment1/*/*.c
#
#
# The -b option names a "base file".  Moss normally reports all code
# that matches in pairs of files.  When a base file is supplied,
# program code that also appears in the base file is not counted in matches.
# A typical base file will include, for example, the instructor-supplied
# code for an assignment.  Multiple -b options are allowed.  You should
# use a base file if it is convenient; base files improve results, but
# are not usually necessary for obtaining useful information.
#
# If the -d option is used, then the -b option names a directory in which
# the base file(s) are located.  In this case, the files to be treated
# as base files must appear in the file argument list.
#
# Examples:
#
#  Submit all of the C++ files in the current directory, using skeleton.cc
#  as the base file:
#
#    moss -l cc -b skeleton.cc *.cc
#
#  Submit all of the ML programs in directories asn1.96/* and asn1.97/*, where
#  asn1.97/instructor/*.ml and asn1.96/instructor/*.ml contain the base files.
#
#    moss -l ml -b asn1.97/instructor -b asn1.96/instructor -d asn1.97/*/*.ml asn1.96/*/*.ml
#
# The -m option sets the maximum number of times a given passage may appear
# before it is ignored.  A passage of code that appears in many programs
# is probably legitimate sharing and not the result of plagiarism.  With -m N,
# any passage appearing in more than N programs is treated as if it appeared in
# a base file (i.e., it is never reported).  Option -m can be used to control
# moss' sensitivity.  With -m 2, moss reports only passages that appear
# in exactly two programs.  If one expects many very similar solutions
# (e.g., the short first assignments typical of introductory programming
# courses) then using -m 3 or -m 4 is a good way to eliminate all but
# truly unusual matches between programs while still being able to detect
# 3-way or 4-way plagiarism.  With -m 1000000 (or any very
# large number), moss reports all matches, no matter how often they appear.
# The -m setting is most useful for large assignments where one also has a base file
# expected to hold all legitimately shared code.  The default for -m is 10.
#
# Examples:
#
#   moss -l pascal -m 2 *.pascal
#   moss -l cc -m 1000000 -b mycode.cc asn1/*.cc
#
#
# The -c option supplies a comment string that is attached to the generated
# report.  This option facilitates matching queries submitted with replies
# received, especially when several queries are submitted at once.
#
# Example:
#
#   moss -l scheme -c "Scheme programs" *.sch
#
# The -x option sends queries to the current experimental version of the server.
# The experimental server has the most recent Moss features and is also usually
# less stable (read: may have more bugs).
#
# Example:
#
#   moss -x -l ml *.ml
#
#
# Section 2.  Installation instructions.
#
# On many Unix systems this script should work as is.  If you have problems
# check the following:
#
#  - You may need to change the very first line of this script
#    if perl is not in /usr/local/bin on your system.  Just replace
#    /usr/local/bin with the pathname of the directory where perl resides.
#
#  - The system programs uuencode and mail are required.
#    One of zip or tar is also required.
#    These should be accessible through $PATH when this script is run.
#    Alternatively, the full pathnames can be put here.
#
#    (If you don't have zip or tar, zip binaries for many machines
#     as well as source are available from:
#      http://mcftp.mclink.it/simtel.net/infozip/Zip.html
#    )
#
my $zip      = "zip";
my $tar      = "tar";
my $uuencode = "uuencode";
my $mail     = "mail";

#
#  - The following directories and files must be readable and writable.
#    On Unix systems there should be no need to change these settings,
#    but there is no harm in changing them, either.
#
my $tempdir     = "/tmp";
my $zipfile     = "$tempdir/mossfile";
my $mailmessage = "$tempdir/mossmail";

#
# 3. Standard Copyright
#
#Copyright (c) 1997 The Regents of the University of California.
#All rights reserved.
#
#Permission to use, copy, modify, and distribute this software for any
#purpose, without fee, and without written agreement is hereby granted,
#provided that the above copyright notice and the following two
#paragraphs appear in all copies of this software.
#
#IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR
#DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT
#OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF
#CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
#THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES,
#INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
#AND FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
#ON AN "AS IS" BASIS, AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATION TO
#PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
#
#
# STOP.  It should not be necessary to change anything below this line
# to use the script.
#
my $server = 'moss@cs.berkeley.edu';
my $noreq  = "Request not sent.";
my $usage  = "usage: moss [-l language] [-d] [-b basefile1] ... [-b basefilen] [-m #] [-c \"string\"] [-x] file1 file2 file3 ...";

#
# The userid is used to authenticate your queries to the server; don't change it!
#
my $userid = 306435351;

#
# Helpers.
#

# Remove the temporary archive and mail-message files.  Files that do not
# exist are silently skipped by unlink.
sub cleanup {
    unlink "$zipfile.zip", "$zipfile.tar", $mailmessage;
    return;
}

# Remove the temporary files, then die with the given message, so that a
# failed run does not leave partial requests behind in $tempdir.
sub fail {
    my ($message) = @_;
    cleanup();
    die $message;
}

# Run an archiver ($zip or $tar) on the given arguments WITHOUT going through
# the shell, so that submitted file names containing spaces or shell
# metacharacters are passed through verbatim.  The configured command is
# split on whitespace so that a setting such as "zip -q" keeps working as it
# did when the command line was handed to the shell.  Returns the raw status
# from system(); callers decide success by checking for the archive file.
sub run_archiver {
    my ($command, @args) = @_;
    no warnings 'exec';    # a missing archiver is handled by the caller
    return system(split(' ', $command), @args);
}

#
# Process the command line options.  This is done in a non-standard
# way to allow multiple -b's.
#
my $opt_l = "c";    # default language is c
my $opt_d = 0;      # -d: submissions are by directory
my $opt_x = 0;      # -x: use the experimental server
my $opt_m = "";     # -m: empty means "let the server use its default"
my $opt_c = "";     # -c: comment string attached to the report
my @opt_b;          # -b: base files (or base directories with -d)

while (@ARGV && $ARGV[0] =~ /^-(.)(.*)/) {
    my ($first, $rest) = ($1, $2);
    die "Unrecognized option -$first. $usage\n"
        unless $first =~ /\A[dlbmcx]\z/;
    shift @ARGV;

    # Flags that take no argument.
    if ($first eq "d") { $opt_d = 1; next; }

    #
    # The -x flag signals the server to use the current experimental (and
    # possibly unstable) version of moss.
    #
    if ($first eq "x") { $opt_x = 1; next; }

    # The remaining options take an argument, either glued to the option
    # letter ("-lcc") or as the next word on the command line ("-l cc").
    if ($rest eq '') {
        die "No argument for option -$first.\n" unless @ARGV;
        $rest = shift @ARGV;
    }
    if    ($first eq "b") { push @opt_b, $rest; }
    elsif ($first eq "l") { $opt_l = $rest; }
    elsif ($first eq "m") { $opt_m = $rest; }
    else                  { $opt_c = $rest; }
}

#
# Check a bunch of things first to ensure that the
# script will be able to run to completion.
#

#
# make sure we have a supported language
#
my @languages = qw(c cc java ml pascal ada lisp scheme haskell fortran
                   ascii vhdl html perl matlab);
my %is_supported = map { $_ => 1 } @languages;
die "Unrecognized language $opt_l (should be one of "
    . join(", ", @languages) . ").\n"
    unless $is_supported{$opt_l};

#
# -m is documented as a count of programs; reject anything else here
# rather than sending a malformed request.
#
die "Option -m requires a non-negative integer (got \"$opt_m\").\n$usage\n"
    unless $opt_m eq '' || $opt_m =~ /\A\d+\z/;

#
# Make sure there is a directory for temporary files that we can read and write.
#
die "The $tempdir directory does not exist; I need it to write temporary files.\n"
    unless -e $tempdir;
die "The $tempdir directory is either unreadable or unwritable; I need it for temporary files.\n"
    unless -r $tempdir && -w $tempdir;

#
# Make sure all the argument files exist and are readable.
#
print "Checking files . . . \n";

for my $base (@opt_b) {
    die "Base file $base does not exist. $noreq\n"  unless -e $base;
    die "Base file $base is not readable. $noreq\n" unless -r $base;
    if ($opt_d) {
        die "With -d, base file $base should name a directory. $noreq\n"
            unless -d $base;
    }
    else {
        die "Base file $base is not a text file. $noreq\n" unless -T $base;
    }
}

for my $file (@ARGV) {
    die "File $file does not exist. $noreq\n"   unless -e $file;
    die "File $file is not readable. $noreq\n"  unless -r $file;
    die "File $file is not a text file. $noreq\n" unless -T $file;
}

die "No files submitted.\n$usage\n" unless @ARGV;

print "OK\n";

#
# These files shouldn't be there, but remove them just in case.
#
cleanup();

#
# Now the real processing begins.
#

#
# calculate file arguments for zip;
# this may or may not include a base file.  With -d the -b arguments are
# directories, and the base files themselves are already in @ARGV.
#
my @fileargs = $opt_d ? @ARGV : (@opt_b, @ARGV);

#
# Build the request header: the query keyword, the user number (required!),
# the source language, any base files, and then the optional -d, -x, -m
# and -c settings, one directive per line.
#
my @request = ("query\n", "user $userid\n", "-l $opt_l\n");
push @request, map { "-b $_\n" } @opt_b;
push @request, "-d\n"         if $opt_d;
push @request, "-x\n"         if $opt_x;
push @request, "-m $opt_m\n"  if $opt_m ne '';
push @request, "-c $opt_c\n"  if $opt_c ne '';

#
# write the mail message
#
open(my $mail_fh, '>', $mailmessage)
    or fail("Could not open file $mailmessage for writing: $!\n");
print {$mail_fh} @request
    or fail("Could not write to $mailmessage: $!\n");
close($mail_fh)
    or fail("Could not write to $mailmessage: $!\n");

#
# zip and uuencode the source files
#
# The archivers receive user-supplied file names, so they are run without
# the shell.  The uuencode and mail commands below only ever see the
# settings configured at the top of this script and need shell redirection,
# so they are still run through the shell.
#
my $archive = '';
run_archiver($zip, $zipfile, @fileargs);
if (-e "$zipfile.zip") {
    $archive = "zip";
}
else {
    print "No zip available; trying tar.\n";
    run_archiver($tar, "-cvf", "$zipfile.tar", @fileargs);
    $archive = "tar" if -e "$zipfile.tar";
}
fail("Neither zip nor tar is available. $noreq\n") if $archive eq '';

system("$uuencode $zipfile.$archive mossfile.$archive >> $mailmessage") == 0
    or fail("Could not uuencode $zipfile.$archive with $uuencode. $noreq\n");

#
# send request
#
system("$mail $server < $mailmessage") == 0
    or fail("Could not send the request with $mail. $noreq\n");
print "Request sent; results will be sent to you by email.\n";

#
# cleanup
#
cleanup();