#!/home/rebelsky/perl/bin/perl
#This module deals with pages on which ambiguous annotations are made.  For
#instance, if the word "my" appears five times on the page and someone 
#tries to annotate the word "my," we must determine which "my" they are 
#referring to before we store the context.  The module determines whether 
#an annotation is ambiguous, displays the web page for picking an 
#instance, and gathers the context for unambiguous annotations.
#(POD documentation at end)

##################
# Package Begins #
##################
#package ID
package Ambiguity;

#Exporter is loaded with an empty import list; the package inherits from
#it so that the three subs named in @EXPORT are exported by default.
use Exporter ();
@ISA    = ('Exporter');
@EXPORT = ('annotationNum', 'ambiguity', 'whichOne');

#Import modules
use Search;
use Network;
use AFile;

###########################################################################
# ambiguity #
###########################################################################
# Decides whether an annotation can be attached to a page as it stands.
# The annotated text is counted with annotationNum; when it occurs exactly
# once, the text surrounding that single instance is captured and stored
# under the 'context' key of the caller's hash.
# ARGUMENTS: The html source code of the page in string form, the
# annotated text in string form, and a type glob naming the hash in which
# the data for the annotation is stored.
# RETURNS: The string "error" if the annotated text does not occur on the
# page, 1 if it occurs exactly once (the context has been stored), and 0
# if it occurs more than once (ambiguous; nothing is stored).
###########################################################################
sub ambiguity
  {
    #read in arguments
    my $html = shift;           #html source code for page being annotated
    my $text = shift;           #the annotated text
    local(*form) = shift;       #%form now aliases the caller's hash

    #how many times does the annotated text appear in the html source?
    my $count = annotationNum($html,$text);

    #it does not appear at all...there is nothing to annotate
    return "error" if ($count == 0);

    #it appears more than once...the caller has to ask which instance
    return 0 unless ($count == 1);

    #exactly one instance.  Build the search string the same way
    #annotationNum does: turn uri-escaped vertical bars back into '|' and
    #hand the text to Search->textToSearch
    $text =~ s/%7C/\|/sig;
    $text = textToSearch($text);

    #first pass: the instance, up to 100 characters in front of it, and up
    #to 100 characters after the '<' that closes its trailing delimiter
    $html =~ /(.{0,100}\W$text(<|[^a-zA-Z_0-9<>][^>]*?<).{0,100})/si;
    my $context = $1;

    #second pass: cut that down to the instance with one delimiter
    #character and at most 100 further characters on each side
    $context =~ /(.{0,100}\W$text\W.{0,100})/si;
    $context = $1;

    #vertical bars are stored uri-escaped (presumably because '|' is the
    #field separator in stored annotation files -- confirm against AFile)
    $context =~ s/\|/%7C/sig;
    $form{'context'} = $context;

    #return one for true...the annotation is all set
    return 1;
  }                             #sub ambiguity

###########################################################################
# whichOne #
###########################################################################
# Handles an ambiguous annotation.  It reads the annotation file, stores
# the page source and a copy of the annotation data (marked "temporary")
# in two fresh files in the temp directory, deletes the original
# annotation file, and then sends the client a copy of the page in which
# every instance of the annotated text is followed by a numbered link to
# number.cgi.  Each link carries the instance number, the two temporary
# file addresses and the directory of the original annotation file.
# ARGUMENTS: The html source code in string form, the address of the
# annotation file for this annotation in string form, and the client
# connection object used to send the web page response.
# RETURNS: It returns 1 to say that it ran correctly.
###########################################################################
sub whichOne
  {
    #read in arguments
    my $html = shift;           #html source code
    my $file = shift;           #file for the annotation
    my $client = shift;         #client connection
    local(*data);               #hash containing data for annotation

    #address of the numbered file in the temp directory
    my $temp_address = sub
      {
        my $i = shift;
        return "/home/rebelsky/public_html/Blazers/Annotations/Summer1999/Stored_Annotations/temp/temp$i.tmp";
      };

    #read the data from the annotation file
    readFile(*data,$file);

    #vertical bars separate the fields of the stored record, so escape any
    #that occur inside the values while the record is being built...
    foreach my $key (keys(%data))
      {
        $data{$key} =~ s/\|/%7C/sig;
      }

    #put the annotation data in the right form
    my $info = "$data{'author'}\n$data{'email'}\n$data{'date'}\/$data{'time'}\n$data{'title'}\n\|\n$data{'annotated_text'}\n\|\n$data{'annotation'}\n\|\n$data{'context'}\n\|\ntemporary\n\|\n$data{'protection'}";

    #...and put the vertical bars back in afterwards
    foreach my $key (keys(%data))
      {
        $data{$key} =~ s/%7C/\|/sig;
      }

    #step through the numbered temp files until existsAndStore reports
    #false (presumably: the file did not exist and the html was stored
    #there -- confirm against Network); that file is the temporary html
    my $slot = 1;
    $slot = $slot + 1 while (existsAndStore($temp_address->($slot),$html));
    my $temphtml = $temp_address->($slot);

    #carry on from the same number to find a home for the annotation data
    $slot = $slot + 1 while (existsAndStore($temp_address->($slot),$info));
    my $tempfile = $temp_address->($slot);

    #the directory of the original annotation file is everything in front
    #of the last slash of its address
    $file =~ /(.*)\//i;
    my $dir = $1;

    #delete the original file
    delFile($file);

    #work on a copy of the source, searching for the annotated text in the
    #form produced by Search->textToSearch
    my $new_html = $html;
    my $text = textToSearch($data{'annotated_text'});

    #remove all 'a' tags from the document
    $new_html =~ s/<a(\s)+(.|\n)*?>//ig;
    $new_html =~ s/<\/a>//ig;

    #$head collects the part of the page that has already been processed;
    #$new_html always holds the part that is still to be searched
    my $head;
    my $num = 1;                #number of the instance being linked
    while ($new_html =~ /((?:\W$text))(<|[^a-zA-Z_0-9<>][^>]*?<)/si)
      {
        my $replace = $1;       #the instance with its leading character
        my $char = $2;          #the delimiter that followed the instance

        #the instance, then its numbered link, then the saved delimiter
        my $linked = "$replace<a href=http:\/\/ravel\/Annotation\/number.cgi?$num\&$tempfile\&$temphtml\&$dir alt=\"num\"><sup>$num<\/sup><\/a>$char";

        #put the link into the page after this instance
        $new_html =~ s/\W$text(<|[^a-zA-Z_0-9<>][^>]*?<)/$linked/si;

        #everything up to the end of this instance is finished; keep
        #searching from the link onwards
        $new_html =~ /((?:.*?\W$text))((<|[^a-zA-Z_0-9<>][^>]*?<).*)/si;
        $head = $head . $1;
        $new_html = $2;
        $num = $num + 1;
      }

    #take off extra spaces from the annotated text before displaying it
    $data{'annotated_text'} =~ s/^(\s)*//si;
    $data{'annotated_text'} =~ s/(\s)*$//si;

    #the page shown to the client: a question, then the linked-up source
    my $response = <<"EOF";
<h1>
Which instance of \'$data{'annotated_text'}\' do you wish to annotate?
<\/h1>  
<meta name=\"Ravel\" content=\"disallow: .*?-.*?-.*?-.*?\$\">
$head$new_html
EOF

    #create the response object and send it to the client
    my $response_object = HTTP::Response->new(200);
    $response_object->content($response);
    $client->send_response($response_object);

    #return the true value to say that it ran correctly
    return 1;
  }                             #sub whichOne




###########################################################################
# annotationNum #
###########################################################################
# This subroutine determines the number of times that a string appears in
# an html page.  An instance is the text preceded by a non-word character
# and followed either directly by '<', or by a character that is not a
# word character, '<' or '>' with no '>' between it and the next '<'.
# Text at the very start of the page therefore does not count, since it
# has no preceding character.
# ARGUMENTS:  The source code of the page in string form and the text that
# is being searched for in string form.
# RETURNS:  The number of times the string appears in the html source (0
# if it does not appear).
###########################################################################
sub annotationNum
  {
    #read in the arguments
    my $html = shift;           #html source code
    my $text = shift;           #annotated text
    #other variables
    my $num = 0;                #times annotated text appears in source

    #turn the annotated text into a search string called from
    #Search->textToSearch (after restoring uri-escaped vertical bars)
    $text =~ s/%7C/\|/sig;
    $text = textToSearch($text);

    #Walk the page once with /g instead of re-matching from the start and
    #copying the rest of the page after every hit.  The delimiter is only
    #looked at (lookahead), not consumed, so the search resumes directly
    #after the text and the delimiter can still serve as the leading
    #non-word character of the next instance.
    while ($html =~ /(?:\W$text)(?=<|[^a-zA-Z_0-9<>][^>]*?<)/gsi)
      {
        $num = $num + 1;
      }

    #return the above number
    return $num;
  }                             #sub annotationNum

#end with a true value to show that the package loaded correctly.  A bare
#expression is used rather than `return`, because perl refuses `return`
#outside a subroutine when the file is run directly instead of being
#loaded with require/use.
1;

#####################
# Pod Documentation #
#####################
=pod

=head1 ID

=over 4

=item Package

Ambiguity.pm

=item Author

Rachel Heck

=item Description

This module deals with pages on which ambiguous annotations are made.  For 
instance, if the word "my" appears five times on the page and someone 
tries to annotate the word "my," we must determine which "my" they are 
referring to before we store the context.  The module determines whether 
an annotation is ambiguous, displays the web page for picking an instance, 
and gathers the context for unambiguous annotations.

=back

=head1 Subroutines

=over 4

=item ambiguity

This subroutine runs annotationNum to see if something is ambiguous or 
does not appear.  For unambiguous valid annotations it grabs the context 
which it stores in a hash.

=item whichOne

This edits an html page that has an ambiguous annotation adding links for 
each instance of the text then it displays the new html so that the client 
can pick the instance that he/she wants.  It also stores a temporary copy 
of the html source for use by number.cgi (which is run when someone 
chooses an instance from the web page that this subroutine displays).

=item annotationNum

This subroutine determines the number of times that a string appears in an 
html page.

=back

=head1 History

=over 4

=item [9 July 1999]

ambiguity, whichOne, and annotationNum working correctly and commented

=item [12 July 1999]

The program was modified to grab up to 100 characters on either side of 
the annotated text as opposed to only 50.

=item [13 July 1999]

Uses the networked database.

=item [14 July 1999]

Allows newlines in the context and creates unique temporary html and 
annotation files for ambiguous annotations.

=item [15 July 1999]

Fixed bugs in the way it was zeroing in on the correct instance of the 
annotated text.  It now does not allow text that is in tags to be counted 
as instances.  It also uses Network->existsAndStore instead of 
Network->fileExists and then Network->writeFile.

=item [16 July 1999]

Fixed a bug so that the context could only be as large as the annotated 
text plus 101 characters on each side.  Pop-up annotation windows will not 
run through plug-ins with the ravel server anymore.  The package containing 
readFile is now called AFile instead of Annotation.

=item [30 July 1999]

Before checking to see how many times the annotated text is in the html source code, change the uri escape code for | (%7C) back into a |.

=item [2 August 1999]

We have no clue what is going on.  The test for the while loop in annotationNum gets stuck when trying to annotate the string "research last summer" from the Web page http://www.math.grin.edu/~luebke/index.html.  I have no idea about what is going on.  Just don't try to annotate that.

=item [4 August 1999]

Web page display is being run directly through ravel now.  All cgi script 
calls are now proxy url calls.

=back

=cut

