
/* ------------------------------------------------------------------------
 > FILENAME:	parse_file
 > PURPOSE:	Main control for parsing a file of edge terms
 > AUTHORS:	Kevin Humphreys, Mark Hepple

 > NOTES:	format of edges assumed to be:
 >              edge(Vertex0,VertexN,
 >                   Cat, ToFind, Parents, Children,
 >                   Level,
 >                   Token0, TokenN,
 >                   EdgeID)
 >              where ToFind = [] (ie the edge is inactive)
 ------------------------------------------------------------------------ */

:- assert('$package_name'('shef.nlp.supple.prolog.cafe')).

cvsid_parse_file("$Id: parse_file.pl 7085 2005-12-05 16:32:03Z ian_roberts $").

% Horacio: 
% top level predicate
% returns the semantics
% the semantics goes to Java and to the Annotations

parse(Original,Output1):-
     retractall(external_sem_file(_)),
     retractall(semantic_output(_)),
     assert(semantic_output([])),
     parse(Original),
      semantic_output(Output),
      update_match(Output,Output1),
% new
output_file(FOUT),
open(FOUT,append,Handle),
write_semantics(Handle,Output1),
close(Handle).
% end 
%     (external_sem_file(External),
%      open(External,write,Handle),
%      write_semantics(Handle,Output1),
%      close(Handle);true).
     

parse([]) :- !.
parse(['--'|Args]) :- !, % swi-prolog flag - ignore it
    parse(Args).
% -n : non-interactive : dumping saved state
parse(['-n'|Args]) :- !.
% -v : be verbose
parse(['-v'|Args]) :-
    verbose(1), % trigger progress messages on stdout
    (retract(verbose_output(_)) 
    ;(telling(Current), assert(verbose_output(Current)))),
    parse(Args), !.
% -d : show debugging info
parse(['-d'|Args]) :-
    buchart_debug(1), !,
    ((member('-p',Args);best_parse_file(_))
    ;(telling(Current),assert(best_parse_file(Current)))), !,
    parse(['-v'|Args]).
% -ne : run first grammar only
parse(['-ne'|Args]) :-
	best_parse_cats(grammar1,Cs),
	retractall(best_parse_cats(_,_)),
	assert(best_parse_cats(grammar1,Cs)),
	parse(Args), !.
% -p parse_file : dump best parse trees into parse_file
parse(['-p', File |Args]) :-
    retractall(best_parse_file(_)),
    assert(best_parse_file(File)),
    ((member('-v',Args),assert(verbose_output(File))) ; true),
    ((retract(verbose_output(_)),assert(verbose_output(File))) ; true),
    parse(Args), !.
% -b parse_file : dump bracketed best parse trees into parse_file
parse(['-b', File |Args]) :-
    retractall(best_parse_file(_)),
    assert(best_parse_file(File)),
    assert(bracketed_parses),
    ((member('-v',Args),assert(verbose_output(File))) ; true),
    ((retract(verbose_output(_)),assert(verbose_output(File))) ; true),
    parse(Args), !.
% -c chart_file : dump chart into chart_file
parse(['-c', File |Args]) :-
    retractall(chart_file(_)),
    assert(chart_file(File)),
    parse(Args), !.
% -g grammar_file : read in and compile a single grammar
parse(['-g', File |Args]) :-
	retractall(grammar_file(_)),
	assert(grammar_file(File)),
	parse(Args), !.
% -f : filter the final chart to remove ambiguities, even if not
%      specified in the grammar file
parse(['-f'|Args]) :- 
	retractall(filter_chart),
	assert(filter_grammar(_)),
	parse(Args), !.

% -flag flag file for signal to buchart
parse(['-flag',File | Args]):-
     retractall(flag_file(_)),
     assert(flag_file(File)),
     parse(Args), !.
% -o output file : redirect stdout
parse(['-o', File |Args]) :-
    retractall(output_file(_)),
    assert(output_file(File)),
    parse(Args), !.
parse([File|Args]) :-
    ((output_file(OutFile), tell(OutFile))
    ;(telling(Current), tell(Current))),
    retractall(chart(_,_,_)),
    retractall(gensymmark(_,_)),
    vwrite('reading initial chart...'),
    read_chart_file(File,Charts),
    vwrite('done'),vnl,
    (grammar_file(Grammar); Grammar = []),
    vwrite('parsing charts...'), vnl,
    parse_charts(Charts,Grammar),told,
    vwrite('done'),vnl,
    ((chart_file(ChartFile), % do this last due to quintus seg. faults
      vwrite('writing out chart to '),vwrite(ChartFile),vnl,
      write_chart_file(File,ChartFile)) ; true),
    parse(Args),!.



% parse with all compiled-in grammars
parse_charts(Charts,[]) :- !,
	findall(Grammar, best_parse_cats(Grammar,_), Grammars),
	parse_charts2(Charts,Grammars).
% compile and parse with single grammar specified on command line
parse_charts(Charts,Grammar) :-
	retractall(best_parse_cats(_,_)),
	retractall(filter_grammar(_)),
	compile_grammars([Grammar],GrammarID),
	parse_charts2(Charts,[GrammarID]).

parse_charts2([],_).
parse_charts2([Chart|Charts],Grammars) :-
	parse_chart(Chart,Grammars),
	parse_charts2(Charts,Grammars).
	
% parse a single chart
parse_chart(chart(_:Sentence,_:Edges,_:NextEdge),Grammars) :-
        % set up gensym for edge identifiers
	LastEdge is NextEdge - 1,
	retractall(gensymmark('',_)),
	assert(gensymmark('',LastEdge)),
	% parse
	parse_chart2(Edges,Grammars,Sentence).


% last/only grammar - output best edges
parse_chart2(InputEdges,[Grammar],Sentence) :-
        % clear database
	retractall(edge(_,_,_,_,_,_,_,_,_,_)),
	sort_edges(InputEdges,Level1Edges,MaxVertex),
	parse_edges(Level1Edges,Grammar),

	((best_parse_cats(Grammar,all),
          % use all level 2 (= new) edges as best parse
	  findall(edge(S,E,C,[],0,Cs,1,TS,TE,I), % convert to level 1
	          edge(S,E,C,[],Ps,Cs,2,TS,TE,I),
	          BestEdges))
	; best_parse(Grammar,BestEdges,MaxVertex)), % converted to level 1
	% write results
	display_tdm_attributes(Sentence,BestEdges,InputEdges),

	((chart_file(_),	% cascaded version
	  % record next input chart ready to write out
	  ((filter_grammar(Grammar),
	    next_edges(BestEdges,InputEdges,NextInputEdges))
	  ;(findall(edge(S,E,C,[],0,[],1,TS,TE,I), % remove children list
		    member(edge(S,E,C,[],Ps,Cs,1,TS,TE,I),BestEdges),
		    NewBestEdges),
	    append(InputEdges,NewBestEdges,NextInputEdges))),

	  gensymmark('',X), NextEdge is X + 1,
	  assert(chart(sentence_n:Sentence,
	               edges:NextInputEdges,
		       next_edge_number:NextEdge)))
        ; true),

	((best_parse_file(_),
	  assert_edge_list(InputEdges),
	  display_best_parse(Sentence,BestEdges))
	;true).

% filter grammar - use only best edges in next parse
parse_chart2(InputEdges,[Grammar|Grammars],Sentence) :-
	filter_grammar(Grammar),
        % clear database
	retractall(edge(_,_,_,_,_,_,_,_,_,_)),
	sort_edges(InputEdges,Level1Edges,MaxVertex),
	parse_edges(Level1Edges,Grammar),
	best_parse(Grammar,BestEdges,MaxVertex),
	% keep all children of best edges at level 0 for final output
        next_edges(BestEdges,InputEdges,NextBestEdges),
	parse_chart2(NextBestEdges,Grammars,Sentence).

parse_chart2(InputEdges,[Grammar|Grammars],Sentence) :-
        % clear database
	retractall(edge(_,_,_,_,_,_,_,_,_,_)),
	sort_edges(InputEdges,Level1Edges,MaxVertex),
	parse_edges(Level1Edges,Grammar),
        % use all level 2 (= new) edges in next parse
	findall(edge(S,E,C,[],0,Cs,1,TS,TE,I),
	        edge(S,E,C,[],Ps,Cs,2,TS,TE,I),
	        NewEdges),
	append(InputEdges,NewEdges,NextInputEdges),
	parse_chart2(NextInputEdges,Grammars,Sentence).


% next_edges/3: convert all edges in best parse to level 1
next_edges([],_,[]).
next_edges([edge(S,E,C,[],Ps,Cs,_,TS,TE,I)|BestEdges],InputEdges,NextEdges) :-
        % and add children of level 2 edges as level 0 edges
	child_edges(Cs,InputEdges,ChildEdges),
	next_edges(BestEdges,InputEdges,RestEdges),
	append([edge(S,E,C,[],0,Cs,1,TS,TE,I)|ChildEdges],RestEdges,NextEdges).
next_edges([_|BestEdges],InputEdges,NextEdges) :-
	next_edges(BestEdges,InputEdges,NextEdges).

%child_edges(_,_,[]) :- !. % syntax output not required
child_edges([],_,[]).
child_edges([ID|RestIDs],InputEdges,Children) :- %nonvar(InputEdges),
	% check in both current chart and level 0 input
	(edge(S,E,C,[],Ps,Cs,_,TS,TE,ID)
        ;memberchk(edge(S,E,C,[],Ps,Cs,0,TS,TE,ID),InputEdges)),
	child_edges(Cs,InputEdges,CsChildren),
	child_edges(RestIDs,InputEdges,RestChildren),
	% convert to level 0, preventing inclusion in any future parse
	append([edge(S,E,C,[],Ps,Cs,0,TS,TE,ID)|CsChildren],RestChildren,Children).
child_edges([_|RestIDs],InputEdges,Children) :-
	child_edges(RestIDs,InputEdges,Children).



parse_edges([],GrammarID) :- !.
parse_edges([edge(V0,Vn,C,T,Ps,Cs,L,T0,Tn,EdgeID)|Edges],GrammarID) :-
%C =.. [Cat|_],write('adding '),write(Cat),
%write(' edge at '),write(V0),write('-'),write(Vn),nl,
    add_edge(V0,Vn,C,T,Ps,Cs,L,T0,Tn,EdgeID,GrammarID),
    parse_edges(Edges,GrammarID),!.


sort_edges(As,Cs,Max):- 
    sort_edges(As,[],Cs,0,Max).
	
% return sorted level 1 edges only, and remove creator
sort_edges([],Bs,Bs,Max,Max). 
sort_edges([edge(E0,E1,Cat,[],Ps,Chs,1,T1,TN,ID)|As],Bs,Cs,CMax,Max):- 
    add_to_sorted_edges(edge(E0,E1,Cat,[],0,Chs,1,T1,TN,ID),Bs,Bs1), 
    % find max vertex while we're scanning through 
    ((E1 >= CMax, NMax = E1)
    ;(E1 < CMax, NMax = CMax)),
    sort_edges(As,Bs1,Cs,NMax,Max). 
sort_edges([_|As],Bs,Cs,CMax,Max) :-
    sort_edges(As,Bs,Cs,CMax,Max).
	

add_to_sorted_edges(A,[B|Bs],[B|Cs]):- 
    A = edge(S1,_,_,_,_,_,_,_,_,_), 
    B = edge(S2,_,_,_,_,_,_,_,_,_), 
    S2 =< S1, !, % maintain input ordering for entries at same position
    add_to_sorted_edges(A,Bs,Cs). 
add_to_sorted_edges(A,Bs,[A|Bs]).


assert_edge_list([]).
assert_edge_list([edge(A,B,C,D,E,F,G,H,I,J)|Rest]) :-
	edge(A,B,C,D,E,F,G,H,I,J),
	assert_edge_list(Rest).
assert_edge_list([edge(A,B,C,D,E,F,G,H,I,J)|Rest]) :-
	assert(edge(A,B,C,D,E,F,G,H,I,J)),
	assert_edge_list(Rest).



parse_file(InFile,OutFile) :-
    (grammar_file(Grammar); Grammar = []),
    parse_file(InFile,OutFile,Grammar).

parse_file(InFile,OutFile,Grammar) :-
    retractall(chart(_,_,_,_,_)),
    read_chart_file(InFile,Charts),
    parse_charts(Charts,Grammar),
    write_chart_file(InFile,OutFile).


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Revision 1.4  2002/03/15 13:26:00  saggion
% *** empty log message ***
%
% Revision 1.5  2002/02/11 13:32:24  saggion
% *** empty log message ***
%
% Revision 1.4  2001/12/19 11:56:34  saggion
% *** empty log message ***
%
% Revision 1.3  2001/12/11 13:25:16  saggion
% nothing
%
% Revision 1.2  2001/12/04 17:46:01  saggion
% Dealing with the external call (-sem argument, parse_file.pl and buchart_io.pl)
%
% Revision 1.1  2001/12/04 13:08:26  saggion
% files and directories for buchart now in mumis (from Lasie3)
%
% Revision 1.1  2001/06/26 17:28:36  saggion
% New directory for buchart and disint resources
%
% Revision 1.3  2001/05/14 17:42:57  saggion
% Modifications to buchart
%
% Revision 1.17  1998/02/20 16:12:04  kwh
% sicstus runtime version
%
% Revision 1.16  1998/01/09 16:43:30  kwh
% tree output (-p and -b options) working again
%
% Revision 1.15  1998/01/09 15:39:34  kwh
% -ne flag to run with ne grammar only
%
% Revision 1.14  1998/01/08 14:11:47  kwh
% use simple digits as rule id numbers, rather than ruleX format, to make comparison by sort correct
%
% Revision 1.13  1997/12/09 16:38:57  kwh
% remove best parse children from output chart (for next subgrammar)
%
% Revision 1.12  1997/12/01 15:55:52  kwh
% rationalised top level control, merge subgrammars during compilation, pass child edges through for syntax output, and revised docs
%
% Revision 1.11  1997/11/18 18:47:56  kwh
% keep all leaf/intermediate edges in chart for complete syntax output
%
% Revision 1.10  1997/10/15 13:41:58  kwh
% keep child histories in best parse charts
%
% Revision 1.9  1997/10/02 17:41:53  kwh
% minor speedup
%
% Revision 1.8  1997/09/30 17:24:47  kwh
% merge buchart_cascade changes back in
%
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

