/* ------------------------------------------------------------------------
 > FILENAME:	compile_grammar
 > PURPOSE:	
 > AUTHORS:	Kevin Humphreys, Mark Hepple
 > NOTES:	
 ------------------------------------------------------------------------ */

% Record the package this file belongs to as a '$package_name'/1 fact.
% NOTE(review): presumably read by the embedding/build tooling; no reader
% of this fact is visible in this file -- confirm.
:- assert('$package_name'('shef.nlp.supple.prolog.cafe')).

% Version-control identification string for this file.
cvsid_compile_grammar("$Id: compile_grammar.pl 7085 2005-12-05 16:32:03Z ian_roberts $").


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% compile_grammar.pl
% 
% writes out a compiled form of grammar rules or lexical entries
% with features according to the table feature_table/2, and default
% values (currently for lexical entries only) according to the table
% default_table/2. 
%
% e.g.: word('president', n(person:3, number:sing)) will be rewritten as:
%       word(president, n((s_form : president), 
%                         (m_root : president), 
%                         (m_affix : _1), 
%                         (text : _2), 
%                         (person : 3), 
%                         (number : sing))).
%
% kwh 10/4/95
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


% best_parse_cats/1, filter_chart/0, rule/2 and word/2 are the predicates
% read from the consulted grammar files: their clauses may come from more
% than one file (multifile) and are retracted during compilation (dynamic).
:- multifile best_parse_cats/1, filter_chart/0, rule/2, word/2.
% best_parse_cats/2, compiled_rule/5 and compiled_word/2 are asserted by
% the compiler itself and written out by the write_* predicates.
:- dynamic best_parse_cats/1, filter_chart/0, rule/2, word/2,
	 best_parse_cats/2, compiled_rule/5, compiled_word/2.

best_parse_cats(dummy). % make this best_parse_cats/1's "home file"
rule(dummy,dummy). % make this rule/2's "home file"


% Operator declarations in force while grammar files are read and the
% compiled terms are written.
% NOTE(review): ^ does not occur elsewhere in this file; presumably it is
% used inside the grammars' semantic expressions -- confirm.
:- op(10,xfy,^).
% Feature:Value pairs such as person:3.
:- op(505,xfx,:).


%%% feature_table(+Category, -Features)
% Table of all features, in the order expected by the parser.  Features
% is the list of Name:_ slots carried by every compiled term whose functor
% is Category.  Clauses are tried top to bottom and every specific clause
% commits with a cut, so the last clause is only reached by categories
% that are not listed above it.

% categories in the seeded input chart
feature_table(list_np,
	      [s_form:_, m_root:_, m_affix:_, text:_,
	       ne_tag:_, ne_type:_, gender:_]) :- !.

% nouns and proper nouns
feature_table(Category,
	      [s_form:_, m_root:_, m_affix:_, text:_,
	       person:_, number:_]) :-
	memberchk(Category, [n,pn]), !.

% adjectives and adverbs
feature_table(Category,
	      [s_form:_, m_root:_, m_affix:_, text:_,
	       degree:_]) :-
	memberchk(Category, [jj,rb]), !.

% verbs
feature_table(v,
	      [s_form:_, m_root:_, m_affix:_, text:_,
	       person:_, number:_, tense:_, vform:_]) :- !.

% semantic categories
feature_table(sem_cat,
	      [s_form:_, text:_, type:_, kind:_, name:_]) :- !.
feature_table(sem_cat_1,
	      [s_form:_, text:_, type:_, unit:_, count:_, kind:_, name:_]) :- !.

feature_table(ne_date,
	      [s_form:_, text:_]) :- !.

% other terminals
feature_table(Category,
	      [s_form:_, m_root:_, m_affix:_, text:_]) :-
	memberchk(Category,
		  [sym,period,comma,pps,wps,sgml,date,cdg,cc,cd,dt,ex,fw,
		   in,ls,md,pdt,pos,prp,rp,to,uh,wdt,wp,wrb,top,bottom,
		   ordinal,char]), !.

% non-terminal categories in the grammars - assume all features + source
feature_table(tagged_location_np,
	      [edge:_, sem:_, head:_,
	       s_form:_, m_root:_, m_affix:_, text:_, source:_,
	       ne_tag:_, ne_type:_]) :- !.
feature_table(_Category,
	      [edge:_, sem:_, head:_,
	       s_form:_, m_root:_, m_affix:_, text:_, source:_,
	       person:_, number:_, gender:_,
	       tense:_, aspect:_, voice:_, vform:_]).


% compile_grammars(+Grammars)
% Compiles the grammar files listed in Grammars.  Shorthand for
% compile_grammars/2 for callers that have no use for the grammar
% identifier it returns.
compile_grammars(GrammarFiles) :-
	compile_grammars(GrammarFiles, _GrammarID).

% write_compiled_grammar(+OutFile)
% Writes the best_parse_cats/2 and filter_grammar/1 facts, the compiled
% rules and the compiled lexicon to OutFile, then makes the stream that
% was current on entry the current output again.
write_compiled_grammar(OutFile) :-
	telling(PreviousOutput),
	% NOTE(review): OutFile is opened, closed and opened again before
	% anything is written -- presumably so that writing starts from an
	% empty file even if OutFile was already open; confirm.
	tell(OutFile),
	told,
	tell(OutFile),
	write_best_parse_cats,
	write_compiled_grammar,
	write_compiled_lexicon,
	told,
	tell(PreviousOutput).

% write_best_parse_cats
% Writes every best_parse_cats/2 fact and then every filter_grammar/1
% fact to the current output as a quoted clause, one per line, followed
% by an empty line.  Each of the first two clauses is a failure-driven
% loop over the facts of one predicate.
write_best_parse_cats :-
	best_parse_cats(Grammar, Cats),
	writeq(best_parse_cats(Grammar, Cats)),
	write('.'),
	nl,
	fail.
write_best_parse_cats :-
	filter_grammar(Grammar),
	writeq(filter_grammar(Grammar)),
	write('.'),
	nl,
	fail.
write_best_parse_cats :-
	nl.
	

% compile_grammars(+Grammars, -GrammarID)
% Compiles the grammar files listed in Grammars into compiled_rule/5
% facts.  GrammarID is the first grammar identifier obtained from
% buchart_gensym/2 after the grammar counter has been reset.
% All results of an earlier compilation are discarded first.  This
% includes best_parse_cats/2 and filter_grammar/1: grammar numbering
% restarts on every call, so facts left over from a previous run would
% otherwise be written out against the identifiers of the new grammars.
compile_grammars(Grammars,GrammarID) :-
	% discard the output of any earlier compilation
	retractall(compiled_rule(_,_,_,_,_)),
	retractall(compiled_word(_,_)),
	retractall(best_parse_cats(_,_)),
	retractall(filter_grammar(_)),
	% restart grammar numbering and take the first grammar identifier
	retractall(gensymmark(grammar,_)),
	assert(gensymmark(grammar,0)),
	buchart_gensym(grammar,GrammarID),
	% restart rule numbering
	retractall(gensymmark(rule,_)),
	assert(gensymmark(rule,0)),
	% commit to the first successful compilation of the file list
	compile_grammars2(Grammars,GrammarID),!.


% compile_grammars2(+Files, +GrammarID)
% Consults each file of Files in turn and compiles its rules, starting
% with the grammar identified by GrammarID.  rule/2 and word/2 are
% cleared before each file is consulted.
compile_grammars2([],_) :- !.
% last file: if it declares filter_chart it is compiled like any other
% file, otherwise its best parse cats are recorded by
% compile_last_grammar/1.  This clause has no cut, so if it fails the
% next clause is tried on the same file with Rest = [].
compile_grammars2([File],GrammarID) :-
	retractall(rule(_,_)),
	retractall(word(_,_)),
	consult(File),
	((filter_chart, compile_grammar(GrammarID,_))
	;compile_last_grammar(GrammarID)),
	retractall(rule(_,_)),
	retractall(word(_,_)).
% any other file: compile it, then continue with the grammar identifier
% returned by compile_grammar/2
compile_grammars2([File|Rest],GrammarID) :-
	retractall(rule(_,_)),
	retractall(word(_,_)),
	consult(File),
	compile_grammar(GrammarID,NextGrammarID),
	compile_grammars2(Rest,NextGrammarID).


% compile_grammar(+GrammarID, -NextGrammarID)
% compiles the rule/2 clauses currently loaded into the grammar
% identified by GrammarID, based on the table given above.
% NextGrammarID is the grammar identifier to be used for the next file:
% a new one if the current grammar has ended, otherwise GrammarID itself.
%

% filter_chart is present: record best parse cats and signal end of
% current grammar.  If filter_chart or best_parse_cats/1 is missing this
% clause fails and the second clause is used instead.
compile_grammar(GrammarID,NextGrammarID) :-
	filter_chart,
	best_parse_cats(BestCats),
	assert(filter_grammar(GrammarID)),
	assert(best_parse_cats(GrammarID,BestCats)),
	compile_grammar3(GrammarID),
	% set up next grammar
	buchart_gensym(grammar,NextGrammarID),
	retractall(gensymmark(rule,_)),
	assert(gensymmark(rule,0)),
	retractall(best_parse_cats(_)),
	retractall(filter_chart).
% add rules to current grammar
compile_grammar(GrammarID,GrammarID) :-
	compile_grammar3(GrammarID),
	retractall(best_parse_cats(_)).
	
% compile_last_grammar(+GrammarID)
% Used for the final grammar file: stores the first best_parse_cats/1
% value as the best parse cats of GrammarID, adds the loaded rules to
% that grammar, and then discards all best_parse_cats/1 facts.
% Fails if there is no best_parse_cats/1 fact.
compile_last_grammar(GrammarID) :-
	best_parse_cats(FinalCats),
	assert(best_parse_cats(GrammarID, FinalCats)),
	compile_grammar3(GrammarID),
	retractall(best_parse_cats(_)).


% compile_grammar2(+GrammarID)
% Produces a compiled form of all current rule/2 and word/2 clauses:
% the rules are compiled into grammar GrammarID first, then the lexical
% entries are compiled.
%
compile_grammar2(GrammarID) :-
	compile_grammar3(GrammarID),
	compile_lexicon(GrammarID).


% compile_grammar3(+GrammarID)
% Failure-driven loop over all rule/2 clauses.  Every solution of
% compile_features/2 for a rule is stored as
%   compiled_rule(LastRHSCat, EarlierRHSCatsReversed, LHS, Number, GrammarID)
% unless a compiled rule unifying with it already exists for GrammarID.
% Always succeeds once all rules have been tried.
compile_grammar3(GrammarID) :-
	rule(Mother, Daughters),
	compile_features([Mother], [CompiledMother]),
	compile_features(Daughters, CompiledDaughters),
	% the right-hand side is stored back to front, its last category
	% separated from the rest
	reverse(CompiledDaughters, [LastDaughter|EarlierDaughters]),
	% skip duplicates
	\+ compiled_rule(LastDaughter, EarlierDaughters, CompiledMother,
			 _, GrammarID),
	buchart_gensym(rule, RuleSymbol),
	% drop the first four characters of the generated symbol and keep
	% the rest as a number, to allow use of standard sort
	atom_chars(RuleSymbol, [_,_,_,_|Digits]),
	number_chars(RuleNumber, Digits),
	assert(compiled_rule(LastDaughter, EarlierDaughters, CompiledMother,
			     RuleNumber, GrammarID)),
	fail.
compile_grammar3(_).


% compile_features(+Categories, -Compiled)
% Expands each category term of Categories to the full feature list given
% by feature_table/2, with the features written in the category unified
% into their slots.  An element of Categories may be
%   - a list of alternative categories: one solution per alternative;
%   - {Cat}, an optional category: one solution without it, one with it;
%   - a plain category term Cat(Name:Value, ...).
% Alternatives and options are enumerated on backtracking.
compile_features([],[]) :- !.
% multiple categories
compile_features([FL|Fs], Out) :-
	is_list(FL), !,
	member(F,FL), % backtrack
	compile_features([F],CF),
	compile_features(Fs,CFs),
	% CF is [] when the chosen alternative is an omitted optional
	% category, otherwise a one-element list
	append(CF,CFs,Out).
% without single optional category
compile_features([{_}|Fs], CFs) :-
	compile_features(Fs,CFs).
% with single optional category
compile_features([{F}|Fs], Out) :- !,
	compile_features([F],CF),
	compile_features(Fs,CFs),
	% append/3 alone covers both CF = [] and CF = [_]; trying
	% CF = [CF1] first and append/3 afterwards produced every
	% solution twice
	append(CF,CFs,Out).
% single non-optional category
compile_features([F|Fs], [CF|CFs]) :-
	F =.. [Cat|Features],
	feature_table(Cat,Table),
	unify_features(Features,Table),
	CF =.. [Cat|Table], !,
	compile_features(Fs,CFs).

% unify_features(+Features, ?Table)
% Unifies every Name:Value pair of Features with a slot of Table, a list
% of Name:_ pairs as produced by feature_table/2.
% error/2 is called when a feature name occurs more than once in Features
% or when a feature cannot be unified with any slot of Table.
unify_features([], _) :- !.
% duplicates are detected by feature name only, so conflicting values
% (number:sing, number:plural) are reported as a duplicate, and no
% variable of the rule is bound by the check itself
unify_features([F|Fs], _) :-
	F = Name:_,
	member(Name:_, Fs),
	error('Duplicated feature in rule', [F]).
unify_features([F|Fs], Table) :-
	member(F, Table),
	unify_features(Fs, Table), !.
unify_features([F|_], _) :-
	error('Feature in rule not defined in feature table', [F]).


% write_compiled_grammar
% Writes every compiled_rule/5 fact to the current output as a quoted
% clause, one per line (failure-driven loop); always succeeds.
write_compiled_grammar :-
	compiled_rule(LastRHS, RevRHS, LHS, RuleNum, GrammarID),
	writeq(compiled_rule(LastRHS, RevRHS, LHS, RuleNum, GrammarID)),
	write('.'),
	nl,
	fail.
write_compiled_grammar.


% default_table(+Word, -Defaults)
% Defaults is the list of Feature:Value pairs that compile_lexicon/1
% fills in for the entry of Word when the entry gives no explicit value
% for the feature: s_form and m_root both default to the word itself.
default_table(Word, [s_form:Word, m_root:Word]).


% compile_lexicon(+Arg)
% Failure-driven loop over all word/2 clauses: the category of each entry
% is expanded with compile_features/2, the values of default_table/2 are
% added, and the result is stored as compiled_word(Word, Category).
% The argument is not used.  Always succeeds.
compile_lexicon(_Unused) :-
	word(Word, Entry),
	compile_features([Entry], [Compiled]),
	% the feature slots are the arguments of the compiled category
	Compiled =.. [_Functor|Slots],
	default_table(Word, Defaults),
	add_default_values(Defaults, Slots),
	assert(compiled_word(Word, Compiled)),
	fail.
compile_lexicon(_).

% add_default_values(+Defaults, ?Slots)
% Unifies each Feature:Value pair of Defaults with the first element of
% Slots it unifies with.  A default that unifies with no slot (the
% feature is missing or already has a different value) is skipped.
% Succeeds exactly once.
add_default_values([], _) :- !.
add_default_values([Default|Rest], Slots) :-
	(   memberchk(Default, Slots)
	->  true
	;   true
	),
	add_default_values(Rest, Slots).

% write_compiled_lexicon
% Writes every compiled_word(Word, Cat) fact to the current output as a
% quoted word(Word, Cat) clause, one per line (failure-driven loop);
% always succeeds.
write_compiled_lexicon :-
	compiled_word(Word, Cat),
	writeq(word(Word, Cat)),
	write('.'),
	nl,
	fail.
write_compiled_lexicon.


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Revision 1.22  1999/02/25 16:30:15  kwh
% remove possibly incompatible categories in feature table
%
% Revision 1.21  1999/02/18 12:01:30  kwh
% allow optional categories
%
% Revision 1.20  1998/04/01 18:25:15  kwh
% added source feature - preferred by best parse if source=list for ne grammar
%
% Revision 1.19  1998/03/25 18:51:45  kwh
% affixes passed into chart
%
% Revision 1.18  1998/02/24 21:17:37  kwh
% new default ne grammar, using top/bottom
%
% Revision 1.17  1998/02/24 15:24:17  kwh
% new text feature on edges for header/body distinction
%
% Revision 1.16  1998/02/20 16:12:00  kwh
% sicstus runtime version
%
% Revision 1.15  1998/02/07 19:29:21  kwh
% use number_chars for rule ids so that ordering check works as intended
%
% Revision 1.14  1998/02/05 15:28:24  kwh
% translate all adverbs to RB
%
% Revision 1.13  1998/01/08 14:11:45  kwh
% use simple digits as rule id numbers, rather than ruleX format, to make comparison by sort correct
%
% Revision 1.12  1997/12/01 15:55:48  kwh
% rationalised top level control, merge subgrammars during compilation, pass child edges through for syntax output, and revised docs
%
% Revision 1.11  1997/11/12 13:29:10  huyck
% Added changes for top and bottom and kludged the comma problem.
%
% Revision 1.10  1997/09/30  17:24:39  kwh
% merge buchart_cascade changes back in
%
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%