Log in Help
Print
Home 〉 ie 〉 ie_sample.html
 

GATE Information Extraction Sample Task

The task is to extract information about succession events from the following Wall Street Journal text:

<DOC>
<DOCID> wsj93_050.0203 </DOCID>
<DOCNO> 930219-0013. </DOCNO>
<HL>    Marketing Brief:
@  Noted.... </HL>
<DD> 02/19/93 </DD>
<SO> WALL STREET JOURNAL (J), PAGE B5 </SO>
<CO>    NYTA </CO>
<IN> MEDIA (MED), PUBLISHING (PUB) </IN>
<TXT>
<p>
New York Times Co. named Russell T. Lewis, 45, president and general manager of its flagship New York Times newspaper, responsible for all business-side activities. He was executive vice president and deputy general manager. He succeeds Lance R. Primis, who in September was named president and chief operating officer of the parent.
</p>
</TXT>
</DOC> 

Output from an Information Extraction system might look like the template below. (Note: the information is not supposed to be easily human-readable in this form -- rendering it readily comprehensible is the job of a text-summarisation programme. Output in this format may be entered into an on-line database for future access and analysis.)

<TEMPLATE-9302190013-1> :=
    DOC_NR: "9302190013"
    CONTENT: <SUCCESSION_EVENT-9302190013-1>
             <SUCCESSION_EVENT-9302190013-2>
             <SUCCESSION_EVENT-9302190013-3>
             <SUCCESSION_EVENT-9302190013-4>
             <SUCCESSION_EVENT-9302190013-5>
             <SUCCESSION_EVENT-9302190013-6>
<SUCCESSION_EVENT-9302190013-1> :=
    SUCCESSION_ORG: <ORGANIZATION-9302190013-2>
    POST: "president"
    IN_AND_OUT: <IN_AND_OUT-9302190013-1>
                <IN_AND_OUT-9302190013-2>
    VACANCY_REASON: REASSIGNMENT
<SUCCESSION_EVENT-9302190013-2> :=
    SUCCESSION_ORG: <ORGANIZATION-9302190013-2>
    POST: "general manager"
    IN_AND_OUT: <IN_AND_OUT-9302190013-3>
                <IN_AND_OUT-9302190013-4>
    VACANCY_REASON: REASSIGNMENT
<SUCCESSION_EVENT-9302190013-3> :=
    SUCCESSION_ORG: <ORGANIZATION-9302190013-2>
    POST: "executive vice president"
    IN_AND_OUT: <IN_AND_OUT-9302190013-5>
    VACANCY_REASON: REASSIGNMENT
<SUCCESSION_EVENT-9302190013-4> :=
    SUCCESSION_ORG: <ORGANIZATION-9302190013-2>
    POST: "deputy general manager"
    IN_AND_OUT: <IN_AND_OUT-9302190013-7>
    VACANCY_REASON: REASSIGNMENT
<SUCCESSION_EVENT-9302190013-5> :=
    SUCCESSION_ORG: <ORGANIZATION-9302190013-1>
    POST: "president"
    IN_AND_OUT: <IN_AND_OUT-9302190013-9>
    VACANCY_REASON: OTH_UNK
<SUCCESSION_EVENT-9302190013-6> :=
    SUCCESSION_ORG: <ORGANIZATION-9302190013-1>
    POST: "chief operating officer"
    IN_AND_OUT: <IN_AND_OUT-9302190013-10>
    VACANCY_REASON: OTH_UNK
<IN_AND_OUT-9302190013-1> :=
    IO_PERSON: <PERSON-9302190013-1>
    NEW_STATUS: IN
    ON_THE_JOB: UNCLEAR
    OTHER_ORG: <ORGANIZATION-9302190013-2>
    REL_OTHER_ORG: SAME_ORG
<IN_AND_OUT-9302190013-2> :=
    IO_PERSON: <PERSON-9302190013-2>
    NEW_STATUS: OUT
    ON_THE_JOB: NO
    OTHER_ORG: <ORGANIZATION-9302190013-1>
    REL_OTHER_ORG: RELATED_ORG
<IN_AND_OUT-9302190013-3> :=
    IO_PERSON: <PERSON-9302190013-1>
    NEW_STATUS: IN
    ON_THE_JOB: UNCLEAR
    OTHER_ORG: <ORGANIZATION-9302190013-2>
    REL_OTHER_ORG: SAME_ORG
<IN_AND_OUT-9302190013-4> :=
    IO_PERSON: <PERSON-9302190013-2>
    NEW_STATUS: OUT
    ON_THE_JOB: NO
    OTHER_ORG: <ORGANIZATION-9302190013-1>
    REL_OTHER_ORG: RELATED_ORG
<IN_AND_OUT-9302190013-5> :=
    IO_PERSON: <PERSON-9302190013-1>
    NEW_STATUS: OUT
    ON_THE_JOB: NO
    OTHER_ORG: <ORGANIZATION-9302190013-2>
    REL_OTHER_ORG: SAME_ORG
<IN_AND_OUT-9302190013-7> :=
    IO_PERSON: <PERSON-9302190013-1>
    NEW_STATUS: OUT
    ON_THE_JOB: NO
    OTHER_ORG: <ORGANIZATION-9302190013-2>
    REL_OTHER_ORG: SAME_ORG
<IN_AND_OUT-9302190013-9> :=
    IO_PERSON: <PERSON-9302190013-2>
    NEW_STATUS: IN
    ON_THE_JOB: YES
    OTHER_ORG: <ORGANIZATION-9302190013-2>
    REL_OTHER_ORG: RELATED_ORG


<IN_AND_OUT-9302190013-10> :=
    IO_PERSON: <PERSON-9302190013-2>
    NEW_STATUS: IN
    ON_THE_JOB: YES
    OTHER_ORG: <ORGANIZATION-9302190013-2>
    REL_OTHER_ORG: RELATED_ORG
<ORGANIZATION-9302190013-1> :=
    ORG_NAME: "New York Times Co."
    ORG_DESCRIPTOR: "the parent"
    ORG_TYPE: COMPANY
<ORGANIZATION-9302190013-2> :=
    ORG_NAME: "New York Times"
    ORG_DESCRIPTOR: "its flagship New York Times newspaper"
   / "flagship New York Times newspaper"
   / "the newspaper"
   / "the paper"
    ORG_TYPE: COMPANY
<PERSON-9302190013-1> :=
    PER_NAME: "Russell T. Lewis"
<PERSON-9302190013-2> :=
    PER_NAME: "Lance R. Primis"