/*------------------------------------------------------------------------- PROJECT YAO OBJECT SGML PARSER (OSGML) Version 1.0 Alpha level 5.0 An SGML Parser conforming to International Standard ISO 8879 -- Standard Generalized Markup Language ------------------------------------------------------------------------- (C) 1987-1994 IBM Corporation Licensed to Yuan-ze Institute of Technology (YUAN-ZE) for distribution under the terms of the License and Disclaimer of Warranties for Project YAO Materials, which includes the following text: "YUAN-ZE hereby grants to any user: (1) an irrevocable royalty-free, worldwide, non-exclusive license to use, execute, reproduce, display, perform and distribute copies of, and to prepare derivative works based upon these materials; and (2) the right to authorize others to do any of the foregoing." The full text of the License and Disclaimer of Warranties for Project YAO Materials should be consulted before using these materials. ------------------------------------------------------------------------- INTERIM RELEASE RESTRICTED DISTRIBUTION NOTICE Only a "final" version is subject to the YUAN-ZE license described above. A version is final if it is not identified as "Alpha" or "Beta", or if no later version is released within 60 days. Interim releases are confidential and restricted to the original recipient. ------------------------------------------------------------------------- //// osgmspec.txt /// ObjectSGML specification // Charles F. Goldfarb 1994-08-09 -------------------------------------------------------------------------*/ The files in the OSGML archive should be treated as a "source pool" in the Borland C++ 4.0 IDE. That source pool is used with the RAST target and with the targets for any applications you may define. (In a future release OSGML will be a DLL, but that is not presently possible because it currently writes its own error messages.) 
------------------------------------------------------------------------- A framework for OSGML client applications is illustrated in RAST.CPP. (It will be even simpler for real applications as the filter for suppressing uninteresting events can be used; RAST is more complex in its event handling than most real applications.) The interface to OSGML clients is contained in the PARSER.H and EVENT.H files. MODELE.H contains methods for model checking; these could be made friendlier in a future release. Because of the availability of the above files, much obsolete material has been deleted from this document. ------------------------------------------------------------------------- Object SGML currently reports the events shown in the following table, plus a few more added in this release to eliminate the last bit of data testing on the part of the application. NOTE: This information is slightly out of sync with the present release. ObjectSgml productions -=-=-=-=-=-=-=-=-=-=- PRODUCTION ========== [1] primary_events = ( SGML_START_EV | SGML_END_EV | DOCTYPE_START_EV | DOCTYPE_END_EV | LINKTYPE_START_EV | LINKTYPE_END_EV | SECTION_START_EV | SECTION_END_EV | (ELEMENT_START_EV, attribute_list_events) | ELEMENT_END_EV | DATA_EV | RE_EV | (ENTITY_START_EV, entity_events) | ENTITY_END_EV | PI_EV | COMMENT_EV | ERROR_EV | EOP_EV ) [2] attribute_list_events = (ATT_NAME_EV, (att_val_events | att_val_non_data_events))* [3] data_attribute_list_events = (DATA_ATT_NAME_EV, att_val_events)* [4] att_val_events = ( ATT_VAL_NMTOK_IN_GROUP_EV | ATT_VAL_IMPLIED_EV | ATT_VAL_INVALID_EV | ATT_VAL_CDATA_EV | ATT_VAL_NAME_EV | ATT_VAL_NMTOKEN_EV | ATT_VAL_NUMBER_EV | ATT_VAL_NUTOKEN_EV | ( ATT_VAL_TOKENS_EV, ( ATT_VAL_TOKEN_NAME_EV | ATT_VAL_TOKEN_NMTOKEN_EV | ATT_VAL_TOKEN_NUMBER_EV | ATT_VAL_TOKEN_NUTOKEN_EV )+ ) | ( ATT_VAL_COMPOSITE_EV, ( ATT_VAL_COMP_DATA_EV | ATT_VAL_COMP_CDATA_EV )+ ) | ( ATT_VAL_COMPOSITE_SDATA_EV, ( ATT_VAL_COMP_DATA_EV | ATT_VAL_COMP_CDATA_EV | 
ATT_VAL_COMP_SDATA_EV )+ ) ) [5] att_val_non_data_events = ( (ATT_VAL_NOTATION_EV, ext_id_events, data_attribute_list_events) | (ATT_VAL_ENTITY_EV, entity_events) | ATT_VAL_ID_EV | ATT_VAL_IDREF_EV | ( ATT_VAL_TOKENS_EV, ( (ATT_VAL_TOKEN_ENTITY_EV, entity_events) | ATT_VAL_TOKEN_IDREF_EV ) ) ) [6] entity_events = ( ENT_INT_SGML_EV | ENT_INT_CDATA_EV | ENT_INT_SDATA_EV | ENT_INT_PI_EV | ENT_INT_STARTTAG_EV | ENT_INT_ENDTAG_EV | ENT_INT_MS_EV | ENT_INT_MD_EV | ( (ENT_EXT_SGML_EV | ENT_EXT_SUBDOC_EV), ext_id_events ) | ( ( ENT_EXT_CDATA_EV | ENT_EXT_NDATA_EV | ENT_EXT_SDATA_EV ), ext_id_events, ENT_NOTATION_EV, ext_id_events, data_attribute_list_events ) ) Note: when the ATT_VAL_NOTATION event is produced by [5] the data_attribute_list_events describes the default attribute values as specified in <!ATTLIST #NOTATION ...>; when the ENT_NOTATION event is produced by [6] (either directly, or thru [5]+[6]), the data_attribute_list_events describes the attribute values specified (or defaulted) on the entity declaration. [7] ext_id_events = (PUBLIC_ID_EV, SYSTEM_ID_EV?) 
| SYSTEM_ID_EV | SYSTEM_ID_OMITTED_EV METHODS ======= General: event() // to make current the next event event_peek() // to peek at the next event (w/o making it current) event_repeat() // to make the next call to event() repeat the current event Specific to each event: * = ESIS event; methods are for ESIS and opt non-ESIS data Event | ESIS | non-ESIS | | SGML_START_EV | *none* | SGML_END_EV | *none* | DOCTYPE_START_EV | name() | DOCTYPE_END_EV | name() | LINKTYPE_START_EV | name() | LINKTYPE_END_EV | name() | SECTION_START_EV | name() | section_status() SECTION_END_EV | name() | * ELEMENT_START_EV | name() | is_inferred() | | is_inclusion() | | is_explicit_conref() | | element_type() * ELEMENT_END_EV | name() | element_type() * DATA_EV | value() | * RE_EV | *none* | * ENTITY_START_EV | name() | entity_def() ENTITY_END_EV | name() | entity_def() * PI_EV | value() | COMMENT_EV | *none* | * ERROR_EV | *none* | EOP_EV | *none* | * ATT_NAME_EV | name() | is_att_specified() | | is_name_omitted() * DATA_ATT_NAME_EV | name() | TAG_DATA_ DEFAULT_ATT_VAL * ATT_VAL_IMPLIED_EV | *none* | ATT_VAL_INVALID_EV | value() | * ATT_VAL_ENTITY_EV | value() | * ATT_VAL_ID_EV | value() | * ATT_VAL_IDREF_EV | value() | * ATT_VAL_NAME_EV | value() | * ATT_VAL_NMTOKEN_EV | value() | * ATT_VAL_NUMBER_EV | value() | * ATT_VAL_NUTOKEN_EV | value() | * ATT_VAL_NOTATION_EV | value() | notation_def() * ATT_VAL_NMTOK_IN_GROUP_EV | value() | * ATT_VAL_CDATA_EV | value() | * ATT_VAL_COMPOSITE_EV | value() | count() * ATT_VAL_COMPOSITE_SDATA_EV | value() | count() * ATT_VAL_COMP_DATA_EV | value() | * ATT_VAL_COMP_CDATA_EV | value() | entity_def() * ATT_VAL_COMP_SDATA_EV | value() | entity_def() * ATT_VAL_TOKENS_EV | value() | count() * ATT_VAL_TOKEN_ENTITY_EV | value() | entity_def() ATT_VAL_TOKEN_IDREF_EV | value() | ATT_VAL_TOKEN_NAME_EV | value() | ATT_VAL_TOKEN_NMTOKEN_EV | value() | ATT_VAL_TOKEN_NUMBER_EV | value() | ATT_VAL_TOKEN_NUTOKEN_EV | value() | ENT_INT_SGML_EV | value() | * 
ENT_INT_CDATA_EV | value() | * ENT_INT_SDATA_EV | value() | ENT_INT_PI_EV | value() | ENT_INT_STARTTAG_EV | value() | ENT_INT_ENDTAG_EV | value() | ENT_INT_MS_EV | value() | ENT_INT_MD_EV | value() | ENT_EXT_SGML_EV | | entity_def() * ENT_EXT_SUBDOC_EV | | entity_def() * ENT_EXT_CDATA_EV | | entity_def() * ENT_EXT_NDATA_EV | | entity_def() * ENT_EXT_SDATA_EV | | entity_def() * ENT_NOTATION_EV | name() | notation_def() * PUBLIC_ID_EV | value() | * PUBLIC_ID_EMPTY_EV | *none* | * SYSTEM_ID_EV | value() | * SYSTEM_ID_OMITTED_EV | *none* | * SYSTEM_ID_EMPTY_EV | *none* | Note: value() and name() *really* mean zstring(), string(), string_length(); when *none* is specified, those methods return an empty string (so there is no such thing as specific methods, except non-ESIS) ------------------------------------------------------------------------- Persistence is handled by a caching mechanism between the application and the event-based parser interface. The high-level interface that the cache offers enables the information objects to be kept around for any duration the application may like, and also offers the ability to navigate in the (parsed) document without re-parsing the document or a portion thereof. The cache and persistence have not yet been implemented. It should be possible to do so with standard interfaces to the cache, such that third-party databases and memory managers can be used. The following code fragments are included to show some of the possibilities envisioned. 
*/ #ifndef __OBJECT_H_ #define __OBJECT_H_ #include #include //to be added: based on HyTime proploc #include class SGMLobject { Persistence advisory, actual; public: virtual Location location() = 0; // native system location virtual Property proploc() = 0; Persistence setadvisory(Persistence); Persistence setadvisory(SGMLobject * same_as_this_object); Persistence getadvisory(); Persistence getactual(); }; #endif // local variables: // mode: c++ // end: //// persist.h /// SGML Information Object Persistence in cache // Erik Naggum #ifndef __PERSIST_H_ #define __PERSIST_H_ /* Persistency (cache storage optimization parameters) for SGML objects. A hot link is maintained when "SAME-AS" is specified in case there is a change in the value of the property for the target object. */ class Persistence { enum { EVENT, ELEMENT, ENTITY, SESSION, CONTEXT, PERMANENT } duration; }; #endif // local variables: // mode: c++ // end: