summaryrefslogtreecommitdiffstats
path: root/libxml2/test/intsubset2.xml
diff options
context:
space:
mode:
authorWilliam Joye <wjoye@cfa.harvard.edu>2016-11-17 20:58:44 (GMT)
committerWilliam Joye <wjoye@cfa.harvard.edu>2016-11-17 20:58:44 (GMT)
commit4f65b25b474bf534e40c3d3aa47d43e8457fcc9a (patch)
tree21aab4f800fd8a5046a929b3243edf9d461917d5 /libxml2/test/intsubset2.xml
parent5720be2a1ff34bf88992db24716f1e489a745e01 (diff)
parent4464fd97bf7e813b5badf1937e59f6fb5d2be646 (diff)
downloadblt-4f65b25b474bf534e40c3d3aa47d43e8457fcc9a.zip
blt-4f65b25b474bf534e40c3d3aa47d43e8457fcc9a.tar.gz
blt-4f65b25b474bf534e40c3d3aa47d43e8457fcc9a.tar.bz2
Merge commit '4464fd97bf7e813b5badf1937e59f6fb5d2be646' as 'libxml2'
Diffstat (limited to 'libxml2/test/intsubset2.xml')
-rw-r--r--libxml2/test/intsubset2.xml282
1 files changed, 282 insertions, 0 deletions
diff --git a/libxml2/test/intsubset2.xml b/libxml2/test/intsubset2.xml
new file mode 100644
index 0000000..4ae845a
--- /dev/null
+++ b/libxml2/test/intsubset2.xml
@@ -0,0 +1,282 @@
+<?xml version="1.0"?>
+<!DOCTYPE kanjidic2 [
+ <!-- Version 1.3
+ This is the DTD of the XML-format kanji file combining information from
+ the KANJIDIC and KANJD212 files. It is intended to be largely self-
+ documenting, with each field being accompanied by an explanatory
+ comment.
+
+ The file covers the following kanji:
+ (a) the 6,355 kanji from JIS X 0208;
+ (b) the 5,801 kanji from JIS X 0212;
+ (c) the 3,625 kanji from JIS X 0213 as follows:
+ (i) the 2,741 kanji which are also in JIS X 0212 have
+ JIS X 0213 code-points (kuten) added to the existing entry;
+ (ii) the 884 "new" kanji have new entries.
+
+ At the end of the explanation for a number of fields there is a tag
+ with the format [N]. This indicates the leading letter(s) of the
+ equivalent field in the KANJIDIC and KANJD212 files.
+
+ The KANJIDIC documentation should also be read for additional
+ information about the information in the file.
+ -->
+<!ELEMENT kanjidic2 (header,character*)>
+<!ELEMENT header (file_version,database_version,date_of_creation)>
+<!--
+ The single header element will contain identification information
+ about the version of the file
+ -->
+<!ELEMENT file_version (#PCDATA)>
+<!--
+ This field denotes the version of kanjidic2 structure, as more
+ than one version may exist.
+ -->
+<!ELEMENT database_version (#PCDATA)>
+<!--
+ The version of the file, in the format YYYY-NN, where NN will be
+ a number starting with 01 for the first version released in a
+ calendar year, then increasing for each version in that year.
+ -->
+<!ELEMENT date_of_creation (#PCDATA)>
+<!--
+ The date the file was created in international format (YYYY-MM-DD).
+ -->
+<!ELEMENT character (literal,codepoint, radical, misc, dic_number?, query_code?, reading_meaning?,nanori?)*>
+<!ELEMENT literal (#PCDATA)>
+<!--
+ The character itself in UTF8 coding.
+ -->
+<!ELEMENT codepoint (cp_value+)>
+ <!--
+ The codepoint element states the code of the character in the various
+ character set standards.
+ -->
+<!ELEMENT cp_value (#PCDATA)>
+ <!--
+ The cp_value contains the codepoint of the character in a particular
+ standard. The standard will be identified in the cp_type attribute.
+ -->
+<!ATTLIST cp_value cp_type CDATA #REQUIRED>
+ <!--
+ The cp_type attribute states the coding standard applying to the
+ element. The values assigned so far are:
+ jis208 - JIS X 0208-1997 - kuten coding (nn-nn)
+ jis212 - JIS X 0212-1990 - kuten coding (nn-nn)
+ jis213 - JIS X 0213-2000 - kuten coding (p-nn-nn)
+ ucs - Unicode 4.0 - hex coding (4 or 5 hexadecimal digits)
+ -->
+<!ELEMENT radical (rad_value+)>
+<!ELEMENT rad_value (#PCDATA)>
+ <!--
+ The radical number, in the range 1 to 214. The particular
+ classification type is stated in the rad_type attribute.
+ -->
+<!ATTLIST rad_value rad_type CDATA #REQUIRED>
+ <!--
+ The rad_type attribute states the type of radical classification.
+ classical - as recorded in the KangXi Zidian.
+ nelson - as used in the Nelson "Modern Japanese-English
+ Character Dictionary" (i.e. the Classic, not the New Nelson).
+ This will only be used where Nelson reclassified the kanji.
+ -->
+<!ELEMENT misc (grade?, stroke_count+, variant*, freq*, rad_name*)>
+<!ELEMENT grade (#PCDATA)>
+ <!--
+ The Jouyou Kanji grade level. 1 through 6 indicate the grade in which
+ the kanji is taught in Japanese schools. 8 indicates it is one of the
+ remaining Jouyou Kanji to be learned in junior high school, and 9
+ indicates it is a Jinmeiyou (for use in names) kanji. [G]
+ -->
+<!ELEMENT stroke_count (#PCDATA)>
+ <!--
+ The stroke count of the kanji, including the radical. If more than
+ one, the first is considered the accepted count, while subsequent ones
+ are common miscounts. (See Appendix E. of the KANJIDIC documentation
+ for some of the rules applied when counting strokes in some of the
+ radicals.) [S]
+ -->
+<!ELEMENT variant (#PCDATA)>
+ <!--
+ A cross-reference code to another kanji, usually regarded as a variant.
+ The type of cross-reference is given in the var_type attribute.
+ -->
+<!ATTLIST variant var_type CDATA #REQUIRED>
+ <!--
+ The var_type attribute indicates the type of variant code. The current
+ values are:
+ jis208 - in JIS X 0208 - kuten coding
+ jis212 - in JIS X 0212 - kuten coding
+ jis213 - in JIS X 0213 - kuten coding
+ deroo - De Roo number - numeric
+ njecd - Halpern NJECD index number - numeric
+ s_h - The Kanji Dictionary (Spahn & Hadamitzky) - descriptor
+ nelson - "Classic" Nelson - numeric
+ oneill - Japanese Names (O'Neill) - numeric
+ -->
+<!ELEMENT freq (#PCDATA)>
+ <!--
+ A frequency-of-use ranking. The 2,500 most-used characters have a
+ ranking; those characters that lack this field are not ranked. The
+ frequency is a number from 1 to 2,500 that expresses the relative
+ frequency of occurrence of a character in modern Japanese. This is
+ based on a survey in newspapers, so it is biassed towards kanji
+ used in newspaper articles. The discrimination between the less
+ frequently used kanji is not strong.
+ -->
+<!ELEMENT rad_name (#PCDATA)>
+ <!--
+ When the kanji is itself a radical and has a name, this element
+ contains the name (in hiragana.) [T2]
+ -->
+<!ELEMENT dic_number (dic_ref+)>
+ <!--
+ This element contains the index numbers and similar unstructured
+ information such as page numbers in a number of published dictionaries,
+ and instructional books on kanji.
+ -->
+<!ELEMENT dic_ref (#PCDATA)>
+ <!--
+ Each dic_ref contains an index number. The particular dictionary,
+ etc. is defined by the dr_type attribute.
+ -->
+<!ATTLIST dic_ref dr_type CDATA #REQUIRED>
+ <!--
+ The dr_type defines the dictionary or reference book, etc. to which
+ dic_ref element applies. The initial allocation is:
+ nelson_c - "Modern Reader's Japanese-English Character Dictionary",
+ edited by Andrew Nelson (now published as the "Classic"
+ Nelson).
+ nelson_n - "The New Nelson Japanese-English Character Dictionary",
+ edited by John Haig.
+ halpern_njecd - "New Japanese-English Character Dictionary",
+ edited by Jack Halpern.
+ halpern_kkld - "Kanji Learners Dictionary" (Kodansha) edited by
+ Jack Halpern.
+ heisig - "Remembering The Kanji" by James Heisig.
+ gakken - "A New Dictionary of Kanji Usage" (Gakken)
+ oneill_names - "Japanese Names", by P.G. O'Neill.
+ oneill_kk - "Essential Kanji" by P.G. O'Neill.
+ moro - "Daikanwajiten" compiled by Morohashi. For some kanji two
+ additional attributes are used: m_vol: the volume of the
+ dictionary in which the kanji is found, and m_page: the page
+ number in the volume.
+ henshall - "A Guide To Remembering Japanese Characters" by
+ Kenneth G. Henshall.
+ sh_kk - "Kanji and Kana" by Spahn and Hadamitzky.
+ sakade - "A Guide To Reading and Writing Japanese" edited by
+ Florence Sakade.
+ henshall3 - "A Guide To Reading and Writing Japanese" 3rd
+ edition, edited by Henshall, Seeley and De Groot.
+ tutt_cards - Tuttle Kanji Cards, compiled by Alexander Kask.
+ crowley - "The Kanji Way to Japanese Language Power" by
+ Dale Crowley.
+ kanji_in_context - "Kanji in Context" by Nishiguchi and Kono.
+ busy_people - "Japanese For Busy People" vols I-III, published
+ by the AJLT. The codes are the volume.chapter.
+ kodansha_compact - the "Kodansha Compact Kanji Guide".
+ -->
+<!ATTLIST dic_ref m_vol CDATA #IMPLIED>
+ <!--
+ See above under "moro".
+ -->
+<!ATTLIST dic_ref m_page CDATA #IMPLIED>
+ <!--
+ See above under "moro".
+ -->
+<!ELEMENT query_code (q_code+)>
+ <!--
+ These codes contain information relating to the glyph, and can be used
+ for finding a required kanji. The type of code is defined by the
+ qc_type attribute.
+ -->
+<!ELEMENT q_code (#PCDATA)>
+ <!--
+ The q_code contains the actual query-code value, according to the
+ qc_type attribute.
+ -->
+<!ATTLIST q_code qc_type CDATA #REQUIRED>
+ <!--
+ The q_code attribute defines the type of query code. The current values
+ are:
+ skip - Halpern's SKIP (System of Kanji Indexing by Patterns)
+ code. The format is n-nn-nn. See the KANJIDIC documentation
+ for a description of the code and restrictions on the
+ commercial use of this data. [P]
+
+ sh_desc - the descriptor codes for The Kanji Dictionary (Tuttle
+ 1996) by Spahn and Hadamitzky. They are in the form nxnn.n,
+ e.g. 3k11.2, where the kanji has 3 strokes in the
+ identifying radical, it is radical "k" in the SH
+ classification system, there are 11 other strokes, and it is
+ the 2nd kanji in the 3k11 sequence. (I am very grateful to
+ Mark Spahn for providing the list of these descriptor codes
+ for the kanji in this file.) [I]
+ four_corner - the "Four Corner" code for the kanji. This is a code
+ invented by Wang Chen in 1928. See the KANJIDIC documentation
+ for an overview of the Four Corner System. [Q]
+
+ deroo - the codes developed by the late Father Joseph De Roo, and
+ published in his book "2001 Kanji" (Bojinsha). Fr De Roo
+ gave his permission for these codes to be included. [DR]
+ misclass - a possible misclassification of the kanji according
+ to one of the code types. (See the "Z" codes in the KANJIDIC
+ documentation for more details.)
+
+ -->
+<!ELEMENT reading_meaning (rmgroup*, nanori*)>
+ <!--
+ The readings for the kanji in several languages, and the meanings, also
+ in several languages. The readings and meanings are grouped to enable
+ the handling of the situation where the meaning is differentiated by
+ reading. [T1]
+ -->
+<!ELEMENT nanori (#PCDATA)>
+ <!--
+ Japanese readings that are now only associated with names.
+ -->
+<!ELEMENT rmgroup (reading*, meaning*)>
+<!ELEMENT reading (#PCDATA)>
+ <!--
+ The reading element contains the reading or pronunciation
+ of the kanji.
+ -->
+<!ATTLIST reading r_type CDATA #REQUIRED>
+ <!--
+ The r_type attribute defines the type of reading in the reading
+ element. The current values are:
+ pinyin - the modern PinYin romanization of the Chinese reading
+ of the kanji. The tones are represented by a concluding
+ digit. [Y]
+ korean_r - the romanized form of the Korean reading(s) of the
+ kanji. The readings are in the (Republic of Korea) Ministry
+ of Education style of romanization. [W]
+ korean_h - the Korean reading(s) of the kanji in hangul.
+ ja_on - the "on" Japanese reading of the kanji, in katakana. A
+ second attribute r_status, if present, will indicate with
+ a value of "jy" whether the reading is approved for a
+ "Jouyou kanji".
+ ja_kun - the "kun" Japanese reading of the kanji, in hiragana.
+ Where relevant the okurigana is also included separated by a
+ ".". Readings associated with prefixes and suffixes are
+ marked with a "-". A second attribute r_status, if present,
+ will indicate with a value of "jy" whether the reading is
+ approved for a "Jouyou kanji".
+ -->
+<!ATTLIST reading r_status CDATA #IMPLIED>
+ <!--
+ See under ja_on and ja_kun above.
+ -->
+<!ELEMENT meaning (#PCDATA)>
+ <!--
+ The meaning associated with the kanji.
+ -->
+<!ATTLIST meaning m_lang CDATA #IMPLIED>
+ <!--
+ The m_lang attribute defines the target language of the meaning. It
+ will be coded using the two-letter language code from the ISO 639
+ standard. When absent, the value "en" (i.e. English) is implied. [{}]
+ -->
+] >
+<kanjidic2>
+</kanjidic2>