diff options
-rw-r--r-- | docs/export-0.3.xsd | 146 | ||||
-rw-r--r-- | docs/export-demo.xml | 115 |
2 files changed, 261 insertions, 0 deletions
diff --git a/docs/export-0.3.xsd b/docs/export-0.3.xsd new file mode 100644 index 000000000000..02403c62d1d1 --- /dev/null +++ b/docs/export-0.3.xsd @@ -0,0 +1,146 @@ +<?xml version="1.0" encoding="UTF-8" ?> +<!-- + This is an XML Schema description of the format + output by MediaWiki's Special:Export system. + + Version 0.2 adds optional basic file upload info support, + which is used by our OAI export/import submodule. + + Version 0.3 adds some site configuration information such + as a list of defined namespaces. + + The canonical URL to the schema document is: + http://www.mediawiki.org/xml/export-0.3.xsd + + Use the namespace: + http://www.mediawiki.org/xml/export-0.3/ +--> +<schema xmlns="http://www.w3.org/2001/XMLSchema" + xmlns:mw="http://www.mediawiki.org/xml/export-0.3/" + targetNamespace="http://www.mediawiki.org/xml/export-0.3/" + elementFormDefault="qualified"> + + <annotation> + <documentation xml:lang="en"> + MediaWiki's page export format + </documentation> + </annotation> + + <!-- Need this to reference xml:lang --> + <import namespace="http://www.w3.org/XML/1998/namespace" + schemaLocation="http://www.w3.org/2001/xml.xsd"/> + + <!-- Our root element --> + <element name="mediawiki" type="mw:MediaWikiType"/> + + <complexType name="MediaWikiType"> + <sequence> + <element name="siteinfo" type="mw:SiteInfoType" + minOccurs="0" maxOccurs="1"/> + <element name="page" type="mw:PageType" + minOccurs="0" maxOccurs="unbounded"/> + </sequence> + <attribute name="version" type="string" use="required"/> + <attribute ref="xml:lang" use="required"/> + </complexType> + + <complexType name="SiteInfoType"> + <sequence> + <element name="sitename" type="string" minOccurs="0" /> + <element name="base" type="anyURI" minOccurs="0" /> + <element name="generator" type="string" minOccurs="0" /> + <element name="case" type="mw:CaseType" minOccurs="0" /> + <element name="namespaces" type="mw:NamespacesType" minOccurs="0" /> + </sequence> + </complexType> + + <simpleType 
name="CaseType"> + <restriction base="NMTOKEN"> + <!-- Cannot have two titles differing only by case of first letter. --> + <!-- Default behavior through 1.5, $wgCapitalLinks = true --> + <enumeration value="first-letter" /> + + <!-- Complete title is case-sensitive --> + <!-- Behavior when $wgCapitalLinks = false --> + <enumeration value="case-sensitive" /> + + <!-- Cannot have two titles differing only by case. --> + <!-- Not yet implemented as of MediaWiki 1.5 --> + <enumeration value="case-insensitive" /> + </restriction> + </simpleType> + + <complexType name="NamespacesType"> + <sequence> + <element name="namespace" type="mw:NamespaceType" + minOccurs="0" maxOccurs="unbounded" /> + </sequence> + </complexType> + + <complexType name="NamespaceType"> + <simpleContent> + <extension base="string"> + <attribute name="key" type="integer" /> + </extension> + </simpleContent> + </complexType> + + <complexType name="PageType"> + <sequence> + <!-- Title in text form. (Using spaces, not underscores; with namespace prefix, if any.) --> + <element name="title" type="string"/> + + <!-- optional page ID number --> + <element name="id" type="positiveInteger" minOccurs="0"/> + + <!-- comma-separated list of string tokens, if present --> + <element name="restrictions" type="string" minOccurs="0"/> + + <!-- Zero or more sets of revision or upload data --> + <choice minOccurs="0" maxOccurs="unbounded"> + <element name="revision" type="mw:RevisionType" /> + <element name="upload" type="mw:UploadType" /> + </choice> + </sequence> + </complexType> + + <complexType name="RevisionType"> + <sequence> + <element name="id" type="positiveInteger" minOccurs="0"/> + <element name="timestamp" type="dateTime"/> + <element name="contributor" type="mw:ContributorType"/> + <element name="minor" minOccurs="0" /> + <element name="comment" type="string" minOccurs="0"/> + <element name="text" type="string"/> + </sequence> + </complexType> + + <complexType name="ContributorType"> + <sequence> + <element 
name="username" type="string" minOccurs="0"/> + <element name="id" type="positiveInteger" minOccurs="0" /> + + <element name="ip" type="string" minOccurs="0"/> + </sequence> + </complexType> + + <complexType name="UploadType"> + <sequence> + <!-- Revision-style data... --> + <element name="timestamp" type="dateTime"/> + <element name="contributor" type="mw:ContributorType"/> + <element name="comment" type="string" minOccurs="0"/> + + <!-- Filename. (Using underscores, not spaces. No 'Image:' namespace marker.) --> + <element name="filename" type="string"/> + + <!-- URI at which this resource can be obtained --> + <element name="src" type="anyURI"/> + + <element name="size" type="positiveInteger" /> + + <!-- TODO: add other metadata fields --> + </sequence> + </complexType> + +</schema> diff --git a/docs/export-demo.xml b/docs/export-demo.xml new file mode 100644 index 000000000000..941a09731c5d --- /dev/null +++ b/docs/export-demo.xml @@ -0,0 +1,115 @@ +<mediawiki xmlns="http://www.mediawiki.org/xml/export-0.3/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.mediawiki.org/xml/export-0.3/ http://www.mediawiki.org/xml/export-0.3.xsd" version="0.3" xml:lang="en"> + +<!-- Optional global configuration info --> +<siteinfo> + <!-- Site name, as set in $wgSitename --> + <sitename>DemoWiki</sitename> + + <!-- Forgot where you got this set? --> + <base>http://example.com/wiki/Main_Page</base> + + <!-- Source software version --> + <generator>MediaWiki 1.5.0</generator> + + <!-- Title case sensitivity options of the wiki this data came from --> + <!-- May be 'first-letter', 'case-sensitive', or 'case-insensitive' --> + <case>first-letter</case> + + <!-- Defined namespace keys on the source wiki. 
--> + <!-- Split the namespace prefix off a title and match it here to obtain the numeric key --> + <namespaces> + <namespace key="-2">Media</namespace> + <namespace key="-1">Special</namespace> + <namespace key="0"></namespace> + <namespace key="1">Talk</namespace> + <namespace key="2">User</namespace> + <namespace key="3">User talk</namespace> + <namespace key="4">DemoWiki</namespace> + <namespace key="5">DemoWiki talk</namespace> + <namespace key="6">Image</namespace> + <namespace key="7">Image talk</namespace> + <namespace key="8">MediaWiki</namespace> + <namespace key="9">MediaWiki talk</namespace> + <namespace key="10">Template</namespace> + <namespace key="11">Template talk</namespace> + <namespace key="12">Help</namespace> + <namespace key="13">Help talk</namespace> + <namespace key="14">Category</namespace> + <namespace key="15">Category talk</namespace> + </namespaces> +</siteinfo> + +<!-- The rest of the data will be a series of page records --> +<page> + <!-- Titles are listed here in text form, with namespace prefix --> + <!-- if any, and spaces rather than the underscores used in URLs. --> + <title>Page title</title> + + <!-- The page's immutable page_id number in the source database. --> + <!-- Page ID numbers are kept across page moves, but may change --> + <!-- if a page is deleted and recreated. --> + <id>1</id> + + <!-- If restricted, the ACL is listed here raw. --> + <restrictions>edit=sysop:move=sysop</restrictions> + + <!-- With a series of revision records... --> + + <!-- Remember this is XML; if you must use a regex-based extractor --> + <!-- in place of a standard XML parser, be very careful. --> + <!-- * Don't forget to decode character entities! --> + <!-- * If using a 'loose' XML parser, ensure that whitespace is --> + <!-- preserved in the <text> elements. --> + <revision> + <!-- Unique revision ID number (rev_id) in the source database. --> + <!-- This number uniquely identifies the revision on that wiki. 
--> + <id>100</id> + + <timestamp>2001-01-15T13:15:00Z</timestamp> + <contributor><username>Foobar</username><id>42</id></contributor> + <minor /> + <comment>I have just one thing to say!</comment> + <text>A bunch of [[text]] here.</text> + </revision> + + <revision> + <timestamp>2001-01-15T13:10:27Z</timestamp> + <contributor><ip>10.0.0.2</ip></contributor> + <comment>new!</comment> + <text>An earlier [[revision]].</text> + </revision> +</page> + +<page> + <title>Talk:Page title</title> + <id>2</id> + <revision> + <id>101</id> + <timestamp>2001-01-15T14:03:00Z</timestamp> + <contributor><ip>10.0.0.2</ip></contributor> + <comment>hey</comment> + <text>WHYD YOU LOCK PAGE??!!! i was editing that jerk</text> + </revision> +</page> + +<page> + <title>Image:Some image.jpg</title> + <id>3</id> + <revision> + <id>102</id> + <timestamp>2001-01-15T20:34:12Z</timestamp> + <contributor><username>Foobar</username><id>42</id></contributor> + <comment>My awesomeest image!</comment> + <text>This is an awesome little imgae. I lurves it. {{PD}}</text> + </revision> + <upload> + <timestamp>2001-01-15T20:34:12Z</timestamp> + <contributor><username>Foobar</username><id>42</id></contributor> + <comment>My awesomeest image!</comment> + <filename>Some_image.jpg</filename> + <src>http://upload.wikimedia.org/commons/2/22/Some_image.jpg</src> + <size>12345</size> + </upload> +</page> + +</mediawiki> |