<?xml version="1.0"?>
<!--
***** BEGIN LICENSE BLOCK *****
Version: MPL 1.1/GPL 2.0/LGPL 2.1

The contents of this file are subject to the Mozilla Public License Version
1.1 (the "License"); you may not use this file except in compliance with the
License. You may obtain a copy of the License at http://www.mozilla.org/MPL/

Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for
the specific language governing rights and limitations under the License.

The Original Code is PharmGen.

The Initial Developer of the Original Code is
PharmGKB (The Pharmacogenetics and Pharmacogenomics Knowledge Base,
supported by NIH U01GM61374).
Portions created by the Initial Developer are Copyright (C) 2005
the Initial Developer. All Rights Reserved.

Contributor(s):

Alternatively, the contents of this file may be used under the terms of
either the GNU General Public License Version 2 or later (the "GPL"), or the
GNU Lesser General Public License Version 2.1 or later (the "LGPL"), in
which case the provisions of the GPL or the LGPL are applicable instead of
those above. If you wish to allow use of your version of this file only
under the terms of either the GPL or the LGPL, and not to allow others to
use your version of this file under the terms of the MPL, indicate your
decision by deleting the provisions above and replace them with the notice
and other provisions required by the GPL or the LGPL. If you do not delete
the provisions above, a recipient may use your version of this file under
the terms of any one of the MPL, the GPL or the LGPL.

***** END LICENSE BLOCK *****
-->

<xsd:schema version="4.0"
  targetNamespace="http://www.pharmgkb.org/schema/"
  xmlns="http://www.pharmgkb.org/schema/"
  xmlns:xsd="http://www.w3.org/2001/XMLSchema"
  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xmlns:sch="http://www.ascc.net/xml/schematron"
  elementFormDefault="qualified">

  <xsd:annotation>
    <xsd:documentation>
      This XML schema defines sequence-related entities.
      For complete documentation, see http://preview.pharmgkb.org/schema/.
    </xsd:documentation>
  </xsd:annotation>


  <!-- Included XML Schema Files -->
  <xsd:include schemaLocation="http://preview.pharmgkb.org/schema/types.xsd" />
  <xsd:include schemaLocation="http://preview.pharmgkb.org/schema/types-sequence.xsd" />
  <xsd:include schemaLocation="http://preview.pharmgkb.org/schema/haplotype.xsd" />



  <!-- gene: global element describing a gene record -->
  <xsd:element name="gene">
    <xsd:annotation>
      <xsd:documentation>
        This class defines a gene.
        See http://preview.pharmgkb.org/schema/docs/gene.html for details.
      </xsd:documentation>
      <xsd:appinfo>
        <!--
          Schematron checks applied to every pgkb:geneXref element:
            * its resource attribute must equal 'PharmGKB';
            * its text must be an integer-valued number, optionally prefixed with 'PA'.
          NOTE(review): the pgkb prefix is not declared anywhere in this file; presumably
          the Schematron tooling binds it to the target namespace. Confirm.
        -->
        <sch:pattern name="Constraints for geneXref">
          <sch:rule context="pgkb:geneXref">
            <sch:report test="not(@resource='PharmGKB')">Resource attribute must be 'PharmGKB'</sch:report>
            <sch:report test="@resource='PharmGKB' and
                              not((starts-with(., 'PA') and floor(substring(., 3))=number(substring(., 3))) or
                              floor(.)=number(.))">Cross-reference resource from PharmGKB does not appear to have a valid PharmGKB Accession ID</sch:report>
          </sch:rule>
        </sch:pattern>
      </xsd:appinfo>
    </xsd:annotation>
    <xsd:complexType>
      <xsd:complexContent>
        <!-- Extends AccessionObjectClass, which is declared outside this file. -->
        <xsd:extension base="AccessionObjectClass">
          <!--
            Child elements, in the order instances must use.
            Every child is optional. An omitted maxOccurs means the XSD default of 1.
          -->
          <xsd:sequence>
            <!-- names and symbols: one primary value, any number of alternates -->
            <xsd:element name="name" type="xsd:token" minOccurs="0"/>
            <xsd:element name="altName" type="xsd:token" minOccurs="0" maxOccurs="unbounded"/>
            <xsd:element name="symbol" type="xsd:token" minOccurs="0"/>
            <xsd:element name="altSymbol" type="xsd:token" minOccurs="0" maxOccurs="unbounded"/>
            <!-- ontology terms and cross-references -->
            <xsd:element name="term" type="OntologyTermClass" minOccurs="0" maxOccurs="unbounded"/>
            <xsd:element name="xref" type="XrefClass" minOccurs="0" maxOccurs="unbounded"/>
            <!-- at most one organism -->
            <xsd:element name="organism" type="OrganismClass" minOccurs="0"/>
            <!-- repeatable free-text tokens -->
            <xsd:element name="product" type="xsd:token" minOccurs="0" maxOccurs="unbounded"/>
            <xsd:element name="omimPhenotype" type="xsd:token" minOccurs="0" maxOccurs="unbounded"/>
          </xsd:sequence>
        </xsd:extension>
      </xsd:complexContent>
    </xsd:complexType>
  </xsd:element>


  <!-- rna -->
  <xsd:element name="rna">
    <xsd:annotation>
      <xsd:documentation>
        This class defines an RNA sequence.
        See http://preview.pharmgkb.org/schema/docs/rna.html for details.
      </xsd:documentation>
      <xsd:appinfo>
        <!--
          Schematron checks for pgkb:rna:
            * when the pharmgkbId attribute is absent, both type and rnaSequence are required;
            * rnaSequence may contain only the characters A, C, G, U (either case) and space.
          NOTE(review): the message refers to "the IUPAC codes", yet the test rejects
          ambiguity codes such as N. Confirm which of the two is intended.
          NOTE(review): the pgkb prefix is not declared anywhere in this file; presumably
          the Schematron tooling binds it to the target namespace. Confirm.
        -->
        <sch:pattern name="Required elements for RnaClass">
          <sch:rule context="pgkb:rna">
            <sch:report test="not(@pharmgkbId) and
                              not(pgkb:type)">Missing element:  type</sch:report>
            <sch:report test="not(@pharmgkbId) and
                              not(pgkb:rnaSequence)">Missing element:  rnaSequence</sch:report>
            <sch:report test="pgkb:rnaSequence and
                              string-length(translate(pgkb:rnaSequence, 'AaCcGgUu ', '')) != 0">Invalid format:  rnaSequence must comply with the IUPAC codes</sch:report>
          </sch:rule>
        </sch:pattern>
        <!--
          Schematron checks for pgkb:rnaXref:
            * its resource attribute must be 'PharmGKB' or 'local';
            * a 'PharmGKB' reference must be an integer-valued number, optionally prefixed with 'PA'.
          This pattern is a sibling of the one above, not a child of it: a Schematron
          pattern holds rules and cannot contain another pattern.
        -->
        <sch:pattern name="Constraints for rnaXref">
          <sch:rule context="pgkb:rnaXref">
            <sch:report test="not(@resource='PharmGKB') and not(@resource='local')">Resource attribute must be 'PharmGKB' or 'local'</sch:report>
            <sch:report test="@resource='PharmGKB' and
                              not((starts-with(., 'PA') and floor(substring(., 3))=number(substring(., 3))) or
                              floor(.)=number(.))">Cross-reference resource from PharmGKB does not appear to have a valid PharmGKB Accession ID</sch:report>
          </sch:rule>
        </sch:pattern>
      </xsd:appinfo>
    </xsd:annotation>
    <xsd:complexType>
      <xsd:complexContent>
        <!-- Extends AccessionObjectClass, which is declared outside this file. -->
        <xsd:extension base="AccessionObjectClass">
          <!-- subelements: all optional in the schema; see the Schematron rules above for conditional requirements -->
          <xsd:sequence>
            <xsd:element name="name" type="NonEmptyTokenType" minOccurs="0" maxOccurs="1" />
            <!-- kind of RNA; limited to the three values enumerated below -->
            <xsd:element name="type" minOccurs="0" maxOccurs="1">
              <xsd:simpleType>
                <xsd:restriction base="xsd:token">
                  <xsd:enumeration value="mRNA" />
                  <xsd:enumeration value="rRNA" />
                  <xsd:enumeration value="tRNA" />
                </xsd:restriction>
              </xsd:simpleType>
            </xsd:element>
            <!--
              XXX: rnaSequence is meant to be validated through RnaSequenceType, but a bug in
              XXX: Xerces was reported to prevent that, so the sequence alphabet is also
              XXX: checked with the Schematron rule above until the bug is fixed.
              NOTE(review): the element already references RnaSequenceType; confirm whether
              the workaround is still required.
            -->
            <xsd:element name="rnaSequence" type="RnaSequenceType" minOccurs="0" maxOccurs="1" />
            <!-- single-valued cross-references -->
            <xsd:element name="geneXref" type="XrefClass" minOccurs="0" maxOccurs="1" />
            <xsd:element name="spliceSetXref" type="XrefClass" minOccurs="0" maxOccurs="1" />
            <xsd:element name="proteinXref" type="XrefClass" minOccurs="0" maxOccurs="1" />
            <!-- repeatable ontology terms and cross-references -->
            <xsd:element name="term" type="OntologyTermClass" minOccurs="0" maxOccurs="unbounded" />
            <xsd:element name="xref" type="XrefClass" minOccurs="0" maxOccurs="unbounded" />
          </xsd:sequence>
        </xsd:extension>
      </xsd:complexContent>
    </xsd:complexType>
  </xsd:element>

  <!-- protein -->
  <xsd:element name="protein">
    <xsd:annotation>
      <xsd:documentation>
        This class defines a protein sequence.
        See http://preview.pharmgkb.org/schema/docs/protein.html for details.
      </xsd:documentation>
      <xsd:appinfo>
        <!--
          Schematron check for pgkb:protein:
            * aminoAcidSequence may contain only letters (either case) other than J and O,
              plus the space character.
          NOTE(review): 'IUPAC-IUP' in the message is probably meant to be 'IUPAC-IUB';
          confirm before changing the user-visible text.
          NOTE(review): the pgkb prefix is not declared anywhere in this file; presumably
          the Schematron tooling binds it to the target namespace. Confirm.
        -->
        <sch:pattern name="Required elements for protein">
          <sch:rule context="pgkb:protein">
            <sch:report test="pgkb:aminoAcidSequence and
                              string-length(translate(pgkb:aminoAcidSequence, 'AaBbCcDdEeFfGgHhIiKkLlMmNnPpQqRrSsTtUuVvWwXxYyZz ', '')) != 0">Invalid format:  aminoAcidSequence must comply with the IUPAC-IUP amino acid symbols</sch:report>
          </sch:rule>
        </sch:pattern>
        <!--
          Schematron checks for pgkb:proteinXref:
            * its resource attribute must be 'PharmGKB' or 'local';
            * a 'PharmGKB' reference must be an integer-valued number, optionally prefixed with 'PA'.
          This pattern is a sibling of the one above, not a child of it: a Schematron
          pattern holds rules and cannot contain another pattern.
        -->
        <sch:pattern name="Constraints for proteinXref">
          <sch:rule context="pgkb:proteinXref">
            <sch:report test="not(@resource='PharmGKB') and not(@resource='local')">Resource attribute must be 'PharmGKB' or 'local'</sch:report>
            <sch:report test="@resource='PharmGKB' and
                              not((starts-with(., 'PA') and floor(substring(., 3))=number(substring(., 3))) or
                              floor(.)=number(.))">Cross-reference resource from PharmGKB does not appear to have a valid PharmGKB Accession ID</sch:report>
          </sch:rule>
        </sch:pattern>
      </xsd:appinfo>
    </xsd:annotation>
    <xsd:complexType>
      <xsd:complexContent>
        <!-- Extends AccessionObjectClass, which is declared outside this file. -->
        <xsd:extension base="AccessionObjectClass">
          <!-- subelements: aminoAcidSequence and geneXref are mandatory, the rest optional -->
          <xsd:sequence>
            <xsd:element name="name" type="NonEmptyTokenType" minOccurs="0" maxOccurs="1" />
            <!--
              XXX: aminoAcidSequence is meant to be validated through ProteinSequenceType, but a
              XXX: bug in Xerces was reported to prevent that, so the sequence alphabet is also
              XXX: checked with the Schematron rule above until the bug is fixed.
              NOTE(review): the element already references ProteinSequenceType; confirm whether
              the workaround is still required.
            -->
            <xsd:element name="aminoAcidSequence" type="ProteinSequenceType" minOccurs="1" maxOccurs="1" />
            <xsd:element name="geneXref" type="XrefClass" minOccurs="1" maxOccurs="1" />
            <!-- repeatable cross-references and ontology terms -->
            <xsd:element name="rnaXref" type="XrefClass" minOccurs="0" maxOccurs="unbounded" />
            <xsd:element name="term" type="OntologyTermClass" minOccurs="0" maxOccurs="unbounded" />
            <xsd:element name="xref" type="XrefClass" minOccurs="0" maxOccurs="unbounded" />
          </xsd:sequence>
        </xsd:extension>
      </xsd:complexContent>
    </xsd:complexType>
  </xsd:element>

  <!-- namedAllele: global element describing a named allele -->
  <xsd:element name="namedAllele">
    <xsd:annotation>
      <xsd:documentation>
        This class defines a named allele.
        See http://preview.pharmgkb.org/schema/docs/namedAllele.html for details.
      </xsd:documentation>
      <xsd:appinfo>
        <!--
          Schematron checks applied to every pgkb:namedAlleleXref element:
            * its resource attribute must equal 'PharmGKB';
            * its text must be an integer-valued number, optionally prefixed with 'PA'.
          NOTE(review): the pgkb prefix is not declared anywhere in this file; presumably
          the Schematron tooling binds it to the target namespace. Confirm.
        -->
        <sch:pattern name="Constraints for namedAlleleXref">
          <sch:rule context="pgkb:namedAlleleXref">
            <sch:report test="not(@resource='PharmGKB')">Resource attribute must be 'PharmGKB'</sch:report>
            <sch:report test="@resource='PharmGKB' and
                              not((starts-with(., 'PA') and floor(substring(., 3))=number(substring(., 3))) or
                              floor(.)=number(.))">Cross-reference resource from PharmGKB does not appear to have a valid PharmGKB Accession ID</sch:report>
          </sch:rule>
        </sch:pattern>
      </xsd:appinfo>
    </xsd:annotation>
    <xsd:complexType>
      <xsd:complexContent>
        <!-- Extends AccessionObjectClass, which is declared outside this file. -->
        <xsd:extension base="AccessionObjectClass">
          <!--
            Child elements, in the order instances must use.
            An omitted minOccurs or maxOccurs means the XSD default of 1.
          -->
          <xsd:sequence>
            <!-- exactly one name -->
            <xsd:element name="name" type="NonEmptyStringType"/>
            <!-- optional, at most one -->
            <xsd:element name="history" type="NonEmptyStringType" minOccurs="0"/>
            <!-- one or more polymorphism references -->
            <xsd:element name="polymorphismXref" type="XrefClass" maxOccurs="unbounded"/>
            <!-- optional and repeatable -->
            <xsd:element name="term" type="OntologyTermClass" minOccurs="0" maxOccurs="unbounded"/>
            <xsd:element name="xref" type="XrefClass" minOccurs="0" maxOccurs="unbounded"/>
          </xsd:sequence>
        </xsd:extension>
      </xsd:complexContent>
    </xsd:complexType>
  </xsd:element>

</xsd:schema>