| 1 | <molgenis name="xgap" label="xGaP: Extensible database for genotype and phenotype experiments"> |
|---|
| 2 | <!-- INVESTIGATION --> |
|---|
| 3 | <module name="xgap.core"> |
|---|
| 4 | <description>Core entities.</description> |
|---|
| 5 | <entity name="Investigation" extends="FugeInvestigation"> |
|---|
| 6 | <unique fields="name" description="Name is unique" /> |
|---|
| 7 | </entity> |
|---|
| 8 | <entity name="ProtocolApplication" |
|---|
| 9 | extends="FugeProtocolApplication"> |
|---|
| 10 | <field name="Status" type="enum" |
|---|
| 11 | enum_options="[inprocess, final]" default="inprocess" |
|---|
| 12 | description="The status of this protocolapplication (inprocess = still working on it, final = ready for further analysis)." /> |
|---|
| 13 | <field name="Investigation" type="xref" |
|---|
| 14 | xref_entity="Investigation" xref_field="id" |
|---|
| 15 | xref_label="name" |
|---|
| 16 | description="Reference to the Investigation this protocolapplication belongs to." /> |
|---|
| 17 | <unique fields="name,Investigation" |
|---|
| 18 | description="Name is unique within an Investigation" /> |
|---|
| 19 | </entity> |
|---|
| 20 | <!-- DATA --> |
|---|
| 21 | <entity name="Data" extends="FugeData"> |
|---|
| 22 | <description> |
|---|
| 23 | Generic structure for describing data matrices such as |
|---|
| 24 | genotype result, gene expression measurement, QTL |
|---|
| 25 | calculation, etc. |
|---|
| 26 | </description> |
|---|
| 27 | <field name="Investigation" type="xref" |
|---|
| 28 | xref_entity="Investigation" xref_field="id" |
|---|
| 29 | xref_label="name" |
|---|
| 30 | description="Reference to the Investigation this data is measured as part of." /> |
|---|
| 31 | <field name="RowType" type="enum" |
|---|
| 32 | enum_options="[Marker,Probe,ProbeSet,Individual,Sample,PairedSample,MassPeak,Gene,Trait,Subject,Strain,Metabolite]" |
|---|
| 33 | description="Type of the rows of this matrix. Each row refers to a Trait or Subject (DimensionElement)." /> |
|---|
| 34 | <field name="ColType" type="enum" |
|---|
| 35 | enum_options="[Marker,Probe,ProbeSet,Individual,Sample,PairedSample,MassPeak,Gene,Trait,Subject,Strain,Metabolite]" |
|---|
| 36 | description="Type of the columns of this matrix. Each column refers to a Trait or Subject (DimensionElement)." /> |
|---|
| 37 | <field name="ValueType" type="enum" |
|---|
| 38 | enum_options="[Text,Decimal]" |
|---|
| 39 | description="Type of the values of this matrix. E.g. text strings or decimal numbers." /> |
|---|
| 40 | <field name="TotalRows" type="int" default="0"/> |
|---|
| 41 | <field name="TotalCols" type="int" default="0"/> |
|---|
| 42 | <unique fields="name,Investigation" /> |
|---|
| 43 | </entity> |
|---|
| 44 | <entity name="DimensionElement" |
|---|
| 45 | extends="FugeDimensionElement"> |
|---|
| 46 | <description> |
|---|
| 47 | Describes the biological material or subject which is |
|---|
| 48 | being 'measured' by a Data set. |
|---|
| 49 | <br /> |
|---|
| 50 | For example a 'Sample' extends from Item, which makes |
|---|
| 51 | it possible for a microarray-assay Data set to reference such a sample |
|---|
| 52 | (as DataElement can reference any Item). |
|---|
| 53 | <br /> |
|---|
| 54 | A DimensionElement is always linked to a single |
|---|
| 55 | Investigation. |
|---|
| 56 | </description> |
|---|
| 57 | <field name="Investigation" type="xref_single" |
|---|
| 58 | xref_entity="Investigation" xref_field="id" |
|---|
| 59 | xref_label="name" |
|---|
| 60 | description="Reference to the investigation this belongs to." /> |
|---|
| 61 | <unique fields="name,Investigation,Type" |
|---|
| 62 | description="Name is unique within an investigation within its type (e.g. there can be a Gene and a Protein with the same name, but not another Gene)." /> |
|---|
| 63 | </entity> |
|---|
| 64 | <entity name="DataElement" abstract="true"> |
|---|
| 65 | <description> |
|---|
| 66 | Generic data structure for aiming the values of a data |
|---|
| 67 | set as described in Data. Each DataElement describes a |
|---|
| 68 | cell in a data matrix of rows (from) and columns (to), |
|---|
| 69 | e.g., in "dataset X" it was measured that geneX (col) |
|---|
| 70 | relates to geneZ (row). |
|---|
| 71 | </description> |
|---|
| 72 | <field nillable="false" auto="true" name="id" |
|---|
| 73 | type="int" description="automatically generated id-field" |
|---|
| 74 | unique="true" /> |
|---|
| 75 | <field name="Data" type="xref" xref_entity="Data" |
|---|
| 76 | xref_field="id" xref_label="name" |
|---|
| 77 | description="Reference to the data this entity belongs to." /> |
|---|
| 78 | <field name="Col" type="xref" xref_entity="DimensionElement" |
|---|
| 79 | xref_field="id" xref_label="name" |
|---|
| 80 | description="References the DimensionElement on one end of the relation. Can be omitted for 1D data (i.e., a data list)" /> |
|---|
| 81 | <field name="Row" type="xref" xref_entity="DimensionElement" |
|---|
| 82 | xref_field="id" xref_label="name" |
|---|
| 83 | description="References the DimensionElement on the other end of the relation." /> |
|---|
| 84 | <field name="RowIndex" type="int" |
|---|
| 85 | description="Row position in the matrix." /> |
|---|
| 86 | <field name="ColIndex" type="int" |
|---|
| 87 | description="Col position in the matrix." /> |
|---|
| 88 | <unique fields="ColIndex,RowIndex,Data"/> |
|---|
| 89 | <unique fields="RowIndex,ColIndex,Data"/> |
|---|
| 90 | <!-- |
|---|
| 91 | <unique fields="Row,Col,Data"/> |
|---|
| 92 | <unique fields="Col,Row,Data"/> --> |
|---|
| 93 | </entity> |
|---|
| 94 | <entity name="DecimalDataElement" implements="DataElement"> |
|---|
| 95 | <description> |
|---|
| 96 | A DataElement for storing decimal data. |
|---|
| 97 | </description> |
|---|
| 98 | <field name="Value" label="Value" nillable="true" |
|---|
| 99 | type="decimal" description="The value, e.g., correlation." /> |
|---|
| 100 | </entity> |
|---|
| 101 | <entity name="TextDataElement" implements="DataElement"> |
|---|
| 102 | <description>Store text data</description> |
|---|
| 103 | <field name="Value" label="Value" type="text" |
|---|
| 104 | nillable="true" |
|---|
| 105 | description="The value, e.g., genotype strings like AA, BA, BB." /> |
|---|
| 106 | </entity> |
|---|
| 107 | </module> |
|---|
| 108 | <!--SUBJECTS --> |
|---|
| 109 | <!-- issue: how about a genotype: that can be on level of individual or on level of RILs. For now, this is for the user to find out. --> |
|---|
| 110 | <module name="xgap.subject"> |
|---|
| 111 | <description>Subject variants.</description> |
|---|
| 112 | <entity name="Species" extends="OntologyTerm" |
|---|
| 113 | description="Ontology of species. E.g. Arabidopsis thaliana" /> |
|---|
| 114 | <entity name="Tissue" extends="OntologyTerm" |
|---|
| 115 | description="Ontology of tissue. E.g. spleen" /> |
|---|
| 116 | <entity name="SampleLabel" extends="OntologyTerm" |
|---|
| 117 | description="Ontology of sample labels. E.g. cy3, cy5" /> |
|---|
| 118 | <entity name="Subject" extends="DimensionElement"> |
|---|
| 119 | <field name="Species" type="xref" nillable="true" |
|---|
| 120 | xref_entity="Species" xref_field="id" xref_label="name" |
|---|
| 121 | description="The species this subject is an instance of/part of/extracted from." /> |
|---|
| 122 | </entity> |
|---|
| 123 | <entity name="Strain" extends="Subject"> |
|---|
| 124 | <description> |
|---|
| 125 | A variant or subtype of animal, plant, virus or |
|---|
| 126 | bacteria. |
|---|
| 127 | </description> |
|---|
| 128 | <field name="StrainType" type="enum" |
|---|
| 129 | enum_options="[Natural,Parental,F1,RI,RCC,CSS,Other]" default="RI" |
|---|
| 130 | description="Indicate the type of Strain (Natural=wild type, Parental=parents of a cross, F1=First generation of cross, RI=Recombinant inbred, RCC=Recombinant congenic, CSS=chromosome substitution)" /> |
|---|
| 131 | <!-- TODO move StrainType to become an ontology? --> |
|---|
| 132 | <field name="FounderStrains" type="mref" nillable="true" |
|---|
| 133 | xref_entity="Strain" xref_field="id" xref_label="name" |
|---|
| 134 | description="The strain(s) that were crossed to create this strain." /> |
|---|
| 135 | <!-- TODO: don't we need info on how many generations the cross has been inbred? --> |
|---|
| 136 | </entity> |
|---|
| 137 | <entity name="Individual" extends="Subject"> |
|---|
| 138 | <description>Biological individuals.</description> |
|---|
| 139 | <field name="Strain" type="xref" xref_entity="Strain" |
|---|
| 140 | xref_field="id" xref_label="name" nillable="true" |
|---|
| 141 | description="Refers to the strain this individual is part of." /> |
|---|
| 142 | <field name="Mother" type="xref" nillable="true" |
|---|
| 143 | xref_entity="Individual" xref_field="id" xref_label="name" |
|---|
| 144 | description="Refers to the mother of the individual." /> |
|---|
| 145 | <field name="Father" type="xref" nillable="true" |
|---|
| 146 | xref_entity="Individual" xref_field="id" xref_label="name" |
|---|
| 147 | description="Refers to the father of the individual." /> |
|---|
| 148 | </entity> |
|---|
| 149 | <entity name="Sample" extends="Subject"> |
|---|
| 150 | <description>A sample taken from an individual and/or a tissue.</description> |
|---|
| 151 | <field name="Individual" type="xref" |
|---|
| 152 | xref_entity="Individual" xref_field="id" xref_label="name" |
|---|
| 153 | nillable="true" |
|---|
| 154 | description="The individual from which this sample was taken." /> |
|---|
| 155 | <field name="Tissue" type="xref" xref_entity="Tissue" |
|---|
| 156 | xref_field="id" xref_label="name" nillable="true" |
|---|
| 157 | description="The tissue from which this sample was taken." /> |
|---|
| 158 | </entity> |
|---|
| 159 | <entity name="PairedSample" extends="Subject"> |
|---|
| 160 | <description> |
|---|
| 161 | A pair of samples labeled for a two-color microarray |
|---|
| 162 | experiment. |
|---|
| 163 | </description> |
|---|
| 164 | <field name="Subject1" type="xref" xref_entity="Individual" |
|---|
| 165 | xref_field="id" xref_label="name" |
|---|
| 166 | description="The first subject" /> |
|---|
| 167 | <field name="Label1" type="xref" xref_entity="SampleLabel" |
|---|
| 168 | xref_field="id" xref_label="name" nillable="true" |
|---|
| 169 | description="Which channel or Fluorescent labeling is associated with the first subject" /> |
|---|
| 170 | <field name="Subject2" type="xref" xref_entity="Individual" |
|---|
| 171 | xref_field="id" xref_label="name" |
|---|
| 172 | description="The second subject" /> |
|---|
| 173 | <field name="Label2" type="xref" xref_entity="SampleLabel" |
|---|
| 174 | xref_field="id" xref_label="name" nillable="true" |
|---|
| 175 | description="Which channel or Fluorescent labeling is associated with the second subject" /> |
|---|
| 176 | </entity> |
|---|
| 177 | </module> |
|---|
| 178 | <!-- TRAITS --> |
|---|
| 179 | <module name="xgap.trait"> |
|---|
| 180 | <description>Trait variants.</description> |
|---|
| 181 | <entity name="MeasurementUnit" extends="OntologyTerm" |
|---|
| 182 | description="Ontology of measurement unit, e.g. centimeter, grams (TODO: adopt PATO?)" /> |
|---|
| 183 | <entity name="Locus" abstract="true"> |
|---|
| 184 | <description> |
|---|
| 185 | Common structure for entities that have a genomic |
|---|
| 186 | position. Typical examples of such traits are genes, |
|---|
| 187 | probes and markers. |
|---|
| 188 | </description> |
|---|
| 189 | <field name="cM" label="cMPosition" type="decimal" |
|---|
| 190 | nillable="true" |
|---|
| 191 | description="genetic map position in centimorgan (cM)." /> |
|---|
| 192 | <field name="Chr" label="Chromosome" type="varchar" |
|---|
| 193 | length="10" nillable="true" |
|---|
| 194 | description="chromosome name or number string (1, 2, ..., x, y)" /> |
|---|
| 195 | <field name="bpStart" label="Start (5')" type="long" |
|---|
| 196 | nillable="true" |
|---|
| 197 | description="numeric basepair position (5') on the chromosome" /> |
|---|
| 198 | <field name="bpEnd" label="End" type="long" nillable="true" |
|---|
| 199 | description="numeric basepair position (3') on the chromosome" /> |
|---|
| 200 | <field name="Species" label="Species" type="xref" |
|---|
| 201 | xref_entity="Species" xref_field="id" xref_label="name" |
|---|
| 202 | nillable="true" |
|---|
| 203 | description="Reference to the species this position belongs to." /> |
|---|
| 204 | <field name="Seq" type="text" nillable="true" |
|---|
| 205 | description="The FASTA text representation of the sequence." /> |
|---|
| 206 | </entity> |
|---|
| 207 | <entity name="Trait" extends="DimensionElement"> |
|---|
| 208 | <description> |
|---|
| 209 | Traits that are being measured. Traits can be very |
|---|
| 210 | diverse. The specifics of these traits can be added, as |
|---|
| 211 | for example in a "Gene" (Gene extends Trait). |
|---|
| 212 | </description> |
|---|
| 213 | </entity> |
|---|
| 214 | <entity name="Gene" extends="Trait" implements="Locus"> |
|---|
| 215 | <description> |
|---|
| 216 | Trait annotations specific for genes. |
|---|
| 217 | </description> |
|---|
| 218 | <field name="Symbol" nillable="true" |
|---|
| 219 | description="Main symbol this gene is known by (not necessarily unique, in contrast to 'name')" /> |
|---|
| 220 | <field name="Aliases" nillable="true" |
|---|
| 221 | description="Alternative symbols this gene is known by, separated by ','" /> |
|---|
| 222 | <field name="Orientation" type="enum" enum_options="[F,R]" |
|---|
| 223 | nillable="true" |
|---|
| 224 | description="Orientation of the gene on the genome (F=forward, R=reverse)" /> |
|---|
| 225 | <field name="Control" type="bool" nillable="true" |
|---|
| 226 | description="Indicating whether this is a 'housekeeping' gene that can be used as control." /> |
|---|
| 227 | </entity> |
|---|
| 228 | <entity name="Protein" extends="Trait"> |
|---|
| 229 | <description> |
|---|
| 230 | Trait annotations specific for proteins. |
|---|
| 231 | </description> |
|---|
| 232 | <!-- Question: Is there a specific notation involved that we can create type for? --> |
|---|
| 233 | <field name="Gene" label="Gene" type="xref" |
|---|
| 234 | xref_entity="Gene" xref_field="id" xref_label="name" |
|---|
| 235 | nillable="true" description="The gene that produces this protein" /> |
|---|
| 236 | <field name="Sequence" label="Sequence" type="text" |
|---|
| 237 | nillable="true" description="The aminoacid sequence." /> |
|---|
| 238 | <field name="Mass" label="Mass" type="decimal" |
|---|
| 239 | nillable="true" description="The mass of this protein" /> |
|---|
| 240 | </entity> |
|---|
| 241 | <entity name="Metabolite" extends="Trait"> |
|---|
| 242 | <description> |
|---|
| 243 | Trait annotations specific for metabolites. |
|---|
| 244 | </description> |
|---|
| 245 | <field name="Formula" label="Formula" type="varchar" |
|---|
| 246 | length="128" nillable="true" |
|---|
| 247 | description="The chemical formula of a metabolite." /> |
|---|
| 248 | <field name="Mass" label="Mass" type="decimal" |
|---|
| 249 | nillable="true" description="The mass of this metabolite" /> |
|---|
| 250 | <field name="Structure" label="Structure" type="text" |
|---|
| 251 | nillable="true" |
|---|
| 252 | description="The chemical structure of a metabolite (in SMILES representation)." /> |
|---|
| 253 | </entity> |
|---|
| 254 | <entity name="Phenotype" extends="Trait"> |
|---|
| 255 | <description> |
|---|
| 256 | Trait annotations specific for classical phenotypes. |
|---|
| 257 | </description> |
|---|
| 258 | <!-- Question: Is there a specific notation involved that we can create type for? --> |
|---|
| 259 | <field name="Unit" label="Unit" type="xref" |
|---|
| 260 | xref_entity="MeasurementUnit" xref_field="id" |
|---|
| 261 | xref_label="name" nillable="true" |
|---|
| 262 | description="The unit this trait was measured in like centimeters, gram, etc (TODO: make ontology)." /> |
|---|
| 263 | </entity> |
|---|
| 264 | <entity name="ProtocolElement" extends="Trait"> |
|---|
| 265 | <description> |
|---|
| 266 | Quantitative traits that are part of a protocol. E.g. |
|---|
| 267 | microarray probes, marker set. |
|---|
| 268 | </description> |
|---|
| 269 | <field name="Protocol" type="xref_single" nillable="true" |
|---|
| 270 | xref_entity="Protocol" xref_field="id" xref_label="name" |
|---|
| 271 | description="Reference to the protocol this trait is part of." /> |
|---|
| 272 | </entity> |
|---|
| 273 | <entity name="Marker" extends="ProtocolElement" |
|---|
| 274 | implements="Locus"> |
|---|
| 275 | <description> |
|---|
| 276 | Trait annotations specific for markers. |
|---|
| 277 | </description> |
|---|
| 278 | </entity> |
|---|
| 279 | <entity name="Probe" extends="ProtocolElement" |
|---|
| 280 | implements="Locus"> |
|---|
| 281 | <description> |
|---|
| 282 | A piece of sequence that reports for the expression of a |
|---|
| 283 | gene, typically spotted onto a microarray. |
|---|
| 284 | </description> |
|---|
| 285 | <field name="Mismatch" type="bool" |
|---|
| 286 | description="Indicating whether the probe is a mismatch probe (true) or a matching probe (false, the default)" |
|---|
| 287 | default="false" /> |
|---|
| 288 | <field name="ProbeSet" type="xref" nillable="true" |
|---|
| 289 | xref_entity="ProbeSet" xref_field="id" xref_label="name" |
|---|
| 290 | description="Optional: probeset this probe belongs to (e.g., in Affymetrix assays)." /> |
|---|
| 291 | </entity> |
|---|
| 292 | <entity name="Spot" extends="Probe"> |
|---|
| 293 | <description> |
|---|
| 294 | This is the spot on a microarray. |
|---|
| 295 | <br /> |
|---|
| 296 | Note: We don't distinguish between probes (the sequence) |
|---|
| 297 | and spots (the sequence as spotted on the array). |
|---|
| 298 | </description> |
|---|
| 299 | <field name="X" type="int" description="Row" /> |
|---|
| 300 | <field name="Y" type="int" description="Column" /> |
|---|
| 301 | <field name="GridX" type="int" description="Meta Row" |
|---|
| 302 | nillable="true" /> |
|---|
| 303 | <field name="GridY" type="int" description="Meta Column" |
|---|
| 304 | nillable="true" /> |
|---|
| 305 | <unique fields="Protocol,X,Y,GridX,GridY" /> |
|---|
| 306 | </entity> |
|---|
| 307 | <entity name="ProbeSet" extends="ProtocolElement"> |
|---|
| 308 | <description> |
|---|
| 309 | A set of Probes. E.g. Affymetrix probeset has multiple |
|---|
| 310 | probes. |
|---|
| 311 | </description> |
|---|
| 312 | </entity> |
|---|
| 313 | <entity name="MassPeak" extends="ProtocolElement"> |
|---|
| 314 | <description> |
|---|
| 315 | A peak that has been selected within a mass spectrometry |
|---|
| 316 | experiment. |
|---|
| 317 | </description> |
|---|
| 318 | <field name="MZ" type="decimal" nillable="true" |
|---|
| 319 | description="Mass over charge ratio of this peak." /> |
|---|
| 320 | <field name="Intensity" type="decimal" nillable="true" |
|---|
| 321 | description="Intensity of this peak." /> |
|---|
| 322 | <field name="RetentionTime" type="decimal" nillable="true" |
|---|
| 323 | description="The retention-time of this peak in minutes." /> |
|---|
| 324 | </entity> |
|---|
| 325 | </module> |
|---|
| 326 | </molgenis> |
|---|