Commit cee90da4 authored by R.W.Majeed's avatar R.W.Majeed

support for eav tables

parent 20bdf48a
......@@ -3,3 +3,8 @@ Validate exceptions
Test for specific errors in data files,
e.g. missing visit start timestamps in rows.
To do so, add data files containing such errors to test/resources.
Import configuration
--------------------
Allow multiple visit tables with different IDs. Fact tables
can choose which visit table to reference. e.g. <visit-id table="1">fallnr</visit-id>
\ No newline at end of file
......@@ -13,6 +13,7 @@ import de.sekmi.histream.Observation;
import de.sekmi.histream.ObservationFactory;
import de.sekmi.histream.ObservationSupplier;
import de.sekmi.histream.etl.config.DataSource;
import de.sekmi.histream.etl.config.EavTable;
import de.sekmi.histream.etl.config.Meta;
import de.sekmi.histream.etl.config.PatientTable;
import de.sekmi.histream.etl.config.VisitTable;
......@@ -62,10 +63,11 @@ public class ETLObservationSupplier implements ObservationSupplier{
private PatientTable pt;
private VisitTable vt;
private List<WideTable> wt;
private List<EavTable> et;
private RecordSupplier<PatientRow> pr;
private RecordSupplier<VisitRow> vr;
private List<RecordSupplier<WideRow>> wr;
private List<RecordSupplier<? extends FactRow>> fr;
private FactGroupingQueue queue;
......@@ -118,6 +120,7 @@ public class ETLObservationSupplier implements ObservationSupplier{
pt = ds.getPatientTable();
vt = ds.getVisitTable();
wt = ds.getWideTables();
et = ds.getEavTables();
// TODO long tables
Meta meta = ds.getMeta();
......@@ -131,12 +134,17 @@ public class ETLObservationSupplier implements ObservationSupplier{
factory.getExtensionAccessor(Visit.class));
// open all tables
wr = new ArrayList<>(wt.size());
fr = new ArrayList<>(wt.size());
for( WideTable t : wt ){
//@SuppressWarnings("resource")
RecordSupplier<WideRow> s = t.open(factory, meta);
queue.addFactTable(s);
wr.add(s);
fr.add(s);
}
for( EavTable t : et ){
RecordSupplier<EavRow> s = t.open(factory, meta);
queue.addFactTable(s);
fr.add(s);
}
queue.prepare();
......@@ -184,8 +192,8 @@ public class ETLObservationSupplier implements ObservationSupplier{
}
vr=null;
}
if( wr != null ){
Iterator<RecordSupplier<WideRow>> i = wr.iterator();
if( fr != null ){
Iterator<RecordSupplier<? extends FactRow>> i = fr.iterator();
while( i.hasNext() ){
try{ i.next().close(); }
catch( IOException e ){
......
package de.sekmi.histream.etl;
import java.util.Arrays;
import java.util.List;
import de.sekmi.histream.Observation;
/**
 * One record of an EAV table. By definition such a
 * record carries exactly one fact, so this class is a thin
 * wrapper around a single {@link Observation}.
 *
 * @author R.W.Majeed
 *
 */
public class EavRow implements FactRow {

    /** The single fact represented by this row. */
    private final Observation observation;

    /**
     * Wraps the given fact in a row.
     *
     * @param fact the observation contained in this row
     */
    public EavRow(Observation fact){
        this.observation = fact;
    }

    /**
     * @return a fixed-size list holding the one fact of this row
     */
    @Override
    public List<Observation> getFacts() {
        Observation[] single = new Observation[]{ observation };
        return Arrays.asList(single);
    }

    /**
     * @return the patient id reported by the wrapped fact
     */
    @Override
    public String getPatientId() {
        return observation.getPatientId();
    }

    /**
     * @return the encounter id of the wrapped fact, used as visit id
     */
    @Override
    public String getVisitId() {
        return observation.getEncounterId();
    }
}
......@@ -5,6 +5,13 @@ import java.util.List;
import de.sekmi.histream.Observation;
/**
* Wide fact row. A single wide row
* may contain multiple facts.
*
* @author R.W.Majeed
*
*/
public class WideRow implements FactRow{
private String patid;
private String visit;
......
......@@ -44,6 +44,9 @@ public class DataSource {
@XmlElement(name="wide-table")
WideTable[] wideTables;
@XmlElement(name="eav-table")
EavTable[] eavTables;
public Meta getMeta(){return meta;}
public PatientTable getPatientTable(){
......@@ -58,4 +61,8 @@ public class DataSource {
return Arrays.asList(wideTables);
}
/**
 * Returns the EAV tables of this data source.
 * <p>
 * The {@code eavTables} field has no initializer, so it is {@code null}
 * whenever no table was assigned to it. {@code Arrays.asList} rejects a
 * {@code null} array with a {@link NullPointerException}; to keep callers
 * that simply iterate over the result working, an empty list is returned
 * in that case.
 *
 * @return fixed-size list view of the configured EAV tables, never {@code null}
 */
public List<EavTable> getEavTables(){
    if( eavTables == null ){
        // no EAV tables configured: report "none" instead of failing
        return Arrays.asList(new EavTable[0]);
    }
    return Arrays.asList(eavTables);
}
}
package de.sekmi.histream.etl.config;
import java.math.BigDecimal;
import javax.xml.bind.annotation.XmlAccessType;
import javax.xml.bind.annotation.XmlAccessorType;
import javax.xml.bind.annotation.XmlElement;
import javax.xml.bind.annotation.XmlType;
import de.sekmi.histream.DateTimeAccuracy;
import de.sekmi.histream.Observation;
import de.sekmi.histream.ObservationFactory;
import de.sekmi.histream.Value;
import de.sekmi.histream.etl.ColumnMap;
import de.sekmi.histream.etl.EavRow;
import de.sekmi.histream.etl.ParseException;
import de.sekmi.histream.impl.NumericValue;
import de.sekmi.histream.impl.StringValue;
/**
 * Table configuration for EAV data, where every row
 * describes exactly one fact: the concept is read from one column and the
 * (optional) value, value type and further attributes from other columns.
 * <p>
 * NOTE(review): the {@code end}, {@code location} and {@code unit} columns
 * are registered in {@link #getColumnMap(String[])} but are not yet
 * transferred to the generated fact by
 * {@link #fillRecord(ColumnMap, Object[], ObservationFactory)}.
 */
public class EavTable extends Table<EavRow> {
    @XmlElement
    DataTableIdat idat;
    @XmlElement
    MDAT mdat;

    /**
     * Column configuration for the fact data of an EAV row.
     * Only {@code concept} and {@code start} are mandatory; all other
     * columns may be left unspecified.
     */
    @XmlType(name="eav-mdat")
    @XmlAccessorType(XmlAccessType.FIELD)
    public static class MDAT{
        StringColumn concept;
        DateTimeColumn start;
        DateTimeColumn end;
        StringColumn location;
        StringColumn type;
        StringColumn value;
        StringColumn unit;
    }

    /**
     * Builds the column map for the given table headers and verifies that
     * all mandatory columns are configured.
     *
     * @param headers header names of the source table
     * @return column map with all configured columns registered
     * @throws ParseException if the idat/mdat sections or one of the
     *  mandatory columns (patient-id, visit-id, concept, start) are not
     *  specified, or if registration/header validation fails
     */
    @Override
    public ColumnMap getColumnMap(String[] headers) throws ParseException {
        ColumnMap map = new ColumnMap(headers);

        // a missing section is a configuration error, not a programming error
        if( idat == null ){
            throw new ParseException("datasource/eav-table/idat not specified");
        }
        if( mdat == null ){
            throw new ParseException("datasource/eav-table/mdat not specified");
        }

        if( idat.patientId == null ){
            throw new ParseException("datasource/eav-table/idat/patient-id column not specified");
        }
        if( idat.visitId == null ){
            throw new ParseException("datasource/eav-table/idat/visit-id column not specified");
        }
        map.registerColumn(idat.patientId);
        map.registerColumn(idat.visitId);

        if( mdat.concept == null ){
            throw new ParseException("datasource/eav-table/mdat/concept column not specified");
        }
        if( mdat.start == null ){
            throw new ParseException("datasource/eav-table/mdat/start column not specified");
        }
        map.registerColumn(mdat.concept);
        map.registerColumn(mdat.start);

        // optional columns
        if( mdat.end != null ){
            map.registerColumn(mdat.end);
        }
        if( mdat.location != null ){
            map.registerColumn(mdat.location);
        }
        if( mdat.type != null ){
            map.registerColumn(mdat.type);
        }
        if( mdat.value != null ){
            map.registerColumn(mdat.value);
        }
        if( mdat.unit != null ){
            map.registerColumn(mdat.unit);
        }

        // make sure all columns are specified
        validateAllHeaders(headers, map, idat.ignore);
        return map;
    }

    /**
     * Converts one table row into an {@link EavRow} containing a single fact.
     * <p>
     * The value column is optional: if it is not configured, or the row
     * holds no value, the fact is created without a value.
     *
     * @param map column map produced by {@link #getColumnMap(String[])}
     * @param row raw row data
     * @param factory factory used to create the observation
     * @return row wrapping the created fact
     * @throws ParseException if a column cannot be parsed, a numeric value
     *  is malformed, or the row names a value type that is not supported
     */
    @Override
    public EavRow fillRecord(ColumnMap map, Object[] row, ObservationFactory factory) throws ParseException {
        String patid = idat.patientId.valueOf(map, row);
        DateTimeAccuracy start = mdat.start.valueOf(map,row);
        String concept = mdat.concept.valueOf(map,row);
        Observation fact = factory.createObservation(patid, concept, start);

        String visit = idat.visitId.valueOf(map, row);
        if( visit != null ){
            fact.setEncounterId(visit);
        }

        // the value column is optional (see getColumnMap), so guard against
        // dereferencing an unconfigured column
        String value = null;
        if( mdat.value != null ){
            value = mdat.value.valueOf(map,row);
        }
        if( value != null ){
            String type = null;
            if( mdat.type != null ){
                type = mdat.type.valueOf(map,row);
            }
            fact.setValue(parseValue(type, value));
        }
        return new EavRow(fact);
    }

    /**
     * Creates the fact value for the given type name and string representation.
     * The type names are the {@link XmlType} names of the column classes.
     *
     * @param type value type name from the row, or {@code null} if unknown
     * @param value non-null string representation of the value
     * @return string or numeric value
     * @throws ParseException for malformed numbers and unsupported types
     */
    private static Value parseValue(String type, String value) throws ParseException {
        // without type information, fall back to string
        // TODO determine type automatically from string representation
        if( type == null || type.equals(xmlTypeName(StringColumn.class)) ){
            return new StringValue(value);
        }
        if( type.equals(xmlTypeName(DecimalColumn.class))
                || type.equals(xmlTypeName(IntegerColumn.class)) ){
            try{
                return new NumericValue(new BigDecimal(value));
            }catch( NumberFormatException e ){
                throw new ParseException("Unable to parse number", e);
            }
        }
        // previously the value was silently dropped for unknown types
        throw new ParseException("Unsupported value type: "+type);
    }

    /** Returns the name declared in the {@link XmlType} annotation of a column class. */
    private static String xmlTypeName(Class<?> columnClass){
        return columnClass.getAnnotation(XmlType.class).name();
    }
}
......@@ -90,6 +90,10 @@ public class TestMarshall {
s.wideTables[0].concepts[0].modifiers[0] = new Concept.Modifier("DOSE");
s.wideTables[0].concepts[0].modifiers[0].value = new StringColumn("dosis");
s.eavTables = new EavTable[1];
s.eavTables[0] = new EavTable();
s.eavTables[0].source = new FileSource("asdf.txt", "\\t");
JAXB.marshal(s, System.out);
}
......
......@@ -12,6 +12,7 @@ import org.junit.Test;
import de.sekmi.histream.Observation;
import de.sekmi.histream.ObservationFactory;
import de.sekmi.histream.Value;
import de.sekmi.histream.etl.EavRow;
import de.sekmi.histream.etl.ParseException;
import de.sekmi.histream.etl.PatientRow;
import de.sekmi.histream.etl.RecordSupplier;
......@@ -69,4 +70,20 @@ public class TestReadTables {
}
}
/**
 * Opens the first configured EAV table and verifies the first record:
 * it must contain at least one fact, whose concept, numeric value and
 * source id match the expected test data.
 */
@Test
public void testReadEavTable() throws IOException, ParseException{
    try( RecordSupplier<EavRow> rows = ds.eavTables[0].open(of,ds.getMeta()) ){
        EavRow firstRow = rows.get();
        Assert.assertNotNull(firstRow);
        Assert.assertFalse(firstRow.getFacts().isEmpty());

        // only the first fact of the first row is inspected
        Observation fact = firstRow.getFacts().get(0);
        Assert.assertEquals("f_eav_b", fact.getConceptId());

        Value value = fact.getValue();
        Assert.assertEquals(Value.Type.Numeric, value.getType());
        Assert.assertEquals(BigDecimal.valueOf(3.9), fact.getValue().getNumericValue());

        ExternalSourceType source = fact.getSource();
        Assert.assertNotNull(source);
        Assert.assertEquals("test-1", source.getSourceId());
    }
}
}
......@@ -6,10 +6,10 @@
</meta>
<patient-table>
<source xsi:type="plain-file">
<url>test-1-patients.txt</url>
<separator>\t</separator>
</source>
<source xsi:type="plain-file">
<url>test-1-patients.txt</url>
<separator>\t</separator>
</source>
<idat>
<patient-id>patid</patient-id>
<given-name>vorname</given-name>
......@@ -19,15 +19,18 @@
<gender>geschlecht</gender>
<ignore xsi:type="string">ignoriert1</ignore>
<ignore xsi:type="string">patfakt1</ignore>
</idat>
</idat>
<!-- To provide MDAT from the patient table, also use the same patient table
as a visit table (a visit date is needed). The patient ID can be re-used
as the visit ID, or a constant visit ID can be used. -->
</patient-table>
<!-- optional -->
<visit-table>
<source xsi:type="plain-file">
<url>test-1-visits.txt</url>
<type>text/csv</type>
<source xsi:type="plain-file">
<url>test-1-visits.txt</url>
<type>text/csv</type>
<separator>\t</separator>
</source>
</source>
<idat>
<patient-id>patid</patient-id>
<visit-id>fallnr</visit-id>
......@@ -43,12 +46,11 @@
</concept>
</mdat>
</visit-table>
<wide-table>
<source xsi:type="plain-file">
<url>test-1-widetable.txt</url>
<source xsi:type="plain-file">
<url>test-1-widetable.txt</url>
<separator>\t</separator>
</source>
</source>
<idat>
<patient-id>patid</patient-id>
<visit-id>fallnr</visit-id>
......@@ -69,5 +71,26 @@
</mdat>
</wide-table>
<eav-table>
<source xsi:type="plain-file">
<url>test-1-eavtable.txt</url>
<separator>\t</separator>
</source>
<idat>
<patient-id>patid</patient-id>
<visit-id>event</visit-id>
<ignore xsi:type="string">locat</ignore>
<ignore xsi:type="string">user</ignore>
</idat>
<mdat>
<concept>param</concept>
<start format="u-M-d">start_ts</start>
<end format="u-M-d" na="@">end</end>
<!-- location -->
<type>type</type>
<value na="@">value</value>
<unit na="@">unit</unit>
</mdat>
</eav-table>
</datasource>
patid event locat user param start_ts end type value unit
p1 v1 L1 u1 f_eav_b 2013-08-19 @ decimal 3.9 @
p1 v1 L1 u1 f_eav_c 2013-08-19 @ decimal 11.2 @
p1 v1 L1 u1 f_eav_d 2013-08-19 @ decimal 66.0 @
p1 v1 L1 u1 f_eav_f 2013-08-19 @ decimal 27.0 @
p1 v1 L1 u1 f_eav_i 2013-08-19 @ decimal 0.34 @
p1 v1 L1 u1 f_eav_l 2013-08-19 @ decimal 0.9 @
p1 v1 L1 u1 f_eav_v 2013-08-19 @ decimal 4.8 @
p1 v1 L1 u1 f_eav_a 2013-08-19 @ decimal 9.0 @
p1 v2 L1 u1 f_eav_b 2013-08-18 @ decimal 4.1 @
p1 v2 L1 u1 f_eav_c 2013-08-18 @ decimal 7.7 @
p1 v2 L1 u1 f_eav_d 2013-08-18 @ decimal 72.8 @
p1 v2 L1 u1 f_eav_f 2013-08-18 @ decimal 20.6 @
p1 v2 L1 u1 f_eav_i 2013-08-18 @ decimal 0.36 @
p1 v2 L1 u1 f_eav_l 2013-08-18 @ decimal 1.3 @
p1 v2 L1 u1 f_eav_v 2013-08-18 @ decimal 3.8 @
p2 v3 L1 u1 f_eav_b 2013-08-24 @ decimal 4.7 @
p2 v3 L1 u1 f_eav_c 2013-08-24 @ decimal 6.9 @
p2 v3 L1 u1 f_eav_d 2013-08-24 @ decimal 56.1 @
p2 v3 L1 u1 f_eav_f 2013-08-24 @ decimal 33.2 @
p2 v3 L1 u1 f_eav_i 2013-08-24 @ decimal 0.44 @
p2 v3 L1 u1 f_eav_l 2013-08-24 @ decimal 1.2 @
p2 v3 L1 u1 f_eav_v 2013-08-24 @ decimal 0.7 @
p2 v3 L1 u1 f_eav_a 2013-08-24 @ decimal 29.0 @
p3 v4 L1 u1 f_eav_b 2013-08-30 @ decimal 3.5 @
p3 v4 L1 u1 f_eav_c 2013-08-30 @ decimal 7.2 @
p3 v4 L1 u1 f_eav_d 2013-08-30 @ decimal 61.9 @
p3 v4 L1 u1 f_eav_f 2013-08-30 @ decimal 28.3 @
p3 v4 L1 u1 f_eav_i 2013-08-30 @ decimal 0.32 @
p3 v4 L1 u1 f_eav_l 2013-08-30 @ decimal 1.1 @
p3 v4 L1 u1 f_eav_v 2013-08-30 @ decimal 0.5 @
p3 v4 L1 u1 f_eav_a 2013-08-30 @ decimal 12.0 @
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment