Commit cee90da4 authored by R.W.Majeed's avatar R.W.Majeed

support for eav tables

parent 20bdf48a
...@@ -3,3 +3,8 @@ Validate exceptions ...@@ -3,3 +3,8 @@ Validate exceptions
test for specific errors in data files, test for specific errors in data files,
e.g. missing visit start timestamps in rows. e.g. missing visit start timestamps in rows.
To do so, add data files with errors to test/resources To do so, add data files with errors to test/resources
Import configuration
--------------------
Allow multiple visit tables with different IDs. Fact tables
can choose which visit table to reference, e.g. <visit-id table="1">fallnr</visit-id>
\ No newline at end of file
...@@ -13,6 +13,7 @@ import de.sekmi.histream.Observation; ...@@ -13,6 +13,7 @@ import de.sekmi.histream.Observation;
import de.sekmi.histream.ObservationFactory; import de.sekmi.histream.ObservationFactory;
import de.sekmi.histream.ObservationSupplier; import de.sekmi.histream.ObservationSupplier;
import de.sekmi.histream.etl.config.DataSource; import de.sekmi.histream.etl.config.DataSource;
import de.sekmi.histream.etl.config.EavTable;
import de.sekmi.histream.etl.config.Meta; import de.sekmi.histream.etl.config.Meta;
import de.sekmi.histream.etl.config.PatientTable; import de.sekmi.histream.etl.config.PatientTable;
import de.sekmi.histream.etl.config.VisitTable; import de.sekmi.histream.etl.config.VisitTable;
...@@ -62,10 +63,11 @@ public class ETLObservationSupplier implements ObservationSupplier{ ...@@ -62,10 +63,11 @@ public class ETLObservationSupplier implements ObservationSupplier{
private PatientTable pt; private PatientTable pt;
private VisitTable vt; private VisitTable vt;
private List<WideTable> wt; private List<WideTable> wt;
private List<EavTable> et;
private RecordSupplier<PatientRow> pr; private RecordSupplier<PatientRow> pr;
private RecordSupplier<VisitRow> vr; private RecordSupplier<VisitRow> vr;
private List<RecordSupplier<WideRow>> wr; private List<RecordSupplier<? extends FactRow>> fr;
private FactGroupingQueue queue; private FactGroupingQueue queue;
...@@ -118,6 +120,7 @@ public class ETLObservationSupplier implements ObservationSupplier{ ...@@ -118,6 +120,7 @@ public class ETLObservationSupplier implements ObservationSupplier{
pt = ds.getPatientTable(); pt = ds.getPatientTable();
vt = ds.getVisitTable(); vt = ds.getVisitTable();
wt = ds.getWideTables(); wt = ds.getWideTables();
et = ds.getEavTables();
// TODO long tables // TODO long tables
Meta meta = ds.getMeta(); Meta meta = ds.getMeta();
...@@ -131,12 +134,17 @@ public class ETLObservationSupplier implements ObservationSupplier{ ...@@ -131,12 +134,17 @@ public class ETLObservationSupplier implements ObservationSupplier{
factory.getExtensionAccessor(Visit.class)); factory.getExtensionAccessor(Visit.class));
// open all tables // open all tables
wr = new ArrayList<>(wt.size()); fr = new ArrayList<>(wt.size());
for( WideTable t : wt ){ for( WideTable t : wt ){
//@SuppressWarnings("resource") //@SuppressWarnings("resource")
RecordSupplier<WideRow> s = t.open(factory, meta); RecordSupplier<WideRow> s = t.open(factory, meta);
queue.addFactTable(s); queue.addFactTable(s);
wr.add(s); fr.add(s);
}
for( EavTable t : et ){
RecordSupplier<EavRow> s = t.open(factory, meta);
queue.addFactTable(s);
fr.add(s);
} }
queue.prepare(); queue.prepare();
...@@ -184,8 +192,8 @@ public class ETLObservationSupplier implements ObservationSupplier{ ...@@ -184,8 +192,8 @@ public class ETLObservationSupplier implements ObservationSupplier{
} }
vr=null; vr=null;
} }
if( wr != null ){ if( fr != null ){
Iterator<RecordSupplier<WideRow>> i = wr.iterator(); Iterator<RecordSupplier<? extends FactRow>> i = fr.iterator();
while( i.hasNext() ){ while( i.hasNext() ){
try{ i.next().close(); } try{ i.next().close(); }
catch( IOException e ){ catch( IOException e ){
......
package de.sekmi.histream.etl;
import java.util.Arrays;
import java.util.List;
import de.sekmi.histream.Observation;
/**
 * Fact row read from an EAV table. Each such row
 * wraps exactly one observation.
 *
 * @author R.W.Majeed
 *
 */
public class EavRow implements FactRow {
    /** The single observation carried by this row. */
    private Observation observation;

    /**
     * Creates a row around the given observation.
     *
     * @param fact observation represented by this row
     */
    public EavRow(Observation fact) {
        observation = fact;
    }

    /**
     * @return a one-element list holding the wrapped observation
     */
    @Override
    public List<Observation> getFacts() {
        List<Observation> single = Arrays.asList(observation);
        return single;
    }

    /**
     * @return patient id as reported by the wrapped observation
     */
    @Override
    public String getPatientId() {
        return observation.getPatientId();
    }

    /**
     * @return encounter id of the wrapped observation, used as visit id
     */
    @Override
    public String getVisitId() {
        return observation.getEncounterId();
    }
}
...@@ -5,6 +5,13 @@ import java.util.List; ...@@ -5,6 +5,13 @@ import java.util.List;
import de.sekmi.histream.Observation; import de.sekmi.histream.Observation;
/**
* Row from a wide fact table. A single wide row
* may contain multiple facts.
*
* @author R.W.Majeed
*
*/
public class WideRow implements FactRow{ public class WideRow implements FactRow{
private String patid; private String patid;
private String visit; private String visit;
......
...@@ -44,6 +44,9 @@ public class DataSource { ...@@ -44,6 +44,9 @@ public class DataSource {
@XmlElement(name="wide-table") @XmlElement(name="wide-table")
WideTable[] wideTables; WideTable[] wideTables;
@XmlElement(name="eav-table")
EavTable[] eavTables;
public Meta getMeta(){return meta;} public Meta getMeta(){return meta;}
public PatientTable getPatientTable(){ public PatientTable getPatientTable(){
...@@ -58,4 +61,8 @@ public class DataSource { ...@@ -58,4 +61,8 @@ public class DataSource {
return Arrays.asList(wideTables); return Arrays.asList(wideTables);
} }
/**
 * Returns the EAV tables of this data source.
 * <p>
 * The {@code eavTables} field may be {@code null} if it was never
 * assigned (presumably when the configuration contains no
 * {@code eav-table} element). In that case an empty list is returned
 * instead of failing with a {@link NullPointerException}, so callers
 * can iterate the result unconditionally.
 *
 * @return fixed-size list view of the EAV tables, never {@code null}
 */
public List<EavTable> getEavTables(){
    // Arrays.asList rejects a null array, substitute an empty one
    return Arrays.asList(eavTables != null ? eavTables : new EavTable[0]);
}
} }
package de.sekmi.histream.etl.config;
import java.math.BigDecimal;
import javax.xml.bind.annotation.XmlAccessType;
import javax.xml.bind.annotation.XmlAccessorType;
import javax.xml.bind.annotation.XmlElement;
import javax.xml.bind.annotation.XmlType;
import de.sekmi.histream.DateTimeAccuracy;
import de.sekmi.histream.Observation;
import de.sekmi.histream.ObservationFactory;
import de.sekmi.histream.Value;
import de.sekmi.histream.etl.ColumnMap;
import de.sekmi.histream.etl.EavRow;
import de.sekmi.histream.etl.ParseException;
import de.sekmi.histream.impl.NumericValue;
import de.sekmi.histream.impl.StringValue;
/**
 * Configuration of an EAV fact table. Every row of such a table is
 * turned into one {@link Observation}, wrapped in an {@link EavRow}.
 * <p>
 * Required columns are patient id and visit id (in {@code idat}) as well
 * as concept and start timestamp (in {@code mdat}). The columns end,
 * location, type, value and unit are optional.
 */
public class EavTable extends Table<EavRow> {
    @XmlElement
    DataTableIdat idat;
    @XmlElement
    MDAT mdat;

    /** Column definitions for the medical data part of an EAV row. */
    @XmlType(name="eav-mdat")
    @XmlAccessorType(XmlAccessType.FIELD)
    public static class MDAT{
        StringColumn concept;
        DateTimeColumn start;
        DateTimeColumn end;
        StringColumn location;
        StringColumn type;
        StringColumn value;
        StringColumn unit;
    }

    /**
     * Builds the column map for the given table headers and verifies
     * that all mandatory columns are configured.
     *
     * @param headers header names of the table
     * @return column map with all configured columns registered
     * @throws ParseException if the idat or mdat section or one of the
     *  mandatory columns is not specified, or if registration/validation
     *  of the headers fails
     */
    @Override
    public ColumnMap getColumnMap(String[] headers) throws ParseException {
        ColumnMap map = new ColumnMap(headers);

        // report missing sections as configuration errors instead of a NullPointerException
        if( idat == null ){
            throw new ParseException("datasource/eav-table/idat not specified");
        }
        if( mdat == null ){
            throw new ParseException("datasource/eav-table/mdat not specified");
        }

        if( idat.patientId == null ){
            throw new ParseException("datasource/eav-table/idat/patient-id column not specified");
        }
        if( idat.visitId == null ){
            throw new ParseException("datasource/eav-table/idat/visit-id column not specified");
        }
        map.registerColumn(idat.patientId);
        map.registerColumn(idat.visitId);

        if( mdat.concept == null ){
            throw new ParseException("datasource/eav-table/mdat/concept column not specified");
        }
        if( mdat.start == null ){
            throw new ParseException("datasource/eav-table/mdat/start column not specified");
        }
        map.registerColumn(mdat.concept);
        map.registerColumn(mdat.start);

        // optional columns
        if( mdat.end != null ){
            map.registerColumn(mdat.end);
        }
        if( mdat.location != null ){
            map.registerColumn(mdat.location);
        }
        if( mdat.type != null ){
            map.registerColumn(mdat.type);
        }
        if( mdat.value != null ){
            map.registerColumn(mdat.value);
        }
        if( mdat.unit != null ){
            map.registerColumn(mdat.unit);
        }
        // make sure all columns are specified
        validateAllHeaders(headers, map, idat.ignore);

        return map;
    }

    /**
     * Creates a single-fact row from one table row.
     * <p>
     * Patient id, concept and start timestamp are used to create the
     * observation. The visit id is assigned as encounter id if present.
     * A value is only assigned if a value column is configured and the
     * cell is not empty.
     *
     * @param map column map obtained from {@link #getColumnMap(String[])}
     * @param row raw row data
     * @param factory factory used to create the observation
     * @return row wrapping the created observation
     * @throws ParseException if a column cannot be parsed, a numeric value
     *  is malformed or the type column contains an unsupported type name
     */
    @Override
    public EavRow fillRecord(ColumnMap map, Object[] row, ObservationFactory factory) throws ParseException {
        String patid = idat.patientId.valueOf(map, row);
        DateTimeAccuracy start = mdat.start.valueOf(map,row);
        String concept = mdat.concept.valueOf(map,row);
        Observation fact = factory.createObservation(patid, concept, start);

        String visit = idat.visitId.valueOf(map, row);
        if( visit != null ){
            fact.setEncounterId(visit);
        }

        // the value column is optional (see getColumnMap), do not dereference it blindly
        String value = null;
        if( mdat.value != null ){
            value = mdat.value.valueOf(map,row);
        }
        if( value != null ){
            String type = null;
            if( mdat.type != null ){
                type = mdat.type.valueOf(map,row);
            }
            fact.setValue(parseValue(value, type));
        }
        // TODO end, location and unit columns are registered but not yet transferred to the fact
        return new EavRow(fact);
    }

    /**
     * Converts the string representation of a value to a {@link Value}
     * according to the specified type name.
     *
     * @param value non-null string representation
     * @param type type name as found in the type column, or {@code null}
     * @return parsed value, never {@code null}
     * @throws ParseException for malformed numbers or unsupported type names
     */
    private static Value parseValue(String value, String type) throws ParseException {
        if( type == null ){
            // for now, use string
            // TODO determine type automatically from string representation
            return new StringValue(value);
        }
        if( type.equals(typeNameOf(StringColumn.class)) ){
            return new StringValue(value);
        }
        if( type.equals(typeNameOf(DecimalColumn.class))
                || type.equals(typeNameOf(IntegerColumn.class)) ){
            try{
                return new NumericValue(new BigDecimal(value));
            }catch( NumberFormatException e ){
                throw new ParseException("Unable to parse number", e);
            }
        }
        // previously the value was dropped silently for unknown types
        throw new ParseException("Unsupported value type in eav-table: "+type);
    }

    /** Returns the {@link XmlType} name declared on the given column class. */
    private static String typeNameOf(Class<?> columnClass){
        return columnClass.getAnnotation(XmlType.class).name();
    }
}
...@@ -90,6 +90,10 @@ public class TestMarshall { ...@@ -90,6 +90,10 @@ public class TestMarshall {
s.wideTables[0].concepts[0].modifiers[0] = new Concept.Modifier("DOSE"); s.wideTables[0].concepts[0].modifiers[0] = new Concept.Modifier("DOSE");
s.wideTables[0].concepts[0].modifiers[0].value = new StringColumn("dosis"); s.wideTables[0].concepts[0].modifiers[0].value = new StringColumn("dosis");
s.eavTables = new EavTable[1];
s.eavTables[0] = new EavTable();
s.eavTables[0].source = new FileSource("asdf.txt", "\\t");
JAXB.marshal(s, System.out); JAXB.marshal(s, System.out);
} }
......
...@@ -12,6 +12,7 @@ import org.junit.Test; ...@@ -12,6 +12,7 @@ import org.junit.Test;
import de.sekmi.histream.Observation; import de.sekmi.histream.Observation;
import de.sekmi.histream.ObservationFactory; import de.sekmi.histream.ObservationFactory;
import de.sekmi.histream.Value; import de.sekmi.histream.Value;
import de.sekmi.histream.etl.EavRow;
import de.sekmi.histream.etl.ParseException; import de.sekmi.histream.etl.ParseException;
import de.sekmi.histream.etl.PatientRow; import de.sekmi.histream.etl.PatientRow;
import de.sekmi.histream.etl.RecordSupplier; import de.sekmi.histream.etl.RecordSupplier;
...@@ -69,4 +70,20 @@ public class TestReadTables { ...@@ -69,4 +70,20 @@ public class TestReadTables {
} }
} }
/**
 * Opens the first configured EAV table, reads its first row and
 * checks concept, numeric value and source id of the contained fact.
 */
@Test
public void testReadEavTable() throws IOException, ParseException{
    try( RecordSupplier<EavRow> supplier = ds.eavTables[0].open(of, ds.getMeta()) ){
        EavRow firstRow = supplier.get();
        Assert.assertNotNull(firstRow);
        // an EAV row must carry at least one fact
        Assert.assertFalse(firstRow.getFacts().isEmpty());

        Observation fact = firstRow.getFacts().get(0);
        Assert.assertEquals("f_eav_b", fact.getConceptId());

        Value factValue = fact.getValue();
        Assert.assertEquals(Value.Type.Numeric, factValue.getType());
        Assert.assertEquals(BigDecimal.valueOf(3.9), factValue.getNumericValue());

        ExternalSourceType source = fact.getSource();
        Assert.assertNotNull(source);
        Assert.assertEquals("test-1", source.getSourceId());
    }
}
} }
...@@ -6,10 +6,10 @@ ...@@ -6,10 +6,10 @@
</meta> </meta>
<patient-table> <patient-table>
<source xsi:type="plain-file"> <source xsi:type="plain-file">
<url>test-1-patients.txt</url> <url>test-1-patients.txt</url>
<separator>\t</separator> <separator>\t</separator>
</source> </source>
<idat> <idat>
<patient-id>patid</patient-id> <patient-id>patid</patient-id>
<given-name>vorname</given-name> <given-name>vorname</given-name>
...@@ -19,15 +19,18 @@ ...@@ -19,15 +19,18 @@
<gender>geschlecht</gender> <gender>geschlecht</gender>
<ignore xsi:type="string">ignoriert1</ignore> <ignore xsi:type="string">ignoriert1</ignore>
<ignore xsi:type="string">patfakt1</ignore> <ignore xsi:type="string">patfakt1</ignore>
</idat> </idat>
<!-- For MDAT in the patient table, also use the same patient-table as
a visit table (a visit date is needed). The patient ID can be re-used
as the visit ID, or a constant visit ID can be used. -->
</patient-table> </patient-table>
<!-- optional --> <!-- optional -->
<visit-table> <visit-table>
<source xsi:type="plain-file"> <source xsi:type="plain-file">
<url>test-1-visits.txt</url> <url>test-1-visits.txt</url>
<type>text/csv</type> <type>text/csv</type>
<separator>\t</separator> <separator>\t</separator>
</source> </source>
<idat> <idat>
<patient-id>patid</patient-id> <patient-id>patid</patient-id>
<visit-id>fallnr</visit-id> <visit-id>fallnr</visit-id>
...@@ -43,12 +46,11 @@ ...@@ -43,12 +46,11 @@
</concept> </concept>
</mdat> </mdat>
</visit-table> </visit-table>
<wide-table> <wide-table>
<source xsi:type="plain-file"> <source xsi:type="plain-file">
<url>test-1-widetable.txt</url> <url>test-1-widetable.txt</url>
<separator>\t</separator> <separator>\t</separator>
</source> </source>
<idat> <idat>
<patient-id>patid</patient-id> <patient-id>patid</patient-id>
<visit-id>fallnr</visit-id> <visit-id>fallnr</visit-id>
...@@ -69,5 +71,26 @@ ...@@ -69,5 +71,26 @@
</mdat> </mdat>
</wide-table> </wide-table>
<eav-table>
<source xsi:type="plain-file">
<url>test-1-eavtable.txt</url>
<separator>\t</separator>
</source>
<idat>
<patient-id>patid</patient-id>
<visit-id>event</visit-id>
<ignore xsi:type="string">locat</ignore>
<ignore xsi:type="string">user</ignore>
</idat>
<mdat>
<concept>param</concept>
<start format="u-M-d">start_ts</start>
<end format="u-M-d" na="@">end</end>
<!-- location -->
<type>type</type>
<value na="@">value</value>
<unit na="@">unit</unit>
</mdat>
</eav-table>
</datasource> </datasource>
patid event locat user param start_ts end type value unit
p1 v1 L1 u1 f_eav_b 2013-08-19 @ decimal 3.9 @
p1 v1 L1 u1 f_eav_c 2013-08-19 @ decimal 11.2 @
p1 v1 L1 u1 f_eav_d 2013-08-19 @ decimal 66.0 @
p1 v1 L1 u1 f_eav_f 2013-08-19 @ decimal 27.0 @
p1 v1 L1 u1 f_eav_i 2013-08-19 @ decimal 0.34 @
p1 v1 L1 u1 f_eav_l 2013-08-19 @ decimal 0.9 @
p1 v1 L1 u1 f_eav_v 2013-08-19 @ decimal 4.8 @
p1 v1 L1 u1 f_eav_a 2013-08-19 @ decimal 9.0 @
p1 v2 L1 u1 f_eav_b 2013-08-18 @ decimal 4.1 @
p1 v2 L1 u1 f_eav_c 2013-08-18 @ decimal 7.7 @
p1 v2 L1 u1 f_eav_d 2013-08-18 @ decimal 72.8 @
p1 v2 L1 u1 f_eav_f 2013-08-18 @ decimal 20.6 @
p1 v2 L1 u1 f_eav_i 2013-08-18 @ decimal 0.36 @
p1 v2 L1 u1 f_eav_l 2013-08-18 @ decimal 1.3 @
p1 v2 L1 u1 f_eav_v 2013-08-18 @ decimal 3.8 @
p2 v3 L1 u1 f_eav_b 2013-08-24 @ decimal 4.7 @
p2 v3 L1 u1 f_eav_c 2013-08-24 @ decimal 6.9 @
p2 v3 L1 u1 f_eav_d 2013-08-24 @ decimal 56.1 @
p2 v3 L1 u1 f_eav_f 2013-08-24 @ decimal 33.2 @
p2 v3 L1 u1 f_eav_i 2013-08-24 @ decimal 0.44 @
p2 v3 L1 u1 f_eav_l 2013-08-24 @ decimal 1.2 @
p2 v3 L1 u1 f_eav_v 2013-08-24 @ decimal 0.7 @
p2 v3 L1 u1 f_eav_a 2013-08-24 @ decimal 29.0 @
p3 v4 L1 u1 f_eav_b 2013-08-30 @ decimal 3.5 @
p3 v4 L1 u1 f_eav_c 2013-08-30 @ decimal 7.2 @
p3 v4 L1 u1 f_eav_d 2013-08-30 @ decimal 61.9 @
p3 v4 L1 u1 f_eav_f 2013-08-30 @ decimal 28.3 @
p3 v4 L1 u1 f_eav_i 2013-08-30 @ decimal 0.32 @
p3 v4 L1 u1 f_eav_l 2013-08-30 @ decimal 1.1 @
p3 v4 L1 u1 f_eav_v 2013-08-30 @ decimal 0.5 @
p3 v4 L1 u1 f_eav_a 2013-08-30 @ decimal 12.0 @
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment