Commit 3d658d95 authored by rwm's avatar rwm
Browse files

wide table parsing implemented with basic functionality

parent 469a02e9
...@@ -12,9 +12,34 @@ ...@@ -12,9 +12,34 @@
<version>0.2-alpha</version> <version>0.2-alpha</version>
</parent> </parent>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
<build> <build>
<plugins> <plugins>
<!-- TODO: Fix schemagen invocation (some annotations wrong?)
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>jaxb2-maven-plugin</artifactId>
<version>2.1</version>
<executions>
<execution>
<id>schemagen</id>
<goals>
<goal>schemagen</goal>
</goals>
</execution>
</executions>
<configuration>
<sources>
<source>src/main/java/de/sekmi/histream/etl/config</source>
</sources>
</configuration>
</plugin>
-->
</plugins> </plugins>
</build> </build>
<dependencies> <dependencies>
<dependency> <dependency>
......
...@@ -27,12 +27,20 @@ public class ColumnMap{ ...@@ -27,12 +27,20 @@ public class ColumnMap{
/** /**
* Register a column and look up its index in the header list. * Register a column and look up its index in the header list.
* @param column * @param column column to register
* @throws ParseException if the column cannot be found in the headers * @throws ParseException if the column cannot be found in the headers
* @throws NullPointerException if column is null * @throws NullPointerException if column is null
*/ */
public void registerColumn(Column column)throws ParseException{ public void registerColumn(Column column)throws ParseException{
Objects.requireNonNull(column); Objects.requireNonNull(column);
column.validate(); // TODO: maybe call after unmarshal of column
if( column.getName().isEmpty() ){
// no reference to column, probably constant value
// no need to register
return;
}
if( map.containsKey(column.getName()) ){ if( map.containsKey(column.getName()) ){
// column name already registered // column name already registered
return; return;
......
...@@ -3,17 +3,20 @@ package de.sekmi.histream.etl; ...@@ -3,17 +3,20 @@ package de.sekmi.histream.etl;
import java.io.IOException; import java.io.IOException;
import java.util.function.Supplier; import java.util.function.Supplier;
import de.sekmi.histream.ObservationFactory;
import de.sekmi.histream.etl.config.Table; import de.sekmi.histream.etl.config.Table;
public class RecordSupplier<R> implements Supplier<R>, AutoCloseable{ public class RecordSupplier<R> implements Supplier<R>, AutoCloseable{
RowSupplier rows; private RowSupplier rows;
Table<R> table; private Table<R> table;
ColumnMap map; private ColumnMap map;
private ObservationFactory factory;
public RecordSupplier(RowSupplier rows, Table<R> table)throws ParseException{ public RecordSupplier(RowSupplier rows, Table<R> table, ObservationFactory factory)throws ParseException{
this.rows = rows; this.rows = rows;
this.table = table; this.table = table;
this.map = table.getColumnMap(rows.getHeaders()); this.map = table.getColumnMap(rows.getHeaders());
this.factory = factory;
} }
@Override @Override
...@@ -31,7 +34,7 @@ public class RecordSupplier<R> implements Supplier<R>, AutoCloseable{ ...@@ -31,7 +34,7 @@ public class RecordSupplier<R> implements Supplier<R>, AutoCloseable{
} }
R p; R p;
try { try {
p = table.fillRecord(map, row); p = table.fillRecord(map, row, factory);
} catch (ParseException e) { } catch (ParseException e) {
throw new UncheckedParseException(e); throw new UncheckedParseException(e);
} }
......
package de.sekmi.histream.etl; package de.sekmi.histream.etl;
import java.util.ArrayList;
import java.util.List;
import de.sekmi.histream.Observation;
public class WideRow { public class WideRow {
/** Patient id this row belongs to; set once in the constructor. */
private final String patid;
/** Visit id this row belongs to; set once in the constructor. */
private final String visit;
/** Facts added to this row via {@code addFact}; the list reference itself never changes. */
private final List<Observation> facts;
/**
 * Create a row for the given patient and visit.
 * The list of facts starts out empty.
 *
 * @param patid patient id stored in this row
 * @param visit visit id stored in this row
 */
public WideRow(String patid, String visit){
	this.patid = patid;
	this.visit = visit;
	this.facts = new ArrayList<Observation>();
}
/**
 * Append an observation to the facts of this row.
 *
 * @param o observation to append
 */
public void addFact(Observation o){
	facts.add(o);
}
/**
 * Get the facts collected for this row.
 * The internal list is returned directly, not a copy.
 *
 * @return list of observations in insertion order
 */
public List<Observation> getFacts(){
	return this.facts;
}
/**
 * @return patient id passed to the constructor
 */
public String getPatientId(){
	return this.patid;
}
/**
 * @return visit id passed to the constructor
 */
public String getVisitId(){
	return this.visit;
}
} }
package de.sekmi.histream.etl.config; package de.sekmi.histream.etl.config;
import java.text.DecimalFormat; import java.text.DecimalFormat;
import java.util.Objects;
import javax.xml.bind.annotation.XmlAttribute; import javax.xml.bind.annotation.XmlAttribute;
import javax.xml.bind.annotation.XmlSeeAlso; import javax.xml.bind.annotation.XmlSeeAlso;
...@@ -10,6 +11,13 @@ import javax.xml.bind.annotation.XmlValue; ...@@ -10,6 +11,13 @@ import javax.xml.bind.annotation.XmlValue;
import de.sekmi.histream.etl.ColumnMap; import de.sekmi.histream.etl.ColumnMap;
import de.sekmi.histream.etl.ParseException; import de.sekmi.histream.etl.ParseException;
/**
* Abstract table column.
* A column does not need a name, if it has a constant-value assigned.
*
* @author marap1
*
*/
@XmlTransient @XmlTransient
@XmlSeeAlso({StringColumn.class}) @XmlSeeAlso({StringColumn.class})
public abstract class Column { public abstract class Column {
...@@ -98,7 +106,15 @@ public abstract class Column { ...@@ -98,7 +106,15 @@ public abstract class Column {
} }
public Object valueOf(ColumnMap map, Object[] row) throws ParseException{ public Object valueOf(ColumnMap map, Object[] row) throws ParseException{
return this.valueOf(row[map.indexOf(this)]); if( name.isEmpty() ){
// use constant value if available
return valueOf(null);
}
Objects.requireNonNull(map);
Objects.requireNonNull(row);
Integer index = map.indexOf(this);
Objects.requireNonNull(index);
return this.valueOf(row[index]);
} }
public String applyRegularExpression(String input){ public String applyRegularExpression(String input){
...@@ -117,4 +133,10 @@ public abstract class Column { ...@@ -117,4 +133,10 @@ public abstract class Column {
@XmlAttribute @XmlAttribute
String format; String format;
} }
/**
 * Check the column configuration.
 * A column may only have an empty name if a constant value is specified.
 *
 * @throws ParseException if the name is empty and no constant value is set
 */
public void validate()throws ParseException{
	if( !name.isEmpty() ){
		// named column, nothing else to check
		return;
	}
	if( constantValue != null ){
		// unnamed column backed by a constant value
		return;
	}
	throw new ParseException("Empty column name only allowed if constant-value is specified");
}
} }
package de.sekmi.histream.etl.config; package de.sekmi.histream.etl.config;
import java.math.BigDecimal;
import javax.xml.bind.annotation.XmlAccessType; import javax.xml.bind.annotation.XmlAccessType;
import javax.xml.bind.annotation.XmlAccessorType; import javax.xml.bind.annotation.XmlAccessorType;
import javax.xml.bind.annotation.XmlAttribute; import javax.xml.bind.annotation.XmlAttribute;
import javax.xml.bind.annotation.XmlElement; import javax.xml.bind.annotation.XmlElement;
import de.sekmi.histream.DateTimeAccuracy;
import de.sekmi.histream.Observation;
import de.sekmi.histream.ObservationFactory;
import de.sekmi.histream.etl.ColumnMap;
import de.sekmi.histream.etl.ParseException;
import de.sekmi.histream.impl.NumericValue;
import de.sekmi.histream.impl.StringValue;
/** /**
* Concept from a wide table * Concept from a wide table
* @author Raphael * @author Raphael
...@@ -49,4 +59,34 @@ public class Concept{ ...@@ -49,4 +59,34 @@ public class Concept{
this.start = new DateTimeColumn(startColumn, format); this.start = new DateTimeColumn(startColumn, format);
} }
/**
 * Build one observation for this concept from a single table row.
 * <p>
 * The start timestamp is read from the start column and the observation is
 * created through the factory with this concept's id. The encounter id is
 * only set if a visit is given. The value column decides the value type:
 * {@code null} results in no value, a {@link String} in a {@link StringValue}
 * and a {@link BigDecimal} in a {@link NumericValue}. The unit (if a unit
 * column is configured) is currently only passed on to numeric values.
 *
 * @param patid patient id for the observation
 * @param visit visit id, or {@code null} to leave the encounter id unset
 * @param factory factory used to create the observation
 * @param map column map used to locate the columns in the row
 * @param row row data
 * @return the created observation
 * @throws ParseException if a column value cannot be parsed or the value type is unsupported
 */
protected Observation createObservation(String patid, String visit, ObservationFactory factory, ColumnMap map, Object[] row) throws ParseException{
	DateTimeAccuracy timestamp = (DateTimeAccuracy)this.start.valueOf(map, row);
	Observation fact = factory.createObservation(patid, this.id, timestamp);
	if( visit != null ){
		fact.setEncounterId(visit);
	}

	Object raw = this.value.valueOf(map, row);
	// unit column is optional
	String unitText = (this.unit == null) ? null : (String)this.unit.valueOf(map, row);

	if( raw instanceof BigDecimal ){
		// numeric
		fact.setValue(new NumericValue((BigDecimal)raw, unitText));
	}else if( raw instanceof String ){
		// string
		fact.setValue(new StringValue((String)raw));
		// TODO: set unit
	}else if( raw == null ){
		// no value
		fact.setValue(null);
	}else{
		throw new ParseException("Unsupported value type for concept id "+this.id+": "+raw.getClass());
	}
	// TODO: modifiers
	return fact;
}
} }
\ No newline at end of file
...@@ -7,6 +7,7 @@ import javax.xml.bind.annotation.XmlElement; ...@@ -7,6 +7,7 @@ import javax.xml.bind.annotation.XmlElement;
import javax.xml.bind.annotation.XmlElementWrapper; import javax.xml.bind.annotation.XmlElementWrapper;
import de.sekmi.histream.DateTimeAccuracy; import de.sekmi.histream.DateTimeAccuracy;
import de.sekmi.histream.ObservationFactory;
import de.sekmi.histream.etl.ColumnMap; import de.sekmi.histream.etl.ColumnMap;
import de.sekmi.histream.etl.ParseException; import de.sekmi.histream.etl.ParseException;
import de.sekmi.histream.etl.PatientRow; import de.sekmi.histream.etl.PatientRow;
...@@ -51,7 +52,7 @@ public class PatientTable extends Table<PatientRow> implements WideInterface{ ...@@ -51,7 +52,7 @@ public class PatientTable extends Table<PatientRow> implements WideInterface{
@Override @Override
public PatientRow fillRecord(ColumnMap map, Object[] row) throws ParseException { public PatientRow fillRecord(ColumnMap map, Object[] row, ObservationFactory factory) throws ParseException {
PatientRow patient = new PatientRow(); PatientRow patient = new PatientRow();
patient.setId(idat.patientId.valueOf(map, row).toString()); patient.setId(idat.patientId.valueOf(map, row).toString());
patient.setNames((String)idat.firstname.valueOf(map, row), (String)idat.surname.valueOf(map, row)); patient.setNames((String)idat.firstname.valueOf(map, row), (String)idat.surname.valueOf(map, row));
......
...@@ -5,6 +5,7 @@ import java.io.IOException; ...@@ -5,6 +5,7 @@ import java.io.IOException;
import javax.xml.bind.annotation.XmlElement; import javax.xml.bind.annotation.XmlElement;
import javax.xml.bind.annotation.XmlTransient; import javax.xml.bind.annotation.XmlTransient;
import de.sekmi.histream.ObservationFactory;
import de.sekmi.histream.etl.ColumnMap; import de.sekmi.histream.etl.ColumnMap;
import de.sekmi.histream.etl.ParseException; import de.sekmi.histream.etl.ParseException;
import de.sekmi.histream.etl.RecordSupplier; import de.sekmi.histream.etl.RecordSupplier;
...@@ -50,10 +51,11 @@ public abstract class Table<T> { ...@@ -50,10 +51,11 @@ public abstract class Table<T> {
} }
} }
public abstract T fillRecord(ColumnMap map, Object[] row) throws ParseException; public abstract T fillRecord(ColumnMap map, Object[] row, ObservationFactory factory) throws ParseException;
public RecordSupplier<T> open() throws IOException, ParseException{ public RecordSupplier<T> open(ObservationFactory factory) throws IOException, ParseException{
return new RecordSupplier<>(source.rows(), this); return new RecordSupplier<>(source.rows(), this, factory);
} }
} }
...@@ -7,6 +7,7 @@ import javax.xml.bind.annotation.XmlElementWrapper; ...@@ -7,6 +7,7 @@ import javax.xml.bind.annotation.XmlElementWrapper;
import javax.xml.bind.annotation.XmlType; import javax.xml.bind.annotation.XmlType;
import de.sekmi.histream.DateTimeAccuracy; import de.sekmi.histream.DateTimeAccuracy;
import de.sekmi.histream.ObservationFactory;
import de.sekmi.histream.etl.ColumnMap; import de.sekmi.histream.etl.ColumnMap;
import de.sekmi.histream.etl.ParseException; import de.sekmi.histream.etl.ParseException;
import de.sekmi.histream.etl.VisitRow; import de.sekmi.histream.etl.VisitRow;
...@@ -45,7 +46,7 @@ public class VisitTable extends Table<VisitRow> implements WideInterface{ ...@@ -45,7 +46,7 @@ public class VisitTable extends Table<VisitRow> implements WideInterface{
} }
@Override @Override
public VisitRow fillRecord(ColumnMap map, Object[] row) throws ParseException { public VisitRow fillRecord(ColumnMap map, Object[] row, ObservationFactory factory) throws ParseException {
VisitRow visit = new VisitRow(); VisitRow visit = new VisitRow();
visit.setId(idat.visitId.valueOf(map, row).toString()); visit.setId(idat.visitId.valueOf(map, row).toString());
visit.setPatientId(idat.patientId.valueOf(map, row).toString()); visit.setPatientId(idat.patientId.valueOf(map, row).toString());
......
...@@ -3,6 +3,8 @@ package de.sekmi.histream.etl.config; ...@@ -3,6 +3,8 @@ package de.sekmi.histream.etl.config;
import javax.xml.bind.annotation.XmlElement; import javax.xml.bind.annotation.XmlElement;
import javax.xml.bind.annotation.XmlElementWrapper; import javax.xml.bind.annotation.XmlElementWrapper;
import de.sekmi.histream.Observation;
import de.sekmi.histream.ObservationFactory;
import de.sekmi.histream.etl.ColumnMap; import de.sekmi.histream.etl.ColumnMap;
import de.sekmi.histream.etl.ParseException; import de.sekmi.histream.etl.ParseException;
import de.sekmi.histream.etl.WideRow; import de.sekmi.histream.etl.WideRow;
...@@ -28,8 +30,14 @@ public class WideTable extends Table<WideRow> { ...@@ -28,8 +30,14 @@ public class WideTable extends Table<WideRow> {
} }
@Override @Override
public WideRow fillRecord(ColumnMap map, Object[] row) throws ParseException { public WideRow fillRecord(ColumnMap map, Object[] row, ObservationFactory factory) throws ParseException {
// TODO Auto-generated method stub String patid = (String)idat.patientId.valueOf(map, row);
return null; String visit = (String)idat.visitId.valueOf(map, row);
WideRow rec = new WideRow(patid,visit);
for( Concept c : concepts ){
Observation o = c.createObservation(patid, visit, factory, map, row);
rec.addFact(o);
}
return rec;
} }
} }
...@@ -8,22 +8,34 @@ import javax.xml.bind.JAXB; ...@@ -8,22 +8,34 @@ import javax.xml.bind.JAXB;
import org.junit.Test; import org.junit.Test;
import de.sekmi.histream.Observation;
import de.sekmi.histream.ObservationFactory;
import de.sekmi.histream.Value;
import de.sekmi.histream.etl.ParseException; import de.sekmi.histream.etl.ParseException;
import de.sekmi.histream.etl.PatientRow; import de.sekmi.histream.etl.PatientRow;
import de.sekmi.histream.etl.RecordSupplier; import de.sekmi.histream.etl.RecordSupplier;
import de.sekmi.histream.etl.VisitRow; import de.sekmi.histream.etl.VisitRow;
import de.sekmi.histream.etl.WideRow;
import de.sekmi.histream.impl.ObservationFactoryImpl;
import org.junit.Assert; import org.junit.Assert;
import org.junit.Before;
public class TestReadTables { public class TestReadTables {
private DataSource ds;
private ObservationFactory of;
@Test @Before
public void testReadPatients() throws IOException, ParseException{ public void loadConfiguration() throws IOException{
DataSource ds;
try( InputStream in = getClass().getResourceAsStream("/test-1-datasource.xml") ){ try( InputStream in = getClass().getResourceAsStream("/test-1-datasource.xml") ){
ds = JAXB.unmarshal(in, DataSource.class); ds = JAXB.unmarshal(in, DataSource.class);
} }
try( RecordSupplier<PatientRow> s = ds.patientTable.open() ){ of = new ObservationFactoryImpl();
}
@Test
public void testReadPatients() throws IOException, ParseException{
try( RecordSupplier<PatientRow> s = ds.patientTable.open(of) ){
PatientRow r = s.get(); PatientRow r = s.get();
Assert.assertEquals("1", r.getId()); Assert.assertEquals("1", r.getId());
Assert.assertEquals(2003, r.getBirthDate().get(ChronoField.YEAR)); Assert.assertEquals(2003, r.getBirthDate().get(ChronoField.YEAR));
...@@ -32,15 +44,23 @@ public class TestReadTables { ...@@ -32,15 +44,23 @@ public class TestReadTables {
} }
@Test @Test
public void testReadVisits() throws IOException, ParseException{ public void testReadVisits() throws IOException, ParseException{
DataSource ds; try( RecordSupplier<VisitRow> s = ds.visitTable.open(of) ){
try( InputStream in = getClass().getResourceAsStream("/test-1-datasource.xml") ){
ds = JAXB.unmarshal(in, DataSource.class);
}
try( RecordSupplier<VisitRow> s = ds.visitTable.open() ){
VisitRow r = s.get(); VisitRow r = s.get();
Assert.assertEquals("1", r.getId()); Assert.assertEquals("1", r.getId());
Assert.assertEquals(2013, r.getStartTime().get(ChronoField.YEAR)); Assert.assertEquals(2013, r.getStartTime().get(ChronoField.YEAR));
} }
} }
/**
 * Open the first wide table of the test data source and verify the
 * first fact of the first record: concept id, value type and string value.
 */
@Test
public void testReadWideTable() throws IOException, ParseException{
	try( RecordSupplier<WideRow> supplier = ds.wideTables[0].open(of) ){
		WideRow first = supplier.get();
		Assert.assertNotNull(first);
		// at least one fact must have been produced for the row
		Assert.assertFalse(first.getFacts().isEmpty());

		Observation fact = first.getFacts().get(0);
		Assert.assertEquals("natrium", fact.getConceptId());
		Assert.assertEquals(Value.Type.Text, fact.getValue().getType());
		Assert.assertEquals("124", fact.getValue().getStringValue());
	}
}
} }
...@@ -61,7 +61,7 @@ ...@@ -61,7 +61,7 @@
<wide-table> <wide-table>
<source xsi:type="fileSource"> <source xsi:type="fileSource">
<url>file:test-1-widetable.txt</url> <url>file:src/test/resources/test-1-widetable.txt</url>
<separator>\t</separator> <separator>\t</separator>
</source> </source>
<idat> <idat>
...@@ -70,11 +70,12 @@ ...@@ -70,11 +70,12 @@
</idat> </idat>
<mdat> <mdat>
<concept id="natrium"> <concept id="natrium">
<!-- TODO implement numeric columns -->
<value xsi:type="stringColumn">na</value> <value xsi:type="stringColumn">na</value>
<start>zeitpunkt</start> <start format="d.M.u[ H[:m[:s]]]">zeitpunkt</start>
<unit constant-value="mmol/l" /> <unit constant-value="mmol/l" />
<modifier id="other"> <modifier id="other">
<value xsi:type="stringColumn">other</value> <value xsi:type="stringColumn" constant-value=""/>
</modifier> </modifier>
</concept> </concept>
</mdat> </mdat>
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment