Commit 3d658d95 authored by rwm's avatar rwm
Browse files

wide table parsing implemented with basic functionality

parent 469a02e9
......@@ -11,10 +11,35 @@
<artifactId>histream</artifactId>
<version>0.2-alpha</version>
</parent>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
<build>
<plugins>
<!-- TODO: Fix schemagen invocation (some annotations wrong?)
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>jaxb2-maven-plugin</artifactId>
<version>2.1</version>
<executions>
<execution>
<id>schemagen</id>
<goals>
<goal>schemagen</goal>
</goals>
</execution>
</executions>
<configuration>
<sources>
<source>src/main/java/de/sekmi/histream/etl/config</source>
</sources>
</configuration>
</plugin>
-->
</plugins>
</build>
<dependencies>
<dependency>
......
......@@ -27,12 +27,20 @@ public class ColumnMap{
/**
* Register a column and look up its index in the header list.
* @param column
* @param column column to register
* @throws ParseException if the column cannot be found in the headers
* @throws NullPointerException if column is null
*/
public void registerColumn(Column column)throws ParseException{
Objects.requireNonNull(column);
column.validate(); // TODO: maybe call after unmarshal of column
if( column.getName().isEmpty() ){
// no reference to column, probably constant value
// no need to register
return;
}
if( map.containsKey(column.getName()) ){
// column name already registered
return;
......
......@@ -3,17 +3,20 @@ package de.sekmi.histream.etl;
import java.io.IOException;
import java.util.function.Supplier;
import de.sekmi.histream.ObservationFactory;
import de.sekmi.histream.etl.config.Table;
public class RecordSupplier<R> implements Supplier<R>, AutoCloseable{
RowSupplier rows;
Table<R> table;
ColumnMap map;
private RowSupplier rows;
private Table<R> table;
private ColumnMap map;
private ObservationFactory factory;
public RecordSupplier(RowSupplier rows, Table<R> table)throws ParseException{
public RecordSupplier(RowSupplier rows, Table<R> table, ObservationFactory factory)throws ParseException{
this.rows = rows;
this.table = table;
this.map = table.getColumnMap(rows.getHeaders());
this.factory = factory;
}
@Override
......@@ -31,7 +34,7 @@ public class RecordSupplier<R> implements Supplier<R>, AutoCloseable{
}
R p;
try {
p = table.fillRecord(map, row);
p = table.fillRecord(map, row, factory);
} catch (ParseException e) {
throw new UncheckedParseException(e);
}
......
package de.sekmi.histream.etl;
public class WideRow {
import java.util.ArrayList;
import java.util.List;
import de.sekmi.histream.Observation;
/**
 * Record built from a single row of a wide table: the patient and visit
 * identifiers of that row together with the facts (observations) that
 * were added for it.
 */
public class WideRow {
    /** Patient id as passed to the constructor (not validated here). */
    private final String patid;
    /** Visit id as passed to the constructor (not validated here). */
    private final String visit;
    /** Facts in the order they were added via {@link #addFact(Observation)}. */
    private final List<Observation> facts;

    /**
     * Creates a row without any facts.
     * @param patid patient id of the row
     * @param visit visit id of the row
     */
    public WideRow(String patid, String visit){
        this.patid = patid;
        this.visit = visit;
        this.facts = new ArrayList<>();
    }

    /**
     * Appends a fact to this row.
     * @param o observation to append
     */
    public void addFact(Observation o){
        this.facts.add(o);
    }

    /**
     * Returns the facts of this row.
     * The returned list is the internal list itself (no copy is made),
     * so later calls to {@link #addFact(Observation)} are visible through it.
     * @return list of facts, empty if none were added
     */
    public List<Observation> getFacts(){
        return facts;
    }

    /**
     * @return patient id given at construction
     */
    public String getPatientId(){
        return patid;
    }

    /**
     * @return visit id given at construction
     */
    public String getVisitId(){
        return visit;
    }
}
package de.sekmi.histream.etl.config;
import java.text.DecimalFormat;
import java.util.Objects;
import javax.xml.bind.annotation.XmlAttribute;
import javax.xml.bind.annotation.XmlSeeAlso;
......@@ -10,6 +11,13 @@ import javax.xml.bind.annotation.XmlValue;
import de.sekmi.histream.etl.ColumnMap;
import de.sekmi.histream.etl.ParseException;
/**
* Abstract table column.
* A column does not need a name if it has a constant value assigned.
*
* @author marap1
*
*/
@XmlTransient
@XmlSeeAlso({StringColumn.class})
public abstract class Column {
......@@ -98,7 +106,15 @@ public abstract class Column {
}
/**
 * Determines the value of this column for the given table row.
 * <p>
 * If the column name is empty, no cell is read and the result of
 * {@code valueOf(null)} is returned. Otherwise the column index is
 * looked up in the column map and the cell at that index is converted.
 *
 * @param map column map used to look up the index of this column
 * @param row cells of the current row
 * @return converted value, as produced by the single-argument {@code valueOf}
 * @throws ParseException declared for the conversion of the cell value
 * @throws NullPointerException if the column has a name and {@code map},
 *  {@code row} or the looked-up index is null
 */
public Object valueOf(ColumnMap map, Object[] row) throws ParseException{
// NOTE(review): the next statement appears to be the pre-change line of the
// diff view; the statements after it are its replacement
return this.valueOf(row[map.indexOf(this)]);
if( name.isEmpty() ){
// use constant value if available
return valueOf(null);
}
Objects.requireNonNull(map);
Objects.requireNonNull(row);
// boxed Integer: a null index is rejected explicitly below
// (presumably null means the column was never registered - TODO confirm)
Integer index = map.indexOf(this);
Objects.requireNonNull(index);
return this.valueOf(row[index]);
}
public String applyRegularExpression(String input){
......@@ -117,4 +133,10 @@ public abstract class Column {
@XmlAttribute
String format;
}
/**
 * Verifies that this column can produce a value: it needs either a
 * non-empty name (reference to a table column) or a constant value.
 *
 * @throws ParseException if the name is missing or empty and no
 *  constant value is specified
 */
public void validate()throws ParseException{
    // A missing (null) name is treated like an empty one, so that the
    // configuration error is reported as ParseException instead of
    // surfacing as a NullPointerException.
    boolean unnamed = (name == null) || name.isEmpty();
    if( unnamed && constantValue == null ){
        throw new ParseException("Empty column name only allowed if constant-value is specified");
    }
}
}
package de.sekmi.histream.etl.config;
import java.math.BigDecimal;
import javax.xml.bind.annotation.XmlAccessType;
import javax.xml.bind.annotation.XmlAccessorType;
import javax.xml.bind.annotation.XmlAttribute;
import javax.xml.bind.annotation.XmlElement;
import de.sekmi.histream.DateTimeAccuracy;
import de.sekmi.histream.Observation;
import de.sekmi.histream.ObservationFactory;
import de.sekmi.histream.etl.ColumnMap;
import de.sekmi.histream.etl.ParseException;
import de.sekmi.histream.impl.NumericValue;
import de.sekmi.histream.impl.StringValue;
/**
* Concept from a wide table
* @author Raphael
......@@ -49,4 +59,34 @@ public class Concept{
this.start = new DateTimeColumn(startColumn, format);
}
/**
 * Creates the observation for this concept from one row of a wide table.
 * The start timestamp, the value and (if a unit column is configured)
 * the unit are read from the row through the respective columns.
 *
 * @param patid patient id passed to the observation factory
 * @param visit visit id; set as encounter id unless it is {@code null}
 * @param factory factory used to create the observation
 * @param map column map for looking up column indices
 * @param row cells of the current row
 * @return observation with its value set (or set to {@code null} if the row has no value)
 * @throws ParseException if a column cannot be parsed or the value is
 *  neither a {@code String} nor a {@code BigDecimal}
 */
protected Observation createObservation(String patid, String visit, ObservationFactory factory, ColumnMap map, Object[] row) throws ParseException{
    final DateTimeAccuracy timestamp = (DateTimeAccuracy)this.start.valueOf(map, row);
    final Observation fact = factory.createObservation(patid, this.id, timestamp);
    if( visit != null ){
        fact.setEncounterId(visit);
    }

    final Object raw = this.value.valueOf(map, row);
    // the unit column is optional
    final String unitText = (this.unit == null) ? null : (String)this.unit.valueOf(map, row);

    if( raw instanceof BigDecimal ){
        // numeric value, carries the unit
        fact.setValue(new NumericValue((BigDecimal)raw, unitText));
    }else if( raw instanceof String ){
        // text value
        // TODO: set unit
        fact.setValue(new StringValue((String)raw));
    }else if( raw == null ){
        // row contains no value for this concept
        fact.setValue(null);
    }else{
        throw new ParseException("Unsupported value type for concept id "+this.id+": "+raw.getClass());
    }
    // TODO: modifiers
    return fact;
}
}
\ No newline at end of file
......@@ -7,6 +7,7 @@ import javax.xml.bind.annotation.XmlElement;
import javax.xml.bind.annotation.XmlElementWrapper;
import de.sekmi.histream.DateTimeAccuracy;
import de.sekmi.histream.ObservationFactory;
import de.sekmi.histream.etl.ColumnMap;
import de.sekmi.histream.etl.ParseException;
import de.sekmi.histream.etl.PatientRow;
......@@ -51,7 +52,7 @@ public class PatientTable extends Table<PatientRow> implements WideInterface{
@Override
public PatientRow fillRecord(ColumnMap map, Object[] row) throws ParseException {
public PatientRow fillRecord(ColumnMap map, Object[] row, ObservationFactory factory) throws ParseException {
PatientRow patient = new PatientRow();
patient.setId(idat.patientId.valueOf(map, row).toString());
patient.setNames((String)idat.firstname.valueOf(map, row), (String)idat.surname.valueOf(map, row));
......
......@@ -5,6 +5,7 @@ import java.io.IOException;
import javax.xml.bind.annotation.XmlElement;
import javax.xml.bind.annotation.XmlTransient;
import de.sekmi.histream.ObservationFactory;
import de.sekmi.histream.etl.ColumnMap;
import de.sekmi.histream.etl.ParseException;
import de.sekmi.histream.etl.RecordSupplier;
......@@ -50,10 +51,11 @@ public abstract class Table<T> {
}
}
public abstract T fillRecord(ColumnMap map, Object[] row) throws ParseException;
public abstract T fillRecord(ColumnMap map, Object[] row, ObservationFactory factory) throws ParseException;
public RecordSupplier<T> open() throws IOException, ParseException{
return new RecordSupplier<>(source.rows(), this);
public RecordSupplier<T> open(ObservationFactory factory) throws IOException, ParseException{
return new RecordSupplier<>(source.rows(), this, factory);
}
}
......@@ -7,6 +7,7 @@ import javax.xml.bind.annotation.XmlElementWrapper;
import javax.xml.bind.annotation.XmlType;
import de.sekmi.histream.DateTimeAccuracy;
import de.sekmi.histream.ObservationFactory;
import de.sekmi.histream.etl.ColumnMap;
import de.sekmi.histream.etl.ParseException;
import de.sekmi.histream.etl.VisitRow;
......@@ -45,7 +46,7 @@ public class VisitTable extends Table<VisitRow> implements WideInterface{
}
@Override
public VisitRow fillRecord(ColumnMap map, Object[] row) throws ParseException {
public VisitRow fillRecord(ColumnMap map, Object[] row, ObservationFactory factory) throws ParseException {
VisitRow visit = new VisitRow();
visit.setId(idat.visitId.valueOf(map, row).toString());
visit.setPatientId(idat.patientId.valueOf(map, row).toString());
......
......@@ -3,6 +3,8 @@ package de.sekmi.histream.etl.config;
import javax.xml.bind.annotation.XmlElement;
import javax.xml.bind.annotation.XmlElementWrapper;
import de.sekmi.histream.Observation;
import de.sekmi.histream.ObservationFactory;
import de.sekmi.histream.etl.ColumnMap;
import de.sekmi.histream.etl.ParseException;
import de.sekmi.histream.etl.WideRow;
......@@ -28,8 +30,14 @@ public class WideTable extends Table<WideRow> {
}
@Override
public WideRow fillRecord(ColumnMap map, Object[] row) throws ParseException {
// TODO Auto-generated method stub
return null;
/**
 * Builds a {@link WideRow} from one row of the wide table: reads the
 * patient and visit ids and creates one observation per configured concept.
 *
 * @param map column map for looking up column indices
 * @param row cells of the current row
 * @param factory factory used to create the observations
 * @return record containing the ids and one fact per concept
 * @throws ParseException if a column value cannot be parsed
 */
public WideRow fillRecord(ColumnMap map, Object[] row, ObservationFactory factory) throws ParseException {
    // Convert the ids with toString() like the patient and visit tables do,
    // instead of casting: a non-string id column would otherwise fail with
    // a ClassCastException. Null ids are passed on unchanged.
    Object patientValue = idat.patientId.valueOf(map, row);
    Object visitValue = idat.visitId.valueOf(map, row);
    String patid = (patientValue == null) ? null : patientValue.toString();
    String visit = (visitValue == null) ? null : visitValue.toString();

    WideRow rec = new WideRow(patid, visit);
    for( Concept c : concepts ){
        rec.addFact(c.createObservation(patid, visit, factory, map, row));
    }
    return rec;
}
}
......@@ -8,22 +8,34 @@ import javax.xml.bind.JAXB;
import org.junit.Test;
import de.sekmi.histream.Observation;
import de.sekmi.histream.ObservationFactory;
import de.sekmi.histream.Value;
import de.sekmi.histream.etl.ParseException;
import de.sekmi.histream.etl.PatientRow;
import de.sekmi.histream.etl.RecordSupplier;
import de.sekmi.histream.etl.VisitRow;
import de.sekmi.histream.etl.WideRow;
import de.sekmi.histream.impl.ObservationFactoryImpl;
import org.junit.Assert;
import org.junit.Before;
public class TestReadTables {
@Test
public void testReadPatients() throws IOException, ParseException{
DataSource ds;
private DataSource ds;
private ObservationFactory of;
@Before
public void loadConfiguration() throws IOException{
try( InputStream in = getClass().getResourceAsStream("/test-1-datasource.xml") ){
ds = JAXB.unmarshal(in, DataSource.class);
}
try( RecordSupplier<PatientRow> s = ds.patientTable.open() ){
of = new ObservationFactoryImpl();
}
@Test
public void testReadPatients() throws IOException, ParseException{
try( RecordSupplier<PatientRow> s = ds.patientTable.open(of) ){
PatientRow r = s.get();
Assert.assertEquals("1", r.getId());
Assert.assertEquals(2003, r.getBirthDate().get(ChronoField.YEAR));
......@@ -32,15 +44,23 @@ public class TestReadTables {
}
@Test
public void testReadVisits() throws IOException, ParseException{
DataSource ds;
try( InputStream in = getClass().getResourceAsStream("/test-1-datasource.xml") ){
ds = JAXB.unmarshal(in, DataSource.class);
}
try( RecordSupplier<VisitRow> s = ds.visitTable.open() ){
try( RecordSupplier<VisitRow> s = ds.visitTable.open(of) ){
VisitRow r = s.get();
Assert.assertEquals("1", r.getId());
Assert.assertEquals(2013, r.getStartTime().get(ChronoField.YEAR));
}
}
/**
 * Reads the first record of the first wide table and checks that its
 * first fact is the text-valued "natrium" observation with value "124".
 */
@Test
public void testReadWideTable() throws IOException, ParseException{
    try( RecordSupplier<WideRow> supplier = ds.wideTables[0].open(of) ){
        WideRow first = supplier.get();
        Assert.assertNotNull(first);
        // at least one fact must have been parsed from the row
        Assert.assertFalse(first.getFacts().isEmpty());

        Observation fact = first.getFacts().get(0);
        Assert.assertEquals("natrium", fact.getConceptId());

        Value parsed = fact.getValue();
        Assert.assertEquals(Value.Type.Text, parsed.getType());
        Assert.assertEquals("124", parsed.getStringValue());
    }
}
}
......@@ -61,7 +61,7 @@
<wide-table>
<source xsi:type="fileSource">
<url>file:test-1-widetable.txt</url>
<url>file:src/test/resources/test-1-widetable.txt</url>
<separator>\t</separator>
</source>
<idat>
......@@ -70,11 +70,12 @@
</idat>
<mdat>
<concept id="natrium">
<!-- TODO implement numeric columns -->
<value xsi:type="stringColumn">na</value>
<start>zeitpunkt</start>
<start format="d.M.u[ H[:m[:s]]]">zeitpunkt</start>
<unit constant-value="mmol/l" />
<modifier id="other">
<value xsi:type="stringColumn">other</value>
<value xsi:type="stringColumn" constant-value=""/>
</modifier>
</concept>
</mdat>
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment