Commit 334d4ff3 authored by R.W.Majeed
Browse files

typed columns

parent b2b13333
package de.sekmi.histream.etl.config; package de.sekmi.histream.etl.config;
import javax.xml.bind.annotation.XmlAttribute; import java.text.DecimalFormat;
import javax.xml.bind.annotation.XmlValue;
import javax.xml.bind.annotation.XmlAttribute;
public class Column { import javax.xml.bind.annotation.XmlSeeAlso;
private Column(){ import javax.xml.bind.annotation.XmlTransient;
} import javax.xml.bind.annotation.XmlValue;
public Column(String name){
this(); @XmlTransient
this.name = name; @XmlSeeAlso({StringColumn.class})
} public class Column {
@XmlAttribute protected Column(){
String na; }
public Column(String name){
@XmlAttribute(name="constant-value") this();
String constantValue; this.name = name;
}
@XmlValue /**
String name; * If this string is found in the column data, the resulting value will be null.
} */
@XmlAttribute
String na;
/**
* If set (e.g. non-null), this will always overwrite any other value.
* Datatype formats and regular expressions are still applied to the constant value.
*/
@XmlAttribute(name="constant-value")
String constantValue;
/**
* Regular expression which needs to match the input string
*/
@XmlAttribute(name="regex-match")
String regexMatch;
/**
* Replace the input value with the specified string or regular expression group from {@link #regexMatch}.
* If not specified, the full input string is used (regardless of match region).
*/
@XmlAttribute(name="regex-replace")
String regexReplace;
/**
* Action to perform if the {@link #regexMatch} did not match the input string.
* Either use NA (usually null) for the value, or drop the whole concept/fact.
*/
@XmlAttribute(name="regex-nomatch-action")
String regexNoMatchAction; // either na or drop
/**
* Report a warning if the {@link #regexMatch} did not match the input string.
* Defaults to true.
*/
@XmlAttribute(name="regex-nomatch-warning")
Boolean regexNoMatchWarning;
/**
* Column name to use for reading input values.
*/
@XmlValue
String name;
/**
* Convert a string input value to the output data type. The resulting type depends
* on the type attribute and can be one of Long, BigDecimal, String, DateTime
* or DateTimeAccuracy (for incomplete dates).
* <p>
* TODO: how to read SQL table data, which already contains types (e.g. sql.Integer)
*
* @param value input value. e.g. from text table column
* @return output type representing the input value
*/
public Object valueOf(String value){
	// a configured constant takes precedence over whatever was read from the input
	String result = (constantValue != null) ? constantValue : value;

	// the NA marker maps to null; String.equals(null) is false, so an unset 'na' never matches
	if( result != null && result.equals(na) ){
		result = null;
	}

	// nothing left to transform, or no regular expression configured
	if( result == null || regexMatch == null ){
		return result;
	}
	return applyRegularExpression(result);
}
/**
 * Placeholder for the regular expression processing of an input value.
 * The current implementation does not evaluate any of the regex-related
 * attributes; it hands the input back unchanged.
 *
 * @param input value to process
 * @return the unmodified {@code input}
 */
public String applyRegularExpression(String input){
// TODO: apply the configured regular expression; not implemented yet
return input;
}
/**
 * Column subtype which declares no members of its own.
 * NOTE(review): presumably intended for integer-typed values, judging by the name -- confirm.
 */
public static class IntegerColumn extends Column{
}
/**
 * Column subtype which adds a {@code format} XML attribute.
 * NOTE(review): no parsing logic is present in this class yet.
 */
public static class DecimalColumn extends Column{
/**
 * Formatter instance, excluded from the XML binding.
 * Nothing in this class assigns it.
 */
@XmlTransient
DecimalFormat decimalFormat;
/**
* Decimal format string for parsing via {@link DecimalFormat}
* @see DecimalFormat#DecimalFormat(String)
*/
@XmlAttribute
String format;
}
}
package de.sekmi.histream.etl.config;
import java.util.HashMap;
import java.util.Map;
/**
* Maps {@link Column}s to header/table indices
*
* @author Raphael
*
*/
public class ColumnMap{
	/** Header names in table order, exactly as passed to the constructor (stored by reference). */
	String[] headers;
	/** Name of each registered column mapped to its index within {@link #headers}. */
	Map<String, Integer> map;

	/**
	 * Creates a column map without any registered columns.
	 *
	 * @param headers header names of the table; the array is not copied
	 */
	public ColumnMap(String[] headers){
		this.map = new HashMap<>();
		this.headers = headers;
	}

	/**
	 * Looks up the column's name among the headers and, if found, records
	 * the index of the first matching header under that name.
	 *
	 * @param column column whose {@code name} is searched for
	 * @return {@code true} if a header with the column's name exists and the
	 *  index was recorded, {@code false} if no header matches
	 */
	public boolean registerColumn(Column column){
		// advance to the first header equal to the column name
		int index = 0;
		while( index < headers.length && !column.name.equals(headers[index]) ){
			index ++;
		}
		if( index == headers.length ){
			// ran off the end: no such header
			return false;
		}
		map.put(column.name, Integer.valueOf(index));
		return true;
	}
}
\ No newline at end of file
package de.sekmi.histream.etl.config; package de.sekmi.histream.etl.config;
import javax.xml.bind.annotation.XmlAccessType; import javax.xml.bind.annotation.XmlAccessType;
import javax.xml.bind.annotation.XmlAccessorType; import javax.xml.bind.annotation.XmlAccessorType;
import javax.xml.bind.annotation.XmlAttribute; import javax.xml.bind.annotation.XmlAttribute;
import javax.xml.bind.annotation.XmlElement; import javax.xml.bind.annotation.XmlElement;
/** /**
* Concept from a wide table * Concept from a wide table
* @author Raphael * @author Raphael
* *
*/ */
@XmlAccessorType(XmlAccessType.FIELD) @XmlAccessorType(XmlAccessType.FIELD)
public class Concept{ public class Concept{
@XmlAttribute(required=true) @XmlAttribute(required=true)
String id; String id;
// TODO: value should contain also type (string,decimal,integer,...) // TODO: value should contain also type (string,decimal,integer,...)
Column value; Column value;
Column unit; StringColumn unit;
@XmlElement(required=true) @XmlElement(required=true)
Column start; DateTimeColumn start;
Column end; DateTimeColumn end;
@XmlElement(name="modifier") @XmlElement(name="modifier")
Modifier[] modifiers; Modifier[] modifiers;
// ... // ...
@XmlAccessorType(XmlAccessType.FIELD) @XmlAccessorType(XmlAccessType.FIELD)
public static class Modifier{ public static class Modifier{
@XmlAttribute(required=true) @XmlAttribute(required=true)
String id; String id;
// TODO: value with type // TODO: value with type
Column value; Column value;
Column unit; StringColumn unit;
private Modifier(){ private Modifier(){
} }
public Modifier(String id){ public Modifier(String id){
this(); this();
this.id = id; this.id = id;
} }
} }
private Concept(){ private Concept(){
} }
public Concept(String id, String startColumn){ public Concept(String id, String startColumn, String format){
this(); this();
this.id = id; this.id = id;
this.start = new Column(startColumn); this.start = new DateTimeColumn(startColumn, format);
} }
} }
\ No newline at end of file
package de.sekmi.histream.etl.config; package de.sekmi.histream.etl.config;
import javax.xml.bind.annotation.XmlAccessType; import javax.xml.bind.annotation.XmlAccessType;
import javax.xml.bind.annotation.XmlAccessorType; import javax.xml.bind.annotation.XmlAccessorType;
import javax.xml.bind.annotation.XmlElement; import javax.xml.bind.annotation.XmlElement;
import javax.xml.bind.annotation.XmlElementWrapper; import javax.xml.bind.annotation.XmlElementWrapper;
import javax.xml.bind.annotation.XmlRootElement;
@XmlAccessorType(XmlAccessType.FIELD) import javax.xml.bind.annotation.XmlSeeAlso;
public class DataSource {
@XmlElement @XmlRootElement
Meta meta; @XmlAccessorType(XmlAccessType.FIELD)
@XmlSeeAlso({Column.class, StringColumn.class})
@XmlElementWrapper(name="transformation") public class DataSource {
@XmlElement(name="xml-source") @XmlElement
XmlSource[] xmlSources; Meta meta;
@XmlElement(name="patient-table",required=true) @XmlElementWrapper(name="transformation")
PatientTable patientTable; @XmlElement(name="xml-source")
XmlSource[] xmlSources;
@XmlElement(name="visit-table")
VisitTable visitTable; @XmlElement(name="patient-table",required=true)
PatientTable patientTable;
@XmlElement(name="wide-table")
WideTable[] wideTables; @XmlElement(name="visit-table")
VisitTable visitTable;
@XmlElement(name="wide-table")
} WideTable[] wideTables;
}
package de.sekmi.histream.etl.config; package de.sekmi.histream.etl.config;
import javax.xml.bind.annotation.XmlElement; import javax.xml.bind.annotation.XmlElement;
public class DataTableIdat extends IdatColumns { public class DataTableIdat extends IdatColumns {
@XmlElement(name="visit-id") @XmlElement(name="visit-id")
Column visitId; StringColumn visitId;
} }
package de.sekmi.histream.etl.config;
import java.time.format.DateTimeFormatter;
import javax.xml.bind.annotation.XmlAttribute;
import javax.xml.bind.annotation.XmlTransient;
/**
* Date and Time column.
* TODO implement parsing of partial date/time. e.g. 2003-10
*
* @author Raphael
*
*/
public class DateTimeColumn extends Column{
	/**
	 * Formatter instance, excluded from the XML binding.
	 * Nothing in this class assigns it.
	 */
	@XmlTransient
	DateTimeFormatter formatter;

	/**
	 * Date/time pattern string, meant for parsing via {@link DateTimeFormatter}.
	 * @see DateTimeFormatter#ofPattern(String)
	 */
	@XmlAttribute
	String format;

	/**
	 * Creates a column without name and format.
	 * NOTE(review): presumably needed by JAXB for unmarshalling -- confirm.
	 */
	protected DateTimeColumn(){
		// superclass no-arg constructor is invoked implicitly
	}

	/**
	 * Creates a date/time column.
	 *
	 * @param name column name, passed on to the superclass
	 * @param format pattern string stored in {@link #format}
	 */
	public DateTimeColumn(String name, String format){
		super(name);
		this.format = format;
	}
}
\ No newline at end of file
package de.sekmi.histream.etl.config; package de.sekmi.histream.etl.config;
import javax.xml.bind.annotation.XmlElement; import javax.xml.bind.annotation.XmlElement;
public class IdatColumns { public class IdatColumns {
@XmlElement(name="patient-id") @XmlElement(name="patient-id")
Column patientId; StringColumn patientId;
} }
package de.sekmi.histream.etl.config; package de.sekmi.histream.etl.config;
import javax.xml.bind.annotation.XmlAccessType; import javax.xml.bind.annotation.XmlAccessType;
import javax.xml.bind.annotation.XmlAccessorType; import javax.xml.bind.annotation.XmlAccessorType;
import javax.xml.bind.annotation.XmlElement; import javax.xml.bind.annotation.XmlElement;
import javax.xml.bind.annotation.XmlElementWrapper; import javax.xml.bind.annotation.XmlElementWrapper;
/** /**
* Patient table. Contains patient id and other identifying information. * Patient table. Contains patient id and other identifying information.
* Can also contain medical data * Can also contain medical data
* @author marap1 * @author marap1
* *
*/ */
public class PatientTable extends Table implements WideInterface{ public class PatientTable extends Table implements WideInterface{
@XmlElement @XmlElement
IDAT idat; IDAT idat;
@XmlElementWrapper(name="mdat") @XmlElementWrapper(name="mdat")
@XmlElement(name="concept") @XmlElement(name="concept")
Concept[] concepts; Concept[] concepts;
@XmlAccessorType(XmlAccessType.FIELD) @XmlAccessorType(XmlAccessType.FIELD)
public static class IDAT extends IdatColumns{ public static class IDAT extends IdatColumns{
Column firstname; StringColumn firstname;
Column lastname; StringColumn lastname;
Column birthdate; DateTimeColumn birthdate;
Column deathdate; DateTimeColumn deathdate;
Column gender; StringColumn gender;
Column[] ignore; Column[] ignore;
} }
}
}
package de.sekmi.histream.etl.config;
/**
 * Column subtype which adds no members of its own.
 * NOTE(review): presumably marks columns whose values stay strings -- confirm.
 */
public class StringColumn extends Column{
	/**
	 * Creates a column without a name.
	 * NOTE(review): presumably needed by JAXB for unmarshalling -- confirm.
	 */
	protected StringColumn(){
		// superclass no-arg constructor is invoked implicitly
	}

	/**
	 * Creates a string column.
	 *
	 * @param name column name, passed on to the superclass
	 */
	public StringColumn(String name) {
		super(name);
	}
}
\ No newline at end of file
package de.sekmi.histream.etl.config; package de.sekmi.histream.etl.config;
import javax.xml.bind.annotation.XmlAccessType; import javax.xml.bind.annotation.XmlAccessType;
import javax.xml.bind.annotation.XmlAccessorType; import javax.xml.bind.annotation.XmlAccessorType;
import javax.xml.bind.annotation.XmlElement; import javax.xml.bind.annotation.XmlElement;
import javax.xml.bind.annotation.XmlElementWrapper; import javax.xml.bind.annotation.XmlElementWrapper;
import javax.xml.bind.annotation.XmlType; import javax.xml.bind.annotation.XmlType;
public class VisitTable extends Table implements WideInterface{ public class VisitTable extends Table implements WideInterface{
@XmlElement @XmlElement
IDAT idat; IDAT idat;
@XmlElementWrapper(name="mdat") @XmlElementWrapper(name="mdat")
@XmlElement(name="concept") @XmlElement(name="concept")
Concept[] concepts; Concept[] concepts;
@XmlType(name="patient-idat") @XmlType(name="patient-idat")
@XmlAccessorType(XmlAccessType.FIELD) @XmlAccessorType(XmlAccessType.FIELD)
public static class IDAT extends IdatColumns{ public static class IDAT extends IdatColumns{
@XmlElement(name="visit-id") @XmlElement(name="visit-id")
Column visitId; StringColumn visitId;
Column start; DateTimeColumn start;
Column end; DateTimeColumn end;
// TODO inpatient/outpatient state // TODO inpatient/outpatient state
Column[] ignore; Column[] ignore;
} }
} }
package de.sekmi.histream.etl.config; package de.sekmi.histream.etl.config;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.net.MalformedURLException; import java.net.MalformedURLException;
import java.net.URL; import java.net.URL;
import javax.xml.bind.JAXB; import javax.xml.bind.JAXB;
import org.junit.Assert; import org.junit.Assert;
import org.junit.Test; import org.junit.Test;
import de.sekmi.histream.etl.config.DataSource; import de.sekmi.histream.etl.config.DataSource;
public class TestMarshall { public class TestMarshall {
@Test @Test
public void testUnmarshall() throws IOException{ public void testUnmarshall() throws IOException{
try( InputStream in = getClass().getResourceAsStream("/test-1-datasource.txt") ){ try( InputStream in = getClass().getResourceAsStream("/test-1-datasource.xml") ){
DataSource ds = JAXB.unmarshal(in, DataSource.class); DataSource ds = JAXB.unmarshal(in, DataSource.class);
Assert.assertNotNull(ds.meta); Assert.assertNotNull(ds.meta);
Assert.assertEquals("replace-source",ds.meta.etlStrategy); Assert.assertEquals("replace-source",ds.meta.etlStrategy);
Assert.assertEquals("test-1",ds.meta.sourceId); Assert.assertEquals("test-1",ds.meta.sourceId);
// patient table // patient table
Assert.assertNotNull(ds.patientTable); Assert.assertNotNull(ds.patientTable);
Assert.assertNotNull(ds.patientTable.source); Assert.assertNotNull(ds.patientTable.source);
Assert.assertNotNull(ds.patientTable.idat); Assert.assertNotNull(ds.patientTable.idat);
Assert.assertEquals("patid",ds.patientTable.idat.patientId.name); Assert.assertEquals("patid",ds.patientTable.idat.patientId.name);
Assert.assertEquals("geburtsdatum",ds.patientTable.idat.birthdate.name); Assert.assertEquals("geburtsdatum",ds.patientTable.idat.birthdate.name);
Assert.assertEquals("geschlecht",ds.patientTable.idat.gender.name); Assert.assertEquals("geschlecht",ds.patientTable.idat.gender.name);
// visit table // visit table
Assert.assertNotNull(ds.visitTable); Assert.assertNotNull(ds.visitTable);
Assert.assertNotNull(ds.visitTable.source); Assert.assertNotNull(ds.visitTable.source);
Assert.assertNotNull(ds.visitTable.idat); Assert.assertNotNull(ds.visitTable.idat);
Assert.assertEquals("patid",ds.visitTable.idat.patientId.name); Assert.assertEquals("patid",ds.visitTable.idat.patientId.name);
Assert.assertEquals("fallnr",ds.visitTable.idat.visitId.name); Assert.assertEquals("fallnr",ds.visitTable.idat.visitId.name);
// wide table // wide table
Assert.assertNotNull(ds.wideTables); Assert.assertNotNull(ds.wideTables);
Assert.assertEquals(1, ds.wideTables.length); Assert.assertEquals(1, ds.wideTables.length);
WideTable t = ds.wideTables[0]; WideTable t = ds.wideTables[0];
Assert.assertNotNull(t); Assert.assertNotNull(t);
Assert.assertNotNull(t.idat); Assert.assertNotNull(t.idat);
Assert.assertEquals("patid",t.idat.patientId.name); Assert.assertEquals("patid",t.idat.patientId.name);
Assert.assertEquals("fallnr",t.idat.visitId.name); Assert.assertEquals("fallnr",t.idat.visitId.name);