Commit 334d4ff3 authored by R.W.Majeed

typed columns

parent b2b13333
package de.sekmi.histream.etl.config;
import javax.xml.bind.annotation.XmlAttribute;
import javax.xml.bind.annotation.XmlValue;
/**
 * Configuration of a single input column, bound to XML via JAXB annotations.
 * The element text holds the column name; {@code na} and
 * {@code constant-value} are attributes of the element.
 */
public class Column {
// No-argument constructor; private, so code outside this class has to use Column(String).
// NOTE(review): presumably kept for JAXB, which instantiates beans reflectively - confirm.
private Column(){
}
/**
 * Creates a column configuration for the given column name.
 *
 * @param name value stored in the {@code name} field
 */
public Column(String name){
this();
this.name = name;
}
// Bound to the "na" attribute.
// NOTE(review): presumably the input string that marks a missing value - not used within this class.
@XmlAttribute
String na;
// Bound to the "constant-value" attribute.
// NOTE(review): presumably a fixed value to use instead of the input - not used within this class.
@XmlAttribute(name="constant-value")
String constantValue;
// Bound to the text content of the element; assigned by Column(String).
@XmlValue
String name;
}
package de.sekmi.histream.etl.config;
import java.text.DecimalFormat;
import javax.xml.bind.annotation.XmlAttribute;
import javax.xml.bind.annotation.XmlSeeAlso;
import javax.xml.bind.annotation.XmlTransient;
import javax.xml.bind.annotation.XmlValue;
/**
 * Base class for column configurations read from the XML data source
 * description. The element text is the column name; all other settings
 * are XML attributes. The class itself is marked {@link XmlTransient},
 * concrete column types extend it.
 */
@XmlTransient
@XmlSeeAlso({StringColumn.class})
public class Column {

    /**
     * Marker for "not available": an input value equal to this string is
     * turned into {@code null} by {@link #valueOf(String)}.
     */
    @XmlAttribute
    String na;

    /**
     * When non-null, this value replaces every input value. The constant
     * still passes through the {@link #na} check and the regular
     * expression step of {@link #valueOf(String)}.
     */
    @XmlAttribute(name="constant-value")
    String constantValue;

    /**
     * Regular expression the input string is required to match.
     */
    @XmlAttribute(name="regex-match")
    String regexMatch;

    /**
     * Replacement for the input value: a fixed string or a group of
     * {@link #regexMatch}. Without it, the full input string is used
     * (regardless of match region).
     */
    @XmlAttribute(name="regex-replace")
    String regexReplace;

    /**
     * What to do when {@link #regexMatch} does not match the input:
     * use NA (usually null) for the value, or drop the whole concept/fact.
     */
    @XmlAttribute(name="regex-nomatch-action")
    String regexNoMatchAction; // either na or drop

    /**
     * Whether a warning is reported when {@link #regexMatch} does not match.
     * Intended default is true; nothing in this class applies that default,
     * so the field stays {@code null} when the attribute is absent.
     */
    @XmlAttribute(name="regex-nomatch-warning")
    Boolean regexNoMatchWarning;

    /**
     * Name of the input column the values are read from (element text).
     */
    @XmlValue
    String name;

    /**
     * Creates an empty configuration; all fields are left unset.
     */
    protected Column(){
    }

    /**
     * Creates a configuration that reads from the named input column.
     *
     * @param name input column name
     */
    public Column(String name){
        this();
        this.name = name;
    }

    /**
     * Converts a raw input string to the column's output value.
     * The return type is {@code Object} because typed columns are meant to
     * produce Long, BigDecimal, String, DateTime or DateTimeAccuracy (for
     * incomplete dates) values; this base implementation returns a String
     * or {@code null}.
     * <p>
     * Steps, in order: a configured constant replaces the input; a value
     * equal to {@link #na} becomes {@code null}; a remaining non-null value
     * is passed to {@link #applyRegularExpression(String)} if
     * {@link #regexMatch} is set.
     * <p>
     * TODO: how to read SQL table data, which already contains types (e.g. sql.Integer)
     *
     * @param value input value. e.g. from text table column
     * @return output value representing the input, may be {@code null}
     */
    public Object valueOf(String value){
        String effective = (constantValue != null) ? constantValue : value;
        // String.equals(null) is false, so an unset na never matches
        if( effective != null && effective.equals(na) ){
            effective = null;
        }
        if( effective == null || regexMatch == null ){
            return effective;
        }
        return applyRegularExpression(effective);
    }

    /**
     * Hook for the regular expression handling. Currently a placeholder
     * that hands the input back unchanged.
     *
     * @param input non-null value to process
     * @return the input, unmodified
     */
    public String applyRegularExpression(String input){
        // TODO: apply
        return input;
    }

    /**
     * Column type for integer values; adds nothing to {@link Column} yet.
     */
    public static class IntegerColumn extends Column{
    }

    /**
     * Column type for decimal values with an optional format attribute.
     */
    public static class DecimalColumn extends Column{
        /**
         * Not bound to XML; never assigned within this class.
         */
        @XmlTransient
        DecimalFormat decimalFormat;

        /**
         * Decimal format string for parsing via {@link DecimalFormat}
         * @see DecimalFormat#DecimalFormat(String)
         */
        @XmlAttribute
        String format;
    }
}
package de.sekmi.histream.etl.config;
import java.util.HashMap;
import java.util.Map;
/**
 * Resolves {@link Column}s against a header row and remembers, per column
 * name, the zero-based position of the matching header.
 *
 * @author Raphael
 *
 */
public class ColumnMap{
    /** Header row the columns are matched against; stored as passed in, not copied. */
    String[] headers;
    /** Column name to header position, filled by {@link #registerColumn(Column)}. */
    Map<String, Integer> map;

    /**
     * Creates an empty map for the given header row.
     *
     * @param headers header names in table order
     */
    public ColumnMap(String[] headers){
        this.map = new HashMap<>();
        this.headers = headers;
    }

    /**
     * Looks up the column's name in the header row and, on the first exact
     * match, records the header position under that name.
     *
     * @param column column whose {@code name} is searched for
     * @return {@code true} if a header with the same name was found and
     *         recorded, {@code false} if no header matched
     */
    public boolean registerColumn(Column column){
        int position = 0;
        for( String header : headers ){
            if( column.name.equals(header) ){
                map.put(column.name, position);
                return true;
            }
            position++;
        }
        return false;
    }
}
\ No newline at end of file
package de.sekmi.histream.etl.config;
import javax.xml.bind.annotation.XmlAccessType;
import javax.xml.bind.annotation.XmlAccessorType;
import javax.xml.bind.annotation.XmlAttribute;
import javax.xml.bind.annotation.XmlElement;
/**
 * Concept from a wide table.
 * All fields are bound to XML by field access; fields without an explicit
 * annotation use the default binding for their field name.
 * @author Raphael
 *
 */
@XmlAccessorType(XmlAccessType.FIELD)
public class Concept{
// Bound to the "id" attribute, declared as required.
@XmlAttribute(required=true)
String id;
// TODO: value should contain also type (string,decimal,integer,...)
Column value;
Column unit;
// Declared as required; the only column assigned by the public constructor.
@XmlElement(required=true)
Column start;
Column end;
// Each array entry is bound to one "modifier" element.
@XmlElement(name="modifier")
Modifier[] modifiers;
// ...
/**
 * Modifier entry of a {@link Concept}, with its own id, value column and unit column.
 */
@XmlAccessorType(XmlAccessType.FIELD)
public static class Modifier{
// Bound to the "id" attribute, declared as required.
@XmlAttribute(required=true)
String id;
// TODO: value with type
Column value;
Column unit;
// No-argument constructor; private, so outside code has to use Modifier(String).
private Modifier(){
}
/**
 * Creates a modifier with the given id; value and unit stay unset.
 *
 * @param id modifier id
 */
public Modifier(String id){
this();
this.id = id;
}
}
// No-argument constructor; private, so outside code has to use Concept(String, String).
private Concept(){
}
/**
 * Creates a concept with the given id whose start column is a new
 * {@link Column} for {@code startColumn}. All other fields stay unset.
 *
 * @param id concept id
 * @param startColumn name passed to the start {@link Column}
 */
public Concept(String id, String startColumn){
this();
this.id = id;
this.start = new Column(startColumn);
}
package de.sekmi.histream.etl.config;
import javax.xml.bind.annotation.XmlAccessType;
import javax.xml.bind.annotation.XmlAccessorType;
import javax.xml.bind.annotation.XmlAttribute;
import javax.xml.bind.annotation.XmlElement;
/**
 * Concept from a wide table.
 * All fields are bound to XML by field access; fields without an explicit
 * annotation use the default binding for their field name.
 * @author Raphael
 *
 */
@XmlAccessorType(XmlAccessType.FIELD)
public class Concept{
// Bound to the "id" attribute, declared as required.
@XmlAttribute(required=true)
String id;
// TODO: value should contain also type (string,decimal,integer,...)
Column value;
StringColumn unit;
// Declared as required; the only column assigned by the public constructor.
@XmlElement(required=true)
DateTimeColumn start;
DateTimeColumn end;
// Each array entry is bound to one "modifier" element.
@XmlElement(name="modifier")
Modifier[] modifiers;
// ...
/**
 * Modifier entry of a {@link Concept}, with its own id, value column and unit column.
 */
@XmlAccessorType(XmlAccessType.FIELD)
public static class Modifier{
// Bound to the "id" attribute, declared as required.
@XmlAttribute(required=true)
String id;
// TODO: value with type
Column value;
StringColumn unit;
// No-argument constructor; private, so outside code has to use Modifier(String).
private Modifier(){
}
/**
 * Creates a modifier with the given id; value and unit stay unset.
 *
 * @param id modifier id
 */
public Modifier(String id){
this();
this.id = id;
}
}
// No-argument constructor; private, so outside code has to use the public constructor.
private Concept(){
}
/**
 * Creates a concept with the given id whose start column is a new
 * {@link DateTimeColumn} built from {@code startColumn} and {@code format}.
 * All other fields stay unset.
 *
 * @param id concept id
 * @param startColumn column name passed to the start {@link DateTimeColumn}
 * @param format format string passed to the start {@link DateTimeColumn}
 */
public Concept(String id, String startColumn, String format){
this();
this.id = id;
this.start = new DateTimeColumn(startColumn, format);
}
}
\ No newline at end of file
package de.sekmi.histream.etl.config;
import javax.xml.bind.annotation.XmlAccessType;
import javax.xml.bind.annotation.XmlAccessorType;
import javax.xml.bind.annotation.XmlElement;
import javax.xml.bind.annotation.XmlElementWrapper;
/**
 * Top-level data source configuration object; its fields are bound to XML by field access.
 */
@XmlAccessorType(XmlAccessType.FIELD)
public class DataSource {
// Bound to the "meta" element.
@XmlElement
Meta meta;
// Entries are bound to "xml-source" elements nested inside a "transformation" wrapper element.
@XmlElementWrapper(name="transformation")
@XmlElement(name="xml-source")
XmlSource[] xmlSources;
// Bound to the "patient-table" element, declared as required.
@XmlElement(name="patient-table",required=true)
PatientTable patientTable;
// Bound to the "visit-table" element.
@XmlElement(name="visit-table")
VisitTable visitTable;
// Each array entry is bound to one "wide-table" element.
@XmlElement(name="wide-table")
WideTable[] wideTables;
}
package de.sekmi.histream.etl.config;
import javax.xml.bind.annotation.XmlAccessType;
import javax.xml.bind.annotation.XmlAccessorType;
import javax.xml.bind.annotation.XmlElement;
import javax.xml.bind.annotation.XmlElementWrapper;
import javax.xml.bind.annotation.XmlRootElement;
import javax.xml.bind.annotation.XmlSeeAlso;
/**
 * Root element of the data source configuration; its fields are bound to XML by field access.
 * {@link Column} and {@link StringColumn} are listed in {@link XmlSeeAlso} so that
 * they are included when this class is bound.
 */
@XmlRootElement
@XmlAccessorType(XmlAccessType.FIELD)
@XmlSeeAlso({Column.class, StringColumn.class})
public class DataSource {
// Bound to the "meta" element.
@XmlElement
Meta meta;
// Entries are bound to "xml-source" elements nested inside a "transformation" wrapper element.
@XmlElementWrapper(name="transformation")
@XmlElement(name="xml-source")
XmlSource[] xmlSources;
// Bound to the "patient-table" element, declared as required.
@XmlElement(name="patient-table",required=true)
PatientTable patientTable;
// Bound to the "visit-table" element.
@XmlElement(name="visit-table")
VisitTable visitTable;
// Each array entry is bound to one "wide-table" element.
@XmlElement(name="wide-table")
WideTable[] wideTables;
}
package de.sekmi.histream.etl.config;
import javax.xml.bind.annotation.XmlElement;
/**
 * Extends {@link IdatColumns} with a visit id column.
 */
public class DataTableIdat extends IdatColumns {
// Bound to the "visit-id" element.
@XmlElement(name="visit-id")
Column visitId;
}
package de.sekmi.histream.etl.config;
import javax.xml.bind.annotation.XmlElement;
/**
 * Extends {@link IdatColumns} with a visit id column of type {@link StringColumn}.
 */
public class DataTableIdat extends IdatColumns {
// Bound to the "visit-id" element.
@XmlElement(name="visit-id")
StringColumn visitId;
}
package de.sekmi.histream.etl.config;
import java.time.format.DateTimeFormatter;
import javax.xml.bind.annotation.XmlAttribute;
import javax.xml.bind.annotation.XmlTransient;
/**
 * Date and Time column.
 * Adds a {@code format} attribute to what is inherited from {@link Column}.
 * TODO implement parsing of partial date/time. e.g. 2003-10
 *
 * @author Raphael
 *
 */
public class DateTimeColumn extends Column{
// Not bound to XML and never assigned within this class.
// NOTE(review): presumably meant to hold a formatter built from 'format' - confirm.
@XmlTransient
DateTimeFormatter formatter;
/**
 * Date/time pattern string for parsing via {@link DateTimeFormatter}
 * @see DateTimeFormatter#ofPattern(String)
 */
@XmlAttribute
String format;
/**
 * Creates a date/time column for the given column name and format string.
 * The {@code formatter} field is not initialised here.
 *
 * @param name column name, passed to {@link Column#Column(String)}
 * @param format value stored in the {@code format} field
 */
public DateTimeColumn(String name, String format){
super(name);
this.format = format;
}
// No-argument constructor; leaves name and format unset.
protected DateTimeColumn(){
super();
}
}
\ No newline at end of file
package de.sekmi.histream.etl.config;
import javax.xml.bind.annotation.XmlElement;
/**
 * Base class holding the patient id column; extended by the IDAT
 * definitions of the individual table types.
 */
public class IdatColumns {
// Bound to the "patient-id" element.
@XmlElement(name="patient-id")
Column patientId;
}
package de.sekmi.histream.etl.config;
import javax.xml.bind.annotation.XmlElement;
/**
 * Base class holding the patient id column (a {@link StringColumn});
 * extended by the IDAT definitions of the individual table types.
 */
public class IdatColumns {
// Bound to the "patient-id" element.
@XmlElement(name="patient-id")
StringColumn patientId;
}
package de.sekmi.histream.etl.config;
import javax.xml.bind.annotation.XmlAccessType;
import javax.xml.bind.annotation.XmlAccessorType;
import javax.xml.bind.annotation.XmlElement;
import javax.xml.bind.annotation.XmlElementWrapper;
/**
 * Patient table. Contains patient id and other identifying information.
 * Can also contain medical data
 * @author marap1
 *
 */
public class PatientTable extends Table implements WideInterface{
// Bound to the "idat" element.
@XmlElement
IDAT idat;
// Entries are bound to "concept" elements nested inside an "mdat" wrapper element.
@XmlElementWrapper(name="mdat")
@XmlElement(name="concept")
Concept[] concepts;
/**
 * Columns of the patient table's idat section, in addition to the
 * patient id inherited from {@link IdatColumns}. Bound by field access.
 */
@XmlAccessorType(XmlAccessType.FIELD)
public static class IDAT extends IdatColumns{
Column firstname;
Column lastname;
Column birthdate;
Column deathdate;
Column gender;
// NOTE(review): presumably columns that are present in the table but should be skipped - confirm.
Column[] ignore;
}
}
package de.sekmi.histream.etl.config;
import javax.xml.bind.annotation.XmlAccessType;
import javax.xml.bind.annotation.XmlAccessorType;
import javax.xml.bind.annotation.XmlElement;
import javax.xml.bind.annotation.XmlElementWrapper;
/**
 * Patient table. Contains patient id and other identifying information.
 * Can also contain medical data
 * @author marap1
 *
 */
public class PatientTable extends Table implements WideInterface{
// Bound to the "idat" element.
@XmlElement
IDAT idat;
// Entries are bound to "concept" elements nested inside an "mdat" wrapper element.
@XmlElementWrapper(name="mdat")
@XmlElement(name="concept")
Concept[] concepts;
/**
 * Typed columns of the patient table's idat section, in addition to the
 * patient id inherited from {@link IdatColumns}. Bound by field access.
 */
@XmlAccessorType(XmlAccessType.FIELD)
public static class IDAT extends IdatColumns{
StringColumn firstname;
StringColumn lastname;
DateTimeColumn birthdate;
DateTimeColumn deathdate;
StringColumn gender;
// NOTE(review): presumably columns that are present in the table but should be skipped - confirm.
Column[] ignore;
}
}
package de.sekmi.histream.etl.config;
/**
 * Column type for string values. Adds no fields or behavior to {@link Column};
 * it only provides the two constructors.
 */
public class StringColumn extends Column{
/**
 * Creates a string column for the given column name.
 *
 * @param name column name, passed to {@link Column#Column(String)}
 */
public StringColumn(String name) {
super(name);
}
// No-argument constructor; leaves the column name unset.
protected StringColumn(){
super();
}
}
\ No newline at end of file
package de.sekmi.histream.etl.config;
import javax.xml.bind.annotation.XmlAccessType;
import javax.xml.bind.annotation.XmlAccessorType;
import javax.xml.bind.annotation.XmlElement;
import javax.xml.bind.annotation.XmlElementWrapper;
import javax.xml.bind.annotation.XmlType;
/**
 * Visit table configuration: an idat section plus an optional list of concepts.
 */
public class VisitTable extends Table implements WideInterface{
// Bound to the "idat" element.
@XmlElement
IDAT idat;
// Entries are bound to "concept" elements nested inside an "mdat" wrapper element.
@XmlElementWrapper(name="mdat")
@XmlElement(name="concept")
Concept[] concepts;
/**
 * Columns of the visit table's idat section, in addition to the
 * patient id inherited from {@link IdatColumns}. Bound by field access.
 */
// NOTE(review): the XML type name is "patient-idat" although this is the visit table's IDAT - confirm this is intended.
@XmlType(name="patient-idat")
@XmlAccessorType(XmlAccessType.FIELD)
public static class IDAT extends IdatColumns{
// Bound to the "visit-id" element.
@XmlElement(name="visit-id")
Column visitId;
Column start;
Column end;
// TODO inpatient/outpatient state
// NOTE(review): presumably columns that are present in the table but should be skipped - confirm.
Column[] ignore;
}
}
package de.sekmi.histream.etl.config;
import javax.xml.bind.annotation.XmlAccessType;
import javax.xml.bind.annotation.XmlAccessorType;
import javax.xml.bind.annotation.XmlElement;
import javax.xml.bind.annotation.XmlElementWrapper;
import javax.xml.bind.annotation.XmlType;
/**
 * Visit table configuration: an idat section plus an optional list of concepts.
 */
public class VisitTable extends Table implements WideInterface{
// Bound to the "idat" element.
@XmlElement
IDAT idat;
// Entries are bound to "concept" elements nested inside an "mdat" wrapper element.
@XmlElementWrapper(name="mdat")
@XmlElement(name="concept")
Concept[] concepts;
/**
 * Typed columns of the visit table's idat section, in addition to the
 * patient id inherited from {@link IdatColumns}. Bound by field access.
 */
// NOTE(review): the XML type name is "patient-idat" although this is the visit table's IDAT - confirm this is intended.
@XmlType(name="patient-idat")
@XmlAccessorType(XmlAccessType.FIELD)
public static class IDAT extends IdatColumns{
// Bound to the "visit-id" element.
@XmlElement(name="visit-id")
StringColumn visitId;
DateTimeColumn start;
DateTimeColumn end;
// TODO inpatient/outpatient state
// NOTE(review): presumably columns that are present in the table but should be skipped - confirm.
Column[] ignore;
}
}
package de.sekmi.histream.etl.config;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;
import javax.xml.bind.JAXB;
import org.junit.Assert;
import org.junit.Test;
import de.sekmi.histream.etl.config.DataSource;
/**
 * JAXB binding tests for {@link DataSource}: reading the sample
 * configuration from the classpath and writing a programmatically built one.
 */
public class TestMarshall {

    /**
     * Unmarshals the sample configuration and checks selected values of the
     * meta section, the patient/visit/wide tables and the first concept.
     *
     * @throws IOException if closing the resource stream fails
     */
    @Test
    public void testUnmarshall() throws IOException{
        try( InputStream in = getClass().getResourceAsStream("/test-1-datasource.txt") ){
            // getResourceAsStream returns null for a missing resource; fail with a
            // clear message instead of an obscure error from inside JAXB
            Assert.assertNotNull("classpath resource /test-1-datasource.txt not found", in);
            DataSource ds = JAXB.unmarshal(in, DataSource.class);
            Assert.assertNotNull(ds.meta);
            Assert.assertEquals("replace-source",ds.meta.etlStrategy);
            Assert.assertEquals("test-1",ds.meta.sourceId);

            // patient table
            Assert.assertNotNull(ds.patientTable);
            Assert.assertNotNull(ds.patientTable.source);
            Assert.assertNotNull(ds.patientTable.idat);
            Assert.assertEquals("patid",ds.patientTable.idat.patientId.name);
            Assert.assertEquals("geburtsdatum",ds.patientTable.idat.birthdate.name);
            Assert.assertEquals("geschlecht",ds.patientTable.idat.gender.name);

            // visit table
            Assert.assertNotNull(ds.visitTable);
            Assert.assertNotNull(ds.visitTable.source);
            Assert.assertNotNull(ds.visitTable.idat);
            Assert.assertEquals("patid",ds.visitTable.idat.patientId.name);
            Assert.assertEquals("fallnr",ds.visitTable.idat.visitId.name);

            // wide table
            Assert.assertNotNull(ds.wideTables);
            Assert.assertEquals(1, ds.wideTables.length);
            WideTable t = ds.wideTables[0];
            Assert.assertNotNull(t);
            Assert.assertNotNull(t.idat);
            Assert.assertEquals("patid",t.idat.patientId.name);
            Assert.assertEquals("fallnr",t.idat.visitId.name);

            // concepts
            Assert.assertNotNull(t.concepts);
            Assert.assertTrue(t.concepts.length > 0);
            Concept c = t.concepts[0];
            Assert.assertNotNull(c);
            Assert.assertEquals("natrium", c.id);
            // value and unit are not declared as required, so check before dereferencing
            Assert.assertNotNull(c.value);
            Assert.assertEquals("na", c.value.name);
            Assert.assertNotNull(c.unit);
            Assert.assertEquals("mmol/l", c.unit.constantValue);
        }
    }

    /**
     * Builds a {@link DataSource} in code and marshals it to {@code System.out}.
     * The output is not verified; the test passes when marshalling completes
     * without an exception.
     *
     * @throws MalformedURLException if the sample URL cannot be parsed
     */
    @Test
    public void testMarshal() throws MalformedURLException{
        DataSource s = new DataSource();
        s.meta = new Meta();
        s.meta.sourceId = "SID";
        s.meta.etlStrategy = "replace-source";

        s.xmlSources = new XmlSource[1];
        s.xmlSources[0] = new XmlSource();
        s.xmlSources[0].url = new URL("http://lala");
        s.xmlSources[0].transform = new XmlSource.Transform[1];
        s.xmlSources[0].transform[0] = new XmlSource.Transform("file:my.xsl","c:/to/file");

        s.patientTable = new PatientTable();
        s.patientTable.source = new FileSource("file:patient.source","text/csv");
        s.patientTable.idat = new PatientTable.IDAT();
        s.patientTable.idat.patientId = new Column("patid");

        s.visitTable = new VisitTable();
        s.visitTable.source = new FileSource("file:lala.txt", "text/plain");
        s.visitTable.idat = new VisitTable.IDAT();
        s.visitTable.idat.patientId = new Column("patid");
        s.visitTable.idat.visitId = new Column("visit");
        s.visitTable.concepts = new Concept[1];
        s.visitTable.concepts[0] = new Concept("vconcept","start");

        s.wideTables = new WideTable[1];
        s.wideTables[0] = new WideTable();
        s.wideTables[0].source = new SQLSource("org.postgresql.Driver","jdbc:postgresql://localhost:15432/i2b2");
        s.wideTables[0].idat = new DataTableIdat();
        s.wideTables[0].idat.patientId = new Column("patid");
        // sized to the single concept that is actually assigned, so no null slot is marshalled
        s.wideTables[0].concepts = new Concept[1];
        s.wideTables[0].concepts[0] = new Concept("ACC","zeit");
        s.wideTables[0].concepts[0].modifiers = new Concept.Modifier[1];
        s.wideTables[0].concepts[0].modifiers[0] = new Concept.Modifier("DOSE");
        s.wideTables[0].concepts[0].modifiers[0].value = new Column("dosis");

        JAXB.marshal(s, System.out);
    }
}
package de.sekmi.histream.etl.config;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;
import javax.xml.bind.JAXB;
import org.junit.Assert;
import org.junit.Test;
import de.sekmi.histream.etl.config.DataSource;
public class TestMarshall {
/**
 * Unmarshals the sample configuration and checks selected values of the
 * meta section, the patient/visit/wide tables and the first concept.
 *
 * @throws IOException if closing the resource stream fails
 */
@Test
public void testUnmarshall() throws IOException{
    try( InputStream in = getClass().getResourceAsStream("/test-1-datasource.xml") ){
        // getResourceAsStream returns null for a missing resource; fail with a
        // clear message instead of an obscure error from inside JAXB
        Assert.assertNotNull("classpath resource /test-1-datasource.xml not found", in);
        DataSource ds = JAXB.unmarshal(in, DataSource.class);
        Assert.assertNotNull(ds.meta);
        Assert.assertEquals("replace-source",ds.meta.etlStrategy);
        Assert.assertEquals("test-1",ds.meta.sourceId);

        // patient table
        Assert.assertNotNull(ds.patientTable);
        Assert.assertNotNull(ds.patientTable.source);
        Assert.assertNotNull(ds.patientTable.idat);
        Assert.assertEquals("patid",ds.patientTable.idat.patientId.name);
        Assert.assertEquals("geburtsdatum",ds.patientTable.idat.birthdate.name);
        Assert.assertEquals("geschlecht",ds.patientTable.idat.gender.name);

        // visit table
        Assert.assertNotNull(ds.visitTable);
        Assert.assertNotNull(ds.visitTable.source);
        Assert.assertNotNull(ds.visitTable.idat);
        Assert.assertEquals("patid",ds.visitTable.idat.patientId.name);
        Assert.assertEquals("fallnr",ds.visitTable.idat.visitId.name);

        // wide table
        Assert.assertNotNull(ds.wideTables);
        Assert.assertEquals(1, ds.wideTables.length);
        WideTable t = ds.wideTables[0];
        Assert.assertNotNull(t);
        Assert.assertNotNull(t.idat);
        Assert.assertEquals("patid",t.idat.patientId.name);
        Assert.assertEquals("fallnr",t.idat.visitId.name);

        // concepts
        Assert.assertNotNull(t.concepts);
        Assert.assertTrue(t.concepts.length > 0);
        Concept c = t.concepts[0];
        Assert.assertNotNull(c);
        Assert.assertEquals("natrium", c.id);
        // value and unit are not declared as required, so check before dereferencing
        Assert.assertNotNull(c.value);
        Assert.assertEquals("na", c.value.name);
        Assert.assertNotNull(c.unit);
        Assert.assertEquals("mmol/l", c.unit.constantValue);
    }
}