Commit 334d4ff3 authored by R.W.Majeed's avatar R.W.Majeed
Browse files

typed columns

parent b2b13333
package de.sekmi.histream.etl.config; package de.sekmi.histream.etl.config;
import java.text.DecimalFormat;
import javax.xml.bind.annotation.XmlAttribute; import javax.xml.bind.annotation.XmlAttribute;
import javax.xml.bind.annotation.XmlSeeAlso;
import javax.xml.bind.annotation.XmlTransient;
import javax.xml.bind.annotation.XmlValue; import javax.xml.bind.annotation.XmlValue;
@XmlTransient
@XmlSeeAlso({StringColumn.class})
public class Column { public class Column {
private Column(){ protected Column(){
} }
public Column(String name){ public Column(String name){
this(); this();
this.name = name; this.name = name;
} }
/**
* If this string is found in the column data, the resulting value will be null.
*/
@XmlAttribute @XmlAttribute
String na; String na;
/**
* If set (i.e. non-null), this will always overwrite any other value.
* Datatype formats and regular expressions are still applied to the constant value.
*/
@XmlAttribute(name="constant-value") @XmlAttribute(name="constant-value")
String constantValue; String constantValue;
/**
* Regular expression which needs to match the input string
*/
@XmlAttribute(name="regex-match")
String regexMatch;
/**
* Replace the input value with the specified string or regular expression group from {@link #regexMatch}.
* If not specified, the full input string is used (regardless of match region).
*/
@XmlAttribute(name="regex-replace")
String regexReplace;
/**
* Action to perform if the {@link #regexMatch} did not match the input string.
* Either use NA (usually null) for the value, or drop the whole concept/fact.
*/
@XmlAttribute(name="regex-nomatch-action")
String regexNoMatchAction; // either na or drop
/**
* Report a warning if the {@link #regexMatch} did not match the input string.
* Defaults to true.
*/
@XmlAttribute(name="regex-nomatch-warning")
Boolean regexNoMatchWarning;
/**
* Column name to use for reading input values.
*/
@XmlValue @XmlValue
String name; String name;
/**
 * Resolves the effective value of this column for a raw input string.
 * <p>
 * Processing order:
 * <ol>
 * <li>a non-null {@link #constantValue} replaces the input value,</li>
 * <li>a value equal to {@link #na} is turned into {@code null},</li>
 * <li>any remaining non-null value is passed to
 *     {@link #applyRegularExpression(String)} if {@link #regexMatch} is set.</li>
 * </ol>
 * The return type is {@code Object} because, per the original design note,
 * typed columns are intended to yield Long, BigDecimal, String, DateTime
 * or DateTimeAccuracy (for incomplete dates). This base implementation
 * performs no type conversion and only ever returns a String or {@code null}.
 * <p>
 * TODO: how to read SQL table data, which already contains types (e.g. sql.Integer)
 *
 * @param value input value, e.g. from a text table column; may be {@code null}
 * @return the processed value, or {@code null} if there is no value or it equals the NA marker
 */
public Object valueOf(String value){
    // a configured constant always wins over the supplied input
    String result = (constantValue != null) ? constantValue : value;
    // String.equals(null) is false, so an unset NA marker never matches
    if( result == null || result.equals(na) ){
        return null;
    }
    if( regexMatch == null ){
        return result;
    }
    return applyRegularExpression(result);
}
/**
* Placeholder for the regular expression processing configured via
* {@link #regexMatch} and {@link #regexReplace}.
* Not implemented yet: the input is returned unchanged.
*
* @param input value to process
* @return currently always the unmodified {@code input}
*/
public String applyRegularExpression(String input){
// TODO: apply
return input;
}
/**
* Column subtype, presumably intended for integer values (judging by its name).
* Declares no members of its own yet.
*/
public static class IntegerColumn extends Column{
}
/**
* Column subtype which carries a {@link DecimalFormat} pattern in its
* {@code format} attribute.
* NOTE(review): no code visible here uses the pattern for parsing yet.
*/
public static class DecimalColumn extends Column{
/**
* Excluded from XML binding. Presumably meant to hold the formatter
* built from {@link #format}; it is not assigned in this class (TODO confirm).
*/
@XmlTransient
DecimalFormat decimalFormat;
/**
* Decimal format string for parsing via {@link DecimalFormat}
* @see DecimalFormat#DecimalFormat(String)
*/
@XmlAttribute
String format;
}
} }
package de.sekmi.histream.etl.config;
import java.util.HashMap;
import java.util.Map;
/**
 * Maps {@link Column}s to header/table indices.
 * <p>
 * Columns are matched to headers by exact name comparison. Successfully
 * registered columns are stored by name together with the index of the
 * matching header.
 *
 * @author Raphael
 *
 */
public class ColumnMap{
    /** Header names of the table; the array index is the column position. */
    String[] headers;
    /** Registered column names mapped to their index in {@link #headers}. */
    Map<String, Integer> map;

    /**
     * Creates an empty map for the given table headers.
     * The array is stored as is (no copy is made).
     *
     * @param headers header names in table order
     */
    public ColumnMap(String[] headers){
        this.headers = headers;
        this.map = new HashMap<>();
    }

    /**
     * Looks up the column's name in the headers and remembers the index
     * of the first matching header.
     * <p>
     * A column without a name (e.g. one which only provides a constant value)
     * cannot refer to any header and is reported as not registered
     * instead of failing with a {@link NullPointerException}.
     *
     * @param column column to register
     * @return {@code true} if a header with the column's name was found and
     *  the mapping was stored, {@code false} otherwise
     */
    public boolean registerColumn(Column column){
        String name = column.name;
        if( name == null ){
            // nameless columns do not read from the table, nothing to map
            return false;
        }
        for( int i=0; i<headers.length; i++ ){
            if( name.equals(headers[i]) ){
                // first match wins; later duplicate headers are ignored
                map.put(name, Integer.valueOf(i));
                return true;
            }
        }
        return false;
    }
}
\ No newline at end of file
...@@ -16,10 +16,10 @@ public class Concept{ ...@@ -16,10 +16,10 @@ public class Concept{
String id; String id;
// TODO: value should contain also type (string,decimal,integer,...) // TODO: value should contain also type (string,decimal,integer,...)
Column value; Column value;
Column unit; StringColumn unit;
@XmlElement(required=true) @XmlElement(required=true)
Column start; DateTimeColumn start;
Column end; DateTimeColumn end;
@XmlElement(name="modifier") @XmlElement(name="modifier")
Modifier[] modifiers; Modifier[] modifiers;
// ... // ...
...@@ -30,7 +30,7 @@ public class Concept{ ...@@ -30,7 +30,7 @@ public class Concept{
String id; String id;
// TODO: value with type // TODO: value with type
Column value; Column value;
Column unit; StringColumn unit;
private Modifier(){ private Modifier(){
} }
...@@ -43,10 +43,10 @@ public class Concept{ ...@@ -43,10 +43,10 @@ public class Concept{
private Concept(){ private Concept(){
} }
public Concept(String id, String startColumn){ public Concept(String id, String startColumn, String format){
this(); this();
this.id = id; this.id = id;
this.start = new Column(startColumn); this.start = new DateTimeColumn(startColumn, format);
} }
} }
\ No newline at end of file
...@@ -4,8 +4,12 @@ import javax.xml.bind.annotation.XmlAccessType; ...@@ -4,8 +4,12 @@ import javax.xml.bind.annotation.XmlAccessType;
import javax.xml.bind.annotation.XmlAccessorType; import javax.xml.bind.annotation.XmlAccessorType;
import javax.xml.bind.annotation.XmlElement; import javax.xml.bind.annotation.XmlElement;
import javax.xml.bind.annotation.XmlElementWrapper; import javax.xml.bind.annotation.XmlElementWrapper;
import javax.xml.bind.annotation.XmlRootElement;
import javax.xml.bind.annotation.XmlSeeAlso;
@XmlRootElement
@XmlAccessorType(XmlAccessType.FIELD) @XmlAccessorType(XmlAccessType.FIELD)
@XmlSeeAlso({Column.class, StringColumn.class})
public class DataSource { public class DataSource {
@XmlElement @XmlElement
Meta meta; Meta meta;
......
...@@ -4,5 +4,5 @@ import javax.xml.bind.annotation.XmlElement; ...@@ -4,5 +4,5 @@ import javax.xml.bind.annotation.XmlElement;
public class DataTableIdat extends IdatColumns { public class DataTableIdat extends IdatColumns {
@XmlElement(name="visit-id") @XmlElement(name="visit-id")
Column visitId; StringColumn visitId;
} }
package de.sekmi.histream.etl.config;
import java.time.format.DateTimeFormatter;
import javax.xml.bind.annotation.XmlAttribute;
import javax.xml.bind.annotation.XmlTransient;
/**
 * Column for date and time values.
 * TODO implement parsing of partial date/time, e.g. 2003-10
 *
 * @author Raphael
 *
 */
public class DateTimeColumn extends Column{
    /**
     * Excluded from XML binding. Presumably meant to hold the formatter
     * built from {@link #format}; it is not assigned in this class (TODO confirm).
     */
    @XmlTransient
    DateTimeFormatter formatter;

    /**
     * Date/time pattern string for parsing via {@link DateTimeFormatter}.
     * Literal letters in the pattern need to be quoted,
     * e.g. {@code yyyy-MM-dd'T'HH:mm:ss}.
     * @see DateTimeFormatter#ofPattern(String)
     */
    @XmlAttribute
    String format;

    /**
     * Creates a column without name and format.
     */
    protected DateTimeColumn(){
    }

    /**
     * Creates a date/time column.
     *
     * @param name column name to use for reading input values
     * @param format pattern string, stored in {@link #format}
     */
    public DateTimeColumn(String name, String format){
        super(name);
        this.format = format;
    }
}
\ No newline at end of file
...@@ -4,5 +4,5 @@ import javax.xml.bind.annotation.XmlElement; ...@@ -4,5 +4,5 @@ import javax.xml.bind.annotation.XmlElement;
public class IdatColumns { public class IdatColumns {
@XmlElement(name="patient-id") @XmlElement(name="patient-id")
Column patientId; StringColumn patientId;
} }
...@@ -21,12 +21,11 @@ public class PatientTable extends Table implements WideInterface{ ...@@ -21,12 +21,11 @@ public class PatientTable extends Table implements WideInterface{
@XmlAccessorType(XmlAccessType.FIELD) @XmlAccessorType(XmlAccessType.FIELD)
public static class IDAT extends IdatColumns{ public static class IDAT extends IdatColumns{
Column firstname; StringColumn firstname;
Column lastname; StringColumn lastname;
Column birthdate; DateTimeColumn birthdate;
Column deathdate; DateTimeColumn deathdate;
Column gender; StringColumn gender;
Column[] ignore; Column[] ignore;
} }
} }
package de.sekmi.histream.etl.config;
/**
 * {@link Column} subtype, presumably for plain string values (judging by its name).
 * Adds no fields or behavior of its own.
 */
public class StringColumn extends Column{
    /**
     * Creates a column without a name.
     */
    protected StringColumn(){
    }

    /**
     * Creates a string column.
     *
     * @param name column name to use for reading input values
     */
    public StringColumn(String name) {
        super(name);
    }
}
\ No newline at end of file
...@@ -18,9 +18,9 @@ public class VisitTable extends Table implements WideInterface{ ...@@ -18,9 +18,9 @@ public class VisitTable extends Table implements WideInterface{
@XmlAccessorType(XmlAccessType.FIELD) @XmlAccessorType(XmlAccessType.FIELD)
public static class IDAT extends IdatColumns{ public static class IDAT extends IdatColumns{
@XmlElement(name="visit-id") @XmlElement(name="visit-id")
Column visitId; StringColumn visitId;
Column start; DateTimeColumn start;
Column end; DateTimeColumn end;
// TODO inpatient/outpatient state // TODO inpatient/outpatient state
Column[] ignore; Column[] ignore;
} }
......
...@@ -17,7 +17,7 @@ public class TestMarshall { ...@@ -17,7 +17,7 @@ public class TestMarshall {
@Test @Test
public void testUnmarshall() throws IOException{ public void testUnmarshall() throws IOException{
try( InputStream in = getClass().getResourceAsStream("/test-1-datasource.txt") ){ try( InputStream in = getClass().getResourceAsStream("/test-1-datasource.xml") ){
DataSource ds = JAXB.unmarshal(in, DataSource.class); DataSource ds = JAXB.unmarshal(in, DataSource.class);
Assert.assertNotNull(ds.meta); Assert.assertNotNull(ds.meta);
Assert.assertEquals("replace-source",ds.meta.etlStrategy); Assert.assertEquals("replace-source",ds.meta.etlStrategy);
...@@ -67,24 +67,24 @@ public class TestMarshall { ...@@ -67,24 +67,24 @@ public class TestMarshall {
s.patientTable = new PatientTable(); s.patientTable = new PatientTable();
s.patientTable.source = new FileSource("file:patient.source","text/csv"); s.patientTable.source = new FileSource("file:patient.source","text/csv");
s.patientTable.idat = new PatientTable.IDAT(); s.patientTable.idat = new PatientTable.IDAT();
s.patientTable.idat.patientId = new Column("patid"); s.patientTable.idat.patientId = new StringColumn("patid");
s.visitTable = new VisitTable(); s.visitTable = new VisitTable();
s.visitTable.source = new FileSource("file:lala.txt", "text/plain"); s.visitTable.source = new FileSource("file:lala.txt", "text/plain");
s.visitTable.idat = new VisitTable.IDAT(); s.visitTable.idat = new VisitTable.IDAT();
s.visitTable.idat.patientId = new Column("patid"); s.visitTable.idat.patientId = new StringColumn("patid");
s.visitTable.idat.visitId = new Column("visit"); s.visitTable.idat.visitId = new StringColumn("visit");
s.visitTable.concepts = new Concept[1]; s.visitTable.concepts = new Concept[1];
s.visitTable.concepts[0] = new Concept("vconcept","start"); s.visitTable.concepts[0] = new Concept("vconcept","start","yyyy-MM-ddTHH:mm:ss");
s.wideTables = new WideTable[1]; s.wideTables = new WideTable[1];
s.wideTables[0] = new WideTable(); s.wideTables[0] = new WideTable();
s.wideTables[0].source = new SQLSource("org.postgresql.Driver","jdbc:postgresql://localhost:15432/i2b2"); s.wideTables[0].source = new SQLSource("org.postgresql.Driver","jdbc:postgresql://localhost:15432/i2b2");
s.wideTables[0].idat = new DataTableIdat(); s.wideTables[0].idat = new DataTableIdat();
s.wideTables[0].idat.patientId = new Column("patid"); s.wideTables[0].idat.patientId = new StringColumn("patid");
s.wideTables[0].concepts = new Concept[2]; s.wideTables[0].concepts = new Concept[2];
s.wideTables[0].concepts[0] = new Concept("ACC","zeit"); s.wideTables[0].concepts[0] = new Concept("ACC","zeit","yyyy-MM-ddTHH:mm:ss");
s.wideTables[0].concepts[0].modifiers = new Concept.Modifier[1]; s.wideTables[0].concepts[0].modifiers = new Concept.Modifier[1];
s.wideTables[0].concepts[0].modifiers[0] = new Concept.Modifier("DOSE"); s.wideTables[0].concepts[0].modifiers[0] = new Concept.Modifier("DOSE");
s.wideTables[0].concepts[0].modifiers[0].value = new Column("dosis"); s.wideTables[0].concepts[0].modifiers[0].value = new StringColumn("dosis");
JAXB.marshal(s, System.out); JAXB.marshal(s, System.out);
......
<!DOCTYPE configuration > <?xml version="1.0" encoding="UTF-8"?>
<datasource version="1.0"> <datasource version="1.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" >
<meta> <meta>
<etl-strategy>replace-source</etl-strategy> <etl-strategy>replace-source</etl-strategy>
<source-id>test-1</source-id> <source-id>test-1</source-id>
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
--> -->
<patient-table> <patient-table>
<source xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="fileSource"> <source xsi:type="fileSource">
<url>file:test-1-patients.txt</url> <url>file:test-1-patients.txt</url>
<type>text/csv</type> <type>text/csv</type>
</source> </source>
...@@ -33,13 +33,13 @@ ...@@ -33,13 +33,13 @@
<birthdate>geburtsdatum</birthdate> <birthdate>geburtsdatum</birthdate>
<deathdate>verstorben</deathdate> <deathdate>verstorben</deathdate>
<gender>geschlecht</gender> <gender>geschlecht</gender>
<ignore>ignoriert1</ignore> <ignore xsi:type="stringColumn">ignoriert1</ignore>
<ignore>patfakt1</ignore> <ignore xsi:type="stringColumn">patfakt1</ignore>
</idat> </idat>
</patient-table> </patient-table>
<!-- optional --> <!-- optional -->
<visit-table> <visit-table>
<source xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="fileSource"> <source xsi:type="fileSource">
<url>file:test-1-visits.txt</url> <url>file:test-1-visits.txt</url>
<type>text/csv</type> <type>text/csv</type>
</source> </source>
...@@ -52,14 +52,14 @@ ...@@ -52,14 +52,14 @@
<mdat> <mdat>
<!-- in/out code --> <!-- in/out code -->
<concept id="weight"> <concept id="weight">
<value>gewicht</value> <value xsi:type="stringColumn">gewicht</value>
<start>start</start> <start>start</start>
</concept> </concept>
</mdat> </mdat>
</visit-table> </visit-table>
<wide-table> <wide-table>
<source xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="fileSource"> <source xsi:type="fileSource">
<url>file:test-1-widetable.txt</url> <url>file:test-1-widetable.txt</url>
<type>text/csv</type> <type>text/csv</type>
</source> </source>
...@@ -69,11 +69,11 @@ ...@@ -69,11 +69,11 @@
</idat> </idat>
<mdat> <mdat>
<concept id="natrium"> <concept id="natrium">
<value>na</value> <value xsi:type="stringColumn">na</value>
<start>zeitpunkt</start> <start>zeitpunkt</start>
<unit constant-value="mmol/l" /> <unit constant-value="mmol/l" />
<modifier id="other"> <modifier id="other">
<value field="other" /> <value xsi:type="stringColumn">other</value>
</modifier> </modifier>
</concept> </concept>
</mdat> </mdat>
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment