Commit 334d4ff3 authored by R.W.Majeed's avatar R.W.Majeed
Browse files

typed columns

parent b2b13333
package de.sekmi.histream.etl.config;
import java.text.DecimalFormat;
import javax.xml.bind.annotation.XmlAttribute;
import javax.xml.bind.annotation.XmlSeeAlso;
import javax.xml.bind.annotation.XmlTransient;
import javax.xml.bind.annotation.XmlValue;
@XmlTransient
@XmlSeeAlso({StringColumn.class})
public class Column {
private Column(){
protected Column(){
}
public Column(String name){
this();
this.name = name;
}
/**
* If this string is found in the column data, the resulting value will be null.
*/
@XmlAttribute
String na;
/**
* If set (e.g. non-null), this will always overwrite any other value.
* Datatype formats and regular expressions are still applied to the constant value.
*/
@XmlAttribute(name="constant-value")
String constantValue;
/**
* Regular expression which needs to match the input string
*/
@XmlAttribute(name="regex-match")
String regexMatch;
/**
* Replace the input value with the specified string or regular expression group from {@link #regexMatch}.
* If not specified, the full input string is used (regardless of match region).
*/
@XmlAttribute(name="regex-replace")
String regexReplace;
/**
* Action to perform if the {@link #regexMatch} did not match the input string.
* Either use NA (usually null) for the value, or drop the whole concept/fact.
*/
@XmlAttribute(name="regex-nomatch-action")
String regexNoMatchAction; // either na or drop
/**
* Report a warning if the {@link #regexMatch} did not match the input string.
* Defaults to true.
*/
@XmlAttribute(name="regex-nomatch-warning")
Boolean regexNoMatchWarning;
/**
* Column name to use for reading input values.
*/
@XmlValue
String name;
/**
* Convert a string input value to the output data type. The resulting type depends
* on the type attribute and can be one of Long, BigDecimal, String, DateTime
* or DateTimeAccuracy (for incomplete dates).
* <p>
* TODO: how to read SQL table data, which already contains types (e.g. sql.Integer)
*
* @param value input value. e.g. from text table column
* @return output type representing the input value
*/
public Object valueOf(String value){
if( constantValue != null ){
value = constantValue;
}
if( na != null && value != null && na.equals(value) ){
value = null;
}
if( value != null && regexMatch != null ){
value = applyRegularExpression(value);
}
return value;
}
public String applyRegularExpression(String input){
// TODO: apply
return input;
}
public static class IntegerColumn extends Column{
}
public static class DecimalColumn extends Column{
@XmlTransient
DecimalFormat decimalFormat;
/**
* Decimal format string for parsing via {@link DecimalFormat}
* @see DecimalFormat#DecimalFormat(String)
*/
@XmlAttribute
String format;
}
}
package de.sekmi.histream.etl.config;
import java.util.HashMap;
import java.util.Map;
/**
 * Associates {@link Column} names with their position in a table header.
 * <p>
 * The header array is supplied once at construction; columns are then
 * registered one by one and, if found in the header, remembered under
 * their name together with the zero-based header index.
 *
 * @author Raphael
 *
 */
public class ColumnMap{
    String[] headers;
    Map<String, Integer> map;

    /**
     * Creates an empty map for the given table header.
     *
     * @param headers header names in table order
     */
    public ColumnMap(String[] headers){
        this.map = new HashMap<>();
        this.headers = headers;
    }

    /**
     * Looks up the column's name in the header and, on the first exact match,
     * stores the name with its header index.
     *
     * @param column column to register
     * @return {@code true} if the column name was found in the header and
     *         registered, {@code false} if no header entry matched
     */
    public boolean registerColumn(Column column){
        int position = 0;
        for( String header : headers ){
            if( column.name.equals(header) ){
                // first match wins; later duplicates in the header are ignored
                map.put(column.name, position);
                return true;
            }
            position++;
        }
        return false;
    }
}
\ No newline at end of file
......@@ -16,10 +16,10 @@ public class Concept{
String id;
// TODO: value should contain also type (string,decimal,integer,...)
Column value;
Column unit;
StringColumn unit;
@XmlElement(required=true)
Column start;
Column end;
DateTimeColumn start;
DateTimeColumn end;
@XmlElement(name="modifier")
Modifier[] modifiers;
// ...
......@@ -30,7 +30,7 @@ public class Concept{
String id;
// TODO: value with type
Column value;
Column unit;
StringColumn unit;
private Modifier(){
}
......@@ -43,10 +43,10 @@ public class Concept{
private Concept(){
}
public Concept(String id, String startColumn){
public Concept(String id, String startColumn, String format){
this();
this.id = id;
this.start = new Column(startColumn);
this.start = new DateTimeColumn(startColumn, format);
}
}
\ No newline at end of file
......@@ -4,8 +4,12 @@ import javax.xml.bind.annotation.XmlAccessType;
import javax.xml.bind.annotation.XmlAccessorType;
import javax.xml.bind.annotation.XmlElement;
import javax.xml.bind.annotation.XmlElementWrapper;
import javax.xml.bind.annotation.XmlRootElement;
import javax.xml.bind.annotation.XmlSeeAlso;
@XmlRootElement
@XmlAccessorType(XmlAccessType.FIELD)
@XmlSeeAlso({Column.class, StringColumn.class})
public class DataSource {
@XmlElement
Meta meta;
......
......@@ -4,5 +4,5 @@ import javax.xml.bind.annotation.XmlElement;
public class DataTableIdat extends IdatColumns {
@XmlElement(name="visit-id")
Column visitId;
StringColumn visitId;
}
package de.sekmi.histream.etl.config;
import java.time.format.DateTimeFormatter;
import javax.xml.bind.annotation.XmlAttribute;
import javax.xml.bind.annotation.XmlTransient;
/**
* Date and Time column.
* <p>
* Extends {@link Column} with a date/time pattern attribute.
* No parsing is implemented in this class yet.
* TODO implement parsing of partial date/time. e.g. 2003-10
*
* @author Raphael
*
*/
public class DateTimeColumn extends Column{
/**
* Formatter excluded from the XML binding. It is not assigned anywhere
* in this class; presumably meant to be built from {@link #format}.
*/
@XmlTransient
DateTimeFormatter formatter;
/**
* Date/time pattern string for parsing via {@link DateTimeFormatter}.
* Literal letters in the pattern must be enclosed in single quotes,
* e.g. {@code yyyy-MM-dd'T'HH:mm:ss}.
* @see DateTimeFormatter#ofPattern(String)
*/
@XmlAttribute
String format;
/**
* Creates a date/time column reading from the given column name.
*
* @param name column name to use for reading input values
* @param format date/time pattern string, stored in {@link #format}
*/
public DateTimeColumn(String name, String format){
super(name);
this.format = format;
}
/**
* No-argument constructor; leaves name and format unset.
*/
protected DateTimeColumn(){
super();
}
}
\ No newline at end of file
......@@ -4,5 +4,5 @@ import javax.xml.bind.annotation.XmlElement;
public class IdatColumns {
@XmlElement(name="patient-id")
Column patientId;
StringColumn patientId;
}
......@@ -21,12 +21,11 @@ public class PatientTable extends Table implements WideInterface{
@XmlAccessorType(XmlAccessType.FIELD)
public static class IDAT extends IdatColumns{
Column firstname;
Column lastname;
Column birthdate;
Column deathdate;
Column gender;
StringColumn firstname;
StringColumn lastname;
DateTimeColumn birthdate;
DateTimeColumn deathdate;
StringColumn gender;
Column[] ignore;
}
}
package de.sekmi.histream.etl.config;
/**
 * Column type for string values. Adds no fields or behaviour of its own;
 * everything is inherited from {@link Column}.
 */
public class StringColumn extends Column{
    /**
     * No-argument constructor; leaves the column name unset.
     */
    protected StringColumn(){
    }

    /**
     * Creates a string column reading from the given column name.
     *
     * @param name column name to use for reading input values
     */
    public StringColumn(String name) {
        super(name);
    }
}
\ No newline at end of file
......@@ -18,9 +18,9 @@ public class VisitTable extends Table implements WideInterface{
@XmlAccessorType(XmlAccessType.FIELD)
public static class IDAT extends IdatColumns{
@XmlElement(name="visit-id")
Column visitId;
Column start;
Column end;
StringColumn visitId;
DateTimeColumn start;
DateTimeColumn end;
// TODO inpatient/outpatient state
Column[] ignore;
}
......
......@@ -17,7 +17,7 @@ public class TestMarshall {
@Test
public void testUnmarshall() throws IOException{
try( InputStream in = getClass().getResourceAsStream("/test-1-datasource.txt") ){
try( InputStream in = getClass().getResourceAsStream("/test-1-datasource.xml") ){
DataSource ds = JAXB.unmarshal(in, DataSource.class);
Assert.assertNotNull(ds.meta);
Assert.assertEquals("replace-source",ds.meta.etlStrategy);
......@@ -67,24 +67,24 @@ public class TestMarshall {
s.patientTable = new PatientTable();
s.patientTable.source = new FileSource("file:patient.source","text/csv");
s.patientTable.idat = new PatientTable.IDAT();
s.patientTable.idat.patientId = new Column("patid");
s.patientTable.idat.patientId = new StringColumn("patid");
s.visitTable = new VisitTable();
s.visitTable.source = new FileSource("file:lala.txt", "text/plain");
s.visitTable.idat = new VisitTable.IDAT();
s.visitTable.idat.patientId = new Column("patid");
s.visitTable.idat.visitId = new Column("visit");
s.visitTable.idat.patientId = new StringColumn("patid");
s.visitTable.idat.visitId = new StringColumn("visit");
s.visitTable.concepts = new Concept[1];
s.visitTable.concepts[0] = new Concept("vconcept","start");
s.visitTable.concepts[0] = new Concept("vconcept","start","yyyy-MM-ddTHH:mm:ss");
s.wideTables = new WideTable[1];
s.wideTables[0] = new WideTable();
s.wideTables[0].source = new SQLSource("org.postgresql.Driver","jdbc:postgresql://localhost:15432/i2b2");
s.wideTables[0].idat = new DataTableIdat();
s.wideTables[0].idat.patientId = new Column("patid");
s.wideTables[0].idat.patientId = new StringColumn("patid");
s.wideTables[0].concepts = new Concept[2];
s.wideTables[0].concepts[0] = new Concept("ACC","zeit");
s.wideTables[0].concepts[0] = new Concept("ACC","zeit","yyyy-MM-ddTHH:mm:ss");
s.wideTables[0].concepts[0].modifiers = new Concept.Modifier[1];
s.wideTables[0].concepts[0].modifiers[0] = new Concept.Modifier("DOSE");
s.wideTables[0].concepts[0].modifiers[0].value = new Column("dosis");
s.wideTables[0].concepts[0].modifiers[0].value = new StringColumn("dosis");
JAXB.marshal(s, System.out);
......
<!DOCTYPE configuration >
<datasource version="1.0">
<?xml version="1.0" encoding="UTF-8"?>
<datasource version="1.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" >
<meta>
<etl-strategy>replace-source</etl-strategy>
<source-id>test-1</source-id>
......@@ -22,7 +22,7 @@
-->
<patient-table>
<source xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="fileSource">
<source xsi:type="fileSource">
<url>file:test-1-patients.txt</url>
<type>text/csv</type>
</source>
......@@ -33,13 +33,13 @@
<birthdate>geburtsdatum</birthdate>
<deathdate>verstorben</deathdate>
<gender>geschlecht</gender>
<ignore>ignoriert1</ignore>
<ignore>patfakt1</ignore>
<ignore xsi:type="stringColumn">ignoriert1</ignore>
<ignore xsi:type="stringColumn">patfakt1</ignore>
</idat>
</patient-table>
<!-- optional -->
<visit-table>
<source xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="fileSource">
<source xsi:type="fileSource">
<url>file:test-1-visits.txt</url>
<type>text/csv</type>
</source>
......@@ -52,14 +52,14 @@
<mdat>
<!-- in/out code -->
<concept id="weight">
<value>gewicht</value>
<value xsi:type="stringColumn">gewicht</value>
<start>start</start>
</concept>
</mdat>
</visit-table>
<wide-table>
<source xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="fileSource">
<source xsi:type="fileSource">
<url>file:test-1-widetable.txt</url>
<type>text/csv</type>
</source>
......@@ -69,11 +69,11 @@
</idat>
<mdat>
<concept id="natrium">
<value>na</value>
<value xsi:type="stringColumn">na</value>
<start>zeitpunkt</start>
<unit constant-value="mmol/l" />
<modifier id="other">
<value field="other" />
<value xsi:type="stringColumn">other</value>
</modifier>
</concept>
</mdat>
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment