Commit 327d8715 authored by rwm

map rules are parsed from import configuration

parent a81df474
......@@ -3,6 +3,7 @@ package de.sekmi.histream.etl.config;
import java.util.Objects;
import javax.xml.bind.annotation.XmlAttribute;
import javax.xml.bind.annotation.XmlElement;
import javax.xml.bind.annotation.XmlSeeAlso;
import javax.xml.bind.annotation.XmlTransient;
......@@ -39,12 +40,6 @@ public abstract class Column<T> {
@XmlAttribute(name="constant-value")
String constantValue;
/**
* Regular expression which needs to match the input string
*/
@XmlAttribute(name="regex-match")
String regexMatch;
/**
* Replace the input value with the specified string or regular expression group from {@link #regexMatch}.
* If not specified, the full input string is used (regardless of match region).
......@@ -52,26 +47,16 @@ public abstract class Column<T> {
@XmlAttribute(name="regex-replace")
String regexReplace;
/**
* Action to perform if the {@link #regexMatch} did not match the input string.
* Either use NA (usually null) for the value, or drop the whole concept/fact.
*/
@XmlAttribute(name="regex-nomatch-action")
String regexNoMatchAction; // either na or drop
/**
* Report a warning if the {@link #regexMatch} did not match the input string.
* Defaults to true.
*/
@XmlAttribute(name="regex-nomatch-warning")
Boolean regexNoMatchWarning;
/**
* Column name to use for reading input values.
*/
@XmlAttribute
@XmlAttribute(required=true)
String column;
@XmlElement(required=false)
MapRules map;
/**
* Column name to use for reading input values
* @return column name
......@@ -92,25 +77,37 @@ public abstract class Column<T> {
* @throws ParseException on errors with regular expressions
*/
public Object preprocessValue(Object value)throws ParseException{
// use constant value if provided
if( constantValue != null ){
value = constantValue;
}
if( na != null && value != null && na.equals(value) ){
value = null;
// apply regular expression replacements
if( value != null && regexReplace != null ){
value = applyRegexReplace((String)value);
}
if( value != null && regexMatch != null ){
if( !(value instanceof String) ){
throw new ParseException("regex-match can only be used on String, but found "+value.getClass().getName());
}
value = applyRegularExpression((String)value);
// apply map rules
if( map != null ){
// TODO apply map rules
// TODO find way to communicate warnings
// TODO find way to set action (inplace/drop/generate)
}
// check for na result
if( na != null && value != null && na.equals(value) ){
value = null;
}
return value;
}
/**
 * Placeholder for the {@code regex-replace} step of value preprocessing.
 * The replacement is not implemented yet, so the input is handed back as is.
 * @param value input string
 * @return the unmodified {@code value}
 */
private String applyRegexReplace(String value){
// TODO apply replace
return value;
}
public T valueOf(ColumnMap map, Object[] row) throws ParseException{
if( column == null || column.isEmpty() ){
// use constant value if available
......@@ -122,11 +119,6 @@ public abstract class Column<T> {
Objects.requireNonNull(index);
return this.valueOf(row[index]);
}
/**
 * Placeholder for applying a regular expression to an input string.
 * The matching logic is not implemented yet, so the input is returned unchanged.
 * @param input input string
 * @return the unmodified {@code input}
 */
public String applyRegularExpression(String input){
// TODO: apply
return input;
}
public void validate()throws ParseException{
if( column == null && constantValue == null ){
......
package de.sekmi.histream.etl.config;
import javax.xml.bind.annotation.XmlAccessType;
import javax.xml.bind.annotation.XmlAccessorType;
import javax.xml.bind.annotation.XmlAttribute;
import javax.xml.bind.annotation.XmlElement;
/**
 * Single rule inside a {@link MapRules} block, used both for the
 * {@code case} elements and for the {@code otherwise} fallback.
 * <p>
 * The import configuration writes all rule properties as XML attributes,
 * e.g. {@code <case value="W" set-value="F"/>} or
 * {@code <otherwise log-warning="Unexpected value" action="drop-fact"/>}.
 * Every field is therefore bound with {@link XmlAttribute}; binding them as
 * child elements would leave all fields {@code null} after unmarshalling.
 * Attributes which are absent in the XML result in {@code null} fields.
 */
@XmlAccessorType(XmlAccessType.FIELD)
public class MapCase {
	/** Input value to compare against; not specified on {@code otherwise} rules. */
	@XmlAttribute
	String value;

	/** Content of the {@code set-value} attribute. */
	@XmlAttribute(name="set-value")
	String setValue;

	/** Content of the {@code set-concept} attribute. */
	@XmlAttribute(name="set-concept")
	String setConcept;

	// TODO use enum
	/** Content of the {@code action} attribute (the sample configuration uses e.g. {@code inplace}, {@code drop-fact}). */
	@XmlAttribute
	String action;

	/** Content of the {@code log-warning} attribute. */
	@XmlAttribute(name="log-warning")
	String logWarning;
}
package de.sekmi.histream.etl.config;
import javax.xml.bind.annotation.XmlElement;
/**
 * Map rules for columns or concepts.
 * <p>
 * Holds the rules parsed from a {@code map} element of the import
 * configuration: any number of {@code case} rules and one optional
 * {@code otherwise} rule.
 *
 * @author R.W.Majeed
 */
public class MapRules {

	/** Rules bound to the repeated {@code case} child elements. */
	@XmlElement(name = "case")
	MapCase[] cases;

	/** Rule bound to the optional {@code otherwise} child element; {@code null} if absent. */
	@XmlElement
	MapCase otherwise;
}
......@@ -32,6 +32,9 @@ public abstract class Table<T extends FactRow> {
* @throws ParseException if headers could not be found/mapped
*/
protected void mapRegisterConcept(ColumnMap map, Concept c) throws ParseException{
if( c.start == null ){
throw new ParseException("Start timestamp undefined for concept '"+c.id+"'");
}
map.registerColumn(c.start);
if( c.end != null ){
map.registerColumn(c.end);
......
......@@ -30,6 +30,9 @@ public class TestMarshall {
Assert.assertEquals("patid",ds.patientTable.idat.patientId.column);
Assert.assertEquals("geburtsdatum",ds.patientTable.idat.birthdate.column);
Assert.assertEquals("geschlecht",ds.patientTable.idat.gender.column);
// check gender mapping
Assert.assertNotNull(ds.patientTable.idat.gender.map);
Assert.assertEquals(2,ds.patientTable.idat.gender.map.cases.length);
Assert.assertEquals("vorname",ds.patientTable.idat.givenName.column);
Assert.assertEquals("nachname",ds.patientTable.idat.surname.column);
......
......@@ -4,7 +4,7 @@
<id>test-1</id>
<etl-strategy>replace-source</etl-strategy>
</meta>
<patient-table>
<source xsi:type="plain-file">
<url>test-1-patients.txt</url>
......@@ -17,9 +17,9 @@
<birthdate format="d.M.u" na="" column="geburtsdatum"/>
<deathdate format="d.M.u" na="" column="verstorben"/>
<gender column="geschlecht">
<map>
<if value="W" set-value="F"/>
<if value="M" set-value="M"/>
<map> <!-- maps a column -->
<case value="W" set-value="F"/>
<case value="M" set-value="M"/>
<otherwise set-value="" log-warning="Unexpected gender value"/>
</map>
</gender>
......@@ -74,6 +74,16 @@
<value column="k" xsi:type="decimal"/>
<start column="zeitpunkt" format="d.M.u[ H[:m[:s]]]"/>
</concept>
<concept id="enum1">
<value column="en" na="" xsi:type="string">
<map>
<case value="1" set-value="" set-concept="enum1_1"/>
<case value="2" set-value="" set-concept="enum1_2"/>
<otherwise action="drop-fact" />
</map>
</value>
<start column="zeitpunkt" format="d.M.u[ H[:m[:s]]]"/>
</concept>
</mdat>
</wide-table>
......@@ -97,16 +107,17 @@
<value column="value" na="@"/>
<unit column="unit" na="@"/>
</mdat>
<eav-map>
<apply-mapping>
<!-- for eav facts, the map always applies to the fact value -->
<map concept="f_eav_x">
<if value="1" set-value="" set-concept="f_eav_x_1"/>
<case value="1" set-value="" set-concept="f_eav_x_1"/>
<!-- action inplace is default -->
<if value="0" set-value="" set-concept="f_eav_x_0" action="inplace" />
<case value="0" set-value="" set-concept="f_eav_x_0" action="inplace" />
<!-- action drop will not produce any fact -->
<otherwise log-warning="Unexpected value" action="drop" />
<otherwise log-warning="Unexpected value" action="drop-fact" />
<!-- action generate will produce a new concept with the given values -->
</map>
</eav-map>
</apply-mapping>
</eav-table>
</datasource>
patid fallnr zeitpunkt na k co2 cl glucose ca bun creatinine
p1 v1 01.01.2010 124 5.8 25 101 107 10.1 17 0.9
p1 v2 02.01.2010 123 5.7 28 101 106 9.8 15 1.3
p2 v3 03.01.2010 124 5.8 25 101 107 10.1 17 0.9
p3 v4 04.01.2010 124 5.8 25 101 107 10.1 17 0.9
patid fallnr zeitpunkt na k co2 cl glucose ca bun creatinine en
p1 v1 01.01.2010 124 5.8 25 101 107 10.1 17 0.9 1
p1 v2 02.01.2010 123 5.7 28 101 106 9.8 15 1.3 2
p2 v3 03.01.2010 124 5.8 25 101 107 10.1 17 0.9 1
p3 v4 04.01.2010 124 5.8 25 101 107 10.1 17 0.9 1
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment