Commit b2b13333 authored by R.W.Majeed's avatar R.W.Majeed

Import module for any table based format

parent 70b98829
.settings/
.classpath
.project
target/
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<!-- Maven descriptor of the "HIStream : Import" module (artifact histream-import). -->
<modelVersion>4.0.0</modelVersion>
<name>HIStream : Import</name>
<!-- groupId and version repeat the values declared for the parent below. -->
<groupId>de.sekmi.histream</groupId>
<artifactId>histream-import</artifactId>
<version>0.2-alpha</version>
<parent>
<groupId>de.sekmi.histream</groupId>
<artifactId>histream</artifactId>
<version>0.2-alpha</version>
</parent>
<build>
<!-- No module-specific build plugins are configured. -->
<plugins>
</plugins>
</build>
<dependencies>
<!-- Core HIStream module, same version as this module. -->
<dependency>
<groupId>de.sekmi.histream</groupId>
<artifactId>histream-core</artifactId>
<version>0.2-alpha</version>
</dependency>
<!-- NOTE(review): neither version nor scope is declared for junit here; presumably both are managed by the parent POM (confirm, otherwise add a version and test scope). -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
</dependency>
</dependencies>
</project>
\ No newline at end of file
package de.sekmi.histream.etl;
import de.sekmi.histream.Observation;
import de.sekmi.histream.ObservationSupplier;
/**
 * Placeholder {@link ObservationSupplier} of the ETL import module.
 * <p>
 * All methods are still auto-generated stubs: nothing is read, nothing is
 * released on close and no meta information is available.
 */
public class ETLObservationSupplier implements ObservationSupplier {

    /**
     * Not implemented yet.
     *
     * @return always {@code null}
     */
    @Override
    public Observation get() {
        // TODO Auto-generated method stub
        return null;
    }

    /**
     * Not implemented yet; currently does nothing.
     *
     * @throws Exception declared by the signature, never thrown by this stub
     */
    @Override
    public void close() throws Exception {
        // TODO Auto-generated method stub
    }

    /**
     * Not implemented yet.
     *
     * @param arg0 requested meta key (ignored by this stub)
     * @return always {@code null}
     */
    @Override
    public String getMeta(String arg0) {
        // TODO Auto-generated method stub
        return null;
    }
}
package de.sekmi.histream.etl.config;
import javax.xml.bind.annotation.XmlAttribute;
import javax.xml.bind.annotation.XmlValue;
/**
 * Reference to a table column inside the import configuration.
 * <p>
 * JAXB binding: the element text is the column {@link #name}; {@code na} and
 * {@code constant-value} are optional XML attributes.
 */
public class Column {

    /**
     * Bound to the XML attribute {@code na}.
     * NOTE(review): presumably the marker for "not available" values - confirm.
     */
    @XmlAttribute
    String na;

    /** Bound to the XML attribute {@code constant-value}. */
    @XmlAttribute(name = "constant-value")
    String constantValue;

    /** Column name, bound to the text content of the element. */
    @XmlValue
    String name;

    /**
     * No-argument constructor, not accessible to other classes.
     * NOTE(review): presumably required by JAXB for unmarshalling - confirm.
     */
    private Column() {
    }

    /**
     * Creates a column reference with the given name.
     *
     * @param name column name
     */
    public Column(String name) {
        this();
        this.name = name;
    }
}
package de.sekmi.histream.etl.config;
import javax.xml.bind.annotation.XmlAccessType;
import javax.xml.bind.annotation.XmlAccessorType;
import javax.xml.bind.annotation.XmlAttribute;
import javax.xml.bind.annotation.XmlElement;
/**
 * Concept from a wide table.
 * <p>
 * All fields are bound by JAXB field access: {@code id} is a required XML
 * attribute, {@code start} a required element, the remaining columns are
 * optional elements and every {@code modifier} element becomes one entry of
 * {@link #modifiers}.
 *
 * @author Raphael
 */
@XmlAccessorType(XmlAccessType.FIELD)
public class Concept {

    /** Concept id (required attribute). */
    @XmlAttribute(required = true)
    String id;

    // TODO: value should contain also type (string,decimal,integer,...)
    Column value;

    Column unit;

    /** Start column (required element). */
    @XmlElement(required = true)
    Column start;

    Column end;

    /** Modifiers of this concept, one per {@code modifier} element. */
    @XmlElement(name = "modifier")
    Modifier[] modifiers;
    // ...

    /** No-argument constructor, not accessible to other classes. */
    private Concept() {
    }

    /**
     * Creates a concept with the given id whose start is read from the
     * named column.
     *
     * @param id          concept id
     * @param startColumn name of the column used for {@link #start}
     */
    public Concept(String id, String startColumn) {
        this();
        this.id = id;
        this.start = new Column(startColumn);
    }

    /**
     * Modifier attached to a {@link Concept}: a required {@code id}
     * attribute plus optional value and unit columns.
     */
    @XmlAccessorType(XmlAccessType.FIELD)
    public static class Modifier {

        /** Modifier id (required attribute). */
        @XmlAttribute(required = true)
        String id;

        // TODO: value with type
        Column value;

        Column unit;

        /** No-argument constructor, not accessible to other classes. */
        private Modifier() {
        }

        /**
         * Creates a modifier with the given id.
         *
         * @param id modifier id
         */
        public Modifier(String id) {
            this();
            this.id = id;
        }
    }
}
\ No newline at end of file
package de.sekmi.histream.etl.config;
import javax.xml.bind.annotation.XmlAccessType;
import javax.xml.bind.annotation.XmlAccessorType;
import javax.xml.bind.annotation.XmlElement;
import javax.xml.bind.annotation.XmlElementWrapper;
/**
 * Top-level configuration object of an import data source.
 * <p>
 * Bound by JAXB field access. Holds the meta section, optional XML sources
 * (wrapped in a {@code transformation} element), the required patient table,
 * an optional visit table and any number of wide tables.
 */
@XmlAccessorType(XmlAccessType.FIELD)
public class DataSource {

    /** Meta information, element {@code meta}. */
    @XmlElement
    Meta meta;

    /** {@code xml-source} elements nested inside {@code transformation}. */
    @XmlElementWrapper(name = "transformation")
    @XmlElement(name = "xml-source")
    XmlSource[] xmlSources;

    /** Required element {@code patient-table}. */
    @XmlElement(name = "patient-table", required = true)
    PatientTable patientTable;

    /** Element {@code visit-table}. */
    @XmlElement(name = "visit-table")
    VisitTable visitTable;

    /** One entry per {@code wide-table} element. */
    @XmlElement(name = "wide-table")
    WideTable[] wideTables;
}
package de.sekmi.histream.etl.config;
import javax.xml.bind.annotation.XmlElement;
/**
 * Identifying columns of a data table: the patient id inherited from
 * {@link IdatColumns} plus a visit id column.
 */
public class DataTableIdat extends IdatColumns {

    /** Column holding the visit id, element {@code visit-id}. */
    @XmlElement(name = "visit-id")
    Column visitId;
}
package de.sekmi.histream.etl.config;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.stream.Stream;
import javax.xml.bind.annotation.XmlAccessType;
import javax.xml.bind.annotation.XmlAccessorType;
import javax.xml.bind.annotation.XmlElement;
/**
 * {@link TableSource} described by a URL and a type string.
 * <p>
 * Reading is not implemented yet: {@link #getHeaders()} and {@link #rows()}
 * both return {@code null}.
 */
@XmlAccessorType(XmlAccessType.FIELD)
public class FileSource extends TableSource {

    /** Location of the data, element {@code url}. */
    @XmlElement
    URL url;

    /**
     * Element {@code type}.
     * NOTE(review): presumably a media type such as {@code text/csv} - confirm.
     */
    @XmlElement
    String type;

    /** No-argument constructor, not accessible to other classes. */
    private FileSource() {
    }

    /**
     * Creates a file source.
     *
     * @param url  location of the data, parsed with {@link URL#URL(String)}
     * @param type type string stored as is
     * @throws MalformedURLException if {@code url} cannot be parsed
     */
    public FileSource(String url, String type) throws MalformedURLException {
        this();
        this.url = new URL(url);
        this.type = type;
    }

    /**
     * Not implemented yet.
     *
     * @return always {@code null}
     */
    @Override
    public String[] getHeaders() {
        // TODO Auto-generated method stub
        return null;
    }

    /**
     * Not implemented yet.
     *
     * @return always {@code null}
     */
    @Override
    public Stream<String[]> rows() {
        return null;
    }
}
package de.sekmi.histream.etl.config;
import javax.xml.bind.annotation.XmlElement;
/**
 * Base class for identifying column sets; contributes the patient id column.
 */
public class IdatColumns {

    /** Column holding the patient id, element {@code patient-id}. */
    @XmlElement(name = "patient-id")
    Column patientId;
}
package de.sekmi.histream.etl.config;
import javax.xml.bind.annotation.XmlElement;
/**
 * Meta section of a data source configuration.
 */
public class Meta {

    /** Element {@code source-id}. */
    @XmlElement(name = "source-id")
    String sourceId;

    /** Element {@code etl-strategy}. */
    @XmlElement(name = "etl-strategy")
    String etlStrategy;
}
package de.sekmi.histream.etl.config;
import javax.xml.bind.annotation.XmlAccessType;
import javax.xml.bind.annotation.XmlAccessorType;
import javax.xml.bind.annotation.XmlElement;
import javax.xml.bind.annotation.XmlElementWrapper;
/**
 * Patient table. Contains the patient id and other identifying information
 * and can also contain medical data.
 * <p>
 * The identifying columns are bound to the {@code idat} element, the concepts
 * to {@code concept} elements wrapped in {@code mdat}.
 *
 * @author marap1
 */
public class PatientTable extends Table implements WideInterface {

    /** Identifying columns, element {@code idat}. */
    @XmlElement
    IDAT idat;

    /** Medical data concepts: {@code concept} elements inside {@code mdat}. */
    @XmlElementWrapper(name = "mdat")
    @XmlElement(name = "concept")
    Concept[] concepts;

    /**
     * Identifying columns of the patient table. The patient id column is
     * inherited from {@link IdatColumns}.
     */
    @XmlAccessorType(XmlAccessType.FIELD)
    public static class IDAT extends IdatColumns {
        Column firstname;
        Column lastname;
        Column birthdate;
        Column deathdate;
        Column gender;
        // NOTE(review): presumably columns to be skipped during import - confirm
        Column[] ignore;
    }
}
package de.sekmi.histream.etl.config;
import java.util.stream.Stream;
import javax.xml.bind.annotation.XmlElement;
/**
 * {@link TableSource} described by a JDBC driver, a connect string and an
 * SQL statement.
 * <p>
 * Reading is not implemented yet: {@link #getHeaders()} and {@link #rows()}
 * both return {@code null}.
 */
public class SQLSource extends TableSource {

    /** Element {@code jdbcDriver}. */
    @XmlElement
    String jdbcDriver;

    /** Element {@code connectString}. */
    @XmlElement
    String connectString;

    /** Element {@code sql}; not set by the public constructor. */
    @XmlElement
    String sql;

    /** No-argument constructor, not accessible to other classes. */
    private SQLSource() {
    }

    /**
     * Creates an SQL source without an SQL statement.
     *
     * @param driver        value stored in {@link #jdbcDriver}
     * @param connectString value stored in {@link #connectString}
     */
    public SQLSource(String driver, String connectString) {
        this();
        this.jdbcDriver = driver;
        this.connectString = connectString;
    }

    /**
     * Not implemented yet.
     *
     * @return always {@code null}
     */
    @Override
    public String[] getHeaders() {
        // TODO Auto-generated method stub
        return null;
    }

    /**
     * Not implemented yet.
     *
     * @return always {@code null}
     */
    @Override
    public Stream<String[]> rows() {
        // TODO Auto-generated method stub
        return null;
    }
}
package de.sekmi.histream.etl.config;
import javax.xml.bind.annotation.XmlElement;
import javax.xml.bind.annotation.XmlTransient;
/**
 * Common base of all configured tables; carries the required
 * {@code source} element. The class itself is marked {@code @XmlTransient}.
 */
@XmlTransient
public class Table {

    /** Where the table data comes from (required element {@code source}). */
    @XmlElement(required = true)
    TableSource source;
}
package de.sekmi.histream.etl.config;
import java.util.stream.Stream;
import javax.xml.bind.annotation.XmlAccessType;
import javax.xml.bind.annotation.XmlAccessorType;
import javax.xml.bind.annotation.XmlSeeAlso;
import javax.xml.bind.annotation.XmlTransient;
/**
 * Abstract source of tabular data, exposing header names and data rows.
 * <p>
 * {@link FileSource} and {@link SQLSource} are announced to JAXB via
 * {@code @XmlSeeAlso}.
 */
@XmlTransient
@XmlAccessorType(XmlAccessType.NONE)
@XmlSeeAlso({ FileSource.class, SQLSource.class })
public abstract class TableSource {

    /**
     * @return the header names of the table
     */
    public abstract String[] getHeaders();

    /**
     * @return the table rows, each row as a string array
     */
    public abstract Stream<String[]> rows();
}
\ No newline at end of file
package de.sekmi.histream.etl.config;
import javax.xml.bind.annotation.XmlAccessType;
import javax.xml.bind.annotation.XmlAccessorType;
import javax.xml.bind.annotation.XmlElement;
import javax.xml.bind.annotation.XmlElementWrapper;
import javax.xml.bind.annotation.XmlType;
/**
 * Visit table. Contains the identifying columns of a visit and may also
 * contain medical data.
 * <p>
 * The identifying columns are bound to the {@code idat} element, the concepts
 * to {@code concept} elements wrapped in {@code mdat}.
 */
public class VisitTable extends Table implements WideInterface {

    /** Identifying columns, element {@code idat}. */
    @XmlElement
    IDAT idat;

    /** Medical data concepts: {@code concept} elements inside {@code mdat}. */
    @XmlElementWrapper(name = "mdat")
    @XmlElement(name = "concept")
    Concept[] concepts;

    /**
     * Identifying columns of the visit table. The patient id column is
     * inherited from {@link IdatColumns}.
     * <p>
     * The explicit XML type name keeps this class apart from
     * {@code PatientTable.IDAT}, which has the same simple class name. It was
     * previously declared as {@code patient-idat}, which mislabelled the
     * visit columns; only the schema type name changes, element names are
     * untouched.
     */
    @XmlType(name = "visit-idat")
    @XmlAccessorType(XmlAccessType.FIELD)
    public static class IDAT extends IdatColumns {

        /** Column holding the visit id, element {@code visit-id}. */
        @XmlElement(name = "visit-id")
        Column visitId;

        Column start;
        Column end;
        // TODO inpatient/outpatient state
        // NOTE(review): presumably columns to be skipped during import - confirm
        Column[] ignore;
    }
}
package de.sekmi.histream.etl.config;
/**
 * Marker interface without members, implemented by table configurations
 * such as {@code PatientTable} and {@code VisitTable}.
 */
public interface WideInterface {
    // intentionally empty
}
package de.sekmi.histream.etl.config;
import javax.xml.bind.annotation.XmlElement;
import javax.xml.bind.annotation.XmlElementWrapper;
/**
 * Wide table configuration: identifying columns (patient and visit id) plus
 * the concepts found in the table.
 */
public class WideTable extends Table {

    /** Identifying columns, element {@code idat}. */
    @XmlElement
    DataTableIdat idat;

    /** Concepts: {@code concept} elements inside {@code mdat}. */
    @XmlElementWrapper(name = "mdat")
    @XmlElement(name = "concept")
    Concept[] concepts;
}
package de.sekmi.histream.etl.config;
import java.net.MalformedURLException;
import java.net.URL;
import javax.xml.bind.annotation.XmlAccessType;
import javax.xml.bind.annotation.XmlAccessorType;
import javax.xml.bind.annotation.XmlAttribute;
import javax.xml.bind.annotation.XmlElement;
/**
 * XML source of the import configuration: a URL attribute plus a list of
 * {@code transform} elements.
 */
@XmlAccessorType(XmlAccessType.FIELD)
public class XmlSource {

    /** Location of the XML document, attribute {@code url}. */
    @XmlAttribute
    URL url;

    /** One entry per {@code transform} element. */
    @XmlElement
    Transform[] transform;

    /**
     * Single transformation step with the attributes {@code with} and
     * {@code to}.
     * NOTE(review): presumably {@code with} names a stylesheet and {@code to}
     * the output file - confirm.
     */
    public static class Transform {

        /** Attribute {@code with}. */
        @XmlAttribute
        URL with;

        /** Attribute {@code to}. */
        @XmlAttribute
        String to; // TODO use Path internally

        /** Creates an empty transformation step. */
        public Transform() {
        }

        /**
         * Creates a transformation step.
         *
         * @param with URL string, parsed with {@link URL#URL(String)}
         * @param to   target, stored as is
         * @throws MalformedURLException if {@code with} cannot be parsed
         */
        public Transform(String with, String to) throws MalformedURLException {
            this.with = new URL(with);
            this.to = to;
        }
    }
}
/**
*
*/
/**
* @author marap1
*
*/
package de.sekmi.histream.etl.config;
\ No newline at end of file
/**
*
*/
/**
* @author marap1
*
*/
package de.sekmi.histream.etl;
\ No newline at end of file
package de.sekmi.histream.etl.config;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;
import javax.xml.bind.JAXB;
import org.junit.Assert;
import org.junit.Test;
import de.sekmi.histream.etl.config.DataSource;
/**
 * JAXB round-trip tests for the import configuration classes.
 */
public class TestMarshall {

    /**
     * Unmarshals the classpath resource {@code /test-1-datasource.txt} and
     * verifies meta section, patient table, visit table, the single wide
     * table and its first concept.
     *
     * @throws IOException if closing the resource stream fails
     */
    @Test
    public void testUnmarshall() throws IOException {
        try (InputStream in = getClass().getResourceAsStream("/test-1-datasource.txt")) {
            // fail with a clear message instead of an obscure JAXB error
            // when the resource is missing from the classpath
            Assert.assertNotNull("test resource /test-1-datasource.txt not found", in);
            DataSource ds = JAXB.unmarshal(in, DataSource.class);

            Assert.assertNotNull(ds.meta);
            Assert.assertEquals("replace-source", ds.meta.etlStrategy);
            Assert.assertEquals("test-1", ds.meta.sourceId);

            // patient table
            Assert.assertNotNull(ds.patientTable);
            Assert.assertNotNull(ds.patientTable.source);
            Assert.assertNotNull(ds.patientTable.idat);
            Assert.assertEquals("patid", ds.patientTable.idat.patientId.name);
            Assert.assertEquals("geburtsdatum", ds.patientTable.idat.birthdate.name);
            Assert.assertEquals("geschlecht", ds.patientTable.idat.gender.name);

            // visit table
            Assert.assertNotNull(ds.visitTable);
            Assert.assertNotNull(ds.visitTable.source);
            Assert.assertNotNull(ds.visitTable.idat);
            Assert.assertEquals("patid", ds.visitTable.idat.patientId.name);
            Assert.assertEquals("fallnr", ds.visitTable.idat.visitId.name);

            // wide table
            Assert.assertNotNull(ds.wideTables);
            Assert.assertEquals(1, ds.wideTables.length);
            WideTable t = ds.wideTables[0];
            Assert.assertNotNull(t);
            Assert.assertNotNull(t.idat);
            Assert.assertEquals("patid", t.idat.patientId.name);
            Assert.assertEquals("fallnr", t.idat.visitId.name);

            // concepts
            Assert.assertNotNull(t.concepts);
            Assert.assertTrue(t.concepts.length > 0);
            Concept c = t.concepts[0];
            Assert.assertNotNull(c);
            Assert.assertEquals("natrium", c.id);
            Assert.assertEquals("na", c.value.name);
            Assert.assertEquals("mmol/l", c.unit.constantValue);
        }
    }

    /**
     * Builds a complete {@link DataSource} in memory and marshals it to
     * {@code System.out}. The output is not verified; the test only checks
     * that marshalling completes without an exception.
     *
     * @throws MalformedURLException if one of the hard-coded URLs is invalid
     */
    @Test
    public void testMarshal() throws MalformedURLException {
        DataSource s = new DataSource();

        s.meta = new Meta();
        s.meta.sourceId = "SID";
        s.meta.etlStrategy = "replace-source";

        s.xmlSources = new XmlSource[1];
        s.xmlSources[0] = new XmlSource();
        s.xmlSources[0].url = new URL("http://lala");
        s.xmlSources[0].transform = new XmlSource.Transform[1];
        s.xmlSources[0].transform[0] = new XmlSource.Transform("file:my.xsl", "c:/to/file");

        s.patientTable = new PatientTable();
        s.patientTable.source = new FileSource("file:patient.source", "text/csv");
        s.patientTable.idat = new PatientTable.IDAT();
        s.patientTable.idat.patientId = new Column("patid");

        s.visitTable = new VisitTable();
        s.visitTable.source = new FileSource("file:lala.txt", "text/plain");
        s.visitTable.idat = new VisitTable.IDAT();
        s.visitTable.idat.patientId = new Column("patid");
        s.visitTable.idat.visitId = new Column("visit");
        s.visitTable.concepts = new Concept[1];
        s.visitTable.concepts[0] = new Concept("vconcept", "start");

        s.wideTables = new WideTable[1];
        s.wideTables[0] = new WideTable();
        s.wideTables[0].source = new SQLSource("org.postgresql.Driver", "jdbc:postgresql://localhost:15432/i2b2");
        s.wideTables[0].idat = new DataTableIdat();
        s.wideTables[0].idat.patientId = new Column("patid");
        // exactly one concept is configured; the array is sized to match so
        // that no null element is handed to the marshaller
        s.wideTables[0].concepts = new Concept[1];
        s.wideTables[0].concepts[0] = new Concept("ACC", "zeit");
        s.wideTables[0].concepts[0].modifiers = new Concept.Modifier[1];
        s.wideTables[0].concepts[0].modifiers[0] = new Concept.Modifier("DOSE");
        s.wideTables[0].concepts[0].modifiers[0].value = new Column("dosis");

        JAXB.marshal(s, System.out);
    }
}
<!DOCTYPE configuration >
<datasource version="1.0">
<meta>
<etl-strategy></etl-strategy>
<source-id></source-id>
</meta>
<!-- erstmal weglassen -->
<transformations>
<xml-source url="xxxx.xml">
<transform with="a.xslt" to="lala.txt" />
<transform with="b.xslt" to="dddd.txt" />
</xml-source>
</transformations>
<!--
<value-mappings>
<map id="geschlecht">
<value from="M" to="M" />
<value from="W" to="F" />
</map>
</value-mappings>
-->
<patient-table>
<source>
<sql jdbc-driver="com.mysql..." db-url="jdbc:mysql://localhost/EMP" db-user="asdf" db-password="mkmlkd">
SELECT * FROM patienten p
</sql>
</source>
<idat>
<patient-id>ID</patient-id>
<firstname>Vorname</firstname>
<surname>Nachname</surname>
<birthdate></birthdate>
<deathdate></deathdate>
<gender map="geschlecht">geschlecht</gender>
<ignore>...</ignore>
</idat>
<mdat>
<concept id="gewicht">
<value>gewicht</value>
<start>eingabedatum</start>
<unit constant="kg"/>
</concept>
<concept id="lufu">
<value>lufu_fev1</value>
<start>visit_date</start>
</concept>
</mdat>
</patient-table>
<!-- optional -->
<visit-table>
<idat>
<patient-id>patid</patient-id>
<visit-id>event</visit-id>
<start format="ISO" na="@" truncate-to="year">asdf</start>
<end>asdff</end>
<ignore>...</ignore>
</idat>
<mdat>
<!-- in/out code -->
<concept id="lufu">
<value>lufu_fev1</value>
<start>visit_date</start>
</concept>
</mdat>
</visit-table>
<widetable>
<source>
<file url="file://c:/lala.txt" type="text/csv" key-column="PatientID" />
</source>
<idat>
<patient-id>asdfg</patient-id>
<visit-id>a33edd</visit-id>
</idat>
<mdat>
<concept id="blutdruck">
<value field="value" />
<start field="beginn" />
<end field="ende" />
<unit field="einheit" />
<modifier id="method">
<value field="methode" />
<unit fixed="mm" />
</modifier>
<modifier id="other">
<value field="other" />
</modifier>
</concept>
</mdat>
..
</widetable>
<eavtable>
<source>
<file url="file://c:/lala.txt" type="text/csv;delimiter=\t;encoding=UTF-8;" key-column="PatientID" />
<!-- file ODER sql -->
<sql>
SELECT konzept, beginn, ende, einheit FROM blutdruckmessungen WHERE patid=?
</sql>
</source>
<idat>
<patient-id>asdf</patient-id>
<visit-id>asdf</visit-id>
<ignore>...</ignore>
</idat>
<columns>
<concept>parameter</concept>
<value>wert</value>