Commit df24a214 authored by R.W.Majeed's avatar R.W.Majeed
Browse files

processing of patient tables implemented

parent 9a9650da
package de.sekmi.histream.etl.config;
package de.sekmi.histream.etl;
import java.util.HashMap;
import java.util.Map;
import de.sekmi.histream.etl.config.Column;
/**
* Maps {@link Column}s to header/table indices
*
......@@ -11,6 +13,10 @@ import java.util.Map;
*/
public class ColumnMap{
String[] headers;
/**
* Maps column names to row indices
*/
Map<String, Integer> map;
public ColumnMap(String[] headers){
......@@ -19,12 +25,20 @@ public class ColumnMap{
}
public boolean registerColumn(Column column){
if( map.containsKey(column.getName()) ){
// column name already registered
return true;
}
// find name and map to index
for( int i=0; i<headers.length; i++ ){
if( column.name.equals(headers[i]) ){
map.put(column.name, Integer.valueOf(i) );
if( headers[i].equals(column.getName()) ){
map.put(column.getName(), Integer.valueOf(i) );
return true;
}
}
return false;
}
/**
 * Looks up the header index previously stored for a column.
 *
 * @param column column whose name is used as lookup key
 * @return the mapped index, or {@code null} if the name has no mapping
 */
public Integer indexOf(Column column){
	final String key = column.getName();
	final Integer position = map.get(key);
	return position;
}
}
\ No newline at end of file
package de.sekmi.histream.etl;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UncheckedIOException;
import java.net.URL;
import java.util.regex.Pattern;
/**
 * Supplies rows from a delimited text resource.
 * <p>
 * The first line of the resource is consumed by the constructor and used as
 * header row. Every following line is split with the same separator pattern
 * and returned by {@link #get()}.
 * <p>
 * NOTE(review): the reader uses the platform default charset, as before;
 * confirm whether an explicit encoding should be passed in.
 */
public class FileRowSupplier extends RowSupplier {
	private final Pattern fieldSeparatorPattern;
	private final BufferedReader in;
	private final String[] headers;

	/**
	 * Opens the resource using a literal (non-regex) field separator.
	 *
	 * @param location resource to read
	 * @param fieldSeparator separator text, matched literally
	 * @throws IOException if the resource cannot be opened or has no header line
	 */
	public FileRowSupplier(URL location, String fieldSeparator) throws IOException{
		this(location, Pattern.compile(Pattern.quote(fieldSeparator)));
	}

	/**
	 * Opens the resource and reads the header line.
	 *
	 * @param location resource to read
	 * @param pattern regular expression separating the fields of a line
	 * @throws IOException if the resource cannot be opened or read, or if it
	 *         is empty and therefore has no header line
	 */
	public FileRowSupplier(URL location, Pattern pattern) throws IOException{
		this.fieldSeparatorPattern = pattern;
		// Closing the BufferedReader also closes the wrapped InputStream.
		this.in = new BufferedReader(new InputStreamReader(location.openStream()));
		String[] parsedHeaders;
		try{
			String line = in.readLine();
			if( line == null ){
				throw new IOException("No header line found in "+location);
			}
			parsedHeaders = splitFields(line);
		}catch( IOException | RuntimeException e ){
			// The caller never receives this instance, so release the stream here.
			try{
				in.close();
			}catch( IOException closeFailure ){
				e.addSuppressed(closeFailure);
			}
			throw e;
		}
		this.headers = parsedHeaders;
	}

	/**
	 * @return the fields of the first line of the resource
	 */
	@Override
	public String[] getHeaders() throws IOException {
		return headers;
	}

	/**
	 * Reads and splits the next line.
	 *
	 * @return the fields of the next line, or {@code null} at end of input
	 * @throws UncheckedIOException if reading the line fails
	 */
	@Override
	public Object[] get() {
		String line;
		try {
			line = in.readLine();
		} catch (IOException e) {
			throw new UncheckedIOException(e);
		}
		if( line == null ){
			// end of file
			return null;
		}
		return splitFields(line);
	}

	/** Closes the reader and with it the underlying stream. */
	@Override
	public void close() throws IOException {
		in.close();
	}

	/**
	 * Splits a line into fields. The negative limit keeps trailing empty
	 * fields, so a line ending in separators still yields one entry per column.
	 */
	private String[] splitFields(String line){
		return fieldSeparatorPattern.split(line, -1);
	}
}
package de.sekmi.histream.etl;
/**
 * Checked exception signalling that an input value could not be converted
 * to the expected output type.
 */
public class ParseException extends Exception {
	private static final long serialVersionUID = 1L;

	/**
	 * Creates an exception without an underlying cause.
	 *
	 * @param message description of the parse failure
	 */
	public ParseException(String message){
		super(message);
	}

	/**
	 * Creates an exception that keeps the underlying failure, so the original
	 * stack trace is not lost when a lower-level error is translated.
	 *
	 * @param message description of the parse failure
	 * @param cause underlying failure, may be {@code null}
	 */
	public ParseException(String message, Throwable cause){
		super(message, cause);
	}
}
package de.sekmi.histream.etl;
import java.time.Instant;
import de.sekmi.histream.DateTimeAccuracy;
import de.sekmi.histream.etl.config.PatientTable;
import de.sekmi.histream.ext.Patient;
/**
 * Row of patient data, usable as a {@link Patient}.
 * <p>
 * All properties are plain stored values: every setter keeps its argument
 * and the matching getter returns it unchanged.
 *
 * @author Raphael
 */
public class PatientRow implements Patient{
	String patid;
	String firstname;
	String lastname;
	DateTimeAccuracy birthdate;
	DateTimeAccuracy deathdate;
	Sex sex;
	String sourceId;
	Instant sourceTimestamp;
	// TODO concepts

	/**
	 * Sets both name parts at once.
	 *
	 * @param first first name, may be {@code null}
	 * @param last last name, may be {@code null}
	 */
	public void setNames(String first, String last){
		this.firstname = first;
		this.lastname = last;
	}

	/** @param date birth date, may be {@code null} */
	public void setBirthDate(DateTimeAccuracy date){
		this.birthdate = date;
	}

	/** @param date death date, may be {@code null} */
	public void setDeathDate(DateTimeAccuracy date){
		this.deathdate = date;
	}

	/**
	 * Placeholder factory: currently returns a row without any values filled in.
	 * None of the arguments are read yet.
	 *
	 * @param input raw row values (unused)
	 * @param map column map (unused)
	 * @param table table configuration (unused)
	 * @return a new row whose id is {@code null}
	 */
	public static PatientRow load(Object[] input, ColumnMap map, PatientTable table){
		PatientRow row = new PatientRow();
		row.patid = null; // table.patid.valueOf(input[map.findIndex(table.patid)])
		return row;
	}

	@Override
	public String getId() {
		return patid;
	}

	@Override
	public void setId(String patientId) {
		this.patid = patientId;
	}

	@Override
	public String getSourceId() {
		return this.sourceId;
	}

	@Override
	public Instant getSourceTimestamp() {
		return this.sourceTimestamp;
	}

	@Override
	public void setSourceId(String arg0) {
		// previously a no-op, which silently dropped the value
		this.sourceId = arg0;
	}

	@Override
	public void setSourceTimestamp(Instant arg0) {
		// previously a no-op, which silently dropped the value
		this.sourceTimestamp = arg0;
	}

	@Override
	public DateTimeAccuracy getBirthDate() {
		return this.birthdate;
	}

	@Override
	public DateTimeAccuracy getDeathDate() {
		return this.deathdate;
	}

	@Override
	public Sex getSex() {
		return this.sex;
	}

	@Override
	public void setSex(Sex arg0) {
		// previously a no-op, which silently dropped the value
		this.sex = arg0;
	}
}
package de.sekmi.histream.etl;
import java.io.IOException;
import java.util.function.Supplier;
import de.sekmi.histream.etl.config.PatientTable;
/**
 * Turns the raw rows of a {@link RowSupplier} into {@link PatientRow}s using
 * the column configuration of a {@link PatientTable}.
 * <p>
 * The column map is built once in the constructor from the supplier's headers.
 */
public class PatientStream implements Supplier<PatientRow>, AutoCloseable{
	RowSupplier rows;
	PatientTable table;
	ColumnMap map;

	/**
	 * @param rows source of raw rows; closed by {@link #close()}
	 * @param table table configuration used to map and convert the columns
	 * @throws IOException if the headers cannot be read
	 */
	public PatientStream(RowSupplier rows, PatientTable table) throws IOException{
		this.table = table;
		this.rows = rows;
		String[] headers = rows.getHeaders();
		this.map = table.getColumnMap(headers);
	}

	/**
	 * Converts the next raw row.
	 *
	 * @return the next patient, or {@code null} once the row supplier returns {@code null}
	 * @throws UncheckedParseException if a value in the row cannot be parsed
	 */
	@Override
	public PatientRow get() {
		Object[] fields = rows.get();
		if( fields != null ){
			try {
				return table.fillPatient(map, fields);
			} catch (ParseException cause) {
				throw new UncheckedParseException(cause);
			}
		}
		// no more rows
		return null;
	}

	/** Closes the underlying row supplier. */
	@Override
	public void close() throws IOException {
		rows.close();
	}
}
package de.sekmi.histream.etl;
import java.io.IOException;
import java.util.function.Supplier;
/**
 * Base type for sources of tabular data: a header row plus a sequence of
 * data rows obtained one at a time through {@link #get()}.
 */
public abstract class RowSupplier implements Supplier<Object[]>, AutoCloseable{

	/** No state to initialise; present for subclasses. */
	public RowSupplier(){
		super();
	}

	/**
	 * @return the next row of values
	 */
	@Override
	public abstract Object[] get();

	/**
	 * @return the column headers of the table
	 * @throws IOException if the headers cannot be read
	 */
	public abstract String[] getHeaders() throws IOException;

	/**
	 * Releases the resources held by this supplier.
	 *
	 * @throws IOException if closing fails
	 */
	@Override
	public abstract void close() throws IOException;
}
package de.sekmi.histream.etl;
/**
 * Unchecked wrapper that carries a {@link ParseException} through code which
 * cannot declare checked exceptions, such as {@code Supplier.get()}.
 */
public class UncheckedParseException extends RuntimeException {
	private static final long serialVersionUID = 1L;

	/**
	 * @param cause the checked exception to wrap
	 */
	public UncheckedParseException(ParseException cause){
		super(cause);
	}

	/**
	 * @return the wrapped exception, already typed as {@link ParseException}
	 */
	@Override
	public ParseException getCause(){
		Throwable wrapped = super.getCause();
		return ParseException.class.cast(wrapped);
	}
}
package de.sekmi.histream.etl;
/**
 * Placeholder for a row of visit data; it declares no members yet.
 */
public class VisitRow {
}
package de.sekmi.histream.etl;
import java.util.Spliterator;
import java.util.function.Consumer;
/**
 * Spliterator over {@link VisitRow}s. Not implemented yet: in its current
 * form it never produces an element, cannot be split, estimates its size as
 * zero and reports no characteristics.
 */
public class VisitSupplier implements Spliterator<VisitRow>{

	/** @return {@code 0}, i.e. no characteristics are reported */
	@Override
	public int characteristics() {
		// TODO Auto-generated method stub
		final int none = 0;
		return none;
	}

	/** @return {@code 0}, matching the fact that no element is ever produced */
	@Override
	public long estimateSize() {
		// TODO Auto-generated method stub
		final long empty = 0L;
		return empty;
	}

	/**
	 * @param action consumer for the next element; never invoked at present
	 * @return always {@code false}
	 */
	@Override
	public boolean tryAdvance(Consumer<? super VisitRow> action) {
		// TODO Auto-generated method stub
		return false;
	}

	/** @return always {@code null}: this spliterator is never split */
	@Override
	public Spliterator<VisitRow> trySplit() {
		// TODO Auto-generated method stub
		return null;
	}
}
......@@ -7,9 +7,12 @@ import javax.xml.bind.annotation.XmlSeeAlso;
import javax.xml.bind.annotation.XmlTransient;
import javax.xml.bind.annotation.XmlValue;
import de.sekmi.histream.etl.ColumnMap;
import de.sekmi.histream.etl.ParseException;
@XmlTransient
@XmlSeeAlso({StringColumn.class})
public class Column {
public abstract class Column {
protected Column(){
}
public Column(String name){
......@@ -62,6 +65,8 @@ public class Column {
@XmlValue
String name;
public String getName(){return name;}
/**
* Convert a string input value to the output data type. The resulting type depends
* on the type attribute and can be one of Long, BigDecimal, String, DateTime
......@@ -72,7 +77,7 @@ public class Column {
* @param value input value. e.g. from text table column
* @return output type representing the input value
*/
public Object valueOf(String value){
public Object valueOf(Object value)throws ParseException{
if( constantValue != null ){
value = constantValue;
}
......@@ -82,20 +87,25 @@ public class Column {
}
if( value != null && regexMatch != null ){
value = applyRegularExpression(value);
if( !(value instanceof String) ){
throw new ParseException("regex-match can only be used on String, but found "+value.getClass().getName());
}
value = applyRegularExpression((String)value);
}
return value;
}
/**
 * Picks this column's raw value out of a row and converts it.
 *
 * @param map column map in which this column has been registered
 * @param row raw values of one table row
 * @return the converted value, as produced by {@link #valueOf(Object)}
 * @throws ParseException if this column is not registered in {@code map},
 *         if the row is too short to contain the column, or if the
 *         conversion itself fails
 */
public Object valueOf(ColumnMap map, Object[] row) throws ParseException{
	Integer index = map.indexOf(this);
	if( index == null ){
		// an unregistered column would otherwise fail with an unboxing NullPointerException
		throw new ParseException("Column '"+getName()+"' is not registered in the column map");
	}
	int position = index.intValue();
	if( position < 0 || position >= row.length ){
		throw new ParseException("Row has "+row.length+" fields, but column '"+getName()+"' is mapped to index "+position);
	}
	return this.valueOf(row[position]);
}
/**
 * Hook for applying the column's regular expression to an input value.
 * Not implemented yet: the input is currently returned unchanged.
 *
 * @param input value to transform
 * @return the same {@code input} reference
 */
public String applyRegularExpression(String input){
// TODO: apply
return input;
}
public static class IntegerColumn extends Column{
}
public static class DecimalColumn extends Column{
@XmlTransient
DecimalFormat decimalFormat;
......
package de.sekmi.histream.etl.config;
import java.sql.Timestamp;
import java.time.format.DateTimeFormatter;
import javax.xml.bind.annotation.XmlAttribute;
import javax.xml.bind.annotation.XmlTransient;
import de.sekmi.histream.DateTimeAccuracy;
import de.sekmi.histream.etl.ParseException;
/**
* Date and Time column.
* TODO implement parsing of partial date/time. e.g. 2003-10
......@@ -31,4 +35,25 @@ public class DateTimeColumn extends Column{
protected DateTimeColumn(){
super();
}
/**
 * Converts an input value to a date/time value.
 * <p>
 * The generic column processing of the superclass is applied first. A
 * resulting {@code null} is passed through as {@code null}. Strings are
 * parsed with the configured format.
 *
 * @param value raw input value
 * @return the parsed date/time, or {@code null} if there is no value
 * @throws ParseException if a string must be parsed but no format is configured
 * @throws IllegalArgumentException if the value has an unsupported type
 */
@Override
public Object valueOf(Object value) throws ParseException{
	value = super.valueOf(value);
	if( value == null ){
		// no value: without this guard the final branch fails on value.getClass()
		return null;
	}
	if( value instanceof String ){
		// parse date according to format; the formatter is created on first use
		if( formatter == null && format != null ){
			formatter = DateTimeFormatter.ofPattern(format);
		}
		if( formatter == null ){
			throw new ParseException("format must be specified for DateTime fields if strings are parsed");
		}
		return DateTimeAccuracy.parse(formatter,(String)value);
	}else if( value instanceof Timestamp ){
		// NOTE(review): conversion from timestamp is not implemented yet; the value is dropped
		return null;
	}else{
		throw new IllegalArgumentException("Don't know how to parse type "+value.getClass()+" to datetime");
	}
}
}
\ No newline at end of file
package de.sekmi.histream.etl.config;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.stream.Stream;
import java.util.regex.Pattern;
import javax.xml.bind.annotation.XmlAccessType;
import javax.xml.bind.annotation.XmlAccessorType;
import javax.xml.bind.annotation.XmlElement;
import de.sekmi.histream.etl.FileRowSupplier;
import de.sekmi.histream.etl.RowSupplier;
@XmlAccessorType(XmlAccessType.FIELD)
public class FileSource extends TableSource{
@XmlElement
URL url;
@XmlElement
String type;
String encoding;
@XmlElement
String separator;
private FileSource(){
}
public FileSource(String url, String type) throws MalformedURLException{
public FileSource(String url, String separator) throws MalformedURLException{
this();
this.url = new URL(url);
this.type = type;
this.separator = separator;
}
@Override
public String[] getHeaders() {
// TODO Auto-generated method stub
return null;
public RowSupplier rows() throws IOException {
return new FileRowSupplier(url, Pattern.compile(separator));
}
@Override
public Stream<String[]> rows() {
return null;
}
}
package de.sekmi.histream.etl.config;
import java.io.IOException;
import javax.xml.bind.annotation.XmlAccessType;
import javax.xml.bind.annotation.XmlAccessorType;
import javax.xml.bind.annotation.XmlElement;
import javax.xml.bind.annotation.XmlElementWrapper;
import de.sekmi.histream.DateTimeAccuracy;
import de.sekmi.histream.etl.ColumnMap;
import de.sekmi.histream.etl.ParseException;
import de.sekmi.histream.etl.PatientRow;
import de.sekmi.histream.etl.PatientStream;
/**
* Patient table. Contains patient id and other identifying information.
* Can also contain medical data
......@@ -22,10 +30,49 @@ public class PatientTable extends Table implements WideInterface{
@XmlAccessorType(XmlAccessType.FIELD)
public static class IDAT extends IdatColumns{
StringColumn firstname;
StringColumn lastname;
StringColumn surname;
DateTimeColumn birthdate;
DateTimeColumn deathdate;
StringColumn gender;
Column[] ignore;
}
/**
 * Builds the column map for this table from a header row.
 * <p>
 * The patient id column is mandatory. The remaining identifying columns are
 * only registered when they are configured (non-null), but a configured
 * column must be present in the headers.
 *
 * @param headers header names of the table
 * @return map containing every configured column
 * @throws IllegalArgumentException if a configured column is missing from the headers
 */
@Override
public ColumnMap getColumnMap(String[] headers) {
	ColumnMap columns = new ColumnMap(headers);

	boolean idFound = columns.registerColumn(idat.patientId);
	if( !idFound ){
		throw new IllegalArgumentException("patientId column name '"+idat.patientId.name+"' not found in patient table headers");
	}

	// optional columns, in the order in which they are registered
	Column[] optional = { idat.firstname, idat.surname, idat.birthdate, idat.deathdate, idat.gender };
	String[] labels = { "firstname", "surname", "birthdate", "deathdate", "gender" };
	for( int i=0; i<optional.length; i++ ){
		if( optional[i] == null ){
			// not configured
			continue;
		}
		if( !columns.registerColumn(optional[i]) ){
			throw new IllegalArgumentException(labels[i]+" column not found in patient header");
		}
	}
	return columns;
}
/**
 * Creates a patient from one raw table row.
 * <p>
 * The patient id is mandatory. Name and date columns are optional in the
 * configuration; an unconfigured column yields {@code null} for that property.
 *
 * @param map column map created by {@link #getColumnMap(String[])}
 * @param row raw values of one table row
 * @return patient filled with id, names, birth date and death date
 * @throws ParseException if a value cannot be converted or the row has no patient id
 */
public PatientRow fillPatient(ColumnMap map, Object[] row) throws ParseException{
	PatientRow patient = new PatientRow();
	Object id = idat.patientId.valueOf(map, row);
	if( id == null ){
		throw new ParseException("No value for patientId column '"+idat.patientId.getName()+"'");
	}
	patient.setId(id.toString());
	// optional columns may be unconfigured (null), exactly as getColumnMap allows
	Object first = (idat.firstname == null) ? null : idat.firstname.valueOf(map, row);
	Object last = (idat.surname == null) ? null : idat.surname.valueOf(map, row);
	patient.setNames((String)first, (String)last);
	Object birth = (idat.birthdate == null) ? null : idat.birthdate.valueOf(map, row);
	patient.setBirthDate((DateTimeAccuracy)birth);
	Object death = (idat.deathdate == null) ? null : idat.deathdate.valueOf(map, row);
	patient.setDeathDate((DateTimeAccuracy)death);
	// TODO concepts
	return patient;
}
/**
 * Opens the table source and wraps it in a patient stream.
 * <p>
 * If the stream cannot be created (for example because a configured column
 * is missing from the headers), the already opened row supplier is closed
 * before the failure is propagated.
 *
 * @return stream of patients; the caller is responsible for closing it
 * @throws IOException if the source cannot be opened or its headers cannot be read
 */
public PatientStream open() throws IOException{
	de.sekmi.histream.etl.RowSupplier rows = source.rows();
	try{
		return new PatientStream(rows, this);
	}catch( IOException | RuntimeException e ){
		if( rows != null ){
			try{
				rows.close();
			}catch( IOException closeFailure ){
				e.addSuppressed(closeFailure);
			}
		}
		throw e;
	}
}
}
package de.sekmi.histream.etl.config;
import java.util.stream.Stream;
import javax.xml.bind.annotation.XmlElement;
import de.sekmi.histream.etl.RowSupplier;
public class SQLSource extends TableSource {
@XmlElement
String jdbcDriver;
......@@ -19,15 +18,8 @@ public class SQLSource extends TableSource {
this.jdbcDriver = driver;
this.connectString = connectString;
}
@Override
public String[] getHeaders() {
// TODO Auto-generated method stub
return null;
}
@Override
public Stream<String[]> rows() {
public RowSupplier rows() {
// TODO Auto-generated method stub
return null;
}
......
......@@ -3,9 +3,34 @@ package de.sekmi.histream.etl.config;
import javax.xml.bind.annotation.XmlElement;
import javax.xml.bind.annotation.XmlTransient;
import de.sekmi.histream.etl.ColumnMap;
@XmlTransient
public class Table {
public abstract class Table {
<