Commit df24a214 authored by R.W.Majeed's avatar R.W.Majeed
Browse files

processing of patient tables implemented

parent 9a9650da
package de.sekmi.histream.etl.config; package de.sekmi.histream.etl;
import java.util.HashMap; import java.util.HashMap;
import java.util.Map; import java.util.Map;
import de.sekmi.histream.etl.config.Column;
/** /**
* Maps {@link Column}s to header/table indices * Maps {@link Column}s to header/table indices
* *
...@@ -11,6 +13,10 @@ import java.util.Map; ...@@ -11,6 +13,10 @@ import java.util.Map;
*/ */
public class ColumnMap{ public class ColumnMap{
String[] headers; String[] headers;
/**
* Maps column names to their index in the header array (the position of the column's value within each row)
*/
Map<String, Integer> map; Map<String, Integer> map;
public ColumnMap(String[] headers){ public ColumnMap(String[] headers){
...@@ -19,12 +25,20 @@ public class ColumnMap{ ...@@ -19,12 +25,20 @@ public class ColumnMap{
} }
public boolean registerColumn(Column column){ public boolean registerColumn(Column column){
if( map.containsKey(column.getName()) ){
// column name already registered
return true;
}
// find name and map to index
for( int i=0; i<headers.length; i++ ){ for( int i=0; i<headers.length; i++ ){
if( column.name.equals(headers[i]) ){ if( headers[i].equals(column.getName()) ){
map.put(column.name, Integer.valueOf(i) ); map.put(column.getName(), Integer.valueOf(i) );
return true; return true;
} }
} }
return false; return false;
} }
/**
 * Looks up the index that was stored for the given column's name.
 *
 * @param column column whose name is used as lookup key
 * @return the stored index, or {@code null} if the map holds no entry for that name
 */
public Integer indexOf(Column column){
    String key = column.getName();
    return map.get(key);
}
} }
\ No newline at end of file
package de.sekmi.histream.etl;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UncheckedIOException;
import java.net.URL;
import java.util.regex.Pattern;
/**
 * {@link RowSupplier} which reads rows from a delimited text resource.
 * The first line is consumed by the constructor and split into the header names;
 * every following line is split with the same separator pattern and returned by {@link #get()}.
 */
public class FileRowSupplier extends RowSupplier {
    private Pattern fieldSeparatorPattern;
    private BufferedReader in;
    private String[] headers;

    /**
     * Opens the resource using a literal (non-regex) field separator.
     *
     * @param location resource to read
     * @param fieldSeparator separator text; it is quoted, so regex meta characters are taken literally
     * @throws IOException if the resource cannot be opened or contains no header line
     */
    public FileRowSupplier(URL location, String fieldSeparator) throws IOException{
        this(location, Pattern.compile(Pattern.quote(fieldSeparator)));
    }

    /**
     * Opens the resource and reads the header line.
     *
     * @param location resource to read
     * @param pattern regular expression used to split each line into fields
     * @throws IOException if the resource cannot be opened or contains no header line
     */
    public FileRowSupplier(URL location, Pattern pattern) throws IOException{
        this.fieldSeparatorPattern = pattern;
        // NOTE(review): no charset is passed, so the platform default encoding is used
        // closing the BufferedReader also closes the wrapped reader and its stream
        this.in = new BufferedReader(new InputStreamReader(location.openStream()));
        try {
            // load headers
            String line = in.readLine();
            if( line == null ){
                // empty resource: splitting null would fail with a NullPointerException
                throw new IOException("No header line found in "+location);
            }
            this.headers = fieldSeparatorPattern.split(line);
        } catch (IOException | RuntimeException e) {
            // the caller never receives this instance, so release the stream here
            try {
                in.close();
            } catch (IOException closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
    }

    /**
     * @return the fields of the first line of the resource
     */
    @Override
    public String[] getHeaders() throws IOException {
        return headers;
    }

    /**
     * Reads and splits the next line.
     *
     * @return the fields of the next line, or {@code null} at end of file
     * @throws UncheckedIOException if reading the line fails
     */
    @Override
    public Object[] get() {
        String line;
        try {
            line = in.readLine();
        } catch (IOException e) {
            throw new UncheckedIOException(e);
        }
        if( line == null ){
            // end of file
            return null;
        }
        // negative limit keeps trailing empty fields, so a row whose last columns are
        // empty still has an entry at every header index
        return fieldSeparatorPattern.split(line, -1);
    }

    /**
     * Closes the underlying reader.
     */
    @Override
    public void close() throws IOException {
        in.close();
    }
}
package de.sekmi.histream.etl;
/**
 * Checked exception for input values which cannot be converted to the
 * expected output type.
 */
public class ParseException extends Exception {
    private static final long serialVersionUID = 1L;

    /**
     * @param message description of the failure
     */
    public ParseException(String message){
        super(message);
    }

    /**
     * Creates an exception which keeps the underlying failure as its cause.
     *
     * @param message description of the failure
     * @param cause underlying failure, may be {@code null}
     */
    public ParseException(String message, Throwable cause){
        super(message, cause);
    }
}
package de.sekmi.histream.etl;
import java.time.Instant;
import de.sekmi.histream.DateTimeAccuracy;
import de.sekmi.histream.etl.config.PatientTable;
import de.sekmi.histream.ext.Patient;
/**
 * Row of patient data, exposed through the {@link Patient} interface.
 * Concepts are not handled yet.
 * @author Raphael
 *
 */
public class PatientRow implements Patient{
    String patid;
    String firstname;
    String lastname;
    DateTimeAccuracy birthdate;
    DateTimeAccuracy deathdate;
    /** Value passed to {@link #setSourceId(String)}, {@code null} until set */
    String sourceId;
    /** Value passed to {@link #setSourceTimestamp(Instant)}, {@code null} until set */
    Instant sourceTimestamp;
    /** Value passed to {@link #setSex(Sex)}, {@code null} until set */
    Sex sex;
    // TODO concepts

    /**
     * Sets both name parts at once.
     * @param first first name
     * @param last last name
     */
    public void setNames(String first, String last){
        this.firstname = first;
        this.lastname = last;
    }

    public void setBirthDate(DateTimeAccuracy date){
        this.birthdate = date;
    }

    public void setDeathDate(DateTimeAccuracy date){
        this.deathdate = date;
    }

    /**
     * Creates a new row. The arguments are not evaluated yet, so the returned
     * row has no patient id and no other values.
     *
     * @param input raw row values (currently unused)
     * @param map column map (currently unused)
     * @param table patient table configuration (currently unused)
     * @return new, empty patient row
     */
    public static PatientRow load(Object[] input, ColumnMap map, PatientTable table){
        PatientRow row = new PatientRow();
        // TODO fill from input, presumably via table.patid.valueOf(map, input) -- confirm
        row.patid = null;
        return row;
    }

    @Override
    public String getId() {
        return patid;
    }

    @Override
    public void setId(String patientId) {
        this.patid = patientId;
    }

    @Override
    public String getSourceId() {
        return sourceId;
    }

    @Override
    public Instant getSourceTimestamp() {
        return sourceTimestamp;
    }

    @Override
    public void setSourceId(String arg0) {
        // previously the value was silently discarded
        this.sourceId = arg0;
    }

    @Override
    public void setSourceTimestamp(Instant arg0) {
        // previously the value was silently discarded
        this.sourceTimestamp = arg0;
    }

    @Override
    public DateTimeAccuracy getBirthDate() {
        return this.birthdate;
    }

    @Override
    public DateTimeAccuracy getDeathDate() {
        return this.deathdate;
    }

    @Override
    public Sex getSex() {
        return sex;
    }

    @Override
    public void setSex(Sex arg0) {
        // previously the value was silently discarded
        this.sex = arg0;
    }
}
package de.sekmi.histream.etl;
import java.io.IOException;
import java.util.function.Supplier;
import de.sekmi.histream.etl.config.PatientTable;
/**
 * Supplies {@link PatientRow}s by pulling raw rows from a {@link RowSupplier}
 * and passing each one to the {@link PatientTable} for conversion.
 */
public class PatientStream implements Supplier<PatientRow>, AutoCloseable{
    RowSupplier rows;
    PatientTable table;
    ColumnMap map;

    /**
     * @param rows source of raw rows; its headers are used to build the column map
     * @param table patient table configuration used to build the map and to fill patients
     * @throws IOException if the headers cannot be retrieved
     */
    public PatientStream(RowSupplier rows, PatientTable table) throws IOException{
        this.rows = rows;
        this.table = table;
        this.map = table.getColumnMap(rows.getHeaders());
    }

    /**
     * Closes the underlying row supplier.
     */
    @Override
    public void close() throws IOException {
        rows.close();
    }

    /**
     * Converts the next raw row to a patient.
     *
     * @return next patient, or {@code null} if the row supplier has no more rows
     * @throws UncheckedParseException if the row cannot be converted
     */
    @Override
    public PatientRow get() {
        Object[] next = rows.get();
        if( next != null ){
            try {
                return table.fillPatient(map, next);
            } catch (ParseException cause) {
                throw new UncheckedParseException(cause);
            }
        }
        // no more rows
        return null;
    }
}
package de.sekmi.histream.etl;
import java.io.IOException;
import java.util.function.Supplier;
/**
 * Base type for sources of tabular data: provides the header names and
 * supplies one row at a time as an object array.
 */
public abstract class RowSupplier implements Supplier<Object[]>, AutoCloseable{

    public RowSupplier(){
        super();
    }

    /**
     * @return next row
     */
    @Override
    public abstract Object[] get();

    /**
     * @return header names of the table
     * @throws IOException if the headers cannot be retrieved
     */
    public abstract String[] getHeaders()throws IOException;

    /**
     * Releases resources held by this supplier.
     * @throws IOException if closing fails
     */
    @Override
    public abstract void close() throws IOException;
}
package de.sekmi.histream.etl;
/**
 * Unchecked wrapper which carries a {@link ParseException} through methods
 * that cannot declare checked exceptions.
 */
public class UncheckedParseException extends RuntimeException {
    private static final long serialVersionUID = 1L;

    /**
     * @param cause the parse failure to wrap
     */
    public UncheckedParseException(ParseException cause){
        super(cause);
    }

    /**
     * @return the wrapped parse exception
     */
    @Override
    public ParseException getCause(){
        Throwable wrapped = super.getCause();
        return (ParseException)wrapped;
    }
}
package de.sekmi.histream.etl;
/**
 * Placeholder type without fields or methods.
 * Used as element type by VisitSupplier; presumably the visit counterpart
 * of PatientRow -- TODO confirm once implemented.
 */
public class VisitRow {
}
package de.sekmi.histream.etl;
import java.util.Spliterator;
import java.util.function.Consumer;
/**
 * Spliterator over {@link VisitRow}s. Not implemented yet: every method
 * returns a fixed value, so it behaves like an empty, unsplittable source.
 */
public class VisitSupplier implements Spliterator<VisitRow>{
/**
 * Stub: never invokes the action and always reports that no element remains.
 */
@Override
public boolean tryAdvance(Consumer<? super VisitRow> action) {
// TODO not implemented, no rows are produced
return false;
}
/**
 * Stub: always returns null, i.e. this spliterator is never split.
 */
@Override
public Spliterator<VisitRow> trySplit() {
// TODO not implemented
return null;
}
/**
 * Stub: always reports zero elements.
 */
@Override
public long estimateSize() {
// TODO not implemented
return 0;
}
/**
 * Stub: reports no characteristics.
 */
@Override
public int characteristics() {
// TODO not implemented
return 0;
}
}
...@@ -7,9 +7,12 @@ import javax.xml.bind.annotation.XmlSeeAlso; ...@@ -7,9 +7,12 @@ import javax.xml.bind.annotation.XmlSeeAlso;
import javax.xml.bind.annotation.XmlTransient; import javax.xml.bind.annotation.XmlTransient;
import javax.xml.bind.annotation.XmlValue; import javax.xml.bind.annotation.XmlValue;
import de.sekmi.histream.etl.ColumnMap;
import de.sekmi.histream.etl.ParseException;
@XmlTransient @XmlTransient
@XmlSeeAlso({StringColumn.class}) @XmlSeeAlso({StringColumn.class})
public class Column { public abstract class Column {
protected Column(){ protected Column(){
} }
public Column(String name){ public Column(String name){
...@@ -62,6 +65,8 @@ public class Column { ...@@ -62,6 +65,8 @@ public class Column {
@XmlValue @XmlValue
String name; String name;
public String getName(){return name;}
/** /**
* Convert a string input value to the output data type. The resulting type depends * Convert a string input value to the output data type. The resulting type depends
* on the type attribute and can be one of Long, BigDecimal, String, DateTime * on the type attribute and can be one of Long, BigDecimal, String, DateTime
...@@ -72,7 +77,7 @@ public class Column { ...@@ -72,7 +77,7 @@ public class Column {
* @param value input value. e.g. from text table column * @param value input value. e.g. from text table column
* @return output type representing the input value * @return output type representing the input value
*/ */
public Object valueOf(String value){ public Object valueOf(Object value)throws ParseException{
if( constantValue != null ){ if( constantValue != null ){
value = constantValue; value = constantValue;
} }
...@@ -82,20 +87,25 @@ public class Column { ...@@ -82,20 +87,25 @@ public class Column {
} }
if( value != null && regexMatch != null ){ if( value != null && regexMatch != null ){
value = applyRegularExpression(value); if( !(value instanceof String) ){
throw new ParseException("regex-match can only be used on String, but found "+value.getClass().getName());
}
value = applyRegularExpression((String)value);
} }
return value; return value;
} }
/**
 * Picks this column's raw value out of a row and converts it with {@link #valueOf(Object)}.
 *
 * @param map column map used to look up the index of this column
 * @param row raw row values
 * @return converted value
 * @throws ParseException if this column has no index in the map, if the row is too
 *  short to contain the index, or if the conversion fails
 */
public Object valueOf(ColumnMap map, Object[] row) throws ParseException{
    Integer index = map.indexOf(this);
    if( index == null ){
        // unboxing null would otherwise surface as an uninformative NullPointerException
        throw new ParseException("Column '"+getName()+"' is not registered in the column map");
    }
    if( index.intValue() >= row.length ){
        throw new ParseException("Row has only "+row.length+" fields, but column '"+getName()+"' is mapped to index "+index);
    }
    return this.valueOf(row[index.intValue()]);
}
public String applyRegularExpression(String input){ public String applyRegularExpression(String input){
// TODO: apply // TODO: apply
return input; return input;
} }
/**
 * Column subtype without fields or overrides of its own, so it currently
 * behaves exactly like its base class. Presumably meant for integer
 * values -- TODO confirm and implement the conversion.
 */
public static class IntegerColumn extends Column{
}
public static class DecimalColumn extends Column{ public static class DecimalColumn extends Column{
@XmlTransient @XmlTransient
DecimalFormat decimalFormat; DecimalFormat decimalFormat;
......
package de.sekmi.histream.etl.config; package de.sekmi.histream.etl.config;
import java.sql.Timestamp;
import java.time.format.DateTimeFormatter; import java.time.format.DateTimeFormatter;
import javax.xml.bind.annotation.XmlAttribute; import javax.xml.bind.annotation.XmlAttribute;
import javax.xml.bind.annotation.XmlTransient; import javax.xml.bind.annotation.XmlTransient;
import de.sekmi.histream.DateTimeAccuracy;
import de.sekmi.histream.etl.ParseException;
/** /**
* Date and Time column. * Date and Time column.
* TODO implement parsing of partial date/time. e.g. 2003-10 * TODO implement parsing of partial date/time. e.g. 2003-10
...@@ -31,4 +35,25 @@ public class DateTimeColumn extends Column{ ...@@ -31,4 +35,25 @@ public class DateTimeColumn extends Column{
protected DateTimeColumn(){ protected DateTimeColumn(){
super(); super();
} }
/**
 * Converts an input value to a date/time value.
 * The value is first processed by the base class. A resulting string is parsed
 * with the configured format; the formatter is created on first use.
 *
 * @param value input value
 * @return parsed date for strings; {@code null} if there is no value to parse
 *  and (for now) also for {@link Timestamp} input
 * @throws ParseException if a string needs to be parsed but no format is specified
 * @throws IllegalArgumentException if the value has an unsupported type
 */
@Override
public Object valueOf(Object value) throws ParseException{
    value = super.valueOf(value);
    if( value == null ){
        // nothing to parse; without this guard the last branch would fail
        // with a NullPointerException on value.getClass()
        return null;
    }
    if( value instanceof String ){
        // parse date according to format
        if( formatter == null && format != null ){
            formatter = DateTimeFormatter.ofPattern(format);
        }
        if( formatter == null ){
            throw new ParseException("format must be specified for DateTime fields if strings are parsed");
        }
        return DateTimeAccuracy.parse(formatter,(String)value);
    }else if( value instanceof Timestamp ){
        // TODO convert from timestamp, not implemented yet
        return null;
    }else{
        throw new IllegalArgumentException("Don't know how to parse type "+value.getClass()+" to datetime");
    }
}
} }
\ No newline at end of file
package de.sekmi.histream.etl.config; package de.sekmi.histream.etl.config;
import java.io.IOException;
import java.net.MalformedURLException; import java.net.MalformedURLException;
import java.net.URL; import java.net.URL;
import java.util.stream.Stream; import java.util.regex.Pattern;
import javax.xml.bind.annotation.XmlAccessType; import javax.xml.bind.annotation.XmlAccessType;
import javax.xml.bind.annotation.XmlAccessorType; import javax.xml.bind.annotation.XmlAccessorType;
import javax.xml.bind.annotation.XmlElement; import javax.xml.bind.annotation.XmlElement;
import de.sekmi.histream.etl.FileRowSupplier;
import de.sekmi.histream.etl.RowSupplier;
@XmlAccessorType(XmlAccessType.FIELD) @XmlAccessorType(XmlAccessType.FIELD)
public class FileSource extends TableSource{ public class FileSource extends TableSource{
@XmlElement @XmlElement
URL url; URL url;
@XmlElement @XmlElement
String type; String encoding;
@XmlElement
String separator;
private FileSource(){ private FileSource(){
} }
public FileSource(String url, String type) throws MalformedURLException{ public FileSource(String url, String separator) throws MalformedURLException{
this(); this();
this.url = new URL(url); this.url = new URL(url);
this.type = type; this.separator = separator;
} }
@Override @Override
public String[] getHeaders() { public RowSupplier rows() throws IOException {
// TODO Auto-generated method stub return new FileRowSupplier(url, Pattern.compile(separator));
return null;
} }
@Override
public Stream<String[]> rows() {
return null;
}
} }
package de.sekmi.histream.etl.config; package de.sekmi.histream.etl.config;
import java.io.IOException;
import javax.xml.bind.annotation.XmlAccessType; import javax.xml.bind.annotation.XmlAccessType;
import javax.xml.bind.annotation.XmlAccessorType; import javax.xml.bind.annotation.XmlAccessorType;
import javax.xml.bind.annotation.XmlElement; import javax.xml.bind.annotation.XmlElement;
import javax.xml.bind.annotation.XmlElementWrapper; import javax.xml.bind.annotation.XmlElementWrapper;
import de.sekmi.histream.DateTimeAccuracy;
import de.sekmi.histream.etl.ColumnMap;
import de.sekmi.histream.etl.ParseException;
import de.sekmi.histream.etl.PatientRow;
import de.sekmi.histream.etl.PatientStream;
/** /**
* Patient table. Contains patient id and other identifying information. * Patient table. Contains patient id and other identifying information.
* Can also contain medical data * Can also contain medical data
...@@ -22,10 +30,49 @@ public class PatientTable extends Table implements WideInterface{ ...@@ -22,10 +30,49 @@ public class PatientTable extends Table implements WideInterface{
@XmlAccessorType(XmlAccessType.FIELD) @XmlAccessorType(XmlAccessType.FIELD)
public static class IDAT extends IdatColumns{ public static class IDAT extends IdatColumns{
StringColumn firstname; StringColumn firstname;
StringColumn lastname; StringColumn surname;
DateTimeColumn birthdate; DateTimeColumn birthdate;