...
 
Commits (6)
......@@ -42,6 +42,9 @@ public interface Visit extends IdExtensionType,ExternalSourceType {
/** @return location id of this visit */
String getLocationId();
/** @param locationId location id to set for this visit */
void setLocationId(String locationId);
/** @return provider id of this visit */
String getProviderId();
/** @param providerId provider id to set for this visit */
void setProviderId(String providerId);
/** Status values declared for a visit. */
public enum Status{
Inpatient, Outpatient, Emergency
}
......
......@@ -36,6 +36,7 @@ public class VisitImpl extends StoredExtensionType implements Visit {
private Status status;
private String patientId;
private String locationId;
private String providerId;
/**
* Empty constructor protected, only
......@@ -110,6 +111,15 @@ public class VisitImpl extends StoredExtensionType implements Visit {
checkAndUpdateDirty(this.startTime, startTime);
this.startTime = startTime;
}
/**
 * @return the provider id currently held in the {@code providerId} field
 */
@Override
public String getProviderId() {
	return providerId;
}
/**
 * Replaces the provider id.
 *
 * @param providerId new provider id
 */
@Override
public void setProviderId(String providerId) {
	// hand the old and the new value to checkAndUpdateDirty
	// before the field is overwritten
	final String previous = this.providerId;
	checkAndUpdateDirty(previous, providerId);
	this.providerId = providerId;
}
......
......@@ -7,7 +7,7 @@ For standalone operation of import descriptor parsing,
put histream-core.jar, histream-import.jar and histream-js.jar
into a single folder and run the following command:
java -cp \*.jar de.sekmi.histream.etl.XMLExport datasource.xml
java --add-modules java.xml.bind -cp \* de.sekmi.histream.etl.XMLExport datasource.xml
Schema/XSD for import descriptions
......
......@@ -12,7 +12,7 @@ import java.time.Instant;
import com.opencsv.CSVParser;
import com.opencsv.CSVReader;
public class FileRowSupplier extends RowSupplier {
public class FileRowSupplier implements RowSupplier {
private CSVReader in;
private String[] headers;
private URL url;
......@@ -96,8 +96,32 @@ public class FileRowSupplier extends RowSupplier {
return timestamp;
}
/**
 * Reports the line number belonging to the record which was
 * most recently returned by {@link #get()}.
 *
 * @return line number of the previous record
 * @throws IllegalStateException if the line counter is still {@code 0}
 */
public int getLineNo() {
	final boolean nothingRead = (lineNo == 0);
	if( nothingRead ) {
		throw new IllegalStateException("Line no requires call to get() first");
	}
	// The counter starts at 0 whereas reported line numbers start at 1,
	// therefore the counter can be returned without subtracting 1.
	return lineNo;
}
/**
 * Provides the URL of the source file.
 *
 * @return URL of the source file
 */
public URL getSourceURL() {
	return this.url;
}
/**
 * @return source URL and current line number, combined
 *  by {@link #formatLocation(URL, int)}
 */
@Override
public String getLocation() {
	final String location = formatLocation(this.url, this.lineNo);
	return location;
}
/**
 * Builds a location string of the form {@code <url>:<lineNo>}.
 *
 * @param url source URL, must not be {@code null}
 * @param lineNo line number to append after the colon
 * @return URL string, a colon and the line number
 */
public static String formatLocation(URL url, int lineNo) {
	final StringBuilder location = new StringBuilder(url.toString());
	location.append(':').append(lineNo);
	return location.toString();
}
......
package de.sekmi.histream.etl;
/**
* Feedback from mapping operation. E.g. drop fact or override concept.
*
* @author R.W.Majeed
*
*/
// TODO allow different subclasses of feedback. some places allow mapping, but drop-fact does not make sense (e.g. gender, location). In some cases, action might drop value, modifier or whole observation.
public class MapFeedback {
private boolean drop;
private String concept;
......
package de.sekmi.histream.etl;
import java.io.IOException;
import java.net.URL;
import java.time.Instant;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
/**
 * Row supplier which holds all rows of a {@link FileRowSupplier} in memory.
 * <p>
 * The complete source is read during construction. As long as no row was
 * retrieved via {@link #get()}, the table can be sorted and reduced to
 * unique rows. Every row remembers the line number reported by the source,
 * so {@link #getLocation()} still refers to the position in the source file
 * after sorting or filtering.
 * </p>
 */
public class MemoryTable implements RowSupplier{
	private Row[] rows;
	private final String[] headers;
	private final Instant timestamp;
	private final URL url;
	/** index into rows to retrieve next by {@link #get()} */
	private int pointer;

	/**
	 * Read all rows from the specified source into memory.
	 * The source is read until its {@code get()} method returns {@code null}.
	 * It is not closed by this constructor.
	 *
	 * @param source row source to copy headers, URL, timestamp and rows from
	 */
	public MemoryTable(FileRowSupplier source) {
		headers = source.getHeaders();
		url = source.getSourceURL();
		timestamp = source.getTimestamp();
		// collect all rows together with their source line numbers
		ArrayList<Row> list = new ArrayList<>();
		for( Object[] row=source.get(); row!=null; row=source.get()) {
			list.add(new Row(row, source.getLineNo()));
		}
		rows = list.toArray(new Row[list.size()]);
		// point to index 0, get() will retrieve first row
		pointer = 0;
	}

	/**
	 * Table row with the line number it had in the source.
	 * Static, because no access to the enclosing table is needed.
	 */
	private static final class Row{
		final Object[] row;
		final int lineNo;
		Row(Object[] row, int lineNo) {
			this.row = row;
			this.lineNo = lineNo;
		}
	}

	@Override
	public String[] getHeaders() {
		return headers;
	}

	/**
	 * Translate column names to their index positions in {@link #headers}.
	 *
	 * @param columns column names to look up
	 * @return index positions, in the same order as the specified names
	 * @throws IllegalArgumentException if a name is not contained in the headers
	 */
	private int[] columnPositions(String[] columns) {
		final int[] pos = new int[columns.length];
		for( int i=0; i<pos.length; i++ ) {
			int j;
			for( j=0; j<headers.length; j++ ) {
				if( columns[i].equals(headers[j]) ) {
					break;
				}
			}
			if( j == headers.length ) {
				// Report the requested name via index i. At this point j is one past
				// the last header and must not be used as index into columns.
				throw new IllegalArgumentException("Sort header '"+columns[i]+"' not found in "+url);
			}
			pos[i] = j;
		}
		return pos;
	}

	/**
	 * Keep only one row per unique occurrence of the specified columns.
	 * Data must be sorted beforehand by the same columns.
	 * @param columns columns which should be unique per row
	 * @throws IllegalArgumentException column header not found
	 * @throws IllegalStateException rows retrieved before filtering
	 */
	public void unique(String[] columns) throws IllegalArgumentException, IllegalStateException{
		final int[] pos = columnPositions(columns);
		unique(pos);
	}

	/**
	 * Keep only one row per unique occurrence of the specified columns.
	 * Each row is compared to its direct predecessor only, therefore the
	 * data must be sorted beforehand by the same columns. Of consecutive
	 * rows with equal values, the first one is kept.
	 * @param columns index positions of the columns which should be unique per row
	 * @throws IllegalStateException rows retrieved before filtering
	 */
	public void unique(final int[] columns) throws IllegalArgumentException, IllegalStateException{
		if( pointer != 0 ) {
			throw new IllegalStateException("Method may not be used after retrieving rows");
		}
		// make sure we have data
		if( rows.length < 1 ) {
			return;
		}
		ArrayList<Row> kept = new ArrayList<>(rows.length);
		// always keep first row
		kept.add(rows[0]);
		for( int i=1; i<rows.length; i++ ) {
			// a row is dropped if all specified columns equal those of the previous row
			if( !sameValues(rows[i-1], rows[i], columns) ) {
				kept.add(rows[i]);
			}
		}
		this.rows = kept.toArray(new Row[kept.size()]);
	}

	/**
	 * Determine whether two rows have equal values in all specified columns.
	 * Two {@code null} values are considered equal.
	 */
	private static boolean sameValues(Row previous, Row current, int[] columns) {
		for( int i=0; i<columns.length; i++ ) {
			Object o1 = previous.row[columns[i]];
			Object o2 = current.row[columns[i]];
			if( !java.util.Objects.equals(o1, o2) ) {
				// stop comparing more columns, if one column is found different
				return false;
			}
		}
		return true;
	}

	/**
	 * Sort the data table by the specified columns
	 * @param columns columns for sort order
	 * @throws IllegalArgumentException column header not found
	 * @throws IllegalStateException rows retrieved before sorting
	 */
	public void sort(String[] columns) throws IllegalArgumentException, IllegalStateException{
		final int[] pos = columnPositions(columns);
		sort(pos);
	}

	/**
	 * Sort the data table by the specified columns.
	 * {@code null} values are sorted first. Other values are
	 * cast to {@link Comparable} for comparison.
	 * @param columns index positions of the columns for sort order
	 * @throws IllegalStateException rows retrieved before sorting
	 */
	public void sort(int[] columns) throws IllegalStateException {
		if( pointer != 0 ) {
			throw new IllegalStateException("Method may not be used after retrieving rows");
		}
		Comparator<Row> order = (r1, r2) -> compareRows(r1, r2, columns);
		Arrays.sort(rows, order);
	}

	/**
	 * Compare two rows column by column.
	 * Later columns are only used, if all previous columns were equal.
	 */
	@SuppressWarnings({ "unchecked", "rawtypes" })
	private static int compareRows(Row r1, Row r2, int[] columns) {
		for( int i=0; i<columns.length; i++ ) {
			Object o1 = r1.row[columns[i]];
			Object o2 = r2.row[columns[i]];
			int order;
			// sort nulls first
			if( o1 == null ) {
				order = (o2 == null)?0:-1;
			}else if( o2 == null ) {
				// o1 not null because that would be handled in the first if case
				order = 1;
			}else {
				order = ((Comparable)o1).compareTo(o2);
			}
			if( order != 0 ) {
				return order;
			}
		}
		return 0;
	}

	/**
	 * Retrieve the next row.
	 * @return next row, or {@code null} if there are no more rows
	 */
	@Override
	public Object[] get() {
		if( pointer >= rows.length ) {
			return null;
		}
		Object[] row = rows[pointer].row;
		pointer ++;
		return row;
	}

	@Override
	public void close() throws IOException {
		// nothing to do, data lives in memory
	}

	/**
	 * Source line number of the row previously returned by {@link #get()}.
	 * @return line number as reported by the source during construction
	 * @throws IllegalStateException if {@link #get()} was not called before
	 */
	protected int getLineNumber() {
		if( pointer == 0 ) {
			throw new IllegalStateException("Line no requires call to get() first");
		}
		return rows[pointer-1].lineNo;
	}

	@Override
	public String getLocation() {
		return FileRowSupplier.formatLocation(url, getLineNumber());
	}

	@Override
	public Instant getTimestamp() {
		return timestamp;
	}

	/**
	 * @return number of rows currently contained in the table
	 */
	public int getRowCount() {
		return rows.length;
	}
}
......@@ -4,11 +4,8 @@ import java.io.IOException;
import java.time.Instant;
import java.util.function.Supplier;
public abstract class RowSupplier implements Supplier<Object[]>, AutoCloseable{
public interface RowSupplier extends Supplier<Object[]>, AutoCloseable{
public RowSupplier(){
}
public abstract String[] getHeaders();
@Override
......
package de.sekmi.histream.etl;
import java.util.Spliterator;
import java.util.function.Consumer;
/**
 * Spliterator over {@link VisitRow} elements.
 * <p>
 * TODO not implemented yet: all methods are auto-generated stubs.
 * As written, the spliterator never supplies an element.
 * </p>
 */
public class VisitSupplier implements Spliterator<VisitRow>{

	/** TODO stub: reports no characteristics. */
	@Override
	public int characteristics() {
		return 0;
	}

	/** TODO stub: reports an estimated size of zero. */
	@Override
	public long estimateSize() {
		return 0;
	}

	/** TODO stub: never calls the action and always returns {@code false}. */
	@Override
	public boolean tryAdvance(Consumer<? super VisitRow> action) {
		return false;
	}

	/** TODO stub: splitting is not supported, always returns {@code null}. */
	@Override
	public Spliterator<VisitRow> trySplit() {
		return null;
	}
}
......@@ -154,9 +154,10 @@ public abstract class Column<T> {
}
/**
* Process and return the column value from a table row without map rule processing.
* This method behaves as if {@link #valueOf(ColumnMap, Object[], MapFeedback)} was called
* with the last argument set to {@code null}.
* Process and return the column value from a table row with limited map rule processing.
* The mapping operation is allowed, but does not allow actions (like drop fact) or concept modification
* In contrast to {@link #valueOf(ColumnMap, Object[], MapFeedback)} with the last argument set to {@code null} (thus not allowing any map operation)
* this method allows mapping.
*
* @see #valueOf(Object)
* @param colMap column map
......@@ -165,7 +166,12 @@ public abstract class Column<T> {
* @throws ParseException parse errors
*/
public T valueOf(ColumnMap colMap, Object[] row) throws ParseException{
	// Provide a feedback object so that map rules are allowed for this column.
	// (Passing null instead would disallow any map element.)
	MapFeedback mf = new MapFeedback();
	T value = valueOf(colMap, row, mf);
	// mapping of the value is fine, but actions which affect the
	// whole fact (drop, concept override) cannot be honored here
	if( mf.isActionDrop() || mf.getConceptOverride() != null ){
		throw new ParseException("concept override or drop not allowed for column "+getName());
	}
	return value;
}
private T processedValue(String val, MapFeedback mapFeedback) throws ParseException{
......@@ -177,7 +183,7 @@ public abstract class Column<T> {
// apply map rules
if( map != null ){
if( mapFeedback == null ){
throw new ParseException("map element allowed for column "+getName());
throw new ParseException("map element not supported for column "+getName());
}
applyMapRules(val, mapFeedback);
// use value override, if present
......
......@@ -54,10 +54,17 @@ public class Concept{
private Concept(){
}
public Concept(String id, String startColumn, String format){
/**
* Constructor used for testing
*
* @param id concept id
* @param startColumn start column name
* @param format start date format
*/
Concept(String id, String startColumn, String format){
this();
this.id = id;
this.start = new DateTimeColumn(startColumn, format);
this.start = new DateTimeColumn(startColumn, format, null);
}
private Value createObservationValue(Object val, String unit) throws ParseException{
......
......@@ -51,15 +51,14 @@ public class CsvFile extends TableSource{
// @XmlElement
// char escape;
/**
 * Constructor without arguments. Protected instead of private,
 * so that subclasses can invoke it via {@code super()}.
 */
protected CsvFile(){
}
public CsvFile(String urlSpec, String separator) throws MalformedURLException{
this();
this.url = urlSpec;
this.separator = separator;
}
@Override
public RowSupplier rows(Meta meta) throws IOException {
protected FileRowSupplier openRowSupplier(Meta meta) throws IOException {
// resolve url relative to base url from metadata
URL base = meta.getLocation();
URL source = (base == null)?new URL(url):new URL(base, url);
......@@ -74,4 +73,8 @@ public class CsvFile extends TableSource{
return new FileRowSupplier(source, separator, charset);
}
@Override
public RowSupplier rows(Meta meta) throws IOException {
return openRowSupplier(meta);
}
}
package de.sekmi.histream.etl.config;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.Charset;
import javax.xml.bind.annotation.XmlAccessType;
import javax.xml.bind.annotation.XmlAccessorType;
import javax.xml.bind.annotation.XmlElement;
import javax.xml.bind.annotation.XmlType;
import de.sekmi.histream.etl.FileRowSupplier;
import de.sekmi.histream.etl.MemoryTable;
import de.sekmi.histream.etl.RowSupplier;
/**
 * Table source reading a plain text table completely into memory.
 * <p>
 * The file is opened in the same way as for {@link CsvFile}, but all rows are
 * copied to a {@link MemoryTable} before they are supplied.
 * </p>
 * TODO implement escape sequences and quoting OR use opencsv dependency
 *
 * @author R.W.Majeed
 *
 */
@XmlAccessorType(XmlAccessType.FIELD)
@XmlType(name="csv-filtered")
public class CsvFiltered extends CsvFile{

	/**
	 * Constructor without arguments, delegates to the superclass.
	 */
	protected CsvFiltered(){
		super();
	}

	/**
	 * Create a filtered CSV source.
	 * @param urlSpec URL specification, passed to the {@link CsvFile} constructor
	 * @param separator field separator, passed to the {@link CsvFile} constructor
	 * @throws MalformedURLException declared by the {@link CsvFile} constructor
	 */
	public CsvFiltered(String urlSpec, String separator) throws MalformedURLException{
		super(urlSpec,separator);
	}

	/**
	 * Load the whole file into memory and supply the rows from there.
	 * The file is closed before this method returns, because the
	 * returned table holds a copy of all rows.
	 *
	 * @param meta metadata, passed to {@link #openRowSupplier(Meta)}
	 * @return in-memory table with all rows of the file
	 * @throws IOException error opening, reading or closing the file
	 */
	@Override
	public RowSupplier rows(Meta meta) throws IOException {
		// MemoryTable reads everything in its constructor and its close()
		// does nothing, so the file must be closed here to prevent a leak
		try( FileRowSupplier rows = super.openRowSupplier(meta) ){
			MemoryTable data = new MemoryTable(rows);
			// TODO sort, filter unique, etc.
			return data;
		}
	}
}
......@@ -4,6 +4,8 @@ import java.sql.Timestamp;
import java.time.ZoneId;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeParseException;
import java.util.IllformedLocaleException;
import java.util.Locale;
import javax.xml.bind.annotation.XmlAttribute;
import javax.xml.bind.annotation.XmlTransient;
......@@ -22,18 +24,45 @@ public class DateTimeColumn extends Column<DateTimeAccuracy>{
@XmlTransient
DateTimeFormatter formatter;
/**
* Format string for parsing via {@link DateTimeFormatter}
* @see DateTimeFormatter#ofPattern(String)
* Format string for parsing via {@link DateTimeFormatter}.
* For locale specific formats, the locale can be specified
* via {@link #locale}.
* <p>
* If no locale is specified, the parsing
* is done via {@link DateTimeFormatter#ofPattern(String)} which
* uses the system default locale.
* </p>
*/
@XmlAttribute
String format;
/** Locale to use for parsing date strings. Specified as BCP 47 tag.
* Some formats support naming months e.g. Mar 1st 2018.
* In these cases, a locale must be specified to indicate the language
* for month names etc.
* See {@link DateTimeFormatter#ofPattern(String, java.util.Locale)}
* <p>
* If no locale specified, the java default locale is used.
* See {@link DateTimeFormatter#ofPattern(String)}
* </p>
* */
@XmlAttribute
String locale;
@XmlAttribute
String zone;
/**
 * Construct a date time column which uses the java default locale.
 * Same as calling {@link #DateTimeColumn(String, String, String)}
 * with the locale set to {@code null}.
 * @param name column name
 * @param format date format
 */
public DateTimeColumn(String name, String format){
	this(name, format, null);
}
/**
 * Construct a date time column
 * @param name column name
 * @param format date format
 * @param locale locale, set to {@code null} for java default locale
 */
public DateTimeColumn(String name, String format, String locale){
	super(name);
	this.format = format;
	this.locale = locale;
}
protected DateTimeColumn(){
......@@ -54,7 +83,13 @@ public class DateTimeColumn extends Column<DateTimeAccuracy>{
public DateTimeAccuracy valueFromString(String input) throws ParseException {
// parse date according to format
if( formatter == null && format != null ){
if( locale == null ) {
formatter = DateTimeFormatter.ofPattern(format);
}else try {
formatter = DateTimeFormatter.ofPattern(format, Locale.forLanguageTag(locale));
}catch( IllformedLocaleException e ) {
throw new ParseException("Failed to parse DateTime column locale tag: "+locale);
}
}
if( formatter == null ){
throw new ParseException("format must be specified for DateTime fields if strings are parsed");
......
......@@ -98,11 +98,7 @@ public class PatientTable extends Table<PatientRow> implements ConceptTable{
patient.setDeathDate(idat.deathdate.valueOf(map, row, null));
}
if( idat.gender != null ){
MapFeedback mf = new MapFeedback();
String genderCode = idat.gender.valueOf(map, row, mf);
if( mf.isActionDrop() || mf.getConceptOverride() != null ){
throw new ParseException("concept override or drop not allowed for patient gender");
}
String genderCode = idat.gender.valueOf(map, row);
// gender may omitted
if( genderCode != null ){
try{
......
......@@ -103,6 +103,9 @@ public class VisitTable extends Table<VisitRow> implements ConceptTable{
if( idat.location != null ){
visit.setLocationId(idat.location.valueOf(map, row));
}
if( idat.provider != null ){
visit.setProviderId(idat.provider.valueOf(map, row));
}
// TODO other
// concepts
......
package de.sekmi.histream.etl;
import java.io.IOException;
import java.nio.charset.Charset;
import org.junit.Test;
import static org.junit.Assert.*;
/**
 * Tests for {@link MemoryTable}, using the ICD.csv test resource.
 * Column index 3 holds the visit number (header {@code KH-internes-Kennzeichen}).
 */
public class TestMemoryTable {

	/**
	 * Open the ICD test table. The caller is responsible for closing it.
	 */
	private FileRowSupplier openIcd() throws IOException {
		return new FileRowSupplier(getClass().getResource("/data/p21khg/ICD.csv"), ";", Charset.forName("ASCII"));
	}

	@Test
	public void verifyOriginalOrderWithoutModification() throws IOException {
		// try-with-resources closes file and table even if an assertion fails
		try( FileRowSupplier visits = openIcd();
				MemoryTable mt = new MemoryTable(visits) ){
			assertEquals(15, mt.getRowCount());
			// verify that the table is previously unsorted
			Object[] r = mt.get();
			// first record on line 2 (first line were headers)
			assertEquals(2, mt.getLineNumber());
			assertEquals("KH-internes-Kennzeichen", mt.getHeaders()[3]);
			// visit 1
			assertEquals("1", r[3]);
			// visit 2
			assertEquals("2", mt.get()[3]);
		}
	}

	@Test
	public void verifySortSingleColumn() throws IOException {
		try( FileRowSupplier visits = openIcd();
				MemoryTable mt = new MemoryTable(visits) ){
			// sorting by single column
			mt.sort(new int[] {3});
			// now we should have two consecutive rows with visit 1
			assertEquals("1", mt.get()[3]);
			assertEquals("1", mt.get()[3]);
			// values are compared as strings, thus "11" sorts before "2"
			assertEquals("11", mt.get()[3]);
			assertEquals("2", mt.get()[3]);
		}
	}

	@Test
	public void verifyUniqueSingleColumn() throws IOException {
		try( FileRowSupplier visits = openIcd();
				MemoryTable mt = new MemoryTable(visits) ){
			// sorting by single column
			mt.sort(new int[] {3});
			mt.unique(new int[] {3});
			// now we should have only one row with visit 1
			assertEquals("1", mt.get()[3]);
			assertEquals("11", mt.get()[3]);
			assertEquals("2", mt.get()[3]);
			assertEquals("3", mt.get()[3]);
			assertEquals("4", mt.get()[3]);
			assertEquals("5", mt.get()[3]);
			assertEquals("6", mt.get()[3]);
			assertEquals("7", mt.get()[3]);
			// in total without duplicates, there should be 8 rows
			assertEquals(8, mt.getRowCount());
		}
	}
}
......@@ -6,9 +6,16 @@
</meta>
<patient-table>
<source xsi:type="csv-file">
<source xsi:type="csv-filtered">
<url>p21khg/_PAT.csv</url>
<separator>;</separator>
<convert
<sort-cols>
<col>Patid</col>
</sort-cols>
<unique-filter>
<col>Patid</col>
</unique-filter>
</source>
<idat>
<patient-id column="Patientennummer"/>
......