Commit e133de42 authored by R.W.Majeed's avatar R.W.Majeed

validator transformation can drop duplicate elements

parent cc5347d1
......@@ -18,7 +18,7 @@ public class DuplicateConceptException extends ValidationException {
private static final long serialVersionUID = 1L;
public DuplicateConceptException(Observation fact) {
super("Duplicate fact patient="+fact.getPatientId()+", visit="+fact.getEncounterId()+", start="+fact.getStartTime()+", concept="+fact.getConceptId());
super("Duplicate concept '"+fact.getConceptId()+"' for patient="+fact.getPatientId()+", visit="+fact.getEncounterId()+" at start="+fact.getStartTime());
setObservation(fact);
}
}
......@@ -3,12 +3,21 @@ package de.sekmi.histream.etl.validation;
import java.util.HashSet;
import java.util.Set;
import java.util.function.Consumer;
import de.sekmi.histream.Observation;
import de.sekmi.histream.ObservationException;
import de.sekmi.histream.impl.AbstractObservationHandler;
import de.sekmi.histream.io.transform.Transformation;
import de.sekmi.histream.io.transform.TransformationException;
public class Validator extends AbstractObservationHandler{
/**
* Validates facts coming from a grouped patient stream.
*
* @author R.W.Majeed
*
*/
public class Validator extends AbstractObservationHandler implements Transformation{
private String prevPatient;
private String prevVisit;
......@@ -16,19 +25,42 @@ public class Validator extends AbstractObservationHandler{
private Set<String> visits;
private Set<StartPlusConcept> concepts;
private Consumer<Observation> droppedConceptHandler;
private boolean duplicateVisitCheck;
private boolean duplicateConceptCheck;
public Validator(){
public Validator(boolean duplicateVisitCheck, boolean duplicateConceptCheck){
patients = new HashSet<>();
visits = new HashSet<>();
concepts = new HashSet<>();
this.duplicateVisitCheck = true;
this.duplicateConceptCheck = true;
this.duplicateVisitCheck = duplicateVisitCheck;
this.duplicateConceptCheck = duplicateConceptCheck;
}
/**
 * Configure the validator to drop duplicate concepts instead of reporting them as errors.
 * This only has an effect if the validator is used as a {@link Transformation}:
 * the configured action is consulted in {@code transform}, not in the plain handler path.
 *
 * @param droppedConceptAction action to perform for each dropped fact
 * @throws NullPointerException if the argument is null
 * @throws UnsupportedOperationException if the validator was configured not to validate concepts
 */
public void dropDuplicateConcepts(Consumer<Observation> droppedConceptAction)throws NullPointerException, UnsupportedOperationException{
	// enforce the documented contract: a null action is rejected up front
	if( droppedConceptAction == null ){
		throw new NullPointerException("droppedConceptAction");
	}
	// without the duplicate concept check no duplicates are detected, so nothing could be dropped
	if( !duplicateConceptCheck ){
		throw new UnsupportedOperationException("need duplicateConceptCheck to dropDuplicateConcepts");
	}
	this.droppedConceptHandler = droppedConceptAction;
}
@Override
protected void acceptOrException(Observation t) throws ObservationException {
/**
* Validate facts in a grouped patient stream
* @param t fact to validate
* @throws DuplicatePatientException duplicate patient outside of grouped patient
* @throws DuplicateVisitException duplicate visit outside of grouped visit
* @throws DuplicateConceptException duplicate concept id for same visit and timestamp
*/
public void validateFact(Observation t)throws DuplicatePatientException, DuplicateVisitException, DuplicateConceptException{
String patid = t.getPatientId();
String encid = t.getEncounterId();
......@@ -70,7 +102,10 @@ public class Validator extends AbstractObservationHandler{
}
}
}
}
/**
 * Handler entry point: delegates the fact to {@link #validateFact(Observation)}.
 *
 * @param t fact to validate
 * @throws ObservationException any exception raised by {@code validateFact}
 */
@Override
protected void acceptOrException(Observation t) throws ObservationException {
	this.validateFact(t);
}
@Override
......@@ -79,4 +114,24 @@ public class Validator extends AbstractObservationHandler{
}
/**
 * Validates the fact and decides whether it stays in the stream.
 * <p>
 * A fact that passes {@code validateFact} is returned unchanged. A duplicate
 * concept is handed to the configured dropped-concept action and removed from
 * the stream (by returning {@code null}) if such an action was configured;
 * otherwise it is passed to {@code reportError}. Duplicate patients and visits
 * are always passed to {@code reportError}. Whenever {@code reportError}
 * returns normally, the fact is returned unchanged.
 *
 * @param fact fact to validate
 * @param generatedReceiver not used by this transformation
 * @return the fact itself, or {@code null} if it was dropped as a duplicate concept
 * @throws TransformationException declared by the interface
 */
@Override
public Observation transform(Observation fact, Consumer<Observation> generatedReceiver)
		throws TransformationException {
	try {
		validateFact(fact);
		return fact;
	} catch (DuplicateConceptException duplicateConcept) {
		if (droppedConceptHandler == null) {
			// dropping is not configured: report like any other validation error
			reportError(duplicateConcept);
			return fact;
		}
		// dropping is configured: notify the callback and remove the fact
		droppedConceptHandler.accept(fact);
		return null;
	} catch (DuplicatePatientException | DuplicateVisitException duplicate) {
		reportError(duplicate);
		return fact;
	}
}
}
......@@ -10,10 +10,14 @@ import de.sekmi.histream.io.Streams;
public class TestValidator {
/**
* Should validate successfully without exception
* @throws Exception should not occur
*/
@Test
public void validateData1() throws Exception{
try( ObservationSupplier os = ETLObservationSupplier.load(getClass().getResource("/data/test-1-datasource.xml")) ){
Validator v = new Validator();
Validator v = new Validator(true,true);
v.setErrorHandler(e -> {throw new RuntimeException(e);});
Streams.transfer(os, v);
}
......@@ -21,7 +25,7 @@ public class TestValidator {
@Test
public void validateData2() throws Exception{
try( ObservationSupplier os = ETLObservationSupplier.load(getClass().getResource("/data/test-2-datasource.xml")) ){
Validator v = new Validator();
Validator v = new Validator(true,true);
v.setErrorHandler(e -> {throw new RuntimeException(e);});
Streams.transfer(os, v);
}catch( RuntimeException e ){
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment