Commit c3bed170 authored by R.W.Majeed

validator for etl import

parent f14ffe9b
package de.sekmi.histream.etl.validation;
import de.sekmi.histream.DateTimeAccuracy;
import de.sekmi.histream.Observation;
/**
 * Pair of an observation start time and a concept id.
 * <p>
 * Instances compare equal when both the concept and the start time are
 * equal (a {@code null} component only equals another {@code null}).
 * {@link #hashCode()} is derived from the same two components, so the
 * type can be used as element of hash based collections.
 */
public class StartPlusConcept{
	DateTimeAccuracy start;
	String concept;

	/**
	 * Creates a pair from its two components.
	 *
	 * @param start start time, may be {@code null}
	 * @param concept concept id, may be {@code null}
	 */
	public StartPlusConcept(DateTimeAccuracy start, String concept){
		this.start = start;
		this.concept = concept;
	}

	/**
	 * Creates a pair from the start time and concept id of an observation.
	 *
	 * @param t observation to read start time and concept id from
	 */
	public StartPlusConcept(Observation t) {
		this(t.getStartTime(), t.getConceptId());
	}

	@Override
	public int hashCode() {
		int conceptHash = (concept != null) ? concept.hashCode() : 0;
		int startHash = (start != null) ? start.hashCode() : 0;
		// same value as the usual "result = 31 * result + h" chain seeded with 1
		return 31 * (31 + conceptHash) + startHash;
	}

	@Override
	public boolean equals(Object obj) {
		if (this == obj) {
			return true;
		}
		if (obj == null || getClass() != obj.getClass()) {
			return false;
		}
		StartPlusConcept that = (StartPlusConcept) obj;
		return sameOrBothNull(concept, that.concept)
				&& sameOrBothNull(start, that.start);
	}

	/** Null tolerant equality: {@code null} equals only {@code null}. */
	private static boolean sameOrBothNull(Object mine, Object theirs) {
		return (mine == null) ? (theirs == null) : mine.equals(theirs);
	}
}
\ No newline at end of file
package de.sekmi.histream.etl.validation;
import de.sekmi.histream.ObservationException;
/**
 * Exception type thrown by the validation package when an observation
 * fails a validation check. It adds no state of its own and only carries
 * the message handed to the constructor.
 */
public class ValidationException extends ObservationException {

	/** Version identifier for Java serialization. */
	private static final long serialVersionUID = 1L;

	/**
	 * Creates a validation exception with the given message.
	 *
	 * @param message description of the failed check
	 */
	public ValidationException(String message) {
		super(message);
	}
}
package de.sekmi.histream.etl.validation;
import java.util.HashSet;
import java.util.Objects;
import java.util.Set;
import de.sekmi.histream.Observation;
import de.sekmi.histream.ObservationException;
import de.sekmi.histream.impl.AbstractObservationHandler;
/**
 * Observation handler which checks a sequence of observations for duplicates.
 * <p>
 * Three checks are performed, in this order of priority:
 * <ol>
 *  <li>Duplicate patient: a patient id shows up again after observations
 *      of a different patient were processed in between.</li>
 *  <li>Duplicate encounter (if enabled): within the current patient, an
 *      encounter id shows up again after a different encounter was
 *      processed in between.</li>
 *  <li>Duplicate concept (if enabled): within the current encounter, the
 *      same combination of start time and concept id occurs more than once.</li>
 * </ol>
 * At most one {@link ValidationException} is raised per observation.
 * All tracking state is updated before an exception is thrown, so the
 * validator stays consistent for the observations that follow.
 */
public class Validator extends AbstractObservationHandler{
	/** Patient id of the most recently processed observation. */
	private String prevPatient;
	/** Encounter id of the most recently processed observation. */
	private String prevVisit;
	/** All patient ids encountered so far. */
	private final Set<String> patients;
	/** Encounter ids encountered for the current patient. */
	private final Set<String> visits;
	/** Start+concept tuples encountered for the current encounter. */
	private final Set<StartPlusConcept> concepts;
	private boolean duplicateVisitCheck;
	private boolean duplicateConceptCheck;

	/**
	 * Creates a validator with the duplicate encounter check and the
	 * duplicate concept check both enabled.
	 */
	public Validator(){
		patients = new HashSet<>();
		visits = new HashSet<>();
		concepts = new HashSet<>();
		this.duplicateVisitCheck = true;
		this.duplicateConceptCheck = true;
	}

	/**
	 * Validates a single observation against the previously processed ones.
	 *
	 * @param t observation to validate
	 * @throws ObservationException a {@link ValidationException} if the
	 *  observation introduces a duplicate patient, encounter or
	 *  start+concept tuple
	 */
	@Override
	protected void acceptOrException(Observation t) throws ObservationException {
		String patid = t.getPatientId();
		String encid = t.getEncounterId();

		boolean patientChanged = prevPatient == null || !prevPatient.equals(patid);
		// null safe comparison: the encounter id may be missing
		boolean visitChanged = patientChanged || !Objects.equals(prevVisit, encid);

		if( patientChanged ){
			// encounter ids are only compared within one patient
			visits.clear();
			prevPatient = patid; // remember patient to suppress errors for the same patient
		}
		if( visitChanged ){
			// start+concept tuples are only compared within one encounter,
			// otherwise tuples of earlier patients/encounters cause false errors
			concepts.clear();
			prevVisit = encid; // remember encounter to suppress errors for the same encounter
		}

		// Update all tracking sets before throwing anything. Set.add returns
		// false if the element was already present.
		boolean duplicatePatient = patientChanged && !patients.add(patid);
		boolean visitSeenBefore = visitChanged && !visits.add(encid);
		// the first observation of a patient/encounter is recorded too,
		// so that a later repetition of it is detected
		boolean duplicateConcept = duplicateConceptCheck && !concepts.add(new StartPlusConcept(t));

		if( duplicatePatient ){
			throw new ValidationException("Duplicate patient '"+patid+"'");
		}
		if( duplicateVisitCheck && visitSeenBefore ){
			throw new ValidationException("Duplicate encounter '"+encid+"' for patient '"+patid+"'");
		}
		if( duplicateConcept ){
			throw new ValidationException("Duplicate concept: patid="+patid+", visit="+encid+", concept="+t.getConceptId()+", start="+t.getStartTime());
		}
	}

	/**
	 * Meta information is not used by the validator; the call is ignored.
	 *
	 * @param key meta key (ignored)
	 * @param value meta value (ignored)
	 */
	@Override
	public void setMeta(String key, String value) {
		// intentionally empty
	}
}
package de.sekmi.histream.etl.validation;
import org.junit.Test;
import de.sekmi.histream.ObservationSupplier;
import de.sekmi.histream.etl.ETLObservationSupplier;
import de.sekmi.histream.io.Streams;
/**
 * Runs the {@link Validator} against the bundled test data source.
 */
public class TestValidator {

	/**
	 * Streams all observations of {@code /data/test-1-datasource.xml}
	 * through a validator. Any error reported to the validator's error
	 * handler is rethrown as {@link RuntimeException}, which fails the test.
	 *
	 * @throws Exception if loading, transferring or closing the supplier fails
	 */
	@Test
	public void validateData1() throws Exception{
		try( ObservationSupplier supplier = ETLObservationSupplier.load(
				getClass().getResource("/data/test-1-datasource.xml")) ){
			Validator validator = new Validator();
			// escalate validation errors instead of silently collecting them
			validator.setErrorHandler(error -> {
				throw new RuntimeException(error);
			});
			Streams.transfer(supplier, validator);
		}
	}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment