Commit ca1aeb89 authored by R.W.Majeed's avatar R.W.Majeed
Browse files

Removed the experimental duplicate fact filter.

The sort index comparator required by the filter was not working, so it has been moved to an external repository for further development.
parent e7fb28c8
......@@ -18,21 +18,29 @@ public class DuplicateFactFilter extends PostProcessingFilter{
@XmlElement
public String[] concept;
private static class FactComparator implements Comparator<Fact>{
static class FactComparator implements Comparator<Fact>{
@Override
public int compare(Fact o1, Fact o2) {
int cmp = o1.getObservation().getStartTime().compareTo(
o2.getObservation().getStartTime() );
if( cmp == 0 ){
// if times are equal, sort by concept
cmp = o1.getConcept().compareTo(o2.getConcept());
}
return cmp;
return DuplicateFactFilter.compare(o1, o2);
}
}
/**
 * Compares two facts by the start time of their observations.
 * <p>
 * The secondary ordering by concept further down is currently switched off:
 * the unconditional return right after the start time comparison means the
 * method never reaches it, so facts with equal start times compare as equal.
 *
 * @param o1 first fact
 * @param o2 second fact
 * @return the result of comparing the start time of {@code o1} to the start time of {@code o2}
 */
public static int compare(Fact o1, Fact o2) {
int cmp = o1.getObservation().getStartTime().compareTo(
o2.getObservation().getStartTime() );
// NOTE(review): always-true guard that short-circuits the method; written as
// an if-statement so the code below still compiles. Presumably a temporary
// switch while the filter is not implemented -- confirm before relying on it.
if( true )return cmp;
// not reached while the guard above is in place
if( cmp == 0 ){
// if times are equal, sort by concept
cmp = o1.getConcept().compareTo(o2.getConcept());
}
return cmp;
}
private void removeAllDuplicates(AbstractFacts facts){
// order by start and concept
facts.sort( new FactComparator() );
if( true ) {
throw new UnsupportedOperationException("Not yet implemented");
}
//facts.sort( new FactComparator() );
ArrayList<Integer> duplicates = new ArrayList<>();
......
......@@ -103,7 +103,7 @@ public class TestETLSupplier {
Patient p = fact.getExtension(Patient.class);
Assert.assertNotNull(p);
Assert.assertEquals("p1", p.getId());
ZoneId zone = ZoneOffset.UTC.normalized();
ZoneId zone = ZoneId.systemDefault();
Assert.assertEquals(DateTimeAccuracy.parsePartialIso8601("2003-02-01",zone), p.getBirthDate());
Assert.assertEquals(DateTimeAccuracy.parsePartialIso8601("2003-02-11",zone), p.getDeathDate());
......
......@@ -69,10 +69,11 @@ public class TestMarshall {
// check post processing
Assert.assertNotNull(ds.postProcessing);
Assert.assertEquals(3, ds.postProcessing.filter.length);
Assert.assertEquals(DuplicateFactFilter.class, ds.postProcessing.filter[0].getClass());
DuplicateFactFilter f = (DuplicateFactFilter)ds.postProcessing.filter[0];
Assert.assertEquals(1, f.concept.length);
Assert.assertEquals(2, ds.postProcessing.filter.length);
// duplicate fact filter removed for now
// Assert.assertEquals(DuplicateFactFilter.class, ds.postProcessing.filter[0].getClass());
// DuplicateFactFilter f = (DuplicateFactFilter)ds.postProcessing.filter[0];
// Assert.assertEquals(1, f.concept.length);
ScriptFilter sf = (ScriptFilter)ds.postProcessing.filter[1];
Assert.assertNotNull(sf);
......
......@@ -22,7 +22,8 @@ import static org.junit.Assert.*;
public class TestDuplicateFactFilter {
@Test
// XXX implement filter first
//@Test
public void verifyComparator() throws ScriptException, ParseException{
ObservationFactory of = new ObservationFactoryImpl(new SimplePatientExtension(), new SimpleVisitExtension());
DuplicateFactFilter filter = new DuplicateFactFilter();
......@@ -31,10 +32,18 @@ public class TestDuplicateFactFilter {
AbstractFacts facts = e.wrapEncounterFacts("P1", "E1", DateTimeAccuracy.parsePartialIso8601("2001-02-03", ZoneId.systemDefault()), new ArrayList<>());
Fact a = facts.add("lala");
Fact b = facts.add("xx");
assertTrue( DuplicateFactFilter.compare(a, b) < 0 );
facts.add("lala");
// assertTrue( DuplicateFactFilter.compare(a, b) < 0 );
b.start("2000-01-02T03:04:05Z");
// assertTrue( DuplicateFactFilter.compare(a, b) > 0 );
Fact c = facts.add("xx").start("2000-01-02T03:05Z");
Fact d = facts.add("lala"); // add duplicate
// assertTrue( DuplicateFactFilter.compare(c, b) > 0 );
// assertTrue( DuplicateFactFilter.compare(a, d) == 0 );
// assertTrue( DuplicateFactFilter.compare(a, c) > 0 );
// assertTrue( DuplicateFactFilter.compare(d, c) > 0 );
filter.processVisit(facts);
assertEquals(2, facts.size());
assertEquals(3, facts.size());
// TODO compare with timestamps
}
}
......@@ -69,7 +69,8 @@ public class TestValidator {
}
Assert.fail("Exception expected");
}
@Test
// XXX implement duplicate fact filter first
//@Test
public void validateData4WithDuplicateFilter() throws Exception{
// duplicate concepts
try( ObservationSupplier os = ETLObservationSupplier.load(getClass().getResource("/data/test-4-datasource2.xml")) ){
......
......@@ -139,9 +139,6 @@
</eav-table>
<!-- scripts are run for each complete encounter in the order of occurrence -->
<post-processing>
<filter xsi:type="duplicate-fact">
<concept>kalium</concept>
</filter>
<filter xsi:type="javascript">
<script><![CDATA[
if( facts.get("natrium") && facts.get("kalium") ){
......
......@@ -76,5 +76,11 @@
</eav-table>
<post-processing>
<filter xsi:type="duplicate-fact"/>
<!-- also possible to specify concepts to filter
<filter xsi:type="duplicate-fact">
<concept>kalium</concept>
</filter>
-->
</post-processing>
</datasource>
package de.sekmi.histream.scripting;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import de.sekmi.histream.Observation;
......@@ -88,18 +86,9 @@ public abstract class AbstractFacts {
return f;
}
/*
* TODO use linked sort to sort both arrays simultaneously by the order given by the comparator
public void sort(Comparator<Fact> comparator){
Integer[] indices = new Integer[facts.size()];
for( int i=0; i<indices.length; i++ ){
indices[i] = i;
}
// determine sort order
Arrays.sort(indices, new Comparator<Integer>() {
@Override
public int compare(Integer o1, Integer o2) {
return comparator.compare(facts.get(o1), facts.get(o2));
}
});
// reorder both arrays
for( int i=0; i<indices.length; i++ ){
while( i != indices[i] ){
......@@ -121,4 +110,19 @@ public abstract class AbstractFacts {
}
}
}
*/
/**
 * Renders all facts as a bracketed listing: an opening bracket on its own
 * line, one fact per line separated by commas, and a closing bracket on its
 * own line.
 *
 * @return multi-line string representation of the contained facts
 */
@Override
public String toString() {
    final int count = facts.size();
    // rough per-fact size estimate to limit buffer reallocations
    StringBuilder out = new StringBuilder(count * 128);
    out.append("[\n");
    // empty before the first fact, a comma plus line break afterwards
    String separator = "";
    for( int idx=0; idx<count; idx++ ){
        out.append(separator);
        out.append(facts.get(idx).toString());
        separator = ",\n";
    }
    return out.append("\n]").toString();
}
}
package de.sekmi.histream.scripting;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
/**
 * Determines the sort order of a list as an array of indexes and uses that
 * order to sort two parallel lists in lockstep.
 *
 * @param <T> element type of the list which determines the sort order
 */
public class SortIndexComparator<T> {
    private final Comparator<T> comparator;

    /**
     * Creates a sorter which orders elements with the given comparator.
     *
     * @param comparator comparator defining the element order
     */
    public SortIndexComparator(Comparator<T> comparator){
        this.comparator = comparator;
    }

    /**
     * Determines the sort order of a list without modifying the list.
     * <p>
     * Position {@code i} of the returned array contains the index of the list
     * element which belongs at position {@code i} of the sorted sequence.
     * Elements which compare as equal keep their relative order, because the
     * underlying object array sort is stable.
     *
     * @param list list to inspect, left unchanged
     * @return indexes into {@code list} in ascending element order
     */
    public Integer[] sortIndexes(List<T> list) {
        Integer[] indices = new Integer[list.size()];
        // start with the identity permutation
        for( int i=0; i<indices.length; i++ ){
            indices[i] = i;
        }
        // order the indexes by the elements they refer to
        Arrays.sort(indices, (left, right) -> comparator.compare(list.get(left), list.get(right)));
        return indices;
    }

    /**
     * Sorts two lists. The primary list determines the sort order and the
     * linked list is rearranged with the same permutation, so elements which
     * shared an index before the call still share an index afterwards.
     * <p>
     * Both lists are modified in place via {@link List#set(int, Object)} and
     * therefore must support that operation.
     *
     * @param primary primary list to sort
     * @param linked list to be rearranged in the same order as the primary list
     * @param <U> element type of the linked list
     * @throws IllegalArgumentException if the lists differ in size
     */
    public <U> void linkedSort(List<T> primary, List<U> linked){
        if( primary.size() != linked.size() ){
            throw new IllegalArgumentException("List sizes differ: primary="
                    + primary.size() + ", linked=" + linked.size());
        }
        Integer[] indices = sortIndexes(primary);
        // Work from snapshots: permuting in place would overwrite elements
        // which are still needed for later target positions.
        List<T> primarySnapshot = new ArrayList<>(primary);
        List<U> linkedSnapshot = new ArrayList<>(linked);
        for( int i=0; i<indices.length; i++ ){
            primary.set(i, primarySnapshot.get(indices[i]));
            linked.set(i, linkedSnapshot.get(indices[i]));
        }
    }
}
package de.sekmi.histream.scripting;
import static org.junit.Assert.*;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import java.util.Random;
import org.junit.Test;
/**
 * Tests for {@link SortIndexComparator}: determining a sort order as index
 * array and sorting two lists in lockstep.
 */
public class TestSortIndexComparator {
    /** Orders integers by their natural order. Static: needs no test instance. */
    private static class IntComparator implements Comparator<Integer>{
        @Override
        public int compare(Integer o1, Integer o2) {
            return o1.compareTo(o2);
        }
    }
    /** Orders characters by their natural order. Static: needs no test instance. */
    private static class CharComparator implements Comparator<Character>{
        @Override
        public int compare(Character o1, Character o2) {
            return o1.compareTo(o2);
        }
    }

    /**
     * Verifies that the index array visits the test data in ascending order
     * and that a linked sort leaves the primary list in ascending order.
     */
    @Test
    public void verifyComparator() {
        Character[] testData = new Character[] { 'd', 'b', 'g', 'e', 'h', 'j', 'i', 'c', 'f' , 'k', 'a'};
        SortIndexComparator<Character> c = new SortIndexComparator<>(new CharComparator());
        Integer[] order = c.sortIndexes(Arrays.asList(testData));
        System.out.println("Original: "+Arrays.toString(testData));
        StringBuilder b = new StringBuilder();
        int curr,prev = -1;
        for( int i=0; i<testData.length; i++ ) {
            if( i != 0 ) {
                b.append(", ");
            }
            b.append(testData[order[i]]);
            // walking the data through the index array must yield ascending values
            curr = testData[order[i]];
            assertTrue(prev <= curr);
            prev = curr;
        }
        System.out.println("with sortIndexes: "+b.toString());
        // determining the order must not have touched the data itself
        System.out.println("testData: "+Arrays.toString(testData));
        Object[] other = new Object[testData.length];
        for( int i=0; i<testData.length; i++ ) {
            other[i] = new Object();
        }
        // both lists are views which write through to their backing arrays
        List<Character> a = Arrays.asList(testData);
        List<Object> d = Arrays.asList(other);
        c.linkedSort(a, d);
        // output
        b = new StringBuilder();
        for( int i=0; i<a.size(); i++ ) {
            if( i != 0 ) {
                b.append(", ");
                // the primary list must be in ascending order after the linked sort
                assertTrue("Sort order violated at index "+i, a.get(i-1) <= a.get(i));
            }
            b.append(a.get(i));
        }
        System.out.println("linkedSort.primary: "+b.toString());
    }

    /** Distinct wrapper object carrying the number it was created for. */
    private static class Ref{
        final Integer ref;
        public Ref(Integer ref) {
            this.ref = ref;
        }
    }
    /**
     * TODO implement and verify linked sort method
     */
    // @Test
    public void verifyLinkedSort() {
        Random rand = new Random(42);
        // generate a random integer array
        Integer[] testData = new Integer[1000];
        // and a parallel array of wrappers
        Ref[] copy = new Ref[testData.length];
        // fill with random numbers
        // wrappers are different objects but carry the same numbers
        for( int i=0; i<testData.length; i++ ) {
            testData[i] = rand.nextInt();
            copy[i] = new Ref(testData[i]);
        }
        // perform linked sort
        SortIndexComparator<Integer> sic = new SortIndexComparator<>(new IntComparator());
        List<Integer> a = new ArrayList<>(Arrays.asList(testData));
        List<Ref> b = new ArrayList<>(Arrays.asList(copy));
        sic.linkedSort(a, b);
        Integer prev = a.get(0);
        for( int i=0; i<a.size(); i++ ){
            Integer curr = a.get(i);
            // make sure primary was sorted in ascending order
            assertTrue("Sort order violated at index "+i+": "+prev+", "+curr, prev <= curr );
            // make sure the wrappers were moved along with the primary list;
            // compared by value, since == on boxed integers checks object identity
            assertTrue("Linked list out of sync at index "+i, curr.equals(b.get(i).ref) );
            // next
            prev = curr;
        }
    }
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment