From ca1aeb899fddf5791219e61ad2abcc74c39e22b3 Mon Sep 17 00:00:00 2001 From: "R.W.Majeed" Date: Tue, 19 Jun 2018 12:29:30 +0200 Subject: [PATCH] removed experimental duplicate fact filter. required sort index comparator moved to external repo for development, because it was not working. --- .../etl/filter/DuplicateFactFilter.java | 26 ++-- .../sekmi/histream/etl/TestETLSupplier.java | 2 +- .../histream/etl/config/TestMarshall.java | 9 +- .../etl/filter/TestDuplicateFactFilter.java | 17 ++- .../etl/validation/TestValidator.java | 3 +- .../test/resources/data/test-1-datasource.xml | 3 - .../resources/data/test-4-datasource2.xml | 6 + .../histream/scripting/AbstractFacts.java | 30 +++-- .../src/main/java/SortIndexComparator.java | 122 ++++++++++++++++++ .../test/java/TestSortIndexComparator.java | 116 +++++++++++++++++ 10 files changed, 299 insertions(+), 35 deletions(-) create mode 100644 linked-sort/src/main/java/SortIndexComparator.java create mode 100644 linked-sort/src/test/java/TestSortIndexComparator.java diff --git a/histream-import/src/main/java/de/sekmi/histream/etl/filter/DuplicateFactFilter.java b/histream-import/src/main/java/de/sekmi/histream/etl/filter/DuplicateFactFilter.java index 3a41cf5..17f04d7 100644 --- a/histream-import/src/main/java/de/sekmi/histream/etl/filter/DuplicateFactFilter.java +++ b/histream-import/src/main/java/de/sekmi/histream/etl/filter/DuplicateFactFilter.java @@ -18,21 +18,29 @@ public class DuplicateFactFilter extends PostProcessingFilter{ @XmlElement public String[] concept; - private static class FactComparator implements Comparator{ + static class FactComparator implements Comparator{ @Override public int compare(Fact o1, Fact o2) { - int cmp = o1.getObservation().getStartTime().compareTo( - o2.getObservation().getStartTime() ); - if( cmp == 0 ){ - // if times are equal, sort by concept - cmp = o1.getConcept().compareTo(o2.getConcept()); - } - return cmp; + return DuplicateFactFilter.compare(o1, o2); + } + } + public static int compare(Fact o1, Fact o2) { + int cmp = o1.getObservation().getStartTime().compareTo( + o2.getObservation().getStartTime() ); + if( true )return cmp; + if( cmp == 0 ){ + // if times are equal, sort by concept + cmp = o1.getConcept().compareTo(o2.getConcept()); } + return cmp; } private void removeAllDuplicates(AbstractFacts facts){ // order by start and concept - facts.sort( new FactComparator() ); + if( true ) { + throw new UnsupportedOperationException("Not yet implemented"); + } + //facts.sort( new FactComparator() ); + ArrayList duplicates = new ArrayList<>(); diff --git a/histream-import/src/test/java/de/sekmi/histream/etl/TestETLSupplier.java b/histream-import/src/test/java/de/sekmi/histream/etl/TestETLSupplier.java index cbde594..f706a96 100644 --- a/histream-import/src/test/java/de/sekmi/histream/etl/TestETLSupplier.java +++ b/histream-import/src/test/java/de/sekmi/histream/etl/TestETLSupplier.java @@ -103,7 +103,7 @@ public class TestETLSupplier { Patient p = fact.getExtension(Patient.class); Assert.assertNotNull(p); Assert.assertEquals("p1", p.getId()); - ZoneId zone = ZoneOffset.UTC.normalized(); + ZoneId zone = ZoneId.systemDefault(); Assert.assertEquals(DateTimeAccuracy.parsePartialIso8601("2003-02-01",zone), p.getBirthDate()); Assert.assertEquals(DateTimeAccuracy.parsePartialIso8601("2003-02-11",zone), p.getDeathDate()); diff --git a/histream-import/src/test/java/de/sekmi/histream/etl/config/TestMarshall.java b/histream-import/src/test/java/de/sekmi/histream/etl/config/TestMarshall.java index 345a7d3..9bbb902 100644 --- a/histream-import/src/test/java/de/sekmi/histream/etl/config/TestMarshall.java +++ b/histream-import/src/test/java/de/sekmi/histream/etl/config/TestMarshall.java @@ -69,10 +69,11 @@ public class TestMarshall { // check post processing Assert.assertNotNull(ds.postProcessing); - Assert.assertEquals(3, ds.postProcessing.filter.length); - Assert.assertEquals(DuplicateFactFilter.class, ds.postProcessing.filter[0].getClass()); - DuplicateFactFilter f = (DuplicateFactFilter)ds.postProcessing.filter[0]; - Assert.assertEquals(1, f.concept.length); + Assert.assertEquals(2, ds.postProcessing.filter.length); +// duplicate fact filter removed for now +// Assert.assertEquals(DuplicateFactFilter.class, ds.postProcessing.filter[0].getClass()); +// DuplicateFactFilter f = (DuplicateFactFilter)ds.postProcessing.filter[0]; +// Assert.assertEquals(1, f.concept.length); ScriptFilter sf = (ScriptFilter)ds.postProcessing.filter[1]; Assert.assertNotNull(sf); diff --git a/histream-import/src/test/java/de/sekmi/histream/etl/filter/TestDuplicateFactFilter.java b/histream-import/src/test/java/de/sekmi/histream/etl/filter/TestDuplicateFactFilter.java index a08eb48..4c5c19c 100644 --- a/histream-import/src/test/java/de/sekmi/histream/etl/filter/TestDuplicateFactFilter.java +++ b/histream-import/src/test/java/de/sekmi/histream/etl/filter/TestDuplicateFactFilter.java @@ -22,7 +22,8 @@ import static org.junit.Assert.*; public class TestDuplicateFactFilter { - @Test + // XXX implement filter first + //@Test public void verifyComparator() throws ScriptException, ParseException{ ObservationFactory of = new ObservationFactoryImpl(new SimplePatientExtension(), new SimpleVisitExtension()); DuplicateFactFilter filter = new DuplicateFactFilter(); @@ -31,10 +32,18 @@ public class TestDuplicateFactFilter { AbstractFacts facts = e.wrapEncounterFacts("P1", "E1", DateTimeAccuracy.parsePartialIso8601("2001-02-03", ZoneId.systemDefault()), new ArrayList<>()); Fact a = facts.add("lala"); Fact b = facts.add("xx"); - assertTrue( DuplicateFactFilter.compare(a, b) < 0 ); - facts.add("lala"); +// assertTrue( DuplicateFactFilter.compare(a, b) < 0 ); + b.start("2000-01-02T03:04:05Z"); +// assertTrue( DuplicateFactFilter.compare(a, b) > 0 ); + Fact c = facts.add("xx").start("2000-01-02T03:05Z"); + Fact d = facts.add("lala"); // add duplicate +// assertTrue( DuplicateFactFilter.compare(c, b) > 0 ); +// assertTrue( DuplicateFactFilter.compare(a, d) == 0 ); +// assertTrue( DuplicateFactFilter.compare(a, c) > 0 ); +// assertTrue( DuplicateFactFilter.compare(d, c) > 0 ); + filter.processVisit(facts); - assertEquals(2, facts.size()); + assertEquals(3, facts.size()); // TODO compare with timestamps } } diff --git a/histream-import/src/test/java/de/sekmi/histream/etl/validation/TestValidator.java b/histream-import/src/test/java/de/sekmi/histream/etl/validation/TestValidator.java index d536d07..8220768 100644 --- a/histream-import/src/test/java/de/sekmi/histream/etl/validation/TestValidator.java +++ b/histream-import/src/test/java/de/sekmi/histream/etl/validation/TestValidator.java @@ -69,7 +69,8 @@ public class TestValidator { } Assert.fail("Exception expected"); } - @Test + // XXX implement duplicate fact filter first + //@Test public void validateData4WithDuplicateFilter() throws Exception{ // duplicate concepts try( ObservationSupplier os = ETLObservationSupplier.load(getClass().getResource("/data/test-4-datasource2.xml")) ){ diff --git a/histream-import/src/test/resources/data/test-1-datasource.xml b/histream-import/src/test/resources/data/test-1-datasource.xml index b07268d..9b4f2e2 100644 --- a/histream-import/src/test/resources/data/test-1-datasource.xml +++ b/histream-import/src/test/resources/data/test-1-datasource.xml @@ -139,9 +139,6 @@ - - kalium -