Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
10
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Open sidebar
Raphael
histream
Commits
d284a522
Commit
d284a522
authored
Feb 16, 2018
by
R.W.Majeed
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
implementation of duplicate filter
parent
2fd1ed3e
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
54 additions
and
2 deletions
+54
-2
histream-import/src/main/java/de/sekmi/histream/etl/filter/DuplicateFactFilter.java
...ava/de/sekmi/histream/etl/filter/DuplicateFactFilter.java
+54
-2
No files found.
histream-import/src/main/java/de/sekmi/histream/etl/filter/DuplicateFactFilter.java
View file @
d284a522
package
de.sekmi.histream.etl.filter
;
import
java.util.ArrayList
;
import
java.util.Collections
;
import
java.util.Comparator
;
import
java.util.HashSet
;
import
javax.xml.bind.annotation.XmlElement
;
import
javax.xml.bind.annotation.XmlType
;
import
de.sekmi.histream.DateTimeAccuracy
;
import
de.sekmi.histream.scripting.AbstractFacts
;
import
de.sekmi.histream.scripting.Fact
;
@XmlType
(
name
=
"duplicate-fact"
)
public
class
DuplicateFactFilter
extends
PostProcessingFilter
{
...
...
@@ -11,10 +18,55 @@ public class DuplicateFactFilter extends PostProcessingFilter{
/**
 * Concept identifiers configured for this filter, bound as an XML element.
 * NOTE(review): the visible {@code processVisit} does not read this array
 * yet; its TODO mentions limiting the filter to these concepts.
 */
@XmlElement
public String[] concept;
/**
 * Orders facts by the start time of their observation. Facts whose start
 * times compare as equal are ordered by their concept string.
 */
private static class FactComparator implements Comparator<Fact>{
	/**
	 * @param o1 first fact
	 * @param o2 second fact
	 * @return result of comparing the start times, or, if that is zero,
	 *         the result of comparing the concepts
	 */
	@Override
	public int compare(Fact o1, Fact o2) {
		DateTimeAccuracy leftStart = o1.getObservation().getStartTime();
		DateTimeAccuracy rightStart = o2.getObservation().getStartTime();
		int byStart = leftStart.compareTo(rightStart);
		if( byStart != 0 ){
			// start times differ, concept is irrelevant
			return byStart;
		}
		// same start time: the concept decides
		return o1.getConcept().compareTo(o2.getConcept());
	}
}
/**
 * Removes every fact that has the same start time and the same concept as
 * the fact directly before it in sorted order. Of a run of such facts,
 * only the first one remains.
 * <p>
 * The facts are sorted via {@code facts.sort} before the scan, so the
 * order of the supplied facts is presumably changed as a side effect.
 * </p>
 *
 * @param facts facts to sort and to remove duplicates from
 */
private void removeAllDuplicates(AbstractFacts facts){
	// sorting by start and concept places duplicates next to each other
	facts.sort(new FactComparator());

	// first pass: remember the positions of facts matching their predecessor
	ArrayList<Integer> doomed = new ArrayList<>();
	DateTimeAccuracy lastStart = null;
	String lastConcept = null;
	for( int pos=0; pos<facts.size(); pos++ ){
		Fact current = facts.get(pos);
		// lastStart is still null for the first fact, which has no predecessor
		if( lastStart != null
				&& lastStart.equals(current.getObservation().getStartTime())
				&& lastConcept.equals(current.getConcept()) ){
			doomed.add(pos);
		}
		// the current fact becomes the predecessor of the next one
		lastStart = current.getObservation().getStartTime();
		lastConcept = current.getConcept();
	}

	// second pass: delete from the back so that earlier positions stay valid
	for( int k=doomed.size()-1; k>=0; k-- ){
		int index = doomed.get(k);
		facts.removeIndex(index);
	}
}
/**
 * Removes duplicate facts from the supplied facts by delegating to
 * {@link #removeAllDuplicates(AbstractFacts)}.
 * <p>
 * The configured {@code concept} array is not consulted here; duplicates
 * are removed regardless of their concept.
 * </p>
 *
 * @param facts facts to process
 */
@Override
public void processVisit(AbstractFacts facts) {
	// TODO implement for limited concepts, e.g. with a set for O(1) lookup:
	// HashSet<String> match = new HashSet<>(concept.length);
	// Collections.addAll(match, concept);
	removeAllDuplicates(facts);
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment