Commit c4efdf30 authored by R.W.Majeed

better implementation of export module started

parent 8c97f304
......@@ -26,6 +26,13 @@
package de.sekmi.histream.export;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
/**
 * Annotates DOM fact objects with a {@code @class} attribute.
 * <p>
 * The class id for a fact is looked up by its {@code concept} attribute:
 * exact matches registered via {@link #addMapRule(String, String)} take
 * precedence; otherwise the wildcard rules registered via
 * {@link #addWildcardRule(String, String)} are tried in insertion order and
 * the first rule whose prefix matches is used.
 *
 * @author R.W.Majeed
 */
public class FactClassAnnotator {
	/** Exact concept id to class id. */
	private final Map<String, String> conceptMap;
	/** Prefix rules, evaluated in the order they were added. */
	private final List<WildcardRule> wildcardRules;

	/** A single prefix rule: concepts starting with {@code prefix} get {@code classId}. */
	private static final class WildcardRule {
		final String prefix;
		final String classId;

		WildcardRule(String prefix, String classId) {
			this.prefix = prefix;
			this.classId = classId;
		}
	}

	/** Creates an annotator without any rules. */
	public FactClassAnnotator() {
		this.conceptMap = new HashMap<>();
		this.wildcardRules = new ArrayList<>();
	}

	/**
	 * Registers an exact mapping from a concept id to a class id.
	 * A later call with the same concept replaces the earlier mapping.
	 *
	 * @param concept concept id to match exactly
	 * @param classId class id to assign
	 */
	public void addMapRule(String concept, String classId) {
		conceptMap.put(concept, classId);
	}

	/**
	 * Registers a prefix rule. Rules are evaluated in insertion order and the
	 * first match wins, so more specific (longer) prefixes should be added
	 * before shorter ones.
	 *
	 * @param prefix concept prefix to match (without the trailing wildcard)
	 * @param classId class id to assign
	 */
	public void addWildcardRule(String prefix, String classId) {
		wildcardRules.add(new WildcardRule(prefix, classId));
	}

	/**
	 * Sets the {@code class} attribute of the given fact element according to
	 * the registered rules. The element is left unchanged if no rule matches.
	 *
	 * @param fact element node with local name {@code fact}
	 * @throws IllegalArgumentException if the node is not an element or its
	 *  local name is not {@code fact} (this includes nodes without a local
	 *  name, e.g. elements created without namespace support)
	 */
	public void annotateFact(Node fact) throws IllegalArgumentException {
		if (fact.getNodeType() != Node.ELEMENT_NODE) {
			throw new IllegalArgumentException("Fact node must be of type element");
		}
		// getLocalName() is null for DOM Level 1 nodes; compare null-safe so
		// that such nodes are rejected with the documented exception
		if (!"fact".equals(fact.getLocalName())) {
			throw new IllegalArgumentException("Local name of node '" + fact.getLocalName() + "' must be 'fact'");
		}
		Element element = (Element) fact;
		String concept = element.getAttribute("concept");
		// try to find the concept in the concept map first
		String clazz = conceptMap.get(concept);
		if (clazz == null) {
			// not found, try wildcard rules
			// wildcard rules should be ordered descending by their prefix length,
			// so that the more complex rules are matched first
			for (WildcardRule rule : wildcardRules) {
				if (concept.startsWith(rule.prefix)) {
					// got a match, stop here so later (less specific) rules
					// cannot override it
					clazz = rule.classId;
					break;
				}
			}
		}
		// if found, annotate
		if (clazz != null) {
			element.setAttribute("class", clazz);
		}
	}
}
package de.sekmi.histream.export;
import java.util.Objects;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.Result;
import javax.xml.transform.dom.DOMResult;
import org.w3c.dom.Document;
import org.w3c.dom.DocumentFragment;
import org.w3c.dom.Node;
import de.sekmi.histream.ObservationException;
import de.sekmi.histream.ext.Patient;
import de.sekmi.histream.ext.Visit;
/**
 * Abstract {@link GroupedXMLWriter} that keeps one DOM {@link DocumentFragment}
 * for the current patient and one for the current visit, and declares the
 * {@link #visitFragment(Node)} hook for subclasses.
 * <p>
 * NOTE(review): this listing appears incomplete -- closing braces, the bodies
 * of the try blocks, the invocation of {@code createDocument()} and the call
 * to {@code visitFragment} are not visible here. Confirm against the full file.
 */
public abstract class VisitFragmentParser extends GroupedXMLWriter {
// stream writer factory, configured in the constructor and used by setDOMWriter
private XMLOutputFactory factory;
// DOM document used to create the fragments; assigned in createDocument()
private Document doc;
// fragment created in beginPatient
private DocumentFragment currentPatient;
// fragment created in beginEncounter and read in endEncounter
private DocumentFragment currentVisit;
/**
 * Creates the XML output factory with namespace repairing enabled.
 */
protected VisitFragmentParser() throws XMLStreamException, ParserConfigurationException {
factory = XMLOutputFactory.newFactory();
factory.setProperty(XMLOutputFactory.IS_REPAIRING_NAMESPACES, Boolean.TRUE);
// write meta data to document node
/**
 * Replaces {@code this.writer} with a new stream writer that writes into
 * the given DOM node through a {@link DOMResult}.
 * (The {@code writer} field is not declared in this class; presumably it is
 * inherited from the superclass.)
 */
private void setDOMWriter(Node node) throws XMLStreamException{
Result result = new DOMResult(node);
this.writer = factory.createXMLStreamWriter(result);
/**
 * Creates an empty DOM document, stores it in {@code doc} and sets the
 * {@code namespaces} and {@code namespace-declarations} DOM configuration
 * parameters to true.
 */
private void createDocument() throws ParserConfigurationException{
DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
doc = builder.newDocument();
doc.getDomConfig().setParameter("namespaces", true);
doc.getDomConfig().setParameter("namespace-declarations", true);
//return doc;
/**
 * End-of-patient callback; no statements are visible for it in this listing.
 */
protected void endPatient(Patient patient) throws ObservationException {
// TODO Auto-generated method stub
/**
 * Begin-of-patient callback: creates a fresh document fragment for the patient.
 * An {@link XMLStreamException} is rethrown wrapped in an
 * {@link ObservationException}.
 * NOTE(review): the try body is empty in this listing.
 */
protected void beginPatient(Patient patient) throws ObservationException {
// write patient info to patient fragment
currentPatient = doc.createDocumentFragment();
try {
} catch (XMLStreamException e) {
throw new ObservationException(e);
/**
 * Begin-of-visit callback: creates a fresh document fragment for the visit.
 * An {@link XMLStreamException} is rethrown wrapped in an
 * {@link ObservationException}.
 * NOTE(review): the try body is empty in this listing.
 */
protected void beginEncounter(Visit visit) throws ObservationException {
// write visit info to visit fragment
currentVisit = doc.createDocumentFragment();
try {
} catch (XMLStreamException e) {
throw new ObservationException(e);
/**
 * End-of-visit callback: reads the first child of the current visit fragment.
 * NOTE(review): presumably that node is then passed to visitFragment; the
 * call is not visible in this listing.
 */
protected void endEncounter(Visit visit) throws ObservationException {
// encounter is finished
// fragment should contain exactly one node -> the visit
Node node = currentVisit.getFirstChild();
/**
 * Hook for subclasses that receives a visit node.
 *
 * @param visit DOM node of the visit
 */
protected abstract void visitFragment(Node visit);
package de.sekmi.histream.export;
import java.util.function.Supplier;
import javax.xml.parsers.ParserConfigurationException;
import org.w3c.dom.Node;
import de.sekmi.histream.Observation;
import de.sekmi.histream.ObservationSupplier;
/**
 * {@link Supplier} of visit DOM nodes that reads observations from an
 * {@link ObservationSupplier} and uses an anonymous {@link VisitFragmentParser}
 * subclass to capture visit nodes.
 * <p>
 * NOTE(review): this listing appears incomplete -- closing braces and the
 * statements that hand an observation to the parser and terminate the loop at
 * end of stream are not visible here. Confirm against the full file.
 */
public class VisitFragmentSupplier implements Supplier<Node> {
// source of observations, read in get()
private ObservationSupplier supplier;
// most recently captured visit node; set by the parser callback, cleared by get()
private Node visit;
// parser whose visitFragment callback stores the node in the field above
private VisitFragmentParser parser;
// set once the observation supplier returned null
private boolean closed;
/**
 * Stores the observation supplier and creates the parser whose
 * {@code visitFragment} callback stores the received node in {@code visit}.
 *
 * @param supplier source of observations
 */
public VisitFragmentSupplier(ObservationSupplier supplier) throws XMLStreamException, ParserConfigurationException {
this.supplier = supplier;
this.parser = new VisitFragmentParser() {
protected void visitFragment(Node visit) {
VisitFragmentSupplier.this.visit = visit;
/**
 * Returns the next visit node and clears the stored node for the next call.
 * Returns {@code null} when the supplier is already marked closed.
 */
public Node get() {
if( closed ){
return null;
// feed facts until we get a visit fragment
while( visit == null ){
Observation o = supplier.get();
if( o == null ){
// end of stream
// might get another visit with the close
closed = true;
if( visit == null ){
// end of stream
// copy to a local first, because the member is reset before returning
Node local = visit;
// clear member variable for next call
this.visit = null;
return local;
package de.sekmi.histream.export.config;
/**
 * Presumably the test class for the fact class annotator (inferred from the
 * name only). No members are visible in this listing -- TODO confirm against
 * the full file.
 */
public class TestFactClassAnnotator {
package de.sekmi.histream.export.config;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import org.junit.Test;
import org.w3c.dom.Node;
import de.sekmi.histream.ObservationSupplier;
import de.sekmi.histream.export.VisitFragmentSupplier;
/**
 * Test that obtains a visit node from a {@link VisitFragmentSupplier}.
 * <p>
 * NOTE(review): this listing appears incomplete -- test annotations, closing
 * braces, assertions and some imports are not visible here.
 */
public class TestVisitFragmentParser {
/**
 * Opens the example observation supplier, wraps it in a
 * {@link VisitFragmentSupplier} and retrieves the first node.
 * No assertion on the node is visible in this listing.
 */
public void verifyVisitFragmentContent() throws Exception{
FileObservationProviderTest t = new FileObservationProviderTest();
// NOTE(review): not used by any statement visible in this listing
final Node visitNode = null;
try( ObservationSupplier s = t.getExampleSupplier() ){
VisitFragmentSupplier sup = new VisitFragmentSupplier(s);
Node n = sup.get();
/**
 * Writes the given node as indented UTF-8 XML without XML declaration to
 * {@code System.out}, using a default (identity) transformer.
 *
 * @param node DOM node to print
 */
private void printDOM(Node node) throws UnsupportedEncodingException, TransformerException{
TransformerFactory tf = TransformerFactory.newInstance();
Transformer transformer = tf.newTransformer();
transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
transformer.setOutputProperty(OutputKeys.METHOD, "xml");
transformer.setOutputProperty(OutputKeys.INDENT, "yes");
transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
// NOTE(review): the namespace part of this property key is empty; it looks
// truncated (an implementation-specific indent-amount key is probably meant) -- verify
transformer.setOutputProperty("{}indent-amount", "4");
transformer.transform(new DOMSource(node),
new StreamResult(new OutputStreamWriter(System.out, "UTF-8")));
<?xml version="1.0" encoding="UTF-8"?>
<export xmlns=""
The export is a three-step process:
First, the specified concepts are queried from the fact store/database.
Second, groups are specified and a map is built which can be used
to assign class ids to concepts. This map is used to transform the
resulting EAV XML to an extended form where all facts are annotated
with their matching class id. This transformation can be done on the
fly for each encounter DOM (see below).
Third, the resulting XML is read and, for each encounter, a DOM is
constructed (and annotated as specified above). For each encounter,
the XPath expressions in the lower part of this document are executed
to construct table data.
<group class="d_lab">
<concept id="LOINC:26436-6" />
<concept id="LOINC:26436-6:NEG" />
<group class="diag">
<concept wildcard="ICD10:*" />
<column header="pid" xpath="@id"/>
<column header="birthdate" xpath="eav:birthdate"/>
<column header="gender" xpath="eav:gender"/>
<column header="pid" xpath="../@id"/>
<column header="visit" xpath="@id"/>
<!-- concepts for the visit table must occur only once
per visit and may not repeat -->
<column header="start" xpath="eav:start"/>
<!-- What is better? -->
<!-- (a) First column, then concepts with value -->
<column header="diagnostik_labort_ts" na="NULL" xpath="facts/fact[@class='d_lab']/@start">
<concept code="LOINC:26436-6"/>
<concept code="LOINC:26436-6:NEG"/>
<value type="script">
<!-- (b) first concepts then columns with value -->
<concept code="LOINC:26436-6"/>
<concept code="LOINC:26436-6:NEG"/>
<column header="lab_ts" type="attribute">
<!-- We want this:
6:NEG -> not tested
6/mod=OPB -> tested non pathological
6/mod=PB -> tested pathological
<column header="lab_ergebnis" type="attribute">
<table id="diagnosen">
<!-- makes more sense for fact tables to specify
the list of concepts first. -->
<concept wildcard="ICD10:*"/>
<concept code="ICD9:123"/>
<column header="pid" type="patient-ref"/>
<column header="visit" type="visit-ref"/>
<column header="start" type="attribute">
<column header="primary" type="attribute" na="nein">
<!-- if a modifier element is provided, the property
will use the context of that modifier -->
<column header="text" type="attribute">
<!-- reference sequence from patient -->
<column header="seq1" type="sequence">
<sequence ref="seq1"/>
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment