Commit ce91fac1 authored by R.W.Majeed's avatar R.W.Majeed
Browse files

Anonymization moved to a separate project (dwh-prefs); the anonymizer is now supplied via CDI dependency injection.

parent 980797bf
...@@ -5,6 +5,7 @@ import java.io.InputStream; ...@@ -5,6 +5,7 @@ import java.io.InputStream;
import java.nio.file.Files; import java.nio.file.Files;
import java.nio.file.Path; import java.nio.file.Path;
import java.time.ZoneId; import java.time.ZoneId;
import java.util.Date;
import java.util.function.Consumer; import java.util.function.Consumer;
import javax.xml.bind.JAXBException; import javax.xml.bind.JAXBException;
...@@ -20,6 +21,7 @@ import org.aktin.cda.CDAStatus.Status; ...@@ -20,6 +21,7 @@ import org.aktin.cda.CDAStatus.Status;
import org.aktin.cda.UnsupportedTemplateException; import org.aktin.cda.UnsupportedTemplateException;
import org.aktin.cda.etl.transform.Transformation; import org.aktin.cda.etl.transform.Transformation;
import org.aktin.cda.etl.transform.TransformationFactory; import org.aktin.cda.etl.transform.TransformationFactory;
import org.aktin.dwh.Anonymizer;
import org.w3c.dom.Document; import org.w3c.dom.Document;
import de.sekmi.histream.Observation; import de.sekmi.histream.Observation;
...@@ -37,15 +39,14 @@ public abstract class AbstractCDAImporter implements CDAProcessor{ ...@@ -37,15 +39,14 @@ public abstract class AbstractCDAImporter implements CDAProcessor{
private TransformationFactory cdaToDataWarehouse; private TransformationFactory cdaToDataWarehouse;
private XMLInputFactory inputFactory; private XMLInputFactory inputFactory;
public AbstractCDAImporter() throws IOException { public AbstractCDAImporter(Anonymizer anonymizer) throws IOException {
// create transformer // create transformer
cdaToDataWarehouse = new TransformationFactory(); cdaToDataWarehouse = new TransformationFactory();
cdaToDataWarehouse.setAnonymizer(anonymizer);
// XML input factory // XML input factory
inputFactory = XMLInputFactory.newInstance(); inputFactory = XMLInputFactory.newInstance();
} }
/** /**
* Get the observation factory which will be used to create observations * Get the observation factory which will be used to create observations
* @return observation factory * @return observation factory
...@@ -91,9 +92,17 @@ public abstract class AbstractCDAImporter implements CDAProcessor{ ...@@ -91,9 +92,17 @@ public abstract class AbstractCDAImporter implements CDAProcessor{
// insert facts // insert facts
suppl.stream().forEach(getObservationInserter()); suppl.stream().forEach(getObservationInserter());
CDAStatus.Status status = deleted?Status.Updated:Status.Created; CDAStatus.Status status;
Descriptor desc = new Descriptor(sourceId); Descriptor desc = new Descriptor(sourceId);
// TODO use/write timestamps and version desc.lastModified = new Date();
if( deleted ){
status = Status.Updated;
}else{
status = Status.Created;
desc.created = desc.lastModified;
}
// TODO use/write version
return new CDAStatus(desc, status); return new CDAStatus(desc, status);
} catch (IOException e) { } catch (IOException e) {
throw new CDAException("Unable to read EAV temp file: "+file, e); throw new CDAException("Unable to read EAV temp file: "+file, e);
......
...@@ -23,6 +23,7 @@ import org.aktin.cda.CDAException; ...@@ -23,6 +23,7 @@ import org.aktin.cda.CDAException;
import org.aktin.cda.CDAStatus; import org.aktin.cda.CDAStatus;
import org.aktin.cda.CDASummary; import org.aktin.cda.CDASummary;
import org.aktin.cda.DocumentNotFoundException; import org.aktin.cda.DocumentNotFoundException;
import org.aktin.dwh.Anonymizer;
import org.aktin.dwh.PreferenceKey; import org.aktin.dwh.PreferenceKey;
import org.w3c.dom.Document; import org.w3c.dom.Document;
...@@ -54,8 +55,8 @@ public class CDAImporter extends AbstractCDAImporter implements AutoCloseable{ ...@@ -54,8 +55,8 @@ public class CDAImporter extends AbstractCDAImporter implements AutoCloseable{
* @throws IOException unable to load CDA to ETL transformation script * @throws IOException unable to load CDA to ETL transformation script
*/ */
@Inject // TODO change to ObservationFactory and see if this works @Inject // TODO change to ObservationFactory and see if this works
public CDAImporter(ObservationFactory factory, Preferences prefs) throws NamingException, SQLException, IOException { public CDAImporter(ObservationFactory factory, Preferences prefs, Anonymizer anonymizer) throws NamingException, SQLException, IOException {
super(); super(anonymizer);
this.factory = factory; this.factory = factory;
this.localZone = ZoneId.of(prefs.get(PreferenceKey.timeZoneId)); this.localZone = ZoneId.of(prefs.get(PreferenceKey.timeZoneId));
log.info("Default timezone for CDA documents: "+localZone); log.info("Default timezone for CDA documents: "+localZone);
...@@ -73,10 +74,6 @@ public class CDAImporter extends AbstractCDAImporter implements AutoCloseable{ ...@@ -73,10 +74,6 @@ public class CDAImporter extends AbstractCDAImporter implements AutoCloseable{
*/ */
// data dialect // data dialect
DataDialect dd = new DataDialect(); DataDialect dd = new DataDialect();
String tz = prefs.get(PreferenceKey.i2b2DatabaseTimezone);
if( tz != null ){
dd.setTimeZone(ZoneId.of(tz));
}
try{ try{
inserter = new I2b2Inserter(); inserter = new I2b2Inserter();
inserter.open(crcDS.getConnection(), dd); inserter.open(crcDS.getConnection(), dd);
......
...@@ -16,6 +16,7 @@ import javax.xml.transform.stream.StreamResult; ...@@ -16,6 +16,7 @@ import javax.xml.transform.stream.StreamResult;
import org.aktin.cda.etl.transform.fun.CalculateEncounterHash; import org.aktin.cda.etl.transform.fun.CalculateEncounterHash;
import org.aktin.cda.etl.transform.fun.CalculatePatientHash; import org.aktin.cda.etl.transform.fun.CalculatePatientHash;
import org.aktin.cda.etl.transform.fun.CalculateSourceId; import org.aktin.cda.etl.transform.fun.CalculateSourceId;
import org.aktin.dwh.Anonymizer;
import org.w3c.dom.Document; import org.w3c.dom.Document;
import net.sf.saxon.Configuration; import net.sf.saxon.Configuration;
...@@ -32,6 +33,7 @@ public class Transformation { ...@@ -32,6 +33,7 @@ public class Transformation {
private TransformerFactoryImpl transformerFactory; private TransformerFactoryImpl transformerFactory;
private Templates transformerTemplates; private Templates transformerTemplates;
private Anonymizer anonymizer;
/** /**
* Construct a CDA template to EAV transformation * Construct a CDA template to EAV transformation
...@@ -42,10 +44,10 @@ public class Transformation { ...@@ -42,10 +44,10 @@ public class Transformation {
* @throws TransformerFactoryConfigurationError if the transformer factory failed to initialize * @throws TransformerFactoryConfigurationError if the transformer factory failed to initialize
* @throws TransformerConfigurationException transformer setup error * @throws TransformerConfigurationException transformer setup error
*/ */
public Transformation(String moduleId, String templateId, Document xslt)throws TransformerFactoryConfigurationError, TransformerConfigurationException{ public Transformation(String moduleId, String templateId, Document xslt, Anonymizer anonymizer)throws TransformerFactoryConfigurationError, TransformerConfigurationException{
this.moduleId = moduleId; this.moduleId = moduleId;
this.templateId = templateId; this.templateId = templateId;
this.anonymizer = anonymizer;
// create transformer // create transformer
// usually a transformer is created via TransformerFactory.newInstance(), // usually a transformer is created via TransformerFactory.newInstance(),
// but this may return a non-saxon parser // but this may return a non-saxon parser
...@@ -70,9 +72,9 @@ public class Transformation { ...@@ -70,9 +72,9 @@ public class Transformation {
// } // }
// Configuration config = ((TransformerFactoryImpl)factory).getConfiguration(); // Configuration config = ((TransformerFactoryImpl)factory).getConfiguration();
Configuration config = transformerFactory.getConfiguration(); Configuration config = transformerFactory.getConfiguration();
config.registerExtensionFunction(new CalculatePatientHash()); config.registerExtensionFunction(new CalculatePatientHash(anonymizer));
config.registerExtensionFunction(new CalculateEncounterHash()); config.registerExtensionFunction(new CalculateEncounterHash(anonymizer));
config.registerExtensionFunction(new CalculateSourceId()); config.registerExtensionFunction(new CalculateSourceId(anonymizer));
// TODO don't need moduleId and factory? // TODO don't need moduleId and factory?
} }
......
...@@ -22,6 +22,7 @@ import javax.xml.xpath.XPathExpressionException; ...@@ -22,6 +22,7 @@ import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory; import javax.xml.xpath.XPathFactory;
import org.aktin.cda.NamespaceContextImpl; import org.aktin.cda.NamespaceContextImpl;
import org.aktin.dwh.Anonymizer;
import org.w3c.dom.Document; import org.w3c.dom.Document;
import org.w3c.dom.Node; import org.w3c.dom.Node;
import org.w3c.dom.NodeList; import org.w3c.dom.NodeList;
...@@ -38,6 +39,7 @@ public class TransformationFactory { ...@@ -38,6 +39,7 @@ public class TransformationFactory {
private Map<String, Transformation> cache; private Map<String, Transformation> cache;
private XPath xpath; private XPath xpath;
private DocumentBuilderFactory builderFactory; private DocumentBuilderFactory builderFactory;
private Anonymizer anonymizer;
public TransformationFactory(){ public TransformationFactory(){
// inputFactory = XMLInputFactory.newInstance(); // inputFactory = XMLInputFactory.newInstance();
...@@ -77,6 +79,7 @@ public class TransformationFactory { ...@@ -77,6 +79,7 @@ public class TransformationFactory {
} }
private Transformation loadTransformation(String templateId) throws IOException, TransformerConfigurationException, TransformerFactoryConfigurationError{ private Transformation loadTransformation(String templateId) throws IOException, TransformerConfigurationException, TransformerFactoryConfigurationError{
Objects.requireNonNull(this.anonymizer, "no anonymizer configured");
// need to locate the transformation // need to locate the transformation
URL url = locateTransformationByTemplate(templateId); URL url = locateTransformationByTemplate(templateId);
if( url == null ){ if( url == null ){
...@@ -101,9 +104,12 @@ public class TransformationFactory { ...@@ -101,9 +104,12 @@ public class TransformationFactory {
// this should be reported to the developers // this should be reported to the developers
log.warning("Mismatch between template name="+templateId+" and declared template="+declaredTemplate); log.warning("Mismatch between template name="+templateId+" and declared template="+declaredTemplate);
} }
return new Transformation(moduleId, templateId, doc); return new Transformation(moduleId, templateId, doc, anonymizer);
} }
/**
 * Sets the anonymizer which is handed to every {@link Transformation}
 * created by this factory.
 * <p>
 * Must be called with a non-null value before a transformation is loaded:
 * {@code loadTransformation} fails with a {@link NullPointerException}
 * ("no anonymizer configured") if no anonymizer was set.
 * </p>
 * NOTE(review): transformations which are already cached presumably keep
 * the anonymizer they were created with -- confirm before changing the
 * anonymizer at runtime.
 *
 * @param anonymizer anonymizer to use for newly loaded transformations
 */
public void setAnonymizer(Anonymizer anonymizer){
this.anonymizer = anonymizer;
}
public Transformation getTransformation(String templateId) throws IOException, TransformerConfigurationException, TransformerFactoryConfigurationError{ public Transformation getTransformation(String templateId) throws IOException, TransformerConfigurationException, TransformerFactoryConfigurationError{
// look in cache // look in cache
Transformation transform = cache.get(templateId); Transformation transform = cache.get(templateId);
......
package org.aktin.cda.etl.transform.fun; package org.aktin.cda.etl.transform.fun;
import org.aktin.dwh.Anonymizer;
import net.sf.saxon.om.StructuredQName; import net.sf.saxon.om.StructuredQName;
import net.sf.saxon.value.SequenceType; import net.sf.saxon.value.SequenceType;
public class CalculateEncounterHash extends OneWayHashFunction{ public class CalculateEncounterHash extends OneWayHashFunction{
/**
 * Creates the extension function definition.
 * The anonymizer is passed on unchanged to {@link OneWayHashFunction}.
 *
 * @param anonymizer anonymizer handed to the superclass constructor
 */
public CalculateEncounterHash(Anonymizer anonymizer) {
super(anonymizer);
}
public static final StructuredQName QNAME = OneWayHashFunction.buildFunctionQName("encounter-hash"); public static final StructuredQName QNAME = OneWayHashFunction.buildFunctionQName("encounter-hash");
protected static final SequenceType[] TWO_STRINGS = new SequenceType[]{SequenceType.SINGLE_STRING,SequenceType.SINGLE_STRING}; protected static final SequenceType[] TWO_STRINGS = new SequenceType[]{SequenceType.SINGLE_STRING,SequenceType.SINGLE_STRING};
......
package org.aktin.cda.etl.transform.fun; package org.aktin.cda.etl.transform.fun;
import org.aktin.dwh.Anonymizer;
import net.sf.saxon.om.StructuredQName; import net.sf.saxon.om.StructuredQName;
import net.sf.saxon.value.SequenceType; import net.sf.saxon.value.SequenceType;
public class CalculatePatientHash extends OneWayHashFunction{ public class CalculatePatientHash extends OneWayHashFunction{
/**
 * Creates the extension function definition.
 * The anonymizer is passed on unchanged to {@link OneWayHashFunction}.
 *
 * @param anonymizer anonymizer handed to the superclass constructor
 */
public CalculatePatientHash(Anonymizer anonymizer) {
super(anonymizer);
}
public static final StructuredQName QNAME = OneWayHashFunction.buildFunctionQName("patient-hash"); public static final StructuredQName QNAME = OneWayHashFunction.buildFunctionQName("patient-hash");
protected static final SequenceType[] TWO_STRINGS = new SequenceType[]{SequenceType.SINGLE_STRING,SequenceType.SINGLE_STRING}; protected static final SequenceType[] TWO_STRINGS = new SequenceType[]{SequenceType.SINGLE_STRING,SequenceType.SINGLE_STRING};
......
package org.aktin.cda.etl.transform.fun; package org.aktin.cda.etl.transform.fun;
import org.aktin.dwh.Anonymizer;
import net.sf.saxon.om.StructuredQName; import net.sf.saxon.om.StructuredQName;
import net.sf.saxon.value.SequenceType; import net.sf.saxon.value.SequenceType;
...@@ -28,6 +30,10 @@ import net.sf.saxon.value.SequenceType; ...@@ -28,6 +30,10 @@ import net.sf.saxon.value.SequenceType;
* *
*/ */
public class CalculateSourceId extends OneWayHashFunction{ public class CalculateSourceId extends OneWayHashFunction{
/**
 * Creates the extension function definition.
 * The anonymizer is passed on unchanged to {@link OneWayHashFunction}.
 *
 * @param anonymizer anonymizer handed to the superclass constructor
 */
public CalculateSourceId(Anonymizer anonymizer) {
super(anonymizer);
}
public static final StructuredQName QNAME = OneWayHashFunction.buildFunctionQName("import-hash"); public static final StructuredQName QNAME = OneWayHashFunction.buildFunctionQName("import-hash");
protected static final SequenceType[] FIVE_STRINGS = new SequenceType[]{SequenceType.SINGLE_STRING,SequenceType.SINGLE_STRING,SequenceType.SINGLE_STRING,SequenceType.SINGLE_STRING,SequenceType.SINGLE_STRING}; protected static final SequenceType[] FIVE_STRINGS = new SequenceType[]{SequenceType.SINGLE_STRING,SequenceType.SINGLE_STRING,SequenceType.SINGLE_STRING,SequenceType.SINGLE_STRING,SequenceType.SINGLE_STRING};
......
package org.aktin.cda.etl.transform.fun;
import javax.inject.Singleton;
import org.aktin.dwh.Anonymizer;
/**
 * {@link Anonymizer} implementation which derives pseudonyms with
 * {@link OneWayHashFunction#calculateHash(String...)}.
 * <p>
 * Patient and encounter pseudonyms are computed by the very same hash call,
 * so identical {@code root}/{@code extension} pairs yield identical
 * pseudonyms for both kinds.
 * </p>
 */
// technically, singleton is not needed
@Singleton
public class OneWayAnonymizer implements Anonymizer {

    /**
     * Hashes the given id parts.
     *
     * @param root id root
     * @param extension id extension
     * @return hash as returned by {@link OneWayHashFunction#calculateHash(String...)}
     */
    private static String hashOf(String root, String extension) {
        return OneWayHashFunction.calculateHash(root, extension);
    }

    /**
     * Calculates the pseudonym for a patient id.
     *
     * @param root id root
     * @param extension id extension
     * @return one-way hash of root and extension
     */
    @Override
    public String calculatePatientPseudonym(String root, String extension) {
        return hashOf(root, extension);
    }

    /**
     * Calculates the pseudonym for an encounter id.
     *
     * @param root id root
     * @param extension id extension
     * @return one-way hash of root and extension
     */
    @Override
    public String calculateEncounterPseudonym(String root, String extension) {
        return hashOf(root, extension);
    }
}
package org.aktin.cda.etl.transform.fun; package org.aktin.cda.etl.transform.fun;
import java.nio.ByteBuffer; import org.aktin.dwh.Anonymizer;
import java.nio.charset.Charset;
import java.security.DigestException;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.Base64;
import net.sf.saxon.expr.XPathContext; import net.sf.saxon.expr.XPathContext;
import net.sf.saxon.lib.ExtensionFunctionCall; import net.sf.saxon.lib.ExtensionFunctionCall;
...@@ -25,7 +20,11 @@ import net.sf.saxon.value.StringValue; ...@@ -25,7 +20,11 @@ import net.sf.saxon.value.StringValue;
public abstract class OneWayHashFunction extends ExtensionFunctionDefinition { public abstract class OneWayHashFunction extends ExtensionFunctionDefinition {
// private static final Logger log = Logger.getLogger(OneWayHashFunction.class.getName()); // private static final Logger log = Logger.getLogger(OneWayHashFunction.class.getName());
public static final String AKTIN_CDA_FUNCTIONS_NS = "http://aktin.org/cda/functions"; public static final String AKTIN_CDA_FUNCTIONS_NS = "http://aktin.org/cda/functions";
private Anonymizer anonymizer;
/**
 * Creates the function definition and stores the anonymizer in the
 * {@code anonymizer} field of this instance.
 *
 * @param anonymizer anonymizer to store
 */
public OneWayHashFunction(Anonymizer anonymizer){
this.anonymizer = anonymizer;
}
protected static final StructuredQName buildFunctionQName(String funcName){ protected static final StructuredQName buildFunctionQName(String funcName){
return new StructuredQName("", AKTIN_CDA_FUNCTIONS_NS, funcName); return new StructuredQName("", AKTIN_CDA_FUNCTIONS_NS, funcName);
} }
...@@ -35,39 +34,6 @@ public abstract class OneWayHashFunction extends ExtensionFunctionDefinition { ...@@ -35,39 +34,6 @@ public abstract class OneWayHashFunction extends ExtensionFunctionDefinition {
return SequenceType.SINGLE_STRING; return SequenceType.SINGLE_STRING;
} }
/**
 * Calculate a one way hash function for the given input.
 * The algorithm is as follows:
 * <ol>
 *  <li>Concatenate the arguments with a slash (/) as separator.</li>
 *  <li>Encode the concatenated string with UTF-8 encoding.</li>
 *  <li>Generate a 160 bit SHA-1 checksum.</li>
 *  <li>Produce base64 encoding with the url-safe alphabet (with padding).</li>
 * </ol>
 * The resulting string always has 28 characters: 20 digest bytes
 * encode to 27 base64 characters plus one padding character.
 * Calling the method without arguments hashes the empty string.
 *
 * @param strings input parts; {@code null} elements are joined as the text "null"
 * @return url-safe base64 representation of the SHA-1 digest
 * @throws NullPointerException if the {@code strings} array itself is {@code null}
 * @throws IllegalStateException if the SHA-1 algorithm is not available
 */
public static String calculateHash(String ...strings) {
    final MessageDigest digest;
    try {
        digest = MessageDigest.getInstance("SHA-1");
    } catch (NoSuchAlgorithmException e) {
        // should not happen. SHA-1 is guaranteed to be included in the JRE
        throw new IllegalStateException("Digest algorithm not available", e);
    }
    // join arguments
    String composite = String.join("/", strings);
    // encode to bytes; the charset constant is fully qualified so that
    // no additional import is required and no lookup by name is needed
    byte[] input = composite.getBytes(java.nio.charset.StandardCharsets.UTF_8);
    // calculate digest and encode with base64
    return Base64.getUrlEncoder().encodeToString(digest.digest(input));
}
/** /**
* Implements a call to the hash function with variable arguments. * Implements a call to the hash function with variable arguments.
...@@ -87,7 +53,7 @@ public abstract class OneWayHashFunction extends ExtensionFunctionDefinition { ...@@ -87,7 +53,7 @@ public abstract class OneWayHashFunction extends ExtensionFunctionDefinition {
if( arguments.length == 0 ){ if( arguments.length == 0 ){
throw new XPathException("Need at least one argument for hash calculation"); throw new XPathException("Need at least one argument for hash calculation");
} }
return new StringValue(calculateHash(strings)); return new StringValue(anonymizer.calculateAbstractPseudonym(strings));
} }
} }
......
...@@ -24,7 +24,7 @@ public class CDAImporterMockUp extends AbstractCDAImporter implements Consumer<O ...@@ -24,7 +24,7 @@ public class CDAImporterMockUp extends AbstractCDAImporter implements Consumer<O
private int insertCount; private int insertCount;
public CDAImporterMockUp() throws IOException{ public CDAImporterMockUp() throws IOException{
super(); super(new ConcatAnonymizer());
System.out.println("CONSTRUCT CDAImporterMockUp"); System.out.println("CONSTRUCT CDAImporterMockUp");
factory = new ObservationFactoryImpl(new SimplePatientExtension(), new SimpleVisitExtension()); factory = new ObservationFactoryImpl(new SimplePatientExtension(), new SimpleVisitExtension());
insertCount = 0; insertCount = 0;
......
package org.aktin.cda.etl;
import org.aktin.dwh.Anonymizer;
/**
 * Test-only {@link Anonymizer}: the "pseudonym" is simply the readable
 * concatenation of the source parts. It offers no anonymization at all
 * and should not be used for production.
 * @author R.W.Majeed
 *
 */
public class ConcatAnonymizer implements Anonymizer {
    /** Separator placed between the individual parts. */
    private static final String SEPARATOR = "/";

    /**
     * Joins the given parts with a slash.
     *
     * @param parts source parts of the pseudonym
     * @return all parts in the given order, separated by {@code /}
     */
    @Override
    public String calculateAbstractPseudonym(String... parts) {
        final String joined = String.join(SEPARATOR, parts);
        return joined;
    }
}
...@@ -10,6 +10,7 @@ public class TestTransformationLoader { ...@@ -10,6 +10,7 @@ public class TestTransformationLoader {
@Test @Test
public void verifyTransformVariables() throws Exception{ public void verifyTransformVariables() throws Exception{
TransformationFactory f = new TransformationFactory(); TransformationFactory f = new TransformationFactory();
f.setAnonymizer(new ConcatAnonymizer());
//XMLStreamReader xsr = inputFactory.createXMLStreamReader(getClass().getResourceAsStream("/cda-eav.xsl")); //XMLStreamReader xsr = inputFactory.createXMLStreamReader(getClass().getResourceAsStream("/cda-eav.xsl"));
// InputSource input = new InputSource(getClass().getResourceAsStream("/cda-eav.xsl")); // InputSource input = new InputSource(getClass().getResourceAsStream("/cda-eav.xsl"));
Transformation t = f.getTransformation("1.2.276.0.76.10.1019"); Transformation t = f.getTransformation("1.2.276.0.76.10.1019");
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment