Commit ce91fac1 authored by R.W.Majeed's avatar R.W.Majeed
Browse files

anonymization moved to separate project (dwh-prefs). Dependency injection via CDI

parent 980797bf
......@@ -5,6 +5,7 @@ import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.time.ZoneId;
import java.util.Date;
import java.util.function.Consumer;
import javax.xml.bind.JAXBException;
......@@ -20,6 +21,7 @@ import org.aktin.cda.CDAStatus.Status;
import org.aktin.cda.UnsupportedTemplateException;
import org.aktin.cda.etl.transform.Transformation;
import org.aktin.cda.etl.transform.TransformationFactory;
import org.aktin.dwh.Anonymizer;
import org.w3c.dom.Document;
import de.sekmi.histream.Observation;
......@@ -37,14 +39,13 @@ public abstract class AbstractCDAImporter implements CDAProcessor{
private TransformationFactory cdaToDataWarehouse;
private XMLInputFactory inputFactory;
public AbstractCDAImporter() throws IOException {
public AbstractCDAImporter(Anonymizer anonymizer) throws IOException {
// create transformer
cdaToDataWarehouse = new TransformationFactory();
cdaToDataWarehouse.setAnonymizer(anonymizer);
// XML input factory
inputFactory = XMLInputFactory.newInstance();
}
/**
* Get the observation factory which will be used to create observations
......@@ -91,9 +92,17 @@ public abstract class AbstractCDAImporter implements CDAProcessor{
// insert facts
suppl.stream().forEach(getObservationInserter());
CDAStatus.Status status = deleted?Status.Updated:Status.Created;
CDAStatus.Status status;
Descriptor desc = new Descriptor(sourceId);
// TODO use/write timestamps and version
desc.lastModified = new Date();
if( deleted ){
status = Status.Updated;
}else{
status = Status.Created;
desc.created = desc.lastModified;
}
// TODO use/write version
return new CDAStatus(desc, status);
} catch (IOException e) {
throw new CDAException("Unable to read EAV temp file: "+file, e);
......
......@@ -23,6 +23,7 @@ import org.aktin.cda.CDAException;
import org.aktin.cda.CDAStatus;
import org.aktin.cda.CDASummary;
import org.aktin.cda.DocumentNotFoundException;
import org.aktin.dwh.Anonymizer;
import org.aktin.dwh.PreferenceKey;
import org.w3c.dom.Document;
......@@ -54,8 +55,8 @@ public class CDAImporter extends AbstractCDAImporter implements AutoCloseable{
* @throws IOException unable to load CDA to ETL transformation script
*/
@Inject // TODO change to ObservationFactory and see if this works
public CDAImporter(ObservationFactory factory, Preferences prefs) throws NamingException, SQLException, IOException {
super();
public CDAImporter(ObservationFactory factory, Preferences prefs, Anonymizer anonymizer) throws NamingException, SQLException, IOException {
super(anonymizer);
this.factory = factory;
this.localZone = ZoneId.of(prefs.get(PreferenceKey.timeZoneId));
log.info("Default timezone for CDA documents: "+localZone);
......@@ -73,10 +74,6 @@ public class CDAImporter extends AbstractCDAImporter implements AutoCloseable{
*/
// data dialect
DataDialect dd = new DataDialect();
String tz = prefs.get(PreferenceKey.i2b2DatabaseTimezone);
if( tz != null ){
dd.setTimeZone(ZoneId.of(tz));
}
try{
inserter = new I2b2Inserter();
inserter.open(crcDS.getConnection(), dd);
......
......@@ -16,6 +16,7 @@ import javax.xml.transform.stream.StreamResult;
import org.aktin.cda.etl.transform.fun.CalculateEncounterHash;
import org.aktin.cda.etl.transform.fun.CalculatePatientHash;
import org.aktin.cda.etl.transform.fun.CalculateSourceId;
import org.aktin.dwh.Anonymizer;
import org.w3c.dom.Document;
import net.sf.saxon.Configuration;
......@@ -32,6 +33,7 @@ public class Transformation {
private TransformerFactoryImpl transformerFactory;
private Templates transformerTemplates;
private Anonymizer anonymizer;
/**
* Construct a CDA template to EAV transformation
......@@ -42,10 +44,10 @@ public class Transformation {
* @throws TransformerFactoryConfigurationError if the transformer factory failed to initialize
* @throws TransformerConfigurationException transformer setup error
*/
public Transformation(String moduleId, String templateId, Document xslt)throws TransformerFactoryConfigurationError, TransformerConfigurationException{
public Transformation(String moduleId, String templateId, Document xslt, Anonymizer anonymizer)throws TransformerFactoryConfigurationError, TransformerConfigurationException{
this.moduleId = moduleId;
this.templateId = templateId;
this.anonymizer = anonymizer;
// create transformer
// usually a transformer is created via TransformerFactory.newInstance(),
// but this may return a non-saxon parser
......@@ -70,9 +72,9 @@ public class Transformation {
// }
// Configuration config = ((TransformerFactoryImpl)factory).getConfiguration();
Configuration config = transformerFactory.getConfiguration();
config.registerExtensionFunction(new CalculatePatientHash());
config.registerExtensionFunction(new CalculateEncounterHash());
config.registerExtensionFunction(new CalculateSourceId());
config.registerExtensionFunction(new CalculatePatientHash(anonymizer));
config.registerExtensionFunction(new CalculateEncounterHash(anonymizer));
config.registerExtensionFunction(new CalculateSourceId(anonymizer));
// TODO don't need moduleId and factory?
}
......
......@@ -22,6 +22,7 @@ import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.aktin.cda.NamespaceContextImpl;
import org.aktin.dwh.Anonymizer;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
......@@ -38,7 +39,8 @@ public class TransformationFactory {
private Map<String, Transformation> cache;
private XPath xpath;
private DocumentBuilderFactory builderFactory;
private Anonymizer anonymizer;
public TransformationFactory(){
// inputFactory = XMLInputFactory.newInstance();
// XPath configuration
......@@ -77,6 +79,7 @@ public class TransformationFactory {
}
private Transformation loadTransformation(String templateId) throws IOException, TransformerConfigurationException, TransformerFactoryConfigurationError{
Objects.requireNonNull(this.anonymizer, "no anonymizer configured");
// need to locate the transformation
URL url = locateTransformationByTemplate(templateId);
if( url == null ){
......@@ -101,9 +104,12 @@ public class TransformationFactory {
// this should be reported to the developers
log.warning("Mismatch between template name="+templateId+" and declared template="+declaredTemplate);
}
return new Transformation(moduleId, templateId, doc);
return new Transformation(moduleId, templateId, doc, anonymizer);
}
/**
 * Set the anonymizer which is handed to the transformations created by this factory.
 * Loading a transformation fails with a {@link NullPointerException}
 * ("no anonymizer configured") as long as no anonymizer has been set.
 *
 * @param anonymizer anonymizer to pass to newly loaded transformations
 */
public void setAnonymizer(Anonymizer anonymizer){
this.anonymizer = anonymizer;
}
public Transformation getTransformation(String templateId) throws IOException, TransformerConfigurationException, TransformerFactoryConfigurationError{
// look in cache
Transformation transform = cache.get(templateId);
......
package org.aktin.cda.etl.transform.fun;
import org.aktin.dwh.Anonymizer;
import net.sf.saxon.om.StructuredQName;
import net.sf.saxon.value.SequenceType;
public class CalculateEncounterHash extends OneWayHashFunction{
/**
 * Create the encounter hash extension function.
 *
 * @param anonymizer anonymizer passed on to the {@link OneWayHashFunction} constructor
 */
public CalculateEncounterHash(Anonymizer anonymizer) {
super(anonymizer);
}
public static final StructuredQName QNAME = OneWayHashFunction.buildFunctionQName("encounter-hash");
protected static final SequenceType[] TWO_STRINGS = new SequenceType[]{SequenceType.SINGLE_STRING,SequenceType.SINGLE_STRING};
......
package org.aktin.cda.etl.transform.fun;
import org.aktin.dwh.Anonymizer;
import net.sf.saxon.om.StructuredQName;
import net.sf.saxon.value.SequenceType;
public class CalculatePatientHash extends OneWayHashFunction{
/**
 * Create the patient hash extension function.
 *
 * @param anonymizer anonymizer passed on to the {@link OneWayHashFunction} constructor
 */
public CalculatePatientHash(Anonymizer anonymizer) {
super(anonymizer);
}
public static final StructuredQName QNAME = OneWayHashFunction.buildFunctionQName("patient-hash");
protected static final SequenceType[] TWO_STRINGS = new SequenceType[]{SequenceType.SINGLE_STRING,SequenceType.SINGLE_STRING};
@Override
public StructuredQName getFunctionQName() {
return QNAME;
......
package org.aktin.cda.etl.transform.fun;
import org.aktin.dwh.Anonymizer;
import net.sf.saxon.om.StructuredQName;
import net.sf.saxon.value.SequenceType;
......@@ -28,6 +30,10 @@ import net.sf.saxon.value.SequenceType;
*
*/
public class CalculateSourceId extends OneWayHashFunction{
/**
 * Create the source id (import hash) extension function.
 *
 * @param anonymizer anonymizer passed on to the {@link OneWayHashFunction} constructor
 */
public CalculateSourceId(Anonymizer anonymizer) {
super(anonymizer);
}
public static final StructuredQName QNAME = OneWayHashFunction.buildFunctionQName("import-hash");
protected static final SequenceType[] FIVE_STRINGS = new SequenceType[]{SequenceType.SINGLE_STRING,SequenceType.SINGLE_STRING,SequenceType.SINGLE_STRING,SequenceType.SINGLE_STRING,SequenceType.SINGLE_STRING};
......
package org.aktin.cda.etl.transform.fun;
import javax.inject.Singleton;
import org.aktin.dwh.Anonymizer;
/**
 * {@link Anonymizer} which produces pseudonyms by delegating to
 * {@link OneWayHashFunction#calculateHash(String...)}.
 * Patient and encounter pseudonyms are computed in exactly the same way
 * from the root and extension parts.
 */
// technically, singleton is not needed
@Singleton
public class OneWayAnonymizer implements Anonymizer {

    /**
     * Calculate the pseudonym for a patient id.
     *
     * @param root id root
     * @param extension id extension
     * @return hash calculated from root and extension
     */
    @Override
    public String calculatePatientPseudonym(String root, String extension) {
        return hashOf(root, extension);
    }

    /**
     * Calculate the pseudonym for an encounter id.
     *
     * @param root id root
     * @param extension id extension
     * @return hash calculated from root and extension
     */
    @Override
    public String calculateEncounterPseudonym(String root, String extension) {
        return hashOf(root, extension);
    }

    /** Shared delegation to the one way hash used for both pseudonym kinds. */
    private static String hashOf(String root, String extension) {
        return OneWayHashFunction.calculateHash(root, extension);
    }
}
package org.aktin.cda.etl.transform.fun;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.security.DigestException;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.Base64;
import org.aktin.dwh.Anonymizer;
import net.sf.saxon.expr.XPathContext;
import net.sf.saxon.lib.ExtensionFunctionCall;
......@@ -25,7 +20,11 @@ import net.sf.saxon.value.StringValue;
public abstract class OneWayHashFunction extends ExtensionFunctionDefinition {
// private static final Logger log = Logger.getLogger(OneWayHashFunction.class.getName());
public static final String AKTIN_CDA_FUNCTIONS_NS = "http://aktin.org/cda/functions";
private Anonymizer anonymizer;
/**
 * Create the function definition with the anonymizer to use.
 *
 * @param anonymizer anonymizer stored in this instance
 */
public OneWayHashFunction(Anonymizer anonymizer){
this.anonymizer = anonymizer;
}
/**
 * Build the qualified name for an extension function in the
 * {@link #AKTIN_CDA_FUNCTIONS_NS} namespace, using an empty prefix.
 *
 * @param funcName local name of the function
 * @return qualified function name
 */
protected static final StructuredQName buildFunctionQName(String funcName){
    final String emptyPrefix = "";
    final StructuredQName qname = new StructuredQName(emptyPrefix, AKTIN_CDA_FUNCTIONS_NS, funcName);
    return qname;
}
......@@ -35,39 +34,6 @@ public abstract class OneWayHashFunction extends ExtensionFunctionDefinition {
return SequenceType.SINGLE_STRING;
}
/**
 * Derive a short one way hash string from the given input parts.
 * The steps are:
 * <ol>
 * <li>Join the parts into one string, separated by a slash (/).</li>
 * <li>Encode the joined string to bytes using UTF-8.</li>
 * <li>Compute the 160 bit SHA-1 digest of these bytes.</li>
 * <li>Render the digest as base64 with the url-safe alphabet.</li>
 * </ol>
 * The 20 digest bytes always encode to 28 characters
 * (including one trailing padding character).
 *
 * @param strings input parts
 * @return string hash
 * @throws IllegalStateException if the SHA-1 algorithm is not available
 */
public static String calculateHash(String ...strings) {
    final MessageDigest sha1;
    try {
        sha1 = MessageDigest.getInstance("SHA-1");
    } catch (NoSuchAlgorithmException e) {
        // should not happen. SHA-1 is guaranteed to be included in the JRE
        throw new IllegalStateException("Digest algorithm not available",e);
    }
    // join the parts and encode the joined string to bytes
    ByteBuffer utf8Bytes = Charset.forName("UTF-8").encode(String.join("/", strings));
    // digest the bytes and render the checksum as url-safe base64
    sha1.update(utf8Bytes);
    return Base64.getUrlEncoder().encodeToString(sha1.digest());
}
/**
* Implements a call to the hash function with variable arguments.
......@@ -87,7 +53,7 @@ public abstract class OneWayHashFunction extends ExtensionFunctionDefinition {
if( arguments.length == 0 ){
throw new XPathException("Need at least one argument for hash calculation");
}
return new StringValue(calculateHash(strings));
return new StringValue(anonymizer.calculateAbstractPseudonym(strings));
}
}
......
......@@ -24,7 +24,7 @@ public class CDAImporterMockUp extends AbstractCDAImporter implements Consumer<O
private int insertCount;
public CDAImporterMockUp() throws IOException{
super();
super(new ConcatAnonymizer());
System.out.println("CONSTRUCT CDAImporterMockUp");
factory = new ObservationFactoryImpl(new SimplePatientExtension(), new SimpleVisitExtension());
insertCount = 0;
......
package org.aktin.cda.etl;
import org.aktin.dwh.Anonymizer;
/**
 * Anonymizer for testing only. The pseudonym is just the
 * source parts joined with a slash, which leaves the parts
 * readable. It should not be used for production.
 * @author R.W.Majeed
 *
 */
public class ConcatAnonymizer implements Anonymizer {
    /** Separator placed between the source parts. */
    private static final String SEPARATOR = "/";

    /**
     * Concatenate the parts to form the pseudonym.
     *
     * @param parts source parts
     * @return the parts joined with a slash
     */
    @Override
    public String calculateAbstractPseudonym(String... parts) {
        String pseudonym = String.join(SEPARATOR, parts);
        return pseudonym;
    }
}
......@@ -10,6 +10,7 @@ public class TestTransformationLoader {
@Test
public void verifyTransformVariables() throws Exception{
TransformationFactory f = new TransformationFactory();
f.setAnonymizer(new ConcatAnonymizer());
//XMLStreamReader xsr = inputFactory.createXMLStreamReader(getClass().getResourceAsStream("/cda-eav.xsl"));
// InputSource input = new InputSource(getClass().getResourceAsStream("/cda-eav.xsl"));
Transformation t = f.getTransformation("1.2.276.0.76.10.1019");
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment