Commit b64dbebb authored by R.W.Majeed's avatar R.W.Majeed

support concept code limits and wildcard concepts for data extraction. Javadoc.

parent 79d8db9e
......@@ -4,6 +4,7 @@ import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.sql.Timestamp;
import java.util.logging.Logger;
......@@ -27,11 +28,22 @@ public class I2b2ExtractorFactory implements AutoCloseable {
private Integer fetchSize;
DataDialect dialect;
private ObservationFactory observationFactory;
private boolean allowWildcardConceptCodes;
private static String SELECT_PARAMETERS = "patient_num, encounter_num, instance_num, concept_cd, modifier_cd, provider_id, location_cd, start_date, end_date, RTRIM(valtype_cd) valtype_cd, tval_char, nval_num, RTRIM(valueflag_cd) valueflag_cd, units_cd, sourcesystem_cd";
private static String SELECT_TABLE = "observation_fact";
/**
 * Feature key for {@link #setFeature(String, Object)}: boolean switch
 * controlling whether wildcard concept keys are allowed.
 * <p>
 * Use with caution: Unexpected results might happen if wildcard
 * concepts overlap. (Such as query fails, duplicate facts, etc.)
 * </p>
 * <p>
 * Declared {@code final} so that the key cannot be reassigned at runtime,
 * which would silently break the name comparison done when a feature is set.
 * </p>
 */
public static final String ALLOW_WILDCARD_CONCEPT_CODES = "de.sekmi.histream.i2b2.wildcard_concepts";
private static String SELECT_PARAMETERS = "f.patient_num, f.encounter_num, f.instance_num, f.concept_cd, f.modifier_cd, f.provider_id, f.location_cd, f.start_date, f.end_date, RTRIM(f.valtype_cd) valtype_cd, f.tval_char, f.nval_num, RTRIM(f.valueflag_cd) valueflag_cd, f.units_cd, f.sourcesystem_cd";
private static String SELECT_TABLE = "observation_fact f";
//private static String SELECT_ORDER_CHRONO = "ORDER BY start_date, patient_num, encounter_num, instance_num, modifier_cd NULLS FIRST";
private static String SELECT_ORDER_GROUP = "ORDER BY patient_num, encounter_num, start_date, instance_num, concept_cd, modifier_cd NULLS FIRST";
private static String SELECT_ORDER_GROUP = "ORDER BY f.patient_num, f.encounter_num, f.start_date, f.instance_num, f.concept_cd, f.modifier_cd NULLS FIRST";
public I2b2ExtractorFactory(DataSource crc_ds, ObservationFactory factory) throws SQLException{
// TODO implement
......@@ -43,6 +55,15 @@ public class I2b2ExtractorFactory implements AutoCloseable {
return observationFactory;
}
/**
 * Switches a named feature of this factory on or off.
 * <p>
 * The only name acted upon is {@link #ALLOW_WILDCARD_CONCEPT_CODES};
 * any other feature name leaves the factory unchanged.
 * </p>
 *
 * @param feature name of the feature to set
 * @param value new value; must be a {@link Boolean} for the wildcard feature
 * @throws IllegalArgumentException if the wildcard feature is given a non-Boolean value
 */
public void setFeature(String feature, Object value){
	if( !feature.equals(ALLOW_WILDCARD_CONCEPT_CODES) ){
		// not a feature handled here: nothing to do
		return;
	}
	if( !(value instanceof Boolean) ){
		throw new IllegalArgumentException("Boolean value expected for feature "+feature);
	}
	Boolean enabled = (Boolean)value;
	this.allowWildcardConceptCodes = enabled;
}
public PreparedStatement prepareStatement(Connection dbc, String sql) throws SQLException{
PreparedStatement s = dbc.prepareStatement(sql, ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
if( fetchSize != null ){
......@@ -55,9 +76,32 @@ public class I2b2ExtractorFactory implements AutoCloseable {
/**
 * Sets a named property on this factory.
 * <p>
 * Currently a no-op: the method body is empty, so both arguments
 * are ignored.
 * </p>
 *
 * @param property name of the property (ignored at present)
 * @param value value of the property (ignored at present)
 */
public void setProperty(String property, Object value){
// NOTE(review): the name below is presumably a property key planned
// for a future implementation -- confirm before relying on it
// de.sekmi.histream.i2b2.extractor.project
}
/**
 * Creates the temporary table {@code temp_concepts} on the given connection
 * and fills it with the supplied concept codes.
 * <p>
 * Repeated codes are inserted only once: the {@code concept} column is the
 * table's primary key, so a duplicate entry would otherwise abort the insert
 * with a constraint violation. All rows are sent as one JDBC batch instead
 * of one statement execution per code.
 * </p>
 * <p>
 * NOTE(review): the CREATE statement has no "IF NOT EXISTS" guard, so a
 * second call on the same connection presumably fails while the table
 * still exists -- confirm how callers manage the connection.
 * </p>
 *
 * @param dbc connection on which the table is created and filled
 * @param concepts concept codes to insert, must not be {@code null}
 * @throws SQLException if creating the table or inserting the codes fails
 */
private void createTemporaryConceptTable(Connection dbc, String[] concepts) throws SQLException{
	try( Statement s = dbc.createStatement() ){
		s.executeUpdate("CREATE TEMPORARY TABLE temp_concepts(concept VARCHAR(255) PRIMARY KEY)");
	}
	// keep first-seen order, drop repeated codes (fully qualified names
	// avoid touching the file's import block)
	java.util.Set<String> uniqueConcepts = new java.util.LinkedHashSet<>(java.util.Arrays.asList(concepts));
	if( uniqueConcepts.isEmpty() ){
		// table stays empty; nothing to insert
		return;
	}
	try( PreparedStatement ps
			= dbc.prepareStatement("INSERT INTO temp_concepts(concept) VALUES(?)") ){
		for( String concept : uniqueConcepts ){
			ps.setString(1, concept);
			ps.addBatch();
		}
		ps.executeBatch();
	}
}
/**
* Extract observations for given concept codes with
* {@code observation.start} between start_min and start_max.
* <p>
* The query can use the index {@code of_idx_clusteredconcept} for {@code concept_cd}
* and {@code of_idx_start_date} for {@code start_date}.
* </p>
* <p>
* TODO integration test without concepts, with normal concepts, with wildcard concepts
* </p>
*
* @param start_min start date of returned observations must be greater than start_min
* @param start_max start date of returned observations must be less than start_max
......@@ -76,7 +120,29 @@ public class I2b2ExtractorFactory implements AutoCloseable {
StringBuilder b = new StringBuilder(600);
b.append("SELECT ");
b.append(SELECT_PARAMETERS+" FROM "+SELECT_TABLE+" ");
b.append("WHERE start_date BETWEEN ? AND ? ");
if( concepts != null ){
log.info("Temporary table created for "+concepts.length+" concept ids");
String[] ids = concepts;
int wildcardCount = 0;
if( allowWildcardConceptCodes ){
// TODO check if wildcards actually used (search for *)
ids = new String[concepts.length];
for( int i=0; i<ids.length; i++ ){
ids[i] = concepts[i].replace('*', '%');
if( false == ids[i].equals(concepts[i]) ){
wildcardCount ++;
}
}
// TODO add check for overlapping wildcard concepts (e.g. A* and AB*)
}
createTemporaryConceptTable(dbc, ids);
if( wildcardCount > 0 ){
b.append(" JOIN temp_concepts tc ON f.concept_cd LIKE tc.concept ");
}else{
b.append(" JOIN temp_concepts tc ON f.concept_cd=tc.concept ");
}
}
b.append("WHERE f.start_date BETWEEN ? AND ? ");
b.append(SELECT_ORDER_GROUP);
log.info("SQL: "+b.toString());
......
......@@ -67,7 +67,7 @@ import de.sekmi.histream.impl.AbstractObservationHandler;
* in the visit store (which caches visits anyways) and increase the instance_num only
* for observations with modifiers.
*
* @author marap1
* @author R.W.Majeed
*
*/
public class I2b2Inserter extends AbstractObservationHandler implements ObservationHandler, Closeable, Plugin{
......
......@@ -26,10 +26,16 @@ public class TestExtractor implements DataSource{
e.dump();
}
try( I2b2Extractor e = ef.extract(Timestamp.valueOf("2015-01-16 00:00:00"), Timestamp.valueOf("2015-01-17 00:00:00"), null) ){
try( I2b2Extractor e = ef.extract(Timestamp.valueOf("2015-01-16 00:00:00"), Timestamp.valueOf("2015-01-17 00:00:00"), new String[]{"AKTIN:PLZ"}) ){
e.stream().forEach(System.out::println);
}
System.out.println("-- now with wildcards");
ef.setFeature(I2b2ExtractorFactory.ALLOW_WILDCARD_CONCEPT_CODES, Boolean.TRUE);
try( I2b2Extractor e = ef.extract(Timestamp.valueOf("2015-01-16 00:00:00"), Timestamp.valueOf("2015-01-17 00:00:00"), new String[]{"ICD10GM:*","AKTIN:PLZ"}) ){
e.stream().forEach(System.out::println);
}
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment