Commit 753982f8 authored by R.W.Majeed's avatar R.W.Majeed

Allow relative URLs in import configuration

parent 016508bf
package de.sekmi.histream.etl;
import java.io.File;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
......@@ -13,11 +13,15 @@ import de.sekmi.histream.Observation;
import de.sekmi.histream.ObservationFactory;
import de.sekmi.histream.ObservationSupplier;
import de.sekmi.histream.etl.config.DataSource;
import de.sekmi.histream.etl.config.Meta;
import de.sekmi.histream.etl.config.PatientTable;
import de.sekmi.histream.etl.config.VisitTable;
import de.sekmi.histream.etl.config.WideTable;
import de.sekmi.histream.ext.Patient;
import de.sekmi.histream.ext.Visit;
import de.sekmi.histream.impl.ObservationFactoryImpl;
import de.sekmi.histream.impl.SimplePatientExtension;
import de.sekmi.histream.impl.SimpleVisitExtension;
/**
* Supplier for observations which are loaded from arbitrary
......@@ -67,10 +71,45 @@ public class ETLObservationSupplier implements ObservationSupplier{
private DataSource ds;
public ETLObservationSupplier(File configuration, ObservationFactory factory) throws IOException, ParseException{
this(JAXB.unmarshal(configuration, DataSource.class), factory);
/**
 * Build a new observation supplier from the configuration at the given URL.
 * The URL is stored in the configuration's meta information, so that
 * relative URLs within the configuration can be resolved against it.
 *
 * @param configuration location of the configuration file
 * @param factory observation factory
 * @return observation supplier
 *
 * @throws IOException error reading configuration, including a configuration
 *  without meta information. The error might be caused by a {@link ParseException}.
 *
 */
public static ETLObservationSupplier load(URL configuration, ObservationFactory factory) throws IOException{
    DataSource ds = JAXB.unmarshal(configuration, DataSource.class);
    Meta meta = ds.getMeta();
    if( meta == null ){
        // report a broken configuration with the documented exception type
        // instead of failing with a NullPointerException on setLocation
        throw new IOException("Configuration contains no meta information: "+configuration);
    }
    meta.setLocation(configuration);
    return new ETLObservationSupplier(ds, factory);
}
public ETLObservationSupplier(DataSource ds, ObservationFactory factory) throws IOException, ParseException {
/**
 * Convenience variant of {@link #load(URL, ObservationFactory)} which creates
 * its own observation factory. Only the Patient and Visit extensions are
 * registered with that factory.
 *
 * @param configuration configuration URL
 * @return observation supplier
 * @throws IOException error reading configuration
 */
public static ETLObservationSupplier load(URL configuration) throws IOException{
    ObservationFactory defaultFactory = new ObservationFactoryImpl();
    defaultFactory.registerExtension(new SimplePatientExtension());
    defaultFactory.registerExtension(new SimpleVisitExtension());
    return load(configuration, defaultFactory);
}
/**
* Construct a new observation supplier directly from a {@link DataSource}.
*
* @param ds data source
* @param factory observation factory
* @throws IOException error reading configuration
*/
public ETLObservationSupplier(DataSource ds, ObservationFactory factory) throws IOException {
this.ds = ds;
pt = ds.getPatientTable();
......@@ -78,11 +117,12 @@ public class ETLObservationSupplier implements ObservationSupplier{
wt = ds.getWideTables();
// TODO long tables
String sourceId = ds.getMeta().getSourceId();
Meta meta = ds.getMeta();
// in case of exception, make sure already opened suppliers are closed
IOException error = null;
try{
pr = pt.open(factory, sourceId);
vr = vt.open(factory, sourceId);
pr = pt.open(factory, meta);
vr = vt.open(factory, meta);
queue = new FactGroupingQueue(pr, vr,
factory.getExtensionAccessor(Patient.class),
factory.getExtensionAccessor(Visit.class));
......@@ -91,19 +131,28 @@ public class ETLObservationSupplier implements ObservationSupplier{
wr = new ArrayList<>(wt.size());
for( WideTable t : wt ){
@SuppressWarnings("resource")
RecordSupplier<WideRow> s = t.open(factory, sourceId);
RecordSupplier<WideRow> s = t.open(factory, meta);
queue.addFactTable(s);
wr.add(s);
}
queue.prepare();
}catch( IOException | UncheckedIOException | ParseException | UncheckedParseException e ){
}catch( UncheckedIOException e ){
error = e.getCause();
}catch( UncheckedParseException e ){
error = new IOException(e.getCause());
}catch( ParseException e ){
error = new IOException(e);
}catch( IOException e ){
error = e;
}
if( error != null ){
try{
this.close();
}catch( IOException f ){
e.addSuppressed(f);
error.addSuppressed(f);
}
throw e;
throw error;
}
}
......
......@@ -26,7 +26,7 @@ public class FileRowSupplier extends RowSupplier {
// TODO: check whether needed to close underlying InputStream
// load headers
String line = in.readLine();
String line = in.readLine();
this.headers = fieldSeparatorPattern.split(line);
determineFileTimestamp(location);
......
package de.sekmi.histream.etl;
/**
* Error condition during parsing of input tables.
*
* E.g. when a column header reference in the configuration
* does not appear in the respective table.
*
* @author Raphael
*
*/
public class ParseException extends Exception {
/**
*
......
......@@ -5,6 +5,7 @@ import java.util.function.Supplier;
import de.sekmi.histream.Observation;
import de.sekmi.histream.ObservationFactory;
import de.sekmi.histream.etl.config.Meta;
import de.sekmi.histream.etl.config.Table;
import de.sekmi.histream.ext.ExternalSourceType;
import de.sekmi.histream.impl.ExternalSourceImpl;
......@@ -16,12 +17,12 @@ public class RecordSupplier<R extends FactRow> implements Supplier<R>, AutoClose
private ObservationFactory factory;
private ExternalSourceType source;
public RecordSupplier(RowSupplier rows, Table<R> table, ObservationFactory factory, String sourceId)throws ParseException{
public RecordSupplier(RowSupplier rows, Table<R> table, ObservationFactory factory, Meta meta)throws ParseException{
this.rows = rows;
this.table = table;
this.map = table.getColumnMap(rows.getHeaders());
this.factory = factory;
this.source = new ExternalSourceImpl(sourceId, rows.getTimestamp());
this.source = new ExternalSourceImpl(meta.getSourceId(), rows.getTimestamp());
}
public final ExternalSourceType getSource(){ return this.source;}
......
......@@ -3,6 +3,7 @@ package de.sekmi.histream.etl.config;
import java.util.Arrays;
import java.util.List;
import javax.xml.bind.JAXB;
import javax.xml.bind.annotation.XmlAccessType;
import javax.xml.bind.annotation.XmlAccessorType;
import javax.xml.bind.annotation.XmlElement;
......@@ -10,6 +11,18 @@ import javax.xml.bind.annotation.XmlElementWrapper;
import javax.xml.bind.annotation.XmlRootElement;
import javax.xml.bind.annotation.XmlSeeAlso;
/**
* Data source configuration.
* This is the XML root element which can be loaded
* via {@code JAXB.unmarshal(in, DataSource.class);}
* <p>
* For relative URLs to work, {@link Meta#setLocation(java.net.URL)} must be called to set
* the location of the data source description.
*
* @see JAXB#unmarshal(java.io.File, Class)
* @author Raphael
*
*/
@XmlRootElement
@XmlAccessorType(XmlAccessType.FIELD)
@XmlSeeAlso({Column.class, StringColumn.class})
......
......@@ -13,28 +13,48 @@ import javax.xml.bind.annotation.XmlType;
import de.sekmi.histream.etl.FileRowSupplier;
import de.sekmi.histream.etl.RowSupplier;
/**
* Table source reading plain text tables
*
* @author Raphael
*
*/
@XmlAccessorType(XmlAccessType.FIELD)
@XmlType(name="plain-file")
public class FileSource extends TableSource{
/**
* Location of the table file.
* A relative location might be specified which
* will be resolved against {@link Meta#getLocation()}.
*/
@XmlElement
URL url;
String url;
/**
* File encoding is not used yet.
*/
@XmlElement
String encoding;
/**
* Regular expression pattern for the field separator. e.g. {@code \t}
* The specified string will be processed with {@link Pattern#compile(String)}.
*/
@XmlElement
String separator;
private FileSource(){
}
public FileSource(String url, String separator) throws MalformedURLException{
public FileSource(String urlSpec, String separator) throws MalformedURLException{
this();
this.url = new URL(url);
this.url = urlSpec;
this.separator = separator;
}
@Override
public RowSupplier rows() throws IOException {
return new FileRowSupplier(url, Pattern.compile(separator));
public RowSupplier rows(Meta meta) throws IOException {
    // The configured url is resolved against the location of the
    // configuration, if that location is known. Otherwise the
    // configured url is used on its own.
    URL context = meta.getLocation();
    URL resolved;
    if( context != null ){
        resolved = new URL(context, url);
    }else{
        resolved = new URL(url);
    }
    Pattern fieldSeparator = Pattern.compile(separator);
    return new FileRowSupplier(resolved, fieldSeparator);
}
}
package de.sekmi.histream.etl.config;
import java.net.URL;
import javax.xml.bind.annotation.XmlElement;
import javax.xml.bind.annotation.XmlTransient;
/**
* Meta information.
*
* @author Raphael
*
*/
public class Meta {
@XmlElement
String id;
......@@ -9,6 +18,9 @@ public class Meta {
@XmlElement(name="etl-strategy")
String etlStrategy;
@XmlTransient
private URL location;
protected Meta(){
}
......@@ -23,5 +35,12 @@ public class Meta {
public String getETLStrategy(){
return etlStrategy;
}
/**
* Get the location of the data source description, as previously
* assigned via {@link #setLocation(URL)}.
*
* @return location URL. May be {@code null}, e.g. if no location was assigned.
*/
public URL getLocation(){
return location;
}
/**
* Set the location of the data source description.
* The location is not part of the XML representation (the field is transient).
*
* @param location location URL
*/
public void setLocation(URL location){
this.location = location;
}
}
......@@ -20,7 +20,7 @@ public class SQLSource extends TableSource {
this.connectString = connectString;
}
@Override
public RowSupplier rows() {
public RowSupplier rows(Meta meta) {
// TODO Auto-generated method stub
return null;
}
......
......@@ -54,8 +54,8 @@ public abstract class Table<T extends FactRow> {
public abstract T fillRecord(ColumnMap map, Object[] row, ObservationFactory factory) throws ParseException;
public RecordSupplier<T> open(ObservationFactory factory, String sourceId) throws IOException, ParseException{
return new RecordSupplier<>(source.rows(), this, factory, sourceId);
/**
 * Open the table's source and wrap its rows in a record supplier.
 * If the record supplier cannot be constructed, the already opened
 * row supplier is closed before the failure is propagated.
 *
 * @param factory observation factory
 * @param meta meta information, passed on to the table source and the record supplier
 * @return record supplier for this table
 * @throws IOException error opening the table source
 * @throws ParseException error constructing the record supplier
 */
public RecordSupplier<T> open(ObservationFactory factory, Meta meta) throws IOException, ParseException{
    de.sekmi.histream.etl.RowSupplier rows = source.rows(meta);
    try{
        return new RecordSupplier<>(rows, this, factory, meta);
    }catch( ParseException | RuntimeException e ){
        // the record supplier never took ownership of the rows,
        // so they need to be closed here to avoid leaking the resource
        if( rows != null ){
            try{
                rows.close();
            }catch( Exception f ){
                // keep the original failure, only attach the close failure
                e.addSuppressed(f);
            }
        }
        throw e;
    }
}
......
......@@ -16,10 +16,13 @@ import de.sekmi.histream.etl.RowSupplier;
@XmlSeeAlso({FileSource.class, SQLSource.class})
public abstract class TableSource{
/**
* Open a row supplier which provides rows.
* This is a resource which must be closed.
*
* @param meta meta information
* @return row supplier
* @throws IOException IO errors during row supplier construction
*/
public abstract RowSupplier rows() throws IOException;
public abstract RowSupplier rows(Meta meta) throws IOException;
}
\ No newline at end of file
/**
*
*/
/**
* Import configuration with JAXB annotations.
*
* @author marap1
*/
package de.sekmi.histream.etl.config;
\ No newline at end of file
package de.sekmi.histream.etl;
import java.io.IOException;
import java.io.InputStream;
import java.util.stream.StreamSupport;
import javax.xml.bind.JAXB;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
......@@ -13,36 +10,23 @@ import org.junit.Test;
import de.sekmi.histream.DateTimeAccuracy;
import de.sekmi.histream.Observation;
import de.sekmi.histream.ObservationFactory;
import de.sekmi.histream.etl.config.DataSource;
import de.sekmi.histream.ext.Patient;
import de.sekmi.histream.ext.Visit;
import de.sekmi.histream.impl.Meta;
import de.sekmi.histream.impl.ObservationFactoryImpl;
import de.sekmi.histream.impl.SimplePatientExtension;
import de.sekmi.histream.impl.SimpleVisitExtension;
import de.sekmi.histream.io.AbstractObservationParser;
import de.sekmi.histream.io.GroupedXMLWriter;
public class TestETLSupplier {
private DataSource ds;
private ObservationFactory of ;
private ETLObservationSupplier os;
@Before
public void loadConfiguration() throws IOException, ParseException{
try( InputStream in = getClass().getResourceAsStream("/test-1-datasource.xml") ){
ds = JAXB.unmarshal(in, DataSource.class);
}
of = new ObservationFactoryImpl();
of.registerExtension(new SimplePatientExtension());
of.registerExtension(new SimpleVisitExtension());
os = new ETLObservationSupplier(ds,of);
os = ETLObservationSupplier.load(getClass().getResource("/test-1-datasource.xml"));
}
@After
public void freeResources() throws IOException{
os.close();
if( os != null )os.close();
}
@Test
......
package de.sekmi.histream.etl.config;
import java.io.IOException;
import java.io.InputStream;
import java.math.BigDecimal;
import java.net.URL;
import java.time.temporal.ChronoField;
import javax.xml.bind.JAXB;
......@@ -29,15 +29,15 @@ public class TestReadTables {
@Before
public void loadConfiguration() throws IOException{
try( InputStream in = getClass().getResourceAsStream("/test-1-datasource.xml") ){
ds = JAXB.unmarshal(in, DataSource.class);
}
URL url = getClass().getResource("/test-1-datasource.xml");
ds = JAXB.unmarshal(url, DataSource.class);
ds.getMeta().setLocation(url);
of = new ObservationFactoryImpl();
}
@Test
public void testReadPatients() throws IOException, ParseException{
try( RecordSupplier<PatientRow> s = ds.patientTable.open(of,ds.getMeta().getSourceId()) ){
try( RecordSupplier<PatientRow> s = ds.patientTable.open(of,ds.getMeta()) ){
PatientRow r = s.get();
Assert.assertEquals("p1", r.getId());
Assert.assertEquals(2003, r.getBirthDate().get(ChronoField.YEAR));
......@@ -46,7 +46,7 @@ public class TestReadTables {
}
@Test
public void testReadVisits() throws IOException, ParseException{
try( RecordSupplier<VisitRow> s = ds.visitTable.open(of,ds.getMeta().getSourceId()) ){
try( RecordSupplier<VisitRow> s = ds.visitTable.open(of,ds.getMeta()) ){
VisitRow r = s.get();
Assert.assertEquals("v1", r.getId());
Assert.assertEquals(2013, r.getStartTime().get(ChronoField.YEAR));
......@@ -55,7 +55,7 @@ public class TestReadTables {
}
@Test
public void testReadWideTable() throws IOException, ParseException{
try( RecordSupplier<WideRow> s = ds.wideTables[0].open(of,ds.getMeta().getSourceId()) ){
try( RecordSupplier<WideRow> s = ds.wideTables[0].open(of,ds.getMeta()) ){
WideRow r = s.get();
Assert.assertNotNull(r);
Assert.assertTrue(r.getFacts().size() > 0);
......
......@@ -7,7 +7,7 @@
<patient-table>
<source xsi:type="plain-file">
<url>file:src/test/resources/test-1-patients.txt</url>
<url>test-1-patients.txt</url>
<separator>\t</separator>
</source>
<idat>
......@@ -24,7 +24,7 @@
<!-- optional -->
<visit-table>
<source xsi:type="plain-file">
<url>file:src/test/resources/test-1-visits.txt</url>
<url>test-1-visits.txt</url>
<type>text/csv</type>
<separator>\t</separator>
</source>
......@@ -46,7 +46,7 @@
<wide-table>
<source xsi:type="plain-file">
<url>file:src/test/resources/test-1-widetable.txt</url>
<url>test-1-widetable.txt</url>
<separator>\t</separator>
</source>
<idat>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment