Loading histream-import/src/main/java/de/sekmi/histream/etl/FileRowSupplier.java +3 −2 Original line number Original line Diff line number Diff line Loading @@ -6,6 +6,7 @@ import java.io.InputStreamReader; import java.io.UncheckedIOException; import java.io.UncheckedIOException; import java.net.URL; import java.net.URL; import java.net.URLConnection; import java.net.URLConnection; import java.nio.charset.Charset; import java.time.Instant; import java.time.Instant; import com.opencsv.CSVParser; import com.opencsv.CSVParser; Loading @@ -19,7 +20,7 @@ public class FileRowSupplier extends RowSupplier { private Instant timestamp; private Instant timestamp; public FileRowSupplier(URL location, String fieldSeparator) throws IOException{ public FileRowSupplier(URL location, String fieldSeparator, Charset charset) throws IOException{ if( fieldSeparator.length() > 1 ){ if( fieldSeparator.length() > 1 ){ if( fieldSeparator.equals("\\t") ){ if( fieldSeparator.equals("\\t") ){ fieldSeparator = "\t"; fieldSeparator = "\t"; Loading @@ -28,7 +29,7 @@ public class FileRowSupplier extends RowSupplier { } } } } this.url = location; this.url = location; this.in = new CSVReader(new InputStreamReader(location.openStream()),fieldSeparator.charAt(0), CSVParser.DEFAULT_QUOTE_CHARACTER, (char)0); this.in = new CSVReader(new InputStreamReader(location.openStream(), charset),fieldSeparator.charAt(0), CSVParser.DEFAULT_QUOTE_CHARACTER, (char)0); // TODO: check whether needed to close underlying InputStream // TODO: check whether needed to close underlying InputStream Loading histream-import/src/main/java/de/sekmi/histream/etl/config/CsvFile.java +17 −7 Original line number Original line Diff line number Diff line Loading @@ -3,6 +3,7 @@ package de.sekmi.histream.etl.config; import java.io.IOException; import java.io.IOException; import java.net.MalformedURLException; import java.net.MalformedURLException; import java.net.URL; import java.net.URL; import java.nio.charset.Charset; import java.util.regex.Pattern; import java.util.regex.Pattern; import javax.xml.bind.annotation.XmlAccessType; import javax.xml.bind.annotation.XmlAccessType; Loading Loading @@ -32,7 +33,7 @@ public class CsvFile extends TableSource{ String url; String url; /** /** * File encoding is not used yet. * Encoding to use for reading text files */ */ @XmlElement @XmlElement String encoding; String encoding; Loading @@ -44,11 +45,11 @@ public class CsvFile extends TableSource{ @XmlElement @XmlElement String separator; String separator; @XmlElement // @XmlElement String quote; // String quote; // @XmlElement // @XmlElement char escape; // char escape; private CsvFile(){ private CsvFile(){ } } Loading @@ -59,9 +60,18 @@ public class CsvFile extends TableSource{ } } @Override @Override public RowSupplier rows(Meta meta) throws IOException { public RowSupplier rows(Meta meta) throws IOException { // resolve url relative to base url from metadata URL base = meta.getLocation(); URL base = meta.getLocation(); URL source = (base == null)?new URL(url):new URL(base, url); URL source = (base == null)?new URL(url):new URL(base, url); return new FileRowSupplier(source, separator); // determine charset Charset charset; if( encoding != null ) { charset = Charset.forName(encoding); }else{ // if not defined, use system charset charset = Charset.defaultCharset(); } return new FileRowSupplier(source, separator, charset); } } } } histream-import/src/test/java/de/sekmi/histream/etl/TestRowSupplier.java +2 −1 Original line number Original line Diff line number Diff line package de.sekmi.histream.etl; package de.sekmi.histream.etl; import java.io.IOException; import java.io.IOException; import java.nio.charset.StandardCharsets; import org.junit.Assert; import org.junit.Assert; import org.junit.Test; import org.junit.Test; Loading @@ -9,7 +10,7 @@ public class TestRowSupplier { @Test @Test public void testLoadRows() throws IOException{ public void testLoadRows() throws IOException{ try( FileRowSupplier r = new FileRowSupplier(getClass().getResource("/data/test-1-patients.txt"), "\t") ){ try( FileRowSupplier r = new FileRowSupplier(getClass().getResource("/data/test-1-patients.txt"), "\t", StandardCharsets.ISO_8859_1) ){ String[] h = r.getHeaders(); String[] h = r.getHeaders(); Assert.assertEquals("patid", h[0]); Assert.assertEquals("patid", h[0]); Assert.assertEquals("nachname", h[2]); Assert.assertEquals("nachname", h[2]); Loading Loading
histream-import/src/main/java/de/sekmi/histream/etl/FileRowSupplier.java +3 −2 Original line number Original line Diff line number Diff line Loading @@ -6,6 +6,7 @@ import java.io.InputStreamReader; import java.io.UncheckedIOException; import java.io.UncheckedIOException; import java.net.URL; import java.net.URL; import java.net.URLConnection; import java.net.URLConnection; import java.nio.charset.Charset; import java.time.Instant; import java.time.Instant; import com.opencsv.CSVParser; import com.opencsv.CSVParser; Loading @@ -19,7 +20,7 @@ public class FileRowSupplier extends RowSupplier { private Instant timestamp; private Instant timestamp; public FileRowSupplier(URL location, String fieldSeparator) throws IOException{ public FileRowSupplier(URL location, String fieldSeparator, Charset charset) throws IOException{ if( fieldSeparator.length() > 1 ){ if( fieldSeparator.length() > 1 ){ if( fieldSeparator.equals("\\t") ){ if( fieldSeparator.equals("\\t") ){ fieldSeparator = "\t"; fieldSeparator = "\t"; Loading @@ -28,7 +29,7 @@ public class FileRowSupplier extends RowSupplier { } } } } this.url = location; this.url = location; this.in = new CSVReader(new InputStreamReader(location.openStream()),fieldSeparator.charAt(0), CSVParser.DEFAULT_QUOTE_CHARACTER, (char)0); this.in = new CSVReader(new InputStreamReader(location.openStream(), charset),fieldSeparator.charAt(0), CSVParser.DEFAULT_QUOTE_CHARACTER, (char)0); // TODO: check whether needed to close underlying InputStream // TODO: check whether needed to close underlying InputStream Loading
histream-import/src/main/java/de/sekmi/histream/etl/config/CsvFile.java +17 −7 Original line number Original line Diff line number Diff line Loading @@ -3,6 +3,7 @@ package de.sekmi.histream.etl.config; import java.io.IOException; import java.io.IOException; import java.net.MalformedURLException; import java.net.MalformedURLException; import java.net.URL; import java.net.URL; import java.nio.charset.Charset; import java.util.regex.Pattern; import java.util.regex.Pattern; import javax.xml.bind.annotation.XmlAccessType; import javax.xml.bind.annotation.XmlAccessType; Loading Loading @@ -32,7 +33,7 @@ public class CsvFile extends TableSource{ String url; String url; /** /** * File encoding is not used yet. * Encoding to use for reading text files */ */ @XmlElement @XmlElement String encoding; String encoding; Loading @@ -44,11 +45,11 @@ public class CsvFile extends TableSource{ @XmlElement @XmlElement String separator; String separator; @XmlElement // @XmlElement String quote; // String quote; // @XmlElement // @XmlElement char escape; // char escape; private CsvFile(){ private CsvFile(){ } } Loading @@ -59,9 +60,18 @@ public class CsvFile extends TableSource{ } } @Override @Override public RowSupplier rows(Meta meta) throws IOException { public RowSupplier rows(Meta meta) throws IOException { // resolve url relative to base url from metadata URL base = meta.getLocation(); URL base = meta.getLocation(); URL source = (base == null)?new URL(url):new URL(base, url); URL source = (base == null)?new URL(url):new URL(base, url); return new FileRowSupplier(source, separator); // determine charset Charset charset; if( encoding != null ) { charset = Charset.forName(encoding); }else{ // if not defined, use system charset charset = Charset.defaultCharset(); } return new FileRowSupplier(source, separator, charset); } } } }
histream-import/src/test/java/de/sekmi/histream/etl/TestRowSupplier.java +2 −1 Original line number Original line Diff line number Diff line package de.sekmi.histream.etl; package de.sekmi.histream.etl; import java.io.IOException; import java.io.IOException; import java.nio.charset.StandardCharsets; import org.junit.Assert; import org.junit.Assert; import org.junit.Test; import org.junit.Test; Loading @@ -9,7 +10,7 @@ public class TestRowSupplier { @Test @Test public void testLoadRows() throws IOException{ public void testLoadRows() throws IOException{ try( FileRowSupplier r = new FileRowSupplier(getClass().getResource("/data/test-1-patients.txt"), "\t") ){ try( FileRowSupplier r = new FileRowSupplier(getClass().getResource("/data/test-1-patients.txt"), "\t", StandardCharsets.ISO_8859_1) ){ String[] h = r.getHeaders(); String[] h = r.getHeaders(); Assert.assertEquals("patid", h[0]); Assert.assertEquals("patid", h[0]); Assert.assertEquals("nachname", h[2]); Assert.assertEquals("nachname", h[2]); Loading