Commit 3963d544 authored by R.W.Majeed

use specified text encoding for reading csv files

parent 16f06ff0
...@@ -6,6 +6,7 @@ import java.io.InputStreamReader; ...@@ -6,6 +6,7 @@ import java.io.InputStreamReader;
import java.io.UncheckedIOException; import java.io.UncheckedIOException;
import java.net.URL; import java.net.URL;
import java.net.URLConnection; import java.net.URLConnection;
import java.nio.charset.Charset;
import java.time.Instant; import java.time.Instant;
import com.opencsv.CSVParser; import com.opencsv.CSVParser;
...@@ -19,7 +20,7 @@ public class FileRowSupplier extends RowSupplier { ...@@ -19,7 +20,7 @@ public class FileRowSupplier extends RowSupplier {
private Instant timestamp; private Instant timestamp;
public FileRowSupplier(URL location, String fieldSeparator) throws IOException{ public FileRowSupplier(URL location, String fieldSeparator, Charset charset) throws IOException{
if( fieldSeparator.length() > 1 ){ if( fieldSeparator.length() > 1 ){
if( fieldSeparator.equals("\\t") ){ if( fieldSeparator.equals("\\t") ){
fieldSeparator = "\t"; fieldSeparator = "\t";
...@@ -28,7 +29,7 @@ public class FileRowSupplier extends RowSupplier { ...@@ -28,7 +29,7 @@ public class FileRowSupplier extends RowSupplier {
} }
} }
this.url = location; this.url = location;
this.in = new CSVReader(new InputStreamReader(location.openStream()),fieldSeparator.charAt(0), CSVParser.DEFAULT_QUOTE_CHARACTER, (char)0); this.in = new CSVReader(new InputStreamReader(location.openStream(), charset),fieldSeparator.charAt(0), CSVParser.DEFAULT_QUOTE_CHARACTER, (char)0);
// TODO: check whether needed to close underlying InputStream // TODO: check whether needed to close underlying InputStream
......
...@@ -3,6 +3,7 @@ package de.sekmi.histream.etl.config; ...@@ -3,6 +3,7 @@ package de.sekmi.histream.etl.config;
import java.io.IOException; import java.io.IOException;
import java.net.MalformedURLException; import java.net.MalformedURLException;
import java.net.URL; import java.net.URL;
import java.nio.charset.Charset;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import javax.xml.bind.annotation.XmlAccessType; import javax.xml.bind.annotation.XmlAccessType;
...@@ -32,7 +33,7 @@ public class CsvFile extends TableSource{ ...@@ -32,7 +33,7 @@ public class CsvFile extends TableSource{
String url; String url;
/** /**
* File encoding is not used yet. * Encoding to use for reading text files
*/ */
@XmlElement @XmlElement
String encoding; String encoding;
...@@ -44,11 +45,11 @@ public class CsvFile extends TableSource{ ...@@ -44,11 +45,11 @@ public class CsvFile extends TableSource{
@XmlElement @XmlElement
String separator; String separator;
@XmlElement // @XmlElement
String quote; // String quote;
//
@XmlElement // @XmlElement
char escape; // char escape;
private CsvFile(){ private CsvFile(){
} }
...@@ -59,9 +60,18 @@ public class CsvFile extends TableSource{ ...@@ -59,9 +60,18 @@ public class CsvFile extends TableSource{
} }
@Override @Override
public RowSupplier rows(Meta meta) throws IOException { public RowSupplier rows(Meta meta) throws IOException {
// resolve url relative to base url from metadata
URL base = meta.getLocation(); URL base = meta.getLocation();
URL source = (base == null)?new URL(url):new URL(base, url); URL source = (base == null)?new URL(url):new URL(base, url);
return new FileRowSupplier(source, separator); // determine charset
Charset charset;
if( encoding != null ) {
charset = Charset.forName(encoding);
}else{
// if not defined, use system charset
charset = Charset.defaultCharset();
}
return new FileRowSupplier(source, separator, charset);
} }
} }
package de.sekmi.histream.etl; package de.sekmi.histream.etl;
import java.io.IOException; import java.io.IOException;
import java.nio.charset.StandardCharsets;
import org.junit.Assert; import org.junit.Assert;
import org.junit.Test; import org.junit.Test;
...@@ -9,7 +10,7 @@ public class TestRowSupplier { ...@@ -9,7 +10,7 @@ public class TestRowSupplier {
@Test @Test
public void testLoadRows() throws IOException{ public void testLoadRows() throws IOException{
try( FileRowSupplier r = new FileRowSupplier(getClass().getResource("/data/test-1-patients.txt"), "\t") ){ try( FileRowSupplier r = new FileRowSupplier(getClass().getResource("/data/test-1-patients.txt"), "\t", StandardCharsets.ISO_8859_1) ){
String[] h = r.getHeaders(); String[] h = r.getHeaders();
Assert.assertEquals("patid", h[0]); Assert.assertEquals("patid", h[0]);
Assert.assertEquals("nachname", h[2]); Assert.assertEquals("nachname", h[2]);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment