Commit 3963d544 authored by R.W.Majeed
Browse files

use specified text encoding for reading csv files

parent 16f06ff0
Loading
Loading
Loading
Loading
+3 −2
Original line number Original line Diff line number Diff line
@@ -6,6 +6,7 @@ import java.io.InputStreamReader;
import java.io.UncheckedIOException;
import java.io.UncheckedIOException;
import java.net.URL;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLConnection;
import java.nio.charset.Charset;
import java.time.Instant;
import java.time.Instant;


import com.opencsv.CSVParser;
import com.opencsv.CSVParser;
@@ -19,7 +20,7 @@ public class FileRowSupplier extends RowSupplier {


	private Instant timestamp;
	private Instant timestamp;


	public FileRowSupplier(URL location, String fieldSeparator) throws IOException{
	public FileRowSupplier(URL location, String fieldSeparator, Charset charset) throws IOException{
		if( fieldSeparator.length() > 1 ){
		if( fieldSeparator.length() > 1 ){
			if( fieldSeparator.equals("\\t") ){
			if( fieldSeparator.equals("\\t") ){
				fieldSeparator = "\t";
				fieldSeparator = "\t";
@@ -28,7 +29,7 @@ public class FileRowSupplier extends RowSupplier {
			}
			}
		}
		}
		this.url = location;
		this.url = location;
		this.in = new CSVReader(new InputStreamReader(location.openStream()),fieldSeparator.charAt(0), CSVParser.DEFAULT_QUOTE_CHARACTER, (char)0);
		this.in = new CSVReader(new InputStreamReader(location.openStream(), charset),fieldSeparator.charAt(0), CSVParser.DEFAULT_QUOTE_CHARACTER, (char)0);
		
		
		// TODO: check whether needed to close underlying InputStream
		// TODO: check whether needed to close underlying InputStream
		
		
+17 −7
Original line number Original line Diff line number Diff line
@@ -3,6 +3,7 @@ package de.sekmi.histream.etl.config;
import java.io.IOException;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URL;
import java.nio.charset.Charset;
import java.util.regex.Pattern;
import java.util.regex.Pattern;


import javax.xml.bind.annotation.XmlAccessType;
import javax.xml.bind.annotation.XmlAccessType;
@@ -32,7 +33,7 @@ public class CsvFile extends TableSource{
	String url;
	String url;
	
	
	/**
	/**
	 * File encoding is not used yet.
	 * Encoding to use for reading text files
	 */
	 */
	@XmlElement
	@XmlElement
	String encoding;
	String encoding;
@@ -44,11 +45,11 @@ public class CsvFile extends TableSource{
	@XmlElement
	@XmlElement
	String separator;
	String separator;
	
	
	@XmlElement
//	@XmlElement
	String quote;
//	String quote;
	
//	
	@XmlElement
//	@XmlElement
	char escape;
//	char escape;
	
	
	private CsvFile(){
	private CsvFile(){
	}
	}
@@ -59,9 +60,18 @@ public class CsvFile extends TableSource{
	}
	}
	@Override
	@Override
	public RowSupplier rows(Meta meta) throws IOException {
	public RowSupplier rows(Meta meta) throws IOException {
		// resolve url relative to base url from metadata
		URL base = meta.getLocation();
		URL base = meta.getLocation();
		URL source = (base == null)?new URL(url):new URL(base, url);
		URL source = (base == null)?new URL(url):new URL(base, url);
		return new FileRowSupplier(source, separator);
		// determine charset
		Charset charset;
		if( encoding != null ) {
			charset = Charset.forName(encoding);
		}else{
			// if not defined, use system charset
			charset = Charset.defaultCharset();
		}
		return new FileRowSupplier(source, separator, charset);
	}
	}


}
}
+2 −1
Original line number Original line Diff line number Diff line
package de.sekmi.histream.etl;
package de.sekmi.histream.etl;


import java.io.IOException;
import java.io.IOException;
import java.nio.charset.StandardCharsets;


import org.junit.Assert;
import org.junit.Assert;
import org.junit.Test;
import org.junit.Test;
@@ -9,7 +10,7 @@ public class TestRowSupplier {
	
	
	@Test
	@Test
	public void testLoadRows() throws IOException{
	public void testLoadRows() throws IOException{
		try( FileRowSupplier r = new FileRowSupplier(getClass().getResource("/data/test-1-patients.txt"), "\t") ){
		try( FileRowSupplier r = new FileRowSupplier(getClass().getResource("/data/test-1-patients.txt"), "\t", StandardCharsets.ISO_8859_1) ){
			String[] h = r.getHeaders();
			String[] h = r.getHeaders();
			Assert.assertEquals("patid", h[0]);
			Assert.assertEquals("patid", h[0]);
			Assert.assertEquals("nachname", h[2]);
			Assert.assertEquals("nachname", h[2]);