Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
H
histream
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
Operations
Operations
Incidents
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Raphael
histream
Commits
3963d544
Commit
3963d544
authored
Jun 21, 2018
by
R.W.Majeed
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
use specified text encoding for reading csv files
parent
16f06ff0
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
22 additions
and
10 deletions
+22
-10
histream-import/src/main/java/de/sekmi/histream/etl/FileRowSupplier.java
.../src/main/java/de/sekmi/histream/etl/FileRowSupplier.java
+3
-2
histream-import/src/main/java/de/sekmi/histream/etl/config/CsvFile.java
...t/src/main/java/de/sekmi/histream/etl/config/CsvFile.java
+17
-7
histream-import/src/test/java/de/sekmi/histream/etl/TestRowSupplier.java
.../src/test/java/de/sekmi/histream/etl/TestRowSupplier.java
+2
-1
No files found.
histream-import/src/main/java/de/sekmi/histream/etl/FileRowSupplier.java
View file @
3963d544
...
@@ -6,6 +6,7 @@ import java.io.InputStreamReader;
...
@@ -6,6 +6,7 @@ import java.io.InputStreamReader;
import
java.io.UncheckedIOException
;
import
java.io.UncheckedIOException
;
import
java.net.URL
;
import
java.net.URL
;
import
java.net.URLConnection
;
import
java.net.URLConnection
;
import
java.nio.charset.Charset
;
import
java.time.Instant
;
import
java.time.Instant
;
import
com.opencsv.CSVParser
;
import
com.opencsv.CSVParser
;
...
@@ -19,7 +20,7 @@ public class FileRowSupplier extends RowSupplier {
...
@@ -19,7 +20,7 @@ public class FileRowSupplier extends RowSupplier {
private
Instant
timestamp
;
private
Instant
timestamp
;
public
FileRowSupplier
(
URL
location
,
String
fieldSeparator
)
throws
IOException
{
public
FileRowSupplier
(
URL
location
,
String
fieldSeparator
,
Charset
charset
)
throws
IOException
{
if
(
fieldSeparator
.
length
()
>
1
){
if
(
fieldSeparator
.
length
()
>
1
){
if
(
fieldSeparator
.
equals
(
"\\t"
)
){
if
(
fieldSeparator
.
equals
(
"\\t"
)
){
fieldSeparator
=
"\t"
;
fieldSeparator
=
"\t"
;
...
@@ -28,7 +29,7 @@ public class FileRowSupplier extends RowSupplier {
...
@@ -28,7 +29,7 @@ public class FileRowSupplier extends RowSupplier {
}
}
}
}
this
.
url
=
location
;
this
.
url
=
location
;
this
.
in
=
new
CSVReader
(
new
InputStreamReader
(
location
.
openStream
()),
fieldSeparator
.
charAt
(
0
),
CSVParser
.
DEFAULT_QUOTE_CHARACTER
,
(
char
)
0
);
this
.
in
=
new
CSVReader
(
new
InputStreamReader
(
location
.
openStream
()
,
charset
),
fieldSeparator
.
charAt
(
0
),
CSVParser
.
DEFAULT_QUOTE_CHARACTER
,
(
char
)
0
);
// TODO: check whether needed to close underlying InputStream
// TODO: check whether needed to close underlying InputStream
...
...
histream-import/src/main/java/de/sekmi/histream/etl/config/CsvFile.java
View file @
3963d544
...
@@ -3,6 +3,7 @@ package de.sekmi.histream.etl.config;
...
@@ -3,6 +3,7 @@ package de.sekmi.histream.etl.config;
import
java.io.IOException
;
import
java.io.IOException
;
import
java.net.MalformedURLException
;
import
java.net.MalformedURLException
;
import
java.net.URL
;
import
java.net.URL
;
import
java.nio.charset.Charset
;
import
java.util.regex.Pattern
;
import
java.util.regex.Pattern
;
import
javax.xml.bind.annotation.XmlAccessType
;
import
javax.xml.bind.annotation.XmlAccessType
;
...
@@ -32,7 +33,7 @@ public class CsvFile extends TableSource{
...
@@ -32,7 +33,7 @@ public class CsvFile extends TableSource{
String
url
;
String
url
;
/**
/**
*
File encoding is not used yet.
*
Encoding to use for reading text files
*/
*/
@XmlElement
@XmlElement
String
encoding
;
String
encoding
;
...
@@ -44,11 +45,11 @@ public class CsvFile extends TableSource{
...
@@ -44,11 +45,11 @@ public class CsvFile extends TableSource{
@XmlElement
@XmlElement
String
separator
;
String
separator
;
@XmlElement
//
@XmlElement
String
quote
;
//
String quote;
//
@XmlElement
//
@XmlElement
char
escape
;
//
char escape;
private
CsvFile
(){
private
CsvFile
(){
}
}
...
@@ -59,9 +60,18 @@ public class CsvFile extends TableSource{
...
@@ -59,9 +60,18 @@ public class CsvFile extends TableSource{
}
}
@Override
@Override
public
RowSupplier
rows
(
Meta
meta
)
throws
IOException
{
public
RowSupplier
rows
(
Meta
meta
)
throws
IOException
{
// resolve url relative to base url from metadata
URL
base
=
meta
.
getLocation
();
URL
base
=
meta
.
getLocation
();
URL
source
=
(
base
==
null
)?
new
URL
(
url
):
new
URL
(
base
,
url
);
URL
source
=
(
base
==
null
)?
new
URL
(
url
):
new
URL
(
base
,
url
);
return
new
FileRowSupplier
(
source
,
separator
);
// determine charset
Charset
charset
;
if
(
encoding
!=
null
)
{
charset
=
Charset
.
forName
(
encoding
);
}
else
{
// if not defined, use system charset
charset
=
Charset
.
defaultCharset
();
}
return
new
FileRowSupplier
(
source
,
separator
,
charset
);
}
}
}
}
histream-import/src/test/java/de/sekmi/histream/etl/TestRowSupplier.java
View file @
3963d544
package
de.sekmi.histream.etl
;
package
de.sekmi.histream.etl
;
import
java.io.IOException
;
import
java.io.IOException
;
import
java.nio.charset.StandardCharsets
;
import
org.junit.Assert
;
import
org.junit.Assert
;
import
org.junit.Test
;
import
org.junit.Test
;
...
@@ -9,7 +10,7 @@ public class TestRowSupplier {
...
@@ -9,7 +10,7 @@ public class TestRowSupplier {
@Test
@Test
public
void
testLoadRows
()
throws
IOException
{
public
void
testLoadRows
()
throws
IOException
{
try
(
FileRowSupplier
r
=
new
FileRowSupplier
(
getClass
().
getResource
(
"/data/test-1-patients.txt"
),
"\t"
)
){
try
(
FileRowSupplier
r
=
new
FileRowSupplier
(
getClass
().
getResource
(
"/data/test-1-patients.txt"
),
"\t"
,
StandardCharsets
.
ISO_8859_1
)
){
String
[]
h
=
r
.
getHeaders
();
String
[]
h
=
r
.
getHeaders
();
Assert
.
assertEquals
(
"patid"
,
h
[
0
]);
Assert
.
assertEquals
(
"patid"
,
h
[
0
]);
Assert
.
assertEquals
(
"nachname"
,
h
[
2
]);
Assert
.
assertEquals
(
"nachname"
,
h
[
2
]);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment