Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Raphael
histream
Commits
df24a214
Commit
df24a214
authored
Aug 03, 2015
by
R.W.Majeed
Browse files
processing of patient tables implemented
parent
9a9650da
Changes
21
Hide whitespace changes
Inline
Side-by-side
histream-import/src/main/java/de/sekmi/histream/etl/ColumnMap.java
View file @
df24a214
package
de.sekmi.histream.etl
.config
;
package
de.sekmi.histream.etl
;
import
java.util.HashMap
;
import
java.util.Map
;
import
de.sekmi.histream.etl.config.Column
;
/**
* Maps {@link Column}s to header/table indices
*
...
...
@@ -11,6 +13,10 @@ import java.util.Map;
*/
public
class
ColumnMap
{
String
[]
headers
;
/**
* Maps column names to row indices
*/
Map
<
String
,
Integer
>
map
;
public
ColumnMap
(
String
[]
headers
){
...
...
@@ -19,12 +25,20 @@ public class ColumnMap{
}
public
boolean
registerColumn
(
Column
column
){
if
(
map
.
containsKey
(
column
.
getName
())
){
// column name already registered
return
true
;
}
// find name and map to index
for
(
int
i
=
0
;
i
<
headers
.
length
;
i
++
){
if
(
column
.
name
.
equals
(
headers
[
i
]
)
){
map
.
put
(
column
.
n
ame
,
Integer
.
valueOf
(
i
)
);
if
(
headers
[
i
].
equals
(
column
.
getName
()
)
){
map
.
put
(
column
.
getN
ame
()
,
Integer
.
valueOf
(
i
)
);
return
true
;
}
}
return
false
;
}
/**
 * Looks up the index stored for a column.
 *
 * @param column column whose name is used as lookup key
 * @return the value stored in the map for the column name, or
 *  {@code null} if the map contains no entry for that name
 */
public Integer indexOf(Column column){
    final String key = column.getName();
    return map.get(key);
}
}
\ No newline at end of file
histream-import/src/main/java/de/sekmi/histream/etl/FileRowSupplier.java
0 → 100644
View file @
df24a214
package
de.sekmi.histream.etl
;
import
java.io.BufferedReader
;
import
java.io.IOException
;
import
java.io.InputStreamReader
;
import
java.io.UncheckedIOException
;
import
java.net.URL
;
import
java.util.regex.Pattern
;
/**
 * {@link RowSupplier} reading rows from a delimited text resource.
 * <p>
 * The constructor opens the resource and reads its first line, which is
 * split into the headers. Every call to {@link #get()} reads one more line
 * and splits it with the same separator pattern.
 * <p>
 * NOTE(review): the resource is decoded with the platform default charset
 * because no charset is passed to the {@link InputStreamReader}.
 */
public class FileRowSupplier extends RowSupplier {
    private Pattern fieldSeparatorPattern;
    private BufferedReader in;
    private String[] headers;

    /**
     * Opens the resource using a literal field separator.
     *
     * @param location resource to read
     * @param fieldSeparator separator string, matched literally (quoted before compiling)
     * @throws IOException if the resource cannot be opened or has no header line
     */
    public FileRowSupplier(URL location, String fieldSeparator) throws IOException {
        this(location, Pattern.compile(Pattern.quote(fieldSeparator)));
    }

    /**
     * Opens the resource and reads the header line.
     *
     * @param location resource to read
     * @param pattern regular expression separating the fields of a line
     * @throws IOException if the resource cannot be opened or read, or if it
     *  does not contain a header line
     */
    public FileRowSupplier(URL location, Pattern pattern) throws IOException {
        this.fieldSeparatorPattern = pattern;
        // closing the BufferedReader also closes the wrapped reader and stream
        this.in = new BufferedReader(new InputStreamReader(location.openStream()));
        try {
            // load headers
            String line = in.readLine();
            if (line == null) {
                throw new IOException("No header line found in " + location);
            }
            this.headers = fieldSeparatorPattern.split(line);
        } catch (IOException | RuntimeException e) {
            // the caller never receives this instance and therefore cannot
            // close it; release the stream before propagating the failure
            try {
                in.close();
            } catch (IOException closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
    }

    /**
     * @return headers read from the first line of the resource
     */
    @Override
    public String[] getHeaders() throws IOException {
        return headers;
    }

    /**
     * Reads the next line and splits it into fields.
     *
     * @return fields of the next line, or {@code null} at the end of the resource
     * @throws UncheckedIOException if reading the line fails
     */
    @Override
    public Object[] get() {
        String line;
        try {
            line = in.readLine();
        } catch (IOException e) {
            throw new UncheckedIOException(e);
        }
        if (line == null) {
            // end of file
            return null;
        }
        // negative limit keeps trailing empty fields, so a row whose last
        // columns are empty still has one entry per separator-delimited field
        return fieldSeparatorPattern.split(line, -1);
    }

    /**
     * Closes the underlying reader.
     */
    @Override
    public void close() throws IOException {
        in.close();
    }
}
histream-import/src/main/java/de/sekmi/histream/etl/ParseException.java
0 → 100644
View file @
df24a214
package
de.sekmi.histream.etl
;
/**
 * Checked exception carrying a message about a value which could not be
 * parsed or converted.
 */
public class ParseException extends Exception {

    private static final long serialVersionUID = 1L;

    /**
     * Creates the exception with a detail message.
     *
     * @param message detail message, passed on to {@link Exception}
     */
    public ParseException(String message) {
        super(message);
    }
}
histream-import/src/main/java/de/sekmi/histream/etl/PatientRow.java
0 → 100644
View file @
df24a214
package
de.sekmi.histream.etl
;
import
java.time.Instant
;
import
de.sekmi.histream.DateTimeAccuracy
;
import
de.sekmi.histream.etl.config.PatientTable
;
import
de.sekmi.histream.ext.Patient
;
/**
 * Row of patient data, implementing {@link Patient}.
 * <p>
 * Holds the patient id, names, birth and death date as well as the
 * source id, source timestamp and sex passed to the respective setters.
 *
 * @author Raphael
 */
public class PatientRow implements Patient {
    String patid;
    String firstname;
    String lastname;
    DateTimeAccuracy birthdate;
    DateTimeAccuracy deathdate;
    private String sourceId;
    private Instant sourceTimestamp;
    private Sex sex;
    // TODO concepts

    /**
     * Sets first and last name.
     *
     * @param first first name
     * @param last last name
     */
    public void setNames(String first, String last){
        this.firstname = first;
        this.lastname = last;
    }

    /**
     * @param date birth date to store
     */
    public void setBirthDate(DateTimeAccuracy date){
        this.birthdate = date;
    }

    /**
     * @param date death date to store
     */
    public void setDeathDate(DateTimeAccuracy date){
        this.deathdate = date;
    }

    /**
     * Creates a new row. Not implemented yet: none of the arguments is
     * read and the returned row has no id.
     *
     * @param input row values (currently unused)
     * @param map column map (currently unused)
     * @param table patient table configuration (currently unused)
     * @return new row without any values
     */
    public static PatientRow load(Object[] input, ColumnMap map, PatientTable table){
        PatientRow row = new PatientRow();
        row.patid = null;
        // TODO: fill id from input, e.g. table.patid.valueOf(input[map.findIndex(table.patid)])
        return row;
    }

    @Override
    public String getId() {
        return patid;
    }

    @Override
    public void setId(String patientId) {
        this.patid = patientId;
    }

    /**
     * @return source id passed to {@link #setSourceId(String)}, or {@code null} if never set
     */
    @Override
    public String getSourceId() {
        return sourceId;
    }

    /**
     * @return timestamp passed to {@link #setSourceTimestamp(Instant)}, or {@code null} if never set
     */
    @Override
    public Instant getSourceTimestamp() {
        return sourceTimestamp;
    }

    @Override
    public void setSourceId(String arg0) {
        this.sourceId = arg0;
    }

    @Override
    public void setSourceTimestamp(Instant arg0) {
        this.sourceTimestamp = arg0;
    }

    @Override
    public DateTimeAccuracy getBirthDate() {
        return this.birthdate;
    }

    @Override
    public DateTimeAccuracy getDeathDate() {
        return this.deathdate;
    }

    /**
     * @return sex passed to {@link #setSex(Sex)}, or {@code null} if never set
     */
    @Override
    public Sex getSex() {
        return sex;
    }

    @Override
    public void setSex(Sex arg0) {
        this.sex = arg0;
    }
}
histream-import/src/main/java/de/sekmi/histream/etl/PatientStream.java
0 → 100644
View file @
df24a214
package
de.sekmi.histream.etl
;
import
java.io.IOException
;
import
java.util.function.Supplier
;
import
de.sekmi.histream.etl.config.PatientTable
;
/**
 * Supplies {@link PatientRow}s by converting the rows of a
 * {@link RowSupplier} with a {@link PatientTable} configuration.
 */
public class PatientStream implements Supplier<PatientRow>, AutoCloseable {
    RowSupplier rows;
    PatientTable table;
    ColumnMap map;

    /**
     * Stores the arguments and builds the column map from the headers of
     * the row supplier.
     *
     * @param rows supplier of raw rows
     * @param table patient table configuration
     * @throws IOException if the headers cannot be retrieved
     */
    public PatientStream(RowSupplier rows, PatientTable table) throws IOException {
        this.rows = rows;
        this.table = table;
        final String[] headers = rows.getHeaders();
        this.map = table.getColumnMap(headers);
    }

    /**
     * Closes the row supplier.
     */
    @Override
    public void close() throws IOException {
        rows.close();
    }

    /**
     * Retrieves the next raw row and converts it to a patient.
     *
     * @return next patient, or {@code null} if the row supplier returned {@code null}
     * @throws UncheckedParseException wrapping the {@link ParseException}
     *  raised during conversion
     */
    @Override
    public PatientRow get() {
        final Object[] values = rows.get();
        if (values == null) {
            // no more rows
            return null;
        }
        try {
            return table.fillPatient(map, values);
        } catch (ParseException e) {
            throw new UncheckedParseException(e);
        }
    }
}
histream-import/src/main/java/de/sekmi/histream/etl/RowSupplier.java
0 → 100644
View file @
df24a214
package
de.sekmi.histream.etl
;
import
java.io.IOException
;
import
java.util.function.Supplier
;
/**
 * Base class for sources of table rows. A row is an {@code Object[]};
 * the supplier must be closed after use.
 */
public abstract class RowSupplier implements Supplier<Object[]>, AutoCloseable {

    public RowSupplier() {
    }

    /**
     * @return header names of the table
     * @throws IOException if the headers cannot be retrieved
     */
    public abstract String[] getHeaders() throws IOException;

    /**
     * @return the next row
     */
    @Override
    public abstract Object[] get();

    /**
     * Narrows {@link AutoCloseable#close()} to throw {@link IOException} only.
     */
    @Override
    public abstract void close() throws IOException;
}
histream-import/src/main/java/de/sekmi/histream/etl/UncheckedParseException.java
0 → 100644
View file @
df24a214
package
de.sekmi.histream.etl
;
/**
 * Unchecked wrapper for a {@link ParseException}, for use where a checked
 * exception cannot be thrown.
 */
public class UncheckedParseException extends RuntimeException {

    private static final long serialVersionUID = 1L;

    /**
     * @param cause the wrapped parse exception
     */
    public UncheckedParseException(ParseException cause) {
        super(cause);
    }

    /**
     * @return the wrapped exception, cast to {@link ParseException}
     */
    @Override
    public ParseException getCause() {
        final Throwable wrapped = super.getCause();
        return (ParseException) wrapped;
    }
}
histream-import/src/main/java/de/sekmi/histream/etl/VisitRow.java
0 → 100644
View file @
df24a214
package
de.sekmi.histream.etl
;
/**
 * Placeholder class without any members.
 */
public class VisitRow {
}
histream-import/src/main/java/de/sekmi/histream/etl/VisitSupplier.java
0 → 100644
View file @
df24a214
package
de.sekmi.histream.etl
;
import
java.util.Spliterator
;
import
java.util.function.Consumer
;
/**
 * {@link Spliterator} of {@link VisitRow}s. Not implemented yet: every
 * method is a stub which reports an empty, unsplittable source.
 */
public class VisitSupplier implements Spliterator<VisitRow> {

    /**
     * Stub: never invokes the action.
     *
     * @return always {@code false}
     */
    @Override
    public boolean tryAdvance(Consumer<? super VisitRow> action) {
        // TODO not implemented
        return false;
    }

    /**
     * Stub.
     *
     * @return always {@code null}
     */
    @Override
    public Spliterator<VisitRow> trySplit() {
        // TODO not implemented
        return null;
    }

    /**
     * Stub.
     *
     * @return always {@code 0}
     */
    @Override
    public long estimateSize() {
        // TODO not implemented
        return 0;
    }

    /**
     * Stub.
     *
     * @return always {@code 0}, i.e. no characteristics
     */
    @Override
    public int characteristics() {
        // TODO not implemented
        return 0;
    }
}
histream-import/src/main/java/de/sekmi/histream/etl/config/Column.java
View file @
df24a214
...
...
@@ -7,9 +7,12 @@ import javax.xml.bind.annotation.XmlSeeAlso;
import
javax.xml.bind.annotation.XmlTransient
;
import
javax.xml.bind.annotation.XmlValue
;
import
de.sekmi.histream.etl.ColumnMap
;
import
de.sekmi.histream.etl.ParseException
;
@XmlTransient
@XmlSeeAlso
({
StringColumn
.
class
})
public
class
Column
{
public
abstract
class
Column
{
protected
Column
(){
}
public
Column
(
String
name
){
...
...
@@ -62,6 +65,8 @@ public class Column {
@XmlValue
String
name
;
/**
 * @return the column name
 */
public String getName(){
    return this.name;
}
/**
* Convert a string input value to the output data type. The resulting type depends
* on the type attribute and can be one of Long, BigDecimal, String, DateTime
...
...
@@ -72,7 +77,7 @@ public class Column {
* @param value input value. e.g. from text table column
* @return output type representing the input value
*/
public
Object
valueOf
(
String
value
){
public
Object
valueOf
(
Object
value
)
throws
ParseException
{
if
(
constantValue
!=
null
){
value
=
constantValue
;
}
...
...
@@ -82,20 +87,25 @@ public class Column {
}
if
(
value
!=
null
&&
regexMatch
!=
null
){
value
=
applyRegularExpression
(
value
);
if
(
!(
value
instanceof
String
)
){
throw
new
ParseException
(
"regex-match can only be used on String, but found "
+
value
.
getClass
().
getName
());
}
value
=
applyRegularExpression
((
String
)
value
);
}
return
value
;
}
/**
 * Picks this column's value from a row and converts it.
 * <p>
 * The position of the value is looked up in the column map; the value at
 * that position is passed to {@link #valueOf(Object)}.
 *
 * @param map column map used to look up the index of this column
 * @param row values of one table row
 * @return converted value
 * @throws ParseException if this column has no index in the map, if the
 *  row has no field at that index, or if the conversion fails
 */
public Object valueOf(ColumnMap map, Object[] row) throws ParseException{
    // indexOf returns a boxed Integer; check it before unboxing
    Integer index = map.indexOf(this);
    if( index == null ){
        throw new ParseException("Column '"+name+"' is not registered in the column map");
    }
    int i = index.intValue();
    if( i < 0 || i >= row.length ){
        throw new ParseException("Column '"+name+"' is mapped to index "+i+", but the row has only "+row.length+" fields");
    }
    return this.valueOf(row[i]);
}
/**
 * Not implemented yet: the input is returned without modification.
 *
 * @param input input string
 * @return the unchanged input
 */
public String applyRegularExpression(String input){
    // TODO: apply
    final String result = input;
    return result;
}
/**
 * Subclass of {@link Column} without members of its own.
 * NOTE(review): presumably intended for integer typed columns - confirm.
 */
public static class IntegerColumn extends Column {
}
public
static
class
DecimalColumn
extends
Column
{
@XmlTransient
DecimalFormat
decimalFormat
;
...
...
histream-import/src/main/java/de/sekmi/histream/etl/config/DateTimeColumn.java
View file @
df24a214
package
de.sekmi.histream.etl.config
;
import
java.sql.Timestamp
;
import
java.time.format.DateTimeFormatter
;
import
javax.xml.bind.annotation.XmlAttribute
;
import
javax.xml.bind.annotation.XmlTransient
;
import
de.sekmi.histream.DateTimeAccuracy
;
import
de.sekmi.histream.etl.ParseException
;
/**
* Date and Time column.
* TODO implement parsing of partial date/time. e.g. 2003-10
...
...
@@ -31,4 +35,25 @@ public class DateTimeColumn extends Column{
/**
 * No-argument constructor which only invokes the superclass constructor.
 */
protected DateTimeColumn() {
    super();
}
/**
 * Converts an input value to a date/time value.
 * <p>
 * The value is first processed by the superclass. A resulting string is
 * parsed with the formatter, which is created from the format pattern on
 * first use.
 *
 * @param value input value
 * @return parsed date for string input; {@code null} if the processed
 *  value is {@code null} or a {@link Timestamp} (conversion not implemented yet)
 * @throws ParseException if a string must be parsed but no format is
 *  specified, or if the superclass processing fails
 * @throws IllegalArgumentException if the processed value is of any other type
 */
@Override
public Object valueOf(Object value) throws ParseException{
    value = super.valueOf(value);
    if( value == null ){
        // nothing to parse; without this check the type dispatch below
        // would fail with a NullPointerException in the error branch
        return null;
    }
    if( value instanceof String ){
        // parse date according to format
        if( formatter == null && format != null ){
            formatter = DateTimeFormatter.ofPattern(format);
        }
        if( formatter == null ){
            throw new ParseException("format must be specified for DateTime fields if strings are parsed");
        }
        return DateTimeAccuracy.parse(formatter,(String)value);
    }else if( value instanceof Timestamp ){
        // TODO convert from timestamp
        return null;
    }else{
        throw new IllegalArgumentException("Don't know how to parse type "+value.getClass()+" to datetime");
    }
}
}
\ No newline at end of file
histream-import/src/main/java/de/sekmi/histream/etl/config/FileSource.java
View file @
df24a214
package
de.sekmi.histream.etl.config
;
import
java.io.IOException
;
import
java.net.MalformedURLException
;
import
java.net.URL
;
import
java.util.
stream.Stream
;
import
java.util.
regex.Pattern
;
import
javax.xml.bind.annotation.XmlAccessType
;
import
javax.xml.bind.annotation.XmlAccessorType
;
import
javax.xml.bind.annotation.XmlElement
;
import
de.sekmi.histream.etl.FileRowSupplier
;
import
de.sekmi.histream.etl.RowSupplier
;
@XmlAccessorType
(
XmlAccessType
.
FIELD
)
public
class
FileSource
extends
TableSource
{
@XmlElement
URL
url
;
@XmlElement
String
type
;
String
encoding
;
@XmlElement
String
separator
;
private
FileSource
(){
}
public
FileSource
(
String
url
,
String
type
)
throws
MalformedURLException
{
public
FileSource
(
String
url
,
String
separator
)
throws
MalformedURLException
{
this
();
this
.
url
=
new
URL
(
url
);
this
.
type
=
type
;
this
.
separator
=
separator
;
}
@Override
public
String
[]
getHeaders
()
{
// TODO Auto-generated method stub
return
null
;
public
RowSupplier
rows
()
throws
IOException
{
return
new
FileRowSupplier
(
url
,
Pattern
.
compile
(
separator
));
}
@Override
public
Stream
<
String
[]>
rows
()
{
return
null
;
}
}
histream-import/src/main/java/de/sekmi/histream/etl/config/PatientTable.java
View file @
df24a214
package
de.sekmi.histream.etl.config
;
import
java.io.IOException
;
import
javax.xml.bind.annotation.XmlAccessType
;
import
javax.xml.bind.annotation.XmlAccessorType
;
import
javax.xml.bind.annotation.XmlElement
;
import
javax.xml.bind.annotation.XmlElementWrapper
;
import
de.sekmi.histream.DateTimeAccuracy
;
import
de.sekmi.histream.etl.ColumnMap
;
import
de.sekmi.histream.etl.ParseException
;
import
de.sekmi.histream.etl.PatientRow
;
import
de.sekmi.histream.etl.PatientStream
;
/**
* Patient table. Contains patient id and other identifying information.
* Can also contain medical data
...
...
@@ -22,10 +30,49 @@ public class PatientTable extends Table implements WideInterface{
@XmlAccessorType
(
XmlAccessType
.
FIELD
)
public
static
class
IDAT
extends
IdatColumns
{
StringColumn
firstname
;
StringColumn
last
name
;
StringColumn
sur
name
;
DateTimeColumn
birthdate
;
DateTimeColumn
deathdate
;
StringColumn
gender
;
Column
[]
ignore
;
}
/**
 * Builds the column map for the given headers.
 * <p>
 * The patient id column must be found in the headers. The columns
 * firstname, surname, birthdate, deathdate and gender are only registered
 * if they are configured (not {@code null}).
 *
 * @param headers table headers
 * @return map containing all registered columns
 * @throws IllegalArgumentException if the name of a registered column is
 *  not found in the headers
 */
@Override
public ColumnMap getColumnMap(String[] headers) {
    final ColumnMap columns = new ColumnMap(headers);
    // the patient id is mandatory and has its own error message
    final boolean idFound = columns.registerColumn(idat.patientId);
    if( !idFound ){
        throw new IllegalArgumentException("patientId column name '"+idat.patientId.name+"' not found in patient table headers");
    }
    registerIfConfigured(columns, idat.firstname, "firstname");
    registerIfConfigured(columns, idat.surname, "surname");
    registerIfConfigured(columns, idat.birthdate, "birthdate");
    registerIfConfigured(columns, idat.deathdate, "deathdate");
    registerIfConfigured(columns, idat.gender, "gender");
    return columns;
}

/**
 * Registers a column unless it is {@code null}; fails if its name is
 * not found in the headers of the map.
 */
private static void registerIfConfigured(ColumnMap columns, Column column, String label){
    if( column == null ){
        return;
    }
    if( !columns.registerColumn(column) ){
        throw new IllegalArgumentException(label+" column not found in patient header");
    }
}
/**
 * Creates a patient from one table row.
 * <p>
 * The patient id is required. Names, birth date and death date are only
 * read for columns which are configured (not {@code null}); values of
 * unconfigured columns stay {@code null}.
 *
 * @param map column map for the row
 * @param row values of one table row
 * @return patient filled with the values of the row
 * @throws ParseException if the patient id is missing or a value cannot
 *  be converted
 */
public PatientRow fillPatient(ColumnMap map, Object[] row) throws ParseException{
    PatientRow patient = new PatientRow();

    Object id = idat.patientId.valueOf(map, row);
    if( id == null ){
        throw new ParseException("patientId value missing in patient row");
    }
    patient.setId(id.toString());

    // the following columns are optional, see getColumnMap
    String first = null;
    if( idat.firstname != null ){
        first = (String)idat.firstname.valueOf(map, row);
    }
    String last = null;
    if( idat.surname != null ){
        last = (String)idat.surname.valueOf(map, row);
    }
    patient.setNames(first, last);

    if( idat.birthdate != null ){
        patient.setBirthDate((DateTimeAccuracy)idat.birthdate.valueOf(map, row));
    }
    if( idat.deathdate != null ){
        patient.setDeathDate((DateTimeAccuracy)idat.deathdate.valueOf(map, row));
    }
    // TODO concepts
    return patient;
}
/**
 * Opens the rows of the table source and wraps them in a patient stream.
 * <p>
 * If the stream cannot be created, the rows are closed again before the
 * failure is propagated.
 *
 * @return stream of patients; must be closed by the caller
 * @throws IOException if the rows cannot be opened or the headers cannot be read
 */
public PatientStream open() throws IOException{
    // fully qualified to avoid depending on an additional import
    de.sekmi.histream.etl.RowSupplier rows = source.rows();
    try{
        return new PatientStream(rows, this);
    }catch( IOException | RuntimeException e ){
        // no stream was returned, so nobody else can close the rows
        try{
            rows.close();
        }catch( IOException closeFailure ){
            e.addSuppressed(closeFailure);
        }
        throw e;
    }
}
}
histream-import/src/main/java/de/sekmi/histream/etl/config/SQLSource.java
View file @
df24a214
package
de.sekmi.histream.etl.config
;
import
java.util.stream.Stream
;
import
javax.xml.bind.annotation.XmlElement
;
import
de.sekmi.histream.etl.RowSupplier
;
public
class
SQLSource
extends
TableSource
{
@XmlElement
String
jdbcDriver
;
...
...
@@ -19,17 +18,10 @@ public class SQLSource extends TableSource {
this
.
jdbcDriver
=
driver
;
this
.
connectString
=
connectString
;
}
@Override
public
String
[]
getHeader
s
()
{
public
RowSupplier
row
s
()
{
// TODO Auto-generated method stub
return
null
;
}
@Override
public
Stream
<
String
[]>
rows
()
{
// TODO Auto-generated method stub
return
null
;
}
}
histream-import/src/main/java/de/sekmi/histream/etl/config/Table.java
View file @
df24a214
...
...
@@ -3,9 +3,34 @@ package de.sekmi.histream.etl.config;
import
javax.xml.bind.annotation.XmlElement
;
import
javax.xml.bind.annotation.XmlTransient
;