Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Raphael
histream
Commits
334d4ff3
Commit
334d4ff3
authored
Aug 02, 2015
by
R.W.Majeed
Browse files
typed columns
parent
b2b13333
Changes
12
Show whitespace changes
Inline
Side-by-side
histream-import/src/main/java/de/sekmi/histream/etl/config/Column.java
View file @
334d4ff3
package
de.sekmi.histream.etl.config
;
import
java.text.DecimalFormat
;
import
javax.xml.bind.annotation.XmlAttribute
;
import
javax.xml.bind.annotation.XmlSeeAlso
;
import
javax.xml.bind.annotation.XmlTransient
;
import
javax.xml.bind.annotation.XmlValue
;
@XmlTransient
@XmlSeeAlso
({
StringColumn
.
class
})
public
class
Column
{
pr
ivate
Column
(){
pr
otected
Column
(){
}
public
Column
(
String
name
){
this
();
this
.
name
=
name
;
}
/**
* If this string is found in the column data, the resulting value will be null.
*/
@XmlAttribute
String
na
;
/**
* If set (e.g. non-null), this will always overwrite any other value.
* Datatype formats and regular expressions are still applied to the constant value.
*/
@XmlAttribute
(
name
=
"constant-value"
)
String
constantValue
;
/**
* Regular expression which needs to match the input string
*/
@XmlAttribute
(
name
=
"regex-match"
)
String
regexMatch
;
/**
* Replace the input value with the specified string or regular expression group from {@link #regexMatch}.
* If not specified, the full input string is used (regardless of match region).
*/
@XmlAttribute
(
name
=
"regex-replace"
)
String
regexReplace
;
/**
* Action to perform if the {@link #regexMatch} did not match the input string.
* Either use NA (usually null) for the value, or drop the whole concept/fact.
*/
@XmlAttribute
(
name
=
"regex-nomatch-action"
)
String
regexNoMatchAction
;
// either na or drop
/**
* Report a warning if the {@link #regexMatch} did not match the input string.
* Defaults to true.
*/
@XmlAttribute
(
name
=
"regex-nomatch-warning"
)
Boolean
regexNoMatchWarning
;
/**
* Column name to use for reading input values.
*/
@XmlValue
String
name
;
/**
* Convert a string input value to the output data type. The resulting type depends
* on the type attribute and can be one of Long, BigDecimal, String, DateTime
* or DateTimeAccuracy (for incomplete dates).
* <p>
* TODO: how to read SQL table data, which already contains types (e.g. sql.Integer)
*
* @param value input value. e.g. from text table column
* @return output type representing the input value
*/
public
Object
valueOf
(
String
value
){
if
(
constantValue
!=
null
){
value
=
constantValue
;
}
if
(
na
!=
null
&&
value
!=
null
&&
na
.
equals
(
value
)
){
value
=
null
;
}
if
(
value
!=
null
&&
regexMatch
!=
null
){
value
=
applyRegularExpression
(
value
);
}
return
value
;
}
public
String
applyRegularExpression
(
String
input
){
// TODO: apply
return
input
;
}
public
static
class
IntegerColumn
extends
Column
{
}
public
static
class
DecimalColumn
extends
Column
{
@XmlTransient
DecimalFormat
decimalFormat
;
/**
* Decimal format string for parsing via {@link DecimalFormat}
* @see DecimalFormat#DecimalFormat(String)
*/
@XmlAttribute
String
format
;
}
}
histream-import/src/main/java/de/sekmi/histream/etl/config/ColumnMap.java
0 → 100644
View file @
334d4ff3
package
de.sekmi.histream.etl.config
;
import
java.util.HashMap
;
import
java.util.Map
;
/**
 * Maps {@link Column}s to header/table indices
 *
 * @author Raphael
 *
 */
public class ColumnMap {
    /** Header names of the table; the array position is the column index. */
    String[] headers;

    /** Registered column names mapped to their index within {@link #headers}. */
    Map<String, Integer> map;

    /**
     * Creates an empty map for the given table headers.
     * The array is stored as is (no copy is made).
     *
     * @param headers header names in table order
     */
    public ColumnMap(String[] headers) {
        this.headers = headers;
        this.map = new HashMap<>();
    }

    /**
     * Looks up the column's name in the headers and, if found, remembers
     * the index of the first matching header.
     *
     * @param column column to register
     * @return {@code true} if a header with the column's name exists and the
     *  column was registered, {@code false} if no header matches or the
     *  column has no name
     */
    public boolean registerColumn(Column column) {
        String name = column.name;
        if (name == null) {
            // a column without a name (e.g. created via the no-arg constructor)
            // can not match any header; report "not found" instead of failing
            // with a NullPointerException
            return false;
        }
        for (int i = 0; i < headers.length; i++) {
            if (name.equals(headers[i])) {
                map.put(name, Integer.valueOf(i));
                return true;
            }
        }
        return false;
    }
}
\ No newline at end of file
histream-import/src/main/java/de/sekmi/histream/etl/config/Concept.java
View file @
334d4ff3
...
...
@@ -16,10 +16,10 @@ public class Concept{
String
id
;
// TODO: value should contain also type (string,decimal,integer,...)
Column
value
;
Column
unit
;
String
Column
unit
;
@XmlElement
(
required
=
true
)
Column
start
;
Column
end
;
DateTime
Column
start
;
DateTime
Column
end
;
@XmlElement
(
name
=
"modifier"
)
Modifier
[]
modifiers
;
// ...
...
...
@@ -30,7 +30,7 @@ public class Concept{
String
id
;
// TODO: value with type
Column
value
;
Column
unit
;
String
Column
unit
;
private
Modifier
(){
}
...
...
@@ -43,10 +43,10 @@ public class Concept{
private
Concept
(){
}
public
Concept
(
String
id
,
String
startColumn
){
public
Concept
(
String
id
,
String
startColumn
,
String
format
){
this
();
this
.
id
=
id
;
this
.
start
=
new
Column
(
startColumn
);
this
.
start
=
new
DateTime
Column
(
startColumn
,
format
);
}
}
\ No newline at end of file
histream-import/src/main/java/de/sekmi/histream/etl/config/DataSource.java
View file @
334d4ff3
...
...
@@ -4,8 +4,12 @@ import javax.xml.bind.annotation.XmlAccessType;
import
javax.xml.bind.annotation.XmlAccessorType
;
import
javax.xml.bind.annotation.XmlElement
;
import
javax.xml.bind.annotation.XmlElementWrapper
;
import
javax.xml.bind.annotation.XmlRootElement
;
import
javax.xml.bind.annotation.XmlSeeAlso
;
@XmlRootElement
@XmlAccessorType
(
XmlAccessType
.
FIELD
)
@XmlSeeAlso
({
Column
.
class
,
StringColumn
.
class
})
public
class
DataSource
{
@XmlElement
Meta
meta
;
...
...
histream-import/src/main/java/de/sekmi/histream/etl/config/DataTableIdat.java
View file @
334d4ff3
...
...
@@ -4,5 +4,5 @@ import javax.xml.bind.annotation.XmlElement;
public
class
DataTableIdat
extends
IdatColumns
{
@XmlElement
(
name
=
"visit-id"
)
Column
visitId
;
String
Column
visitId
;
}
histream-import/src/main/java/de/sekmi/histream/etl/config/DateTimeColumn.java
0 → 100644
View file @
334d4ff3
package
de.sekmi.histream.etl.config
;
import
java.time.format.DateTimeFormatter
;
import
javax.xml.bind.annotation.XmlAttribute
;
import
javax.xml.bind.annotation.XmlTransient
;
/**
 * Date and Time column.
 * TODO implement parsing of partial date/time. e.g. 2003-10
 *
 * @author Raphael
 *
 */
public class DateTimeColumn extends Column {

    /** Excluded from XML binding; not assigned anywhere in this class. */
    @XmlTransient
    DateTimeFormatter formatter;

    /**
     * Date/time pattern string for parsing via {@link DateTimeFormatter}.
     * Literal text inside a pattern has to be enclosed in single quotes,
     * e.g. {@code yyyy-MM-dd'T'HH:mm:ss}.
     * @see DateTimeFormatter#ofPattern(String)
     */
    @XmlAttribute
    String format;

    /**
     * No-argument constructor, not accessible outside the package and subclasses.
     */
    protected DateTimeColumn() {
    }

    /**
     * Creates a date/time column.
     *
     * @param name column name to use for reading input values
     * @param format pattern string, stored in {@link #format}
     */
    public DateTimeColumn(String name, String format) {
        super(name);
        this.format = format;
    }
}
\ No newline at end of file
histream-import/src/main/java/de/sekmi/histream/etl/config/IdatColumns.java
View file @
334d4ff3
...
...
@@ -4,5 +4,5 @@ import javax.xml.bind.annotation.XmlElement;
public
class
IdatColumns
{
@XmlElement
(
name
=
"patient-id"
)
Column
patientId
;
String
Column
patientId
;
}
histream-import/src/main/java/de/sekmi/histream/etl/config/PatientTable.java
View file @
334d4ff3
...
...
@@ -21,12 +21,11 @@ public class PatientTable extends Table implements WideInterface{
@XmlAccessorType
(
XmlAccessType
.
FIELD
)
public
static
class
IDAT
extends
IdatColumns
{
Column
firstname
;
Column
lastname
;
Column
birthdate
;
Column
deathdate
;
Column
gender
;
String
Column
firstname
;
String
Column
lastname
;
DateTime
Column
birthdate
;
DateTime
Column
deathdate
;
String
Column
gender
;
Column
[]
ignore
;
}
}
histream-import/src/main/java/de/sekmi/histream/etl/config/StringColumn.java
0 → 100644
View file @
334d4ff3
package
de.sekmi.histream.etl.config
;
/**
 * {@link Column} subtype which adds no fields or behaviour of its own;
 * it only provides a distinct type.
 */
public class StringColumn extends Column {

    /**
     * No-argument constructor, not accessible outside the package and subclasses.
     */
    protected StringColumn() {
    }

    /**
     * Creates a string column.
     *
     * @param name column name to use for reading input values
     */
    public StringColumn(String name) {
        super(name);
    }
}
\ No newline at end of file
histream-import/src/main/java/de/sekmi/histream/etl/config/VisitTable.java
View file @
334d4ff3
...
...
@@ -18,9 +18,9 @@ public class VisitTable extends Table implements WideInterface{
@XmlAccessorType
(
XmlAccessType
.
FIELD
)
public
static
class
IDAT
extends
IdatColumns
{
@XmlElement
(
name
=
"visit-id"
)
Column
visitId
;
Column
start
;
Column
end
;
String
Column
visitId
;
DateTime
Column
start
;
DateTime
Column
end
;
// TODO inpatient/outpatient state
Column
[]
ignore
;
}
...
...
histream-import/src/test/java/de/sekmi/histream/etl/config/TestMarshall.java
View file @
334d4ff3
...
...
@@ -17,7 +17,7 @@ public class TestMarshall {
@Test
public
void
testUnmarshall
()
throws
IOException
{
try
(
InputStream
in
=
getClass
().
getResourceAsStream
(
"/test-1-datasource.
txt
"
)
){
try
(
InputStream
in
=
getClass
().
getResourceAsStream
(
"/test-1-datasource.
xml
"
)
){
DataSource
ds
=
JAXB
.
unmarshal
(
in
,
DataSource
.
class
);
Assert
.
assertNotNull
(
ds
.
meta
);
Assert
.
assertEquals
(
"replace-source"
,
ds
.
meta
.
etlStrategy
);
...
...
@@ -67,24 +67,24 @@ public class TestMarshall {
s
.
patientTable
=
new
PatientTable
();
s
.
patientTable
.
source
=
new
FileSource
(
"file:patient.source"
,
"text/csv"
);
s
.
patientTable
.
idat
=
new
PatientTable
.
IDAT
();
s
.
patientTable
.
idat
.
patientId
=
new
Column
(
"patid"
);
s
.
patientTable
.
idat
.
patientId
=
new
String
Column
(
"patid"
);
s
.
visitTable
=
new
VisitTable
();
s
.
visitTable
.
source
=
new
FileSource
(
"file:lala.txt"
,
"text/plain"
);
s
.
visitTable
.
idat
=
new
VisitTable
.
IDAT
();
s
.
visitTable
.
idat
.
patientId
=
new
Column
(
"patid"
);
s
.
visitTable
.
idat
.
visitId
=
new
Column
(
"visit"
);
s
.
visitTable
.
idat
.
patientId
=
new
String
Column
(
"patid"
);
s
.
visitTable
.
idat
.
visitId
=
new
String
Column
(
"visit"
);
s
.
visitTable
.
concepts
=
new
Concept
[
1
];
s
.
visitTable
.
concepts
[
0
]
=
new
Concept
(
"vconcept"
,
"start"
);
s
.
visitTable
.
concepts
[
0
]
=
new
Concept
(
"vconcept"
,
"start"
,
"yyyy-MM-ddTHH:mm:ss"
);
s
.
wideTables
=
new
WideTable
[
1
];
s
.
wideTables
[
0
]
=
new
WideTable
();
s
.
wideTables
[
0
].
source
=
new
SQLSource
(
"org.postgresql.Driver"
,
"jdbc:postgresql://localhost:15432/i2b2"
);
s
.
wideTables
[
0
].
idat
=
new
DataTableIdat
();
s
.
wideTables
[
0
].
idat
.
patientId
=
new
Column
(
"patid"
);
s
.
wideTables
[
0
].
idat
.
patientId
=
new
String
Column
(
"patid"
);
s
.
wideTables
[
0
].
concepts
=
new
Concept
[
2
];
s
.
wideTables
[
0
].
concepts
[
0
]
=
new
Concept
(
"ACC"
,
"zeit"
);
s
.
wideTables
[
0
].
concepts
[
0
]
=
new
Concept
(
"ACC"
,
"zeit"
,
"yyyy-MM-ddTHH:mm:ss"
);
s
.
wideTables
[
0
].
concepts
[
0
].
modifiers
=
new
Concept
.
Modifier
[
1
];
s
.
wideTables
[
0
].
concepts
[
0
].
modifiers
[
0
]
=
new
Concept
.
Modifier
(
"DOSE"
);
s
.
wideTables
[
0
].
concepts
[
0
].
modifiers
[
0
].
value
=
new
Column
(
"dosis"
);
s
.
wideTables
[
0
].
concepts
[
0
].
modifiers
[
0
].
value
=
new
String
Column
(
"dosis"
);
JAXB
.
marshal
(
s
,
System
.
out
);
...
...
histream-import/src/test/resources/test-1-datasource.
txt
→
histream-import/src/test/resources/test-1-datasource.
xml
View file @
334d4ff3
<
!DOCTYPE configuration >
<datasource version="1.0"
>
<
?xml version="1.0" encoding="UTF-8"?>
<datasource
version=
"1.0"
xmlns:xsi=
"http://www.w3.org/2001/XMLSchema-instance"
>
<meta>
<etl-strategy>
replace-source
</etl-strategy>
<source-id>
test-1
</source-id>
...
...
@@ -22,7 +22,7 @@
-->
<patient-table>
<source
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:type="fileSource">
<source
xsi:type=
"fileSource"
>
<url>
file:test-1-patients.txt
</url>
<type>
text/csv
</type>
</source>
...
...
@@ -33,13 +33,13 @@
<birthdate>
geburtsdatum
</birthdate>
<deathdate>
verstorben
</deathdate>
<gender>
geschlecht
</gender>
<ignore>ignoriert1</ignore>
<ignore>patfakt1</ignore>
<ignore
xsi:type=
"stringColumn"
>
ignoriert1
</ignore>
<ignore
xsi:type=
"stringColumn"
>
patfakt1
</ignore>
</idat>
</patient-table>
<!-- optional -->
<visit-table>
<source
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:type="fileSource">
<source
xsi:type=
"fileSource"
>
<url>
file:test-1-visits.txt
</url>
<type>
text/csv
</type>
</source>
...
...
@@ -52,14 +52,14 @@
<mdat>
<!-- in/out code -->
<concept
id=
"weight"
>
<value>gewicht</value>
<value
xsi:type=
"stringColumn"
>
gewicht
</value>
<start>
start
</start>
</concept>
</mdat>
</visit-table>
<wide-table>
<source
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:type="fileSource">
<source
xsi:type=
"fileSource"
>
<url>
file:test-1-widetable.txt
</url>
<type>
text/csv
</type>
</source>
...
...
@@ -69,11 +69,11 @@
</idat>
<mdat>
<concept
id=
"natrium"
>
<value>na</value>
<value
xsi:type=
"stringColumn"
>
na
</value>
<start>
zeitpunkt
</start>
<unit
constant-value=
"mmol/l"
/>
<modifier
id=
"other"
>
<value
field="other" />
<value
xsi:type=
"stringColumn"
>
other
</value>
</modifier>
</concept>
</mdat>
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment