-
Notifications
You must be signed in to change notification settings - Fork 81
Add DataFrame.renderToMarkdown(): String function #1760
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 3 commits
bb5636e
8df201b
235a185
4671a11
1ea5517
04a2e29
00abb78
113f260
679ef39
239b666
852dedd
d9e3b22
b4f9e42
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -29,47 +29,30 @@ public fun AnyFrame.renderToString( | |
| rowIndex: Boolean = true, | ||
| ): String { | ||
| val sb = StringBuilder() | ||
| val table = prepareTable(rowsLimit, valueLimit, columnTypes, rowIndex) | ||
| val columnLengths = table.values.mapIndexed { col, vals -> | ||
| (vals + table.header[col]).maxOf { it.length } + 1 | ||
| } | ||
|
|
||
| // title | ||
| if (title) { | ||
| sb.appendLine("DataFrame [${size()}]") | ||
| sb.appendLine() | ||
| } | ||
|
|
||
| // data | ||
| val rowsCount = rowsLimit.coerceAtMost(nrow) | ||
| val cols = if (rowIndex) listOf((0 until rowsCount).toColumn()) + columns() else columns() | ||
| val header = cols.mapIndexed { colIndex, col -> | ||
| if (columnTypes && (!rowIndex || colIndex > 0)) { | ||
| "${col.name()}:${renderType(col)}" | ||
| } else { | ||
| col.name() | ||
| } | ||
| } | ||
| val values = cols.map { | ||
| val top = it.take(rowsLimit) | ||
| val precision = if (top.isNumber()) top.asNumbers().scale() else 0 | ||
| val decimalFormat = | ||
| if (precision >= 0) RendererDecimalFormat.fromPrecision(precision) else RendererDecimalFormat.of("%e") | ||
| top.values().map { | ||
| renderValueForStdout(it, valueLimit, decimalFormat = decimalFormat).truncatedContent | ||
| } | ||
| } | ||
| val columnLengths = values.mapIndexed { col, vals -> (vals + header[col]).map { it.length }.maxOrNull()!! + 1 } | ||
|
|
||
| // top border | ||
| if (borders) { | ||
| sb.append("\u230C") | ||
| for (i in 1 until columnLengths.sum() + columnLengths.size) sb.append('-') | ||
| repeat(columnLengths.sum() + columnLengths.size - 1) { sb.append('-') } | ||
| sb.append("\u230D") | ||
| sb.appendLine() | ||
| sb.append("|") | ||
| } | ||
|
|
||
| // header | ||
| for (col in header.indices) { | ||
| for (col in table.header.indices) { | ||
| val len = columnLengths[col] | ||
| val str = header[col] | ||
| val str = table.header[col] | ||
| val padded = if (alignLeft) str.padEnd(len) else str.padStart(len) | ||
| sb.append(padded) | ||
| if (borders) sb.append("|") | ||
|
|
@@ -80,18 +63,18 @@ public fun AnyFrame.renderToString( | |
| if (borders) { | ||
| sb.append("|") | ||
| for (colLength in columnLengths) { | ||
| for (i in 1..colLength) sb.append('-') | ||
| repeat(colLength) { sb.append('-') } | ||
| sb.append("|") | ||
| } | ||
| sb.appendLine() | ||
| } | ||
|
|
||
| // data | ||
| for (row in 0 until rowsCount) { | ||
| for (row in 0 until table.rowsCount) { | ||
| if (borders) sb.append("|") | ||
| for (col in values.indices) { | ||
| for (col in table.values.indices) { | ||
| val len = columnLengths[col] | ||
| val str = values[col][row] | ||
| val str = table.values[col][row] | ||
| val padded = if (alignLeft) str.padEnd(len) else str.padStart(len) | ||
| sb.append(padded) | ||
| if (borders) sb.append("|") | ||
|
|
@@ -100,17 +83,99 @@ public fun AnyFrame.renderToString( | |
| } | ||
|
|
||
| // footer | ||
| if (nrow > rowsLimit) { | ||
| if (table.totalRows > rowsLimit) { | ||
| sb.appendLine("...") | ||
| } else if (borders) { | ||
| sb.append("\u230E") | ||
| for (i in 1 until columnLengths.sum() + columnLengths.size) sb.append('-') | ||
| repeat(columnLengths.sum() + columnLengths.size - 1) { sb.append('-') } | ||
| sb.append("\u230F") | ||
| sb.appendLine() | ||
| } | ||
| return sb.toString() | ||
| } | ||
|
|
||
/**
 * String-only snapshot of a frame, built by `prepareTable` and read by `renderToString`
 * and `renderToMarkdown`.
 *
 * @property header one label per rendered column.
 * @property values rendered cells, indexed column-first: `values[col][row]`.
 * @property rowsCount number of rows rendered (`rowsLimit` capped at the frame's `nrow`).
 * @property totalRows the frame's `nrow`; renderers compare it with `rowsLimit` to decide
 *   whether a truncation footer is emitted.
 */
| private class PreparedTable( | ||
| val header: List<String>, | ||
| val values: List<List<String>>, | ||
| val rowsCount: Int, | ||
| val totalRows: Int, | ||
| ) | ||
|
|
||
/**
 * Converts the leading rows of this frame into header labels and cell strings, so that a
 * concrete renderer only has to lay them out.
 *
 * @param rowsLimit maximum number of rows taken from each column.
 * @param valueLimit forwarded to `renderValueForStdout` for every cell.
 * @param columnTypes when `true`, data-column headers are rendered as `name:type`.
 * @param rowIndex when `true`, a column holding `0 until rowsCount` is prepended.
 * @param escapeValue applied to every rendered cell value (headers are not passed through it);
 *   identity by default.
 * @return a [PreparedTable] with the headers, the column-first cell strings, the rendered row
 *   count and the frame's `nrow`.
 */
| private fun AnyFrame.prepareTable( | ||
| rowsLimit: Int, | ||
| valueLimit: Int, | ||
| columnTypes: Boolean, | ||
| rowIndex: Boolean, | ||
| escapeValue: (String) -> String = { it }, | ||
| ): PreparedTable { | ||
| val rowsCount = rowsLimit.coerceAtMost(nrow) | ||
// When requested, the row index is synthesized as an extra column placed first.
| val cols = if (rowIndex) listOf((0 until rowsCount).toColumn()) + columns() else columns() | ||
|
koperagen marked this conversation as resolved.
Outdated
|
||
| val header = cols.mapIndexed { colIndex, col -> | ||
// With rowIndex on, colIndex 0 is the synthetic index column and never gets a type suffix.
| if (columnTypes && (!rowIndex || colIndex > 0)) { | ||
| "${col.name()}:${renderType(col)}" | ||
| } else { | ||
| col.name() | ||
| } | ||
| } | ||
| val values = cols.map { col -> | ||
| val top = col.take(rowsLimit) | ||
// One decimal format per column: derived from scale() for number columns, 0 otherwise;
// a negative precision falls back to the "%e" format.
| val precision = if (top.isNumber()) top.asNumbers().scale() else 0 | ||
| val decimalFormat = | ||
| if (precision >= 0) RendererDecimalFormat.fromPrecision(precision) else RendererDecimalFormat.of("%e") | ||
| top.values().map { | ||
| escapeValue(renderValueForStdout(it, valueLimit, decimalFormat = decimalFormat).truncatedContent) | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. maybe create a new issue for exploring rendering nested frames as |
||
| } | ||
| } | ||
| return PreparedTable(header, values, rowsCount, nrow) | ||
| } | ||
|
|
||
/**
 * Renders this frame as a Markdown pipe table.
 *
 * Output, in order: an optional bold `**DataFrame [size]**` title followed by a blank line,
 * the header row, an alignment separator row, one line per rendered row, and, when the frame
 * has more rows than [rowsLimit], a blank line plus an italic `*... N more rows*` footer.
 * `|` characters in headers and in cell values are escaped as `\|`.
 *
 * @param rowsLimit maximum number of rows to render.
 * @param valueLimit forwarded to `prepareTable`.
 * @param alignLeft `true` emits `:---` separator cells, `false` emits `---:`.
 * @param columnTypes forwarded to `prepareTable`; when `true`, headers are rendered as `name:type`.
 * @param title whether the title line is emitted.
 * @param rowIndex whether the leading row-index column is included.
 * @return the Markdown text; every emitted line, including the last, ends with a line separator.
 */
| public fun AnyFrame.renderToMarkdown( | ||
|
koperagen marked this conversation as resolved.
Outdated
|
||
| rowsLimit: Int = 20, | ||
| valueLimit: Int = 40, | ||
| alignLeft: Boolean = false, | ||
| columnTypes: Boolean = false, | ||
| title: Boolean = false, | ||
| rowIndex: Boolean = true, | ||
| ): String { | ||
// Cell values are pipe-escaped while the table is prepared.
| val table = prepareTable(rowsLimit, valueLimit, columnTypes, rowIndex) { it.replace("|", "\\|") } | ||
|
|
||
| val sb = StringBuilder() | ||
| if (title) { | ||
| sb.appendLine("**DataFrame [${size()}]**") | ||
| sb.appendLine() | ||
| } | ||
|
|
||
| // header | ||
| sb.append("|") | ||
| for (col in table.header) { | ||
// Headers do not go through prepareTable's escapeValue, so they are escaped here.
| sb.append(" ${col.replace("|", "\\|")} |") | ||
|
koperagen marked this conversation as resolved.
Outdated
|
||
| } | ||
| sb.appendLine() | ||
|
|
||
| // separator | ||
| sb.append("|") | ||
| repeat(table.header.size) { | ||
| sb.append(if (alignLeft) ":---|" else "---:|") | ||
| } | ||
| sb.appendLine() | ||
|
|
||
| // data | ||
| for (row in 0 until table.rowsCount) { | ||
| sb.append("|") | ||
| for (col in table.values.indices) { | ||
| sb.append(" ${table.values[col][row]} |") | ||
| } | ||
| sb.appendLine() | ||
| } | ||
|
|
||
| // footer | ||
| if (table.totalRows > rowsLimit) { | ||
| sb.appendLine() | ||
| sb.appendLine("*... ${table.totalRows - rowsLimit} more rows*") | ||
| } | ||
| return sb.toString() | ||
| } | ||
|
|
||
// NOTE(review): neither constant is referenced in the code visible in this diff; by name they
// look like length limits for rendered values. Confirm against their call sites.
| internal val valueToStringLimitDefault = 1000 | ||
| internal val valueToStringLimitForRowAsTable = 50 | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,136 @@ | ||
| package org.jetbrains.kotlinx.dataframe.rendering | ||
|
|
||
| import org.jetbrains.kotlinx.dataframe.api.dataFrameOf | ||
| import org.jetbrains.kotlinx.dataframe.io.renderToMarkdown | ||
| import kotlin.test.Test | ||
| import kotlin.test.assertEquals | ||
|
|
||
/**
 * Tests for `renderToMarkdown`. Each test renders a small frame and compares the complete
 * output string with an expected literal via `assertEquals`.
 */
| @Suppress("ktlint:standard:argument-list-wrapping") | ||
| class RenderToMarkdownTests { | ||
// Shared 3 x 3 fixture used by every test except the pipe-escaping one.
| private val df = dataFrameOf("name", "age", "city")( | ||
| "Alice", 30, "Berlin", | ||
| "Bob", 25, "Paris", | ||
| "Charlie", 35, "London", | ||
| ) | ||
|
|
||
| @Test | ||
| fun `markdown basic structure`() { | ||
| val result = df.renderToMarkdown() | ||
| val expected = | ||
| """ | ||
| | | name | age | city | | ||
| |---:|---:|---:|---:| | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. what do these colons do? I don't think I've seen them before in MD tables
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ah! it's alignment, right? Uhm, maybe we should have "no alignment" as an option, as well as right/left alignment. There are some right-to-left languages that could cause issues with a default alignment. (Actually, they probably already break tables like these, they broke our toString() very much too)
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. could we still have a "no-alignment" option?
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. sure! it's still in progress, i'll update once more after review |
||
| | 0 | Alice | 30 | Berlin | | ||
| | 1 | Bob | 25 | Paris | | ||
| | 2 | Charlie | 35 | London | | ||
|
|
||
| """.trimIndent() | ||
| assertEquals(expected, result) | ||
| } | ||
|
|
||
| @Test | ||
| fun `markdown basic structure without index`() { | ||
| val result = df.renderToMarkdown(rowIndex = false) | ||
| val expected = | ||
| """ | ||
| | name | age | city | | ||
| |---:|---:|---:| | ||
| | Alice | 30 | Berlin | | ||
| | Bob | 25 | Paris | | ||
| | Charlie | 35 | London | | ||
|
|
||
| """.trimIndent() | ||
| assertEquals(expected, result) | ||
| } | ||
|
|
||
| @Test | ||
| fun `markdown with types structure`() { | ||
| val result = df.renderToMarkdown(columnTypes = true) | ||
| val expected = | ||
| """ | ||
| | | name:String | age:Int | city:String | | ||
| |---:|---:|---:|---:| | ||
| | 0 | Alice | 30 | Berlin | | ||
| | 1 | Bob | 25 | Paris | | ||
| | 2 | Charlie | 35 | London | | ||
|
|
||
| """.trimIndent() | ||
| assertEquals(expected, result) | ||
| } | ||
|
|
||
| @Test | ||
| fun `markdown align left`() { | ||
| val result = df.renderToMarkdown(alignLeft = true, rowIndex = false) | ||
| val expected = | ||
| """ | ||
| | name | age | city | | ||
| |:---|:---|:---| | ||
| | Alice | 30 | Berlin | | ||
| | Bob | 25 | Paris | | ||
| | Charlie | 35 | London | | ||
|
|
||
| """.trimIndent() | ||
| assertEquals(expected, result) | ||
| } | ||
|
|
||
| @Test | ||
| fun `markdown align right`() { | ||
| val result = df.renderToMarkdown(alignLeft = false, rowIndex = false) | ||
| val expected = | ||
| """ | ||
| | name | age | city | | ||
| |---:|---:|---:| | ||
| | Alice | 30 | Berlin | | ||
| | Bob | 25 | Paris | | ||
| | Charlie | 35 | London | | ||
|
|
||
| """.trimIndent() | ||
| assertEquals(expected, result) | ||
| } | ||
|
|
||
| @Test | ||
| fun `markdown with title`() { | ||
| val result = df.renderToMarkdown(title = true, rowIndex = false) | ||
| val expected = | ||
| """ | ||
| **DataFrame [3 x 3]** | ||
|
|
||
| | name | age | city | | ||
| |---:|---:|---:| | ||
| | Alice | 30 | Berlin | | ||
| | Bob | 25 | Paris | | ||
| | Charlie | 35 | London | | ||
|
|
||
| """.trimIndent() | ||
| assertEquals(expected, result) | ||
| } | ||
|
|
||
| @Test | ||
| fun `markdown truncation footer`() { | ||
| val result = df.renderToMarkdown(rowsLimit = 1, rowIndex = false) | ||
| val expected = | ||
| """ | ||
| | name | age | city | | ||
| |---:|---:|---:| | ||
| | Alice | 30 | Berlin | | ||
|
|
||
| *... 2 more rows* | ||
|
|
||
| """.trimIndent() | ||
| assertEquals(expected, result) | ||
| } | ||
|
|
||
| @Test | ||
| fun `markdown escapes pipes in values`() { | ||
| val pipeDf = dataFrameOf("cmd")("a|b") | ||
| val result = pipeDf.renderToMarkdown(rowIndex = false) | ||
| val expected = | ||
| """ | ||
| | cmd | | ||
| |---:| | ||
| | a\|b | | ||
|
|
||
| """.trimIndent() | ||
| assertEquals(expected, result) | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Wait, so this is the width of each column, measured in number of characters?