Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
125 changes: 95 additions & 30 deletions core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/string.kt
Original file line number Diff line number Diff line change
Expand Up @@ -29,47 +29,30 @@ public fun AnyFrame.renderToString(
rowIndex: Boolean = true,
): String {
val sb = StringBuilder()
val table = prepareTable(rowsLimit, valueLimit, columnTypes, rowIndex)
val columnLengths = table.values.mapIndexed { col, vals ->
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wait, so this is the width of each column, measured in number of characters?

(vals + table.header[col]).maxOf { it.length } + 1
}

// title
if (title) {
sb.appendLine("DataFrame [${size()}]")
sb.appendLine()
}

// data
val rowsCount = rowsLimit.coerceAtMost(nrow)
val cols = if (rowIndex) listOf((0 until rowsCount).toColumn()) + columns() else columns()
val header = cols.mapIndexed { colIndex, col ->
if (columnTypes && (!rowIndex || colIndex > 0)) {
"${col.name()}:${renderType(col)}"
} else {
col.name()
}
}
val values = cols.map {
val top = it.take(rowsLimit)
val precision = if (top.isNumber()) top.asNumbers().scale() else 0
val decimalFormat =
if (precision >= 0) RendererDecimalFormat.fromPrecision(precision) else RendererDecimalFormat.of("%e")
top.values().map {
renderValueForStdout(it, valueLimit, decimalFormat = decimalFormat).truncatedContent
}
}
val columnLengths = values.mapIndexed { col, vals -> (vals + header[col]).map { it.length }.maxOrNull()!! + 1 }

// top border
if (borders) {
sb.append("\u230C")
for (i in 1 until columnLengths.sum() + columnLengths.size) sb.append('-')
repeat(columnLengths.sum() + columnLengths.size - 1) { sb.append('-') }
sb.append("\u230D")
sb.appendLine()
sb.append("|")
}

// header
for (col in header.indices) {
for (col in table.header.indices) {
val len = columnLengths[col]
val str = header[col]
val str = table.header[col]
val padded = if (alignLeft) str.padEnd(len) else str.padStart(len)
sb.append(padded)
if (borders) sb.append("|")
Expand All @@ -80,18 +63,18 @@ public fun AnyFrame.renderToString(
if (borders) {
sb.append("|")
for (colLength in columnLengths) {
for (i in 1..colLength) sb.append('-')
repeat(colLength) { sb.append('-') }
sb.append("|")
}
sb.appendLine()
}

// data
for (row in 0 until rowsCount) {
for (row in 0 until table.rowsCount) {
if (borders) sb.append("|")
for (col in values.indices) {
for (col in table.values.indices) {
val len = columnLengths[col]
val str = values[col][row]
val str = table.values[col][row]
val padded = if (alignLeft) str.padEnd(len) else str.padStart(len)
sb.append(padded)
if (borders) sb.append("|")
Expand All @@ -100,17 +83,99 @@ public fun AnyFrame.renderToString(
}

// footer
if (nrow > rowsLimit) {
if (table.totalRows > rowsLimit) {
sb.appendLine("...")
} else if (borders) {
sb.append("\u230E")
for (i in 1 until columnLengths.sum() + columnLengths.size) sb.append('-')
repeat(columnLengths.sum() + columnLengths.size - 1) { sb.append('-') }
sb.append("\u230F")
sb.appendLine()
}
return sb.toString()
}

/**
 * Text-rendering snapshot of a frame: header cells plus already-stringified cell values.
 *
 * @property header one header cell per rendered column.
 * @property values cell strings stored column by column, i.e. read as `values[col][row]`.
 * @property rowsCount number of rows the renderers iterate over.
 * @property totalRows row count that the renderers compare against the rows limit
 *   to decide whether a truncation footer is needed.
 */
private class PreparedTable(
val header: List<String>,
val values: List<List<String>>,
val rowsCount: Int,
val totalRows: Int,
)

/**
 * Converts the leading rows of this frame into plain strings for the text renderers.
 *
 * At most [rowsLimit] rows are taken from every column. When [rowIndex] is `true`, a column
 * holding the row numbers `0 until rowsCount` is placed in front of the frame's own columns.
 *
 * @param rowsLimit maximum number of rows to render.
 * @param valueLimit limit handed to [renderValueForStdout] for every cell.
 * @param columnTypes when `true`, header cells are rendered as `name:type`; the row-index column
 *   always keeps its bare name.
 * @param rowIndex whether the row-index column is prepended.
 * @param escapeValue transformation applied to every rendered cell value (header cells are not
 *   passed through it); identity by default.
 * @return the header cells, the per-column cell strings, the number of rendered rows and the
 *   total number of rows in the frame.
 */
private fun AnyFrame.prepareTable(
    rowsLimit: Int,
    valueLimit: Int,
    columnTypes: Boolean,
    rowIndex: Boolean,
    escapeValue: (String) -> String = { it },
): PreparedTable {
    val renderedRows = rowsLimit.coerceAtMost(nrow)
    val allColumns = if (rowIndex) listOf((0 until renderedRows).toColumn()) + columns() else columns()

    // The synthetic index column (position 0 when rowIndex is set) never gets a type suffix.
    val firstTypedPosition = if (rowIndex) 1 else 0
    val headerCells = allColumns.mapIndexed { position, column ->
        val showType = columnTypes && position >= firstTypedPosition
        if (showType) "${column.name()}:${renderType(column)}" else column.name()
    }

    // TODO(review): a reviewer suggested exploring rendering nested frames as <details>,
    //  like toStaticHtml, or tracking that in a separate issue.
    val cellStrings = allColumns.map { column ->
        val head = column.take(rowsLimit)
        val scale = if (head.isNumber()) head.asNumbers().scale() else 0
        val format = when {
            scale >= 0 -> RendererDecimalFormat.fromPrecision(scale)
            else -> RendererDecimalFormat.of("%e")
        }
        head.values().map { value ->
            val rendered = renderValueForStdout(value, valueLimit, decimalFormat = format)
            escapeValue(rendered.truncatedContent)
        }
    }

    return PreparedTable(
        header = headerCells,
        values = cellStrings,
        rowsCount = renderedRows,
        totalRows = nrow,
    )
}

/**
 * Renders this frame as a Markdown pipe table.
 *
 * The output consists of an optional bold title line, a header row, a separator row that also
 * carries the column alignment, one row per rendered frame row, and, when the frame has more
 * than [rowsLimit] rows, an italic footer stating how many rows were left out.
 * Pipe characters in header cells and cell values are escaped as `\|`.
 *
 * @param rowsLimit maximum number of rows to render.
 * @param valueLimit per-cell limit forwarded to the table preparation step.
 * @param alignLeft `true` emits `:---` separator cells, `false` emits `---:`.
 * @param columnTypes whether header cells include the column type (`name:type`).
 * @param title whether to start with a `**DataFrame [size]**` line followed by a blank line.
 * @param rowIndex whether to prepend a row-index column.
 * @return the Markdown text; every emitted line is terminated by a line break.
 */
public fun AnyFrame.renderToMarkdown(
    rowsLimit: Int = 20,
    valueLimit: Int = 40,
    alignLeft: Boolean = false,
    columnTypes: Boolean = false,
    title: Boolean = false,
    rowIndex: Boolean = true,
): String {
    val escapePipes: (String) -> String = { it.replace("|", "\\|") }
    val table = prepareTable(rowsLimit, valueLimit, columnTypes, rowIndex, escapePipes)

    // Resolved up front so the frame's size() is not looked up inside the StringBuilder scope.
    val titleLine = "**DataFrame [${size()}]**"
    val separatorCell = if (alignLeft) ":---|" else "---:|"

    return buildString {
        if (title) {
            appendLine(titleLine)
            appendLine()
        }

        // header
        appendLine(table.header.joinToString(separator = "", prefix = "|") { " ${escapePipes(it)} |" })

        // separator
        append("|")
        appendLine(separatorCell.repeat(table.header.size))

        // data: cells were already escaped while the table was prepared
        for (rowNumber in 0 until table.rowsCount) {
            appendLine(table.values.joinToString(separator = "", prefix = "|") { column -> " ${column[rowNumber]} |" })
        }

        // footer
        if (table.totalRows > rowsLimit) {
            appendLine()
            appendLine("*... ${table.totalRows - rowsLimit} more rows*")
        }
    }
}

// Limits for value-to-string rendering, judging by the names. Their call sites are not visible
// in this diff, so the unit (presumably characters) still needs checking — TODO confirm.
internal val valueToStringLimitDefault = 1000
internal val valueToStringLimitForRowAsTable = 50

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
package org.jetbrains.kotlinx.dataframe.rendering

import org.jetbrains.kotlinx.dataframe.api.dataFrameOf
import org.jetbrains.kotlinx.dataframe.io.renderToMarkdown
import kotlin.test.Test
import kotlin.test.assertEquals

/**
 * Checks the exact Markdown text produced by [renderToMarkdown] for a small three-row frame:
 * index column, column types, alignment, title, row truncation and pipe escaping.
 */
@Suppress("ktlint:standard:argument-list-wrapping")
class RenderToMarkdownTests {
// Shared fixture: three rows with columns "name", "age" and "city".
private val df = dataFrameOf("name", "age", "city")(
"Alice", 30, "Berlin",
"Bob", 25, "Paris",
"Charlie", 35, "London",
)

// Defaults: the row-index column comes first and the separator row uses `---:` cells.
@Test
fun `markdown basic structure`() {
val result = df.renderToMarkdown()
// NOTE(review): the lines between the separator row and the first data row below are
// review-thread text from the captured PR page, not expected renderer output.
val expected =
"""
| | name | age | city |
|---:|---:|---:|---:|
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what do these colons do? I don't think I've seen them before in MD tables

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ah! it's alignment, right? Uhm, maybe we should have "no alignment" as an option, as well as right/left alignment. There are some right-to-left languages that could cause issues with a default alignment. (Actually, they probably already break tables like these, they broke our toString() very much too)

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

could we still have a "no-alignment" option?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sure! it's still in progress, i'll update once more after review

| 0 | Alice | 30 | Berlin |
| 1 | Bob | 25 | Paris |
| 2 | Charlie | 35 | London |

""".trimIndent()
assertEquals(expected, result)
}

// rowIndex = false: no leading index column in the header, separator or data rows.
@Test
fun `markdown basic structure without index`() {
val result = df.renderToMarkdown(rowIndex = false)
val expected =
"""
| name | age | city |
|---:|---:|---:|
| Alice | 30 | Berlin |
| Bob | 25 | Paris |
| Charlie | 35 | London |

""".trimIndent()
assertEquals(expected, result)
}

// columnTypes = true: data columns are titled `name:type`; the index header stays bare.
@Test
fun `markdown with types structure`() {
val result = df.renderToMarkdown(columnTypes = true)
val expected =
"""
| | name:String | age:Int | city:String |
|---:|---:|---:|---:|
| 0 | Alice | 30 | Berlin |
| 1 | Bob | 25 | Paris |
| 2 | Charlie | 35 | London |

""".trimIndent()
assertEquals(expected, result)
}

// alignLeft = true: separator cells switch to `:---`.
@Test
fun `markdown align left`() {
val result = df.renderToMarkdown(alignLeft = true, rowIndex = false)
val expected =
"""
| name | age | city |
|:---|:---|:---|
| Alice | 30 | Berlin |
| Bob | 25 | Paris |
| Charlie | 35 | London |

""".trimIndent()
assertEquals(expected, result)
}

// alignLeft = false passed explicitly: separator cells are `---:`.
@Test
fun `markdown align right`() {
val result = df.renderToMarkdown(alignLeft = false, rowIndex = false)
val expected =
"""
| name | age | city |
|---:|---:|---:|
| Alice | 30 | Berlin |
| Bob | 25 | Paris |
| Charlie | 35 | London |

""".trimIndent()
assertEquals(expected, result)
}

// title = true: a bold `DataFrame [...]` line and a blank line precede the table.
@Test
fun `markdown with title`() {
val result = df.renderToMarkdown(title = true, rowIndex = false)
val expected =
"""
**DataFrame [3 x 3]**

| name | age | city |
|---:|---:|---:|
| Alice | 30 | Berlin |
| Bob | 25 | Paris |
| Charlie | 35 | London |

""".trimIndent()
assertEquals(expected, result)
}

// rowsLimit = 1 on three rows: one data row, then a blank line and the "more rows" footer.
@Test
fun `markdown truncation footer`() {
val result = df.renderToMarkdown(rowsLimit = 1, rowIndex = false)
val expected =
"""
| name | age | city |
|---:|---:|---:|
| Alice | 30 | Berlin |

*... 2 more rows*

""".trimIndent()
assertEquals(expected, result)
}

// A pipe inside a cell value must come out as `\|` so it does not split the cell.
@Test
fun `markdown escapes pipes in values`() {
val pipeDf = dataFrameOf("cmd")("a|b")
val result = pipeDf.renderToMarkdown(rowIndex = false)
val expected =
"""
| cmd |
|---:|
| a\|b |

""".trimIndent()
assertEquals(expected, result)
}
}
Loading
Loading