shurakovndstakeseffectnew
/data/papers/shurakovndstakeseffectnew/out/bundle.json
{
  "created_at": "2026-01-23T01:19:35.757415+00:00",
  "paper_dir": "papers/shurakovndstakeseffectnew",
  "pdf": {
    "path": "papers/shurakovndstakeseffectnew/pdf/paper.pdf"
  },
  "tool_versions": {
    "python": "3.14.2",
    "meta-extract": "0.1.0",
    "requests": "2.32.5",
    "lxml": "6.0.2",
    "pandas": "2.3.3",
    "pypdf": "5.9.0",
    "pypdfium2": "5.3.0",
    "pillow": "12.1.0",
    "grobid-client-python": "0.1.4",
    "camelot-py": "1.0.9",
    "tabula-py": "2.10.0"
  },
  "status": "ok",
  "grobid": {
    "url": "http://localhost:8070",
    "tei_path": "papers/shurakovndstakeseffectnew/out/tei.xml"
  },
  "text": {
    "path": "papers/shurakovndstakeseffectnew/out/text.txt"
  },
  "tables": {
    "out_dir": "papers/shurakovndstakeseffectnew/out/tables",
    "camelot": {
      "stream": [
        {
          "page": "10",
          "index": 1,
          "csv_path": "papers/shurakovndstakeseffectnew/out/tables/camelot_stream_p10_t1.csv",
          "shape": [
            14,
            6
          ],
          "stats": {
            "rows": 14,
            "cols": 6,
            "non_empty_cells": 70,
            "non_empty_ratio": 0.8333333333333334,
            "filled_rows": 13,
            "filled_cols": 6,
            "filled_rows_ratio": 0.9285714285714286,
            "numeric_like_ratio": 0.0
          },
          "parsing_report": {
            "accuracy": 98.88,
            "whitespace": 16.67,
            "order": 1,
            "page": 10
          }
        },
        {
          "page": "12",
          "index": 2,
          "csv_path": "papers/shurakovndstakeseffectnew/out/tables/camelot_stream_p12_t2.csv",
          "shape": [
            11,
            6
          ],
          "stats": {
            "rows": 11,
            "cols": 6,
            "non_empty_cells": 55,
            "non_empty_ratio": 0.8333333333333334,
            "filled_rows": 10,
            "filled_cols": 6,
            "filled_rows_ratio": 0.9090909090909091,
            "numeric_like_ratio": 0.16363636363636364
          },
          "parsing_report": {
            "accuracy": 98.56,
            "whitespace": 16.67,
            "order": 1,
            "page": 12
          }
        }
      ],
      "lattice": [],
      "filtered_out": {
        "stream": 23,
        "lattice": 3
      },
      "filtered_reasons": {
        "stream": {
          "min_cols": 15,
          "filled_rows_ratio": 6,
          "filled_rows": 2
        },
        "lattice": {
          "sparse": 3
        }
      },
      "filter": {
        "min_rows": 2,
        "min_cols": 2,
        "min_non_empty_ratio": 0.2,
        "min_filled_rows": 2,
        "min_filled_cols": 2,
        "min_filled_rows_ratio": 0.5,
        "two_col_min_numeric_like_ratio": 0.35
      },
      "errors": []
    },
    "tabula": {
      "lattice": [],
      "stream": [
        {
          "page": 10,
          "index": 1,
          "csv_path": "papers/shurakovndstakeseffectnew/out/tables/tabula_stream_p10_t1.csv",
          "shape": [
            12,
            6
          ],
          "stats": {
            "rows": 12,
            "cols": 6,
            "non_empty_cells": 63,
            "non_empty_ratio": 0.875,
            "filled_rows": 12,
            "filled_cols": 6,
            "filled_rows_ratio": 1.0,
            "numeric_like_ratio": 0.19047619047619047
          }
        },
        {
          "page": 12,
          "index": 2,
          "csv_path": "papers/shurakovndstakeseffectnew/out/tables/tabula_stream_p12_t2.csv",
          "shape": [
            9,
            6
          ],
          "stats": {
            "rows": 9,
            "cols": 6,
            "non_empty_cells": 48,
            "non_empty_ratio": 0.8888888888888888,
            "filled_rows": 9,
            "filled_cols": 6,
            "filled_rows_ratio": 1.0,
            "numeric_like_ratio": 0.1875
          }
        }
      ],
      "filtered_out": {
        "lattice": 4,
        "stream": 4
      },
      "filtered_reasons": {
        "lattice": {
          "min_rows": 2,
          "sparse": 2
        },
        "stream": {
          "min_cols": 3,
          "sparse": 1
        }
      },
      "filter": {
        "min_rows": 2,
        "min_cols": 2,
        "min_non_empty_ratio": 0.2,
        "min_filled_rows": 2,
        "min_filled_cols": 2,
        "min_filled_rows_ratio": 0.5,
        "two_col_min_numeric_like_ratio": 0.35
      },
      "errors": [],
      "ignored_errors": {},
      "pages": "all"
    }
  },
  "images": {
    "dir": "papers/shurakovndstakeseffectnew/out/images",
    "manifest": "papers/shurakovndstakeseffectnew/out/images/manifest.json",
    "embedded": {
      "count": 5
    },
    "rendered_pages": {
      "count": 5,
      "pages": [
        1,
        11,
        13,
        15,
        16
      ]
    },
    "figures": {
      "count": 6,
      "pages": [
        11,
        13,
        15,
        16
      ],
      "rendered_pages": [
        11,
        13,
        15,
        16
      ]
    }
  },
  "errors": [],
  "markdown": {
    "path": "papers/shurakovndstakeseffectnew/out/fulltext.md"
  }
}