Skip to contents

Reads the content of a Word document and returns a data.frame representing the document.

Usage

docx_summary(x, preserve = FALSE, remove_fields = FALSE, detailed = FALSE)

Arguments

x

an rdocx object

preserve

If FALSE (default), text in table cells is collapsed into a single line. If TRUE, line breaks in table cells are preserved as a "\n" character. This feature is adapted from docxtractr::docx_extract_tbl(), published under the MIT license in the {docxtractr} package by Bob Rudis.

remove_fields

If TRUE, prevent field codes from appearing in the returned data.frame.

detailed

Should information on runs be included in the summary data.frame? Defaults to FALSE. If TRUE, a list column named run is added to the summary. Each element of this column is a data.frame summarising the formatting properties of the runs, with one row per run and one column per formatting property.

Note

Documents included with body_add_docx() will not be accessible in the results.

Examples

example_docx <- system.file(
  package = "officer",
  "doc_examples/example.docx"
)
doc <- read_docx(example_docx)

docx_summary(doc)
#>       doc_index content_type     style_name
#> 1             1    paragraph      heading 1
#> 2             2    paragraph           <NA>
#> 3             3    paragraph      heading 1
#> 4             4    paragraph List Paragraph
#> 5             5    paragraph List Paragraph
#> 6             6    paragraph List Paragraph
#> 7             7    paragraph      heading 2
#> 8             8    paragraph List Paragraph
#> 9             9    paragraph List Paragraph
#> 10           10    paragraph List Paragraph
#> 11           11    paragraph           <NA>
#> 12           12    paragraph           <NA>
#> 13           13    paragraph      heading 2
#> 14           14    paragraph           <NA>
#> 15           15    paragraph           <NA>
#> 1.1          16   table cell  Light Shading
#> 1.11         16   table cell  Light Shading
#> 1.12         16   table cell  Light Shading
#> 1.13         16   table cell  Light Shading
#> 1.14         16   table cell  Light Shading
#> 1.15         16   table cell  Light Shading
#> 1.16         16   table cell  Light Shading
#> 1.17         16   table cell  Light Shading
#> 1.18         16   table cell  Light Shading
#> 1.19         16   table cell  Light Shading
#> 1.110        16   table cell  Light Shading
#> 1.111        16   table cell  Light Shading
#> 1.112        16   table cell  Light Shading
#> 2.2          16   table cell  Light Shading
#> 2.31         16   table cell  Light Shading
#> 2.22         16   table cell  Light Shading
#> 2.23         16   table cell  Light Shading
#> 2.44         16   table cell  Light Shading
#> 2.25         16   table cell  Light Shading
#> 2.26         16   table cell  Light Shading
#> 2.27         16   table cell  Light Shading
#> 2.28         16   table cell  Light Shading
#> 2.29         16   table cell  Light Shading
#> 2.210        16   table cell  Light Shading
#> 2.211        16   table cell  Light Shading
#> 2.212        16   table cell  Light Shading
#> 3.3          16   table cell  Light Shading
#> 3.21         16   table cell  Light Shading
#> 3.32         16   table cell  Light Shading
#> 3.33         16   table cell  Light Shading
#> 3.24         16   table cell  Light Shading
#> 3.35         16   table cell  Light Shading
#> 3.36         16   table cell  Light Shading
#> 3.37         16   table cell  Light Shading
#> 3.38         16   table cell  Light Shading
#> 3.39         16   table cell  Light Shading
#> 3.310        16   table cell  Light Shading
#> 3.311        16   table cell  Light Shading
#> 3.312        16   table cell  Light Shading
#> 4.4          16   table cell  Light Shading
#> 4.41         16   table cell  Light Shading
#> 4.42         16   table cell  Light Shading
#> 4.43         16   table cell  Light Shading
#> 4.34         16   table cell  Light Shading
#> 4.45         16   table cell  Light Shading
#> 4.46         16   table cell  Light Shading
#> 4.47         16   table cell  Light Shading
#> 4.48         16   table cell  Light Shading
#> 4.49         16   table cell  Light Shading
#> 4.410        16   table cell  Light Shading
#> 4.411        16   table cell  Light Shading
#> 4.412        16   table cell  Light Shading
#> 16           17    paragraph           <NA>
#> 17           18    paragraph           <NA>
#>                                                                          text
#> 1                                                                     Title 1
#> 2                   Lorem ipsum dolor sit amet, consectetur adipiscing elit. 
#> 3                                                                     Title 2
#> 4                                                          Quisque tristique 
#> 5                                                   Augue nisi, et convallis 
#> 6                                                         Sapien mollis nec. 
#> 7                                                                 Sub title 1
#> 8                                                          Quisque tristique 
#> 9                                                   Augue nisi, et convallis 
#> 10                                                        Sapien mollis nec. 
#> 11                                                                           
#> 12             Phasellus nec nunc vitae nulla interdum volutpat eu ac massa. 
#> 13                                                                Sub title 2
#> 14    Morbi rhoncus sapien sit amet leo eleifend, vel fermentum nisi mattis. 
#> 15                                                                           
#> 1.1                                                                    Petals
#> 1.11                                                              5,621498349
#> 1.12                                                              4,994616997
#> 1.13                                                              4,767504884
#> 1.14                                                               25,9242382
#> 1.15                                                              6,489375001
#> 1.16                                                                5,7858682
#> 1.17                                                              5,645575295
#> 1.18                                                              4,828953215
#> 1.19                                                              6,783500773
#> 1.110                                                             5,395076839
#> 1.111                                                             4,683617783
#> 1.112                                                       NoteNew line note
#> 2.2                                                                 Internode
#> 2.31                                                                     <NA>
#> 2.22                                                                       AA
#> 2.23                                                                     <NA>
#> 2.44                                                                     <NA>
#> 2.25                                                              25,21130805
#> 2.26                                                              25,52433147
#> 2.27                                                              Merged cell
#> 2.28                                                                     <NA>
#> 2.29                                                                     <NA>
#> 2.210                                                                    <NA>
#> 2.211                                                              29,2459239
#> 2.212                                                                    <NA>
#> 3.3                                                                     Sepal
#> 3.21                                                    2,46210657918,2034091
#> 3.32                                                              2,429320759
#> 3.33                                                                      AAA
#> 3.24                                                              2,066051345
#> 3.35                                                              2,901582763
#> 3.36                                                              2,655642742
#> 3.37                                                              2,278691288
#> 3.38                                                              2,238467716
#> 3.39                                                              2,202762147
#> 3.310                                                             2,538375992
#> 3.311                                                             2,601945544
#> 3.312                                                                    <NA>
#> 4.4                                                                     Bract
#> 4.41                                                                     <NA>
#> 4.42                                                              17,65204912
#> 4.43                                                                     <NA>
#> 4.34                                                              18,37915478
#> 4.45                                         17,3130473717,0721572418,2902189
#> 4.46                                                                     <NA>
#> 4.47                                                                     <NA>
#> 4.48                                                              19,87376227
#> 4.49                                                              19,85326662
#> 4.410                                                             19,56545356
#> 4.411                                                             18,95335451
#> 4.412                                                                    <NA>
#> 16                                                                           
#> 17                                                                           
#>       level num_id row_id is_header cell_id col_span row_span
#> 1        NA     NA     NA        NA      NA       NA       NA
#> 2        NA     NA     NA        NA      NA       NA       NA
#> 3        NA     NA     NA        NA      NA       NA       NA
#> 4         1      2     NA        NA      NA       NA       NA
#> 5         1      2     NA        NA      NA       NA       NA
#> 6         1      2     NA        NA      NA       NA       NA
#> 7        NA     NA     NA        NA      NA       NA       NA
#> 8         1      1     NA        NA      NA       NA       NA
#> 9         1      1     NA        NA      NA       NA       NA
#> 10        1      1     NA        NA      NA       NA       NA
#> 11       NA     NA     NA        NA      NA       NA       NA
#> 12       NA     NA     NA        NA      NA       NA       NA
#> 13       NA     NA     NA        NA      NA       NA       NA
#> 14       NA     NA     NA        NA      NA       NA       NA
#> 15       NA     NA     NA        NA      NA       NA       NA
#> 1.1      NA     NA      1      TRUE       1        1        1
#> 1.11     NA     NA      2     FALSE       1        2        1
#> 1.12     NA     NA      3     FALSE       1        1        1
#> 1.13     NA     NA      4     FALSE       1        1        1
#> 1.14     NA     NA      5     FALSE       1        2        1
#> 1.15     NA     NA      6     FALSE       1        1        1
#> 1.16     NA     NA      7     FALSE       1        1        1
#> 1.17     NA     NA      8     FALSE       1        1        1
#> 1.18     NA     NA      9     FALSE       1        1        1
#> 1.19     NA     NA     10     FALSE       1        1        1
#> 1.110    NA     NA     11     FALSE       1        1        1
#> 1.111    NA     NA     12     FALSE       1        1        1
#> 1.112    NA     NA     13     FALSE       1        4        1
#> 2.2      NA     NA      1      TRUE       2        1        1
#> 2.31     NA     NA      2     FALSE       2        0        1
#> 2.22     NA     NA      3     FALSE       2        1        2
#> 2.23     NA     NA      4     FALSE       2        1        0
#> 2.44     NA     NA      5     FALSE       2        0        1
#> 2.25     NA     NA      6     FALSE       2        1        1
#> 2.26     NA     NA      7     FALSE       2        1        1
#> 2.27     NA     NA      8     FALSE       2        1        4
#> 2.28     NA     NA      9     FALSE       2        1        0
#> 2.29     NA     NA     10     FALSE       2        1        0
#> 2.210    NA     NA     11     FALSE       2        1        0
#> 2.211    NA     NA     12     FALSE       2        1        1
#> 2.212    NA     NA     13     FALSE       2        0        1
#> 3.3      NA     NA      1      TRUE       3        1        1
#> 3.21     NA     NA      2     FALSE       3        2        1
#> 3.32     NA     NA      3     FALSE       3        1        1
#> 3.33     NA     NA      4     FALSE       3        2        1
#> 3.24     NA     NA      5     FALSE       3        1        1
#> 3.35     NA     NA      6     FALSE       3        1        1
#> 3.36     NA     NA      7     FALSE       3        1        1
#> 3.37     NA     NA      8     FALSE       3        1        1
#> 3.38     NA     NA      9     FALSE       3        1        1
#> 3.39     NA     NA     10     FALSE       3        1        1
#> 3.310    NA     NA     11     FALSE       3        1        1
#> 3.311    NA     NA     12     FALSE       3        1        1
#> 3.312    NA     NA     13     FALSE       3        0        1
#> 4.4      NA     NA      1      TRUE       4        1        1
#> 4.41     NA     NA      2     FALSE       4        0        1
#> 4.42     NA     NA      3     FALSE       4        1        1
#> 4.43     NA     NA      4     FALSE       4        0        1
#> 4.34     NA     NA      5     FALSE       4        1        1
#> 4.45     NA     NA      6     FALSE       4        1        3
#> 4.46     NA     NA      7     FALSE       4        1        0
#> 4.47     NA     NA      8     FALSE       4        1        0
#> 4.48     NA     NA      9     FALSE       4        1        1
#> 4.49     NA     NA     10     FALSE       4        1        1
#> 4.410    NA     NA     11     FALSE       4        1        1
#> 4.411    NA     NA     12     FALSE       4        1        1
#> 4.412    NA     NA     13     FALSE       4        0        1
#> 16       NA     NA     NA        NA      NA       NA       NA
#> 17       NA     NA     NA        NA      NA       NA       NA

docx_summary(doc, preserve = TRUE)[28, ]
#>       doc_index content_type    style_name                text level num_id
#> 1.112        16   table cell Light Shading Note\nNew line note    NA     NA
#>       row_id is_header cell_id col_span row_span
#> 1.112     13     FALSE       1        4        1