.. meta::
   :author: Artifex
   :description: pdf2docx is a Python library that extracts data from PDF files with PyMuPDF, parses the layout with rules, and generates a docx file with python-docx
   :keywords: PDF Text Extraction, PDF Image Extraction, PDF Conversion, PDF Tables


.. |history_begin| raw:: html

    <details>
    <summary><small style="cursor:pointer;">Show/hide history</small></summary><small>

.. |history_end| raw:: html

    </small></details>



.. raw:: html

    <div style="display:flex;justify-content:space-between;align-items: center;">
        <form class="sidebar-search-container top" method="get" action="search.html" role="search" style="width:100%">
          <input class="sidebar-search" placeholder="Search" name="q" aria-label="Search">
          <input type="hidden" name="check_keywords" value="yes">
          <input type="hidden" name="area" value="default">
        </form>
    </div>

    <div style="display:flex;justify-content:space-between;align-items:center;margin-top:20px;">
        <div class="discordLink" style="display:flex;align-items:center;margin-top: -5px;">
            <a href="https://discord.gg/TSpYGBW4eq" id="findOnDiscord" target="_blank" rel="noopener">Find <b>#pymupdf</b> on <b>Discord</b></a>
            <a href="https://discord.gg/TSpYGBW4eq" target="_blank" rel="noopener"><img src="_images/discord-mark-blue.svg" alt="Discord logo" /></a>
        </div>
    </div>
    <div>
        <a href="https://pymupdf.readthedocs.io" target="_blank" rel="noopener"><button class="cta orange" style="text-transform:none;">Try PyMuPDF</button></a>
    </div>

