Origin: upstream, 756e1ee94421fb7c570ef49bfdac21c72025417d
From: Julian Smith <julian.smith@artifex.com>
Date: Fri, 21 Nov 2025 14:23:45 +0000
Subject: Make `pymupdf embed-extract` safe by default.

Fixes #4767.
---
diff --git a/docs/module.rst b/docs/module.rst
index 47b33d306..cd8a0340d 100644
--- a/docs/module.rst
+++ b/docs/module.rst
@@ -299,7 +299,7 @@ Extraction
 Extract an embedded file like this::
 
     pymupdf embed-extract -h
-    usage: pymupdf embed-extract [-h] -name NAME [-password PASSWORD] [-output OUTPUT]
+    usage: pymupdf embed-extract [-h] -name NAME [-password PASSWORD] [-unsafe] [-output OUTPUT]
                             input
 
     ---------------------- extract embedded file to disk ----------------------
@@ -311,6 +311,7 @@ Extract an embedded file like this::
     -h, --help            show this help message and exit
     -name NAME            name of entry
     -password PASSWORD    password
+    -unsafe               allow write to stored name even if an existing file or outside current directory
     -output OUTPUT        output filename, default is stored name
 
 For details consult :meth:`Document.embfile_get`. Example (refer to previous section)::
diff --git a/src/__main__.py b/src/__main__.py
index 35914d6c7..50c5d905f 100644
--- a/src/__main__.py
+++ b/src/__main__.py
@@ -350,6 +350,12 @@ def embedded_get(args):
     except (ValueError, pymupdf.mupdf.FzErrorBase) as e:
         sys.exit(f'no such embedded file {args.name!r}: {e}')
     filename = args.output if args.output else d["filename"]
+    if not args.unsafe and not args.output:
+        if os.path.exists(filename):
+            sys.exit(f'refusing to overwrite existing file with stored name: {filename}')
+        filename_abs = os.path.abspath(filename)
+        if not filename_abs.startswith(os.getcwd() + os.sep):
+            sys.exit(f'refusing to write stored name outside current directory: {filename}')
     with open(filename, "wb") as output:
         output.write(stream)
     pymupdf.message("saved entry '%s' as '%s'" % (args.name, filename))
@@ -1024,6 +1030,9 @@ def main():
     ps_embed_extract.add_argument("input", type=str, help="PDF filename")
     ps_embed_extract.add_argument("-name", required=True, help="name of entry")
     ps_embed_extract.add_argument("-password", help="password")
+    ps_embed_extract.add_argument("-unsafe", default=False, action="store_true",
+        help="allow write to stored name even if an existing file or outside current directory"
+    )
     ps_embed_extract.add_argument(
         "-output", help="output filename, default is stored name"
     )
diff --git a/tests/test_4767.py b/tests/test_4767.py
new file mode 100644
index 000000000..d3fc318dc
--- /dev/null
+++ b/tests/test_4767.py
@@ -0,0 +1,86 @@
+import shutil
+import os
+import pymupdf
+import subprocess
+import sys
+
+
+def test_4767():
+    '''
+    Check handling of unsafe paths in `pymupdf embed-extract`.
+    '''
+    with pymupdf.open() as document:  # Build a one-page PDF holding two embedded files.
+        document.new_page() 
+        document.embfile_add(  # Entry whose stored name climbs two directories up.
+                'evil_entry',
+                b'poc:traversal test\n',
+                filename="../../test.txt",
+                ufilename="../../test.txt",
+                desc="poc",
+                )
+        document.embfile_add(  # Entry whose stored name is a plain relative filename.
+                'evil_entry2',
+                b'poc:traversal test\n',
+                filename="test2.txt",
+                ufilename="test2.txt",
+                desc="poc",
+                )
+        path = os.path.abspath(f'{__file__}/../../tests/test_4767.pdf')
+        document.save(path)
+    testdir = os.path.abspath(f'{__file__}/../../tests/test_4767_dir').replace('\\', '/')
+    shutil.rmtree(testdir, ignore_errors=1)  # Start from an empty scratch tree.
+    os.makedirs(f'{testdir}/one/two', exist_ok=1)  # Commands below cd into this directory.
+    # Helper: run a shell command; with capture=1 stderr is merged into stdout.
+    def run(command, *, check=0, capture=1):
+        print(f'Running: {command}')
+        cp = subprocess.run(
+                command, shell=1,
+                text=1,
+                check=check,
+                stdout=subprocess.PIPE if capture else None,
+                stderr=subprocess.STDOUT if capture else None,
+                )
+        print(cp.stdout)
+        return cp
+    # Helper: list every file currently under testdir, using forward slashes.
+    def get_paths():
+        paths = list()
+        for dirpath, dirnames, filenames in os.walk(testdir):
+            for filename in filenames:
+                path = f'{dirpath}/{filename}'.replace('\\', '/')
+                paths.append(path)
+        return paths
+    # Default mode: the '../../' stored name must be refused and nothing written.
+    cp = run(f'cd {testdir}/one/two && {sys.executable} -m pymupdf embed-extract {path} -name evil_entry')
+    print(cp.stdout)
+    assert cp.returncode
+    assert cp.stdout == 'refusing to write stored name outside current directory: ../../test.txt\n'
+    assert not get_paths()
+    # With -unsafe the same entry is written two levels up, i.e. directly in testdir.
+    cp = run(f'cd {testdir}/one/two && {sys.executable} -m pymupdf embed-extract {path} -name evil_entry -unsafe')
+    assert cp.returncode == 0
+    assert cp.stdout == "saved entry 'evil_entry' as '../../test.txt'\n"
+    paths = get_paths()
+    print(f'{paths=}')
+    assert paths == [f'{testdir}/test.txt']
+    # A stored name inside the current directory is extracted in default mode.
+    cp = run(f'cd {testdir}/one/two && {sys.executable} -m pymupdf embed-extract {path} -name evil_entry2')
+    assert not cp.returncode
+    assert cp.stdout == "saved entry 'evil_entry2' as 'test2.txt'\n"
+    paths = get_paths()
+    print(f'{paths=}')
+    assert paths == [f'{testdir}/test.txt', f'{testdir}/one/two/test2.txt']
+    # Repeating the extraction must refuse to overwrite the file that now exists.
+    cp = run(f'cd {testdir}/one/two && {sys.executable} -m pymupdf embed-extract {path} -name evil_entry2')
+    assert cp.returncode
+    assert cp.stdout == "refusing to overwrite existing file with stored name: test2.txt\n"
+    paths = get_paths()
+    print(f'{paths=}')
+    assert paths == [f'{testdir}/test.txt', f'{testdir}/one/two/test2.txt']
+    # With -unsafe the command succeeds even though the target file already exists.
+    cp = run(f'cd {testdir}/one/two && {sys.executable} -m pymupdf embed-extract {path} -name evil_entry2 -unsafe')
+    assert not cp.returncode
+    assert cp.stdout == "saved entry 'evil_entry2' as 'test2.txt'\n"
+    paths = get_paths()
+    print(f'{paths=}')
+    assert paths == [f'{testdir}/test.txt', f'{testdir}/one/two/test2.txt']
