Skip to content

Commit

Permalink
Merge pull request #14918 from github/tausbn/python-support-tarslip-e…
Browse files Browse the repository at this point in the history
…xtraction-filters

Python: Add support for extraction filters
  • Loading branch information
tausbn committed Nov 30, 2023
2 parents 30e62d3 + 6e27918 commit 4ef1fe4
Show file tree
Hide file tree
Showing 4 changed files with 103 additions and 5 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
category: minorAnalysis
---

- Added support for tarfile extraction filters as defined in [PEP-706](https://peps.python.org/pep-0706). In particular, calls to `TarFile.extract`, and `TarFile.extractall` are no longer considered to be sinks for the `py/tarslip` query if a sufficiently safe filter is provided.
Original file line number Diff line number Diff line change
Expand Up @@ -55,10 +55,38 @@ module TarSlip {
ExcludeTarFilePy() { this.getLocation().getFile().getBaseName() = "tarfile.py" }
}

/**
* Holds if `call` has an unsafe extraction filter, either by default (as the default is unsafe),
* or by being set to an explicitly unsafe value, such as `"fully_trusted"`, or `None`.
*/
private predicate hasUnsafeFilter(API::CallNode call) {
call =
API::moduleImport("tarfile")
.getMember("open")
.getReturn()
.getMember(["extract", "extractall"])
.getACall() and
(
exists(Expr filterValue |
filterValue = call.getParameter(4, "filter").getAValueReachingSink().asExpr() and
(
filterValue.(StrConst).getText() = "fully_trusted"
or
filterValue instanceof None
)
)
or
not exists(call.getParameter(4, "filter"))
)
}

/**
* A sink capturing method calls to `extractall`.
*
* For a call to `file.extractall` without arguments, `file` is considered a sink.
* For a call to `file.extractall`, `file` is considered a sink if
*
* - there are no other arguments, or
* - there are other arguments (except `members`), and the extraction filter is unsafe.
*/
class ExtractAllSink extends Sink {
ExtractAllSink() {
Expand All @@ -69,8 +97,13 @@ module TarSlip {
.getReturn()
.getMember("extractall")
.getACall() and
not exists(call.getArg(_)) and
not exists(call.getArgByName(_)) and
(
not exists(call.getArg(_)) and
not exists(call.getArgByName(_))
or
hasUnsafeFilter(call)
) and
not exists(call.getArgByName("members")) and
this = call.(DataFlow::MethodCallNode).getObject()
)
}
Expand All @@ -84,7 +117,8 @@ module TarSlip {
exists(DataFlow::CallCfgNode call |
call =
API::moduleImport("tarfile").getMember("open").getReturn().getMember("extract").getACall() and
this = call.getArg(0)
this = call.getArg(0) and
hasUnsafeFilter(call)
)
}
}
Expand All @@ -99,7 +133,8 @@ module TarSlip {
.getReturn()
.getMember("extractall")
.getACall() and
this in [call.getArg(0), call.getArgByName("members")]
this in [call.getArg(0), call.getArgByName("members")] and
hasUnsafeFilter(call)
)
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,15 @@ edges
| tarslip.py:58:1:58:3 | GSSA Variable tar | tarslip.py:59:5:59:9 | GSSA Variable entry |
| tarslip.py:58:7:58:39 | ControlFlowNode for Attribute() | tarslip.py:58:1:58:3 | GSSA Variable tar |
| tarslip.py:59:5:59:9 | GSSA Variable entry | tarslip.py:61:21:61:25 | ControlFlowNode for entry |
| tarslip.py:90:1:90:3 | GSSA Variable tar | tarslip.py:91:1:91:3 | ControlFlowNode for tar |
| tarslip.py:90:7:90:39 | ControlFlowNode for Attribute() | tarslip.py:90:1:90:3 | GSSA Variable tar |
| tarslip.py:94:1:94:3 | GSSA Variable tar | tarslip.py:95:5:95:9 | GSSA Variable entry |
| tarslip.py:94:7:94:39 | ControlFlowNode for Attribute() | tarslip.py:94:1:94:3 | GSSA Variable tar |
| tarslip.py:95:5:95:9 | GSSA Variable entry | tarslip.py:96:17:96:21 | ControlFlowNode for entry |
| tarslip.py:109:1:109:3 | GSSA Variable tar | tarslip.py:110:1:110:3 | ControlFlowNode for tar |
| tarslip.py:109:7:109:39 | ControlFlowNode for Attribute() | tarslip.py:109:1:109:3 | GSSA Variable tar |
| tarslip.py:112:1:112:3 | GSSA Variable tar | tarslip.py:113:24:113:26 | ControlFlowNode for tar |
| tarslip.py:112:7:112:39 | ControlFlowNode for Attribute() | tarslip.py:112:1:112:3 | GSSA Variable tar |
nodes
| tarslip.py:14:1:14:3 | GSSA Variable tar | semmle.label | GSSA Variable tar |
| tarslip.py:14:7:14:39 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
Expand All @@ -31,10 +40,27 @@ nodes
| tarslip.py:58:7:58:39 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| tarslip.py:59:5:59:9 | GSSA Variable entry | semmle.label | GSSA Variable entry |
| tarslip.py:61:21:61:25 | ControlFlowNode for entry | semmle.label | ControlFlowNode for entry |
| tarslip.py:90:1:90:3 | GSSA Variable tar | semmle.label | GSSA Variable tar |
| tarslip.py:90:7:90:39 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| tarslip.py:91:1:91:3 | ControlFlowNode for tar | semmle.label | ControlFlowNode for tar |
| tarslip.py:94:1:94:3 | GSSA Variable tar | semmle.label | GSSA Variable tar |
| tarslip.py:94:7:94:39 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| tarslip.py:95:5:95:9 | GSSA Variable entry | semmle.label | GSSA Variable entry |
| tarslip.py:96:17:96:21 | ControlFlowNode for entry | semmle.label | ControlFlowNode for entry |
| tarslip.py:109:1:109:3 | GSSA Variable tar | semmle.label | GSSA Variable tar |
| tarslip.py:109:7:109:39 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| tarslip.py:110:1:110:3 | ControlFlowNode for tar | semmle.label | ControlFlowNode for tar |
| tarslip.py:112:1:112:3 | GSSA Variable tar | semmle.label | GSSA Variable tar |
| tarslip.py:112:7:112:39 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| tarslip.py:113:24:113:26 | ControlFlowNode for tar | semmle.label | ControlFlowNode for tar |
subpaths
#select
| tarslip.py:15:1:15:3 | ControlFlowNode for tar | tarslip.py:14:7:14:39 | ControlFlowNode for Attribute() | tarslip.py:15:1:15:3 | ControlFlowNode for tar | This file extraction depends on a $@. | tarslip.py:14:7:14:39 | ControlFlowNode for Attribute() | potentially untrusted source |
| tarslip.py:20:17:20:21 | ControlFlowNode for entry | tarslip.py:18:7:18:39 | ControlFlowNode for Attribute() | tarslip.py:20:17:20:21 | ControlFlowNode for entry | This file extraction depends on a $@. | tarslip.py:18:7:18:39 | ControlFlowNode for Attribute() | potentially untrusted source |
| tarslip.py:39:17:39:21 | ControlFlowNode for entry | tarslip.py:35:7:35:39 | ControlFlowNode for Attribute() | tarslip.py:39:17:39:21 | ControlFlowNode for entry | This file extraction depends on a $@. | tarslip.py:35:7:35:39 | ControlFlowNode for Attribute() | potentially untrusted source |
| tarslip.py:43:24:43:26 | ControlFlowNode for tar | tarslip.py:42:7:42:39 | ControlFlowNode for Attribute() | tarslip.py:43:24:43:26 | ControlFlowNode for tar | This file extraction depends on a $@. | tarslip.py:42:7:42:39 | ControlFlowNode for Attribute() | potentially untrusted source |
| tarslip.py:61:21:61:25 | ControlFlowNode for entry | tarslip.py:58:7:58:39 | ControlFlowNode for Attribute() | tarslip.py:61:21:61:25 | ControlFlowNode for entry | This file extraction depends on a $@. | tarslip.py:58:7:58:39 | ControlFlowNode for Attribute() | potentially untrusted source |
| tarslip.py:91:1:91:3 | ControlFlowNode for tar | tarslip.py:90:7:90:39 | ControlFlowNode for Attribute() | tarslip.py:91:1:91:3 | ControlFlowNode for tar | This file extraction depends on a $@. | tarslip.py:90:7:90:39 | ControlFlowNode for Attribute() | potentially untrusted source |
| tarslip.py:96:17:96:21 | ControlFlowNode for entry | tarslip.py:94:7:94:39 | ControlFlowNode for Attribute() | tarslip.py:96:17:96:21 | ControlFlowNode for entry | This file extraction depends on a $@. | tarslip.py:94:7:94:39 | ControlFlowNode for Attribute() | potentially untrusted source |
| tarslip.py:110:1:110:3 | ControlFlowNode for tar | tarslip.py:109:7:109:39 | ControlFlowNode for Attribute() | tarslip.py:110:1:110:3 | ControlFlowNode for tar | This file extraction depends on a $@. | tarslip.py:109:7:109:39 | ControlFlowNode for Attribute() | potentially untrusted source |
| tarslip.py:113:24:113:26 | ControlFlowNode for tar | tarslip.py:112:7:112:39 | ControlFlowNode for Attribute() | tarslip.py:113:24:113:26 | ControlFlowNode for tar | This file extraction depends on a $@. | tarslip.py:112:7:112:39 | ControlFlowNode for Attribute() | potentially untrusted source |
32 changes: 32 additions & 0 deletions python/ql/test/query-tests/Security/CWE-022-TarSlip/tarslip.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,3 +82,35 @@ def safemembers(members):
for entry in tar:
if not os.path.isabs(entry.name):
tar.extract(entry, "/tmp/unpack/")

# Extraction filters

extraction_filter = "fully_trusted"

tar = tarfile.open(unsafe_filename_tar)
tar.extractall(filter=extraction_filter) # unsafe
tar.close()

tar = tarfile.open(unsafe_filename_tar)
for entry in tar:
tar.extract(entry, filter=extraction_filter) # unsafe

extraction_filter = "data"

tar = tarfile.open(unsafe_filename_tar)
tar.extractall(filter=extraction_filter) # safe
tar.close()

tar = tarfile.open(unsafe_filename_tar)
for entry in tar:
tar.extract(entry, filter=extraction_filter) # safe

extraction_filter = None
tar = tarfile.open(unsafe_filename_tar)
tar.extractall(filter=extraction_filter) # unsafe

tar = tarfile.open(unsafe_filename_tar)
tar.extractall(members=tar, filter=extraction_filter) # unsafe

tar = tarfile.open(unsafe_filename_tar)
tar.extractall(members=safemembers(tar), filter=extraction_filter) # safe -- we assume `safemembers` makes up for the unsafe filter

0 comments on commit 4ef1fe4

Please sign in to comment.