1
0
mirror of https://0xacab.org/jvoisin/mat2 synced 2025-10-06 08:32:52 +02:00

164 Commits

Author SHA1 Message Date
jvoisin
eec5c33a6b Bump the changelog 2022-07-06 19:20:21 +02:00
jvoisin
beebca4bf1 Prevent arbitrary file read via zip archives
A zip file with a file pointing to /etc/passwd would, upon being cleaned by
mat2, produce a file with the filesystem's /etc/passwd file.
2022-07-05 16:27:07 +02:00
jvoisin
e2c4dbf721 Show a scary message in case of path traversal attempt 2022-07-05 15:30:10 +02:00
jvoisin
704367f91e Add support for HEIC files
Thanks to Maxime Morin ( https://www.maijin.fr/ )
for the patch.
2022-05-15 18:57:27 +02:00
jvoisin
2639713709 Minor cleanup 2022-05-05 22:00:09 +02:00
jvoisin
b18e6e11f0 Bump the changelog 2022-04-30 17:31:29 +02:00
jvoisin
62dc8c71c1 Enable gitlab's SAST 2022-04-02 16:19:13 +02:00
jvoisin
697e9583b9 Please the linters 2022-03-29 22:18:06 +02:00
jvoisin
1b37604d3a Make processing multiple files safer concurrence-wise 2022-03-29 22:15:04 +02:00
jvoisin
1c3e2afa1e Escape more control chars in the cli 2022-03-29 22:13:55 +02:00
jvoisin
05b8e97b68 Simplification of the testsuite 2022-03-29 22:13:33 +02:00
jvoisin
2a74a400e2 Fix the svg tests on archlinux 2022-03-28 23:22:42 +02:00
jvoisin
5ccddae7f5 Fix the PDF version
This should prevent the testsuite from breaking,
and marginally increase fingerprinting resistance.
2022-03-28 22:34:57 +02:00
jvoisin
12582ba2f5 Try to use modern rsvg functions when we can 2022-03-16 20:23:49 +01:00
jvoisin
35092562e6 Mention dangerzone 2022-01-06 18:31:34 +01:00
jvoisin
e5dcd39225 Bump the changelog 2022-01-06 17:00:22 +01:00
jvoisin
660f0dea73 Fix the dolphin integration
Kudos to Miguel Angel Marco Buzunariz for the original patch.
2022-01-05 13:54:50 +01:00
jvoisin
cd2b9af902 Fix the Debian CI
This should fix #162
2021-12-26 16:11:26 +01:00
jvoisin
3378f3ab8c Please pylint by iterating on dict directly, instead of calling .keys() 2021-12-26 15:23:26 +01:00
jvoisin
48680b9852 Add a fuzzer based on atheris 2021-12-19 22:37:45 +01:00
jvoisin
d555a02c90 Increase audio processing robustness 2021-12-19 22:33:28 +01:00
jvoisin
143bb0a5f3 Add a check for weird audio files, instead of crashing 2021-12-18 19:43:21 +01:00
jvoisin
a1a7c76dc9 Make mat2 more robust wrt. weird audio files 2021-12-14 23:30:13 +01:00
jvoisin
01b39aa68c Make libmat2 more robust against corrupted zip files 2021-12-13 19:44:44 +01:00
jvoisin
e312868c4e Increase a bit the robustness of mat2
Those issues were found via https://github.com/google/atheris
2021-12-13 19:00:41 +01:00
Denis 'GNUtoo' Carikli
b71bafd2cf CONTRIBUTING.md: Update information about tests
Contributors are now supposed to run tests in the GitLab instance, so
this also needs to be reflected in the CONTRIBUTING.md as otherwise
people not used to forge workflows could completely miss that
information.

Signed-off-by: Denis 'GNUtoo' Carikli <GNUtoo@cyberdimension.org>
2021-12-09 17:16:34 +01:00
jvoisin
22199df4d0 Please the linters wrt. the previous commit 2021-12-09 16:58:24 +01:00
Denis 'GNUtoo' Carikli
1703ed6ebb zip archives: keep individual files compression type
While hardcoding the compression to zipfile.ZIP_DEFLATED works for
most use cases of mat, being able to produce cleaned up uncompressed
zip files is useful for content that cannot be compressed more.

In addition it also enables to use mat2 for reproducible builds of
Android bootanimation files that don't support compression.

Signed-off-by: Denis 'GNUtoo' Carikli <GNUtoo@cyberdimension.org>
2021-12-08 18:36:39 +01:00
jvoisin
541b3c83b2 We're not in beta anymore. 2021-11-23 17:39:19 +01:00
jvoisin
6afb0cb9d8 Please the linter, again. 2021-11-23 17:36:17 +01:00
jvoisin
1c4e98425a Make the linter happier 2021-11-23 17:34:14 +01:00
jvoisin
fb7440ab5d Please a bit the CI 2021-11-21 11:02:22 +01:00
jvoisin
0c91ac7367 Implement code for internationalization 2021-11-12 20:10:57 +01:00
jvoisin
708841f9f5 Fix the coverage 2021-10-03 19:57:44 +02:00
jvoisin
d4479d9baa Bump the changelog 2021-08-29 13:33:47 +02:00
georg
08a5792a9a libmat2/pdf: Drop printing 'lol'
It seems this was committed without intention to do so.
2021-07-29 15:40:37 +00:00
Antonio Eugenio Burriel
3b094ae449 Fix pdf issues on printers
pyCairo by default renders the PDF surfaces with a resolution of 72
dpi which is so low that the bitmap gets blurred compared to original.

Since pyCairo 1.12.0, a new method set_device_scale(x_scale, y_scale)
is added, which allows changing the canvas resolution.
2021-07-25 14:12:57 +02:00
jvoisin
0b094b594b Improve xlsx support
This should close #156
2021-07-14 23:34:02 +02:00
jvoisin
8c1107c358 Make cairo behave in a less idiotic way
Because raising errors when unable to process
stuff instead of an exception is dumb.
2021-06-21 22:39:45 +02:00
jvoisin
6df615281b Fix the CI for recent exiftool versions
Always a joy to deal with whitespaces
2021-06-06 16:33:35 +02:00
georg
49c8b14e59 KDE Dolphin: add German translation
Credits to @RandomGuy234
Closes #154
2021-06-01 18:47:28 +00:00
jvoisin
bf0c777cb9 Improve support for xlsx files 2021-05-20 18:16:28 +02:00
jvoisin
682552d152 Allow bubblewrap to fail for now 2021-05-05 21:04:29 +02:00
jvoisin
c9be50f968 Fix the CI in Fedora 2021-05-05 21:03:20 +02:00
jvoisin
2eec653e99 Please pylint 2021-04-24 17:28:44 +02:00
jvoisin
85c08c5b68 Add support for AIFF files
This should close #151
2021-04-24 17:26:38 +02:00
jvoisin
c5841a241d Bump the changelog 2021-03-19 17:54:21 +01:00
jvoisin
d00ca800b2 Keep sharedStrings.xml when processing MSOffice sheets 2021-03-14 14:41:40 +01:00
jvoisin
8b42b28b70 Don't keep [trash] files when processing MS Office files 2021-03-14 14:35:29 +01:00
jvoisin
e2362b8620 Improve epub support
Warn when there are encrypted fonts in an epub file
2021-03-07 17:50:25 +01:00
jvoisin
626669f95f Add some typing to epub.py 2021-03-07 17:50:17 +01:00
jvoisin
497f5f71fc Improve epub compatibility 2021-03-07 16:59:18 +01:00
jvoisin
cd5f2eb71c Add a missing comma
This should improve epub support
2021-03-07 16:42:38 +01:00
jvoisin
ec082d6483 Improve a bit the support of epub 2021-02-07 17:24:50 +01:00
jvoisin
f8111547ae Improve epub compatibility 2021-01-30 16:24:42 +01:00
jvoisin
88fa71fbde Bump the changelog 2020-12-18 17:55:41 +01:00
Romain Vigier
6cd28ed46c Add Metadata Cleaner link 2020-12-07 11:14:03 +01:00
jvoisin
92dcc8175d Add a mention of mat2-web 2020-11-30 22:02:07 +01:00
jvoisin
7131aa6fd7 Fix the link to the mailing list 2020-11-30 21:52:39 +01:00
jvoisin
7ce2b5121b Please pylint 2020-11-30 18:53:25 +01:00
jvoisin
a517f8d36e Please pylint 2020-11-30 18:52:07 +01:00
jvoisin
61dce89fbd Raise a ValueError explicitly 2020-11-30 18:52:07 +01:00
jvoisin
88b7ec2c48 Don't be silly 2020-11-23 19:55:35 +01:00
Holger Paradies
8bea98911e Fix dolphin integration 2020-11-21 15:31:13 +00:00
jvoisin
62ec8f6c1e Fix the CI on fedora 2020-11-13 17:30:47 +01:00
jvoisin
148bcbba52 Bump coverage 2020-11-13 17:27:23 +01:00
jvoisin
b3def8b5de Mount /etc/alternatives inside bubblewrap
This is now required by ffmpeg
2020-11-13 17:18:20 +01:00
jvoisin
77dde8a049 Please pylint 2020-11-13 12:09:25 +01:00
Romain Vigier
1b361ec27e Don't set a default value when retrieving Xmlns key for SVG metadata 2020-11-12 22:46:14 +01:00
jvoisin
58a1563a99 Better test of corrupted MSOffice files 2020-11-06 16:05:42 +01:00
jvoisin
f638168033 Better handling of malformed pdf 2020-11-06 16:05:24 +01:00
jvoisin
b84f73c5c3 Handle multiple namespaces in MSOffice's content types 2020-11-06 15:29:42 +01:00
jvoisin
96e639dfd3 Fix a regexp for xlsx files
This should increase a bit the compatibility with Excel files
2020-11-06 15:26:30 +01:00
jvoisin
46b3ae1672 Fix a crash affecting some mp3 files 2020-07-22 15:47:35 +02:00
jvoisin
d0bc79442b Add a small bla about donations 2020-06-30 22:03:14 +02:00
rhamnousia
17919c73a9 typo fixes in nautilus/mat2.py 2020-06-29 12:54:24 +00:00
rhamnousia
60d820b053 fixed a minor typo in the nautilus readme 2020-06-28 18:32:00 -04:00
jvoisin
461534a966 Add a list of supported formats in the README 2020-06-09 13:50:51 +02:00
jvoisin
d8b68ef68e Improve a bit Microsoft word support 2020-05-17 16:53:36 +02:00
jvoisin
c8dc020dc5 Improve xlsx support 2020-04-06 20:47:32 +02:00
jvoisin
599909a760 Improve xlsx support 2020-04-02 20:58:10 +02:00
jvoisin
d008b1e2f0 Bump the changelog 2020-03-29 13:21:55 +02:00
jvoisin
d7a03d907b Vastly improve ppt compatibility 2020-03-08 14:06:27 +01:00
jvoisin
a23dc001cd Improve compatibility with MS Office of cleaned ppt 2020-03-07 14:34:07 +01:00
jvoisin
f93df85d03 Improve a bit ppt support 2020-03-07 05:22:36 -08:00
jvoisin
e5b1068ed6 Improve a bit the support of ppt files 2020-03-07 12:49:45 +01:00
Antoine Tenart
843c0d8cc5 mat2: standardize the help messages format
This is a cosmetic patch only.

Signed-off-by: Antoine Tenart <antoine.tenart@ack.tf>
2020-02-27 10:25:29 +01:00
tguinot
56d2c4aa5f Add which pathfinding for executables 2020-02-11 17:23:11 +01:00
jvoisin
12f23e0150 Bump the changelog 2020-02-09 19:00:34 +01:00
jvoisin
72f41c5e05 Clarify a bit the manpage 2020-02-08 17:04:53 +01:00
jvoisin
5270071b94 Remove a couple of residual metadata in pdf
This commit takes care of removing residual metadata
added by mat2 during the cleaning of pdf.
2020-02-08 17:00:37 +01:00
jvoisin
5312603a88 Fix the testsuite 2020-02-08 15:21:13 +01:00
jvoisin
ebe06cb8a9 Add an example of possible quality loss in the manpage 2020-02-08 14:01:13 +01:00
jvoisin
6dd48de4ef Improve a bit the robustness of the testsuite 2020-02-05 21:53:57 +01:00
georg
e0f4f0e302 man: fix typo and improve wording 2020-01-06 11:22:10 +00:00
jvoisin
4acf3af002 Add a note about lightweight mode in the man 2020-01-05 20:32:38 +01:00
jvoisin
ee704db2ff Add support for wav files 2020-01-01 19:47:46 +01:00
jvoisin
693408f1a6 Please mypy
Mypy doesn't like some annotation in web.py,
this commit aims at pleasing it.
2019-12-29 15:20:48 +01:00
jvoisin
0902e9e330 Make the testsuite a bit more robust
Some terminals with a small number of column
could wrap the cli's output in a way that
would make the testsuite fail.
This commit breaks the tests into several smaller ones
to mitigate this.

This issue was originally reported by eleius
[here](https://github.com/actionless/pikaur/issues/433), and forwarded as #153.
2019-12-23 23:10:39 +01:00
Ivy Fay
b2efffdaa4 sandbox: stop mounting new filesystem on /tmp
Mounting a new, empty filesystem on /tmp makes it impossible to use mat2 for manipulating files stored there. In particular, it breaks running tests while creating a package and using /tmp as a temporary builddir, which is a common setup in Arch Linux:
https://aur.archlinux.org/packages/mat2/#comment-721221
2019-12-18 02:23:43 -08:00
jvoisin
7465cedee7 Handle tiff images with a .tif extension 2019-12-16 14:55:35 -08:00
jvoisin
f5aef1b391 Improve the reliability of Exiftool-base parsers 2019-12-15 09:04:51 -08:00
jvoisin
2e3496d3d4 Improve the reliability of Gdk-based parsers 2019-12-15 07:05:53 -08:00
jvoisin
be24c681ff Improve the reliability of PNG parsing 2019-12-15 06:57:32 -08:00
jvoisin
efa525c102 Improve the robustness of the HTML parser 2019-12-15 06:50:54 -08:00
jvoisin
f67cd9d7dc Improve the robustness of the CSS parser 2019-12-15 06:44:21 -08:00
jvoisin
615997be38 Update the help section of the readme 2019-12-08 11:28:32 +01:00
jvoisin
4ba4b143e6 Add a note about metadata 2019-12-02 17:10:34 +01:00
Ivy Fay
8c7b23be90 .gitlab-ci.yml: make test command consistent across distros
This switches to use "python3 -m unittest discover -v" on every distro.
2019-12-02 14:45:32 +00:00
georg
db797e3a52 Mention KDE Dolphin service menu (and fix typo) 2019-12-01 12:28:55 +00:00
jvoisin
da182dc2f8 Bump the changelog 2019-11-30 18:31:07 +01:00
jvoisin
e4114af3b5 Improve a bit ppt support 2019-11-30 11:38:22 +01:00
jvoisin
d56f83bed1 Improve a bit odt handling 2019-11-30 10:25:24 +01:00
georg
697cb36b81 This is mat2, not MAT2
Closes #131
2019-11-30 01:14:41 -08:00
jvoisin
6e52661cfb Fix the testsuite on Python3.8
There is a bug in Python3.8 (https://bugs.python.org/issue38688)
triggering an infinite recursion when copying a tree
in a subfolder of the current one. We're working around it
by using a list instead of an iterator, so that Python
won't "discover" the target folder as part of the source files.

This should fix #130
2019-11-30 10:10:41 +01:00
mathilde
03f5129968 fix copyright attribution formatting 2019-11-27 23:10:38 +01:00
georg
deeee256cc CI: Use pylint, instead of pylint3
It seems, despite the name, both packages depend on Python 3. However,
pylint3 seems deprecated, and upstream recommends to install pylint:
https://www.pylint.org/#install

The current versions of both packages in Debian unstable are:
pylint  2.4.4-1
pylint3 2.2.2-1

This commit fixes failing CI jobs due to the use of pylint3 2.2.2-1,
which seems broken.
2019-11-26 23:16:49 +00:00
jvoisin
df1eb98a40 Please the new version of pylint 2019-11-26 22:12:56 +01:00
jvoisin
ada53cb9c6 Add an integration with Dolphin 2019-11-25 21:56:24 +01:00
jvoisin
655c19d17d Improve a bit the support for ppt files 2019-10-17 23:02:17 +02:00
jvoisin
a389cc760a Fix a stacktrace in ./mat2 when the file can't be cleaned 2019-10-17 22:51:00 +02:00
jvoisin
4034cf9a1a Copy file permissions
Mat2 (the cli) will now copy the input file permissions
to the output file.
2019-10-13 11:54:47 +02:00
jvoisin
5f0b3beb46 Add a way to disable the sandbox
Due to bubblewrap's pickiness, mat2 can now be run
without a sandbox, even if bubblewrap is installed.
2019-10-12 16:13:49 -07:00
jvoisin
3cef7fe7fc Refactor tests 2019-10-12 13:32:04 -07:00
jvoisin
6d19a20935 Remove an unused variable 2019-10-12 21:41:13 +02:00
jvoisin
12489bb682 Remove a useless \ 2019-10-12 21:36:28 +02:00
jvoisin
bb903ec309 Remove useless parenthesis 2019-10-12 21:36:19 +02:00
jvoisin
893faa6604 Fix a test for png's lightweight cleaning on corrupted files 2019-10-12 21:34:31 +02:00
jvoisin
4483c06f19 Replace abstractstaticmethod with abstractmethod
Apparently, abstractstaticmethod is deprecated
since python3.3.
2019-10-12 21:28:27 +02:00
madaidan
58773088ac Mount a new tmpfs on /tmp and drop all capabilities
This mounts a new tmpfs on /tmp so any files residing there would be hidden
from the sandbox. Many programs store some files in there that might be useful
to an attacker.  It also drops all capabilities in case it is ever run with
extra capabilities for whatever reason.
2019-10-05 15:21:40 +02:00
jvoisin
3714553185 Fix bubblewrap
On some machines (like mine), `/proc` has to be mounted.  Also, since
sandboxing with bubblewrap is best effort and assumes that an attacker doesn't
have control outside of the file to clean, it's safe to __try__ to enable some
bubblewrap features, and to silently fail otherwise.
2019-09-21 14:14:39 +02:00
jvoisin
1678d37856 Mark a comment as FP 2019-09-01 19:01:33 +02:00
jvoisin
397a18b0cc Add support for ppm 2019-09-01 09:28:46 -07:00
jvoisin
fc924239fe Add a test for nsid cleaning 2019-09-01 13:52:02 +02:00
jvoisin
0170f0e37e Improve a bit the comments in the code
This is related to the previous commit
2019-09-01 13:52:02 +02:00
jvoisin
0cf0541ad9 Remove nsid fields from MSOffice documents
nsids are random identifiers, usually used to ease merging
between documents, and can trivially be used for fingerprinting.
2019-09-01 13:52:02 +02:00
jvoisin
40669186c9 Add support for inplace cleaning 2019-08-31 10:31:08 -07:00
jvoisin
d76a6cbb18 Some arguments of mat2 are mutually exclusive 2019-08-01 08:14:21 -07:00
jvoisin
49e0c43ac5 Tweak a bit the ci
- gentoo and debian with bubblewrap are not allowed to fail anymore
- don't run coverage on debian without bubblewrap
2019-07-22 23:36:20 +02:00
jvoisin
0c75cd15dc Remove a mypy workaround to bump coverage back to 100% 2019-07-22 23:28:51 +02:00
jvoisin
5280b6c2b3 Add a test for svg namespace 2019-07-22 23:21:06 +02:00
georg
a81ea65d44 CI: Run bubblewrap tests as different user than 'root' to fix errors
It seems, there is a bug somewhere if the test suite is invoked as
'root', and bubblewrap is available.
2019-07-22 13:39:06 -07:00
georg
8bb2826f7a CI: Add job to run codespell, a spell checking software 2019-07-22 13:31:40 -07:00
jvoisin
5c33b290ae Fix mypy 2019-07-20 16:05:55 +02:00
jvoisin
00d728f6cc Display the filename along with the "No metadata found" message 2019-07-18 01:30:28 +02:00
georg
65cfd110f9 Nautilus: Add note that distribution packages ship the extension
Relates #106
2019-07-14 23:07:36 +00:00
georg
1f830bf8ad README: Drop note about Debian jessie, which is oldoldstable nowadays
As such, hopefully, it's not really used widely anymore. If so, this
note isn't really relevant.
2019-07-14 14:19:45 -07:00
georg
d027008e46 README: Add note about the user interfaces provided 2019-07-14 14:01:54 -07:00
georg
1163bdd991 README: Drop note about web disclosure to broaden the possible use cases 2019-07-14 19:22:33 +00:00
georg
1be0a4eefb INSTALL: Update Debian package status
Also, make the note generic, to omit the need to update it "constantly".

Closes #76
2019-07-13 14:29:55 -07:00
jvoisin
dc5603eb1d Please mypy 2019-07-13 23:25:44 +02:00
jvoisin
4999209f9c Add support for svg 2019-07-13 21:26:05 +02:00
jvoisin
bdd5581033 Compress cleaned zip archives by default 2019-07-13 15:04:43 +02:00
jvoisin
47f9cb33bf Please mypy 2019-07-13 15:03:40 +02:00
georg
b784a9fc7f doc/threat_model: this is about mat2, not mat 2019-07-10 14:36:47 +00:00
jvoisin
88b95923ab Parallelize the cli 2019-06-05 22:28:57 +02:00
jvoisin
13d71a2565 Document the archives handling implementation's details 2019-05-16 20:59:15 +02:00
jvoisin
35d550d229 Use memoization get _*_path() functions
This shouldn't make a big difference in the CLI/extension
usage, but might improve the performances of long-running
instances, or people misusing the API.
2019-05-16 00:31:40 +02:00
jvoisin
aa52a5c91c Please mypy wrt. the last two commits 2019-05-14 00:50:17 +02:00
Antoine Tenart
f19f6ed8b6 Rework the dependency checks to distinguish required/optional ones
Rework the dependencies definition to include a 'required' flags, which
is passed by the check_dependencies helper to the callers, so that they
can distinguish between required and optional dependencies.

This helps in two ways:
- The unit test for the dependencies was now failing when an optional
  one was missing, due to a previous rework.
- Mat2's --check-dependencies was referring to "required dependencies"
  and was misleading for the user as some of them could be optional.

Signed-off-by: Antoine Tenart <antoine.tenart@ack.tf>
2019-05-13 23:35:26 +02:00
Antoine Tenart
51ab2db279 tests: libmat2: RuntimeError cannot be thrown by check_dependencies
Remove the try/except logic when calling check_dependencies, as it
cannot throw the exception anymore (it's caught already in the
function).

Signed-off-by: Antoine Tenart <antoine.tenart@ack.tf>
2019-05-13 23:35:06 +02:00
jvoisin
ef665e6dc1 Please pylint 2019-05-13 23:31:46 +02:00
jvoisin
aa0ff643c4 Improve a bit the debug mode 2019-05-13 22:12:00 +02:00
jvoisin
dd9ead4ebe Document how mat2 compares to other software 2019-05-11 00:19:17 +02:00
50 changed files with 3016 additions and 861 deletions

View File

@@ -1,3 +1,6 @@
include:
- template: Security/SAST.gitlab-ci.yml
variables:
CONTAINER_REGISTRY: $CI_REGISTRY/georg/mat2-ci-images
@@ -5,21 +8,35 @@ stages:
- linting
- test
.prepare_env: &prepare_env
before_script: # This is needed to not run the testsuite as root
- useradd --home-dir ${CI_PROJECT_DIR} mat2
- chown -R mat2 .
linting:bandit:
image: $CONTAINER_REGISTRY:linting
stage: linting
script: # TODO: remove B405 and B314
- bandit ./mat2 --format txt --skip B101
- bandit -r ./nautilus/ --format txt --skip B101
- bandit -r ./libmat2 --format txt --skip B101,B404,B603,B405,B314
- bandit -r ./libmat2 --format txt --skip B101,B404,B603,B405,B314,B108,B311
linting:codespell:
image: $CONTAINER_REGISTRY:linting
stage: linting
script:
# Run codespell to check for spelling errors; ignore errors about binary
# files, use a config with ignored words and exclude the git directory,
# which might contain false positives
- codespell -q 2 -I utils/ci/codespell/ignored_words.txt -S .git
linting:pylint:
image: $CONTAINER_REGISTRY:linting
stage: linting
script:
- pylint3 --disable=no-else-return --extension-pkg-whitelist=cairo,gi ./libmat2 ./mat2
- pylint --disable=no-else-return,no-else-raise,no-else-continue,unnecessary-comprehension,raise-missing-from,unsubscriptable-object,use-dict-literal,unspecified-encoding,consider-using-f-string,use-list-literal,too-many-statements --extension-pkg-whitelist=cairo,gi ./libmat2 ./mat2
# Once nautilus-python is in Debian, uncomment it from the line below
- pylint3 --disable=no-else-return --extension-pkg-whitelist=Nautilus,GObject,Gtk,Gio,GLib,gi ./nautilus/mat2.py
- pylint --disable=no-else-return,no-else-raise,no-else-continue,unnecessary-comprehension,raise-missing-from,unsubscriptable-object,use-list-literal --extension-pkg-whitelist=Nautilus,GObject,Gtk,Gio,GLib,gi ./nautilus/mat2.py
linting:pyflakes:
image: $CONTAINER_REGISTRY:linting
@@ -37,33 +54,35 @@ tests:archlinux:
image: $CONTAINER_REGISTRY:archlinux
stage: test
script:
- python3 setup.py test
- python3 -m unittest discover -v
tests:debian:
image: $CONTAINER_REGISTRY:debian
stage: test
<<: *prepare_env
script:
- apt-get -qqy purge bubblewrap
- python3-coverage run --branch -m unittest discover -s tests/
- python3-coverage report --fail-under=90 -m --include 'libmat2/*'
- su - mat2 -c "python3-coverage run --branch -m unittest discover -s tests/"
- su - mat2 -c "python3-coverage report --fail-under=95 -m --include 'libmat2/*'"
tests:debian_with_bubblewrap:
image: $CONTAINER_REGISTRY:debian
stage: test
allow_failure: true
<<: *prepare_env
script:
- python3-coverage run --branch -m unittest discover -s tests/
- python3-coverage report --fail-under=100 -m --include 'libmat2/*'
- apt-get -qqy install bubblewrap
- python3 -m unittest discover -v
tests:fedora:
image: $CONTAINER_REGISTRY:fedora
stage: test
script:
- python3 setup.py test
- python3 -m unittest discover -v
tests:gentoo:
image: $CONTAINER_REGISTRY:gentoo
stage: test
allow_failure: true
<<: *prepare_env
script:
- python3 -m unittest discover -v
- su - mat2 -c "python3 -m unittest discover -v"

View File

@@ -14,4 +14,5 @@ disable=
catching-non-exception,
cell-var-from-loop,
locally-disabled,
raise-missing-from,
invalid-sequence-index, # pylint doesn't like things like `Tuple[int, bytes]` in type annotation

View File

@@ -1,3 +1,79 @@
# 0.13.0 - 2022-07-06
- Fix an arbitrary file read
- Add support for heic files
# 0.12.4 - 2022-04-30
- Fix possible errors/crashes when processing multiple files
via the command line interface
- Use a fixed PDF version for the output
- Improve compatibility with modern versions of rsvg
- Improve the robustness of the command line interface with
regard to control characters
# 0.12.3 - 2022-01-06
- Implement code for internationalization
- Keep individual files compression type in zip files
- Increase the robustness of mat2 against weird/corrupted files
- Fix the dolphin integration
- Add a fuzzer
# 0.12.2 - 2021-08-29
- Add support for aiff files
- Improve MS Office support
- Improve compatibility with newer/older version of mat2's dependencies
- Fix possible issues with the resolution of processed pdf
# 0.12.1 - 2021-03-19
- Improve epub support
- Improve MS Office support
# 0.12.0 - 2020-12-18
- Improve significantly MS Office formats support
- Fix some typos in the Nautilus extension
- Improve reliability of the mp3, pdf and svg parsers
- Improve compatibility with ffmpeg when sandboxing is used
- Improve the dolphin extension usability
- libmat2 now raises a ValueError on malformed files while trying to
find the right parser, instead of returning None
# 0.11.0 - 2020-03-29
- Improve significantly MS Office formats support
- Refactor how mat2 looks for executables
# 0.10.1 - 2020-02-09
- Improve the documentation and the manpage
- Improve the robustness of css, html, png, gdk-based, exiftool-based parsers
- Future-proof a bit the testsuite
- Handle tiff files with a .tif extension
- Improve the sandbox' usability
- Add support for wav files
# 0.10.0 - 2019-11-30
- Make mat2 work on Python3.8
- Minor improvement of ppt handling
- Minor improvement of odt handling
- Add an integration with KDE's file manager: Dolphin
- mat2 now copies file permissions on the cleaned files
- Add a flag to disable sandboxing
- Tighten a bit the sandboxing
- Improve handling of MSOffice documents
- Add support for inplace cleaning
- Better handling of mutually-exclusive arguments in the command line
- Add support for svg
- Add support for ppm
- Cleaned zip files are compressed by default
- Minor performance improvement when dealing with images/video files
- Better handling of optional dependencies
# 0.9.0 - 2019-05-10
- Add tar/tar.gz/tar.bz2/tar.xz archives support
@@ -67,12 +143,12 @@
# 0.3.1 - 2018-09-01
- Document how to install MAT2 for various distributions
- Document how to install mat2 for various distributions
- Fix various typos in the documentation/comments
- Add ArchLinux to the CI to ensure that MAT2 is running on it
- Add ArchLinux to the CI to ensure that mat2 is running on it
- Fix the handling of files with a name ending in `.JPG`
- Improve the detection of unsupported extensions in upper-case
- Streamline MAT2's logging
- Streamline mat2's logging
# 0.3.0 - 2018-08-03
@@ -92,14 +168,14 @@
- Simplify various code-paths
- Remove superfluous debug message
- Remove the `--check` option that never was implemented anyway
- Add a `-c` option to check for MAT2's dependencies
- Add a `-c` option to check for mat2's dependencies
# 0.1.3 - 2018-07-06
- Improve MAT2 resilience against corrupted images
- Improve mat2 resilience against corrupted images
- Check that the minimal version of Poppler is available
- Simplify how MAT2 deals with office files
- Simplify how mat2 deals with office files
- Improve cleaning of office files
- Thumbnails are removed
- Revisions are removed
@@ -111,8 +187,8 @@
- Rename some files to ease the packaging
- Add linters to the CI (mypy, bandit and pyflakes)
- Prevent exiftool-related parameters injections
- Improve MAT2's resilience against corrupted files
- Make MAT2 work on fedora, thanks to @atenart
- Improve mat2's resilience against corrupted files
- Make mat2 work on fedora, thanks to @atenart
- Tighten the threat model
- Simplify and improve how office files are handled

View File

@@ -1,11 +1,17 @@
# Contributing to MAT2
# Contributing to mat2
The main repository for MAT2 is on [0xacab]( https://0xacab.org/jvoisin/mat2 ),
The main repository for mat2 is on [0xacab]( https://0xacab.org/jvoisin/mat2 ),
but you can send patches to jvoisin by [email](https://dustri.org/) if you prefer.
Do feel free to pick up [an issue]( https://0xacab.org/jvoisin/mat2/issues )
and to send a pull-request. Please do check that everything is fine by running the
testsuite with `python3 -m unittest discover -v` before submitting one :)
and to send a pull-request.
Before sending the pull-request, please do check that everything is fine by
running the full test suite in GitLab. To do that, after forking mat2 in GitLab,
you need to go in Settings -> CI/CD -> Runner and there enable shared runners.
Mat2 also has unit tests (that are also run in the full test suite). You can run
them with `python3 -m unittest discover -v`.
If you're fixing a bug or adding a new feature, please add tests accordingly,
this will greatly improve the odds of your merge-request getting merged.
@@ -16,7 +22,7 @@ If you're adding a new fileformat, please add tests for:
2. Cleaning metadata
3. Raising `ValueError` upon a corrupted file
Since MAT2 is written in Python3, please conform as much as possible to the
Since mat2 is written in Python3, please conform as much as possible to the
[pep8]( https://pep8.org/ ) style; except where it makes no sense of course.
# Doing a release

View File

@@ -14,7 +14,16 @@ pip3 install mat2
## Optional dependencies
When [bubblewrap](https://github.com/projectatomic/bubblewrap) is
installed, MAT2 uses it to sandbox any external processes it invokes.
installed, mat2 uses it to sandbox any external processes it invokes.
## Arch Linux
Thanks to [Francois_B](https://www.sciunto.org/), there is a package available on
[Arch linux's AUR](https://aur.archlinux.org/packages/mat2/).
## Debian
There is a package available in [Debian](https://packages.debian.org/search?keywords=mat2&searchon=names&section=all).
## Fedora
@@ -37,40 +46,6 @@ Then you can install both the Mat2 command and Nautilus extension:
dnf -y install mat2 mat2-nautilus
```
## Debian
There a package available in Debian *buster/sid*. The package [doesn't include
the Nautilus extension yet](https://bugs.debian.org/910491).
For Debian 9 *stretch*, there is a way to install it *manually*:
```
# apt install python3-mutagen python3-gi-cairo gir1.2-gdkpixbuf-2.0 libimage-exiftool-perl gir1.2-glib-2.0 gir1.2-poppler-0.18 ffmpeg
# apt install bubblewrap # if you want sandboxing
$ git clone https://0xacab.org/jvoisin/mat2.git
$ cd mat2
$ ./mat2
```
and if you want to install the über-fancy Nautilus extension:
```
# apt install gnome-common gtk-doc-tools libnautilus-extension-dev python-gi-dev python3-dev build-essential
$ git clone https://github.com/GNOME/nautilus-python
$ cd nautilus-python
$ PYTHON=/usr/bin/python3 ./autogen.sh
$ make
# make install
$ mkdir -p ~/.local/share/nautilus-python/extensions/
$ cp ../nautilus/mat2.py ~/.local/share/nautilus-python/extensions/
$ PYTHONPATH=/home/$USER/mat2 PYTHON=/usr/bin/python3 nautilus
```
## Arch Linux
Thanks to [Francois_B](https://www.sciunto.org/), there is a package available on
[Arch linux's AUR](https://aur.archlinux.org/packages/mat2/).
## Gentoo
MAT2 is available in the [torbrowser overlay](https://github.com/MeisterP/torbrowser-overlay).
mat2 is available in the [torbrowser overlay](https://github.com/MeisterP/torbrowser-overlay).

View File

@@ -6,9 +6,6 @@
```
This software is currently in **beta**, please don't use it for anything
critical.
# Metadata and privacy
Metadata consist of information that characterizes data.
@@ -20,22 +17,32 @@ Metadata within a file can tell a lot about you.
Cameras record data about when a picture was taken and what
camera was used. Office documents like PDF or Office automatically adds
author and company information to documents and spreadsheets.
Maybe you don't want to disclose those information on the web.
Maybe you don't want to disclose that information.
This is precisely the job of MAT2: getting rid, as much as possible, of
This is precisely the job of mat2: getting rid, as much as possible, of
metadata.
mat2 provides a command line tool, and graphical user interfaces via a service
menu for Dolphin, the default file manager of KDE, and an extension for
Nautilus, the default file manager of GNOME.
# Requirements
- `python3-mutagen` for audio support
- `python3-gi-cairo` and `gir1.2-poppler-0.18` for PDF support
- `gir1.2-gdkpixbuf-2.0` for images support
- `gir1.2-rsvg-2.0` for svg support
- `FFmpeg`, optionally, for video support
- `libimage-exiftool-perl` for everything else
- `bubblewrap`, optionally, for sandboxing
Please note that MAT2 requires at least Python3.5, meaning that it
doesn't run on [Debian Jessie](https://packages.debian.org/jessie/python3).
Please note that mat2 requires at least Python3.5.
# Requirements setup on macOS (OS X) using [Homebrew](https://brew.sh/)
```bash
brew install exiftool cairo pygobject3 poppler gdk-pixbuf librsvg ffmpeg
```
# Running the test suite
@@ -50,11 +57,11 @@ $ python3-coverage run --branch -m unittest discover -s tests/
$ python3-coverage report -m --include 'libmat2/*'
```
# How to use MAT2
# How to use mat2
```bash
usage: mat2 [-h] [-v] [-l] [--check-dependencies] [-V]
[--unknown-members policy] [-s | -L]
```
usage: mat2 [-h] [-V] [--unknown-members policy] [--inplace] [--no-sandbox]
[-v] [-l] [--check-dependencies] [-L | -s]
[files [files ...]]
Metadata anonymisation toolkit 2
@@ -64,27 +71,46 @@ positional arguments:
optional arguments:
-h, --help show this help message and exit
-v, --version show program's version number and exit
-l, --list list all supported fileformats
--check-dependencies check if MAT2 has all the dependencies it needs
-V, --verbose show more verbose status information
--unknown-members policy
how to handle unknown members of archive-style files
(policy should be one of: abort, omit, keep) [Default:
abort]
-s, --show list harmful metadata detectable by MAT2 without
removing them
--inplace clean in place, without backup
--no-sandbox Disable bubblewrap's sandboxing
-v, --version show program's version number and exit
-l, --list list all supported fileformats
--check-dependencies check if mat2 has all the dependencies it needs
-L, --lightweight remove SOME metadata
-s, --show list harmful metadata detectable by mat2 without
removing them
```
Note that MAT2 **will not** clean files in-place, but will produce, for
Note that mat2 **will not** clean files in-place, but will produce, for
example, with a file named "myfile.png" a cleaned version named
"myfile.cleaned.png".
## Web interface
It's possible to run mat2 as a web service, via
[mat2-web](https://0xacab.org/jvoisin/mat2-web).
## Desktop GUI
For GNU/Linux desktops, it's possible to use the
[Metadata Cleaner](https://gitlab.com/rmnvgr/metadata-cleaner) GTK application.
# Supported formats
The following formats are supported: avi, bmp, css, epub/ncx, flac, gif, jpeg,
m4a/mp2/mp3/…, mp4, odc/odf/odg/odi/odp/ods/odt/…, ogg/opus/oga/spx/…, pdf,
png, ppm, pptx/xlsx/docx/…, svg/svgz/…, tar/tar.gz/tar.bz2/tar.xz/…, tiff,
torrent, wav, wmv, zip, …
# Notes about detecting metadata
While MAT2 is doing its very best to display metadata when the `--show` flag is
passed, it doesn't mean that a file is clean from any metadata if MAT2 doesn't
While mat2 is doing its very best to display metadata when the `--show` flag is
passed, it doesn't mean that a file is clean from any metadata if mat2 doesn't
show any. There is no reliable way to detect every single possible metadata for
complex file formats.
@@ -110,15 +136,21 @@ of the guarantee that mat2 won't modify the data of their files, there is the
watermarks from PDF.
- [Scrambled Exif](https://f-droid.org/packages/com.jarsilio.android.scrambledeggsif/),
an open-source Android application to remove metadata from pictures.
- [Dangerzone](https://dangerzone.rocks/), designed to sanitize harmful documents
into harmless ones.
# Contact
If possible, use the [issues system](https://0xacab.org/jvoisin/mat2/issues)
or the [mailing list](https://mailman.boum.org/listinfo/mat-dev)
or the [mailing list](https://www.autistici.org/mailman/listinfo/mat-dev)
Should a more private contact be needed (eg. for reporting security issues),
you can email Julien (jvoisin) Voisin at `julien.voisin+mat2@dustri.org`,
using the gpg key `9FCDEE9E1A381F311EA62A7404D041E8171901CC`.
# Donations
If you want to donate some money, please give it to [Tails]( https://tails.boum.org/donate/?r=contribute ).
# License
This program is free software: you can redistribute it and/or modify
@@ -134,16 +166,20 @@ GNU General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
Copyright 2018 Julien (jvoisin) Voisin <julien.voisin+mat2@dustri.org>
Copyright 2016 Marie Rose for MAT2's logo
Copyright 2018 Julien (jvoisin) Voisin <julien.voisin+mat2@dustri.org>
Copyright 2016 Marie-Rose for mat2's logo
The `tests/data/dirty_with_nsid.docx` file is licensed under GPLv3,
and was borrowed from the Calibre project: https://calibre-ebook.com/downloads/demos/demo.docx
The `narrated_powerpoint_presentation.pptx` file is in the public domain.
# Thanks
MAT2 wouldn't exist without:
mat2 wouldn't exist without:
- the [Google Summer of Code](https://summerofcode.withgoogle.com/);
- the fine people from [Tails]( https://tails.boum.org);
- friends
Many thanks to them!

View File

@@ -0,0 +1,51 @@
# Exiftool
mat2 is in fact using exiftool to extract metadata from files,
but not to remove them. The previous iteration of mat2, MAT,
was using exiftool to remove metadata, which led to several cases where
they weren't correctly removed, if at all.
For example, [Exiftool's documentation](https://www.sno.phy.queensu.ca/~phil/exiftool/TagNames/PDF.html)
states the following with regard to PDF:
> All metadata edits are reversible. While this would normally be considered an
advantage, it is a potential security problem because old information is never
actually deleted from the file.
To remove metadata, mat2 usually re-renders the file completely, eliminating
all possible original metadata. See the `implementation_notes.md` file for
details.
# jpegoptim, optipng, …
While designed to reduce as much as possible the size of pictures,
these tools can be used to remove metadata. They usually have very good
support for a single picture format, and can be used in place of mat2 for them.
# PDF Redact Tools
[PDF Redact Tools](https://github.com/firstlookmedia/pdf-redact-tools) is
software developed by the people from [First Look
Media](https://firstlook.media/), the entity behind, amongst other things,
[The Intercept](https://theintercept.com/).
The tool uses roughly the same approach as mat2 to deal with PDF,
which is unfortunately the only fileformat that it does support.
It's interesting to note that it has counter-measures against
[yellow dots](https://en.wikipedia.org/wiki/Machine_Identification_Code),
a capacity that mat2 [doesn't possess yet](https://0xacab.org/jvoisin/mat2/issues/43).
# Exiv2
[Exiv2](https://www.exiv2.org/) was considered for mat2,
but it currently [misses a lot of metadata](https://0xacab.org/jvoisin/mat2/issues/85)
# Other non-open-source software/online services
There is a lot of closed-source software and many online services claiming to remove
metadata from your files, but since there is no way to actually verify that
they're effectively removing them, let alone adding unique markers, they
shouldn't be used.

View File

@@ -4,7 +4,7 @@ Implementation notes
Lightweight cleaning mode
-------------------------
Due to *popular* request, MAT2 is providing a *lightweight* cleaning mode,
Due to *popular* request, mat2 is providing a *lightweight* cleaning mode,
that only cleans the superficial metadata of your file, but not
the ones that might be in **embedded** resources. Like for example,
images in a PDF or an office document.
@@ -12,33 +12,49 @@ images in a PDF or an office document.
Revisions handling
------------------
Revisions are handled according to the principle of least astonishment: they are entirely removed.
Revisions are handled according to the principle of least astonishment: they
are entirely removed.
- Either the users aren't aware of the revisions, are thus they should be deleted. For example journalists that are editing a document to erase mentions sources mentions.
- Either the users aren't aware of the revisions, and thus they should be
deleted. For example, journalists that are editing a document to erase
mentions of sources.
- Or they are aware of it, and will likely not expect MAT2 to be able to keep the revisions, that are basically traces about how, when and who edited the document.
- Or they are aware of it, and will likely not expect mat2 to be able to keep
the revisions, that are basically traces about how, when and who edited the
document.
Race conditions
---------------
MAT2 does its very best to avoid crashing at runtime. This is why it's checking
if the file is valid __at parser creation__. MAT2 doesn't take any measure to
mat2 does its very best to avoid crashing at runtime. This is why it's checking
if the file is valid __at parser creation__. mat2 doesn't take any measure to
ensure that the file is not changed between the time the parser is
instantiated, and the call to clean or show the metadata.
Symlink attacks
---------------
MAT2 output predictable filenames (like yourfile.jpg.cleaned).
mat2 outputs predictable filenames (like yourfile.cleaned.jpg).
This may lead to symlink attacks. Please check if your OS protects
against them.
Archives handling
-----------------
MAT2 doesn't support archives yet, because we haven't found an usable way to ask the user
what to do when a non-supported files are encountered.
By default, when cleaning a non-supported file format in an archive,
mat2 will abort with a detailed error message.
While strongly discouraged, it's possible to override this behaviour to force
the exclusion, or inclusion of unknown files into the cleaned archive.
While Python's [zipfile](https://docs.python.org/3/library/zipfile.html) module
provides a *safe* way to extract members of a zip archive, the
[tarfile](https://docs.python.org/3/library/tarfile.html) one doesn't,
meaning that it's up to mat2 to implement safety checks. Currently,
it defends against path-traversal, both relative and absolute,
symlink-related attacks, setuid/setgid attacks, duplicate members, block and
char devices, … but there might still be dragons lurking there.
PDF handling
------------
@@ -49,10 +65,10 @@ didn't remove any *deep metadata*, like the ones in embedded pictures. This was
one of the reasons MAT was abandoned: the absence of a satisfying solution to
handle PDF. But apparently, people are ok with [pdf redact
tools](https://github.com/firstlookmedia/pdf-redact-tools), that simply
transform the PDF into images. So this is what's MAT2 is doing too.
transform the PDF into images. So this is what mat2 is doing too.
Of course, it would be possible to detect images in a PDF file, and process them
with MAT2, but since a PDF can contain a lot of things, like images, videos,
with mat2, but since a PDF can contain a lot of things, like images, videos,
javascript, pdf, blobs, … this is the easiest and safest way to clean them.
Images handling
@@ -65,7 +81,7 @@ XML attacks
-----------
Since our threat model conveniently excludes files crafted to specifically
bypass MAT2, fileformats containing harmful XML are out of our scope.
But since MAT2 is using [etree](https://docs.python.org/3/library/xml.html#xml-vulnerabilities)
bypass mat2, fileformats containing harmful XML are out of our scope.
But since mat2 is using [etree](https://docs.python.org/3/library/xml.html#xml-vulnerabilities)
to process XML, it's "only" vulnerable to DoS, and not memory corruption:
odds are that the user will notice that the cleaning didn't succeed.

View File

@@ -1,4 +1,4 @@
.TH MAT2 "1" "May 2019" "MAT2 0.9.0" "User Commands"
.TH mat2 "1" "July 2022" "mat2 0.13.0" "User Commands"
.SH NAME
mat2 \- the metadata anonymisation toolkit 2
@@ -32,7 +32,7 @@ show program's version number and exit
list all supported fileformats
.TP
\fB\-\-check\-dependencies\fR
check if MAT2 has all the dependencies it needs
check if mat2 has all the dependencies it needs
.TP
\fB\-V\fR, \fB\-\-verbose\fR
show more verbose status information
@@ -41,11 +41,16 @@ show more verbose status information
how to handle unknown members of archive-style files (policy should be one of: abort, omit, keep)
.TP
\fB\-s\fR, \fB\-\-show\fR
list harmful metadata detectable by MAT2 without
removing them
list harmful metadata detectable by mat2 without removing them
.TP
\fB\-L\fR, \fB\-\-lightweight\fR
remove SOME metadata
.TP
\fB\--no-sandbox\fR
disable bubblewrap's sandboxing
.TP
\fB\--inplace\fR
clean in place, without backup
.SH EXAMPLES
To remove all the metadata from a PDF file:
@@ -57,6 +62,24 @@ mat2 ./myfile.pdf
.fi
.PP
.SH NOTES ABOUT METADATA
While mat2 is doing its very best to display metadata when the --show flag is
passed, it doesn't mean that a file is clean from any metadata if mat2 doesn't
show any. There is no reliable way to detect every single possible metadata for
complex file formats.
.PP
This is why you shouldn't rely on metadata's presence to decide if your file must
be cleaned or not.
.PP
Moreover, mat2 goes to great lengths to make sure that as much metadata as
possible are removed. This might sometimes result in a loss of quality of the
processed files. For example, textual based pdf file converted into image based
one means that it'll be no longer possible to select text in them. If you're
experiencing this, you might want to give the lightweight cleaning mode a try,
but keep in mind by doing so, some metadata \fBwon't be cleaned\fR.
.SH BUGS
While mat2 does its very best to remove every single metadata,

View File

@@ -3,7 +3,7 @@ Threat Model
The Metadata Anonymisation Toolkit 2 adversary has a number
of goals, capabilities, and counter-attack types that can be
used to guide us towards a set of requirements for the MAT2.
used to guide us towards a set of requirements for mat2.
This is an overhaul of MAT's (the first iteration of the software) one.
@@ -53,7 +53,7 @@ Adversary
user. This is the strongest position for the adversary to
have. In this case, the adversary is capable of inserting
arbitrary, custom watermarks specifically for tracking
the user. In general, MAT2 cannot defend against this
the user. In general, mat2 cannot defend against this
adversary, but we list it for completeness' sake.
- The adversary created the document for a group of users.
@@ -65,7 +65,7 @@ Adversary
- The adversary did not create the document, the weakest
position for the adversary to have. The file format is
(most of the time) standard, nothing custom is added:
MAT2 must be able to remove all metadata from the file.
mat2 must be able to remove all metadata from the file.
Requirements
@@ -73,28 +73,28 @@ Requirements
* Processing
- MAT2 *should* avoid interactions with information.
- mat2 *should* avoid interactions with information.
Its goal is to remove metadata, and the user is solely
responsible for the information of the file.
- MAT2 *must* warn when encountering an unknown
format. For example, in a zipfile, if MAT encounters an
- mat2 *must* warn when encountering an unknown
format. For example, in a zipfile, if mat2 encounters an
unknown format, it should warn the user, and ask if the
file should be added to the anonymised archive that is
produced.
- MAT2 *must* not add metadata, since its purpose is to
- mat2 *must* not add metadata, since its purpose is to
anonymise files: every added item of metadata decreases
anonymity.
- MAT2 *should* handle unknown/hidden metadata fields,
- mat2 *should* handle unknown/hidden metadata fields,
like proprietary extensions of open formats.
- MAT2 *must not* fail silently. Upon failure,
MAT2 *must not* modify the file in any way.
- mat2 *must not* fail silently. Upon failure,
mat2 *must not* modify the file in any way.
- MAT2 *might* leak the fact that MAT2 was used on the file,
- mat2 *might* leak the fact that mat2 was used on the file,
since it might be uncommon for some file formats to come
without any kind of metadata, an adversary might suspect that
the user used MAT2 on certain files.
the user used mat2 on certain files.

14
dolphin/README.md Normal file
View File

@@ -0,0 +1,14 @@
Dolphin integration
===================
Thanks to [Miguel Marco](https://riemann.unizar.es/~mmarco/), here is a neat
integration for [Dolphin](https://kde.org/applications/system/org.kde.dolphin),
the KDE file manager:
1. Add the `mat2.desktop` file either in
- `/usr/share/kservices5/ServiceMenus/` to install it globally
- `~/.local/share/kservices5/ServiceMenus/` for a specific user
2. Run `kbuildsycoca5` to update the corresponding database
3. Enjoy your new contextual menu to remove metadata from your files!

13
dolphin/mat2.desktop Normal file
View File

@@ -0,0 +1,13 @@
[Desktop Entry]
X-KDE-ServiceTypes=KonqPopupMenu/Plugin
MimeType=application/pdf;application/vnd.oasis.opendocument.chart;application/vnd.oasis.opendocument.formula;application/vnd.oasis.opendocument.graphics;application/vnd.oasis.opendocument.image;application/vnd.oasis.opendocument.presentation;application/vnd.oasis.opendocument.spreadsheet;application/vnd.oasis.opendocument.text;application/vnd.openxmlformats-officedocument.presentationml.presentation;application/vnd.openxmlformats-officedocument.spreadsheetml.sheet;application/vnd.openxmlformats-officedocument.wordprocessingml.document;application/x-bittorrent;application/zip;audio/flac;audio/mpeg;audio/ogg;audio/x-flac;image/jpeg;image/png;image/tiff;image/x-ms-bmp;text/plain;video/mp4;video/x-msvideo;
Actions=cleanMetadata;
Type=Service
[Desktop Action cleanMetadata]
Name=Clean metadata
Name[de]=Metadaten löschen
Name[es]=Limpiar metadatos
Icon=/usr/share/icons/hicolor/scalable/apps/mat2.svg
Exec=kdialog --yesno "$( mat2 -s %F )" --title "Clean Metadata?" && mat2 %U
Exec[de]=kdialog --yesno "$( mat2 -s %F )" --title "Metadaten löschen?" && mat2 %U

View File

@@ -1,15 +1,15 @@
#!/usr/bin/env python3
import collections
import enum
import importlib
from typing import Dict, Optional
from typing import Dict, Optional, Union
from . import exiftool, video
# make pyflakes happy
assert Dict
assert Optional
assert Union
# A set of extension that aren't supported, despite matching a supported mimetype
UNSUPPORTED_EXTENSIONS = {
@@ -30,35 +30,65 @@ UNSUPPORTED_EXTENSIONS = {
}
DEPENDENCIES = {
'Cairo': 'cairo',
'PyGobject': 'gi',
'GdkPixbuf from PyGobject': 'gi.repository.GdkPixbuf',
'Poppler from PyGobject': 'gi.repository.Poppler',
'GLib from PyGobject': 'gi.repository.GLib',
'Mutagen': 'mutagen',
}
'Cairo': {
'module': 'cairo',
'required': True,
},
'PyGobject': {
'module': 'gi',
'required': True,
},
'GdkPixbuf from PyGobject': {
'module': 'gi.repository.GdkPixbuf',
'required': True,
},
'Poppler from PyGobject': {
'module': 'gi.repository.Poppler',
'required': True,
},
'GLib from PyGobject': {
'module': 'gi.repository.GLib',
'required': True,
},
'Mutagen': {
'module': 'mutagen',
'required': True,
},
}
CMD_DEPENDENCIES = {
'Exiftool': exiftool._get_exiftool_path,
'Ffmpeg': video._get_ffmpeg_path,
}
'Exiftool': {
'cmd': exiftool._get_exiftool_path,
'required': False,
},
'Ffmpeg': {
'cmd': video._get_ffmpeg_path,
'required': False,
},
}
def check_dependencies() -> Dict[str, bool]:
ret = collections.defaultdict(bool) # type: Dict[str, bool]
def check_dependencies() -> Dict[str, Dict[str, bool]]:
ret = dict() # type: Dict[str, dict]
for key, value in DEPENDENCIES.items():
ret[key] = True
ret[key] = {
'found': True,
'required': value['required'],
}
try:
importlib.import_module(value)
importlib.import_module(value['module']) # type: ignore
except ImportError: # pragma: no cover
ret[key] = False # pragma: no cover
ret[key]['found'] = False
for k, v in CMD_DEPENDENCIES.items():
ret[k] = True
ret[k] = {
'found': True,
'required': v['required'],
}
try:
v()
v['cmd']() # type: ignore
except RuntimeError: # pragma: no cover
ret[k] = False
ret[k]['found'] = False
return ret

View File

@@ -32,6 +32,7 @@ class AbstractParser(abc.ABC):
self.output_filename = fname + '.cleaned' + extension
self.lightweight_cleaning = False
self.sandbox = True
@abc.abstractmethod
def get_meta(self) -> Dict[str, Union[str, dict]]:

View File

@@ -47,11 +47,12 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
def __init__(self, filename):
super().__init__(filename)
self.archive_class = None # type: Optional[ArchiveClass]
self.member_class = None # type: Optional[ArchiveMember]
# We ignore typing here because mypy is too stupid
self.archive_class = None # type: ignore
self.member_class = None # type: ignore
# Those are the files that have a format that _isn't_
# supported by MAT2, but that we want to keep anyway.
# supported by mat2, but that we want to keep anyway.
self.files_to_keep = set() # type: Set[Pattern]
# Those are the files that we _do not_ want to keep,
@@ -62,7 +63,9 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
# the archive?
self.unknown_member_policy = UnknownMemberPolicy.ABORT # type: UnknownMemberPolicy
self.is_archive_valid()
# The LGTM comment is to mask a false-positive,
# see https://lgtm.com/projects/g/jvoisin/mat2/
self.is_archive_valid() # lgtm [py/init-calls-subclass]
def is_archive_valid(self):
"""Raise a ValueError if the current archive isn't a valid one."""
@@ -79,29 +82,35 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
# pylint: disable=unused-argument,no-self-use
return {} # pragma: no cover
def _final_checks(self) -> bool:
""" This method is invoked after the file has been cleaned,
allowing to run final verifications.
"""
# pylint: disable=unused-argument,no-self-use
return True
@staticmethod
@abc.abstractstaticmethod
@abc.abstractmethod
def _get_all_members(archive: ArchiveClass) -> List[ArchiveMember]:
"""Return all the members of the archive."""
@staticmethod
@abc.abstractstaticmethod
@abc.abstractmethod
def _clean_member(member: ArchiveMember) -> ArchiveMember:
"""Remove all the metadata for a given member."""
@staticmethod
@abc.abstractstaticmethod
@abc.abstractmethod
def _get_member_meta(member: ArchiveMember) -> Dict[str, str]:
"""Return all the metadata of a given member."""
@staticmethod
@abc.abstractstaticmethod
@abc.abstractmethod
def _get_member_name(member: ArchiveMember) -> str:
"""Return the name of the given member."""
@staticmethod
@abc.abstractstaticmethod
def _add_file_to_archive(archive: ArchiveClass, member: ArchiveMember,
@abc.abstractmethod
def _add_file_to_archive(self, archive: ArchiveClass, member: ArchiveMember,
full_path: str):
"""Add the file at full_path to the archive, via the given member."""
@@ -111,6 +120,18 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
# pylint: disable=unused-argument
return member
@staticmethod
def _get_member_compression(member: ArchiveMember):
    """Return the compression of the given archive member.

    This default implementation doesn't look at the member and
    always returns None.
    """
    # pylint: disable=unused-argument
    return None
@staticmethod
def _set_member_compression(member: ArchiveMember, compression) -> ArchiveMember:
    """Set the compression of the given archive member.

    This default implementation ignores `compression` and hands the
    member back untouched.
    """
    # pylint: disable=unused-argument
    return member
def get_meta(self) -> Dict[str, Union[str, dict]]:
meta = dict() # type: Dict[str, Union[str, dict]]
@@ -158,7 +179,7 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
# Some fileformats do require to have the `mimetype` file
# as the first file in the archive.
if self._get_member_name(item) == 'mimetype':
items = [item] + items
items.insert(0, item)
else:
items.append(item)
@@ -169,15 +190,33 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
if member_name[-1] == '/': # `is_dir` is added in Python3.6
continue # don't keep empty folders
zin.extract(member=item, path=temp_folder)
full_path = os.path.join(temp_folder, member_name)
if not os.path.abspath(full_path).startswith(temp_folder):
logging.error("%s contains a file (%s) pointing outside (%s) of its root.",
self.filename, member_name, full_path)
abort = True
break
zin.extract(member=item, path=temp_folder)
try:
original_permissions = os.stat(full_path).st_mode
except FileNotFoundError:
logging.error("Something went wrong during processing of "
"%s in %s, likely a path traversal attack.",
member_name, self.filename)
abort = True
# we're breaking instead of continuing, because this exception
# is raised in case of weird path-traversal-like attacks.
break
original_permissions = os.stat(full_path).st_mode
os.chmod(full_path, original_permissions | stat.S_IWUSR | stat.S_IRUSR)
original_compression = self._get_member_compression(item)
if self._specific_cleanup(full_path) is False:
logging.warning("Something went wrong during deep cleaning of %s",
member_name)
logging.warning("Something went wrong during deep cleaning of %s in %s",
member_name, self.filename)
abort = True
continue
@@ -197,7 +236,7 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
logging.warning("In file %s, keeping unknown element %s (format: %s)",
self.filename, member_name, mtype)
else:
logging.error("In file %s, element %s's format (%s) " \
logging.error("In file %s, element %s's format (%s) "
"isn't supported",
self.filename, member_name, mtype)
abort = True
@@ -214,6 +253,7 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
zinfo = self.member_class(member_name) # type: ignore
zinfo = self._set_member_permissions(zinfo, original_permissions)
zinfo = self._set_member_compression(zinfo, original_compression)
clean_zinfo = self._clean_member(zinfo)
self._add_file_to_archive(zout, clean_zinfo, full_path)
@@ -221,6 +261,8 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
if abort:
os.remove(self.output_filename)
return False
if not self._final_checks():
return False # pragma: no cover
return True
@@ -313,8 +355,7 @@ class TarParser(ArchiveBasedAbstractParser):
metadata['gname'] = member.gname
return metadata
@staticmethod
def _add_file_to_archive(archive: ArchiveClass, member: ArchiveMember,
def _add_file_to_archive(self, archive: ArchiveClass, member: ArchiveMember,
full_path: str):
assert isinstance(member, tarfile.TarInfo) # please mypy
assert isinstance(archive, tarfile.TarFile) # please mypy
@@ -361,8 +402,9 @@ class ZipParser(ArchiveBasedAbstractParser):
def is_archive_valid(self):
try:
zipfile.ZipFile(self.filename)
except zipfile.BadZipFile:
with zipfile.ZipFile(self.filename):
pass
except (zipfile.BadZipFile, OSError):
raise ValueError
@staticmethod
@@ -392,13 +434,13 @@ class ZipParser(ArchiveBasedAbstractParser):
return metadata
@staticmethod
def _add_file_to_archive(archive: ArchiveClass, member: ArchiveMember,
def _add_file_to_archive(self, archive: ArchiveClass, member: ArchiveMember,
full_path: str):
assert isinstance(archive, zipfile.ZipFile) # please mypy
assert isinstance(member, zipfile.ZipInfo) # please mypy
with open(full_path, 'rb') as f:
archive.writestr(member, f.read())
archive.writestr(member, f.read(),
compress_type=member.compress_type)
@staticmethod
def _get_all_members(archive: ArchiveClass) -> List[ArchiveMember]:
@@ -409,3 +451,14 @@ class ZipParser(ArchiveBasedAbstractParser):
def _get_member_name(member: ArchiveMember) -> str:
assert isinstance(member, zipfile.ZipInfo) # please mypy
return member.filename
@staticmethod
def _get_member_compression(member: ArchiveMember):
    """Return the `compress_type` recorded on the given zip member."""
    assert isinstance(member, zipfile.ZipInfo)  # please mypy
    return member.compress_type
@staticmethod
def _set_member_compression(member: ArchiveMember, compression) -> ArchiveMember:
    """Store `compression` as the zip member's `compress_type`.

    The member is modified in place and returned.
    """
    assert isinstance(member, zipfile.ZipInfo)  # please mypy
    member.compress_type = compression
    return member

View File

@@ -6,28 +6,32 @@ from typing import Dict, Union
import mutagen
from . import abstract, parser_factory
from . import abstract, parser_factory, video
class MutagenParser(abstract.AbstractParser):
def __init__(self, filename):
super().__init__(filename)
try:
mutagen.File(self.filename)
if mutagen.File(self.filename) is None:
raise ValueError
except mutagen.MutagenError:
raise ValueError
def get_meta(self) -> Dict[str, Union[str, dict]]:
f = mutagen.File(self.filename)
if f.tags:
return {k:', '.join(v) for k, v in f.tags.items()}
return {k:', '.join(map(str, v)) for k, v in f.tags.items()}
return {}
def remove_all(self) -> bool:
shutil.copy(self.filename, self.output_filename)
f = mutagen.File(self.output_filename)
f.delete()
f.save()
try:
f.delete()
f.save()
except mutagen.MutagenError:
raise ValueError
return True
@@ -37,7 +41,12 @@ class MP3Parser(MutagenParser):
def get_meta(self) -> Dict[str, Union[str, dict]]:
metadata = {} # type: Dict[str, Union[str, dict]]
meta = mutagen.File(self.filename).tags
if not meta:
return metadata
for key in meta:
if isinstance(key, tuple):
metadata[key[0]] = key[1]
continue
if not hasattr(meta[key], 'text'): # pragma: no cover
continue
metadata[key.rstrip(' \t\r\n\0')] = ', '.join(map(str, meta[key].text))
@@ -77,3 +86,25 @@ class FLACParser(MutagenParser):
meta[name] = p.get_meta() if p else 'harmful data' # type: ignore
os.remove(fname)
return meta
class WAVParser(video.AbstractFFmpegParser):
    """Parser for WAV files, relying on `video.AbstractFFmpegParser`."""
    mimetypes = {'audio/x-wav', }

    # NOTE(review): presumably the fields that are considered harmless
    # and thus not reported as metadata; confirm against the base class.
    meta_allowlist = {
        # properties of the audio stream
        'AvgBytesPerSec',
        'BitsPerSample',
        'Duration',
        'Encoding',
        'NumChannels',
        'SampleRate',
        # properties of the file itself
        'Directory',
        'ExifToolVersion',
        'FileAccessDate',
        'FileInodeChangeDate',
        'FileModifyDate',
        'FileName',
        'FilePermissions',
        'FileSize',
        'FileType',
        'FileTypeExtension',
        'MIMEType',
        'SourceFile',
    }
class AIFFParser(video.AbstractFFmpegParser):
    """Parser for AIFF files, relying on `video.AbstractFFmpegParser`."""
    mimetypes = {'audio/aiff', 'audio/x-aiff'}

    # NOTE(review): presumably the fields that are considered harmless
    # and thus not reported as metadata; confirm against the base class.
    meta_allowlist = {
        # properties of the audio stream
        'AvgBytesPerSec',
        'BitsPerSample',
        'Duration',
        'Encoding',
        'NumChannels',
        'NumSampleFrames',
        'SampleRate',
        'SampleSize',
        # properties of the file itself
        'Directory',
        'ExifToolVersion',
        'FileAccessDate',
        'FileInodeChangeDate',
        'FileModifyDate',
        'FileName',
        'FilePermissions',
        'FileSize',
        'FileType',
        'FileTypeExtension',
        'MIMEType',
        'SourceFile',
    }

View File

@@ -18,17 +18,17 @@ __all__ = ['PIPE', 'run', 'CalledProcessError']
PIPE = subprocess.PIPE
CalledProcessError = subprocess.CalledProcessError
# pylint: disable=subprocess-run-check
def _get_bwrap_path() -> str:
bwrap_path = '/usr/bin/bwrap'
if os.path.isfile(bwrap_path):
if os.access(bwrap_path, os.X_OK):
return bwrap_path
which_path = shutil.which('bwrap')
if which_path:
return which_path
raise RuntimeError("Unable to find bwrap") # pragma: no cover
# pylint: disable=bad-whitespace
def _get_bwrap_args(tempdir: str,
input_filename: str,
output_filename: Optional[str] = None) -> List[str]:
@@ -37,7 +37,7 @@ def _get_bwrap_args(tempdir: str,
# XXX: use --ro-bind-try once all supported platforms
# have a bubblewrap recent enough to support it.
ro_bind_dirs = ['/usr', '/lib', '/lib64', '/bin', '/sbin', cwd]
ro_bind_dirs = ['/usr', '/lib', '/lib64', '/bin', '/sbin', '/etc/alternatives', cwd]
for bind_dir in ro_bind_dirs:
if os.path.isdir(bind_dir): # pragma: no cover
ro_bind_args.extend(['--ro-bind', bind_dir, bind_dir])
@@ -49,9 +49,16 @@ def _get_bwrap_args(tempdir: str,
args = ro_bind_args + \
['--dev', '/dev',
'--proc', '/proc',
'--chdir', cwd,
'--unshare-all',
'--unshare-user-try',
'--unshare-ipc',
'--unshare-pid',
'--unshare-net',
'--unshare-uts',
'--unshare-cgroup-try',
'--new-session',
'--cap-drop', 'all',
# XXX: enable --die-with-parent once all supported platforms have
# a bubblewrap recent enough to support it.
# '--die-with-parent',
@@ -69,7 +76,6 @@ def _get_bwrap_args(tempdir: str,
return args
# pylint: disable=bad-whitespace
def run(args: List[str],
input_filename: str,
output_filename: Optional[str] = None,

View File

@@ -1,7 +1,9 @@
import logging
import re
import uuid
import zipfile
import xml.etree.ElementTree as ET # type: ignore
from typing import Dict, Any
from . import archive, office
@@ -15,11 +17,28 @@ class EPUBParser(archive.ZipParser):
'META-INF/container.xml',
'mimetype',
'OEBPS/content.opf',
'content.opf',
'hmh.opf',
'OPS/.+.xml'
}))
self.files_to_omit = set(map(re.compile, { # type: ignore
'iTunesMetadata.plist',
'META-INF/calibre_bookmarks.txt',
'OEBPS/package.opf',
}))
self.uniqid = uuid.uuid4()
def _specific_get_meta(self, full_path, file_path):
if file_path != 'OEBPS/content.opf':
def is_archive_valid(self):
    """Validate the archive, and refuse epub files shipping an
    encryption manifest.

    The parent class' validation runs first. Then, a ValueError is
    raised as soon as a member whose name ends with
    `META-INF/encryption.xml` is found.
    """
    super().is_archive_valid()
    with zipfile.ZipFile(self.filename) as zin:
        # Both `map` and `any` are lazy: the names are looked up one by
        # one, and the scan stops at the first offending member.
        names = map(self._get_member_name, self._get_all_members(zin))
        if any(name.endswith('META-INF/encryption.xml') for name in names):
            raise ValueError('the file contains encrypted fonts')
def _specific_get_meta(self, full_path, file_path) -> Dict[str, Any]:
if not file_path.endswith('.opf'):
return {}
with open(full_path, encoding='utf-8') as f:
@@ -30,14 +49,32 @@ class EPUBParser(archive.ZipParser):
except (TypeError, UnicodeDecodeError):
return {file_path: 'harmful content', }
def _specific_cleanup(self, full_path: str):
if full_path.endswith('OEBPS/content.opf'):
def _specific_cleanup(self, full_path: str) -> bool:
if full_path.endswith('hmh.opf') or full_path.endswith('content.opf'):
return self.__handle_contentopf(full_path)
elif full_path.endswith('OEBPS/toc.ncx'):
return self.__handle_tocncx(full_path)
elif re.search('/OPS/[^/]+.xml$', full_path):
return self.__handle_ops_xml(full_path)
return True
def __handle_tocncx(self, full_path: str):
def __handle_ops_xml(self, full_path: str) -> bool:
    """ Empty the first element whose tag ends with `head`
    (case-insensitively), then rewrite the file in place.

    :param full_path: Path of the extracted XML file to clean.
    :return: False if the file can't be parsed, True otherwise.
    """
    try:
        tree, namespace = office._parse_xml(full_path)
    except ET.ParseError:  # pragma: nocover
        logging.error("Unable to parse %s in %s.", full_path, self.filename)
        return False

    heads = (element for element in tree.iterfind('.//', namespace)
             if element.tag.strip().lower().endswith('head'))
    # Only the first match is cleared, the remaining ones are left alone.
    first_head = next(heads, None)
    if first_head is not None:  # pragma: nocover
        first_head.clear()

    tree.write(full_path, xml_declaration=True, encoding='utf-8',
               short_empty_elements=False)
    return True
def __handle_tocncx(self, full_path: str) -> bool:
try:
tree, namespace = office._parse_xml(full_path)
except ET.ParseError: # pragma: nocover
@@ -53,7 +90,7 @@ class EPUBParser(archive.ZipParser):
short_empty_elements=False)
return True
def __handle_contentopf(self, full_path: str):
def __handle_contentopf(self, full_path: str) -> bool:
try:
tree, namespace = office._parse_xml(full_path)
except ET.ParseError:
@@ -71,7 +108,7 @@ class EPUBParser(archive.ZipParser):
item.append(uniqid)
# items without mandatory content
for name in {'language', 'title'}:
for name in ['language', 'title']:
uniqid = ET.Element(self.metadata_namespace + name)
item.append(uniqid)
break # there is only a single <metadata> block

View File

@@ -1,10 +1,13 @@
import functools
import json
import logging
import os
import shutil
import subprocess
from typing import Dict, Union, Set
from . import abstract
from . import subprocess
from . import bubblewrap
# Make pyflakes happy
assert Set
@@ -18,9 +21,18 @@ class ExiftoolParser(abstract.AbstractParser):
meta_allowlist = set() # type: Set[str]
def get_meta(self) -> Dict[str, Union[str, dict]]:
out = subprocess.run([_get_exiftool_path(), '-json', self.filename],
input_filename=self.filename,
check=True, stdout=subprocess.PIPE).stdout
try:
if self.sandbox:
out = bubblewrap.run([_get_exiftool_path(), '-json',
self.filename],
input_filename=self.filename,
check=True, stdout=subprocess.PIPE).stdout
else:
out = subprocess.run([_get_exiftool_path(), '-json',
self.filename],
check=True, stdout=subprocess.PIPE).stdout
except subprocess.CalledProcessError: # pragma: no cover
raise ValueError
meta = json.loads(out.decode('utf-8'))[0]
for key in self.meta_allowlist:
meta.pop(key, None)
@@ -28,8 +40,7 @@ class ExiftoolParser(abstract.AbstractParser):
def _lightweight_cleanup(self) -> bool:
if os.path.exists(self.output_filename):
try:
# exiftool can't force output to existing files
try: # exiftool can't force output to existing files
os.remove(self.output_filename)
except OSError as e: # pragma: no cover
logging.error("The output file %s is already existing and \
@@ -48,23 +59,25 @@ class ExiftoolParser(abstract.AbstractParser):
'-o', self.output_filename,
self.filename]
try:
subprocess.run(cmd, check=True,
input_filename=self.filename,
output_filename=self.output_filename)
if self.sandbox:
bubblewrap.run(cmd, check=True,
input_filename=self.filename,
output_filename=self.output_filename)
else:
subprocess.run(cmd, check=True)
except subprocess.CalledProcessError as e: # pragma: no cover
logging.error("Something went wrong during the processing of %s: %s", self.filename, e)
return False
return True
@functools.lru_cache()
def _get_exiftool_path() -> str: # pragma: no cover
possible_pathes = {
'/usr/bin/exiftool', # debian/fedora
'/usr/bin/vendor_perl/exiftool', # archlinux
}
which_path = shutil.which('exiftool')
if which_path:
return which_path
for possible_path in possible_pathes:
if os.path.isfile(possible_path):
if os.access(possible_path, os.X_OK):
return possible_path
# Exiftool on Arch Linux has a weird path
if os.access('/usr/bin/vendor_perl/exiftool', os.X_OK):
return '/usr/bin/vendor_perl/exiftool'
raise RuntimeError("Unable to find exiftool")

View File

@@ -1,17 +1,63 @@
import imghdr
import os
from typing import Set
import re
from typing import Set, Dict, Union, Any
import cairo
import gi
gi.require_version('GdkPixbuf', '2.0')
from gi.repository import GdkPixbuf, GLib
gi.require_version('Rsvg', '2.0')
from gi.repository import GdkPixbuf, GLib, Rsvg
from . import exiftool
from . import exiftool, abstract
# Make pyflakes happy
assert Set
assert Any
class SVGParser(exiftool.ExiftoolParser):
    """ Parser for SVG images: they are cleaned by being rendered again
    through librsvg on a new cairo SVG surface.
    """
    mimetypes = {'image/svg+xml', }
    meta_allowlist = {'Directory', 'ExifToolVersion', 'FileAccessDate',
                      'FileInodeChangeDate', 'FileModifyDate', 'FileName',
                      'FilePermissions', 'FileSize', 'FileType',
                      'FileTypeExtension', 'ImageHeight', 'ImageWidth',
                      'MIMEType', 'SVGVersion', 'SourceFile', 'ViewBox'
                      }

    def remove_all(self) -> bool:
        """ Render `self.filename` into `self.output_filename`.

        :return: True once the output surface has been finished.
        :raises ValueError: Raised if the file can't be loaded by librsvg,
            or if it is reported as not having a viewbox.
        """
        try:
            svg = Rsvg.Handle.new_from_file(self.filename)
        except GLib.GError:
            raise ValueError

        try:
            _, _, _, _, has_viewbox, viewbox = svg.get_intrinsic_dimensions()
            if has_viewbox is False:
                raise ValueError
            _, width, height = svg.get_intrinsic_size_in_pixels()
        except AttributeError:
            # These methods are missing from this Rsvg binding:
            # fall back on the older API.
            dimensions = svg.get_dimensions()
            height, width = dimensions.height, dimensions.width

        # cairo.SVGSurface expects the width *before* the height.
        surface = cairo.SVGSurface(self.output_filename, width, height)
        context = cairo.Context(surface)
        try:
            # If `render_document` is missing, the AttributeError is raised
            # before `viewbox` (possibly unset) is evaluated.
            svg.render_document(context, viewbox)
        except AttributeError:
            svg.render_cairo(context)

        surface.finish()
        return True

    def get_meta(self) -> Dict[str, Union[str, dict]]:
        """ Return the metadata found by the parent class, minus the
        `Xmlns` entry when it holds the regular SVG namespace.
        """
        meta = super().get_meta()

        # The namespace is mandatory, but only the …/2000/svg is valid.
        ns = 'http://www.w3.org/2000/svg'
        if meta.get('Xmlns') == ns:
            meta.pop('Xmlns')
        return meta
class PNGParser(exiftool.ExiftoolParser):
mimetypes = {'image/png', }
@@ -31,7 +77,8 @@ class PNGParser(exiftool.ExiftoolParser):
try: # better fail here than later
cairo.ImageSurface.create_from_png(self.filename)
except MemoryError: # pragma: no cover
except: # pragma: no cover
# Cairo is returning some weird exceptions :/
raise ValueError
def remove_all(self) -> bool:
@@ -79,7 +126,13 @@ class GdkPixbufAbstractParser(exiftool.ExiftoolParser):
pixbuf = GdkPixbuf.Pixbuf.new_from_file(self.filename)
if extension.lower() == '.jpg':
extension = '.jpeg' # gdk is picky
pixbuf.savev(self.output_filename, type=extension[1:], option_keys=[], option_values=[])
elif extension.lower() == '.tif':
extension = '.tiff' # gdk is picky
try:
pixbuf.savev(self.output_filename, type=extension[1:],
option_keys=[], option_values=[])
except GLib.GError: # pragma: no cover
return False
return True
@@ -108,3 +161,43 @@ class TiffParser(GdkPixbufAbstractParser):
'FilePermissions', 'FileSize', 'FileType',
'FileTypeExtension', 'ImageHeight', 'ImageSize',
'ImageWidth', 'MIMEType', 'Megapixels', 'SourceFile'}
class PPMParser(abstract.AbstractParser):
    """ Parser for PPM images, where lines starting with a `#`
    (after optional leading whitespace) are treated as metadata.
    """
    mimetypes = {'image/x-portable-pixmap'}

    def get_meta(self) -> Dict[str, Union[str, dict]]:
        """ Return the comment lines of the file, stripped, and indexed by
        their zero-based line number (as a string).
        """
        meta = {}  # type: Dict[str, Union[str, Dict[Any, Any]]]
        with open(self.filename) as f:
            for idx, line in enumerate(f):
                stripped = line.strip()
                if stripped.startswith('#'):
                    meta[str(idx)] = stripped
        return meta

    def remove_all(self) -> bool:
        """ Copy the file to `self.output_filename` without its comment
        lines.

        :return: Always True.
        """
        with open(self.filename) as fin, \
                open(self.output_filename, 'w') as fout:
            for line in fin:
                if line.lstrip().startswith('#'):
                    continue  # a comment: drop it
                # NOTE(review): this removes *every* whitespace of the line,
                # its end-of-line included — confirm that this is intended.
                fout.write(re.sub(r"\s+", "", line, flags=re.UNICODE))
        return True
class HEICParser(exiftool.ExiftoolParser):
    """ Parser for HEIC images, cleaned via the lightweight cleanup
    inherited from the parent class.
    """
    mimetypes = {'image/heic'}
    meta_allowlist = {
        'SourceFile',
        'ExifToolVersion',
        'FileName',
        'Directory',
        'FileSize',
        'FileModifyDate',
        'FileAccessDate',
        'FileInodeChangeDate',
        'FilePermissions',
        'FileType',
        'FileTypeExtension',
        'MIMEType',
        'MajorBrand',
        'MinorVersion',
        'CompatibleBrands',
        'HandlerType',
        'PrimaryItemReference',
        'HEVCConfigurationVersion',
        'GeneralProfileSpace',
        'GeneralTierFlag',
        'GeneralProfileIDC',
        'GenProfileCompatibilityFlags',
        'ConstraintIndicatorFlags',
        'GeneralLevelIDC',
        'MinSpatialSegmentationIDC',
        'ParallelismType',
        'ChromaFormat',
        'BitDepthLuma',
        'BitDepthChroma',
        'NumTemporalLayers',
        'TemporalIDNested',
        'ImageWidth',
        'ImageHeight',
        'ImageSpatialExtent',
        'ImagePixelDepth',
        'AverageFrameRate',
        'ConstantFrameRate',
        'MediaDataSize',
        'MediaDataOffset',
        'ImageSize',
        'Megapixels',
    }

    def remove_all(self) -> bool:
        """ Delegate the cleaning to `_lightweight_cleanup`. """
        return self._lightweight_cleanup()

View File

@@ -1,3 +1,5 @@
import random
import uuid
import logging
import os
import re
@@ -44,6 +46,12 @@ def _sort_xml_attributes(full_path: str) -> bool:
class MSOfficeParser(ZipParser):
"""
The methods modifying XML documents are usually doing so in two loops:
1. finding the tag/attributes to remove;
2. actually editing the document
since it's tricky to modify the XML while iterating on it.
"""
mimetypes = {
'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
@@ -62,36 +70,68 @@ class MSOfficeParser(ZipParser):
# Do we want to keep the following ones?
'application/vnd.openxmlformats-officedocument.wordprocessingml.settings+xml',
# See https://0xacab.org/jvoisin/mat2/issues/71
'application/vnd.openxmlformats-officedocument.wordprocessingml.numbering+xml', # /word/numbering.xml
}
def __init__(self, filename):
super().__init__(filename)
# MSOffice documents are using various counters for cross-references,
# we collect them all, to make sure that they're effectively counters,
# and not unique id used for fingerprinting.
self.__counters = {
'cNvPr': set(),
'rid': set(),
}
self.files_to_keep = set(map(re.compile, { # type: ignore
r'^\[Content_Types\]\.xml$',
r'^_rels/\.rels$',
r'^word/_rels/document\.xml\.rels$',
r'^word/_rels/footer[0-9]*\.xml\.rels$',
r'^word/_rels/header[0-9]*\.xml\.rels$',
r'^xl/sharedStrings\.xml$', # https://docs.microsoft.com/en-us/office/open-xml/working-with-the-shared-string-table
r'^xl/calcChain\.xml$',
r'^(?:word|ppt|xl)/_rels/document\.xml\.rels$',
r'^(?:word|ppt|xl)/_rels/footer[0-9]*\.xml\.rels$',
r'^(?:word|ppt|xl)/_rels/header[0-9]*\.xml\.rels$',
r'^(?:word|ppt|xl)/styles\.xml$',
# TODO: randomize axId ( https://docs.microsoft.com/en-us/openspecs/office_standards/ms-oi29500/089f849f-fcd6-4fa0-a281-35aa6a432a16 )
r'^(?:word|ppt|xl)/charts/chart[0-9]*\.xml$',
r'^xl/workbook\.xml$',
r'^xl/worksheets/sheet[0-9]+\.xml$',
r'^ppt/slideLayouts/_rels/slideLayout[0-9]+\.xml\.rels$',
r'^ppt/slideLayouts/slideLayout[0-9]+\.xml$',
r'^(?:word|ppt|xl)/tableStyles\.xml$',
r'^ppt/slides/_rels/slide[0-9]*\.xml\.rels$',
r'^ppt/slides/slide[0-9]*\.xml$',
# https://msdn.microsoft.com/en-us/library/dd908153(v=office.12).aspx
r'^word/stylesWithEffects\.xml$',
r'^(?:word|ppt|xl)/stylesWithEffects\.xml$',
r'^ppt/presentation\.xml$',
# TODO: check if p:bgRef can be randomized
r'^ppt/slideMasters/slideMaster[0-9]+\.xml',
r'^ppt/slideMasters/_rels/slideMaster[0-9]+\.xml\.rels',
r'^xl/worksheets/_rels/sheet[0-9]+\.xml\.rels',
r'^xl/drawings/vmlDrawing[0-9]+\.vml',
r'^xl/drawings/drawing[0-9]+\.xml',
}))
self.files_to_omit = set(map(re.compile, { # type: ignore
r'^\[trash\]/',
r'^customXml/',
r'webSettings\.xml$',
r'^docProps/custom\.xml$',
r'^word/printerSettings/',
r'^word/theme',
r'^word/people\.xml$',
r'^(?:word|ppt|xl)/printerSettings/',
r'^(?:word|ppt|xl)/theme',
r'^(?:word|ppt|xl)/people\.xml$',
r'^(?:word|ppt|xl)/numbering\.xml$',
r'^(?:word|ppt|xl)/tags/',
# View properties like view mode, last viewed slide etc
r'^(?:word|ppt|xl)/viewProps\.xml$',
# Additional presentation-wide properties like printing properties,
# presentation show properties etc.
r'^(?:word|ppt|xl)/presProps\.xml$',
r'^(?:word|ppt|xl)/comments[0-9]+\.xml$',
# we have an allowlist in self.files_to_keep,
# so we can trash everything else
r'^word/_rels/',
r'^(?:word|ppt|xl)/_rels/',
}))
if self.__fill_files_to_keep_via_content_types() is False:
@@ -114,7 +154,7 @@ class MSOfficeParser(ZipParser):
except ET.ParseError:
return False
for c in tree:
if 'PartName' not in c.attrib or 'ContentType' not in c.attrib:
if 'PartName' not in c.attrib or 'ContentType' not in c.attrib: # pragma: no cover
continue
elif c.attrib['ContentType'] in self.content_types_to_keep:
fname = c.attrib['PartName'][1:] # remove leading `/`
@@ -124,24 +164,22 @@ class MSOfficeParser(ZipParser):
@staticmethod
def __remove_rsid(full_path: str) -> bool:
""" The method will remove "revision session ID". We're '}rsid'
""" The method will remove "revision session ID". We're using '}rsid'
instead of proper parsing, since rsid can have multiple forms, like
`rsidRDefault`, `rsidR`, `rsids`, …
We're removing rsid tags in two times, because we can't modify
the xml while we're iterating on it.
For more details, see
- https://msdn.microsoft.com/en-us/library/office/documentformat.openxml.wordprocessing.previoussectionproperties.rsidrpr.aspx
- https://blogs.msdn.microsoft.com/brian_jones/2006/12/11/whats-up-with-all-those-rsids/
"""
try:
tree, namespace = _parse_xml(full_path)
except ET.ParseError:
except ET.ParseError as e: # pragma: no cover
logging.error("Unable to parse %s: %s", full_path, e)
return False
# rsid, tags or attributes, are always under the `w` namespace
if 'w' not in namespace.keys():
if 'w' not in namespace:
return True
parent_map = {c:p for p in tree.iter() for c in p}
@@ -162,14 +200,41 @@ class MSOfficeParser(ZipParser):
return True
@staticmethod
def __remove_revisions(full_path: str) -> bool:
""" In this function, we're changing the XML document in several
different times, since we don't want to change the tree we're currently
iterating on.
def __remove_nsid(full_path: str) -> bool:
"""
nsid are random identifiers that can be used to ease the merging of
some components of a document. They can also be used for
fingerprinting.
See the spec for more details: https://docs.microsoft.com/en-us/dotnet/api/documentformat.openxml.wordprocessing.nsid?view=openxml-2.8.1
"""
try:
tree, namespace = _parse_xml(full_path)
except ET.ParseError as e:
except ET.ParseError as e: # pragma: no cover
logging.error("Unable to parse %s: %s", full_path, e)
return False
# The nsid tag is always under the `w` namespace
if 'w' not in namespace:
return True
parent_map = {c:p for p in tree.iter() for c in p}
elements_to_remove = list()
for element in tree.iterfind('.//w:nsid', namespace):
elements_to_remove.append(element)
for element in elements_to_remove:
parent_map[element].remove(element)
tree.write(full_path, xml_declaration=True)
return True
@staticmethod
def __remove_revisions(full_path: str) -> bool:
try:
tree, namespace = _parse_xml(full_path)
except ET.ParseError as e: # pragma: no cover
logging.error("Unable to parse %s: %s", full_path, e)
return False
@@ -208,11 +273,12 @@ class MSOfficeParser(ZipParser):
"""
try:
tree, namespace = _parse_xml(full_path)
except ET.ParseError: # pragma: no cover
except ET.ParseError as e: # pragma: no cover
logging.error("Unable to parse %s: %s", full_path, e)
return False
if len(namespace.items()) != 1:
return False # there should be only one namespace for Types
if len(namespace.items()) != 1: # pragma: no cover
logging.debug("Got several namespaces for Types: %s", namespace.items())
removed_fnames = set()
with zipfile.ZipFile(self.filename) as zin:
@@ -234,23 +300,80 @@ class MSOfficeParser(ZipParser):
tree.write(full_path, xml_declaration=True)
return True
def _final_checks(self) -> bool:
for k, v in self.__counters.items():
if v and len(v) != max(v):
# TODO: make this an error and return False
# once the ability to correct the counters is implemented
logging.warning("%s contains invalid %s: %s", self.filename, k, v)
return True
return True
def __collect_counters(self, full_path: str):
with open(full_path, encoding='utf-8') as f:
content = f.read()
# "relationship Id"
for i in re.findall(r'(?:\s|r:)[iI][dD]="rId([0-9]+)"(?:\s|/)', content):
self.__counters['rid'].add(int(i))
# "connector for Non-visual property"
for i in re.findall(r'<p:cNvPr id="([0-9]+)"', content):
self.__counters['cNvPr'].add(int(i))
@staticmethod
def __randomize_creationId(full_path: str) -> bool:
    """ Replace the `val` attribute of every `p14:creationId` element
    with a random number, and rewrite the file in place.

    :param full_path: Path of the XML file to process.
    :return: False if the file can't be parsed, True otherwise.
    """
    try:
        tree, namespace = _parse_xml(full_path)
    except ET.ParseError as e:  # pragma: no cover
        logging.error("Unable to parse %s: %s", full_path, e)
        return False

    if 'p14' not in namespace:
        return True  # pragma: no cover

    for item in tree.iterfind('.//p14:creationId', namespace):
        # Both bounds of randint() are inclusive, and the value has to fit
        # in an unsigned 32-bit integer, hence the `- 1`.
        item.set('val', '%s' % random.randint(0, 2**32 - 1))

    tree.write(full_path, xml_declaration=True)
    return True
@staticmethod
def __randomize_sldMasterId(full_path: str) -> bool:
    """ Replace the `id` attribute of every `p:sldMasterId` element
    with a random number, and rewrite the file in place.

    :param full_path: Path of the XML file to process.
    :return: False if the file can't be parsed, True otherwise.
    """
    try:
        tree, namespace = _parse_xml(full_path)
    except ET.ParseError as e:  # pragma: no cover
        logging.error("Unable to parse %s: %s", full_path, e)
        return False

    if 'p' not in namespace:
        return True  # pragma: no cover

    for item in tree.iterfind('.//p:sldMasterId', namespace):
        # Slide master ids are unsigned 32-bit integers that must be at
        # least 2147483648 (see ST_SlideMasterId in ECMA-376), and both
        # bounds of randint() are inclusive.
        item.set('id', '%s' % random.randint(2**31, 2**32 - 1))

    tree.write(full_path, xml_declaration=True)
    return True
def _specific_cleanup(self, full_path: str) -> bool:
# pylint: disable=too-many-return-statements
# pylint: disable=too-many-return-statements,too-many-branches
if os.stat(full_path).st_size == 0: # Don't process empty files
return True
if not full_path.endswith('.xml'):
return True
if self.__randomize_creationId(full_path) is False:
return False
self.__collect_counters(full_path)
if full_path.endswith('/[Content_Types].xml'):
# this file contains references to files that we might
# remove, and MS Office doesn't like dangling references
if self.__remove_content_type_members(full_path) is False:
if self.__remove_content_type_members(full_path) is False: # pragma: no cover
return False
elif full_path.endswith('/word/document.xml'):
# this file contains the revisions
if self.__remove_revisions(full_path) is False:
return False
return False # pragma: no cover
elif full_path.endswith('/docProps/app.xml'):
# This file must be present and valid,
# so we're removing as much as we can.
@@ -265,9 +388,22 @@ class MSOfficeParser(ZipParser):
f.write(b'<?xml version="1.0" encoding="UTF-8" standalone="yes"?>')
f.write(b'<cp:coreProperties xmlns:cp="http://schemas.openxmlformats.org/package/2006/metadata/core-properties">')
f.write(b'</cp:coreProperties>')
elif full_path.endswith('/ppt/tableStyles.xml'): # pragma: no cover
# This file must be present and valid,
# so we're removing as much as we can.
with open(full_path, 'wb') as f:
f.write(b'<?xml version="1.0" encoding="UTF-8" standalone="yes"?>')
uid = str(uuid.uuid4()).encode('utf-8')
f.write(b'<a:tblStyleLst def="{%s}" xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main"/>' % uid)
elif full_path.endswith('ppt/presentation.xml'):
if self.__randomize_sldMasterId(full_path) is False:
return False # pragma: no cover
if self.__remove_rsid(full_path) is False:
return False
return False # pragma: no cover
if self.__remove_nsid(full_path) is False:
return False # pragma: no cover
try:
_sort_xml_attributes(full_path)
@@ -337,6 +473,7 @@ class LibreOfficeParser(ZipParser):
}))
self.files_to_omit = set(map(re.compile, { # type: ignore
r'^meta\.xml$',
r'^layout-cache$',
r'^Configurations2/',
r'^Thumbnails/',
}))
@@ -349,7 +486,7 @@ class LibreOfficeParser(ZipParser):
logging.error("Unable to parse %s: %s", full_path, e)
return False
if 'office' not in namespace.keys(): # no revisions in the current file
if 'office' not in namespace: # no revisions in the current file
return True
for text in tree.getroot().iterfind('.//office:text', namespace):

View File

@@ -1,4 +1,3 @@
import logging
import glob
import os
import mimetypes
@@ -12,6 +11,10 @@ T = TypeVar('T', bound='abstract.AbstractParser')
mimetypes.add_type('application/epub+zip', '.epub')
mimetypes.add_type('application/x-dtbncx+xml', '.ncx') # EPUB Navigation Control XML File
# This should be removed after we move to python3.10
# https://github.com/python/cpython/commit/20a5b7e986377bdfd929d7e8c4e3db5847dfdb2d
mimetypes.add_type('image/heic', '.heic')
def __load_all_parsers():
""" Loads every parser in a dynamic way """
@@ -40,7 +43,10 @@ def _get_parsers() -> List[T]:
def get_parser(filename: str) -> Tuple[Optional[T], Optional[str]]:
""" Return the appropriate parser for a given filename. """
""" Return the appropriate parser for a given filename.
:raises ValueError: Raised if the instantiation of the parser went wrong.
"""
mtype, _ = mimetypes.guess_type(filename)
_, extension = os.path.splitext(filename)
@@ -53,10 +59,6 @@ def get_parser(filename: str) -> Tuple[Optional[T], Optional[str]]:
for parser_class in _get_parsers(): # type: ignore
if mtype in parser_class.mimetypes:
try:
return parser_class(filename), mtype
except ValueError as e:
logging.info("Got an exception when trying to instanciate "
"%s for %s: %s", parser_class, filename, e)
return None, mtype
# This instantiation might raise a ValueError on malformed files
return parser_class(filename), mtype
return None, mtype

View File

@@ -19,9 +19,10 @@ from . import abstract
poppler_version = Poppler.get_version()
if LooseVersion(poppler_version) < LooseVersion('0.46'): # pragma: no cover
raise ValueError("MAT2 needs at least Poppler version 0.46 to work. \
raise ValueError("mat2 needs at least Poppler version 0.46 to work. \
The installed version is %s." % poppler_version) # pragma: no cover
FIXED_PDF_VERSION = cairo.PDFVersion.VERSION_1_5
class PDFParser(abstract.AbstractParser):
mimetypes = {'application/pdf', }
@@ -32,7 +33,7 @@ class PDFParser(abstract.AbstractParser):
def __init__(self, filename):
super().__init__(filename)
self.uri = 'file://' + os.path.abspath(self.filename)
self.__scale = 2 # how much precision do we want for the render
self.__scale = 200 / 72.0 # how much precision do we want for the render
try: # Check now that the file is valid, to avoid surprises later
Poppler.Document.new_from_file(self.uri, None)
except GLib.GError: # Invalid PDF
@@ -52,6 +53,7 @@ class PDFParser(abstract.AbstractParser):
tmp_path = tempfile.mkstemp()[1]
pdf_surface = cairo.PDFSurface(tmp_path, 10, 10) # resized later anyway
pdf_surface.restrict_to_version(FIXED_PDF_VERSION)
pdf_context = cairo.Context(pdf_surface) # context draws on the surface
for pagenum in range(pages_count):
@@ -80,15 +82,19 @@ class PDFParser(abstract.AbstractParser):
_, tmp_path = tempfile.mkstemp()
pdf_surface = cairo.PDFSurface(tmp_path, 32, 32) # resized later anyway
pdf_surface.restrict_to_version(FIXED_PDF_VERSION)
pdf_context = cairo.Context(pdf_surface)
for pagenum in range(pages_count):
page = document.get_page(pagenum)
if page is None: # pragma: no cover
logging.error("Unable to get PDF pages")
return False
page_width, page_height = page.get_size()
logging.info("Rendering page %d/%d", pagenum + 1, pages_count)
width = int(page_width) * self.__scale
height = int(page_height) * self.__scale
width = int(page_width * self.__scale)
height = int(page_height * self.__scale)
img_surface = cairo.ImageSurface(cairo.FORMAT_ARGB32, width, height)
img_context = cairo.Context(img_surface)
@@ -102,7 +108,11 @@ class PDFParser(abstract.AbstractParser):
buf.seek(0)
img = cairo.ImageSurface.create_from_png(buf)
pdf_surface.set_size(page_width*self.__scale, page_height*self.__scale)
if cairo.version_info < (1, 12, 0):
pdf_surface.set_size(width, height)
else:
pdf_surface.set_size(page_width, page_height)
pdf_surface.set_device_scale(1 / self.__scale, 1 / self.__scale)
pdf_context.set_source_surface(img, 0, 0)
pdf_context.paint()
pdf_context.show_page() # draw pdf_context on pdf_surface
@@ -122,6 +132,17 @@ class PDFParser(abstract.AbstractParser):
document.set_creator('')
document.set_creation_date(-1)
document.save('file://' + os.path.abspath(out_file))
# Cairo adds "/Producer" and "/CreationDate", and Poppler sometimes
# fails to remove them, we have to use this terrible regex.
# It should(tm) be alright though, because cairo's output format
# for metadata is fixed.
with open(out_file, 'rb') as f:
out = re.sub(rb'<<[\s\n]*/Producer.*?>>', b' << >>', f.read(), 0,
re.DOTALL | re.IGNORECASE)
with open(out_file, 'wb') as f:
f.write(out)
return True
@staticmethod

View File

@@ -33,10 +33,10 @@ class TorrentParser(abstract.AbstractParser):
return True
class _BencodeHandler():
class _BencodeHandler:
"""
Since bencode isn't that hard to parse,
MAT2 comes with its own parser, based on the spec
mat2 comes with its own parser, based on the spec
https://wiki.theory.org/index.php/BitTorrentSpecification#Bencoding
"""
def __init__(self):

View File

@@ -1,10 +1,12 @@
import os
import subprocess
import functools
import shutil
import logging
from typing import Dict, Union
from . import exiftool
from . import subprocess
from . import bubblewrap
class AbstractFFmpegParser(exiftool.ExiftoolParser):
@@ -32,9 +34,12 @@ class AbstractFFmpegParser(exiftool.ExiftoolParser):
'-flags:a', '+bitexact', # don't add any metadata
self.output_filename]
try:
subprocess.run(cmd, check=True,
input_filename=self.filename,
output_filename=self.output_filename)
if self.sandbox:
bubblewrap.run(cmd, check=True,
input_filename=self.filename,
output_filename=self.output_filename)
else:
subprocess.run(cmd, check=True)
except subprocess.CalledProcessError as e:
logging.error("Something went wrong during the processing of %s: %s", self.filename, e)
return False
@@ -45,7 +50,7 @@ class AbstractFFmpegParser(exiftool.ExiftoolParser):
ret = dict() # type: Dict[str, Union[str, dict]]
for key, value in meta.items():
if key in self.meta_key_value_allowlist.keys():
if key in self.meta_key_value_allowlist:
if value == self.meta_key_value_allowlist[key]:
continue
ret[key] = value
@@ -130,10 +135,10 @@ class MP4Parser(AbstractFFmpegParser):
}
@functools.lru_cache()
def _get_ffmpeg_path() -> str: # pragma: no cover
ffmpeg_path = '/usr/bin/ffmpeg'
if os.path.isfile(ffmpeg_path):
if os.access(ffmpeg_path, os.X_OK):
return ffmpeg_path
which_path = shutil.which('ffmpeg')
if which_path:
return which_path
raise RuntimeError("Unable to find ffmpeg")

View File

@@ -1,5 +1,5 @@
from html import parser, escape
from typing import Dict, Any, List, Tuple, Set
from typing import Dict, Any, List, Tuple, Set, Optional
import re
import string
@@ -17,7 +17,11 @@ class CSSParser(abstract.AbstractParser):
def remove_all(self) -> bool:
with open(self.filename, encoding='utf-8') as f:
cleaned = re.sub(r'/\*.*?\*/', '', f.read(), 0, self.flags)
try:
content = f.read()
except UnicodeDecodeError: # pragma: no cover
raise ValueError
cleaned = re.sub(r'/\*.*?\*/', '', content, 0, self.flags)
with open(self.output_filename, 'w', encoding='utf-8') as f:
f.write(cleaned)
return True
@@ -25,7 +29,11 @@ class CSSParser(abstract.AbstractParser):
def get_meta(self) -> Dict[str, Any]:
metadata = {}
with open(self.filename, encoding='utf-8') as f:
cssdoc = re.findall(r'/\*(.*?)\*/', f.read(), self.flags)
try:
content = f.read()
except UnicodeDecodeError: # pragma: no cover
raise ValueError
cssdoc = re.findall(r'/\*(.*?)\*/', content, self.flags)
for match in cssdoc:
for line in match.splitlines():
try:
@@ -96,9 +104,21 @@ class _HTMLParser(parser.HTMLParser):
self.tag_required_blocklist = required_blocklisted_tags
self.tag_blocklist = blocklisted_tags
def handle_starttag(self, tag: str, attrs: List[Tuple[str, str]]):
original_tag = self.get_starttag_text()
self.__validation_queue.append(original_tag)
# pylint: disable=R0201
def error(self, message):  # pragma: no cover
    """ Turn a parsing error into a ValueError.

    Python's documentation doesn't mention that subclasses of the parent
    class of parser.HTMLParser have to implement this method. Fuzzing
    found it, by triggering the following exception:
        NotImplementedError: subclasses of ParserBase must override error()

    :raises ValueError: Always, carrying the given message.
    """
    raise ValueError(message)
def handle_starttag(self, tag: str, attrs: List[Tuple[str, Optional[str]]]):
# Ignore the type, because mypy is too stupid to infer
# that get_starttag_text() can't return None.
original_tag = self.get_starttag_text() # type: ignore
self.__validation_queue.append(original_tag) # type: ignore
if tag in self.tag_blocklist:
self.__in_dangerous_tag += 1
@@ -140,7 +160,8 @@ class _HTMLParser(parser.HTMLParser):
if data.strip():
self.__textrepr += escape(data)
def handle_startendtag(self, tag: str, attrs: List[Tuple[str, str]]):
def handle_startendtag(self, tag: str,
attrs: List[Tuple[str, Optional[str]]]):
if tag in self.tag_required_blocklist | self.tag_blocklist:
meta = {k:v for k, v in attrs}
name = meta.get('name', 'harmful metadata')

150
mat2
View File

@@ -1,35 +1,44 @@
#!/usr/bin/env python3
import os
from typing import Tuple, Generator, List, Union
import shutil
from typing import Tuple, List, Union, Set
import sys
import mimetypes
import argparse
import logging
import unicodedata
import concurrent.futures
try:
from libmat2 import parser_factory, UNSUPPORTED_EXTENSIONS
from libmat2 import check_dependencies, UnknownMemberPolicy
except ValueError as e:
print(e)
except ValueError as ex:
print(ex)
sys.exit(1)
__version__ = '0.9.0'
__version__ = '0.13.0'
# Make pyflakes happy
assert Set
assert Tuple
assert Union
logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.WARNING)
def __print_without_chars(s: str):
""" Remove control characters
We might use 'Cc' instead of 'C', but better safe than sorry
https://www.unicode.org/reports/tr44/#GC_Values_Table
"""
print(''.join(ch for ch in s if not unicodedata.category(ch).startswith('C')))
def __check_file(filename: str, mode: int = os.R_OK) -> bool:
if not os.path.exists(filename):
print("[-] %s doesn't exist." % filename)
__print_without_chars("[-] %s doesn't exist." % filename)
return False
elif not os.path.isfile(filename):
print("[-] %s is not a regular file." % filename)
__print_without_chars("[-] %s is not a regular file." % filename)
return False
elif not os.access(filename, mode):
mode_str = [] # type: List[str]
@@ -37,93 +46,115 @@ def __check_file(filename: str, mode: int = os.R_OK) -> bool:
mode_str += 'readable'
if mode & os.W_OK:
mode_str += 'writeable'
print("[-] %s is not %s." % (filename, 'nor '.join(mode_str)))
__print_without_chars("[-] %s is not %s." % (filename, 'nor '.join(mode_str)))
return False
return True
def create_arg_parser() -> argparse.ArgumentParser:
parser = argparse.ArgumentParser(description='Metadata anonymisation toolkit 2')
parser.add_argument('files', nargs='*', help='the files to process')
parser.add_argument('-v', '--version', action='version',
version='MAT2 %s' % __version__)
parser.add_argument('-l', '--list', action='store_true',
help='list all supported fileformats')
parser.add_argument('--check-dependencies', action='store_true',
help='check if MAT2 has all the dependencies it needs')
parser.add_argument('-V', '--verbose', action='store_true',
help='show more verbose status information')
parser.add_argument('--unknown-members', metavar='policy', default='abort',
help='how to handle unknown members of archive-style '
'files (policy should be one of: %s) [Default: abort]' %
', '.join(p.value for p in UnknownMemberPolicy))
parser.add_argument('--inplace', action='store_true',
help='clean in place, without backup')
parser.add_argument('--no-sandbox', dest='sandbox', action='store_true',
default=False, help='Disable bubblewrap\'s sandboxing')
excl_group = parser.add_mutually_exclusive_group()
excl_group.add_argument('files', nargs='*', help='the files to process',
default=[])
excl_group.add_argument('-v', '--version', action='version',
version='mat2 %s' % __version__)
excl_group.add_argument('-l', '--list', action='store_true', default=False,
help='list all supported fileformats')
excl_group.add_argument('--check-dependencies', action='store_true',
default=False,
help='check if mat2 has all the dependencies it '
'needs')
excl_group = parser.add_mutually_exclusive_group()
excl_group.add_argument('-L', '--lightweight', action='store_true',
help='remove SOME metadata')
excl_group.add_argument('-s', '--show', action='store_true',
help='list harmful metadata detectable by mat2 '
'without removing them')
info = parser.add_mutually_exclusive_group()
info.add_argument('-s', '--show', action='store_true',
help='list harmful metadata detectable by MAT2 without removing them')
info.add_argument('-L', '--lightweight', action='store_true',
help='remove SOME metadata')
return parser
def show_meta(filename: str):
def show_meta(filename: str, sandbox: bool):
if not __check_file(filename):
return
p, mtype = parser_factory.get_parser(filename) # type: ignore
if p is None:
print("[-] %s's format (%s) is not supported" % (filename, mtype))
try:
p, mtype = parser_factory.get_parser(filename) # type: ignore
except ValueError as e:
__print_without_chars("[-] something went wrong when processing %s: %s" % (filename, e))
return
if p is None:
__print_without_chars("[-] %s's format (%s) is not supported" % (filename, mtype))
return
p.sandbox = sandbox
__print_meta(filename, p.get_meta())
def __print_meta(filename: str, metadata: dict, depth: int = 1):
    """Recursively print a (possibly nested) metadata mapping.

    :param filename: label used in the header line; for nested mappings this
        is the key under which the sub-mapping was stored.
    :param metadata: mapping of metadata names to values; values that are
        themselves dicts are printed one level deeper.
    :param depth: current nesting level, used for the padding and for the
        number of '+' characters in the header.
    """
    padding = " " * depth*2
    if not metadata:
        __print_without_chars(padding + "No metadata found in %s." % filename)
        return

    __print_without_chars("[%s] Metadata for %s:" % ('+'*depth, filename))

    for (k, v) in sorted(metadata.items()):
        if isinstance(v, dict):
            __print_meta(k, v, depth+1)
            continue

        # NOTE(review): every line goes through __print_without_chars, which
        # presumably strips control characters -- confirm against its
        # definition before relying on it for untrusted metadata values.
        try:  # FIXME this is ugly.
            __print_without_chars(padding + " %s: %s" % (k, v))
        except UnicodeEncodeError:
            __print_without_chars(padding + " %s: harmful content" % k)
        except TypeError:
            pass  # for things that aren't iterable
def clean_meta(filename: str, is_lightweight: bool, inplace: bool, sandbox: bool,
               policy: UnknownMemberPolicy) -> bool:
    """Remove the metadata of `filename`.

    :param filename: path of the file to clean.
    :param is_lightweight: stored on the parser as `lightweight_cleaning`.
    :param inplace: when true, the cleaned output replaces `filename`; the
        file must then be writable as well as readable.
    :param sandbox: stored on the parser as `sandbox`.
    :param policy: stored on the parser as `unknown_member_policy`.
    :return: the result of the parser's `remove_all()`, or False when the
        file fails `__check_file`, no parser can be obtained, the format is
        unsupported, or `remove_all()` raises RuntimeError.
    """
    # Cleaning in place overwrites the original, so it must be writable too.
    mode = (os.R_OK | os.W_OK) if inplace else os.R_OK
    if not __check_file(filename, mode):
        return False

    try:
        p, mtype = parser_factory.get_parser(filename)  # type: ignore
    except ValueError as e:
        __print_without_chars("[-] something went wrong when cleaning %s: %s" % (filename, e))
        return False

    if p is None:
        __print_without_chars("[-] %s's format (%s) is not supported" % (filename, mtype))
        return False

    p.unknown_member_policy = policy
    p.lightweight_cleaning = is_lightweight
    p.sandbox = sandbox

    try:
        logging.debug('Cleaning %s…', filename)
        ret = p.remove_all()
        if ret is True:
            # Give the cleaned file the same permission bits as the original.
            shutil.copymode(filename, p.output_filename)
            if inplace is True:
                os.rename(p.output_filename, filename)
        return ret
    except RuntimeError as e:
        __print_without_chars("[-] %s can't be cleaned: %s" % (filename, e))
    return False
def show_parsers():
print('[+] Supported formats:')
formats = set() # Set[str]
@@ -138,53 +169,66 @@ def show_parsers():
# mimetype, so there is not point in showing the mimetype at all
continue
formats.add(' - %s (%s)' % (mtype, ', '.join(extensions)))
print('\n'.join(sorted(formats)))
__print_without_chars('\n'.join(sorted(formats)))
def __get_files_recursively(files: List[str]) -> List[str]:
    """Expand `files` into a de-duplicated list of file paths.

    Directories are walked with `os.walk`; every path, whether given
    directly or found while walking, is kept only if `__check_file`
    accepts it.

    :param files: file and/or directory paths.
    :return: the accepted paths, without duplicates, in no particular order.
    """
    # A set is used so that a file reachable both directly and through a
    # listed directory is only processed once.
    ret = set()  # type: Set[str]
    for f in files:
        if os.path.isdir(f):
            for path, _, _files in os.walk(f):
                for _f in _files:
                    fname = os.path.join(path, _f)
                    if __check_file(fname):
                        ret.add(fname)
        elif __check_file(f):
            ret.add(f)
    return list(ret)
def main() -> int:
    """Command-line entry point.

    Depending on the parsed arguments this lists the supported formats,
    reports the dependencies, shows the metadata of the given files, or
    cleans them.

    :return: 0 on success, -1 if at least one file could not be cleaned.
    """
    arg_parser = create_arg_parser()
    args = arg_parser.parse_args()

    if args.verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    if not args.files:
        if args.list:
            show_parsers()
            return 0
        elif args.check_dependencies:
            __print_without_chars("Dependencies for mat2 %s:" % __version__)
            # Each value is indexed with 'found' and 'required'.
            for key, value in sorted(check_dependencies().items()):
                __print_without_chars('- %s: %s %s' % (key, 'yes' if value['found'] else 'no',
                                                       '(optional)' if not value['required'] else ''))
        else:
            arg_parser.print_help()
        return 0

    elif args.show:
        for f in __get_files_recursively(args.files):
            show_meta(f, args.sandbox)
        return 0

    else:
        inplace = args.inplace
        policy = UnknownMemberPolicy(args.unknown_members)
        if policy == UnknownMemberPolicy.KEEP:
            logging.warning('Keeping unknown member files may leak metadata in the resulting file!')

        no_failure = True
        files = __get_files_recursively(args.files)
        # We have to use Processes instead of Threads, since
        # we're using tempfile.mkdtemp, which isn't thread-safe.
        futures = list()
        with concurrent.futures.ProcessPoolExecutor() as executor:
            for f in files:
                future = executor.submit(clean_meta, f, args.lightweight,
                                         inplace, args.sandbox, policy)
                futures.append(future)
            # Each file is cleaned exactly once, by the pool; a single
            # failed file is enough to make the whole run fail.
            for future in concurrent.futures.as_completed(futures):
                no_failure &= future.result()
        return 0 if no_failure is True else -1

View File

@@ -9,4 +9,7 @@
Simply copy the `mat2.py` file to `~/.local/share/nautilus-python/extensions`,
and launch Nautilus; you should now have a "Remove metadata" item in the
right-click menu on supported files.
Please note: This is not needed if using a distribution provided package. It
only applies if installing from source.

View File

@@ -2,7 +2,7 @@
"""
Because writing GUI is non-trivial (cf. https://0xacab.org/jvoisin/mat2/issues/3),
we decided to write a Nautilus extensions instead
we decided to write a Nautilus extension instead
(cf. https://0xacab.org/jvoisin/mat2/issues/2).
The code is a little bit convoluted because Gtk isn't thread-safe,
@@ -16,6 +16,7 @@ import queue
import threading
from typing import Tuple, Optional, List
from urllib.parse import unquote
import gettext
import gi
gi.require_version('Nautilus', '3.0')
@@ -25,6 +26,8 @@ from gi.repository import Nautilus, GObject, Gtk, Gio, GLib, GdkPixbuf
from libmat2 import parser_factory
_ = gettext.gettext
def _remove_metadata(fpath) -> Tuple[bool, Optional[str]]:
""" This is a simple wrapper around libmat2, because it's
@@ -36,7 +39,7 @@ def _remove_metadata(fpath) -> Tuple[bool, Optional[str]]:
return parser.remove_all(), mtype
class Mat2Extension(GObject.GObject, Nautilus.MenuProvider, Nautilus.LocationWidgetProvider):
""" This class adds an item to the right-clic menu in Nautilus. """
""" This class adds an item to the right-click menu in Nautilus. """
def __init__(self):
super().__init__()
@@ -51,11 +54,11 @@ class Mat2Extension(GObject.GObject, Nautilus.MenuProvider, Nautilus.LocationWid
self.infobar.set_show_close_button(True)
self.infobar_hbox = Gtk.Box(orientation=Gtk.Orientation.HORIZONTAL)
btn = Gtk.Button("Show")
btn = Gtk.Button(_("Show"))
btn.connect("clicked", self.__cb_show_failed)
self.infobar_hbox.pack_end(btn, False, False, 0)
infobar_msg = Gtk.Label("Failed to clean some items")
infobar_msg = Gtk.Label(_("Failed to clean some items"))
self.infobar_hbox.pack_start(infobar_msg, False, False, 0)
self.infobar.get_content_area().pack_start(self.infobar_hbox, True, True, 0)
@@ -90,9 +93,9 @@ class Mat2Extension(GObject.GObject, Nautilus.MenuProvider, Nautilus.LocationWid
window = Gtk.Window()
headerbar = Gtk.HeaderBar()
window.set_titlebar(headerbar)
headerbar.props.title = "Metadata removal failed"
headerbar.props.title = _("Metadata removal failed")
close_buton = Gtk.Button("Close")
close_buton = Gtk.Button(_("Close"))
close_buton.connect("clicked", lambda _: window.close())
headerbar.pack_end(close_buton)
@@ -107,9 +110,9 @@ class Mat2Extension(GObject.GObject, Nautilus.MenuProvider, Nautilus.LocationWid
""" Validate if a given file FileInfo `fileinfo` can be processed.
Returns a boolean, and a textreason why"""
if fileinfo.get_uri_scheme() != "file" or fileinfo.is_directory():
return False, "Not a file"
return False, _("Not a file")
elif not fileinfo.can_write():
return False, "Not writeable"
return False, _("Not writeable")
return True, ""
def __create_treeview(self) -> Gtk.TreeView:
@@ -120,7 +123,7 @@ class Mat2Extension(GObject.GObject, Nautilus.MenuProvider, Nautilus.LocationWid
column_pixbuf = Gtk.TreeViewColumn("Icon", renderer_pixbuf, pixbuf=0)
treeview.append_column(column_pixbuf)
for idx, name in enumerate(['File', 'Reason']):
for idx, name in enumerate([_('File'), _('Reason')]):
renderer_text = Gtk.CellRendererText()
column_text = Gtk.TreeViewColumn(name, renderer_text, text=idx+1)
treeview.append_column(column_text)
@@ -180,7 +183,7 @@ class Mat2Extension(GObject.GObject, Nautilus.MenuProvider, Nautilus.LocationWid
return False
progressbar.pulse()
progressbar.set_text("Cleaning %s" % fname)
progressbar.set_text(_("Cleaning %s") % fname)
progressbar.show_all()
self.infobar_hbox.show_all()
self.infobar.show_all()
@@ -202,7 +205,7 @@ class Mat2Extension(GObject.GObject, Nautilus.MenuProvider, Nautilus.LocationWid
fpath = unquote(fileinfo.get_uri()[7:]) # `len('file://') = 7`
success, mtype = _remove_metadata(fpath)
if not success:
self.failed_items.append((fname, mtype, 'Unsupported/invalid'))
self.failed_items.append((fname, mtype, _('Unsupported/invalid')))
processing_queue.put(None) # signal that we processed all the files
return True
@@ -231,13 +234,13 @@ class Mat2Extension(GObject.GObject, Nautilus.MenuProvider, Nautilus.LocationWid
"""
# Do not show the menu item if not a single file has a chance to be
# processed by mat2.
if not any([is_valid for (is_valid, _) in map(self.__validate, files)]):
if not any((is_valid for (is_valid, _) in map(self.__validate, files))):
return None
item = Nautilus.MenuItem(
name="MAT2::Remove_metadata",
label="Remove metadata",
tip="Remove metadata"
name="mat2::Remove_metadata",
label=_("Remove metadata"),
tip=_("Remove metadata")
)
item.connect('activate', self.__cb_menu_activate, files)

View File

@@ -5,7 +5,7 @@ with open("README.md", encoding='utf-8') as fh:
setuptools.setup(
name="mat2",
version='0.9.0',
version='0.13.0',
author="Julien (jvoisin) Voisin",
author_email="julien.voisin+mat2@dustri.org",
description="A handy tool to trash your metadata",

BIN
tests/data/dirty.aiff Normal file

Binary file not shown.

BIN
tests/data/dirty.heic Normal file

Binary file not shown.

8
tests/data/dirty.ppm Normal file
View File

@@ -0,0 +1,8 @@
P3
# A metadata
3 2 1
1 0 1 0 1 0 0 0 1
# And an other one
1 1 0 1 0 1 1 0 0
# and a final one here

636
tests/data/dirty.svg Normal file
View File

@@ -0,0 +1,636 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Created with Inkscape (http://www.inkscape.org/) -->
<svg
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:cc="http://creativecommons.org/ns#"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:svg="http://www.w3.org/2000/svg"
xmlns="http://www.w3.org/2000/svg"
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
inkscape:export-ydpi="384"
inkscape:export-xdpi="384"
inkscape:export-filename="mat2.png"
width="128"
height="128"
id="svg11300"
sodipodi:version="0.32"
inkscape:version="0.92.3 (2405546, 2018-03-11)"
sodipodi:docname="dirty.svg"
inkscape:output_extension="org.inkscape.output.svg.inkscape"
version="1.0"
style="display:inline;enable-background:new"
viewBox="0 0 128 128">
<script
id="script4600" />
<title
id="title4162">Adwaita Icon Template</title>
<defs
id="defs3" />
<sodipodi:namedview
stroke="#ef2929"
fill="#f57900"
id="base"
pagecolor="#ffffff"
bordercolor="#666666"
borderopacity="0.25490196"
inkscape:pageopacity="0.0"
inkscape:pageshadow="2"
inkscape:zoom="4.0446508"
inkscape:cx="61.536232"
inkscape:cy="41.548134"
inkscape:current-layer="layer1"
showgrid="true"
inkscape:grid-bbox="true"
inkscape:document-units="px"
inkscape:showpageshadow="false"
inkscape:window-width="1366"
inkscape:window-height="747"
inkscape:window-x="0"
inkscape:window-y="21"
width="400px"
height="300px"
inkscape:snap-nodes="true"
inkscape:snap-bbox="false"
objecttolerance="7"
gridtolerance="12"
guidetolerance="13"
inkscape:window-maximized="1"
inkscape:pagecheckerboard="false"
showguides="true"
inkscape:guide-bbox="true"
inkscape:locked="false"
inkscape:measure-start="0,0"
inkscape:measure-end="0,0"
inkscape:object-nodes="true"
inkscape:bbox-nodes="true"
inkscape:snap-global="true"
inkscape:object-paths="true"
inkscape:snap-intersection-paths="true"
inkscape:snap-bbox-edge-midpoints="true"
inkscape:snap-bbox-midpoints="true"
showborder="false"
inkscape:snap-center="true"
inkscape:snap-object-midpoints="true"
inkscape:snap-midpoints="true"
inkscape:snap-smooth-nodes="true">
<inkscape:grid
type="xygrid"
id="grid5883"
spacingx="2"
spacingy="2"
enabled="true"
visible="true"
empspacing="4"
originx="0"
originy="0" />
<sodipodi:guide
position="64,8"
orientation="0,1"
id="guide1073"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="12,64"
orientation="1,0"
id="guide1075"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="64,104"
orientation="0,1"
id="guide1099"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="64,128"
orientation="0,1"
id="guide993"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="104,64"
orientation="1,0"
id="guide995"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="9.2651362e-08,64"
orientation="1,0"
id="guide867"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="120,64"
orientation="1,0"
id="guide869"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="64,116"
orientation="0,1"
id="guide871"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<inkscape:grid
type="xygrid"
id="grid873"
spacingx="1"
spacingy="1"
empspacing="8"
color="#000000"
opacity="0.49019608"
empcolor="#000000"
empopacity="0.08627451"
dotted="true" />
<sodipodi:guide
position="24,64"
orientation="1,0"
id="guide877"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="116,64"
orientation="1,0"
id="guide879"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="64,120"
orientation="0,1"
id="guide881"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="64,12"
orientation="0,1"
id="guide883"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="8,64"
orientation="1,0"
id="guide885"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="128,64"
orientation="1,0"
id="guide887"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="64,0"
orientation="0,1"
id="guide897"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="64,24"
orientation="0,1"
id="guide899"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="256,256"
orientation="-0.70710678,0.70710678"
id="guide950"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="64,64"
orientation="0.70710678,0.70710678"
id="guide952"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
</sodipodi:namedview>
<metadata
id="metadata4">
<rdf:RDF>
<cc:Work
rdf:about="">
<dc:format>image/svg+xml</dc:format>
<dc:type
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
<dc:creator>
<cc:Agent>
<dc:title>GNOME Design Team</dc:title>
</cc:Agent>
</dc:creator>
<dc:source>mat2's source code</dc:source>
<cc:license
rdf:resource="http://creativecommons.org/licenses/by-sa/4.0/" />
<dc:title>Adwaita Icon Template</dc:title>
<dc:subject>
<rdf:Bag>
<rdf:li>mat2</rdf:li>
<rdf:li>logo</rdf:li>
<rdf:li>metadata</rdf:li>
</rdf:Bag>
</dc:subject>
<dc:date>2019 07 13</dc:date>
<dc:rights>
<cc:Agent>
<dc:title>LGPL</dc:title>
</cc:Agent>
</dc:rights>
<dc:publisher>
<cc:Agent>
<dc:title>jvoisin</dc:title>
</cc:Agent>
</dc:publisher>
<dc:identifier>mat2-testdata-svg</dc:identifier>
<dc:relation />
<dc:language>English</dc:language>
<dc:coverage />
<dc:description>This is a test svg image for mat2's testsuite</dc:description>
<dc:contributor>
<cc:Agent>
<dc:title>jvoisin, and Rose for the design</dc:title>
</cc:Agent>
</dc:contributor>
</cc:Work>
<cc:License
rdf:about="http://creativecommons.org/licenses/by-sa/4.0/">
<cc:permits
rdf:resource="http://creativecommons.org/ns#Reproduction" />
<cc:permits
rdf:resource="http://creativecommons.org/ns#Distribution" />
<cc:requires
rdf:resource="http://creativecommons.org/ns#Notice" />
<cc:requires
rdf:resource="http://creativecommons.org/ns#Attribution" />
<cc:permits
rdf:resource="http://creativecommons.org/ns#DerivativeWorks" />
<cc:requires
rdf:resource="http://creativecommons.org/ns#ShareAlike" />
</cc:License>
</rdf:RDF>
</metadata>
<g
id="layer1"
inkscape:label="Icon"
inkscape:groupmode="layer"
style="display:inline"
transform="translate(0,-172)">
<g
inkscape:groupmode="layer"
id="layer2"
inkscape:label="baseplate"
style="display:none">
<text
xml:space="preserve"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:5.33333349px;line-height:125%;font-family:Cantarell;-inkscape-font-specification:'Cantarell, Normal';text-align:start;writing-mode:lr-tb;text-anchor:start;display:inline;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.33264872;enable-background:new"
x="7.9499588"
y="148.65199"
id="context"
inkscape:label="context"><tspan
sodipodi:role="line"
id="tspan2716"
x="7.9499588"
y="148.65199"
style="font-size:5.33333349px;stroke-width:0.33264872">apps</tspan></text>
<text
inkscape:label="icon-name"
id="text3021"
y="157.23398"
x="7.7533054"
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:5.33333349px;line-height:125%;font-family:Cantarell;-inkscape-font-specification:'Cantarell, Bold';text-align:start;writing-mode:lr-tb;text-anchor:start;display:inline;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.33264872;enable-background:new"
xml:space="preserve"><tspan
y="157.23398"
x="7.7533054"
id="tspan3023"
sodipodi:role="line"
style="font-size:5.33333349px;stroke-width:0.33264872">org.gnome.</tspan></text>
<g
style="display:inline;fill:#000000;enable-background:new"
transform="matrix(7.9911709,0,0,8.0036407,-167.7909,-4846.0776)"
id="g12027"
inkscape:export-xdpi="12"
inkscape:export-ydpi="12" />
<rect
style="display:inline;overflow:visible;visibility:visible;fill:#f0f0f0;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.5;marker:none;enable-background:accumulate"
id="rect13805"
width="128"
height="128"
x="9.2651362e-08"
y="172"
inkscape:label="512x512" />
<g
id="g883"
style="fill:none;fill-opacity:0.25098039;stroke:#a579b3;stroke-opacity:1"
transform="translate(-24,24)" />
<g
id="g900"
style="fill:none;fill-opacity:0.25098039;stroke:#a579b3;stroke-opacity:1"
transform="translate(-24,24)" />
<g
id="g1168"
transform="matrix(0.25,0,0,0.25,6.9488522e-8,225)">
<circle
cx="256"
cy="44"
r="240"
id="path1142"
style="opacity:0.1;fill:#2864b0;fill-opacity:1;stroke:none;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;marker:none;marker-start:none;marker-mid:none;marker-end:none;paint-order:normal" />
<rect
ry="32"
rx="32"
y="-180"
x="96"
height="448"
width="319.99979"
id="rect1110"
style="opacity:0.1;fill:#2864b0;fill-opacity:1;stroke:none;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;marker:none;marker-start:none;marker-mid:none;marker-end:none;paint-order:normal" />
<rect
ry="32"
rx="32"
y="-164"
x="48"
height="416"
width="416"
id="rect1110-8"
style="display:inline;opacity:0.1;fill:#2864b0;fill-opacity:1;stroke:none;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;marker:none;marker-start:none;marker-mid:none;marker-end:none;paint-order:normal;enable-background:new" />
<rect
ry="32"
rx="32"
y="-116"
x="32"
height="320"
width="448"
id="rect1110-8-9"
style="display:inline;opacity:0.1;fill:#2864b0;fill-opacity:1;stroke:none;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;marker:none;marker-start:none;marker-mid:none;marker-end:none;paint-order:normal;enable-background:new" />
</g>
</g>
<g
inkscape:groupmode="layer"
id="layer9"
inkscape:label="hires"
style="display:none" />
<g
id="g944"
transform="matrix(1,0,0,0.93868822,0,14.545966)">
<path
style="fill:#99c1f1;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.41013032;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
d="m 50.53899,195.25817 6.396029,-11.43484 1.082405,-0.87215 4.821622,-10.46578 0.885604,-0.38763 2.558412,4.74837 2.755213,9.59364 1.672808,1.35667 3.542417,-0.87215 5.707227,12.59771 12.988859,9.59364 3.050415,3.87621 v 2.71335 l -16.334476,-1.25977 -7.084833,1.45359 -4.428021,-0.38763 -7.084833,0.29072 -11.414452,-0.58143 -3.640817,0.96905 -9.052843,-1.64739 -2.066409,0.0969 -1.476008,-0.48452 1.377607,-1.45358 1.869609,-1.06596 6.002428,-11.04722 1.279206,0.48453 5.412025,-6.49267 z"
id="path3455"
inkscape:connector-curvature="0" />
<path
style="fill:#241f31;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 49.358184,215.31759 -3.444016,0.9206 -9.003641,-1.74429 -1.918809,0.24226 -1.623608,-0.58143 1.574407,-1.50204 1.722008,-0.96905 5.953228,-11.09567 1.279205,0.53298 5.510426,-6.54112 0.344401,0.29072 -4.969223,10.27197 2.214011,1.93811 -0.246001,4.45765 z"
id="path3459"
inkscape:connector-curvature="0" />
<path
style="fill:#241f31;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 50.438601,195.22912 6.470906,-11.5803 1.113274,-0.6167 4.870575,-10.62099 0.904535,-0.41113 -0.417479,3.3576 0.626218,0.89079 0.834954,15.89722 1.391594,3.70021 -3.687722,5.34476 0.208739,1.37044 -0.347898,5.68737 1.87865,3.28908 7.375442,2.19272 1.252433,2.19272 -0.487057,0.13704 -4.244358,-0.54818 -6.540486,0.41114 -2.435287,-2.19272 -0.626216,-4.24839 -2.087389,-6.16703 -4.035619,-3.42612 -2.087388,-4.38544"
id="path3461"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 32.069579,218.11563 c -0.06958,-0.27409 0.695796,-1.23341 0.695796,-1.23341 l 2.783185,-0.0685 1.739491,2.26124 4.661836,5.13919 0.139158,1.57602 -4.174778,5.96145 -0.487057,6.16703 -2.922344,2.26124 -0.06958,1.57601 h -1.113274 l -1.322013,-3.08351 2.017809,-14.86938 z"
id="path3400"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 48.83827,222.43255 1.600331,-3.01499 -0.695796,-0.75375 -5.635951,-1.16488 -3.200663,0.82227 -0.06958,1.50749 1.53075,0.75375 1.461174,2.67237 -0.208739,1.71307 1.739489,1.02783 2.296129,-0.54818 z"
id="path3402"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 51.203977,217.70449 1.113274,-0.68522 2.365707,1.02784 1.322013,2.67237 -2.226548,2.26125 -1.322013,-0.82227 -1.322013,-0.61671 0.834956,-1.71306 z"
id="path3404"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 43.758957,226.61242 1.948228,0.68522 0.417479,1.91863 -0.626216,1.30193 -1.182854,0.34261 -1.113275,1.02784 -0.765376,3.63169 0.626218,3.01499 -1.252435,0.68522 -0.487057,-0.41113 -0.278319,-1.5075 -1.80907,-1.37045 -0.765376,-3.49464 3.618141,-3.42613 1.669912,-2.67237"
id="path3406"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 50.57776,223.25481 0.13916,0.68523 -2.783187,3.83726 0.06958,1.64454 -0.626218,1.50749 -1.60033,1.43897 -0.06958,0.75375 1.600333,1.91863 1.182854,3.08351 0.974114,0.68523 1.669911,-2.80942 -0.278318,-3.22056 3.966039,-3.3576 0.695796,-1.09636 -3.270243,-4.45396 z"
id="path3408"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 51.839954,236.39856 -0.834826,1.58948 0.166966,1.26061 1.057445,1.97315 0.500896,-0.32886 0.389584,-1.7539 1.447031,-1.151 2.337512,-4.0559 -0.22262,-1.04138 -1.947927,-1.69909 -2.114892,1.31542 0.278276,3.39819 z"
id="path3410"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 57.593778,229.84236 -1.043694,1.09636 0.765375,0.89079 1.043695,-0.20556 v -1.43898 z"
id="path3412"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 59.344793,218.25267 -0.765376,2.19272 -0.695796,0.27409 -0.695796,1.91863 -2.226548,2.26124 2.574446,3.56317 h 1.182854 l 0.487057,0.75375 0.626217,1.09636 1.948229,1.30193 2.922346,-0.6167 1.53075,-2.26125 -1.043694,-3.3576 -1.043693,-1.64454 1.322011,-2.60385 -0.904535,-1.37045 -2.226548,0.0685 z"
id="path3416"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 72.150522,238.17554 -0.518261,1.78635 1.036524,2.16915 1.684349,-2.04155 -0.647826,-2.16915 z"
id="path3418"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 66.789813,223.66595 1.600333,-0.75375 1.739489,-4.11135 2.922346,0.75375 1.322013,0.41114 0.139159,6.7152 -1.461172,1.02784 -2.226548,4.17987 -0.834956,-0.41114 -0.626216,0.95932 -2.574448,-0.61671 0.904537,-3.08351 z"
id="path3422"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 77.505077,218.59529 1.182854,-0.20557 2.435287,1.30193 -0.974115,1.02783 -2.087389,3.63169 -1.391593,0.0685 -1.113274,-0.61671 1.043695,-2.19271 z"
id="path3426"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 73.539038,231.06638 1.043695,-1.30193 1.043694,-2.80942 4.522676,1.71306 -0.974115,2.87795 -1.94823,-0.41114 -1.80907,1.09636 z"
id="path3428"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 78.200873,225.6531 7.932079,-7.94861 3.339822,1.09636 0.974115,0.13705 1.600331,-1.02784 3.339822,0.0685 -5.079314,12.81371 -3.200663,-1.98715 0.139161,-1.16489 -0.695798,-0.6167 -0.208737,-1.16488 -1.043696,0.27409 -3.200663,2.39829 z"
id="path3430"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 81.401536,230.99786 c 0,-0.2741 2.156968,-1.98716 2.156968,-1.98716 l 2.017811,1.30193 -0.904535,2.32976 -1.182855,0.75375 z"
id="path3432"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 81.679855,237.8501 0.765375,-1.91863 0.208739,-1.2334 2.156969,0.20557 2.156968,-2.87795 3.409403,1.02784 -0.904535,2.80942 -0.904535,0.34261 -0.626218,2.80943 1.043694,4.72805 -0.904535,1.09636 -1.80907,-2.19272 -0.626217,-1.37045 z"
id="path3434"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 78.131294,238.60385 0.626216,3.08351 -0.626216,3.22056 0.765375,0.95931 -0.626216,5.68737 2.504866,2.32976 1.87865,-0.47965 0.417478,-3.35761 1.669911,-0.0685 3.757301,-1.8501 -0.20874,-1.98716 -2.226548,-0.20556 -1.182854,-3.01499 -3.200662,-2.05568 -1.252434,-2.39828 z"
id="path3436"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 84.532619,251.41755 -0.278318,1.43898 -0.695797,0.6167 1.322013,2.67238 2.365709,-0.20557 1.53075,-2.94647 -2.365707,-1.98715 z"
id="path3438"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 64.993183,249.51154 -1.14959,2.51583 0.766392,1.69818 2.618509,0.25159 0.702526,1.19502 1.021857,2.39003 -0.574794,2.32714 3.89583,1.88688 0.95799,-1.06923 0.510928,-4.59139 -4.023561,-2.70451 -0.127732,-4.21402 z"
id="path3440"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 72.912822,251.00642 h 1.391592 l 2.574446,0.75375 1.391593,1.98715 1.461172,1.30193 -0.139159,3.42612 -3.409402,1.57602 -0.974115,-1.85011 0.626217,-3.3576 -3.270243,-1.85011 z"
id="path3442"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 72.147446,264.77944 1.80907,-1.98715 3.339822,-1.85011 1.322013,-0.0685 4.661835,-3.63169 1.391594,0.34261 0.556637,4.52248 -3.200664,4.04283 -2.852765,-0.82227 -1.80907,0.54818 -0.765376,1.43897 -2.087389,0.68522 z"
id="path3444"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 75.06979,272.93361 0.765376,-1.30192 1.252433,-0.41114 0.904535,-2.87794 1.94823,-0.61671 0.556637,2.60386 -3.339822,6.0985 -1.391593,-0.0685 z"
id="path3446"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 71.451649,268.20556 -1.252433,1.85011 2.504867,1.98715 0.765376,0.82227 1.73949,-2.39829 -2.296127,-2.80942 -1.461173,0.27409 z"
id="path3448"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 62.24531,254.0948 1.461172,1.02784 1.948229,0.54818 0.487058,1.64454 -1.461173,2.67237 -0.06958,1.78159 -1.669911,1.85011 -1.252433,-2.05568 0.487057,-2.80942 -1.391593,-0.34261 -0.904535,-2.80942 z"
id="path3450"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 47.585836,246.55246 -0.695796,3.70021 -0.139159,1.37045 1.87865,0.68523 1.391592,0.95931 1.809071,-1.64454 -0.417478,-0.95931 z"
id="path3452"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 54.682958,247.78586 -1.043694,1.02784 0.208739,1.98715 1.600331,0.89079 0.626217,-0.47965 0.06958,-2.26125 z"
id="path3454"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 48.629531,258.95503 4.800994,-6.16703 3.409402,0.82227 0.556637,1.78159 3.131083,4.79657 -1.669911,5.82441 -3.200663,-1.37045 -0.417478,-3.49464 -2.087388,1.30192 z"
id="path3456"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 45.915924,252.71948 -0.487056,1.98715 1.60033,1.57602 1.461174,-0.20557 -0.347899,-2.19272 z"
id="path3458"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 67.555189,261.6274 -1.80907,2.80943 -2.435287,8.42826 2.783185,3.76874 1.461172,-0.0685 1.113274,-2.12419 1.043696,-0.20557 0.487057,-1.09636 -1.043694,-4.45396 1.182853,-4.31692 z"
id="path3460"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 58.718577,267.79443 1.600331,-1.23341 2.017809,1.71306 -0.904535,1.85011 z"
id="path3462"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 58.509838,276.49678 2.156968,-4.591 1.391593,-0.27409 0.834955,1.50749 -2.017809,5.13919 z"
id="path3464"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 71.242911,274.02997 1.391592,0.20557 1.043694,3.01499 2.01781,0.68522 1.530751,1.57602 -0.904535,2.87795 -2.365707,2.32976 -0.139159,3.56317 -1.322013,1.98715 -2.504867,-1.85011 -0.278318,-2.67237 -1.530752,-1.78159 -1.113274,-3.08351 3.61814,-4.17987 z"
id="path3466"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 62.893354,276.5653 3.270244,1.16489 0.06958,3.70021 -0.556637,0.68523 0.974115,3.70021 1.252433,1.64454 0.06958,3.08351 -2.017809,1.37045 -2.574447,8.08566 -2.574447,-1.30193 -1.948229,-9.79872 z"
id="path3468"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 58.440258,283.5546 h 0.556637 l 0.417478,0.95931 -0.208739,1.30193 -1.461172,0.13704 z"
id="path3472"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 56.700767,279.16916 -1.113274,0.95931 0.834956,2.80943 1.600331,0.20556 0.487058,-2.05567 -0.695796,-1.91863 z"
id="path3474"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 53.152207,272.17987 0.139159,5.13918 1.87865,1.23341 0.834955,-0.54818 0.904535,-3.63169 1.530752,-1.57602 -1.669911,-3.97431 -3.548561,3.08352 z"
id="path3476"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 45.915924,258.33832 -0.208739,3.83726 -4.731414,3.97431 1.948229,2.80942 8.488716,0.82227 0.417478,1.98715 1.043694,-0.75375 0.487057,-2.19272 1.182854,-1.64454 -0.417478,-1.09635 -1.87865,-2.60386 -3.757299,-1.37045 -1.461174,-3.22056 z"
id="path3480"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 40.279975,263.68308 1.669912,0.6167 3.061502,-6.37259 -0.904535,-5.61884 -2.504867,-0.34262 -1.391592,-1.2334 2.156968,-7.606 -2.087388,-4.45396 -3.409402,1.57602 -0.834956,3.42612 -1.87865,0.20557 -0.347898,2.1242 1.530752,1.64454 h 1.322013 l 0.626217,3.90578 2.296127,5.61884 -0.347898,2.19272 z"
id="path3482"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 66.531337,247.61066 -0.590018,-0.31657 -0.420783,-1.71262 0.427793,-0.66945 1.306823,-1.13114 2.316342,-1.38746 1.06612,0.23465 -0.01701,2.21105 -2.36166,3.35302 z"
id="path4284"
inkscape:connector-curvature="0"
inkscape:transform-center-x="4.9927099"
inkscape:transform-center-y="-9.3161687" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 72.373733,232.22199 -0.815102,1.03206 4.017286,4.12827 1.571981,0.17201 1.339096,-0.86006 0.931544,0.63071 2.387083,-2.98152 -2.794634,-0.91739 -3.027519,0.22934 z"
id="path3601"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 57.407878,237.1102 -1.301737,2.34289 -1.301738,0.61888 -0.17955,1.45878 -4.488748,1.54719 -0.403989,1.50299 0.314213,0.30944 1.032412,0.0884 v 1.41457 l 1.660839,1.50299 2.154598,-1.94504 1.571064,0.35364 2.738136,-1.94504 -1.436399,-2.56392 0.987525,-3.44803 -0.583538,-1.37037 z"
id="path3603"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 62.104217,246.96003 5.843936,-6.55723 0.659867,-2.66044 2.221783,-0.40757 -0.386451,-3.39556 -2.000988,-0.60704 -6.246127,-0.36572 -2.624948,2.5137 1.519708,2.75102 -0.347742,5.51876 z"
id="path3605"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 71.024647,249.63275 5.822153,1.31875 1.047988,-3.89891 -1.280874,-1.43343 0.523995,-6.02038 -3.551515,5.275 0.34933,2.06413 -2.037753,0.80272 -1.164431,0.45869 z"
id="path3607"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 59.099222,247.24427 -2.095974,1.72011 -0.05822,1.60543 0.465772,1.72011 1.455539,0.97473 -0.407551,0.97473 2.328861,-0.34402 2.27064,-2.86685 -1.571981,-0.57337 -0.640437,-2.86685 -1.51376,-0.40136 z"
id="path3609"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 44.078067,234.34346 0.291107,4.47228 -1.863089,1.43342 2.095976,3.72691 2.037753,0.0573 2.27064,-3.55489 -2.969297,-4.98831 z"
id="path3611"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 44.660282,245.46683 -3.318627,4.30027 1.339096,1.26141 2.561747,-0.28668 1.222652,-3.15354 z"
id="path3613"
inkscape:connector-curvature="0" />
</g>
</g>
</svg>

After

Width:  |  Height:  |  Size: 34 KiB

BIN
tests/data/dirty.wav Normal file

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

636
tests/data/weird.svg Normal file
View File

@@ -0,0 +1,636 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Created with Inkscape (http://www.inkscape.org/) -->
<svg
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:cc="http://creativecommons.org/ns#"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:svg="http://www.w3.org/2000/svg"
xmlns="http://www.w3.org/1337/svg"
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
inkscape:export-ydpi="384"
inkscape:export-xdpi="384"
inkscape:export-filename="mat2.png"
width="128"
height="128"
id="svg11300"
sodipodi:version="0.32"
inkscape:version="0.92.3 (2405546, 2018-03-11)"
sodipodi:docname="dirty.svg"
inkscape:output_extension="org.inkscape.output.svg.inkscape"
version="1.0"
style="display:inline;enable-background:new"
viewBox="0 0 128 128">
<script
id="script4600" />
<title
id="title4162">Adwaita Icon Template</title>
<defs
id="defs3" />
<sodipodi:namedview
stroke="#ef2929"
fill="#f57900"
id="base"
pagecolor="#ffffff"
bordercolor="#666666"
borderopacity="0.25490196"
inkscape:pageopacity="0.0"
inkscape:pageshadow="2"
inkscape:zoom="4.0446508"
inkscape:cx="61.536232"
inkscape:cy="41.548134"
inkscape:current-layer="layer1"
showgrid="true"
inkscape:grid-bbox="true"
inkscape:document-units="px"
inkscape:showpageshadow="false"
inkscape:window-width="1366"
inkscape:window-height="747"
inkscape:window-x="0"
inkscape:window-y="21"
width="400px"
height="300px"
inkscape:snap-nodes="true"
inkscape:snap-bbox="false"
objecttolerance="7"
gridtolerance="12"
guidetolerance="13"
inkscape:window-maximized="1"
inkscape:pagecheckerboard="false"
showguides="true"
inkscape:guide-bbox="true"
inkscape:locked="false"
inkscape:measure-start="0,0"
inkscape:measure-end="0,0"
inkscape:object-nodes="true"
inkscape:bbox-nodes="true"
inkscape:snap-global="true"
inkscape:object-paths="true"
inkscape:snap-intersection-paths="true"
inkscape:snap-bbox-edge-midpoints="true"
inkscape:snap-bbox-midpoints="true"
showborder="false"
inkscape:snap-center="true"
inkscape:snap-object-midpoints="true"
inkscape:snap-midpoints="true"
inkscape:snap-smooth-nodes="true">
<inkscape:grid
type="xygrid"
id="grid5883"
spacingx="2"
spacingy="2"
enabled="true"
visible="true"
empspacing="4"
originx="0"
originy="0" />
<sodipodi:guide
position="64,8"
orientation="0,1"
id="guide1073"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="12,64"
orientation="1,0"
id="guide1075"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="64,104"
orientation="0,1"
id="guide1099"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="64,128"
orientation="0,1"
id="guide993"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="104,64"
orientation="1,0"
id="guide995"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="9.2651362e-08,64"
orientation="1,0"
id="guide867"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="120,64"
orientation="1,0"
id="guide869"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="64,116"
orientation="0,1"
id="guide871"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<inkscape:grid
type="xygrid"
id="grid873"
spacingx="1"
spacingy="1"
empspacing="8"
color="#000000"
opacity="0.49019608"
empcolor="#000000"
empopacity="0.08627451"
dotted="true" />
<sodipodi:guide
position="24,64"
orientation="1,0"
id="guide877"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="116,64"
orientation="1,0"
id="guide879"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="64,120"
orientation="0,1"
id="guide881"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="64,12"
orientation="0,1"
id="guide883"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="8,64"
orientation="1,0"
id="guide885"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="128,64"
orientation="1,0"
id="guide887"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="64,0"
orientation="0,1"
id="guide897"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="64,24"
orientation="0,1"
id="guide899"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="256,256"
orientation="-0.70710678,0.70710678"
id="guide950"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="64,64"
orientation="0.70710678,0.70710678"
id="guide952"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
</sodipodi:namedview>
<metadata
id="metadata4">
<rdf:RDF>
<cc:Work
rdf:about="">
<dc:format>image/svg+xml</dc:format>
<dc:type
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
<dc:creator>
<cc:Agent>
<dc:title>GNOME Design Team</dc:title>
</cc:Agent>
</dc:creator>
<dc:source>mat2's source code</dc:source>
<cc:license
rdf:resource="http://creativecommons.org/licenses/by-sa/4.0/" />
<dc:title>Adwaita Icon Template</dc:title>
<dc:subject>
<rdf:Bag>
<rdf:li>mat2</rdf:li>
<rdf:li>logo</rdf:li>
<rdf:li>metadata</rdf:li>
</rdf:Bag>
</dc:subject>
<dc:date>2019 07 13</dc:date>
<dc:rights>
<cc:Agent>
<dc:title>LGPL</dc:title>
</cc:Agent>
</dc:rights>
<dc:publisher>
<cc:Agent>
<dc:title>jvoisin</dc:title>
</cc:Agent>
</dc:publisher>
<dc:identifier>mat2-testdata-svg</dc:identifier>
<dc:relation />
<dc:language>English</dc:language>
<dc:coverage />
<dc:description>This is a test svg image for mat2's testsuite</dc:description>
<dc:contributor>
<cc:Agent>
<dc:title>jvoisin, and Rose for the design</dc:title>
</cc:Agent>
</dc:contributor>
</cc:Work>
<cc:License
rdf:about="http://creativecommons.org/licenses/by-sa/4.0/">
<cc:permits
rdf:resource="http://creativecommons.org/ns#Reproduction" />
<cc:permits
rdf:resource="http://creativecommons.org/ns#Distribution" />
<cc:requires
rdf:resource="http://creativecommons.org/ns#Notice" />
<cc:requires
rdf:resource="http://creativecommons.org/ns#Attribution" />
<cc:permits
rdf:resource="http://creativecommons.org/ns#DerivativeWorks" />
<cc:requires
rdf:resource="http://creativecommons.org/ns#ShareAlike" />
</cc:License>
</rdf:RDF>
</metadata>
<g
id="layer1"
inkscape:label="Icon"
inkscape:groupmode="layer"
style="display:inline"
transform="translate(0,-172)">
<g
inkscape:groupmode="layer"
id="layer2"
inkscape:label="baseplate"
style="display:none">
<text
xml:space="preserve"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:5.33333349px;line-height:125%;font-family:Cantarell;-inkscape-font-specification:'Cantarell, Normal';text-align:start;writing-mode:lr-tb;text-anchor:start;display:inline;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.33264872;enable-background:new"
x="7.9499588"
y="148.65199"
id="context"
inkscape:label="context"><tspan
sodipodi:role="line"
id="tspan2716"
x="7.9499588"
y="148.65199"
style="font-size:5.33333349px;stroke-width:0.33264872">apps</tspan></text>
<text
inkscape:label="icon-name"
id="text3021"
y="157.23398"
x="7.7533054"
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:5.33333349px;line-height:125%;font-family:Cantarell;-inkscape-font-specification:'Cantarell, Bold';text-align:start;writing-mode:lr-tb;text-anchor:start;display:inline;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.33264872;enable-background:new"
xml:space="preserve"><tspan
y="157.23398"
x="7.7533054"
id="tspan3023"
sodipodi:role="line"
style="font-size:5.33333349px;stroke-width:0.33264872">org.gnome.</tspan></text>
<g
style="display:inline;fill:#000000;enable-background:new"
transform="matrix(7.9911709,0,0,8.0036407,-167.7909,-4846.0776)"
id="g12027"
inkscape:export-xdpi="12"
inkscape:export-ydpi="12" />
<rect
style="display:inline;overflow:visible;visibility:visible;fill:#f0f0f0;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.5;marker:none;enable-background:accumulate"
id="rect13805"
width="128"
height="128"
x="9.2651362e-08"
y="172"
inkscape:label="512x512" />
<g
id="g883"
style="fill:none;fill-opacity:0.25098039;stroke:#a579b3;stroke-opacity:1"
transform="translate(-24,24)" />
<g
id="g900"
style="fill:none;fill-opacity:0.25098039;stroke:#a579b3;stroke-opacity:1"
transform="translate(-24,24)" />
<g
id="g1168"
transform="matrix(0.25,0,0,0.25,6.9488522e-8,225)">
<circle
cx="256"
cy="44"
r="240"
id="path1142"
style="opacity:0.1;fill:#2864b0;fill-opacity:1;stroke:none;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;marker:none;marker-start:none;marker-mid:none;marker-end:none;paint-order:normal" />
<rect
ry="32"
rx="32"
y="-180"
x="96"
height="448"
width="319.99979"
id="rect1110"
style="opacity:0.1;fill:#2864b0;fill-opacity:1;stroke:none;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;marker:none;marker-start:none;marker-mid:none;marker-end:none;paint-order:normal" />
<rect
ry="32"
rx="32"
y="-164"
x="48"
height="416"
width="416"
id="rect1110-8"
style="display:inline;opacity:0.1;fill:#2864b0;fill-opacity:1;stroke:none;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;marker:none;marker-start:none;marker-mid:none;marker-end:none;paint-order:normal;enable-background:new" />
<rect
ry="32"
rx="32"
y="-116"
x="32"
height="320"
width="448"
id="rect1110-8-9"
style="display:inline;opacity:0.1;fill:#2864b0;fill-opacity:1;stroke:none;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;marker:none;marker-start:none;marker-mid:none;marker-end:none;paint-order:normal;enable-background:new" />
</g>
</g>
<g
inkscape:groupmode="layer"
id="layer9"
inkscape:label="hires"
style="display:none" />
<g
id="g944"
transform="matrix(1,0,0,0.93868822,0,14.545966)">
<path
style="fill:#99c1f1;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.41013032;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
d="m 50.53899,195.25817 6.396029,-11.43484 1.082405,-0.87215 4.821622,-10.46578 0.885604,-0.38763 2.558412,4.74837 2.755213,9.59364 1.672808,1.35667 3.542417,-0.87215 5.707227,12.59771 12.988859,9.59364 3.050415,3.87621 v 2.71335 l -16.334476,-1.25977 -7.084833,1.45359 -4.428021,-0.38763 -7.084833,0.29072 -11.414452,-0.58143 -3.640817,0.96905 -9.052843,-1.64739 -2.066409,0.0969 -1.476008,-0.48452 1.377607,-1.45358 1.869609,-1.06596 6.002428,-11.04722 1.279206,0.48453 5.412025,-6.49267 z"
id="path3455"
inkscape:connector-curvature="0" />
<path
style="fill:#241f31;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 49.358184,215.31759 -3.444016,0.9206 -9.003641,-1.74429 -1.918809,0.24226 -1.623608,-0.58143 1.574407,-1.50204 1.722008,-0.96905 5.953228,-11.09567 1.279205,0.53298 5.510426,-6.54112 0.344401,0.29072 -4.969223,10.27197 2.214011,1.93811 -0.246001,4.45765 z"
id="path3459"
inkscape:connector-curvature="0" />
<path
style="fill:#241f31;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 50.438601,195.22912 6.470906,-11.5803 1.113274,-0.6167 4.870575,-10.62099 0.904535,-0.41113 -0.417479,3.3576 0.626218,0.89079 0.834954,15.89722 1.391594,3.70021 -3.687722,5.34476 0.208739,1.37044 -0.347898,5.68737 1.87865,3.28908 7.375442,2.19272 1.252433,2.19272 -0.487057,0.13704 -4.244358,-0.54818 -6.540486,0.41114 -2.435287,-2.19272 -0.626216,-4.24839 -2.087389,-6.16703 -4.035619,-3.42612 -2.087388,-4.38544"
id="path3461"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 32.069579,218.11563 c -0.06958,-0.27409 0.695796,-1.23341 0.695796,-1.23341 l 2.783185,-0.0685 1.739491,2.26124 4.661836,5.13919 0.139158,1.57602 -4.174778,5.96145 -0.487057,6.16703 -2.922344,2.26124 -0.06958,1.57601 h -1.113274 l -1.322013,-3.08351 2.017809,-14.86938 z"
id="path3400"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 48.83827,222.43255 1.600331,-3.01499 -0.695796,-0.75375 -5.635951,-1.16488 -3.200663,0.82227 -0.06958,1.50749 1.53075,0.75375 1.461174,2.67237 -0.208739,1.71307 1.739489,1.02783 2.296129,-0.54818 z"
id="path3402"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 51.203977,217.70449 1.113274,-0.68522 2.365707,1.02784 1.322013,2.67237 -2.226548,2.26125 -1.322013,-0.82227 -1.322013,-0.61671 0.834956,-1.71306 z"
id="path3404"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 43.758957,226.61242 1.948228,0.68522 0.417479,1.91863 -0.626216,1.30193 -1.182854,0.34261 -1.113275,1.02784 -0.765376,3.63169 0.626218,3.01499 -1.252435,0.68522 -0.487057,-0.41113 -0.278319,-1.5075 -1.80907,-1.37045 -0.765376,-3.49464 3.618141,-3.42613 1.669912,-2.67237"
id="path3406"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 50.57776,223.25481 0.13916,0.68523 -2.783187,3.83726 0.06958,1.64454 -0.626218,1.50749 -1.60033,1.43897 -0.06958,0.75375 1.600333,1.91863 1.182854,3.08351 0.974114,0.68523 1.669911,-2.80942 -0.278318,-3.22056 3.966039,-3.3576 0.695796,-1.09636 -3.270243,-4.45396 z"
id="path3408"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 51.839954,236.39856 -0.834826,1.58948 0.166966,1.26061 1.057445,1.97315 0.500896,-0.32886 0.389584,-1.7539 1.447031,-1.151 2.337512,-4.0559 -0.22262,-1.04138 -1.947927,-1.69909 -2.114892,1.31542 0.278276,3.39819 z"
id="path3410"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 57.593778,229.84236 -1.043694,1.09636 0.765375,0.89079 1.043695,-0.20556 v -1.43898 z"
id="path3412"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 59.344793,218.25267 -0.765376,2.19272 -0.695796,0.27409 -0.695796,1.91863 -2.226548,2.26124 2.574446,3.56317 h 1.182854 l 0.487057,0.75375 0.626217,1.09636 1.948229,1.30193 2.922346,-0.6167 1.53075,-2.26125 -1.043694,-3.3576 -1.043693,-1.64454 1.322011,-2.60385 -0.904535,-1.37045 -2.226548,0.0685 z"
id="path3416"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 72.150522,238.17554 -0.518261,1.78635 1.036524,2.16915 1.684349,-2.04155 -0.647826,-2.16915 z"
id="path3418"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 66.789813,223.66595 1.600333,-0.75375 1.739489,-4.11135 2.922346,0.75375 1.322013,0.41114 0.139159,6.7152 -1.461172,1.02784 -2.226548,4.17987 -0.834956,-0.41114 -0.626216,0.95932 -2.574448,-0.61671 0.904537,-3.08351 z"
id="path3422"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 77.505077,218.59529 1.182854,-0.20557 2.435287,1.30193 -0.974115,1.02783 -2.087389,3.63169 -1.391593,0.0685 -1.113274,-0.61671 1.043695,-2.19271 z"
id="path3426"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 73.539038,231.06638 1.043695,-1.30193 1.043694,-2.80942 4.522676,1.71306 -0.974115,2.87795 -1.94823,-0.41114 -1.80907,1.09636 z"
id="path3428"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 78.200873,225.6531 7.932079,-7.94861 3.339822,1.09636 0.974115,0.13705 1.600331,-1.02784 3.339822,0.0685 -5.079314,12.81371 -3.200663,-1.98715 0.139161,-1.16489 -0.695798,-0.6167 -0.208737,-1.16488 -1.043696,0.27409 -3.200663,2.39829 z"
id="path3430"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 81.401536,230.99786 c 0,-0.2741 2.156968,-1.98716 2.156968,-1.98716 l 2.017811,1.30193 -0.904535,2.32976 -1.182855,0.75375 z"
id="path3432"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 81.679855,237.8501 0.765375,-1.91863 0.208739,-1.2334 2.156969,0.20557 2.156968,-2.87795 3.409403,1.02784 -0.904535,2.80942 -0.904535,0.34261 -0.626218,2.80943 1.043694,4.72805 -0.904535,1.09636 -1.80907,-2.19272 -0.626217,-1.37045 z"
id="path3434"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 78.131294,238.60385 0.626216,3.08351 -0.626216,3.22056 0.765375,0.95931 -0.626216,5.68737 2.504866,2.32976 1.87865,-0.47965 0.417478,-3.35761 1.669911,-0.0685 3.757301,-1.8501 -0.20874,-1.98716 -2.226548,-0.20556 -1.182854,-3.01499 -3.200662,-2.05568 -1.252434,-2.39828 z"
id="path3436"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 84.532619,251.41755 -0.278318,1.43898 -0.695797,0.6167 1.322013,2.67238 2.365709,-0.20557 1.53075,-2.94647 -2.365707,-1.98715 z"
id="path3438"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 64.993183,249.51154 -1.14959,2.51583 0.766392,1.69818 2.618509,0.25159 0.702526,1.19502 1.021857,2.39003 -0.574794,2.32714 3.89583,1.88688 0.95799,-1.06923 0.510928,-4.59139 -4.023561,-2.70451 -0.127732,-4.21402 z"
id="path3440"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 72.912822,251.00642 h 1.391592 l 2.574446,0.75375 1.391593,1.98715 1.461172,1.30193 -0.139159,3.42612 -3.409402,1.57602 -0.974115,-1.85011 0.626217,-3.3576 -3.270243,-1.85011 z"
id="path3442"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 72.147446,264.77944 1.80907,-1.98715 3.339822,-1.85011 1.322013,-0.0685 4.661835,-3.63169 1.391594,0.34261 0.556637,4.52248 -3.200664,4.04283 -2.852765,-0.82227 -1.80907,0.54818 -0.765376,1.43897 -2.087389,0.68522 z"
id="path3444"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 75.06979,272.93361 0.765376,-1.30192 1.252433,-0.41114 0.904535,-2.87794 1.94823,-0.61671 0.556637,2.60386 -3.339822,6.0985 -1.391593,-0.0685 z"
id="path3446"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 71.451649,268.20556 -1.252433,1.85011 2.504867,1.98715 0.765376,0.82227 1.73949,-2.39829 -2.296127,-2.80942 -1.461173,0.27409 z"
id="path3448"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 62.24531,254.0948 1.461172,1.02784 1.948229,0.54818 0.487058,1.64454 -1.461173,2.67237 -0.06958,1.78159 -1.669911,1.85011 -1.252433,-2.05568 0.487057,-2.80942 -1.391593,-0.34261 -0.904535,-2.80942 z"
id="path3450"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 47.585836,246.55246 -0.695796,3.70021 -0.139159,1.37045 1.87865,0.68523 1.391592,0.95931 1.809071,-1.64454 -0.417478,-0.95931 z"
id="path3452"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 54.682958,247.78586 -1.043694,1.02784 0.208739,1.98715 1.600331,0.89079 0.626217,-0.47965 0.06958,-2.26125 z"
id="path3454"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 48.629531,258.95503 4.800994,-6.16703 3.409402,0.82227 0.556637,1.78159 3.131083,4.79657 -1.669911,5.82441 -3.200663,-1.37045 -0.417478,-3.49464 -2.087388,1.30192 z"
id="path3456"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 45.915924,252.71948 -0.487056,1.98715 1.60033,1.57602 1.461174,-0.20557 -0.347899,-2.19272 z"
id="path3458"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 67.555189,261.6274 -1.80907,2.80943 -2.435287,8.42826 2.783185,3.76874 1.461172,-0.0685 1.113274,-2.12419 1.043696,-0.20557 0.487057,-1.09636 -1.043694,-4.45396 1.182853,-4.31692 z"
id="path3460"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 58.718577,267.79443 1.600331,-1.23341 2.017809,1.71306 -0.904535,1.85011 z"
id="path3462"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 58.509838,276.49678 2.156968,-4.591 1.391593,-0.27409 0.834955,1.50749 -2.017809,5.13919 z"
id="path3464"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 71.242911,274.02997 1.391592,0.20557 1.043694,3.01499 2.01781,0.68522 1.530751,1.57602 -0.904535,2.87795 -2.365707,2.32976 -0.139159,3.56317 -1.322013,1.98715 -2.504867,-1.85011 -0.278318,-2.67237 -1.530752,-1.78159 -1.113274,-3.08351 3.61814,-4.17987 z"
id="path3466"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 62.893354,276.5653 3.270244,1.16489 0.06958,3.70021 -0.556637,0.68523 0.974115,3.70021 1.252433,1.64454 0.06958,3.08351 -2.017809,1.37045 -2.574447,8.08566 -2.574447,-1.30193 -1.948229,-9.79872 z"
id="path3468"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 58.440258,283.5546 h 0.556637 l 0.417478,0.95931 -0.208739,1.30193 -1.461172,0.13704 z"
id="path3472"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 56.700767,279.16916 -1.113274,0.95931 0.834956,2.80943 1.600331,0.20556 0.487058,-2.05567 -0.695796,-1.91863 z"
id="path3474"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 53.152207,272.17987 0.139159,5.13918 1.87865,1.23341 0.834955,-0.54818 0.904535,-3.63169 1.530752,-1.57602 -1.669911,-3.97431 -3.548561,3.08352 z"
id="path3476"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 45.915924,258.33832 -0.208739,3.83726 -4.731414,3.97431 1.948229,2.80942 8.488716,0.82227 0.417478,1.98715 1.043694,-0.75375 0.487057,-2.19272 1.182854,-1.64454 -0.417478,-1.09635 -1.87865,-2.60386 -3.757299,-1.37045 -1.461174,-3.22056 z"
id="path3480"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 40.279975,263.68308 1.669912,0.6167 3.061502,-6.37259 -0.904535,-5.61884 -2.504867,-0.34262 -1.391592,-1.2334 2.156968,-7.606 -2.087388,-4.45396 -3.409402,1.57602 -0.834956,3.42612 -1.87865,0.20557 -0.347898,2.1242 1.530752,1.64454 h 1.322013 l 0.626217,3.90578 2.296127,5.61884 -0.347898,2.19272 z"
id="path3482"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 66.531337,247.61066 -0.590018,-0.31657 -0.420783,-1.71262 0.427793,-0.66945 1.306823,-1.13114 2.316342,-1.38746 1.06612,0.23465 -0.01701,2.21105 -2.36166,3.35302 z"
id="path4284"
inkscape:connector-curvature="0"
inkscape:transform-center-x="4.9927099"
inkscape:transform-center-y="-9.3161687" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 72.373733,232.22199 -0.815102,1.03206 4.017286,4.12827 1.571981,0.17201 1.339096,-0.86006 0.931544,0.63071 2.387083,-2.98152 -2.794634,-0.91739 -3.027519,0.22934 z"
id="path3601"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 57.407878,237.1102 -1.301737,2.34289 -1.301738,0.61888 -0.17955,1.45878 -4.488748,1.54719 -0.403989,1.50299 0.314213,0.30944 1.032412,0.0884 v 1.41457 l 1.660839,1.50299 2.154598,-1.94504 1.571064,0.35364 2.738136,-1.94504 -1.436399,-2.56392 0.987525,-3.44803 -0.583538,-1.37037 z"
id="path3603"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 62.104217,246.96003 5.843936,-6.55723 0.659867,-2.66044 2.221783,-0.40757 -0.386451,-3.39556 -2.000988,-0.60704 -6.246127,-0.36572 -2.624948,2.5137 1.519708,2.75102 -0.347742,5.51876 z"
id="path3605"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 71.024647,249.63275 5.822153,1.31875 1.047988,-3.89891 -1.280874,-1.43343 0.523995,-6.02038 -3.551515,5.275 0.34933,2.06413 -2.037753,0.80272 -1.164431,0.45869 z"
id="path3607"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 59.099222,247.24427 -2.095974,1.72011 -0.05822,1.60543 0.465772,1.72011 1.455539,0.97473 -0.407551,0.97473 2.328861,-0.34402 2.27064,-2.86685 -1.571981,-0.57337 -0.640437,-2.86685 -1.51376,-0.40136 z"
id="path3609"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 44.078067,234.34346 0.291107,4.47228 -1.863089,1.43342 2.095976,3.72691 2.037753,0.0573 2.27064,-3.55489 -2.969297,-4.98831 z"
id="path3611"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 44.660282,245.46683 -3.318627,4.30027 1.339096,1.26141 2.561747,-0.28668 1.222652,-3.15354 z"
id="path3613"
inkscape:connector-curvature="0" />
</g>
</g>
</svg>

After

Width:  |  Height:  |  Size: 34 KiB

54
tests/fuzz.py Normal file
View File

@@ -0,0 +1,54 @@
import mimetypes
import os
import sys
sys.path.append('..')
import atheris
with atheris.instrument_imports(enable_loader_override=False):
from libmat2 import parser_factory, UNSUPPORTED_EXTENSIONS
# Mimetype fragments that are left out of the fuzzing corpus.
# 'aif' is a substring of 'aifc', so the latter needs no entry of its own.
_SKIPPED_MIMETYPE_PARTS = ('aif', 'wav', 'gif')

# Collect every file extension that a registered parser claims to handle,
# minus the video mimetypes, the fragments above and the extensions that
# libmat2 explicitly lists as unsupported.
extensions = set()
for parser in parser_factory._get_parsers():  # type: ignore
    for mtype in parser.mimetypes:
        if mtype.startswith('video'):
            continue
        if any(part in mtype for part in _SKIPPED_MIMETYPE_PARTS):
            continue
        for extension in mimetypes.guess_all_extensions(mtype):
            if extension not in UNSUPPORTED_EXTENSIONS:
                extensions.add(extension)
# Sorted rather than list(set): the iteration order of a set of strings
# changes with hash randomization, which would make the index picked by the
# fuzzer point at a different extension when an input is replayed.
extensions = sorted(extensions)
def TestOneInput(data):
    """Fuzz target: write the fuzzed bytes to a file and run a parser on it.

    The first bytes of ``data`` select one of the module-level ``extensions``;
    the remainder becomes the content of a file carrying that extension. If
    ``parser_factory`` returns a parser for the file, its metadata is read,
    the file is cleaned, and a parser is requested and queried once more.
    ``ValueError`` is treated as an expected rejection of malformed input;
    any other exception propagates to atheris.
    """
    # Local import so this block does not depend on the module's import list.
    import tempfile

    fdp = atheris.FuzzedDataProvider(data)
    extension = fdp.PickValueInList(extensions)
    data = fdp.ConsumeBytes(sys.maxsize)

    # A private directory per call instead of a fixed /tmp path: parallel
    # fuzzing jobs no longer overwrite each other's input, and everything in
    # the directory is removed even when an unexpected exception escapes.
    # NOTE(review): presumably the cleaned output is written next to the
    # input (as the test-suite's clean.jpg -> clean.cleaned.jpg suggests), in
    # which case it is removed here as well -- confirm.
    with tempfile.TemporaryDirectory(prefix='mat2_fuzz_') as workdir:
        fname = os.path.join(workdir, 'mat2_fuzz' + extension)
        with open(fname, 'wb') as f:
            f.write(data)
        try:
            p, _ = parser_factory.get_parser(fname)
            if p:
                p.sandbox = False
                p.get_meta()
                p.remove_all()
                p, _ = parser_factory.get_parser(fname)
                p.get_meta()
        except ValueError:
            pass
# Hand the command-line arguments and the fuzz target to atheris,
# then start the fuzzing loop.
atheris.Setup(sys.argv, TestOneInput)
atheris.Fuzz()

View File

@@ -1,6 +1,7 @@
import random
import os
import shutil
import stat
import subprocess
import unittest
import glob
@@ -20,29 +21,39 @@ class TestHelp(unittest.TestCase):
def test_help(self):
proc = subprocess.Popen(mat2_binary + ['--help'], stdout=subprocess.PIPE)
stdout, _ = proc.communicate()
self.assertIn(b'usage: mat2 [-h] [-v] [-l] [--check-dependencies] [-V]',
stdout)
self.assertIn(b'[--unknown-members policy] [-s | -L]', stdout)
self.assertIn(b'mat2 [-h] [-V]', stdout)
self.assertIn(b'[--unknown-members policy]', stdout)
self.assertIn(b'[--inplace]', stdout)
self.assertIn(b'[--no-sandbox]', stdout)
self.assertIn(b' [-v] [-l]', stdout)
self.assertIn(b'[--check-dependencies]', stdout)
self.assertIn(b'[-L | -s]', stdout)
self.assertIn(b'[files ...]', stdout)
def test_no_arg(self):
proc = subprocess.Popen(mat2_binary, stdout=subprocess.PIPE)
stdout, _ = proc.communicate()
self.assertIn(b'usage: mat2 [-h] [-v] [-l] [--check-dependencies] [-V]',
stdout)
self.assertIn(b'[--unknown-members policy] [-s | -L]', stdout)
self.assertIn(b'mat2 [-h] [-V]', stdout)
self.assertIn(b'[--unknown-members policy]', stdout)
self.assertIn(b'[--inplace]', stdout)
self.assertIn(b'[--no-sandbox]', stdout)
self.assertIn(b' [-v] [-l] [--check-dependencies] [-L | -s]', stdout)
self.assertIn(b'[files ...]', stdout)
class TestVersion(unittest.TestCase):
def test_version(self):
proc = subprocess.Popen(mat2_binary + ['--version'], stdout=subprocess.PIPE)
stdout, _ = proc.communicate()
self.assertTrue(stdout.startswith(b'MAT2 '))
self.assertTrue(stdout.startswith(b'mat2 '))
class TestDependencies(unittest.TestCase):
def test_dependencies(self):
proc = subprocess.Popen(mat2_binary + ['--check-dependencies'], stdout=subprocess.PIPE)
stdout, _ = proc.communicate()
self.assertTrue(b'MAT2' in stdout)
self.assertTrue(b'mat2' in stdout)
class TestReturnValue(unittest.TestCase):
def test_nonzero(self):
@@ -110,6 +121,52 @@ class TestCleanMeta(unittest.TestCase):
os.remove('./tests/data/clean.jpg')
def test_jpg_nosandbox(self):
    """Show metadata with ``--no-sandbox``, clean the file, check the result."""
    source = './tests/data/clean.jpg'
    cleaned = './tests/data/clean.cleaned.jpg'
    marker = b'Comment: Created with GIMP'

    def run(arguments):
        # Run the mat2 binary with the given arguments and return its stdout.
        process = subprocess.Popen(mat2_binary + arguments,
                                   stdout=subprocess.PIPE)
        output, _ = process.communicate()
        return output

    shutil.copy('./tests/data/dirty.jpg', source)

    # The comment must be reported before cleaning...
    self.assertIn(marker, run(['--show', '--no-sandbox', source]))
    run([source])
    # ...and must be absent from the cleaned copy.
    self.assertNotIn(marker, run(['--show', cleaned]))

    os.remove(source)
    os.remove(cleaned)
class TestCopyPermissions(unittest.TestCase):
    """Check the mode of the cleaned file against the mode of its source."""

    def test_jpg_777(self):
        """A source file set to 0o777 must yield a cleaned file with mode 0o100777."""
        source = './tests/data/clean.jpg'
        cleaned = './tests/data/clean.cleaned.jpg'
        marker = b'Comment: Created with GIMP'

        def run(arguments):
            # Run the mat2 binary with the given arguments and return its stdout.
            process = subprocess.Popen(mat2_binary + arguments,
                                       stdout=subprocess.PIPE)
            output, _ = process.communicate()
            return output

        shutil.copy('./tests/data/dirty.jpg', source)
        os.chmod(source, 0o777)

        self.assertIn(marker, run(['--show', source]))
        run([source])
        self.assertNotIn(marker, run(['--show', cleaned]))

        # st_mode carries the file-type bits too, hence 0o100777 for a
        # regular file with rwxrwxrwx permissions.
        mode = os.stat(cleaned).st_mode
        self.assertEqual(mode, 0o100777)

        os.remove(source)
        os.remove(cleaned)
class TestIsSupported(unittest.TestCase):
def test_pdf(self):
@@ -179,6 +236,7 @@ class TestGetMeta(unittest.TestCase):
self.assertIn(b'i am a : various comment', stdout)
self.assertIn(b'artist: jvoisin', stdout)
class TestControlCharInjection(unittest.TestCase):
def test_jpg(self):
proc = subprocess.Popen(mat2_binary + ['--show', './tests/data/control_chars.jpg'],
@@ -206,7 +264,9 @@ class TestCommandLineParallel(unittest.TestCase):
os.remove(path)
def test_different(self):
shutil.copytree('./tests/data/', './tests/data/parallel')
src = './tests/data/'
dst = './tests/data/parallel'
shutil.copytree(src, dst)
proc = subprocess.Popen(mat2_binary + glob.glob('./tests/data/parallel/dirty.*'),
stdout=subprocess.PIPE)
@@ -218,7 +278,7 @@ class TestCommandLineParallel(unittest.TestCase):
self.assertIsNotNone(p)
p = parser_factory.get_parser(p.output_filename)
self.assertEqual(p.get_meta(), {})
shutil.rmtree('./tests/data/parallel')
shutil.rmtree('./tests/data/parallel/')
def test_faulty(self):
for i in range(self.iterations):
@@ -239,3 +299,35 @@ class TestCommandLineParallel(unittest.TestCase):
os.remove('./tests/data/dirty_%d.cleaned.jpg' % i)
os.remove(path)
os.remove('./tests/data/dirty_%d.docx' % i)
class TestInplaceCleaning(unittest.TestCase):
    """Exercise the ``--inplace`` command-line option."""

    def test_cleaning(self):
        """After ``--inplace``, ``--show`` reports no metadata for the same path."""
        shutil.copy('./tests/data/dirty.jpg', './tests/data/clean.jpg')
        proc = subprocess.Popen(mat2_binary + ['--inplace', './tests/data/clean.jpg'],
                                stdout=subprocess.PIPE)
        proc.communicate()
        proc = subprocess.Popen(mat2_binary + ['--show', './tests/data/clean.jpg'],
                                stdout=subprocess.PIPE)
        stdout, _ = proc.communicate()
        self.assertIn(b' No metadata found in ./tests/data/clean.jpg.\n', stdout)
        os.remove('./tests/data/clean.jpg')

    def test_cleaning_multiple_one_fails(self):
        """Valid files are still cleaned in place when one input is faulty."""
        valid = ['./tests/data/clean_%d.jpg' % i for i in range(9)]
        # A torrent disguised as a jpg: the one input expected to fail.
        faulty = './tests/data/clean_9.jpg'
        for f in valid:
            shutil.copy('./tests/data/dirty.jpg', f)
        shutil.copy('./tests/data/dirty.torrent', faulty)

        # The faulty file has to be on the command line as well; previously
        # it was created but never handed to mat2, so nothing ever failed.
        proc = subprocess.Popen(mat2_binary + ['--inplace'] + valid + [faulty],
                                stdout=subprocess.PIPE)
        proc.communicate()

        for f in valid:
            p = images.JPGParser(f)
            meta = p.get_meta()
            self.assertEqual(meta, {})

        for path in valid + [faulty]:
            os.remove(path)

View File

@@ -65,8 +65,10 @@ class TestCorruptedEmbedded(unittest.TestCase):
def test_docx(self):
shutil.copy('./tests/data/embedded_corrupted.docx', './tests/data/clean.docx')
parser, _ = parser_factory.get_parser('./tests/data/clean.docx')
self.assertFalse(parser.remove_all())
self.assertIsNotNone(parser.get_meta())
with self.assertRaises(ValueError):
parser.remove_all()
with self.assertRaises(ValueError):
self.assertIsNotNone(parser.get_meta())
os.remove('./tests/data/clean.docx')
def test_odt(self):
@@ -89,9 +91,8 @@ class TestExplicitelyUnsupportedFiles(unittest.TestCase):
class TestWrongContentTypesFileOffice(unittest.TestCase):
def test_office_incomplete(self):
shutil.copy('./tests/data/malformed_content_types.docx', './tests/data/clean.docx')
p = office.MSOfficeParser('./tests/data/clean.docx')
self.assertIsNotNone(p)
self.assertFalse(p.remove_all())
with self.assertRaises(ValueError):
office.MSOfficeParser('./tests/data/clean.docx')
os.remove('./tests/data/clean.docx')
def test_office_broken(self):
@@ -121,8 +122,8 @@ class TestCorruptedFiles(unittest.TestCase):
def test_png2(self):
shutil.copy('./tests/test_libmat2.py', './tests/clean.png')
parser, _ = parser_factory.get_parser('./tests/clean.png')
self.assertIsNone(parser)
with self.assertRaises(ValueError):
parser_factory.get_parser('./tests/clean.png')
os.remove('./tests/clean.png')
def test_torrent(self):
@@ -187,6 +188,15 @@ class TestCorruptedFiles(unittest.TestCase):
audio.MP3Parser('./tests/data/clean.mp3')
os.remove('./tests/data/clean.mp3')
def test_wrong_tif(self):
    """Clean a TIFF copied to a ``.tif`` name and expect no metadata afterwards."""
    source = './tests/data/clean.tif'
    cleaned = './tests/data/clean.cleaned.tif'

    shutil.copy('./tests/data/dirty.tiff', source)
    images.TiffParser(source).remove_all()

    remaining = images.TiffParser(cleaned).get_meta()
    self.assertEqual(remaining, {})

    os.remove(source)
    os.remove(cleaned)
def test_jpg(self):
shutil.copy('./tests/data/dirty.mp3', './tests/data/clean.jpg')
with self.assertRaises(ValueError):
@@ -194,10 +204,9 @@ class TestCorruptedFiles(unittest.TestCase):
os.remove('./tests/data/clean.jpg')
def test_png_lightweight(self):
return
shutil.copy('./tests/data/dirty.torrent', './tests/data/clean.png')
p = images.PNGParser('./tests/data/clean.png')
self.assertTrue(p.remove_all())
with self.assertRaises(ValueError):
images.PNGParser('./tests/data/clean.png')
os.remove('./tests/data/clean.png')
def test_avi(self):
@@ -230,10 +239,10 @@ class TestCorruptedFiles(unittest.TestCase):
zout.write('./tests/data/embedded_corrupted.docx')
p, mimetype = parser_factory.get_parser('./tests/data/clean.zip')
self.assertEqual(mimetype, 'application/zip')
meta = p.get_meta()
self.assertEqual(meta['tests/data/dirty.flac']['comments'], 'Thank you for using MAT !')
self.assertEqual(meta['tests/data/dirty.docx']['word/media/image1.png']['Comment'], 'This is a comment, be careful!')
self.assertFalse(p.remove_all())
with self.assertRaises(ValueError):
p.get_meta()
with self.assertRaises(ValueError):
self.assertFalse(p.remove_all())
os.remove('./tests/data/clean.zip')
def test_html(self):
@@ -308,10 +317,10 @@ class TestCorruptedFiles(unittest.TestCase):
zout.addfile(tarinfo, f)
p, mimetype = parser_factory.get_parser('./tests/data/clean.tar')
self.assertEqual(mimetype, 'application/x-tar')
meta = p.get_meta()
self.assertEqual(meta['./tests/data/dirty.flac']['comments'], 'Thank you for using MAT !')
self.assertEqual(meta['./tests/data/dirty.docx']['word/media/image1.png']['Comment'], 'This is a comment, be careful!')
self.assertFalse(p.remove_all())
with self.assertRaises(ValueError):
p.get_meta()
with self.assertRaises(ValueError):
self.assertFalse(p.remove_all())
os.remove('./tests/data/clean.tar')
shutil.copy('./tests/data/dirty.png', './tests/data/clean.tar')

View File

@@ -137,3 +137,34 @@ class TestRsidRemoval(unittest.TestCase):
os.remove('./tests/data/clean.docx')
os.remove('./tests/data/clean.cleaned.docx')
class TestNsidRemoval(unittest.TestCase):
    """Check that cleaning a docx leaves no ``w:nsid`` occurrence behind."""

    def test_office(self):
        """Count ``w:rsid`` in the dirty file, clean it, expect zero ``w:nsid``."""
        shutil.copy('./tests/data/dirty_with_nsid.docx', './tests/data/clean.docx')
        p = office.MSOfficeParser('./tests/data/clean.docx')

        meta = p.get_meta()
        self.assertIsNotNone(meta)

        # Integer accumulator (was initialised with False, which only worked
        # because bool is a subclass of int).
        how_many_rsid = 0
        with zipfile.ZipFile('./tests/data/clean.docx') as zin:
            for item in zin.infolist():
                if not item.filename.endswith('.xml'):
                    continue
                num = zin.read(item).decode('utf-8').lower().count('w:rsid')
                how_many_rsid += num
        self.assertEqual(how_many_rsid, 1190)

        ret = p.remove_all()
        self.assertTrue(ret)

        # Every xml member of the cleaned archive must be free of 'w:nsid'.
        with zipfile.ZipFile('./tests/data/clean.cleaned.docx') as zin:
            for item in zin.infolist():
                if not item.filename.endswith('.xml'):
                    continue
                num = zin.read(item).decode('utf-8').lower().count('w:nsid')
                self.assertEqual(num, 0)

        os.remove('./tests/data/clean.docx')
        os.remove('./tests/data/clean.cleaned.docx')

View File

@@ -14,13 +14,10 @@ from libmat2 import check_dependencies, video, archive, web, epub
class TestCheckDependencies(unittest.TestCase):
def test_deps(self):
try:
ret = check_dependencies()
except RuntimeError:
return # this happens if not every dependency is installed
ret = check_dependencies()
for key, value in ret.items():
self.assertTrue(value, "The value for %s is False" % key)
if value['required']:
self.assertTrue(value['found'], "The value for %s is False" % key)
class TestParserFactory(unittest.TestCase):
@@ -76,13 +73,13 @@ class TestParameterInjection(unittest.TestCase):
class TestUnsupportedEmbeddedFiles(unittest.TestCase):
def test_odt_with_svg(self):
def test_odt_with_py(self):
shutil.copy('./tests/data/embedded.odt', './tests/data/clean.odt')
p = office.LibreOfficeParser('./tests/data/clean.odt')
self.assertFalse(p.remove_all())
os.remove('./tests/data/clean.odt')
def test_docx_with_svg(self):
def test_docx_with_py(self):
shutil.copy('./tests/data/embedded.docx', './tests/data/clean.docx')
p = office.MSOfficeParser('./tests/data/clean.docx')
self.assertFalse(p.remove_all())
@@ -116,6 +113,13 @@ class TestGetMeta(unittest.TestCase):
meta = p.get_meta()
self.assertEqual(meta['Comment'], 'Created with GIMP')
def test_ppm(self):
    """The PPM parser must report the three expected comment entries."""
    found = images.PPMParser('./tests/data/dirty.ppm').get_meta()
    expected_entries = (
        ('1', '# A metadata'),
        ('4', '# And an other one'),
        ('6', '# and a final one here'),
    )
    for key, comment in expected_entries:
        self.assertEqual(found[key], comment)
def test_tiff(self):
p = images.TiffParser('./tests/data/dirty.tiff')
meta = p.get_meta()
@@ -123,6 +127,11 @@ class TestGetMeta(unittest.TestCase):
self.assertEqual(meta['Model'], 'C7070WZ')
self.assertEqual(meta['ModifyDate'], '2005:12:26 17:09:35')
def test_wav(self):
    """The WAV parser must report the ``Artist`` entry of the dirty sample."""
    found = audio.WAVParser('./tests/data/dirty.wav').get_meta()
    self.assertEqual(found['Artist'], 'jvoisin')
def test_mp3(self):
p = audio.MP3Parser('./tests/data/dirty.mp3')
meta = p.get_meta()
@@ -166,14 +175,30 @@ class TestGetMeta(unittest.TestCase):
def test_zip(self):
with zipfile.ZipFile('./tests/data/dirty.zip', 'w') as zout:
zout.write('./tests/data/dirty.flac')
zout.write('./tests/data/dirty.docx')
zout.write('./tests/data/dirty.jpg')
zout.write('./tests/data/dirty.flac',
compress_type = zipfile.ZIP_STORED)
zout.write('./tests/data/dirty.docx',
compress_type = zipfile.ZIP_DEFLATED)
zout.write('./tests/data/dirty.jpg',
compress_type = zipfile.ZIP_BZIP2)
zout.write('./tests/data/dirty.txt',
compress_type = zipfile.ZIP_LZMA)
p, mimetype = parser_factory.get_parser('./tests/data/dirty.zip')
self.assertEqual(mimetype, 'application/zip')
meta = p.get_meta()
self.assertEqual(meta['tests/data/dirty.flac']['comments'], 'Thank you for using MAT !')
self.assertEqual(meta['tests/data/dirty.docx']['word/media/image1.png']['Comment'], 'This is a comment, be careful!')
with zipfile.ZipFile('./tests/data/dirty.zip') as zipin:
members = {
'tests/data/dirty.flac' : zipfile.ZIP_STORED,
'tests/data/dirty.docx': zipfile.ZIP_DEFLATED,
'tests/data/dirty.jpg' : zipfile.ZIP_BZIP2,
'tests/data/dirty.txt' : zipfile.ZIP_LZMA,
}
for k, v in members.items():
self.assertEqual(zipin.getinfo(k).compress_type, v)
os.remove('./tests/data/dirty.zip')
def test_wmv(self):
@@ -217,6 +242,21 @@ class TestGetMeta(unittest.TestCase):
self.assertEqual(meta['./tests/data/dirty.docx']['word/media/image1.png']['Comment'], 'This is a comment, be careful!')
os.remove('./tests/data/dirty.tar')
def test_svg(self):
    """The SVG parser must report the unusual ``Xmlns`` value of weird.svg."""
    found = images.SVGParser('./tests/data/weird.svg').get_meta()
    self.assertEqual(found['Xmlns'], 'http://www.w3.org/1337/svg')
def test_aiff(self):
    """The AIFF parser must report the ``Name`` entry of the dirty sample."""
    found = audio.AIFFParser('./tests/data/dirty.aiff').get_meta()
    self.assertEqual(found['Name'], 'I am so')
def test_heic(self):
    """The HEIC parser must report the two profile entries of the dirty sample."""
    found = images.HEICParser('./tests/data/dirty.heic').get_meta()
    expected_entries = (
        ('ProfileCopyright', 'Public Domain'),
        ('ProfileDescription', 'GIMP built-in sRGB'),
    )
    for key, value in expected_entries:
        self.assertEqual(found[key], value)
class TestRemovingThumbnails(unittest.TestCase):
def test_odt(self):
@@ -276,367 +316,247 @@ class TestRevisionsCleaning(unittest.TestCase):
os.remove('./tests/data/revision_clean.docx')
os.remove('./tests/data/revision_clean.cleaned.docx')
class TestCleaning(unittest.TestCase):
def test_pdf(self):
shutil.copy('./tests/data/dirty.pdf', './tests/data/clean.pdf')
p = pdf.PDFParser('./tests/data/clean.pdf')
data = [{
'name': 'pdf',
'parser': pdf.PDFParser,
'meta': {'producer': 'pdfTeX-1.40.14'},
'expected_meta': {'creation-date': -1, 'format': 'PDF-1.5', 'mod-date': -1},
}, {
'name': 'png',
'parser': images.PNGParser,
'meta': {'Comment': 'This is a comment, be careful!'},
'expected_meta': {},
}, {
'name': 'jpg',
'parser': images.JPGParser,
'meta': {'Comment': 'Created with GIMP'},
'expected_meta': {},
}, {
'name': 'wav',
'parser': audio.WAVParser,
'meta': {'Comment': 'Zomg, a comment!'},
'expected_meta': {},
}, {
'name': 'aiff',
'parser': audio.AIFFParser,
'meta': {'Annotation': 'Thank you for using MAT !'},
'expected_meta': {},
},
{
'name': 'mp3',
'parser': audio.MP3Parser,
'meta': {'TXXX:I am a': 'various comment'},
'expected_meta': {},
}, {
'name': 'ogg',
'parser': audio.OGGParser,
'meta': {'title': 'I am so'},
'expected_meta': {},
}, {
'name': 'flac',
'parser': audio.FLACParser,
'meta': {'title': 'I am so'},
'expected_meta': {},
}, {
'name': 'docx',
'parser': office.MSOfficeParser,
'meta': {'word/media/image1.png' :
{'Comment': 'This is a comment, be careful!',
'ModifyDate': '2018:03:20 21:59:25',
'PixelUnits': 'meters',
'PixelsPerUnitX': 2835,
'PixelsPerUnitY': 2835,
'create_system': 'Weird',
'date_time': '2018-03-31 13:15:38'} ,
},
'expected_meta': {},
}, {
'name': 'odt',
'parser': office.LibreOfficeParser,
'meta': {
'Pictures/1000000000000032000000311EC5314D.png': {
'create_system': 'Weird',
'date_time': '2011-07-26 02:40:16',
'PixelsPerUnitX': 4847,
'PixelsPerUnitY': 4760,
'PixelUnits': 'meters',
},
},
'expected_meta': {},
},{
'name': 'tiff',
'parser': images.TiffParser,
'meta': {'Model': 'C7070WZ'},
'expected_meta':
{'Orientation': 'Horizontal (normal)',
'ResolutionUnit': 'inches',
'XResolution': 72,
'YResolution': 72}
},{
'name': 'bmp',
'parser': harmless.HarmlessParser,
'meta': {},
'expected_meta': {},
},{
'name': 'torrent',
'parser': torrent.TorrentParser,
'meta': {'created by': b'mktorrent 1.0', 'creation date': 1522397702},
'expected_meta': {},
}, {
'name': 'odf',
'parser': office.LibreOfficeParser,
'meta': {'meta.xml': {'create_system': 'Weird', 'date_time':
'2018-04-22 22:20:24', 'meta:initial-creator': 'Julien Voisin',
'meta:creation-date': '2018-04-23T00:18:59.438231281',
'dc:date': '2018-04-23T00:20:23.978564933', 'dc:creator':
'Julien Voisin', 'meta:editing-duration': 'PT1M24S',
'meta:editing-cycles': '1', 'meta:generator':
'LibreOffice/5.4.6.2$Linux_X86_64 LibreOffice_project/40m0$Build-2'}},
'expected_meta': {},
}, {
'name': 'odg',
'parser': office.LibreOfficeParser,
'meta': {'meta.xml': {'create_system': 'Weird', 'date_time':
'2018-04-22 22:26:58', 'meta:initial-creator': 'Julien Voisin',
'meta:creation-date': '2018-04-23T00:25:59.953271949',
'dc:date': '2018-04-23T00:26:59.385838550', 'dc:creator':
'Julien Voisin', 'meta:editing-duration': 'PT59S',
'meta:editing-cycles': '1', 'meta:generator':
'LibreOffice/5.4.6.2$Linux_X86_64 LibreOffice_project/40m0$Build-2'}},
'expected_meta': {},
}, {
'name': 'txt',
'parser': harmless.HarmlessParser,
'meta': {},
'expected_meta': {},
},{
'name': 'gif',
'parser': images.GIFParser,
'meta': {'Comment': 'this is a test comment'},
'expected_meta': {'TransparentColor': '5'},
},{
'name': 'css',
'parser': web.CSSParser,
'meta': {
'harmful data': 'underline is cool',
'version': '1.0',
'author': 'jvoisin'
},
'expected_meta': {},
},{
'name': 'svg',
'parser': images.SVGParser,
'meta': {
'WorkDescription': "This is a test svg image for mat2's testsuite",
},
'expected_meta': {
'ImageSize': '128x128',
'Megapixels': '0.016',
},
} ,{
'name': 'ppm',
'parser': images.PPMParser,
'meta': {
'1': '# A metadata',
},
'expected_meta': {},
} ,{
'name': 'avi',
'ffmpeg': 1,
'parser': video.AVIParser,
'meta': {
'Software': 'MEncoder SVN-r33148-4.0.1',
},
'expected_meta': {},
} ,{
'name': 'mp4',
'ffmpeg': 1,
'parser': video.MP4Parser,
'meta': {
'Encoder': 'HandBrake 0.9.4 2009112300',
},
'expected_meta': {
'AverageBitrate': 465641,
'BufferSize': 0,
'CompatibleBrands': ['isom', 'iso2', 'avc1', 'mp41'],
'ColorRepresentation': 'nclx 1 1 1',
'CompressorID': 'avc1',
'GraphicsMode': 'srcCopy',
'HandlerDescription': 'SoundHandler',
'HandlerType': 'Metadata',
'HandlerVendorID': 'Apple',
'MajorBrand': 'Base Media v1 [IS0 14496-12:2003]',
'MaxBitrate': 465641,
'MediaDataOffset': 48,
'MediaDataSize': 379872,
'MediaHeaderVersion': 0,
'MinorVersion': '0.2.0',
'MovieDataOffset': 48,
'MovieHeaderVersion': 0,
'NextTrackID': 3,
'PreferredRate': 1,
'Rotation': 0,
'TimeScale': 1000,
'TrackHeaderVersion': 0,
'TrackID': 1,
'TrackLayer': 0},
},{
'name': 'wmv',
'ffmpeg': 1,
'parser': video.WMVParser,
'meta': {
'EncodingSettings': 'Lavf52.103.0',
},
'expected_meta': {},
},{
'name': 'heic',
'parser': images.HEICParser,
'meta': {},
'expected_meta': {},
}
]
def test_all_parametred(self):
    """Run the clean / re-check cycle for every entry of ``self.data``.

    For each case, ``dirty.<name>`` is copied to ``clean.<name>``; the
    metadata entries listed in ``case['meta']`` are compared with what the
    parser reports; the file is cleaned with ``lightweight_cleaning``
    enabled; finally, every entry still present in the cleaned output must
    be listed in ``case['expected_meta']``, and the output must itself be
    cleanable. Each case runs in its own ``subTest``.
    """
    for case in self.data:
        with self.subTest(case=case):
            if 'ffmpeg' in case:
                try:
                    video._get_ffmpeg_path()
                except RuntimeError:
                    # Same SkipTest as before, now with a reason attached.
                    raise unittest.SkipTest('ffmpeg path lookup failed')

            print('[+] Testing %s' % case['name'])
            target = './tests/data/clean.' + case['name']
            shutil.copy('./tests/data/dirty.' + case['name'], target)
            p1 = case['parser'](target)

            # Only the keys named in the case are compared; anything else
            # the parser reports is ignored at this stage.
            for k, v in p1.get_meta().items():
                if k not in case['meta']:
                    continue
                if isinstance(v, dict):
                    for _k, _v in v.items():
                        if _k in case['meta'][k]:
                            self.assertEqual(_v, case['meta'][k][_k])
                else:
                    self.assertEqual(v, case['meta'][k])

            p1.lightweight_cleaning = True
            self.assertTrue(p1.remove_all())

            p2 = case['parser'](p1.output_filename)
            meta = p2.get_meta()
            if meta:
                # Reuse the metadata fetched above instead of asking the
                # parser a second time.
                for k, v in meta.items():
                    self.assertIn(k, case['expected_meta'], '"%s" is not in "%s" (%s)' % (k, case['expected_meta'], case['name']))
                    self.assertIn(str(case['expected_meta'][k]), str(v))
            self.assertTrue(p2.remove_all())

            os.remove(target)
            os.remove(p1.output_filename)
            os.remove(p2.output_filename)
meta = p.get_meta()
self.assertEqual(meta['producer'], 'pdfTeX-1.40.14')
ret = p.remove_all()
self.assertTrue(ret)
p = pdf.PDFParser('./tests/data/clean.cleaned.pdf')
expected_meta = {'creation-date': -1, 'format': 'PDF-1.5', 'mod-date': -1}
self.assertEqual(p.get_meta(), expected_meta)
self.assertTrue(p.remove_all())
os.remove('./tests/data/clean.pdf')
os.remove('./tests/data/clean.cleaned.pdf')
os.remove('./tests/data/clean.cleaned.cleaned.pdf')
def test_png(self):
shutil.copy('./tests/data/dirty.png', './tests/data/clean.png')
p = images.PNGParser('./tests/data/clean.png')
meta = p.get_meta()
self.assertEqual(meta['Comment'], 'This is a comment, be careful!')
ret = p.remove_all()
self.assertTrue(ret)
p = images.PNGParser('./tests/data/clean.cleaned.png')
self.assertEqual(p.get_meta(), {})
self.assertTrue(p.remove_all())
os.remove('./tests/data/clean.png')
os.remove('./tests/data/clean.cleaned.png')
os.remove('./tests/data/clean.cleaned.cleaned.png')
def test_jpg(self):
shutil.copy('./tests/data/dirty.jpg', './tests/data/clean.jpg')
p = images.JPGParser('./tests/data/clean.jpg')
meta = p.get_meta()
self.assertEqual(meta['Comment'], 'Created with GIMP')
ret = p.remove_all()
self.assertTrue(ret)
p = images.JPGParser('./tests/data/clean.cleaned.jpg')
self.assertEqual(p.get_meta(), {})
self.assertTrue(p.remove_all())
os.remove('./tests/data/clean.jpg')
os.remove('./tests/data/clean.cleaned.jpg')
os.remove('./tests/data/clean.cleaned.cleaned.jpg')
def test_mp3(self):
shutil.copy('./tests/data/dirty.mp3', './tests/data/clean.mp3')
p = audio.MP3Parser('./tests/data/clean.mp3')
meta = p.get_meta()
self.assertEqual(meta['TXXX:I am a'], 'various comment')
ret = p.remove_all()
self.assertTrue(ret)
p = audio.MP3Parser('./tests/data/clean.cleaned.mp3')
self.assertEqual(p.get_meta(), {})
self.assertTrue(p.remove_all())
os.remove('./tests/data/clean.mp3')
os.remove('./tests/data/clean.cleaned.mp3')
os.remove('./tests/data/clean.cleaned.cleaned.mp3')
def test_ogg(self):
shutil.copy('./tests/data/dirty.ogg', './tests/data/clean.ogg')
p = audio.OGGParser('./tests/data/clean.ogg')
meta = p.get_meta()
self.assertEqual(meta['title'], 'I am so')
ret = p.remove_all()
self.assertTrue(ret)
p = audio.OGGParser('./tests/data/clean.cleaned.ogg')
self.assertEqual(p.get_meta(), {})
self.assertTrue(p.remove_all())
os.remove('./tests/data/clean.ogg')
os.remove('./tests/data/clean.cleaned.ogg')
os.remove('./tests/data/clean.cleaned.cleaned.ogg')
def test_flac(self):
shutil.copy('./tests/data/dirty.flac', './tests/data/clean.flac')
p = audio.FLACParser('./tests/data/clean.flac')
meta = p.get_meta()
self.assertEqual(meta['title'], 'I am so')
ret = p.remove_all()
self.assertTrue(ret)
p = audio.FLACParser('./tests/data/clean.cleaned.flac')
self.assertEqual(p.get_meta(), {})
self.assertTrue(p.remove_all())
os.remove('./tests/data/clean.flac')
os.remove('./tests/data/clean.cleaned.flac')
os.remove('./tests/data/clean.cleaned.cleaned.flac')
def test_office(self):
shutil.copy('./tests/data/dirty.docx', './tests/data/clean.docx')
p = office.MSOfficeParser('./tests/data/clean.docx')
meta = p.get_meta()
self.assertIsNotNone(meta)
ret = p.remove_all()
self.assertTrue(ret)
p = office.MSOfficeParser('./tests/data/clean.cleaned.docx')
self.assertEqual(p.get_meta(), {})
self.assertTrue(p.remove_all())
os.remove('./tests/data/clean.docx')
os.remove('./tests/data/clean.cleaned.docx')
os.remove('./tests/data/clean.cleaned.cleaned.docx')
def test_libreoffice(self):
shutil.copy('./tests/data/dirty.odt', './tests/data/clean.odt')
p = office.LibreOfficeParser('./tests/data/clean.odt')
meta = p.get_meta()
self.assertIsNotNone(meta)
ret = p.remove_all()
self.assertTrue(ret)
p = office.LibreOfficeParser('./tests/data/clean.cleaned.odt')
self.assertEqual(p.get_meta(), {})
self.assertTrue(p.remove_all())
os.remove('./tests/data/clean.odt')
os.remove('./tests/data/clean.cleaned.odt')
os.remove('./tests/data/clean.cleaned.cleaned.odt')
def test_tiff(self):
    """Clean a copy of dirty.tiff and check the result reports no metadata."""
    working_copy = './tests/data/clean.tiff'
    shutil.copy('./tests/data/dirty.tiff', working_copy)

    parser = images.TiffParser(working_copy)
    self.assertEqual(parser.get_meta()['Model'], 'C7070WZ')
    self.assertTrue(parser.remove_all())

    # The cleaned file must be metadata-free and cleanable once more.
    parser = images.TiffParser('./tests/data/clean.cleaned.tiff')
    self.assertEqual(parser.get_meta(), {})
    self.assertTrue(parser.remove_all())

    for leftover in (working_copy,
                     './tests/data/clean.cleaned.tiff',
                     './tests/data/clean.cleaned.cleaned.tiff'):
        os.remove(leftover)
def test_bmp(self):
    """Run the harmless parser over a BMP copy; no metadata before or after."""
    working_copy = './tests/data/clean.bmp'
    shutil.copy('./tests/data/dirty.bmp', working_copy)

    parser = harmless.HarmlessParser(working_copy)
    # A BMP carries no metadata, so even the "dirty" file reports nothing.
    self.assertEqual(parser.get_meta(), {})
    self.assertTrue(parser.remove_all())

    parser = harmless.HarmlessParser('./tests/data/clean.cleaned.bmp')
    self.assertEqual(parser.get_meta(), {})
    self.assertTrue(parser.remove_all())

    for leftover in (working_copy,
                     './tests/data/clean.cleaned.bmp',
                     './tests/data/clean.cleaned.cleaned.bmp'):
        os.remove(leftover)
def test_torrent(self):
    """Clean a copy of dirty.torrent and check the result reports no metadata."""
    working_copy = './tests/data/clean.torrent'
    shutil.copy('./tests/data/dirty.torrent', working_copy)

    parser = torrent.TorrentParser(working_copy)
    # The whole metadata dict is compared, not just a single key.
    expected_dirty = {'created by': b'mktorrent 1.0', 'creation date': 1522397702}
    self.assertEqual(parser.get_meta(), expected_dirty)
    self.assertTrue(parser.remove_all())

    # The cleaned file must be metadata-free and cleanable once more.
    parser = torrent.TorrentParser('./tests/data/clean.cleaned.torrent')
    self.assertEqual(parser.get_meta(), {})
    self.assertTrue(parser.remove_all())

    for leftover in (working_copy,
                     './tests/data/clean.cleaned.torrent',
                     './tests/data/clean.cleaned.cleaned.torrent'):
        os.remove(leftover)
def test_odf(self):
    """Clean a copy of dirty.odf and check the result reports no metadata."""
    working_copy = './tests/data/clean.odf'
    shutil.copy('./tests/data/dirty.odf', working_copy)

    parser = office.LibreOfficeParser(working_copy)
    creation_date = parser.get_meta()['meta.xml']['meta:creation-date']
    self.assertEqual(creation_date, '2018-04-23T00:18:59.438231281')
    self.assertTrue(parser.remove_all())

    # The cleaned file must be metadata-free and cleanable once more.
    parser = office.LibreOfficeParser('./tests/data/clean.cleaned.odf')
    self.assertEqual(parser.get_meta(), {})
    self.assertTrue(parser.remove_all())

    for leftover in (working_copy,
                     './tests/data/clean.cleaned.odf',
                     './tests/data/clean.cleaned.cleaned.odf'):
        os.remove(leftover)
def test_odg(self):
    """Clean a copy of dirty.odg and check the result reports no metadata."""
    working_copy = './tests/data/clean.odg'
    shutil.copy('./tests/data/dirty.odg', working_copy)

    parser = office.LibreOfficeParser(working_copy)
    document_date = parser.get_meta()['meta.xml']['dc:date']
    self.assertEqual(document_date, '2018-04-23T00:26:59.385838550')
    self.assertTrue(parser.remove_all())

    # The cleaned file must be metadata-free and cleanable once more.
    parser = office.LibreOfficeParser('./tests/data/clean.cleaned.odg')
    self.assertEqual(parser.get_meta(), {})
    self.assertTrue(parser.remove_all())

    for leftover in (working_copy,
                     './tests/data/clean.cleaned.odg',
                     './tests/data/clean.cleaned.cleaned.odg'):
        os.remove(leftover)
def test_txt(self):
    """Run the harmless parser over a text copy; no metadata before or after."""
    working_copy = './tests/data/clean.txt'
    shutil.copy('./tests/data/dirty.txt', working_copy)

    parser = harmless.HarmlessParser(working_copy)
    self.assertEqual(parser.get_meta(), {})
    self.assertTrue(parser.remove_all())

    parser = harmless.HarmlessParser('./tests/data/clean.cleaned.txt')
    self.assertEqual(parser.get_meta(), {})
    self.assertTrue(parser.remove_all())

    for leftover in (working_copy,
                     './tests/data/clean.cleaned.txt',
                     './tests/data/clean.cleaned.cleaned.txt'):
        os.remove(leftover)
def test_avi(self):
    """Clean a copy of dirty.avi; skipped when the ffmpeg lookup fails."""
    # _get_ffmpeg_path() raising RuntimeError is treated as "ffmpeg is not
    # available", in which case the test is skipped instead of failing.
    try:
        video._get_ffmpeg_path()
    except RuntimeError:
        raise unittest.SkipTest

    working_copy = './tests/data/clean.avi'
    shutil.copy('./tests/data/dirty.avi', working_copy)

    parser = video.AVIParser(working_copy)
    self.assertEqual(parser.get_meta()['Software'], 'MEncoder SVN-r33148-4.0.1')
    self.assertTrue(parser.remove_all())

    # The cleaned file must be metadata-free and cleanable once more.
    parser = video.AVIParser('./tests/data/clean.cleaned.avi')
    self.assertEqual(parser.get_meta(), {})
    self.assertTrue(parser.remove_all())

    for leftover in (working_copy,
                     './tests/data/clean.cleaned.avi',
                     './tests/data/clean.cleaned.cleaned.avi'):
        os.remove(leftover)
def test_zip(self):
    """Build a zip of dirty fixtures, clean it, and check nothing is left."""
    archive_path = './tests/data/dirty.zip'
    with zipfile.ZipFile(archive_path, 'w') as bundle:
        for member in ('./tests/data/dirty.flac',
                       './tests/data/dirty.docx',
                       './tests/data/dirty.jpg'):
            bundle.write(member)

    parser = archive.ZipParser(archive_path)
    # Metadata is reported per archive member, and nested again for the
    # files contained inside the docx member.
    nested_comment = parser.get_meta()['tests/data/dirty.docx']['word/media/image1.png']['Comment']
    self.assertEqual(nested_comment, 'This is a comment, be careful!')
    self.assertTrue(parser.remove_all())

    # The cleaned archive must be metadata-free and cleanable once more.
    parser = archive.ZipParser('./tests/data/dirty.cleaned.zip')
    self.assertEqual(parser.get_meta(), {})
    self.assertTrue(parser.remove_all())

    for leftover in (archive_path,
                     './tests/data/dirty.cleaned.zip',
                     './tests/data/dirty.cleaned.cleaned.zip'):
        os.remove(leftover)
def test_mp4(self):
    """Clean a copy of dirty.mp4; skipped when the ffmpeg lookup fails."""
    # A RuntimeError from the ffmpeg lookup means the test cannot run here.
    try:
        video._get_ffmpeg_path()
    except RuntimeError:
        raise unittest.SkipTest

    working_copy = './tests/data/clean.mp4'
    shutil.copy('./tests/data/dirty.mp4', working_copy)

    parser = video.MP4Parser(working_copy)
    self.assertEqual(parser.get_meta()['Encoder'], 'HandBrake 0.9.4 2009112300')
    self.assertTrue(parser.remove_all())

    # Only the absence of the 'Encoder' key is required after cleaning;
    # the cleaned file is not required to report an empty metadata dict.
    parser = video.MP4Parser('./tests/data/clean.cleaned.mp4')
    self.assertNotIn('Encoder', parser.get_meta())
    self.assertTrue(parser.remove_all())

    for leftover in (working_copy,
                     './tests/data/clean.cleaned.mp4',
                     './tests/data/clean.cleaned.cleaned.mp4'):
        os.remove(leftover)
def test_wmv(self):
    """Clean a copy of dirty.wmv; skipped when the ffmpeg lookup fails."""
    # A RuntimeError from the ffmpeg lookup means the test cannot run here.
    try:
        video._get_ffmpeg_path()
    except RuntimeError:
        raise unittest.SkipTest

    working_copy = './tests/data/clean.wmv'
    shutil.copy('./tests/data/dirty.wmv', working_copy)

    parser = video.WMVParser(working_copy)
    self.assertEqual(parser.get_meta()['EncodingSettings'], 'Lavf52.103.0')
    self.assertTrue(parser.remove_all())

    # Only the absence of 'EncodingSettings' is required after cleaning;
    # the cleaned file is not required to report an empty metadata dict.
    parser = video.WMVParser('./tests/data/clean.cleaned.wmv')
    self.assertNotIn('EncodingSettings', parser.get_meta())
    self.assertTrue(parser.remove_all())

    for leftover in (working_copy,
                     './tests/data/clean.cleaned.wmv',
                     './tests/data/clean.cleaned.cleaned.wmv'):
        os.remove(leftover)
def test_gif(self):
    """Clean a copy of dirty.gif and check that its comment is removed."""
    working_copy = './tests/data/clean.gif'
    shutil.copy('./tests/data/dirty.gif', working_copy)

    parser = images.GIFParser(working_copy)
    self.assertEqual(parser.get_meta()['Comment'], 'this is a test comment')
    self.assertTrue(parser.remove_all())

    # Check the very key that was present before cleaning. The previous
    # assertion looked for 'EncodingSettings' (a key copied from the WMV
    # test), which this test never established as present, so it passed
    # whether or not the comment had actually been stripped.
    parser = images.GIFParser('./tests/data/clean.cleaned.gif')
    self.assertNotIn('Comment', parser.get_meta())
    self.assertTrue(parser.remove_all())

    for leftover in (working_copy,
                     './tests/data/clean.cleaned.gif',
                     './tests/data/clean.cleaned.cleaned.gif'):
        os.remove(leftover)
def test_html(self):
shutil.copy('./tests/data/dirty.html', './tests/data/clean.html')
@@ -683,7 +603,6 @@ class TestCleaning(unittest.TestCase):
os.remove('./tests/data/clean.html')
os.remove('./tests/data/clean.cleaned.html')
def test_epub(self):
shutil.copy('./tests/data/dirty.epub', './tests/data/clean.epub')
p = epub.EPUBParser('./tests/data/clean.epub')
@@ -706,25 +625,41 @@ class TestCleaning(unittest.TestCase):
os.remove('./tests/data/clean.cleaned.cleaned.epub')
def test_css(self):
shutil.copy('./tests/data/dirty.css', './tests/data/clean.css')
p = web.CSSParser('./tests/data/clean.css')
self.assertEqual(p.get_meta(), {
'harmful data': 'underline is cool',
'version': '1.0',
'author': 'jvoisin'})
class TestCleaningArchives(unittest.TestCase):
def test_zip(self):
with zipfile.ZipFile('./tests/data/dirty.zip', 'w') as zout:
zout.write('./tests/data/dirty.flac',
compress_type = zipfile.ZIP_STORED)
zout.write('./tests/data/dirty.docx',
compress_type = zipfile.ZIP_DEFLATED)
zout.write('./tests/data/dirty.jpg',
compress_type = zipfile.ZIP_BZIP2)
zout.write('./tests/data/dirty.txt',
compress_type = zipfile.ZIP_LZMA)
p = archive.ZipParser('./tests/data/dirty.zip')
meta = p.get_meta()
self.assertEqual(meta['tests/data/dirty.docx']['word/media/image1.png']['Comment'], 'This is a comment, be careful!')
ret = p.remove_all()
self.assertTrue(ret)
p = web.CSSParser('./tests/data/clean.cleaned.css')
p = archive.ZipParser('./tests/data/dirty.cleaned.zip')
self.assertEqual(p.get_meta(), {})
self.assertTrue(p.remove_all())
os.remove('./tests/data/clean.css')
os.remove('./tests/data/clean.cleaned.css')
os.remove('./tests/data/clean.cleaned.cleaned.css')
with zipfile.ZipFile('./tests/data/dirty.zip') as zipin:
members = {
'tests/data/dirty.flac' : zipfile.ZIP_STORED,
'tests/data/dirty.docx': zipfile.ZIP_DEFLATED,
'tests/data/dirty.jpg' : zipfile.ZIP_BZIP2,
'tests/data/dirty.txt' : zipfile.ZIP_LZMA,
}
for k, v in members.items():
self.assertEqual(zipin.getinfo(k).compress_type, v)
os.remove('./tests/data/dirty.zip')
os.remove('./tests/data/dirty.cleaned.zip')
os.remove('./tests/data/dirty.cleaned.cleaned.zip')
def test_tar(self):
with tarfile.TarFile.open('./tests/data/dirty.tar', 'w') as zout:
@@ -865,3 +800,53 @@ class TestCleaning(unittest.TestCase):
os.remove('./tests/data/dirty.tar.xz')
os.remove('./tests/data/dirty.cleaned.tar.xz')
os.remove('./tests/data/dirty.cleaned.cleaned.tar.xz')
class TestNoSandbox(unittest.TestCase):
    """Cleaning tests run with the parser's ``sandbox`` attribute set to False."""

    def test_avi_nosandbox(self):
        """Clean an AVI copy with ``sandbox = False``; skipped without ffmpeg."""
        # Same availability guard as the other video tests in this file:
        # without it a missing ffmpeg would presumably surface as an error
        # instead of a skip.
        try:
            video._get_ffmpeg_path()
        except RuntimeError:
            raise unittest.SkipTest

        working_copy = './tests/data/clean.avi'
        shutil.copy('./tests/data/dirty.avi', working_copy)

        parser = video.AVIParser(working_copy)
        parser.sandbox = False
        self.assertEqual(parser.get_meta()['Software'], 'MEncoder SVN-r33148-4.0.1')
        self.assertTrue(parser.remove_all())

        # The second parser keeps its default sandbox setting; the cleaned
        # file must be metadata-free and cleanable once more.
        parser = video.AVIParser('./tests/data/clean.cleaned.avi')
        self.assertEqual(parser.get_meta(), {})
        self.assertTrue(parser.remove_all())

        for leftover in (working_copy,
                         './tests/data/clean.cleaned.avi',
                         './tests/data/clean.cleaned.cleaned.avi'):
            os.remove(leftover)

    def test_png_nosandbox(self):
        """Lightweight-clean a PNG copy with ``sandbox = False``."""
        working_copy = './tests/data/clean.png'
        shutil.copy('./tests/data/dirty.png', working_copy)

        parser = images.PNGParser(working_copy)
        parser.sandbox = False
        parser.lightweight_cleaning = True
        self.assertEqual(parser.get_meta()['Comment'], 'This is a comment, be careful!')
        self.assertTrue(parser.remove_all())

        # The second parser uses default settings; the cleaned file must be
        # metadata-free and cleanable once more.
        parser = images.PNGParser('./tests/data/clean.cleaned.png')
        self.assertEqual(parser.get_meta(), {})
        self.assertTrue(parser.remove_all())

        for leftover in (working_copy,
                         './tests/data/clean.cleaned.png',
                         './tests/data/clean.cleaned.cleaned.png'):
            os.remove(leftover)
class TestComplexOfficeFiles(unittest.TestCase):
    """Cleaning checks for the more elaborate office fixtures."""

    def test_complex_pptx(self):
        """remove_all() must succeed on the narrated PowerPoint fixture."""
        working_copy = './tests/data/clean.pptx'
        shutil.copy('./tests/data/narrated_powerpoint_presentation.pptx', working_copy)

        parser = office.MSOfficeParser(working_copy)
        cleaned_ok = parser.remove_all()
        self.assertTrue(cleaned_ok)

        # Drop the working copy first, then whatever path the parser
        # reports as its output file.
        os.remove(working_copy)
        os.remove(parser.output_filename)

View File

@@ -1,106 +0,0 @@
#!/usr/bin/env python3
import unittest
import shutil
import os
from libmat2 import pdf, images, torrent
class TestLightWeightCleaning(unittest.TestCase):
    """Per-format checks of cleaning with ``lightweight_cleaning`` enabled."""

    def test_pdf(self):
        """Lightweight-clean a PDF; only three fixed fields may remain."""
        working_copy = './tests/data/clean.pdf'
        cleaned_copy = './tests/data/clean.cleaned.pdf'
        shutil.copy('./tests/data/dirty.pdf', working_copy)

        parser = pdf.PDFParser(working_copy)
        self.assertEqual(parser.get_meta()['producer'], 'pdfTeX-1.40.14')

        parser.lightweight_cleaning = True
        self.assertTrue(parser.remove_all())

        parser = pdf.PDFParser(cleaned_copy)
        remaining = {'creation-date': -1, 'format': 'PDF-1.5', 'mod-date': -1}
        self.assertEqual(parser.get_meta(), remaining)

        for leftover in (working_copy, cleaned_copy):
            os.remove(leftover)

    def test_png(self):
        """Lightweight-clean a PNG, then clean the same source a second time."""
        working_copy = './tests/data/clean.png'
        cleaned_copy = './tests/data/clean.cleaned.png'
        shutil.copy('./tests/data/dirty.png', working_copy)

        parser = images.PNGParser(working_copy)
        self.assertEqual(parser.get_meta()['Comment'], 'This is a comment, be careful!')

        parser.lightweight_cleaning = True
        self.assertTrue(parser.remove_all())

        parser = images.PNGParser(cleaned_copy)
        self.assertEqual(parser.get_meta(), {})

        # Clean the original copy again while the cleaned output from the
        # first pass is still on disk; this must succeed as well.
        parser = images.PNGParser(working_copy)
        parser.lightweight_cleaning = True
        self.assertTrue(parser.remove_all())

        for leftover in (working_copy, cleaned_copy):
            os.remove(leftover)

    def test_jpg(self):
        """Lightweight-clean a JPEG and expect no metadata afterwards."""
        working_copy = './tests/data/clean.jpg'
        cleaned_copy = './tests/data/clean.cleaned.jpg'
        shutil.copy('./tests/data/dirty.jpg', working_copy)

        parser = images.JPGParser(working_copy)
        self.assertEqual(parser.get_meta()['Comment'], 'Created with GIMP')

        parser.lightweight_cleaning = True
        self.assertTrue(parser.remove_all())

        parser = images.JPGParser(cleaned_copy)
        self.assertEqual(parser.get_meta(), {})

        for leftover in (working_copy, cleaned_copy):
            os.remove(leftover)

    def test_torrent(self):
        """Lightweight-clean a torrent and expect no metadata afterwards."""
        working_copy = './tests/data/clean.torrent'
        cleaned_copy = './tests/data/clean.cleaned.torrent'
        shutil.copy('./tests/data/dirty.torrent', working_copy)

        parser = torrent.TorrentParser(working_copy)
        self.assertEqual(parser.get_meta()['created by'], b'mktorrent 1.0')

        parser.lightweight_cleaning = True
        self.assertTrue(parser.remove_all())

        parser = torrent.TorrentParser(cleaned_copy)
        self.assertEqual(parser.get_meta(), {})

        for leftover in (working_copy, cleaned_copy):
            os.remove(leftover)

    def test_tiff(self):
        """Lightweight-clean a TIFF; four layout-related fields may remain."""
        working_copy = './tests/data/clean.tiff'
        cleaned_copy = './tests/data/clean.cleaned.tiff'
        shutil.copy('./tests/data/dirty.tiff', working_copy)

        parser = images.TiffParser(working_copy)
        self.assertEqual(parser.get_meta()['ImageDescription'], 'OLYMPUS DIGITAL CAMERA ')

        parser.lightweight_cleaning = True
        self.assertTrue(parser.remove_all())

        parser = images.TiffParser(cleaned_copy)
        remaining = {
            'Orientation': 'Horizontal (normal)',
            'ResolutionUnit': 'inches',
            'XResolution': 72,
            'YResolution': 72
        }
        self.assertEqual(parser.get_meta(), remaining)

        for leftover in (working_copy, cleaned_copy):
            os.remove(leftover)

View File

@@ -0,0 +1,76 @@
#!/usr/bin/env python3
import unittest
import shutil
import os
from libmat2 import pdf, images, torrent
class TestLightWeightCleaning(unittest.TestCase):
    """Data-driven checks of cleaning with ``lightweight_cleaning`` enabled."""

    # One entry per tested format:
    #   name          -- extension of the ./tests/data/dirty.<name> fixture
    #   parser        -- parser class to instantiate on the file
    #   meta          -- key/value pairs that must be reported before cleaning
    #   expected_meta -- exact get_meta() result expected from the cleaned file
    data = [
        {
            'name': 'pdf',
            'parser': pdf.PDFParser,
            'meta': {'producer': 'pdfTeX-1.40.14'},
            'expected_meta': {'creation-date': -1, 'format': 'PDF-1.5', 'mod-date': -1},
        }, {
            'name': 'png',
            'parser': images.PNGParser,
            'meta': {'Comment': 'This is a comment, be careful!'},
            'expected_meta': {},
        }, {
            'name': 'jpg',
            'parser': images.JPGParser,
            'meta': {'Comment': 'Created with GIMP'},
            'expected_meta': {},
        }, {
            'name': 'torrent',
            'parser': torrent.TorrentParser,
            'meta': {'created by': b'mktorrent 1.0'},
            'expected_meta': {},
        }, {
            'name': 'tiff',
            'parser': images.TiffParser,
            'meta': {'ImageDescription': 'OLYMPUS DIGITAL CAMERA '},
            'expected_meta': {
                'Orientation': 'Horizontal (normal)',
                'ResolutionUnit': 'inches',
                'XResolution': 72,
                'YResolution': 72,
            },
        },
    ]

    def test_all(self):
        """Lightweight-clean every fixture in ``data`` and compare metadata."""
        for case in self.data:
            # subTest labels each failure with the format name and lets the
            # remaining formats run even if one of them fails.
            with self.subTest(case=case['name']):
                target = './tests/data/clean.' + case['name']
                shutil.copy('./tests/data/dirty.' + case['name'], target)

                p1 = case['parser'](target)
                meta = p1.get_meta()
                for key, expected in case['meta'].items():
                    self.assertEqual(meta[key], expected)

                p1.lightweight_cleaning = True
                self.assertTrue(p1.remove_all())

                p2 = case['parser'](p1.output_filename)
                self.assertEqual(p2.get_meta(), case['expected_meta'])

                os.remove(target)
                os.remove(p1.output_filename)

    def test_exiftool_overwrite(self):
        """Cleaning must succeed when the output file already exists."""
        target = './tests/data/clean.png'
        shutil.copy('./tests/data/dirty.png', target)

        p1 = images.PNGParser(target)
        p1.lightweight_cleaning = True

        # Put a dirty file at the output path before cleaning, so that
        # remove_all() has to replace it rather than create a fresh file.
        shutil.copy('./tests/data/dirty.png', p1.output_filename)
        self.assertTrue(p1.remove_all())

        p2 = images.PNGParser(p1.output_filename)
        self.assertEqual(p2.get_meta(), {})

        os.remove(target)
        os.remove(p1.output_filename)

View File

@@ -7,25 +7,26 @@ import os
from libmat2 import office, UnknownMemberPolicy
class TestPolicy(unittest.TestCase):
target = './tests/data/clean.docx'
def test_policy_omit(self):
shutil.copy('./tests/data/embedded.docx', './tests/data/clean.docx')
p = office.MSOfficeParser('./tests/data/clean.docx')
shutil.copy('./tests/data/embedded.docx', self.target)
p = office.MSOfficeParser(self.target)
p.unknown_member_policy = UnknownMemberPolicy.OMIT
self.assertTrue(p.remove_all())
os.remove('./tests/data/clean.docx')
os.remove('./tests/data/clean.cleaned.docx')
os.remove(p.filename)
def test_policy_keep(self):
shutil.copy('./tests/data/embedded.docx', './tests/data/clean.docx')
p = office.MSOfficeParser('./tests/data/clean.docx')
shutil.copy('./tests/data/embedded.docx', self.target)
p = office.MSOfficeParser(self.target)
p.unknown_member_policy = UnknownMemberPolicy.KEEP
self.assertTrue(p.remove_all())
os.remove('./tests/data/clean.docx')
os.remove('./tests/data/clean.cleaned.docx')
os.remove(p.filename)
os.remove(p.output_filename)
def test_policy_unknown(self):
shutil.copy('./tests/data/embedded.docx', './tests/data/clean.docx')
p = office.MSOfficeParser('./tests/data/clean.docx')
shutil.copy('./tests/data/embedded.docx', self.target)
p = office.MSOfficeParser(self.target)
with self.assertRaises(ValueError):
p.unknown_member_policy = UnknownMemberPolicy('unknown_policy_name_totally_invalid')
os.remove('./tests/data/clean.docx')
os.remove(p.filename)

View File

@@ -0,0 +1,3 @@
# Words to be ignored by codespell.
# Put one word per line and sort alphabetically.
process'